agent: Handle attempts to use hallucinated tools (#29946)
This change:

1. Catches attempts to use missing tools. If this happens, we now send the Agent a message listing the available tools, after which the Agent can recover gracefully. Prior behavior: the thread would stop in a broken state. Example of a hallucinated call and the message we send back: [example not included in this text]
2. Adds evals for hallucinated tool use and imagined edits.
3. Adds the ability to configure a profile name in evals.

Release Notes:

- N/A
This commit is contained in:
parent
7dfbe0b908
commit
8199664a5a
14 changed files with 111 additions and 0 deletions
|
@@ -1,4 +1,5 @@
|
|||
use anyhow::Result;
|
||||
use assistant_settings::AgentProfileId;
|
||||
use async_trait::async_trait;
|
||||
use serde::Deserialize;
|
||||
use std::collections::BTreeMap;
|
||||
|
@@ -56,12 +57,19 @@ impl DeclarativeExample {
|
|||
None
|
||||
};
|
||||
|
||||
let profile_id = if let Some(profile_name) = base.profile_name {
|
||||
AgentProfileId(profile_name.into())
|
||||
} else {
|
||||
AgentProfileId::default()
|
||||
};
|
||||
|
||||
let metadata = ExampleMetadata {
|
||||
name,
|
||||
url: base.url,
|
||||
revision: base.revision,
|
||||
language_server,
|
||||
max_assertions: None,
|
||||
profile_id,
|
||||
};
|
||||
|
||||
Ok(DeclarativeExample {
|
||||
|
@@ -97,6 +105,8 @@ pub struct ExampleToml {
|
|||
pub allow_preexisting_diagnostics: bool,
|
||||
pub prompt: String,
|
||||
#[serde(default)]
|
||||
pub profile_name: Option<String>,
|
||||
#[serde(default)]
|
||||
pub diff_assertions: BTreeMap<String, String>,
|
||||
#[serde(default)]
|
||||
pub thread_assertions: BTreeMap<String, String>,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue