agent: Handle attempts to use hallucinated tools (#29946)
This change:
1. Catches attempts to use missing tools. If this happens, we now send the Agent a message listing the available tools, after which the Agent can recover gracefully. Prior behavior: the thread would stop in a broken state. Example of a hallucinated call and the message we send back: (example not reproduced in this view)
2. Adds evals for hallucinated tool use and imagined edits.
3. Adds the ability to configure a profile name in evals.

Release Notes:
- N/A
This commit is contained in:
parent
7dfbe0b908
commit
8199664a5a
14 changed files with 111 additions and 0 deletions
|
@ -12,6 +12,7 @@ use crate::{
|
|||
};
|
||||
use agent::{ContextLoadResult, Thread, ThreadEvent};
|
||||
use anyhow::{Result, anyhow};
|
||||
use assistant_settings::AgentProfileId;
|
||||
use async_trait::async_trait;
|
||||
use buffer_diff::DiffHunkStatus;
|
||||
use collections::HashMap;
|
||||
|
@ -46,6 +47,7 @@ pub struct ExampleMetadata {
|
|||
pub revision: String,
|
||||
pub language_server: Option<LanguageServer>,
|
||||
pub max_assertions: Option<usize>,
|
||||
pub profile_id: AgentProfileId,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
|
@ -268,6 +270,12 @@ impl ExampleContext {
|
|||
ThreadEvent::InvalidToolInput { .. } => {
|
||||
println!("{log_prefix} invalid tool input");
|
||||
}
|
||||
ThreadEvent::MissingToolUse {
|
||||
tool_use_id: _,
|
||||
ui_text,
|
||||
} => {
|
||||
println!("{log_prefix} {ui_text}");
|
||||
}
|
||||
ThreadEvent::ToolConfirmationNeeded => {
|
||||
panic!(
|
||||
"{}Bug: Tool confirmation should not be required in eval",
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue