agent: Handle attempts to use hallucinated tools (#29946)

This change:

1. Catches attempts to use missing (hallucinated) tools. When this happens, we now send the Agent a message listing the available tools, after which the Agent can recover gracefully. Prior behavior: the thread would stop in a broken state. Example of a hallucinated call and the message we send back: ![image](https://github.com/user-attachments/assets/92a8f700-b192-4038-8c7e-0a74ca2e0146)
2. Adds evals for hallucinated tool use and imagined edits.
3. Adds the ability to configure a profile name in evals.

Release Notes:

- N/A
2025-05-05 22:31:11 +03:00 · 2025-05-05 22:31:11 +03:00 · 8199664a5a
commit 8199664a5a
parent 7dfbe0b908
14 changed files with 111 additions and 0 deletions
--- a/crates/eval/src/example.rs
+++ b/crates/eval/src/example.rs
@ -12,6 +12,7 @@ use crate::{
 };
 use agent::{ContextLoadResult, Thread, ThreadEvent};
 use anyhow::{Result, anyhow};
+use assistant_settings::AgentProfileId;
 use async_trait::async_trait;
 use buffer_diff::DiffHunkStatus;
 use collections::HashMap;
@ -46,6 +47,7 @@ pub struct ExampleMetadata {
    pub revision: String,
    pub language_server: Option<LanguageServer>,
    pub max_assertions: Option<usize>,
+    pub profile_id: AgentProfileId,
 }

 #[derive(Clone, Debug)]
@ -268,6 +270,12 @@ impl ExampleContext {
                ThreadEvent::InvalidToolInput { .. } => {
                    println!("{log_prefix} invalid tool input");
                }
+                ThreadEvent::MissingToolUse {
+                    tool_use_id: _,
+                    ui_text,
+                } => {
+                    println!("{log_prefix} {ui_text}");
+                }
                ThreadEvent::ToolConfirmationNeeded => {
                    panic!(
                        "{}Bug: Tool confirmation should not be required in eval",