agent: Handle attempts to use hallucinated tools (#29946)

This change: 1. Catches attempts to use missing tools. If this happens, we now send the Agent a message listing the available tools, after which the Agent can recover gracefully. Prior behavior: the thread would stop in a broken state. Example of a hallucinated call and the message we send back: ![image](https://github.com/user-attachments/assets/92a8f700-b192-4038-8c7e-0a74ca2e0146) 2. Adds evals for hallucinated tool use and imagined edits. 3. Adds the ability to configure a profile name in evals. Release Notes: - N/A
2025-05-05 22:31:11 +03:00 · 2025-05-05 22:31:11 +03:00 · 8199664a5a
commit 8199664a5a
parent 7dfbe0b908
14 changed files with 111 additions and 0 deletions
--- a/crates/agent/src/active_thread.rs
+++ b/crates/agent/src/active_thread.rs
@ -1070,6 +1070,22 @@ impl ActiveThread {
                    cx,
                );
            }
+            ThreadEvent::MissingToolUse {
+                tool_use_id,
+                ui_text,
+            } => {
+                self.render_tool_use_markdown(
+                    tool_use_id.clone(),
+                    ui_text,
+                    "",
+                    self.thread
+                        .read(cx)
+                        .output_for_tool(tool_use_id)
+                        .map(|output| output.clone().into())
+                        .unwrap_or("".into()),
+                    cx,
+                );
+            }
        }
    }

--- a/crates/agent/src/agent_diff.rs
+++ b/crates/agent/src/agent_diff.rs
@ -1372,6 +1372,7 @@ impl AgentDiff {
            | ThreadEvent::StreamedAssistantThinking(_, _)
            | ThreadEvent::StreamedToolUse { .. }
            | ThreadEvent::InvalidToolInput { .. }
+            | ThreadEvent::MissingToolUse { .. }
            | ThreadEvent::MessageAdded(_)
            | ThreadEvent::MessageEdited(_)
            | ThreadEvent::MessageDeleted(_)
--- a/crates/agent/src/thread.rs
+++ b/crates/agent/src/thread.rs
@ -1911,12 +1911,54 @@ impl Thread {
                        cx,
                    );
                }
+            } else {
+                self.handle_hallucinated_tool_use(
+                    tool_use.id.clone(),
+                    tool_use.name.clone(),
+                    window,
+                    cx,
+                );
            }
        }

        pending_tool_uses
    }

+    pub fn handle_hallucinated_tool_use(
+        &mut self,
+        tool_use_id: LanguageModelToolUseId,
+        hallucinated_tool_name: Arc<str>,
+        window: Option<AnyWindowHandle>,
+        cx: &mut Context<Thread>,
+    ) {
+        let available_tools = self.tools.read(cx).enabled_tools(cx);
+
+        let tool_list = available_tools
+            .iter()
+            .map(|tool| format!("- {}: {}", tool.name(), tool.description()))
+            .collect::<Vec<_>>()
+            .join("\n");
+
+        let error_message = format!(
+            "The tool '{}' doesn't exist or is not enabled. Available tools:\n{}",
+            hallucinated_tool_name, tool_list
+        );
+
+        let pending_tool_use = self.tool_use.insert_tool_output(
+            tool_use_id.clone(),
+            hallucinated_tool_name,
+            Err(anyhow!("Missing tool call: {error_message}")),
+            self.configured_model.as_ref(),
+        );
+
+        cx.emit(ThreadEvent::MissingToolUse {
+            tool_use_id: tool_use_id.clone(),
+            ui_text: error_message.into(),
+        });
+
+        self.tool_finished(tool_use_id, pending_tool_use, false, window, cx);
+    }
+
    pub fn receive_invalid_tool_json(
        &mut self,
        tool_use_id: LanguageModelToolUseId,
@ -2574,6 +2616,10 @@ pub enum ThreadEvent {
        ui_text: Arc<str>,
        input: serde_json::Value,
    },
+    MissingToolUse {
+        tool_use_id: LanguageModelToolUseId,
+        ui_text: Arc<str>,
+    },
    InvalidToolInput {
        tool_use_id: LanguageModelToolUseId,
        ui_text: Arc<str>,