agent: Handle attempts to use hallucinated tools (#29946)

This change:

1. Catches attempts to use missing tools. When this happens, we now send
the Agent a message listing the available tools, after which the Agent can
recover gracefully. Previously, the thread would stop in a broken state.

Example of a hallucinated call and a message we send back: 

![image](https://github.com/user-attachments/assets/92a8f700-b192-4038-8c7e-0a74ca2e0146)

2. Adds evals for hallucinated tool use and imagined edits.
3. Adds the ability to configure a profile name in evals.



Release Notes:

- N/A
This commit is contained in:
Oleksiy Syvokon 2025-05-05 22:31:11 +03:00 committed by GitHub
parent 7dfbe0b908
commit 8199664a5a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
14 changed files with 111 additions and 0 deletions

View file

@ -1070,6 +1070,22 @@ impl ActiveThread {
cx,
);
}
ThreadEvent::MissingToolUse {
tool_use_id,
ui_text,
} => {
self.render_tool_use_markdown(
tool_use_id.clone(),
ui_text,
"",
self.thread
.read(cx)
.output_for_tool(tool_use_id)
.map(|output| output.clone().into())
.unwrap_or("".into()),
cx,
);
}
}
}

View file

@ -1372,6 +1372,7 @@ impl AgentDiff {
| ThreadEvent::StreamedAssistantThinking(_, _)
| ThreadEvent::StreamedToolUse { .. }
| ThreadEvent::InvalidToolInput { .. }
| ThreadEvent::MissingToolUse { .. }
| ThreadEvent::MessageAdded(_)
| ThreadEvent::MessageEdited(_)
| ThreadEvent::MessageDeleted(_)

View file

@ -1911,12 +1911,54 @@ impl Thread {
cx,
);
}
} else {
self.handle_hallucinated_tool_use(
tool_use.id.clone(),
tool_use.name.clone(),
window,
cx,
);
}
}
pending_tool_uses
}
/// Responds to a tool use whose tool name is not among the enabled tools.
///
/// Builds a message that names the requested tool and lists every enabled
/// tool as `- name: description`, then:
///
/// 1. stores that message as an error output for `tool_use_id`,
/// 2. emits [`ThreadEvent::MissingToolUse`] carrying the same text, and
/// 3. calls `tool_finished` for the tool use.
///
/// * `tool_use_id` - identifier of the offending tool use.
/// * `hallucinated_tool_name` - the tool name that was requested.
/// * `window` - optional window handle, forwarded to `tool_finished`.
/// * `cx` - thread context used to read the tool set and emit the event.
pub fn handle_hallucinated_tool_use(
    &mut self,
    tool_use_id: LanguageModelToolUseId,
    hallucinated_tool_name: Arc<str>,
    window: Option<AnyWindowHandle>,
    cx: &mut Context<Thread>,
) {
    // One bullet line per enabled tool.
    let enabled = self.tools.read(cx).enabled_tools(cx);
    let mut bullets: Vec<String> = Vec::new();
    for tool in enabled.iter() {
        bullets.push(format!("- {}: {}", tool.name(), tool.description()));
    }

    let error_message = format!(
        "The tool '{}' doesn't exist or is not enabled. Available tools:\n{}",
        hallucinated_tool_name,
        bullets.join("\n")
    );

    // Store the failure as the tool's output before notifying listeners.
    let pending = self.tool_use.insert_tool_output(
        tool_use_id.clone(),
        hallucinated_tool_name,
        Err(anyhow!("Missing tool call: {error_message}")),
        self.configured_model.as_ref(),
    );

    let event = ThreadEvent::MissingToolUse {
        tool_use_id: tool_use_id.clone(),
        ui_text: error_message.into(),
    };
    cx.emit(event);

    self.tool_finished(tool_use_id, pending, false, window, cx);
}
pub fn receive_invalid_tool_json(
&mut self,
tool_use_id: LanguageModelToolUseId,
@ -2574,6 +2616,10 @@ pub enum ThreadEvent {
ui_text: Arc<str>,
input: serde_json::Value,
},
MissingToolUse {
tool_use_id: LanguageModelToolUseId,
ui_text: Arc<str>,
},
InvalidToolInput {
tool_use_id: LanguageModelToolUseId,
ui_text: Arc<str>,