agent: Handle attempts to use hallucinated tools (#29946)
This change:
1. Catches attempts to use missing tools. When this happens, we now send the Agent a message listing the available tools, after which the Agent can recover gracefully. Prior behavior: the thread would stop in a broken state. Example of a hallucinated call and the message we send back:
2. Adds evals for hallucinated tool use and imagined edits.
3. Adds the ability to configure a profile name in evals.

Release Notes:
- N/A
This commit is contained in:
parent
7dfbe0b908
commit
8199664a5a
14 changed files with 111 additions and 0 deletions
|
@ -1070,6 +1070,22 @@ impl ActiveThread {
|
|||
cx,
|
||||
);
|
||||
}
|
||||
ThreadEvent::MissingToolUse {
|
||||
tool_use_id,
|
||||
ui_text,
|
||||
} => {
|
||||
self.render_tool_use_markdown(
|
||||
tool_use_id.clone(),
|
||||
ui_text,
|
||||
"",
|
||||
self.thread
|
||||
.read(cx)
|
||||
.output_for_tool(tool_use_id)
|
||||
.map(|output| output.clone().into())
|
||||
.unwrap_or("".into()),
|
||||
cx,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1372,6 +1372,7 @@ impl AgentDiff {
|
|||
| ThreadEvent::StreamedAssistantThinking(_, _)
|
||||
| ThreadEvent::StreamedToolUse { .. }
|
||||
| ThreadEvent::InvalidToolInput { .. }
|
||||
| ThreadEvent::MissingToolUse { .. }
|
||||
| ThreadEvent::MessageAdded(_)
|
||||
| ThreadEvent::MessageEdited(_)
|
||||
| ThreadEvent::MessageDeleted(_)
|
||||
|
|
|
@ -1911,12 +1911,54 @@ impl Thread {
|
|||
cx,
|
||||
);
|
||||
}
|
||||
} else {
|
||||
self.handle_hallucinated_tool_use(
|
||||
tool_use.id.clone(),
|
||||
tool_use.name.clone(),
|
||||
window,
|
||||
cx,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
pending_tool_uses
|
||||
}
|
||||
|
||||
pub fn handle_hallucinated_tool_use(
|
||||
&mut self,
|
||||
tool_use_id: LanguageModelToolUseId,
|
||||
hallucinated_tool_name: Arc<str>,
|
||||
window: Option<AnyWindowHandle>,
|
||||
cx: &mut Context<Thread>,
|
||||
) {
|
||||
let available_tools = self.tools.read(cx).enabled_tools(cx);
|
||||
|
||||
let tool_list = available_tools
|
||||
.iter()
|
||||
.map(|tool| format!("- {}: {}", tool.name(), tool.description()))
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n");
|
||||
|
||||
let error_message = format!(
|
||||
"The tool '{}' doesn't exist or is not enabled. Available tools:\n{}",
|
||||
hallucinated_tool_name, tool_list
|
||||
);
|
||||
|
||||
let pending_tool_use = self.tool_use.insert_tool_output(
|
||||
tool_use_id.clone(),
|
||||
hallucinated_tool_name,
|
||||
Err(anyhow!("Missing tool call: {error_message}")),
|
||||
self.configured_model.as_ref(),
|
||||
);
|
||||
|
||||
cx.emit(ThreadEvent::MissingToolUse {
|
||||
tool_use_id: tool_use_id.clone(),
|
||||
ui_text: error_message.into(),
|
||||
});
|
||||
|
||||
self.tool_finished(tool_use_id, pending_tool_use, false, window, cx);
|
||||
}
|
||||
|
||||
pub fn receive_invalid_tool_json(
|
||||
&mut self,
|
||||
tool_use_id: LanguageModelToolUseId,
|
||||
|
@ -2574,6 +2616,10 @@ pub enum ThreadEvent {
|
|||
ui_text: Arc<str>,
|
||||
input: serde_json::Value,
|
||||
},
|
||||
MissingToolUse {
|
||||
tool_use_id: LanguageModelToolUseId,
|
||||
ui_text: Arc<str>,
|
||||
},
|
||||
InvalidToolInput {
|
||||
tool_use_id: LanguageModelToolUseId,
|
||||
ui_text: Arc<str>,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue