Fix tool hallucination event order

This commit is contained in:
Ben Brandt 2025-08-07 17:29:45 +02:00
parent 342247f60f
commit 0d24686a9c
No known key found for this signature in database
GPG key ID: D4618C5D3B500571
2 changed files with 78 additions and 16 deletions

View file

@ -286,6 +286,63 @@ async fn test_tool_authorization(cx: &mut TestAppContext) {
);
}
/// Checks the event order produced when the model asks for a tool named
/// `nonexistent_tool` (presumably not registered on the thread — confirm against `setup`):
/// first a `ToolCall` event in `Pending` status titled with the requested name, then a
/// `ToolCallUpdate` whose status is `Failed`.
#[gpui::test]
async fn test_tool_hallucination(cx: &mut TestAppContext) {
    let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await;
    let fake_model = model.as_fake();

    // Kick off a turn and let the thread reach the point where it awaits the model stream.
    let mut events = thread.update(cx, |thread, cx| thread.send(model.clone(), "abc", cx));
    cx.run_until_parked();

    // Stream a single, fully-formed tool use and then close the completion stream.
    let hallucinated_tool_use = LanguageModelToolUse {
        id: "tool_id_1".into(),
        name: "nonexistent_tool".into(),
        raw_input: "{}".into(),
        input: json!({}),
        is_input_complete: true,
    };
    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
        hallucinated_tool_use,
    ));
    fake_model.end_last_completion_stream();

    // The tool call must be announced before its failure is reported.
    let announced = expect_tool_call(&mut events).await;
    assert_eq!(announced.title, "nonexistent_tool");
    assert_eq!(announced.status, acp::ToolCallStatus::Pending);

    let failure = expect_tool_call_update(&mut events).await;
    assert_eq!(failure.fields.status, Some(acp::ToolCallStatus::Failed));
}
/// Awaits the next item on `events` and returns it as an `acp::ToolCall`.
///
/// # Panics
///
/// Panics if the stream yields no further item, if the item is an `Err`, or if the event is
/// anything other than `AgentResponseEvent::ToolCall`.
async fn expect_tool_call(
    events: &mut UnboundedReceiver<Result<AgentResponseEvent, LanguageModelCompletionError>>,
) -> acp::ToolCall {
    let event = events
        .next()
        .await
        // Name the event this helper actually waits for, so a closed stream is reported
        // accurately instead of being blamed on a missing authorization event.
        .expect("no tool call event received")
        .unwrap();
    match event {
        AgentResponseEvent::ToolCall(tool_call) => tool_call,
        event => panic!("Unexpected event {event:?}"),
    }
}
/// Awaits the next item on `events` and returns it as an `acp::ToolCallUpdate`.
///
/// # Panics
///
/// Panics if the stream yields no further item, if the item is an `Err`, or if the event is
/// anything other than `AgentResponseEvent::ToolCallUpdate`.
async fn expect_tool_call_update(
    events: &mut UnboundedReceiver<Result<AgentResponseEvent, LanguageModelCompletionError>>,
) -> acp::ToolCallUpdate {
    let event = events
        .next()
        .await
        // Name the event this helper actually waits for, so a closed stream is reported
        // accurately instead of being blamed on a missing authorization event.
        .expect("no tool call update event received")
        .unwrap();
    match event {
        AgentResponseEvent::ToolCallUpdate(tool_call_update) => tool_call_update,
        event => panic!("Unexpected event {event:?}"),
    }
}
async fn next_tool_call_authorization(
events: &mut UnboundedReceiver<Result<AgentResponseEvent, LanguageModelCompletionError>>,
) -> ToolCallAuthorization {

View file

@ -434,23 +434,10 @@ impl Thread {
event_stream: &AgentResponseEventStream,
cx: &mut Context<Self>,
) -> Option<Task<LanguageModelToolResult>> {
let Some(tool) = self.tools.get(tool_use.name.as_ref()).cloned() else {
if tool_use.is_input_complete {
let content = format!("No tool named {} exists", tool_use.name);
return Some(Task::ready(LanguageModelToolResult {
content: LanguageModelToolResultContent::Text(Arc::from(content)),
tool_use_id: tool_use.id,
tool_name: tool_use.name,
is_error: true,
output: None,
}));
} else {
return None;
}
};
cx.notify();
let tool = self.tools.get(tool_use.name.as_ref()).cloned();
self.pending_tool_uses
.insert(tool_use.id.clone(), tool_use.clone());
let last_message = self.last_assistant_message();
@ -468,8 +455,15 @@ impl Thread {
true
}
});
if push_new_tool_use {
event_stream.send_tool_call(&tool_use, tool.kind());
event_stream.send_tool_call(
&tool_use,
// todo! add default
tool.as_ref()
.map(|t| t.kind())
.unwrap_or(acp::ToolKind::Other),
);
last_message
.content
.push(MessageContent::ToolUse(tool_use.clone()));
@ -487,6 +481,17 @@ impl Thread {
return None;
}
let Some(tool) = tool else {
let content = format!("No tool named {} exists", tool_use.name);
return Some(Task::ready(LanguageModelToolResult {
content: LanguageModelToolResultContent::Text(Arc::from(content)),
tool_use_id: tool_use.id,
tool_name: tool_use.name,
is_error: true,
output: None,
}));
};
let tool_result = self.run_tool(tool, tool_use.clone(), event_stream.clone(), cx);
Some(cx.foreground_executor().spawn(async move {
match tool_result.await {