E2E Claude tests (#34702)

- **Fix cancellation of tool calls**
- **Make tool_call test more resilient**
- **Fix tool call confirmation test**

Release Notes:

- N/A
This commit is contained in:
Ben Brandt 2025-07-18 15:17:41 +02:00 committed by GitHub
parent fd05f17fa7
commit cfe1adc792
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 188 additions and 38 deletions

1
Cargo.lock generated
View file

@ -158,6 +158,7 @@ dependencies = [
"serde_json", "serde_json",
"settings", "settings",
"smol", "smol",
"strum 0.27.1",
"tempfile", "tempfile",
"ui", "ui",
"util", "util",

View file

@ -664,7 +664,7 @@ impl AcpThread {
cx: &mut Context<Self>, cx: &mut Context<Self>,
) -> Result<ToolCallRequest> { ) -> Result<ToolCallRequest> {
let project = self.project.read(cx).languages().clone(); let project = self.project.read(cx).languages().clone();
let Some((_, call)) = self.tool_call_mut(tool_call_id) else { let Some((idx, call)) = self.tool_call_mut(tool_call_id) else {
anyhow::bail!("Tool call not found"); anyhow::bail!("Tool call not found");
}; };
@ -675,6 +675,8 @@ impl AcpThread {
respond_tx: tx, respond_tx: tx,
}; };
cx.emit(AcpThreadEvent::EntryUpdated(idx));
Ok(ToolCallRequest { Ok(ToolCallRequest {
id: tool_call_id, id: tool_call_id,
outcome: rx, outcome: rx,
@ -768,8 +770,13 @@ impl AcpThread {
let language_registry = self.project.read(cx).languages().clone(); let language_registry = self.project.read(cx).languages().clone();
let (ix, call) = self.tool_call_mut(id).context("Entry not found")?; let (ix, call) = self.tool_call_mut(id).context("Entry not found")?;
call.content = new_content if let Some(new_content) = new_content {
.map(|new_content| ToolCallContent::from_acp(new_content, language_registry, cx)); call.content = Some(ToolCallContent::from_acp(
new_content,
language_registry,
cx,
));
}
match &mut call.status { match &mut call.status {
ToolCallStatus::Allowed { status } => { ToolCallStatus::Allowed { status } => {

View file

@ -33,6 +33,7 @@ serde.workspace = true
serde_json.workspace = true serde_json.workspace = true
settings.workspace = true settings.workspace = true
smol.workspace = true smol.workspace = true
strum.workspace = true
tempfile.workspace = true tempfile.workspace = true
ui.workspace = true ui.workspace = true
util.workspace = true util.workspace = true

View file

@ -281,14 +281,18 @@ impl ClaudeAgentConnection {
} => { } => {
let id = tool_id_map.borrow_mut().remove(&tool_use_id); let id = tool_id_map.borrow_mut().remove(&tool_use_id);
if let Some(id) = id { if let Some(id) = id {
let content = content.to_string();
delegate delegate
.update_tool_call(UpdateToolCallParams { .update_tool_call(UpdateToolCallParams {
tool_call_id: id, tool_call_id: id,
status: acp::ToolCallStatus::Finished, status: acp::ToolCallStatus::Finished,
content: Some(ToolCallContent::Markdown { // Don't unset existing content
// For now we only include text content content: (!content.is_empty()).then_some(
markdown: content.to_string(), ToolCallContent::Markdown {
}), // For now we only include text content
markdown: content,
},
),
}) })
.await .await
.log_err(); .log_err();
@ -577,7 +581,7 @@ pub(crate) mod tests {
use super::*; use super::*;
use serde_json::json; use serde_json::json;
// crate::common_e2e_tests!(ClaudeCode); crate::common_e2e_tests!(ClaudeCode);
pub fn local_command() -> AgentServerCommand { pub fn local_command() -> AgentServerCommand {
AgentServerCommand { AgentServerCommand {

View file

@ -118,13 +118,106 @@ impl ClaudeTool {
pub fn content(&self) -> Option<acp::ToolCallContent> { pub fn content(&self) -> Option<acp::ToolCallContent> {
match &self { match &self {
ClaudeTool::Other { input, .. } => Some(acp::ToolCallContent::Markdown { Self::Other { input, .. } => Some(acp::ToolCallContent::Markdown {
markdown: format!( markdown: format!(
"```json\n{}```", "```json\n{}```",
serde_json::to_string_pretty(&input).unwrap_or("{}".to_string()) serde_json::to_string_pretty(&input).unwrap_or("{}".to_string())
), ),
}), }),
_ => None, Self::Task(Some(params)) => Some(acp::ToolCallContent::Markdown {
markdown: params.prompt.clone(),
}),
Self::NotebookRead(Some(params)) => Some(acp::ToolCallContent::Markdown {
markdown: params.notebook_path.display().to_string(),
}),
Self::NotebookEdit(Some(params)) => Some(acp::ToolCallContent::Markdown {
markdown: params.new_source.clone(),
}),
Self::Terminal(Some(params)) => Some(acp::ToolCallContent::Markdown {
markdown: format!(
"`{}`\n\n{}",
params.command,
params.description.as_deref().unwrap_or_default()
),
}),
Self::ReadFile(Some(params)) => Some(acp::ToolCallContent::Markdown {
markdown: params.abs_path.display().to_string(),
}),
Self::Ls(Some(params)) => Some(acp::ToolCallContent::Markdown {
markdown: params.path.display().to_string(),
}),
Self::Glob(Some(params)) => Some(acp::ToolCallContent::Markdown {
markdown: params.to_string(),
}),
Self::Grep(Some(params)) => Some(acp::ToolCallContent::Markdown {
markdown: format!("`{params}`"),
}),
Self::WebFetch(Some(params)) => Some(acp::ToolCallContent::Markdown {
markdown: params.prompt.clone(),
}),
Self::WebSearch(Some(params)) => Some(acp::ToolCallContent::Markdown {
markdown: params.to_string(),
}),
Self::TodoWrite(Some(params)) => Some(acp::ToolCallContent::Markdown {
markdown: params
.todos
.iter()
.map(|todo| {
format!(
"- {} {}: {}",
match todo.status {
TodoStatus::Completed => "✅",
TodoStatus::InProgress => "🚧",
TodoStatus::Pending => "⬜",
},
todo.priority,
todo.content
)
})
.join("\n"),
}),
Self::ExitPlanMode(Some(params)) => Some(acp::ToolCallContent::Markdown {
markdown: params.plan.clone(),
}),
Self::Edit(Some(params)) => Some(acp::ToolCallContent::Diff {
diff: acp::Diff {
path: params.abs_path.clone(),
old_text: Some(params.old_text.clone()),
new_text: params.new_text.clone(),
},
}),
Self::Write(Some(params)) => Some(acp::ToolCallContent::Diff {
diff: acp::Diff {
path: params.file_path.clone(),
old_text: None,
new_text: params.content.clone(),
},
}),
Self::MultiEdit(Some(params)) => {
// todo: show multiple edits in a multibuffer?
params.edits.first().map(|edit| acp::ToolCallContent::Diff {
diff: acp::Diff {
path: params.file_path.clone(),
old_text: Some(edit.old_string.clone()),
new_text: edit.new_string.clone(),
},
})
}
Self::Task(None)
| Self::NotebookRead(None)
| Self::NotebookEdit(None)
| Self::Terminal(None)
| Self::ReadFile(None)
| Self::Ls(None)
| Self::Glob(None)
| Self::Grep(None)
| Self::WebFetch(None)
| Self::WebSearch(None)
| Self::TodoWrite(None)
| Self::ExitPlanMode(None)
| Self::Edit(None)
| Self::Write(None)
| Self::MultiEdit(None) => None,
} }
} }
@ -513,7 +606,7 @@ impl std::fmt::Display for GrepToolParams {
} }
} }
#[derive(Deserialize, Serialize, JsonSchema, Debug)] #[derive(Deserialize, Serialize, JsonSchema, strum::Display, Debug)]
#[serde(rename_all = "snake_case")] #[serde(rename_all = "snake_case")]
pub enum TodoPriority { pub enum TodoPriority {
High, High,

View file

@ -111,18 +111,21 @@ pub async fn test_tool_call(server: impl AgentServer + 'static, cx: &mut TestApp
.await .await
.unwrap(); .unwrap();
thread.read_with(cx, |thread, _cx| { thread.read_with(cx, |thread, _cx| {
assert!(matches!( assert!(thread.entries().iter().any(|entry| {
&thread.entries()[2], matches!(
AgentThreadEntry::ToolCall(ToolCall { entry,
status: ToolCallStatus::Allowed { .. }, AgentThreadEntry::ToolCall(ToolCall {
.. status: ToolCallStatus::Allowed { .. },
}) ..
)); })
)
assert!(matches!( }));
thread.entries()[3], assert!(
AgentThreadEntry::AssistantMessage(_) thread
)); .entries()
.iter()
.any(|entry| { matches!(entry, AgentThreadEntry::AssistantMessage(_)) })
);
}); });
} }
@ -134,10 +137,26 @@ pub async fn test_tool_call_with_confirmation(
let project = Project::test(fs, [path!("/private/tmp").as_ref()], cx).await; let project = Project::test(fs, [path!("/private/tmp").as_ref()], cx).await;
let thread = new_test_thread(server, project.clone(), "/private/tmp", cx).await; let thread = new_test_thread(server, project.clone(), "/private/tmp", cx).await;
let full_turn = thread.update(cx, |thread, cx| { let full_turn = thread.update(cx, |thread, cx| {
thread.send_raw(r#"Run `echo "Hello, world!"`"#, cx) thread.send_raw(
r#"Run `touch hello.txt && echo "Hello, world!" | tee hello.txt`"#,
cx,
)
}); });
run_until_first_tool_call(&thread, cx).await; run_until_first_tool_call(
&thread,
|entry| {
matches!(
entry,
AgentThreadEntry::ToolCall(ToolCall {
status: ToolCallStatus::WaitingForConfirmation { .. },
..
})
)
},
cx,
)
.await;
let tool_call_id = thread.read_with(cx, |thread, _cx| { let tool_call_id = thread.read_with(cx, |thread, _cx| {
let AgentThreadEntry::ToolCall(ToolCall { let AgentThreadEntry::ToolCall(ToolCall {
@ -148,12 +167,16 @@ pub async fn test_tool_call_with_confirmation(
.. ..
}, },
.. ..
}) = &thread.entries()[2] }) = &thread
.entries()
.iter()
.find(|entry| matches!(entry, AgentThreadEntry::ToolCall(_)))
.unwrap()
else { else {
panic!(); panic!();
}; };
assert_eq!(root_command, "echo"); assert!(root_command.contains("touch"));
*id *id
}); });
@ -161,13 +184,13 @@ pub async fn test_tool_call_with_confirmation(
thread.update(cx, |thread, cx| { thread.update(cx, |thread, cx| {
thread.authorize_tool_call(tool_call_id, acp::ToolCallConfirmationOutcome::Allow, cx); thread.authorize_tool_call(tool_call_id, acp::ToolCallConfirmationOutcome::Allow, cx);
assert!(matches!( assert!(thread.entries().iter().any(|entry| matches!(
&thread.entries()[2], entry,
AgentThreadEntry::ToolCall(ToolCall { AgentThreadEntry::ToolCall(ToolCall {
status: ToolCallStatus::Allowed { .. }, status: ToolCallStatus::Allowed { .. },
.. ..
}) })
)); )));
}); });
full_turn.await.unwrap(); full_turn.await.unwrap();
@ -177,15 +200,19 @@ pub async fn test_tool_call_with_confirmation(
content: Some(ToolCallContent::Markdown { markdown }), content: Some(ToolCallContent::Markdown { markdown }),
status: ToolCallStatus::Allowed { .. }, status: ToolCallStatus::Allowed { .. },
.. ..
}) = &thread.entries()[2] }) = thread
.entries()
.iter()
.find(|entry| matches!(entry, AgentThreadEntry::ToolCall(_)))
.unwrap()
else { else {
panic!(); panic!();
}; };
markdown.read_with(cx, |md, _cx| { markdown.read_with(cx, |md, _cx| {
assert!( assert!(
md.source().contains("Hello, world!"), md.source().contains("Hello"),
r#"Expected '{}' to contain "Hello, world!""#, r#"Expected '{}' to contain "Hello""#,
md.source() md.source()
); );
}); });
@ -198,10 +225,26 @@ pub async fn test_cancel(server: impl AgentServer + 'static, cx: &mut TestAppCon
let project = Project::test(fs, [path!("/private/tmp").as_ref()], cx).await; let project = Project::test(fs, [path!("/private/tmp").as_ref()], cx).await;
let thread = new_test_thread(server, project.clone(), "/private/tmp", cx).await; let thread = new_test_thread(server, project.clone(), "/private/tmp", cx).await;
let full_turn = thread.update(cx, |thread, cx| { let full_turn = thread.update(cx, |thread, cx| {
thread.send_raw(r#"Run `echo "Hello, world!"`"#, cx) thread.send_raw(
r#"Run `touch hello.txt && echo "Hello, world!" >> hello.txt`"#,
cx,
)
}); });
let first_tool_call_ix = run_until_first_tool_call(&thread, cx).await; let first_tool_call_ix = run_until_first_tool_call(
&thread,
|entry| {
matches!(
entry,
AgentThreadEntry::ToolCall(ToolCall {
status: ToolCallStatus::WaitingForConfirmation { .. },
..
})
)
},
cx,
)
.await;
thread.read_with(cx, |thread, _cx| { thread.read_with(cx, |thread, _cx| {
let AgentThreadEntry::ToolCall(ToolCall { let AgentThreadEntry::ToolCall(ToolCall {
@ -217,7 +260,7 @@ pub async fn test_cancel(server: impl AgentServer + 'static, cx: &mut TestAppCon
panic!("{:?}", thread.entries()[1]); panic!("{:?}", thread.entries()[1]);
}; };
assert_eq!(root_command, "echo"); assert!(root_command.contains("touch"));
*id *id
}); });
@ -340,6 +383,7 @@ pub async fn new_test_thread(
pub async fn run_until_first_tool_call( pub async fn run_until_first_tool_call(
thread: &Entity<AcpThread>, thread: &Entity<AcpThread>,
wait_until: impl Fn(&AgentThreadEntry) -> bool + 'static,
cx: &mut TestAppContext, cx: &mut TestAppContext,
) -> usize { ) -> usize {
let (mut tx, mut rx) = mpsc::channel::<usize>(1); let (mut tx, mut rx) = mpsc::channel::<usize>(1);
@ -347,7 +391,7 @@ pub async fn run_until_first_tool_call(
let subscription = cx.update(|cx| { let subscription = cx.update(|cx| {
cx.subscribe(thread, move |thread, _, cx| { cx.subscribe(thread, move |thread, _, cx| {
for (ix, entry) in thread.read(cx).entries().iter().enumerate() { for (ix, entry) in thread.read(cx).entries().iter().enumerate() {
if matches!(entry, AgentThreadEntry::ToolCall(_)) { if wait_until(entry) {
return tx.try_send(ix).unwrap(); return tx.try_send(ix).unwrap();
} }
} }
@ -357,7 +401,7 @@ pub async fn run_until_first_tool_call(
select! { select! {
// We have to use a smol timer here because // We have to use a smol timer here because
// cx.background_executor().timer isn't real in the test context // cx.background_executor().timer isn't real in the test context
_ = futures::FutureExt::fuse(smol::Timer::after(Duration::from_secs(10))) => { _ = futures::FutureExt::fuse(smol::Timer::after(Duration::from_secs(20))) => {
panic!("Timeout waiting for tool call") panic!("Timeout waiting for tool call")
} }
ix = rx.next().fuse() => { ix = rx.next().fuse() => {