From 1b593f616f94f7eb6a76f7faaaf77473e29c19d9 Mon Sep 17 00:00:00 2001 From: Antonio Scandurra Date: Fri, 9 May 2025 08:58:45 +0200 Subject: [PATCH] Include `EditAgent`'s raw output when inspecting thread (#30337) This allows us to debug the raw edits that were generated when people report feedback, when running evals and when opening the thread as Markdown. Release Notes: - Improved debug output for agent threads. --- crates/agent/src/thread.rs | 7 +++++++ crates/assistant_tools/src/edit_agent.rs | 17 +++++++++-------- .../src/edit_agent/edit_parser.rs | 6 +++++- crates/assistant_tools/src/edit_agent/evals.rs | 6 +++--- crates/assistant_tools/src/edit_file_tool.rs | 6 ++++-- crates/eval/src/instance.rs | 9 +++++++++ 6 files changed, 37 insertions(+), 14 deletions(-) diff --git a/crates/agent/src/thread.rs b/crates/agent/src/thread.rs index b20687884f..7ef736caa0 100644 --- a/crates/agent/src/thread.rs +++ b/crates/agent/src/thread.rs @@ -2487,6 +2487,13 @@ impl Thread { writeln!(markdown, "**\n")?; writeln!(markdown, "{}", tool_result.content)?; + if let Some(output) = tool_result.output.as_ref() { + writeln!( + markdown, + "\n\nDebug Output:\n\n```json\n{}\n```\n", + serde_json::to_string_pretty(output)? 
+ )?; + } } } diff --git a/crates/assistant_tools/src/edit_agent.rs b/crates/assistant_tools/src/edit_agent.rs index e9fa4e9af7..e1856cd46e 100644 --- a/crates/assistant_tools/src/edit_agent.rs +++ b/crates/assistant_tools/src/edit_agent.rs @@ -20,7 +20,8 @@ use language_model::{ LanguageModelToolChoice, MessageContent, Role, }; use project::{AgentLocation, Project}; -use serde::Serialize; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; use std::{cmp, iter, mem, ops::Range, path::PathBuf, sync::Arc, task::Poll}; use streaming_diff::{CharOperation, StreamingDiff}; @@ -50,10 +51,10 @@ pub enum EditAgentOutputEvent { OldTextNotFound(SharedString), } -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)] pub struct EditAgentOutput { - pub _raw_edits: String, - pub _parser_metrics: EditParserMetrics, + pub raw_edits: String, + pub parser_metrics: EditParserMetrics, } #[derive(Clone)] @@ -186,8 +187,8 @@ impl EditAgent { } Ok(EditAgentOutput { - _raw_edits: raw_edits, - _parser_metrics: EditParserMetrics::default(), + raw_edits, + parser_metrics: EditParserMetrics::default(), }) } @@ -426,8 +427,8 @@ impl EditAgent { } } Ok(EditAgentOutput { - _raw_edits: raw_edits, - _parser_metrics: parser.finish(), + raw_edits, + parser_metrics: parser.finish(), }) }); (output, rx) diff --git a/crates/assistant_tools/src/edit_agent/edit_parser.rs b/crates/assistant_tools/src/edit_agent/edit_parser.rs index 6822b8206a..d3f6d15514 100644 --- a/crates/assistant_tools/src/edit_agent/edit_parser.rs +++ b/crates/assistant_tools/src/edit_agent/edit_parser.rs @@ -1,4 +1,6 @@ use derive_more::{Add, AddAssign}; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; use smallvec::SmallVec; use std::{cmp, mem, ops::Range}; @@ -13,7 +15,9 @@ pub enum EditParserEvent { NewTextChunk { chunk: String, done: bool }, } -#[derive(Clone, Debug, Default, PartialEq, Eq, Add, AddAssign)] +#[derive( + Clone, Debug, Default, PartialEq, Eq, Add, 
+ AddAssign, Serialize, Deserialize, JsonSchema, +)] pub struct EditParserMetrics { pub tags: usize, pub mismatched_tags: usize, diff --git a/crates/assistant_tools/src/edit_agent/evals.rs b/crates/assistant_tools/src/edit_agent/evals.rs index 0197b2ebc8..894da7ad34 100644 --- a/crates/assistant_tools/src/edit_agent/evals.rs +++ b/crates/assistant_tools/src/edit_agent/evals.rs @@ -1116,7 +1116,7 @@ fn eval(iterations: usize, expected_pass_ratio: f32, mut eval: EvalInput) { while let Ok(output) = rx.recv() { match output { Ok(output) => { - cumulative_parser_metrics += output.sample.edit_output._parser_metrics.clone(); + cumulative_parser_metrics += output.sample.edit_output.parser_metrics.clone(); eval_outputs.push(output.clone()); if output.assertion.score < 80 { failed_count += 1; @@ -1197,9 +1197,9 @@ impl Display for EvalOutput { writeln!( f, "Parser Metrics:\n{:#?}", - self.sample.edit_output._parser_metrics + self.sample.edit_output.parser_metrics )?; - writeln!(f, "Raw Edits:\n{}", self.sample.edit_output._raw_edits)?; + writeln!(f, "Raw Edits:\n{}", self.sample.edit_output.raw_edits)?; Ok(()) } } diff --git a/crates/assistant_tools/src/edit_file_tool.rs b/crates/assistant_tools/src/edit_file_tool.rs index 845b531081..bbc92ea735 100644 --- a/crates/assistant_tools/src/edit_file_tool.rs +++ b/crates/assistant_tools/src/edit_file_tool.rs @@ -1,6 +1,6 @@ use crate::{ Templates, - edit_agent::{EditAgent, EditAgentOutputEvent}, + edit_agent::{EditAgent, EditAgentOutput, EditAgentOutputEvent}, schema::json_schema_for, }; use anyhow::{Result, anyhow}; @@ -88,6 +88,7 @@ pub struct EditFileToolOutput { pub original_path: PathBuf, pub new_text: String, pub old_text: String, + pub raw_output: Option<EditAgentOutput>, } #[derive(Debug, Serialize, Deserialize, JsonSchema)] @@ -248,7 +249,7 @@ impl Tool for EditFileTool { EditAgentOutputEvent::OldTextNotFound(_) => hallucinated_old_text = true, } } - output.await?; + let agent_output = output.await?; project .update(cx, |project, cx| 
project.save_buffer(buffer.clone(), cx))? @@ -268,6 +269,7 @@ impl Tool for EditFileTool { original_path: project_path.path.to_path_buf(), new_text: new_text.clone(), old_text: old_text.clone(), + raw_output: Some(agent_output), }; if let Some(card) = card_clone { diff --git a/crates/eval/src/instance.rs b/crates/eval/src/instance.rs index a00967bc0a..bc0e2ac7b2 100644 --- a/crates/eval/src/instance.rs +++ b/crates/eval/src/instance.rs @@ -965,6 +965,15 @@ impl RequestMarkdown { messages.push_str("**ERROR:**\n"); } messages.push_str(&format!("{}\n\n", tool_result.content)); + + if let Some(output) = tool_result.output.as_ref() { + writeln!( + messages, + "**Debug Output**:\n\n```json\n{}\n```\n", + serde_json::to_string_pretty(output).unwrap() + ) + .unwrap(); + } } } }