Include EditAgent's raw output when inspecting thread (#30337)

This lets us inspect the raw edits the EditAgent generated in three
situations: when people report feedback, when running evals, and when
opening the thread as Markdown.

Release Notes:

- Improved debug output for agent threads.
This commit is contained in:
Antonio Scandurra 2025-05-09 08:58:45 +02:00 committed by GitHub
parent ea7756b362
commit 1b593f616f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 37 additions and 14 deletions

View file

@ -2487,6 +2487,13 @@ impl Thread {
writeln!(markdown, "**\n")?; writeln!(markdown, "**\n")?;
writeln!(markdown, "{}", tool_result.content)?; writeln!(markdown, "{}", tool_result.content)?;
if let Some(output) = tool_result.output.as_ref() {
writeln!(
markdown,
"\n\nDebug Output:\n\n```json\n{}\n```\n",
serde_json::to_string_pretty(output)?
)?;
}
} }
} }

View file

@ -20,7 +20,8 @@ use language_model::{
LanguageModelToolChoice, MessageContent, Role, LanguageModelToolChoice, MessageContent, Role,
}; };
use project::{AgentLocation, Project}; use project::{AgentLocation, Project};
use serde::Serialize; use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use std::{cmp, iter, mem, ops::Range, path::PathBuf, sync::Arc, task::Poll}; use std::{cmp, iter, mem, ops::Range, path::PathBuf, sync::Arc, task::Poll};
use streaming_diff::{CharOperation, StreamingDiff}; use streaming_diff::{CharOperation, StreamingDiff};
@ -50,10 +51,10 @@ pub enum EditAgentOutputEvent {
OldTextNotFound(SharedString), OldTextNotFound(SharedString),
} }
#[derive(Clone, Debug)] #[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)]
pub struct EditAgentOutput { pub struct EditAgentOutput {
pub _raw_edits: String, pub raw_edits: String,
pub _parser_metrics: EditParserMetrics, pub parser_metrics: EditParserMetrics,
} }
#[derive(Clone)] #[derive(Clone)]
@ -186,8 +187,8 @@ impl EditAgent {
} }
Ok(EditAgentOutput { Ok(EditAgentOutput {
_raw_edits: raw_edits, raw_edits,
_parser_metrics: EditParserMetrics::default(), parser_metrics: EditParserMetrics::default(),
}) })
} }
@ -426,8 +427,8 @@ impl EditAgent {
} }
} }
Ok(EditAgentOutput { Ok(EditAgentOutput {
_raw_edits: raw_edits, raw_edits,
_parser_metrics: parser.finish(), parser_metrics: parser.finish(),
}) })
}); });
(output, rx) (output, rx)

View file

@ -1,4 +1,6 @@
use derive_more::{Add, AddAssign}; use derive_more::{Add, AddAssign};
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use smallvec::SmallVec; use smallvec::SmallVec;
use std::{cmp, mem, ops::Range}; use std::{cmp, mem, ops::Range};
@ -13,7 +15,9 @@ pub enum EditParserEvent {
NewTextChunk { chunk: String, done: bool }, NewTextChunk { chunk: String, done: bool },
} }
#[derive(Clone, Debug, Default, PartialEq, Eq, Add, AddAssign)] #[derive(
Clone, Debug, Default, PartialEq, Eq, Add, AddAssign, Serialize, Deserialize, JsonSchema,
)]
pub struct EditParserMetrics { pub struct EditParserMetrics {
pub tags: usize, pub tags: usize,
pub mismatched_tags: usize, pub mismatched_tags: usize,

View file

@ -1116,7 +1116,7 @@ fn eval(iterations: usize, expected_pass_ratio: f32, mut eval: EvalInput) {
while let Ok(output) = rx.recv() { while let Ok(output) = rx.recv() {
match output { match output {
Ok(output) => { Ok(output) => {
cumulative_parser_metrics += output.sample.edit_output._parser_metrics.clone(); cumulative_parser_metrics += output.sample.edit_output.parser_metrics.clone();
eval_outputs.push(output.clone()); eval_outputs.push(output.clone());
if output.assertion.score < 80 { if output.assertion.score < 80 {
failed_count += 1; failed_count += 1;
@ -1197,9 +1197,9 @@ impl Display for EvalOutput {
writeln!( writeln!(
f, f,
"Parser Metrics:\n{:#?}", "Parser Metrics:\n{:#?}",
self.sample.edit_output._parser_metrics self.sample.edit_output.parser_metrics
)?; )?;
writeln!(f, "Raw Edits:\n{}", self.sample.edit_output._raw_edits)?; writeln!(f, "Raw Edits:\n{}", self.sample.edit_output.raw_edits)?;
Ok(()) Ok(())
} }
} }

View file

@ -1,6 +1,6 @@
use crate::{ use crate::{
Templates, Templates,
edit_agent::{EditAgent, EditAgentOutputEvent}, edit_agent::{EditAgent, EditAgentOutput, EditAgentOutputEvent},
schema::json_schema_for, schema::json_schema_for,
}; };
use anyhow::{Result, anyhow}; use anyhow::{Result, anyhow};
@ -88,6 +88,7 @@ pub struct EditFileToolOutput {
pub original_path: PathBuf, pub original_path: PathBuf,
pub new_text: String, pub new_text: String,
pub old_text: String, pub old_text: String,
pub raw_output: Option<EditAgentOutput>,
} }
#[derive(Debug, Serialize, Deserialize, JsonSchema)] #[derive(Debug, Serialize, Deserialize, JsonSchema)]
@ -248,7 +249,7 @@ impl Tool for EditFileTool {
EditAgentOutputEvent::OldTextNotFound(_) => hallucinated_old_text = true, EditAgentOutputEvent::OldTextNotFound(_) => hallucinated_old_text = true,
} }
} }
output.await?; let agent_output = output.await?;
project project
.update(cx, |project, cx| project.save_buffer(buffer.clone(), cx))? .update(cx, |project, cx| project.save_buffer(buffer.clone(), cx))?
@ -268,6 +269,7 @@ impl Tool for EditFileTool {
original_path: project_path.path.to_path_buf(), original_path: project_path.path.to_path_buf(),
new_text: new_text.clone(), new_text: new_text.clone(),
old_text: old_text.clone(), old_text: old_text.clone(),
raw_output: Some(agent_output),
}; };
if let Some(card) = card_clone { if let Some(card) = card_clone {

View file

@ -965,6 +965,15 @@ impl RequestMarkdown {
messages.push_str("**ERROR:**\n"); messages.push_str("**ERROR:**\n");
} }
messages.push_str(&format!("{}\n\n", tool_result.content)); messages.push_str(&format!("{}\n\n", tool_result.content));
if let Some(output) = tool_result.output.as_ref() {
writeln!(
messages,
"**Debug Output**:\n\n```json\n{}\n```\n",
serde_json::to_string_pretty(output).unwrap()
)
.unwrap();
}
} }
} }
} }