Include EditAgent's raw output when inspecting thread (#30337)

This allows us to debug the raw edits generated by the EditAgent in three
situations: when people report feedback, when running evals, and when
opening the thread as Markdown.

Release Notes:

- Improved debug output for agent threads.
This commit is contained in:
Antonio Scandurra 2025-05-09 08:58:45 +02:00 committed by GitHub
parent ea7756b362
commit 1b593f616f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 37 additions and 14 deletions

View file

@ -2487,6 +2487,13 @@ impl Thread {
writeln!(markdown, "**\n")?;
writeln!(markdown, "{}", tool_result.content)?;
if let Some(output) = tool_result.output.as_ref() {
writeln!(
markdown,
"\n\nDebug Output:\n\n```json\n{}\n```\n",
serde_json::to_string_pretty(output)?
)?;
}
}
}

View file

@ -20,7 +20,8 @@ use language_model::{
LanguageModelToolChoice, MessageContent, Role,
};
use project::{AgentLocation, Project};
use serde::Serialize;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use std::{cmp, iter, mem, ops::Range, path::PathBuf, sync::Arc, task::Poll};
use streaming_diff::{CharOperation, StreamingDiff};
@ -50,10 +51,10 @@ pub enum EditAgentOutputEvent {
OldTextNotFound(SharedString),
}
#[derive(Clone, Debug)]
#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)]
pub struct EditAgentOutput {
pub _raw_edits: String,
pub _parser_metrics: EditParserMetrics,
pub raw_edits: String,
pub parser_metrics: EditParserMetrics,
}
#[derive(Clone)]
@ -186,8 +187,8 @@ impl EditAgent {
}
Ok(EditAgentOutput {
_raw_edits: raw_edits,
_parser_metrics: EditParserMetrics::default(),
raw_edits,
parser_metrics: EditParserMetrics::default(),
})
}
@ -426,8 +427,8 @@ impl EditAgent {
}
}
Ok(EditAgentOutput {
_raw_edits: raw_edits,
_parser_metrics: parser.finish(),
raw_edits,
parser_metrics: parser.finish(),
})
});
(output, rx)

View file

@ -1,4 +1,6 @@
use derive_more::{Add, AddAssign};
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use smallvec::SmallVec;
use std::{cmp, mem, ops::Range};
@ -13,7 +15,9 @@ pub enum EditParserEvent {
NewTextChunk { chunk: String, done: bool },
}
#[derive(Clone, Debug, Default, PartialEq, Eq, Add, AddAssign)]
#[derive(
Clone, Debug, Default, PartialEq, Eq, Add, AddAssign, Serialize, Deserialize, JsonSchema,
)]
pub struct EditParserMetrics {
pub tags: usize,
pub mismatched_tags: usize,

View file

@ -1116,7 +1116,7 @@ fn eval(iterations: usize, expected_pass_ratio: f32, mut eval: EvalInput) {
while let Ok(output) = rx.recv() {
match output {
Ok(output) => {
cumulative_parser_metrics += output.sample.edit_output._parser_metrics.clone();
cumulative_parser_metrics += output.sample.edit_output.parser_metrics.clone();
eval_outputs.push(output.clone());
if output.assertion.score < 80 {
failed_count += 1;
@ -1197,9 +1197,9 @@ impl Display for EvalOutput {
writeln!(
f,
"Parser Metrics:\n{:#?}",
self.sample.edit_output._parser_metrics
self.sample.edit_output.parser_metrics
)?;
writeln!(f, "Raw Edits:\n{}", self.sample.edit_output._raw_edits)?;
writeln!(f, "Raw Edits:\n{}", self.sample.edit_output.raw_edits)?;
Ok(())
}
}

View file

@ -1,6 +1,6 @@
use crate::{
Templates,
edit_agent::{EditAgent, EditAgentOutputEvent},
edit_agent::{EditAgent, EditAgentOutput, EditAgentOutputEvent},
schema::json_schema_for,
};
use anyhow::{Result, anyhow};
@ -88,6 +88,7 @@ pub struct EditFileToolOutput {
pub original_path: PathBuf,
pub new_text: String,
pub old_text: String,
pub raw_output: Option<EditAgentOutput>,
}
#[derive(Debug, Serialize, Deserialize, JsonSchema)]
@ -248,7 +249,7 @@ impl Tool for EditFileTool {
EditAgentOutputEvent::OldTextNotFound(_) => hallucinated_old_text = true,
}
}
output.await?;
let agent_output = output.await?;
project
.update(cx, |project, cx| project.save_buffer(buffer.clone(), cx))?
@ -268,6 +269,7 @@ impl Tool for EditFileTool {
original_path: project_path.path.to_path_buf(),
new_text: new_text.clone(),
old_text: old_text.clone(),
raw_output: Some(agent_output),
};
if let Some(card) = card_clone {

View file

@ -965,6 +965,15 @@ impl RequestMarkdown {
messages.push_str("**ERROR:**\n");
}
messages.push_str(&format!("{}\n\n", tool_result.content));
if let Some(output) = tool_result.output.as_ref() {
writeln!(
messages,
"**Debug Output**:\n\n```json\n{}\n```\n",
serde_json::to_string_pretty(output).unwrap()
)
.unwrap();
}
}
}
}