Include EditAgent's raw output when inspecting thread (#30337)
This allows us to debug the raw edits that were generated when people report feedback, when running evals, and when opening the thread as Markdown.

Release Notes:

- Improved debug output for agent threads.
This commit is contained in:
parent
ea7756b362
commit
1b593f616f
6 changed files with 37 additions and 14 deletions
|
@ -2487,6 +2487,13 @@ impl Thread {
|
||||||
|
|
||||||
writeln!(markdown, "**\n")?;
|
writeln!(markdown, "**\n")?;
|
||||||
writeln!(markdown, "{}", tool_result.content)?;
|
writeln!(markdown, "{}", tool_result.content)?;
|
||||||
|
if let Some(output) = tool_result.output.as_ref() {
|
||||||
|
writeln!(
|
||||||
|
markdown,
|
||||||
|
"\n\nDebug Output:\n\n```json\n{}\n```\n",
|
||||||
|
serde_json::to_string_pretty(output)?
|
||||||
|
)?;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -20,7 +20,8 @@ use language_model::{
|
||||||
LanguageModelToolChoice, MessageContent, Role,
|
LanguageModelToolChoice, MessageContent, Role,
|
||||||
};
|
};
|
||||||
use project::{AgentLocation, Project};
|
use project::{AgentLocation, Project};
|
||||||
use serde::Serialize;
|
use schemars::JsonSchema;
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
use std::{cmp, iter, mem, ops::Range, path::PathBuf, sync::Arc, task::Poll};
|
use std::{cmp, iter, mem, ops::Range, path::PathBuf, sync::Arc, task::Poll};
|
||||||
use streaming_diff::{CharOperation, StreamingDiff};
|
use streaming_diff::{CharOperation, StreamingDiff};
|
||||||
|
|
||||||
|
@ -50,10 +51,10 @@ pub enum EditAgentOutputEvent {
|
||||||
OldTextNotFound(SharedString),
|
OldTextNotFound(SharedString),
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)]
|
||||||
pub struct EditAgentOutput {
|
pub struct EditAgentOutput {
|
||||||
pub _raw_edits: String,
|
pub raw_edits: String,
|
||||||
pub _parser_metrics: EditParserMetrics,
|
pub parser_metrics: EditParserMetrics,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
|
@ -186,8 +187,8 @@ impl EditAgent {
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(EditAgentOutput {
|
Ok(EditAgentOutput {
|
||||||
_raw_edits: raw_edits,
|
raw_edits,
|
||||||
_parser_metrics: EditParserMetrics::default(),
|
parser_metrics: EditParserMetrics::default(),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -426,8 +427,8 @@ impl EditAgent {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok(EditAgentOutput {
|
Ok(EditAgentOutput {
|
||||||
_raw_edits: raw_edits,
|
raw_edits,
|
||||||
_parser_metrics: parser.finish(),
|
parser_metrics: parser.finish(),
|
||||||
})
|
})
|
||||||
});
|
});
|
||||||
(output, rx)
|
(output, rx)
|
||||||
|
|
|
@ -1,4 +1,6 @@
|
||||||
use derive_more::{Add, AddAssign};
|
use derive_more::{Add, AddAssign};
|
||||||
|
use schemars::JsonSchema;
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
use smallvec::SmallVec;
|
use smallvec::SmallVec;
|
||||||
use std::{cmp, mem, ops::Range};
|
use std::{cmp, mem, ops::Range};
|
||||||
|
|
||||||
|
@ -13,7 +15,9 @@ pub enum EditParserEvent {
|
||||||
NewTextChunk { chunk: String, done: bool },
|
NewTextChunk { chunk: String, done: bool },
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Debug, Default, PartialEq, Eq, Add, AddAssign)]
|
#[derive(
|
||||||
|
Clone, Debug, Default, PartialEq, Eq, Add, AddAssign, Serialize, Deserialize, JsonSchema,
|
||||||
|
)]
|
||||||
pub struct EditParserMetrics {
|
pub struct EditParserMetrics {
|
||||||
pub tags: usize,
|
pub tags: usize,
|
||||||
pub mismatched_tags: usize,
|
pub mismatched_tags: usize,
|
||||||
|
|
|
@ -1116,7 +1116,7 @@ fn eval(iterations: usize, expected_pass_ratio: f32, mut eval: EvalInput) {
|
||||||
while let Ok(output) = rx.recv() {
|
while let Ok(output) = rx.recv() {
|
||||||
match output {
|
match output {
|
||||||
Ok(output) => {
|
Ok(output) => {
|
||||||
cumulative_parser_metrics += output.sample.edit_output._parser_metrics.clone();
|
cumulative_parser_metrics += output.sample.edit_output.parser_metrics.clone();
|
||||||
eval_outputs.push(output.clone());
|
eval_outputs.push(output.clone());
|
||||||
if output.assertion.score < 80 {
|
if output.assertion.score < 80 {
|
||||||
failed_count += 1;
|
failed_count += 1;
|
||||||
|
@ -1197,9 +1197,9 @@ impl Display for EvalOutput {
|
||||||
writeln!(
|
writeln!(
|
||||||
f,
|
f,
|
||||||
"Parser Metrics:\n{:#?}",
|
"Parser Metrics:\n{:#?}",
|
||||||
self.sample.edit_output._parser_metrics
|
self.sample.edit_output.parser_metrics
|
||||||
)?;
|
)?;
|
||||||
writeln!(f, "Raw Edits:\n{}", self.sample.edit_output._raw_edits)?;
|
writeln!(f, "Raw Edits:\n{}", self.sample.edit_output.raw_edits)?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
use crate::{
|
use crate::{
|
||||||
Templates,
|
Templates,
|
||||||
edit_agent::{EditAgent, EditAgentOutputEvent},
|
edit_agent::{EditAgent, EditAgentOutput, EditAgentOutputEvent},
|
||||||
schema::json_schema_for,
|
schema::json_schema_for,
|
||||||
};
|
};
|
||||||
use anyhow::{Result, anyhow};
|
use anyhow::{Result, anyhow};
|
||||||
|
@ -88,6 +88,7 @@ pub struct EditFileToolOutput {
|
||||||
pub original_path: PathBuf,
|
pub original_path: PathBuf,
|
||||||
pub new_text: String,
|
pub new_text: String,
|
||||||
pub old_text: String,
|
pub old_text: String,
|
||||||
|
pub raw_output: Option<EditAgentOutput>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Serialize, Deserialize, JsonSchema)]
|
#[derive(Debug, Serialize, Deserialize, JsonSchema)]
|
||||||
|
@ -248,7 +249,7 @@ impl Tool for EditFileTool {
|
||||||
EditAgentOutputEvent::OldTextNotFound(_) => hallucinated_old_text = true,
|
EditAgentOutputEvent::OldTextNotFound(_) => hallucinated_old_text = true,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
output.await?;
|
let agent_output = output.await?;
|
||||||
|
|
||||||
project
|
project
|
||||||
.update(cx, |project, cx| project.save_buffer(buffer.clone(), cx))?
|
.update(cx, |project, cx| project.save_buffer(buffer.clone(), cx))?
|
||||||
|
@ -268,6 +269,7 @@ impl Tool for EditFileTool {
|
||||||
original_path: project_path.path.to_path_buf(),
|
original_path: project_path.path.to_path_buf(),
|
||||||
new_text: new_text.clone(),
|
new_text: new_text.clone(),
|
||||||
old_text: old_text.clone(),
|
old_text: old_text.clone(),
|
||||||
|
raw_output: Some(agent_output),
|
||||||
};
|
};
|
||||||
|
|
||||||
if let Some(card) = card_clone {
|
if let Some(card) = card_clone {
|
||||||
|
|
|
@ -965,6 +965,15 @@ impl RequestMarkdown {
|
||||||
messages.push_str("**ERROR:**\n");
|
messages.push_str("**ERROR:**\n");
|
||||||
}
|
}
|
||||||
messages.push_str(&format!("{}\n\n", tool_result.content));
|
messages.push_str(&format!("{}\n\n", tool_result.content));
|
||||||
|
|
||||||
|
if let Some(output) = tool_result.output.as_ref() {
|
||||||
|
writeln!(
|
||||||
|
messages,
|
||||||
|
"**Debug Output**:\n\n```json\n{}\n```\n",
|
||||||
|
serde_json::to_string_pretty(output).unwrap()
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue