agent: Overwrite files more cautiously (#30649)
1. The `edit_file` tool tended to use `create_or_overwrite` a bit too often, leading to corruption of long files. This change replaces the boolean flag with an `EditFileMode` enum, which helps Agent make a more deliberate choice when overwriting files. With this change, the pass rate of the new eval increased from 10% to 100%. 2. eval: Added ability to run eval on top of an existing thread. Threads can now be loaded from JSON files in the `SerializedThread` format, which makes it easy to use real threads as starting points for tests/evals. 3. Don't try to restore tool cards when running in headless or eval mode -- we don't have a window to properly do this. Release Notes: - N/A
This commit is contained in:
parent
22f76ac1a7
commit
255d8f7cf8
18 changed files with 425 additions and 37 deletions
|
@ -49,7 +49,7 @@ pub use crate::context::{ContextLoadResult, LoadedContext};
|
|||
pub use crate::inline_assistant::InlineAssistant;
|
||||
use crate::slash_command_settings::SlashCommandSettings;
|
||||
pub use crate::thread::{Message, MessageSegment, Thread, ThreadEvent};
|
||||
pub use crate::thread_store::{TextThreadStore, ThreadStore};
|
||||
pub use crate::thread_store::{SerializedThread, TextThreadStore, ThreadStore};
|
||||
pub use agent_diff::{AgentDiffPane, AgentDiffToolbar};
|
||||
pub use context_store::ContextStore;
|
||||
pub use ui::preview::{all_agent_previews, get_agent_preview};
|
||||
|
|
|
@ -458,7 +458,7 @@ impl Thread {
|
|||
tools: Entity<ToolWorkingSet>,
|
||||
prompt_builder: Arc<PromptBuilder>,
|
||||
project_context: SharedProjectContext,
|
||||
window: &mut Window,
|
||||
window: Option<&mut Window>, // None in headless mode
|
||||
cx: &mut Context<Self>,
|
||||
) -> Self {
|
||||
let next_message_id = MessageId(
|
||||
|
|
|
@ -386,6 +386,25 @@ impl ThreadStore {
|
|||
})
|
||||
}
|
||||
|
||||
pub fn create_thread_from_serialized(
|
||||
&mut self,
|
||||
serialized: SerializedThread,
|
||||
cx: &mut Context<Self>,
|
||||
) -> Entity<Thread> {
|
||||
cx.new(|cx| {
|
||||
Thread::deserialize(
|
||||
ThreadId::new(),
|
||||
serialized,
|
||||
self.project.clone(),
|
||||
self.tools.clone(),
|
||||
self.prompt_builder.clone(),
|
||||
self.project_context.clone(),
|
||||
None,
|
||||
cx,
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
pub fn open_thread(
|
||||
&self,
|
||||
id: &ThreadId,
|
||||
|
@ -411,7 +430,7 @@ impl ThreadStore {
|
|||
this.tools.clone(),
|
||||
this.prompt_builder.clone(),
|
||||
this.project_context.clone(),
|
||||
window,
|
||||
Some(window),
|
||||
cx,
|
||||
)
|
||||
})
|
||||
|
|
|
@ -54,15 +54,19 @@ impl ToolUseState {
|
|||
/// Constructs a [`ToolUseState`] from the given list of [`SerializedMessage`]s.
|
||||
///
|
||||
/// Accepts a function to filter the tools that should be used to populate the state.
|
||||
///
|
||||
/// If `window` is `None` (e.g., when in headless mode or when running evals),
|
||||
/// tool cards won't be deserialized
|
||||
pub fn from_serialized_messages(
|
||||
tools: Entity<ToolWorkingSet>,
|
||||
messages: &[SerializedMessage],
|
||||
project: Entity<Project>,
|
||||
window: &mut Window,
|
||||
window: Option<&mut Window>, // None in headless mode
|
||||
cx: &mut App,
|
||||
) -> Self {
|
||||
let mut this = Self::new(tools);
|
||||
let mut tool_names_by_id = HashMap::default();
|
||||
let mut window = window;
|
||||
|
||||
for message in messages {
|
||||
match message.role {
|
||||
|
@ -107,12 +111,17 @@ impl ToolUseState {
|
|||
},
|
||||
);
|
||||
|
||||
if let Some(tool) = this.tools.read(cx).tool(tool_use, cx) {
|
||||
if let Some(output) = tool_result.output.clone() {
|
||||
if let Some(card) =
|
||||
tool.deserialize_card(output, project.clone(), window, cx)
|
||||
{
|
||||
this.tool_result_cards.insert(tool_use_id, card);
|
||||
if let Some(window) = &mut window {
|
||||
if let Some(tool) = this.tools.read(cx).tool(tool_use, cx) {
|
||||
if let Some(output) = tool_result.output.clone() {
|
||||
if let Some(card) = tool.deserialize_card(
|
||||
output,
|
||||
project.clone(),
|
||||
window,
|
||||
cx,
|
||||
) {
|
||||
this.tool_result_cards.insert(tool_use_id, card);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue