agent: Overwrite files more cautiously (#30649)
1. The `edit_file` tool tended to use `create_or_overwrite` too often, leading to corruption of long files. This change replaces the boolean flag with an `EditFileMode` enum, which helps the Agent make a more deliberate choice when overwriting files. With this change, the pass rate of the new eval increased from 10% to 100%.
2. eval: Added the ability to run an eval on top of an existing thread. Threads can now be loaded from JSON files in the `SerializedThread` format, which makes it easy to use real threads as starting points for tests/evals.
3. Don't try to restore tool cards when running in headless or eval mode -- we don't have a window to properly do this.

Release Notes:

- N/A
This commit is contained in:
parent
22f76ac1a7
commit
255d8f7cf8
18 changed files with 425 additions and 37 deletions
|
@ -1,4 +1,4 @@
|
|||
use agent::{Message, MessageSegment, ThreadStore};
|
||||
use agent::{Message, MessageSegment, SerializedThread, ThreadStore};
|
||||
use anyhow::{Context, Result, anyhow, bail};
|
||||
use assistant_tool::ToolWorkingSet;
|
||||
use client::proto::LspWorkProgress;
|
||||
|
@ -312,7 +312,14 @@ impl ExampleInstance {
|
|||
thread_store.update(cx, |thread_store, cx| thread_store.load_profile_by_id(profile_id, cx)).expect("Failed to load profile");
|
||||
|
||||
let thread =
|
||||
thread_store.update(cx, |thread_store, cx| thread_store.create_thread(cx))?;
|
||||
thread_store.update(cx, |thread_store, cx| {
|
||||
if let Some(json) = &meta.existing_thread_json {
|
||||
let serialized = SerializedThread::from_json(json.as_bytes()).expect("Can't read serialized thread");
|
||||
thread_store.create_thread_from_serialized(serialized, cx)
|
||||
} else {
|
||||
thread_store.create_thread(cx)
|
||||
}
|
||||
})?;
|
||||
|
||||
|
||||
thread.update(cx, |thread, _cx| {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue