
Now that we've established a proper eval in tree, this PR reboots our agent loop back to a set of minimal tools and simpler prompts. We should aim to get this branch feeling subjectively competitive with what's on main and then merge it, and build from there. Let's invest in our eval and use it to drive better performance of the agent loop. How you can help: Pick an example, and then make the outcome faster or better. It's fine to even use your own subjective judgment, as our evaluation criteria likely need tuning as well at this point. Focus on making the agent work better in your own subjective experience first. Let's focus on simple/practical improvements to make this thing work better, then determine how we can craft our judgment criteria to lock those improvements in. Release Notes: - N/A --------- Co-authored-by: Max <max@zed.dev> Co-authored-by: Antonio <antonio@zed.dev> Co-authored-by: Agus <agus@zed.dev> Co-authored-by: Richard <richard@zed.dev> Co-authored-by: Max Brunsfeld <maxbrunsfeld@gmail.com> Co-authored-by: Antonio Scandurra <me@as-cii.com> Co-authored-by: Michael Sloan <mgsloan@gmail.com>
54 lines
1.3 KiB
TOML
54 lines
1.3 KiB
TOML
# Package metadata for the `eval` crate.
# `publish` and `edition` are not set here; they are inherited from the
# enclosing Cargo workspace via `<key>.workspace = true`.
[package]
name = "eval"
version = "0.1.0"
publish.workspace = true
edition.workspace = true

# Crates the `eval` package depends on, kept in alphabetical order.
# Entries written as `<name>.workspace = true` take their version/source from
# the workspace's shared dependency table instead of declaring it here.
[dependencies]
agent.workspace = true
anyhow.workspace = true
assistant_tool.workspace = true
assistant_tools.workspace = true
async-watch.workspace = true
chrono.workspace = true
clap.workspace = true
client.workspace = true
collections.workspace = true
context_server.workspace = true
dap.workspace = true
# Versioned locally rather than inherited from the workspace.
# NOTE(review): confirm whether the workspace already provides `dirs`.
dirs = "5.0"
env_logger.workspace = true
extension.workspace = true
fs.workspace = true
futures.workspace = true
gpui.workspace = true
gpui_tokio.workspace = true
handlebars.workspace = true
language.workspace = true
language_extension.workspace = true
language_model.workspace = true
language_models.workspace = true
# Workspace dependency with the extra `load-grammars` feature switched on.
languages = { workspace = true, features = ["load-grammars"] }
node_runtime.workspace = true
paths.workspace = true
project.workspace = true
prompt_store.workspace = true
release_channel.workspace = true
reqwest_client.workspace = true
serde.workspace = true
serde_json.workspace = true
settings.workspace = true
shellexpand.workspace = true
telemetry.workspace = true
toml.workspace = true
unindent.workspace = true
util.workspace = true
# Versioned locally with only the `v4` feature enabled.
# NOTE(review): confirm whether the workspace already provides `uuid`.
uuid = { version = "1.6", features = ["v4"] }
workspace-hack.workspace = true

# Binary target: an executable named `eval` whose entry point is `src/eval.rs`
# (path is relative to this manifest).
[[bin]]
name = "eval"
path = "src/eval.rs"

# Lint configuration is inherited from the workspace rather than set per crate.
[lints]
workspace = true