eval: Fine-grained assertions (#29246)
- Support programmatic examples ([example](17feb260a0/crates/eval/src/examples/file_search.rs))
- Combine data-driven example declarations into a single `.toml` file ([example](17feb260a0/crates/eval/src/examples/find_and_replace_diff_card.toml))
- Run judge on individual assertions (previously called "criteria")
- Report judge and programmatic assertions in one combined table

Note: We still need to work on concept naming

<img width=400 src="https://github.com/user-attachments/assets/fc719c93-467f-412b-8d47-68821bd8a5f5">

Release Notes:

- N/A

---------

Co-authored-by: Richard Feldman <oss@rtfeldman.com>
Co-authored-by: Max Brunsfeld <maxbrunsfeld@gmail.com>
Co-authored-by: Thomas Mickley-Doyle <tmickleydoyle@gmail.com>
This commit is contained in:
parent
0d3fe474db
commit
ce1a674eba
18 changed files with 1969 additions and 1229 deletions
4
Cargo.lock
generated
4
Cargo.lock
generated
|
@ -4895,6 +4895,7 @@ dependencies = [
|
|||
"anyhow",
|
||||
"assistant_tool",
|
||||
"assistant_tools",
|
||||
"async-trait",
|
||||
"async-watch",
|
||||
"chrono",
|
||||
"clap",
|
||||
|
@ -4915,13 +4916,14 @@ dependencies = [
|
|||
"language_models",
|
||||
"languages",
|
||||
"node_runtime",
|
||||
"parking_lot",
|
||||
"paths",
|
||||
"project",
|
||||
"prompt_store",
|
||||
"regex",
|
||||
"release_channel",
|
||||
"reqwest_client",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"settings",
|
||||
"shellexpand 2.1.2",
|
||||
"smol",
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue