eval: Fine-grained assertions (#29246)
- Support programmatic examples ([example](17feb260a0/crates/eval/src/examples/file_search.rs))
- Combine data-driven example declarations into a single `.toml` file ([example](17feb260a0/crates/eval/src/examples/find_and_replace_diff_card.toml))
- Run judge on individual assertions (previously called "criteria")
- Report judge and programmatic assertions in one combined table

Note: We still need to work on concept naming

<img width=400 src="https://github.com/user-attachments/assets/fc719c93-467f-412b-8d47-68821bd8a5f5">

Release Notes:

- N/A

---------

Co-authored-by: Richard Feldman <oss@rtfeldman.com>
Co-authored-by: Max Brunsfeld <maxbrunsfeld@gmail.com>
Co-authored-by: Thomas Mickley-Doyle <tmickleydoyle@gmail.com>
This commit is contained in:
parent
0d3fe474db
commit
ce1a674eba
18 changed files with 1969 additions and 1229 deletions
|
@ -315,6 +315,7 @@ pub struct Thread {
|
|||
request_callback: Option<
|
||||
Box<dyn FnMut(&LanguageModelRequest, &[Result<LanguageModelCompletionEvent, String>])>,
|
||||
>,
|
||||
remaining_turns: u32,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
|
@ -368,6 +369,7 @@ impl Thread {
|
|||
message_feedback: HashMap::default(),
|
||||
last_auto_capture_at: None,
|
||||
request_callback: None,
|
||||
remaining_turns: u32::MAX,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -442,6 +444,7 @@ impl Thread {
|
|||
message_feedback: HashMap::default(),
|
||||
last_auto_capture_at: None,
|
||||
request_callback: None,
|
||||
remaining_turns: u32::MAX,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -522,7 +525,7 @@ impl Thread {
|
|||
self.messages.iter().find(|message| message.id == id)
|
||||
}
|
||||
|
||||
pub fn messages(&self) -> impl Iterator<Item = &Message> {
|
||||
pub fn messages(&self) -> impl ExactSizeIterator<Item = &Message> {
|
||||
self.messages.iter()
|
||||
}
|
||||
|
||||
|
@ -958,7 +961,21 @@ impl Thread {
|
|||
})
|
||||
}
|
||||
|
||||
/// Returns the current value of the `remaining_turns` counter.
///
/// `send_to_model` returns early when this counter is zero and otherwise
/// decrements it by one before building the completion request.
pub fn remaining_turns(&self) -> u32 {
|
||||
self.remaining_turns
|
||||
}
|
||||
|
||||
/// Overwrites the `remaining_turns` counter with the given value.
///
/// Setting it to `0` makes the next `send_to_model` call return before
/// building a request; each call made with a non-zero counter decrements it
/// by one.
pub fn set_remaining_turns(&mut self, remaining_turns: u32) {
|
||||
self.remaining_turns = remaining_turns;
|
||||
}
|
||||
|
||||
pub fn send_to_model(&mut self, model: Arc<dyn LanguageModel>, cx: &mut Context<Self>) {
|
||||
if self.remaining_turns == 0 {
|
||||
return;
|
||||
}
|
||||
|
||||
self.remaining_turns -= 1;
|
||||
|
||||
let mut request = self.to_completion_request(cx);
|
||||
if model.supports_tools() {
|
||||
request.tools = {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue