evals: Configurable number of max dialog turns (#31680)

Release Notes:

- N/A
This commit is contained in:
Oleksiy Syvokon 2025-05-29 13:35:29 +03:00 committed by GitHub
parent d989b2260b
commit cb187b0b4d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 12 additions and 1 deletion

View file

@ -49,6 +49,7 @@ pub struct ExampleMetadata {
pub max_assertions: Option<usize>,
pub profile_id: AgentProfileId,
pub existing_thread_json: Option<String>,
pub max_turns: Option<u32>,
}
#[derive(Clone, Debug)]

View file

@ -22,6 +22,7 @@ impl Example for AddArgToTraitMethod {
max_assertions: None,
profile_id: AgentProfileId::default(),
existing_thread_json: None,
max_turns: None,
}
}

View file

@ -23,6 +23,7 @@ impl Example for CodeBlockCitations {
max_assertions: None,
profile_id: AgentProfileId::default(),
existing_thread_json: None,
max_turns: None,
}
}

View file

@ -17,6 +17,7 @@ impl Example for CommentTranslation {
max_assertions: Some(1),
profile_id: AgentProfileId::default(),
existing_thread_json: None,
max_turns: None,
}
}

View file

@ -19,6 +19,7 @@ impl Example for FileSearchExample {
max_assertions: Some(3),
profile_id: AgentProfileId::default(),
existing_thread_json: None,
max_turns: None,
}
}

View file

@ -82,6 +82,7 @@ impl DeclarativeExample {
max_assertions: None,
profile_id,
existing_thread_json,
max_turns: base.max_turns,
};
Ok(DeclarativeExample {
@ -124,6 +125,8 @@ pub struct ExampleToml {
pub thread_assertions: BTreeMap<String, String>,
#[serde(default)]
pub existing_thread_path: Option<String>,
#[serde(default)]
pub max_turns: Option<u32>,
}
#[async_trait(?Send)]
@ -134,7 +137,8 @@ impl Example for DeclarativeExample {
/// Drives the conversation for a declarative example: pushes the example's
/// prompt as the user message, then runs the agent.
///
/// NOTE(review): this hunk is rendered without `+`/`-` markers. Its header
/// reports a net change of one added line, so the `run_to_end` call below is
/// most likely the single removed line and the two `max_turns` lines are its
/// replacement. Confirm against the raw diff before treating both calls as live.
async fn conversation(&self, cx: &mut ExampleContext) -> Result<()> {
cx.push_user_message(&self.prompt);
// The result of this call is discarded via `let _`.
let _ = cx.run_to_end().await;
// Turn limit comes from the example metadata; 1000 is used when it is unset.
let max_turns = self.metadata.max_turns.unwrap_or(1000);
// The result of this call is likewise discarded; the method always returns `Ok(())`.
let _ = cx.run_turns(max_turns).await;
Ok(())
}

View file

@ -31,6 +31,7 @@ impl Example for FileOverwriteExample {
max_assertions: Some(1),
profile_id: AgentProfileId::default(),
existing_thread_json: Some(thread_json.to_string()),
max_turns: None,
}
}

View file

@ -19,6 +19,7 @@ impl Example for Planets {
max_assertions: None,
profile_id: AgentProfileId::default(),
existing_thread_json: None,
max_turns: None,
}
}