Add thinking budget for Gemini custom models (#31251)
Closes #31243. As described in my issue, Gemini chooses the [thinking budget](https://ai.google.dev/gemini-api/docs/thinking) automatically unless it is explicitly set. To get fast responses (e.g. for the inline assistant), I prefer to set it to 0. Release Notes: - ai: Added `thinking` mode for custom Google models with a configurable token budget --------- Co-authored-by: Ben Brandt <benjamin.j.brandt@gmail.com>
This commit is contained in:
parent
b74477d12e
commit
cf931247d0
3 changed files with 82 additions and 5 deletions
|
@ -289,6 +289,22 @@ pub struct UsageMetadata {
|
|||
pub total_token_count: Option<usize>,
|
||||
}
|
||||
|
||||
/// Thinking-related settings carried inside [`GenerationConfig`] as its
/// optional `thinking_config` field.
///
/// Field names are (de)serialized in camelCase, so `thinking_budget` appears
/// as `thinkingBudget` on the wire.
#[derive(Debug, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct ThinkingConfig {
|
||||
/// Budget the model may spend on thinking (presumably counted in tokens).
/// Per the change description, Gemini chooses a budget on its own when none
/// is set, and `0` is the value used to get fast responses.
pub thinking_budget: u32,
|
||||
}
|
||||
|
||||
/// Operating mode of a Google model.
///
/// The type is `Copy` and defaults to [`GoogleModelMode::Default`]. A JSON
/// schema is derived only when the `schemars` feature is enabled.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
|
||||
#[derive(Copy, Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
|
||||
pub enum GoogleModelMode {
|
||||
/// Mode with no thinking settings attached; this is the `Default::default()` value.
#[default]
|
||||
Default,
|
||||
/// Thinking mode with an optional budget.
Thinking {
|
||||
/// Optional thinking budget; `None` leaves it unspecified.
/// NOTE(review): presumably forwarded as `ThinkingConfig::thinking_budget`
/// when the request is built — confirm against the caller, which is not
/// visible here.
budget_tokens: Option<u32>,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, Serialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct GenerationConfig {
|
||||
|
@ -304,6 +320,8 @@ pub struct GenerationConfig {
|
|||
pub top_p: Option<f64>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub top_k: Option<usize>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub thinking_config: Option<ThinkingConfig>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
|
@ -496,6 +514,8 @@ pub enum Model {
|
|||
/// The name displayed in the UI, such as in the assistant panel model dropdown menu.
|
||||
display_name: Option<String>,
|
||||
max_tokens: usize,
|
||||
#[serde(default)]
|
||||
mode: GoogleModelMode,
|
||||
},
|
||||
}
|
||||
|
||||
|
@ -552,6 +572,21 @@ impl Model {
|
|||
Model::Custom { max_tokens, .. } => *max_tokens,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the [`GoogleModelMode`] associated with this model.
///
/// Every built-in variant listed below reports `GoogleModelMode::Default`;
/// only `Model::Custom` carries its own `mode`, which is returned by copy.
pub fn mode(&self) -> GoogleModelMode {
|
||||
match self {
|
||||
// Built-in models: listed explicitly (no `_` arm), so adding a new
// variant forces a decision here at compile time.
Self::Gemini15Pro
|
||||
| Self::Gemini15Flash
|
||||
| Self::Gemini20Pro
|
||||
| Self::Gemini20Flash
|
||||
| Self::Gemini20FlashThinking
|
||||
| Self::Gemini20FlashLite
|
||||
| Self::Gemini25ProExp0325
|
||||
| Self::Gemini25ProPreview0325
|
||||
| Self::Gemini25FlashPreview0417 => GoogleModelMode::Default,
|
||||
// Custom models return the mode stored on the variant.
Self::Custom { mode, .. } => *mode,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for Model {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue