Add thinking budget for Gemini custom models (#31251)

Closes #31243

As described in my issue, the [thinking
budget](https://ai.google.dev/gemini-api/docs/thinking) is chosen
automatically by Gemini unless it is explicitly set. To get fast
responses (e.g. in the inline assistant), I prefer to set it to 0.

Release Notes:

- ai: Added `thinking` mode for custom Google models with configurable
token budget

---------

Co-authored-by: Ben Brandt <benjamin.j.brandt@gmail.com>
This commit is contained in:
90aca 2025-06-03 13:40:20 +02:00 committed by GitHub
parent b74477d12e
commit cf931247d0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 82 additions and 5 deletions

View file

@ -289,6 +289,22 @@ pub struct UsageMetadata {
pub total_token_count: Option<usize>,
}
/// Thinking settings carried in the `thinking_config` field of `GenerationConfig`.
///
/// Serialized with camelCase keys, so the single field is emitted as
/// `thinkingBudget`.
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ThinkingConfig {
/// Token budget for the model's thinking; see
/// <https://ai.google.dev/gemini-api/docs/thinking>.
/// NOTE(review): `0` presumably turns thinking off entirely — confirm
/// against the API docs for the targeted models.
pub thinking_budget: u32,
}
/// Mode a Google model runs in.
///
/// `Default` is the `#[default]` variant, which is what a `#[serde(default)]`
/// field of this type falls back to when the value is omitted.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Copy, Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
pub enum GoogleModelMode {
/// No thinking-specific configuration.
#[default]
Default,
/// Thinking mode with an optional token budget.
Thinking {
/// Token budget for thinking.
/// NOTE(review): `None` presumably leaves the budget for Gemini to
/// choose — confirm where the request's `ThinkingConfig` is built.
budget_tokens: Option<u32>,
},
}
#[derive(Debug, Deserialize, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct GenerationConfig {
@ -304,6 +320,8 @@ pub struct GenerationConfig {
pub top_p: Option<f64>,
#[serde(skip_serializing_if = "Option::is_none")]
pub top_k: Option<usize>,
#[serde(skip_serializing_if = "Option::is_none")]
pub thinking_config: Option<ThinkingConfig>,
}
#[derive(Debug, Serialize, Deserialize)]
@ -496,6 +514,8 @@ pub enum Model {
/// The name displayed in the UI, such as in the assistant panel model dropdown menu.
display_name: Option<String>,
max_tokens: usize,
#[serde(default)]
mode: GoogleModelMode,
},
}
@ -552,6 +572,21 @@ impl Model {
Model::Custom { max_tokens, .. } => *max_tokens,
}
}
/// Returns the [`GoogleModelMode`] this model runs in.
///
/// A [`Model::Custom`] reports the mode stored on it; each built-in variant
/// listed below reports [`GoogleModelMode::Default`].
pub fn mode(&self) -> GoogleModelMode {
    match self {
        // Custom models carry their own mode; it is `Copy`, so return it by value.
        Self::Custom { mode, .. } => *mode,
        // Built-ins are spelled out rather than caught by `_`, so adding a
        // new variant forces an explicit choice here at compile time.
        Self::Gemini15Pro
        | Self::Gemini15Flash
        | Self::Gemini20Pro
        | Self::Gemini20Flash
        | Self::Gemini20FlashThinking
        | Self::Gemini20FlashLite
        | Self::Gemini25ProExp0325
        | Self::Gemini25ProPreview0325
        | Self::Gemini25FlashPreview0417 => GoogleModelMode::Default,
    }
}
}
impl std::fmt::Display for Model {