Add thinking budget for Gemini custom models (#31251)

Closes #31243

As described in my issue, the [thinking budget](https://ai.google.dev/gemini-api/docs/thinking) gets automatically chosen by Gemini unless it is specifically set to something. In order to have fast responses (inline assistant) I prefer to set it to 0.

Release Notes:

- ai: Added `thinking` mode for custom Google models with configurable token budget

---------

Co-authored-by: Ben Brandt <benjamin.j.brandt@gmail.com>
2025-06-03 13:40:20 +02:00 · 2025-06-03 13:40:20 +02:00 · cf931247d0
commit cf931247d0
parent b74477d12e
3 changed files with 82 additions and 5 deletions
--- a/crates/google_ai/src/google_ai.rs
+++ b/crates/google_ai/src/google_ai.rs
@@ -289,6 +289,22 @@ pub struct UsageMetadata {
    pub total_token_count: Option<usize>,
 }

+#[derive(Debug, Serialize, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct ThinkingConfig {
+    pub thinking_budget: u32,
+}
+
+#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
+#[derive(Copy, Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
+pub enum GoogleModelMode {
+    #[default]
+    Default,
+    Thinking {
+        budget_tokens: Option<u32>,
+    },
+}
+
 #[derive(Debug, Deserialize, Serialize)]
 #[serde(rename_all = "camelCase")]
 pub struct GenerationConfig {
@@ -304,6 +320,8 @@ pub struct GenerationConfig {
    pub top_p: Option<f64>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub top_k: Option<usize>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub thinking_config: Option<ThinkingConfig>,
 }

 #[derive(Debug, Serialize, Deserialize)]
@@ -496,6 +514,8 @@ pub enum Model {
        /// The name displayed in the UI, such as in the assistant panel model dropdown menu.
        display_name: Option<String>,
        max_tokens: usize,
+        #[serde(default)]
+        mode: GoogleModelMode,
    },
 }

@@ -552,6 +572,21 @@ impl Model {
            Model::Custom { max_tokens, .. } => *max_tokens,
        }
    }
+
+    pub fn mode(&self) -> GoogleModelMode {
+        match self {
+            Self::Gemini15Pro
+            | Self::Gemini15Flash
+            | Self::Gemini20Pro
+            | Self::Gemini20Flash
+            | Self::Gemini20FlashThinking
+            | Self::Gemini20FlashLite
+            | Self::Gemini25ProExp0325
+            | Self::Gemini25ProPreview0325
+            | Self::Gemini25FlashPreview0417 => GoogleModelMode::Default,
+            Self::Custom { mode, .. } => *mode,
+        }
+    }
 }

 impl std::fmt::Display for Model {