From dfdd2b9558d10d6871ede02a4d85af007a2429ef Mon Sep 17 00:00:00 2001
From: Umesh Yadav <23421535+imumesh18@users.noreply.github.com>
Date: Sat, 21 Jun 2025 11:33:50 +0530
Subject: [PATCH] language_models: Add thinking support to OpenRouter provider
 (#32541)

Also cleaned up the code that loads models from settings: it is no longer
needed, since we fetch all models from OpenRouter, and it is better to
maintain a single source of truth.

Release Notes:

- Add thinking support to OpenRouter provider
---
 .../src/provider/open_router.rs       | 74 ++++++++++++++++---
 crates/open_router/src/open_router.rs | 40 ++++++++++
 docs/src/ai/configuration.md          | 43 +++++++++++
 3 files changed, 148 insertions(+), 9 deletions(-)

diff --git a/crates/language_models/src/provider/open_router.rs b/crates/language_models/src/provider/open_router.rs
index 95ac69c97d..b447ee1bd7 100644
--- a/crates/language_models/src/provider/open_router.rs
+++ b/crates/language_models/src/provider/open_router.rs
@@ -14,7 +14,9 @@ use language_model::{
     LanguageModelToolChoice, LanguageModelToolResultContent, LanguageModelToolUse,
     MessageContent, RateLimiter, Role, StopReason, TokenUsage,
 };
-use open_router::{Model, ResponseStreamEvent, list_models, stream_completion};
+use open_router::{
+    Model, ModelMode as OpenRouterModelMode, ResponseStreamEvent, list_models, stream_completion,
+};
 use schemars::JsonSchema;
 use serde::{Deserialize, Serialize};
 use settings::{Settings, SettingsStore};
@@ -45,6 +47,39 @@ pub struct AvailableModel {
     pub max_completion_tokens: Option<u64>,
     pub supports_tools: Option<bool>,
     pub supports_images: Option<bool>,
+    pub mode: Option<ModelMode>,
+}
+
+#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize, JsonSchema)]
+#[serde(tag = "type", rename_all = "lowercase")]
+pub enum ModelMode {
+    #[default]
+    Default,
+    Thinking {
+        budget_tokens: Option<u32>,
+    },
+}
+
+impl From<ModelMode> for OpenRouterModelMode {
+    fn from(value: ModelMode) -> Self {
+        match value {
+            ModelMode::Default => OpenRouterModelMode::Default,
+            ModelMode::Thinking { budget_tokens } => {
+                OpenRouterModelMode::Thinking { budget_tokens }
+            }
+        }
+    }
+}
+
+impl From<OpenRouterModelMode> for ModelMode {
+    fn from(value: OpenRouterModelMode) -> Self {
+        match value {
+            OpenRouterModelMode::Default => ModelMode::Default,
+            OpenRouterModelMode::Thinking { budget_tokens } => {
+                ModelMode::Thinking { budget_tokens }
+            }
+        }
+    }
 }
 
 pub struct OpenRouterLanguageModelProvider {
@@ -242,6 +277,7 @@ impl LanguageModelProvider for OpenRouterLanguageModelProvider {
                 max_tokens: model.max_tokens,
                 supports_tools: model.supports_tools,
                 supports_images: model.supports_images,
+                mode: model.mode.clone().unwrap_or_default().into(),
             });
         }

@@ -403,13 +439,12 @@ pub fn into_open_router(
     for message in request.messages {
         for content in message.content {
             match content {
-                MessageContent::Text(text) | MessageContent::Thinking { text, .. } => {
-                    add_message_content_part(
-                        open_router::MessagePart::Text { text },
-                        message.role,
-                        &mut messages,
-                    )
-                }
+                MessageContent::Text(text) => add_message_content_part(
+                    open_router::MessagePart::Text { text },
+                    message.role,
+                    &mut messages,
+                ),
+                MessageContent::Thinking { .. } => {}
                 MessageContent::RedactedThinking(_) => {}
                 MessageContent::Image(image) => {
                     add_message_content_part(
@@ -479,6 +514,16 @@ pub fn into_open_router(
             None
         },
         usage: open_router::RequestUsage { include: true },
+        reasoning: if let OpenRouterModelMode::Thinking { budget_tokens } = model.mode {
+            Some(open_router::Reasoning {
+                effort: None,
+                max_tokens: budget_tokens,
+                exclude: Some(false),
+                enabled: Some(true),
+            })
+        } else {
+            None
+        },
         tools: request
             .tools
             .into_iter()
@@ -569,8 +614,19 @@ impl OpenRouterEventMapper {
         };

         let mut events = Vec::new();
+        if let Some(reasoning) = choice.delta.reasoning.clone() {
+            events.push(Ok(LanguageModelCompletionEvent::Thinking {
+                text: reasoning,
+                signature: None,
+            }));
+        }
+
         if let Some(content) = choice.delta.content.clone() {
-            events.push(Ok(LanguageModelCompletionEvent::Text(content)));
+            // OpenRouter sends an empty content string alongside the reasoning content.
+            // Work around that API quirk by skipping empty text chunks.
+            if !content.is_empty() {
+                events.push(Ok(LanguageModelCompletionEvent::Text(content)));
+            }
         }

         if let Some(tool_calls) = choice.delta.tool_calls.as_ref() {
diff --git a/crates/open_router/src/open_router.rs b/crates/open_router/src/open_router.rs
index f0cb30e7aa..4128426a7f 100644
--- a/crates/open_router/src/open_router.rs
+++ b/crates/open_router/src/open_router.rs
@@ -53,6 +53,18 @@ pub struct Model {
     pub max_tokens: u64,
     pub supports_tools: Option<bool>,
     pub supports_images: Option<bool>,
+    #[serde(default)]
+    pub mode: ModelMode,
+}
+
+#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
+#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
+pub enum ModelMode {
+    #[default]
+    Default,
+    Thinking {
+        budget_tokens: Option<u32>,
+    },
 }

 impl Model {
@@ -63,6 +75,7 @@ impl Model {
             Some(2000000),
             Some(true),
             Some(false),
+            Some(ModelMode::Default),
         )
     }

@@ -76,6 +89,7 @@ impl Model {
         max_tokens: Option<u64>,
         supports_tools: Option<bool>,
         supports_images: Option<bool>,
+        mode: Option<ModelMode>,
     ) -> Self {
         Self {
@@ -83,6 +97,7 @@ impl Model {
             max_tokens: max_tokens.unwrap_or(2000000),
             supports_tools,
             supports_images,
+            mode: mode.unwrap_or(ModelMode::Default),
         }
     }

@@ -127,6 +142,8 @@ pub struct Request {
     pub parallel_tool_calls: Option<bool>,
     #[serde(default, skip_serializing_if = "Vec::is_empty")]
     pub tools: Vec<ToolDefinition>,
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub reasoning: Option<Reasoning>,
     pub usage: RequestUsage,
 }

@@ -160,6 +177,18 @@ pub struct FunctionDefinition {
     pub parameters: Option<Value>,
 }

+#[derive(Debug, Serialize, Deserialize)]
+pub struct Reasoning {
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub effort: Option<String>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub max_tokens: Option<u32>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub exclude: Option<bool>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub enabled: Option<bool>,
+}
+
 #[derive(Serialize, Deserialize, Debug, Eq, PartialEq)]
 #[serde(tag = "role", rename_all = "lowercase")]
 pub enum RequestMessage {
@@ -299,6 +328,7 @@ pub struct FunctionContent {
 pub struct ResponseMessageDelta {
     pub role: Option<Role>,
     pub content: Option<String>,
+    pub reasoning: Option<String>,
     #[serde(default, skip_serializing_if = "is_none_or_empty")]
     pub tool_calls: Option<Vec<ToolCallChunk>>,
 }
@@ -591,6 +621,16 @@ pub async fn list_models(client: &dyn HttpClient, api_url: &str) -> Result<Vec<Model>> {
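
For reference (not part of the patch): a minimal standalone sketch of the JSON that the new `reasoning` field contributes to the OpenRouter request body. The struct below mirrors the `Reasoning` type added in crates/open_router/src/open_router.rs; the standalone program, the 8000-token budget, and the serde/serde_json dependencies are illustrative assumptions only.

// Sketch: shows what `reasoning: Some(...)` serializes to when a model is
// configured with a thinking mode and a token budget.
use serde::Serialize;

#[derive(Serialize)]
struct Reasoning {
    #[serde(skip_serializing_if = "Option::is_none")]
    effort: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    max_tokens: Option<u32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    exclude: Option<bool>,
    #[serde(skip_serializing_if = "Option::is_none")]
    enabled: Option<bool>,
}

fn main() {
    // Roughly what into_open_router builds for
    // `ModelMode::Thinking { budget_tokens: Some(8000) }` (budget value is illustrative).
    let reasoning = Reasoning {
        effort: None,
        max_tokens: Some(8000),
        exclude: Some(false),
        enabled: Some(true),
    };
    // Prints: {"max_tokens":8000,"exclude":false,"enabled":true}
    // `effort` is omitted because of `skip_serializing_if = "Option::is_none"`.
    println!("{}", serde_json::to_string(&reasoning).unwrap());
}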