language_models: Add thinking support to OpenRouter provider (#32541)

Also did a bit of cleanup of the code for loading models from settings, since it is not required: we fetch all the models from OpenRouter, so it's better to maintain a single source of truth.

Release Notes:

- Add thinking support to OpenRouter provider
Umesh Yadav 2025-06-21 11:33:50 +05:30 committed by GitHub
parent b9838efaaa
commit dfdd2b9558
3 changed files with 148 additions and 9 deletions


@@ -14,7 +14,9 @@ use language_model::{
     LanguageModelToolChoice, LanguageModelToolResultContent, LanguageModelToolUse, MessageContent,
     RateLimiter, Role, StopReason, TokenUsage,
 };
-use open_router::{Model, ResponseStreamEvent, list_models, stream_completion};
+use open_router::{
+    Model, ModelMode as OpenRouterModelMode, ResponseStreamEvent, list_models, stream_completion,
+};
 use schemars::JsonSchema;
 use serde::{Deserialize, Serialize};
 use settings::{Settings, SettingsStore};
@@ -45,6 +47,39 @@ pub struct AvailableModel {
     pub max_completion_tokens: Option<u64>,
     pub supports_tools: Option<bool>,
     pub supports_images: Option<bool>,
+    pub mode: Option<ModelMode>,
+}
+
+#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize, JsonSchema)]
+#[serde(tag = "type", rename_all = "lowercase")]
+pub enum ModelMode {
+    #[default]
+    Default,
+    Thinking {
+        budget_tokens: Option<u32>,
+    },
+}
+
+impl From<ModelMode> for OpenRouterModelMode {
+    fn from(value: ModelMode) -> Self {
+        match value {
+            ModelMode::Default => OpenRouterModelMode::Default,
+            ModelMode::Thinking { budget_tokens } => {
+                OpenRouterModelMode::Thinking { budget_tokens }
+            }
+        }
+    }
+}
+
+impl From<OpenRouterModelMode> for ModelMode {
+    fn from(value: OpenRouterModelMode) -> Self {
+        match value {
+            OpenRouterModelMode::Default => ModelMode::Default,
+            OpenRouterModelMode::Thinking { budget_tokens } => {
+                ModelMode::Thinking { budget_tokens }
+            }
+        }
+    }
 }
 
 pub struct OpenRouterLanguageModelProvider {
@@ -242,6 +277,7 @@ impl LanguageModelProvider for OpenRouterLanguageModelProvider {
                 max_tokens: model.max_tokens,
                 supports_tools: model.supports_tools,
                 supports_images: model.supports_images,
+                mode: model.mode.clone().unwrap_or_default().into(),
             });
         }
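
A note on the `mode: model.mode.clone().unwrap_or_default().into()` line above: a model with no `mode` configured falls back to `ModelMode::Default` before converting into the provider-side enum. A minimal, self-contained sketch of that round trip, using local stand-ins for the two enums (not the real types in this crate):

```rust
// Stand-ins for the settings-level and provider-level enums defined above.
#[derive(Clone, Debug, Default, PartialEq)]
enum ModelMode {
    #[default]
    Default,
    Thinking { budget_tokens: Option<u32> },
}

#[derive(Clone, Debug, PartialEq)]
enum OpenRouterModelMode {
    Default,
    Thinking { budget_tokens: Option<u32> },
}

impl From<ModelMode> for OpenRouterModelMode {
    fn from(value: ModelMode) -> Self {
        match value {
            ModelMode::Default => OpenRouterModelMode::Default,
            ModelMode::Thinking { budget_tokens } => {
                OpenRouterModelMode::Thinking { budget_tokens }
            }
        }
    }
}

fn main() {
    // No `mode` in settings: falls back to Default, then converts.
    let missing: Option<ModelMode> = None;
    let converted: OpenRouterModelMode = missing.unwrap_or_default().into();
    assert_eq!(converted, OpenRouterModelMode::Default);

    // A thinking model keeps its token budget across the conversion.
    let thinking = Some(ModelMode::Thinking { budget_tokens: Some(8_000) });
    let converted: OpenRouterModelMode = thinking.unwrap_or_default().into();
    assert_eq!(
        converted,
        OpenRouterModelMode::Thinking { budget_tokens: Some(8_000) }
    );
}
```

One plausible reason for the mirrored enums is that the settings schema can evolve independently of the OpenRouter crate's wire types.
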
@@ -403,13 +439,12 @@ pub fn into_open_router(
     for message in request.messages {
         for content in message.content {
             match content {
-                MessageContent::Text(text) | MessageContent::Thinking { text, .. } => {
-                    add_message_content_part(
-                        open_router::MessagePart::Text { text },
-                        message.role,
-                        &mut messages,
-                    )
-                }
+                MessageContent::Text(text) => add_message_content_part(
+                    open_router::MessagePart::Text { text },
+                    message.role,
+                    &mut messages,
+                ),
+                MessageContent::Thinking { .. } => {}
                 MessageContent::RedactedThinking(_) => {}
                 MessageContent::Image(image) => {
                     add_message_content_part(
@@ -479,6 +514,16 @@ pub fn into_open_router(
             None
         },
         usage: open_router::RequestUsage { include: true },
+        reasoning: if let OpenRouterModelMode::Thinking { budget_tokens } = model.mode {
+            Some(open_router::Reasoning {
+                effort: None,
+                max_tokens: budget_tokens,
+                exclude: Some(false),
+                enabled: Some(true),
+            })
+        } else {
+            None
+        },
         tools: request
             .tools
             .into_iter()
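
For a sense of what the new `reasoning` field adds to the request body, here is a sketch serializing a local copy of the `Reasoning` struct (defined in the open_router crate changes below); `serde` with the derive feature and `serde_json` are assumed as dependencies:

```rust
use serde::Serialize;

// Local copy of the `Reasoning` struct added to the open_router crate below.
#[derive(Debug, Serialize)]
struct Reasoning {
    #[serde(skip_serializing_if = "Option::is_none")]
    effort: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    max_tokens: Option<u32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    exclude: Option<bool>,
    #[serde(skip_serializing_if = "Option::is_none")]
    enabled: Option<bool>,
}

fn main() {
    // Mirrors the construction above for a thinking model with a 4,096-token budget.
    let reasoning = Reasoning {
        effort: None,
        max_tokens: Some(4_096),
        exclude: Some(false),
        enabled: Some(true),
    };
    // `effort` is omitted because it is None; prints:
    // {"max_tokens":4096,"exclude":false,"enabled":true}
    println!("{}", serde_json::to_string(&reasoning).unwrap());
}
```
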
@@ -569,8 +614,19 @@ impl OpenRouterEventMapper {
         };
 
         let mut events = Vec::new();
+        if let Some(reasoning) = choice.delta.reasoning.clone() {
+            events.push(Ok(LanguageModelCompletionEvent::Thinking {
+                text: reasoning,
+                signature: None,
+            }));
+        }
+
         if let Some(content) = choice.delta.content.clone() {
-            events.push(Ok(LanguageModelCompletionEvent::Text(content)));
+            // OpenRouter sends an empty content string along with reasoning deltas;
+            // skip it as a workaround for that API quirk.
+            if !content.is_empty() {
+                events.push(Ok(LanguageModelCompletionEvent::Text(content)));
+            }
         }
 
         if let Some(tool_calls) = choice.delta.tool_calls.as_ref() {
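
The mapper change means a single stream delta can now yield a `Thinking` event, a `Text` event, or both, while the empty content strings OpenRouter sends alongside reasoning deltas are filtered out. A simplified sketch of that branching with stand-in types (not the real `OpenRouterEventMapper`):

```rust
// Stand-ins for the real delta and completion-event types.
#[derive(Debug, PartialEq)]
enum Event {
    Thinking(String),
    Text(String),
}

struct Delta {
    reasoning: Option<String>,
    content: Option<String>,
}

fn map_delta(delta: &Delta) -> Vec<Event> {
    let mut events = Vec::new();
    if let Some(reasoning) = delta.reasoning.clone() {
        events.push(Event::Thinking(reasoning));
    }
    if let Some(content) = delta.content.clone() {
        // Drop the empty content string that accompanies reasoning deltas,
        // so consumers never see empty text chunks.
        if !content.is_empty() {
            events.push(Event::Text(content));
        }
    }
    events
}

fn main() {
    let delta = Delta {
        reasoning: Some("Let me think...".into()),
        content: Some(String::new()),
    };
    // Only the Thinking event survives; the empty Text chunk is filtered out.
    assert_eq!(
        map_delta(&delta),
        vec![Event::Thinking("Let me think...".into())]
    );
}
```
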


@@ -53,6 +53,18 @@ pub struct Model {
     pub max_tokens: u64,
     pub supports_tools: Option<bool>,
     pub supports_images: Option<bool>,
+    #[serde(default)]
+    pub mode: ModelMode,
+}
+
+#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
+#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
+pub enum ModelMode {
+    #[default]
+    Default,
+    Thinking {
+        budget_tokens: Option<u32>,
+    },
 }
 
 impl Model {
@@ -63,6 +75,7 @@ impl Model {
             Some(2000000),
             Some(true),
             Some(false),
+            Some(ModelMode::Default),
         )
     }
@@ -76,6 +89,7 @@ impl Model {
         max_tokens: Option<u64>,
         supports_tools: Option<bool>,
         supports_images: Option<bool>,
+        mode: Option<ModelMode>,
     ) -> Self {
         Self {
             name: name.to_owned(),
@@ -83,6 +97,7 @@ impl Model {
             max_tokens: max_tokens.unwrap_or(2000000),
             supports_tools,
             supports_images,
+            mode: mode.unwrap_or(ModelMode::Default),
         }
     }
@@ -127,6 +142,8 @@ pub struct Request {
     pub parallel_tool_calls: Option<bool>,
     #[serde(default, skip_serializing_if = "Vec::is_empty")]
     pub tools: Vec<ToolDefinition>,
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub reasoning: Option<Reasoning>,
     pub usage: RequestUsage,
 }
@@ -160,6 +177,18 @@ pub struct FunctionDefinition {
     pub parameters: Option<Value>,
 }
 
+#[derive(Debug, Serialize, Deserialize)]
+pub struct Reasoning {
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub effort: Option<String>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub max_tokens: Option<u32>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub exclude: Option<bool>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub enabled: Option<bool>,
+}
+
 #[derive(Serialize, Deserialize, Debug, Eq, PartialEq)]
 #[serde(tag = "role", rename_all = "lowercase")]
 pub enum RequestMessage {
@@ -299,6 +328,7 @@ pub struct FunctionContent {
 pub struct ResponseMessageDelta {
     pub role: Option<Role>,
     pub content: Option<String>,
+    pub reasoning: Option<String>,
     #[serde(default, skip_serializing_if = "is_none_or_empty")]
     pub tool_calls: Option<Vec<ToolCallChunk>>,
 }
@@ -591,6 +621,16 @@ pub async fn list_models(client: &dyn HttpClient, api_url: &str) -> Result<Vec<Model>> {
                     .map(|arch| arch.input_modalities.contains(&"image".to_string()))
                     .unwrap_or(false),
             ),
+            mode: if entry
+                .supported_parameters
+                .contains(&"reasoning".to_string())
+            {
+                ModelMode::Thinking {
+                    budget_tokens: Some(4_096),
+                }
+            } else {
+                ModelMode::Default
+            },
         })
         .collect();
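
The `list_models` change infers each model's mode from OpenRouter's `supported_parameters` metadata, giving thinking-capable models a default budget of 4,096 tokens. Distilled into a standalone sketch (the `infer_mode` helper is hypothetical, for illustration only):

```rust
#[derive(Debug, PartialEq)]
enum ModelMode {
    Default,
    Thinking { budget_tokens: Option<u32> },
}

// Mirrors the check in list_models: any model advertising the
// "reasoning" parameter is treated as a thinking model.
fn infer_mode(supported_parameters: &[String]) -> ModelMode {
    if supported_parameters.contains(&"reasoning".to_string()) {
        ModelMode::Thinking { budget_tokens: Some(4_096) }
    } else {
        ModelMode::Default
    }
}

fn main() {
    let params = vec!["temperature".to_string(), "reasoning".to_string()];
    assert_eq!(
        infer_mode(&params),
        ModelMode::Thinking { budget_tokens: Some(4_096) }
    );
    assert_eq!(infer_mode(&[]), ModelMode::Default);
}
```
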


@@ -489,6 +489,49 @@ The OpenRouter API key will be saved in your keychain.
 
 Zed will also use the `OPENROUTER_API_KEY` environment variable if it's defined.
 
+#### Custom Models {#openrouter-custom-models}
+
+You can add custom models to the OpenRouter provider by adding the following to your Zed `settings.json`:
+
+```json
+{
+  "language_models": {
+    "open_router": {
+      "api_url": "https://openrouter.ai/api/v1",
+      "available_models": [
+        {
+          "name": "google/gemini-2.0-flash-thinking-exp",
+          "display_name": "Gemini 2.0 Flash (Thinking)",
+          "max_tokens": 200000,
+          "max_output_tokens": 8192,
+          "supports_tools": true,
+          "supports_images": true,
+          "mode": {
+            "type": "thinking",
+            "budget_tokens": 8000
+          }
+        }
+      ]
+    }
+  }
+}
+```
+
+The available configuration options for each model are:
+
+- `name`: The model identifier used by OpenRouter (required)
+- `display_name`: A human-readable name shown in the UI (optional)
+- `max_tokens`: The model's context window size (required)
+- `max_output_tokens`: Maximum tokens the model can generate (optional)
+- `max_completion_tokens`: Maximum completion tokens (optional)
+- `supports_tools`: Whether the model supports tool/function calling (optional)
+- `supports_images`: Whether the model supports image inputs (optional)
+- `mode`: Special mode configuration for thinking models (optional)
+
+You can find available models and their specifications on the [OpenRouter models page](https://openrouter.ai/models).
+
+Custom models will be listed in the model dropdown in the Agent Panel.
+
 ## Advanced Configuration {#advanced-configuration}
 
 ### Custom Provider Endpoints {#custom-provider-endpoint}