From 8ff2e3e1956543a0bf1f801aaec05f8993030c91 Mon Sep 17 00:00:00 2001
From: Cretezy
Date: Wed, 13 Aug 2025 02:09:16 -0400
Subject: [PATCH] language_models: Add reasoning_effort for custom models
 (#35929)

Release Notes:

- Added a setting to control `reasoning_effort` for custom OpenAI models

Tested using the following config:

```json5
"language_models": {
  "openai": {
    "available_models": [
      {
        "name": "gpt-5-mini",
        "display_name": "GPT 5 Mini (custom reasoning)",
        "max_output_tokens": 128000,
        "max_tokens": 272000,
        "reasoning_effort": "high" // Can be minimal, low, medium (default), or high
      }
    ],
    "version": "1"
  }
}
```

Docs: https://platform.openai.com/docs/api-reference/chat/create#chat_create-reasoning_effort

This work could be used to split GPT 5/5-mini/5-nano into their
reasoning-effort variants, e.g. `gpt-5`, `gpt-5 low`, `gpt-5 minimal`, and
`gpt-5 high`, and the same for mini/nano.
---
 crates/language_models/src/provider/cloud.rs  |  1 +
 .../language_models/src/provider/open_ai.rs   |  7 +++++-
 .../src/provider/open_ai_compatible.rs        |  8 ++++++-
 crates/language_models/src/provider/vercel.rs |  1 +
 crates/language_models/src/provider/x_ai.rs   |  1 +
 crates/open_ai/src/open_ai.rs                 | 22 +++++++++++++++++++
 6 files changed, 38 insertions(+), 2 deletions(-)

diff --git a/crates/language_models/src/provider/cloud.rs b/crates/language_models/src/provider/cloud.rs
index ba110be9c5..ff8048040e 100644
--- a/crates/language_models/src/provider/cloud.rs
+++ b/crates/language_models/src/provider/cloud.rs
@@ -942,6 +942,7 @@ impl LanguageModel for CloudLanguageModel {
             model.id(),
             model.supports_parallel_tool_calls(),
             None,
+            None,
         );
         let llm_api_token = self.llm_api_token.clone();
         let future = self.request_limiter.stream(async move {
diff --git a/crates/language_models/src/provider/open_ai.rs b/crates/language_models/src/provider/open_ai.rs
index 9eac58c880..725027b2a7 100644
--- a/crates/language_models/src/provider/open_ai.rs
+++ b/crates/language_models/src/provider/open_ai.rs
@@ -14,7 +14,7 @@ use language_model::{
     RateLimiter, Role, StopReason, TokenUsage,
 };
 use menu;
-use open_ai::{ImageUrl, Model, ResponseStreamEvent, stream_completion};
+use open_ai::{ImageUrl, Model, ReasoningEffort, ResponseStreamEvent, stream_completion};
 use schemars::JsonSchema;
 use serde::{Deserialize, Serialize};
 use settings::{Settings, SettingsStore};
@@ -45,6 +45,7 @@ pub struct AvailableModel {
     pub max_tokens: u64,
     pub max_output_tokens: Option<u64>,
     pub max_completion_tokens: Option<u64>,
+    pub reasoning_effort: Option<ReasoningEffort>,
 }
 
 pub struct OpenAiLanguageModelProvider {
@@ -213,6 +214,7 @@ impl LanguageModelProvider for OpenAiLanguageModelProvider {
                     max_tokens: model.max_tokens,
                     max_output_tokens: model.max_output_tokens,
                     max_completion_tokens: model.max_completion_tokens,
+                    reasoning_effort: model.reasoning_effort.clone(),
                 },
             );
         }
@@ -369,6 +371,7 @@ impl LanguageModel for OpenAiLanguageModel {
             self.model.id(),
             self.model.supports_parallel_tool_calls(),
             self.max_output_tokens(),
+            self.model.reasoning_effort(),
         );
         let completions = self.stream_completion(request, cx);
         async move {
@@ -384,6 +387,7 @@ pub fn into_open_ai(
     model_id: &str,
     supports_parallel_tool_calls: bool,
     max_output_tokens: Option<u64>,
+    reasoning_effort: Option<ReasoningEffort>,
 ) -> open_ai::Request {
     let stream = !model_id.starts_with("o1-");
 
@@ -490,6 +494,7 @@ pub fn into_open_ai(
             LanguageModelToolChoice::Any => open_ai::ToolChoice::Required,
             LanguageModelToolChoice::None => open_ai::ToolChoice::None,
         }),
+        reasoning_effort,
     }
 }
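As a standalone sketch (not part of the patch; assumes the `serde` crate with the
`derive` feature plus `serde_json`, and uses a trimmed-down stand-in for the
provider's `AvailableModel`), this is how the `reasoning_effort` key from the
settings example above deserializes:

```rust
use serde::Deserialize;

// Stand-in for open_ai::ReasoningEffort; same serde attribute as the patch.
#[derive(Debug, Deserialize, PartialEq)]
#[serde(rename_all = "lowercase")]
enum ReasoningEffort {
    Minimal,
    Low,
    Medium,
    High,
}

// Trimmed stand-in for the provider's AvailableModel settings struct.
#[derive(Debug, Deserialize)]
struct AvailableModel {
    name: String,
    max_tokens: u64,
    // `None` when the key is omitted from settings.
    reasoning_effort: Option<ReasoningEffort>,
}

fn main() {
    let config = r#"{"name": "gpt-5-mini", "max_tokens": 272000, "reasoning_effort": "high"}"#;
    let model: AvailableModel = serde_json::from_str(config).unwrap();
    assert_eq!(model.reasoning_effort, Some(ReasoningEffort::High));
    println!("{}: {:?}", model.name, model.reasoning_effort);
}
```

From there the value is simply threaded through: the provider clones it into the
model, and `into_open_ai` places it on the outgoing request, as the diffs above
and below show.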
diff --git a/crates/language_models/src/provider/open_ai_compatible.rs b/crates/language_models/src/provider/open_ai_compatible.rs
index 38bd7cee06..6e912765cd 100644
--- a/crates/language_models/src/provider/open_ai_compatible.rs
+++ b/crates/language_models/src/provider/open_ai_compatible.rs
@@ -355,7 +355,13 @@ impl LanguageModel for OpenAiCompatibleLanguageModel {
             LanguageModelCompletionError,
         >,
     > {
-        let request = into_open_ai(request, &self.model.name, true, self.max_output_tokens());
+        let request = into_open_ai(
+            request,
+            &self.model.name,
+            true,
+            self.max_output_tokens(),
+            None,
+        );
         let completions = self.stream_completion(request, cx);
         async move {
             let mapper = OpenAiEventMapper::new();
diff --git a/crates/language_models/src/provider/vercel.rs b/crates/language_models/src/provider/vercel.rs
index 037ce467d0..57a89ba4aa 100644
--- a/crates/language_models/src/provider/vercel.rs
+++ b/crates/language_models/src/provider/vercel.rs
@@ -356,6 +356,7 @@ impl LanguageModel for VercelLanguageModel {
             self.model.id(),
             self.model.supports_parallel_tool_calls(),
             self.max_output_tokens(),
+            None,
         );
         let completions = self.stream_completion(request, cx);
         async move {
diff --git a/crates/language_models/src/provider/x_ai.rs b/crates/language_models/src/provider/x_ai.rs
index 5f6034571b..5e7190ea96 100644
--- a/crates/language_models/src/provider/x_ai.rs
+++ b/crates/language_models/src/provider/x_ai.rs
@@ -360,6 +360,7 @@ impl LanguageModel for XAiLanguageModel {
             self.model.id(),
             self.model.supports_parallel_tool_calls(),
             self.max_output_tokens(),
+            None,
         );
         let completions = self.stream_completion(request, cx);
         async move {
diff --git a/crates/open_ai/src/open_ai.rs b/crates/open_ai/src/open_ai.rs
index 919b1d9ebf..5801f29623 100644
--- a/crates/open_ai/src/open_ai.rs
+++ b/crates/open_ai/src/open_ai.rs
@@ -89,11 +89,13 @@ pub enum Model {
         max_tokens: u64,
         max_output_tokens: Option<u64>,
         max_completion_tokens: Option<u64>,
+        reasoning_effort: Option<ReasoningEffort>,
     },
 }
 
 impl Model {
     pub fn default_fast() -> Self {
+        // TODO: Replace with FiveMini since all other models are deprecated
         Self::FourPointOneMini
     }
 
@@ -206,6 +208,15 @@ impl Model {
         }
     }
 
+    pub fn reasoning_effort(&self) -> Option<ReasoningEffort> {
+        match self {
+            Self::Custom {
+                reasoning_effort, ..
+            } => reasoning_effort.to_owned(),
+            _ => None,
+        }
+    }
+
     /// Returns whether the given model supports the `parallel_tool_calls` parameter.
     ///
     /// If the model does not support the parameter, do not pass it up, or the API will return an error.
@@ -246,6 +257,7 @@ pub struct Request {
     pub tools: Vec<ToolDefinition>,
     #[serde(default, skip_serializing_if = "Option::is_none")]
     pub prompt_cache_key: Option<String>,
+    pub reasoning_effort: Option<ReasoningEffort>,
 }
 
 #[derive(Debug, Serialize, Deserialize)]
@@ -257,6 +269,16 @@ pub enum ToolChoice {
     Other(ToolDefinition),
 }
 
+#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
+#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
+#[serde(rename_all = "lowercase")]
+pub enum ReasoningEffort {
+    Minimal,
+    Low,
+    Medium,
+    High,
+}
+
 #[derive(Clone, Deserialize, Serialize, Debug)]
 #[serde(tag = "type", rename_all = "snake_case")]
 pub enum ToolDefinition {
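A standalone sketch (not part of the patch; assumes `serde` with the `derive`
feature plus `serde_json`) showing that `rename_all = "lowercase"` on the new
`ReasoningEffort` enum produces exactly the wire strings the OpenAI
`reasoning_effort` parameter accepts:

```rust
use serde::{Deserialize, Serialize};

// Mirror of the enum added in crates/open_ai/src/open_ai.rs above.
#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
#[serde(rename_all = "lowercase")]
enum ReasoningEffort {
    Minimal,
    Low,
    Medium,
    High,
}

fn main() {
    // Each variant serializes to its lowercase wire string:
    // "minimal", "low", "medium", "high".
    for effort in [
        ReasoningEffort::Minimal,
        ReasoningEffort::Low,
        ReasoningEffort::Medium,
        ReasoningEffort::High,
    ] {
        println!("{}", serde_json::to_string(&effort).unwrap());
    }

    // And round-trips back from the wire format.
    let effort: ReasoningEffort = serde_json::from_str(r#""high""#).unwrap();
    assert_eq!(effort, ReasoningEffort::High);
}
```

When the setting is omitted, `Model::reasoning_effort()` returns `None` and the
API's own default (`medium`, per the config comment above) applies.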