language_models: Add reasoning_effort for custom models (#35929)

Release Notes:

- Added a `reasoning_effort` setting for custom OpenAI models

Tested using the following config:
```json5
  "language_models": {
    "openai": {
      "available_models": [
        {
          "name": "gpt-5-mini",
          "display_name": "GPT 5 Mini (custom reasoning)",
          "max_output_tokens": 128000,
          "max_tokens": 272000,
          "reasoning_effort": "high" // Can be minimal, low, medium (default), and high
        }
      ],
      "version": "1"
    }
  }
```

Docs:
https://platform.openai.com/docs/api-reference/chat/create#chat_create-reasoning_effort
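For context, the setting maps straight onto the `reasoning_effort` field of the chat completions request body. A minimal sketch of the outgoing request (abridged; the `messages` content and other fields are illustrative, not a capture of Zed's actual payload):

```json5
{
  "model": "gpt-5-mini",
  "messages": [{ "role": "user", "content": "Hello" }],
  "reasoning_effort": "high"
}
```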

This work could also be used to split GPT-5/5-mini/5-nano into per-reasoning-effort variants, e.g. `gpt-5`, `gpt-5 low`, `gpt-5 minimal`, `gpt-5 high`, and likewise for mini/nano.
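A hypothetical config sketch of that idea, reusing the schema from the tested config above (the entries and display names are illustrative, and whether several entries may share one `name` is untested here):

```json5
  "language_models": {
    "openai": {
      "available_models": [
        {
          "name": "gpt-5",
          "display_name": "GPT 5 Minimal",
          "max_output_tokens": 128000,
          "max_tokens": 272000,
          "reasoning_effort": "minimal"
        },
        {
          "name": "gpt-5",
          "display_name": "GPT 5 High",
          "max_output_tokens": 128000,
          "max_tokens": 272000,
          "reasoning_effort": "high"
        }
      ],
      "version": "1"
    }
  }
```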



@@ -942,6 +942,7 @@ impl LanguageModel for CloudLanguageModel {
model.id(),
model.supports_parallel_tool_calls(),
None,
None,
);
let llm_api_token = self.llm_api_token.clone();
let future = self.request_limiter.stream(async move {


@@ -14,7 +14,7 @@ use language_model::{
RateLimiter, Role, StopReason, TokenUsage,
};
use menu;
use open_ai::{ImageUrl, Model, ResponseStreamEvent, stream_completion};
use open_ai::{ImageUrl, Model, ReasoningEffort, ResponseStreamEvent, stream_completion};
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use settings::{Settings, SettingsStore};
@@ -45,6 +45,7 @@ pub struct AvailableModel {
pub max_tokens: u64,
pub max_output_tokens: Option<u64>,
pub max_completion_tokens: Option<u64>,
pub reasoning_effort: Option<ReasoningEffort>,
}
pub struct OpenAiLanguageModelProvider {
@@ -213,6 +214,7 @@ impl LanguageModelProvider for OpenAiLanguageModelProvider {
max_tokens: model.max_tokens,
max_output_tokens: model.max_output_tokens,
max_completion_tokens: model.max_completion_tokens,
reasoning_effort: model.reasoning_effort.clone(),
},
);
}
@@ -369,6 +371,7 @@ impl LanguageModel for OpenAiLanguageModel {
self.model.id(),
self.model.supports_parallel_tool_calls(),
self.max_output_tokens(),
self.model.reasoning_effort(),
);
let completions = self.stream_completion(request, cx);
async move {
@@ -384,6 +387,7 @@ pub fn into_open_ai(
model_id: &str,
supports_parallel_tool_calls: bool,
max_output_tokens: Option<u64>,
reasoning_effort: Option<ReasoningEffort>,
) -> open_ai::Request {
let stream = !model_id.starts_with("o1-");
@@ -490,6 +494,7 @@ pub fn into_open_ai(
LanguageModelToolChoice::Any => open_ai::ToolChoice::Required,
LanguageModelToolChoice::None => open_ai::ToolChoice::None,
}),
reasoning_effort,
}
}


@@ -355,7 +355,13 @@ impl LanguageModel for OpenAiCompatibleLanguageModel {
LanguageModelCompletionError,
>,
> {
let request = into_open_ai(request, &self.model.name, true, self.max_output_tokens());
let request = into_open_ai(
request,
&self.model.name,
true,
self.max_output_tokens(),
None,
);
let completions = self.stream_completion(request, cx);
async move {
let mapper = OpenAiEventMapper::new();


@@ -356,6 +356,7 @@ impl LanguageModel for VercelLanguageModel {
self.model.id(),
self.model.supports_parallel_tool_calls(),
self.max_output_tokens(),
None,
);
let completions = self.stream_completion(request, cx);
async move {


@@ -360,6 +360,7 @@ impl LanguageModel for XAiLanguageModel {
self.model.id(),
self.model.supports_parallel_tool_calls(),
self.max_output_tokens(),
None,
);
let completions = self.stream_completion(request, cx);
async move {


@@ -89,11 +89,13 @@ pub enum Model {
max_tokens: u64,
max_output_tokens: Option<u64>,
max_completion_tokens: Option<u64>,
reasoning_effort: Option<ReasoningEffort>,
},
}
impl Model {
pub fn default_fast() -> Self {
// TODO: Replace with FiveMini since all other models are deprecated
Self::FourPointOneMini
}
@@ -206,6 +208,15 @@ impl Model {
}
}
pub fn reasoning_effort(&self) -> Option<ReasoningEffort> {
match self {
Self::Custom {
reasoning_effort, ..
} => reasoning_effort.to_owned(),
_ => None,
}
}
/// Returns whether the given model supports the `parallel_tool_calls` parameter.
///
/// If the model does not support the parameter, do not pass it up, or the API will return an error.
@@ -246,6 +257,7 @@ pub struct Request {
pub tools: Vec<ToolDefinition>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub prompt_cache_key: Option<String>,
pub reasoning_effort: Option<ReasoningEffort>,
}
#[derive(Debug, Serialize, Deserialize)]
@@ -257,6 +269,16 @@ pub enum ToolChoice {
Other(ToolDefinition),
}
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
#[serde(rename_all = "lowercase")]
pub enum ReasoningEffort {
Minimal,
Low,
Medium,
High,
}
#[derive(Clone, Deserialize, Serialize, Debug)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ToolDefinition {