language_models: Add reasoning_effort for custom models (#35929)

Release Notes:

- Added `reasoning_effort` support to custom models

Tested using the following config:
```json5
  "language_models": {
    "openai": {
      "available_models": [
        {
          "name": "gpt-5-mini",
          "display_name": "GPT 5 Mini (custom reasoning)",
          "max_output_tokens": 128000,
          "max_tokens": 272000,
          "reasoning_effort": "high" // Can be minimal, low, medium (default), and high
        }
      ],
      "version": "1"
    }
  }
```

Docs:
https://platform.openai.com/docs/api-reference/chat/create#chat_create-reasoning_effort
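
For reference, `ReasoningEffort` itself is defined in the `open_ai` crate (that file's diff isn't among the hunks shown below). A minimal sketch of what the type plausibly looks like, assuming a serde enum that serializes to the lowercase strings the API accepts; the exact derives and attributes in the real definition may differ:

```rust
use serde::{Deserialize, Serialize};

// Sketch only: the real definition lives in the open_ai crate and may differ.
// Variants mirror the values accepted by OpenAI's `reasoning_effort` parameter
// and serialize to their lowercase names ("minimal", "low", "medium", "high").
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum ReasoningEffort {
    Minimal,
    Low,
    Medium,
    High,
}

// On the outgoing request this is an optional field, so models that don't set
// it (or providers that pass `None`, as the hunks below do) are unaffected:
//
//     pub reasoning_effort: Option<ReasoningEffort>,
```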

This work could be used to split GPT-5/5-mini/5-nano into their reasoning-effort variants, e.g. `gpt-5`, `gpt-5 low`, `gpt-5 minimal`, `gpt-5 high`, and the same for mini/nano.

Release Notes:

* Added a setting to control `reasoning_effort` in OpenAI models
Authored by Cretezy on 2025-08-13 02:09:16 -04:00, committed by GitHub
parent 96093aa465
commit 8ff2e3e195
6 changed files with 38 additions and 2 deletions

View file

@@ -942,6 +942,7 @@ impl LanguageModel for CloudLanguageModel {
model.id(),
model.supports_parallel_tool_calls(),
None,
None,
);
let llm_api_token = self.llm_api_token.clone();
let future = self.request_limiter.stream(async move {

View file

@@ -14,7 +14,7 @@ use language_model::{
RateLimiter, Role, StopReason, TokenUsage,
};
use menu;
use open_ai::{ImageUrl, Model, ResponseStreamEvent, stream_completion};
use open_ai::{ImageUrl, Model, ReasoningEffort, ResponseStreamEvent, stream_completion};
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use settings::{Settings, SettingsStore};
@@ -45,6 +45,7 @@ pub struct AvailableModel {
pub max_tokens: u64,
pub max_output_tokens: Option<u64>,
pub max_completion_tokens: Option<u64>,
pub reasoning_effort: Option<ReasoningEffort>,
}
pub struct OpenAiLanguageModelProvider {
@@ -213,6 +214,7 @@ impl LanguageModelProvider for OpenAiLanguageModelProvider {
max_tokens: model.max_tokens,
max_output_tokens: model.max_output_tokens,
max_completion_tokens: model.max_completion_tokens,
reasoning_effort: model.reasoning_effort.clone(),
},
);
}
@@ -369,6 +371,7 @@ impl LanguageModel for OpenAiLanguageModel {
self.model.id(),
self.model.supports_parallel_tool_calls(),
self.max_output_tokens(),
self.model.reasoning_effort(),
);
let completions = self.stream_completion(request, cx);
async move {
@@ -384,6 +387,7 @@ pub fn into_open_ai(
model_id: &str,
supports_parallel_tool_calls: bool,
max_output_tokens: Option<u64>,
reasoning_effort: Option<ReasoningEffort>,
) -> open_ai::Request {
let stream = !model_id.starts_with("o1-");
@@ -490,6 +494,7 @@ pub fn into_open_ai(
LanguageModelToolChoice::Any => open_ai::ToolChoice::Required,
LanguageModelToolChoice::None => open_ai::ToolChoice::None,
}),
reasoning_effort,
}
}

View file

@@ -355,7 +355,13 @@ impl LanguageModel for OpenAiCompatibleLanguageModel {
LanguageModelCompletionError,
>,
> {
let request = into_open_ai(request, &self.model.name, true, self.max_output_tokens());
let request = into_open_ai(
request,
&self.model.name,
true,
self.max_output_tokens(),
None,
);
let completions = self.stream_completion(request, cx);
async move {
let mapper = OpenAiEventMapper::new();

View file

@@ -356,6 +356,7 @@ impl LanguageModel for VercelLanguageModel {
self.model.id(),
self.model.supports_parallel_tool_calls(),
self.max_output_tokens(),
None,
);
let completions = self.stream_completion(request, cx);
async move {

View file

@@ -360,6 +360,7 @@ impl LanguageModel for XAiLanguageModel {
self.model.id(),
self.model.supports_parallel_tool_calls(),
self.max_output_tokens(),
None,
);
let completions = self.stream_completion(request, cx);
async move {