language_models: Add reasoning_effort for custom models (#35929)

Release Notes:

- Added `reasoning_effort` support to custom models

Tested using the following config:
```json5
  "language_models": {
    "openai": {
      "available_models": [
        {
          "name": "gpt-5-mini",
          "display_name": "GPT 5 Mini (custom reasoning)",
          "max_output_tokens": 128000,
          "max_tokens": 272000,
          "reasoning_effort": "high" // Can be minimal, low, medium (default), and high
        }
      ],
      "version": "1"
    }
  }
```

Docs:
https://platform.openai.com/docs/api-reference/chat/create#chat_create-reasoning_effort

This work could be used to split GPT 5/5-mini/5-nano into one model per
reasoning effort variant, e.g. `gpt-5`, `gpt-5 low`, `gpt-5 minimal`,
`gpt-5 high`, and the same for mini/nano.

Release Notes:

* Added a setting to control `reasoning_effort` in OpenAI models
This commit is contained in:
Cretezy 2025-08-13 02:09:16 -04:00 committed by GitHub
parent 96093aa465
commit 8ff2e3e195
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 38 additions and 2 deletions

View file

@ -89,11 +89,13 @@ pub enum Model {
max_tokens: u64,
max_output_tokens: Option<u64>,
max_completion_tokens: Option<u64>,
reasoning_effort: Option<ReasoningEffort>,
},
}
impl Model {
/// Returns the default "fast" model, which is currently `Self::FourPointOneMini`.
pub fn default_fast() -> Self {
// TODO: Replace with FiveMini since all other models are deprecated
Self::FourPointOneMini
}
@ -206,6 +208,15 @@ impl Model {
}
}
/// Returns the reasoning effort configured for this model, if any.
///
/// Only the `Custom` variant carries a `reasoning_effort` field; every other
/// variant yields `None`.
pub fn reasoning_effort(&self) -> Option<ReasoningEffort> {
    if let Self::Custom {
        reasoning_effort, ..
    } = self
    {
        // The field is borrowed through `&self`, so hand back an owned copy.
        reasoning_effort.clone()
    } else {
        None
    }
}
/// Returns whether the given model supports the `parallel_tool_calls` parameter.
///
/// If the model does not support the parameter, do not pass it up, or the API will return an error.
@ -246,6 +257,7 @@ pub struct Request {
pub tools: Vec<ToolDefinition>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub prompt_cache_key: Option<String>,
pub reasoning_effort: Option<ReasoningEffort>,
}
#[derive(Debug, Serialize, Deserialize)]
@ -257,6 +269,16 @@ pub enum ToolChoice {
Other(ToolDefinition),
}
/// Reasoning effort level that can be set on a custom model and carried in a request.
///
/// Variants are serialized and deserialized by their lowercase name
/// (`"minimal"`, `"low"`, `"medium"`, `"high"`). A JSON schema is derived only
/// when the `schemars` feature is enabled.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
#[serde(rename_all = "lowercase")]
pub enum ReasoningEffort {
Minimal,
Low,
Medium,
High,
}
#[derive(Clone, Deserialize, Serialize, Debug)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ToolDefinition {