From 2a57b160b03c8e8543fdae12a0c191ed1a985e54 Mon Sep 17 00:00:00 2001 From: Oleksiy Syvokon Date: Fri, 15 Aug 2025 13:54:24 +0300 Subject: [PATCH] openai: Don't send prompt_cache_key for OpenAI-compatible models (#36231) Some APIs fail when they get this parameter Closes #36215 Release Notes: - Fixed OpenAI-compatible providers that don't support prompt caching and/or reasoning --- crates/language_models/src/provider/cloud.rs | 1 + crates/language_models/src/provider/open_ai.rs | 8 +++++++- crates/language_models/src/provider/open_ai_compatible.rs | 5 ++++- crates/language_models/src/provider/vercel.rs | 1 + crates/language_models/src/provider/x_ai.rs | 1 + crates/open_ai/src/open_ai.rs | 7 +++++++ crates/vercel/src/vercel.rs | 4 ++++ crates/x_ai/src/x_ai.rs | 4 ++++ 8 files changed, 29 insertions(+), 2 deletions(-) diff --git a/crates/language_models/src/provider/cloud.rs b/crates/language_models/src/provider/cloud.rs index ff8048040e..c1337399f9 100644 --- a/crates/language_models/src/provider/cloud.rs +++ b/crates/language_models/src/provider/cloud.rs @@ -941,6 +941,7 @@ impl LanguageModel for CloudLanguageModel { request, model.id(), model.supports_parallel_tool_calls(), + model.supports_prompt_cache_key(), None, None, ); diff --git a/crates/language_models/src/provider/open_ai.rs b/crates/language_models/src/provider/open_ai.rs index 725027b2a7..eaf8d885b3 100644 --- a/crates/language_models/src/provider/open_ai.rs +++ b/crates/language_models/src/provider/open_ai.rs @@ -370,6 +370,7 @@ impl LanguageModel for OpenAiLanguageModel { request, self.model.id(), self.model.supports_parallel_tool_calls(), + self.model.supports_prompt_cache_key(), self.max_output_tokens(), self.model.reasoning_effort(), ); @@ -386,6 +387,7 @@ pub fn into_open_ai( request: LanguageModelRequest, model_id: &str, supports_parallel_tool_calls: bool, + supports_prompt_cache_key: bool, max_output_tokens: Option, reasoning_effort: Option, ) -> open_ai::Request { @@ -477,7 +479,11 @@ pub fn 
into_open_ai( } else { None }, - prompt_cache_key: request.thread_id, + prompt_cache_key: if supports_prompt_cache_key { + request.thread_id + } else { + None + }, tools: request .tools .into_iter() diff --git a/crates/language_models/src/provider/open_ai_compatible.rs b/crates/language_models/src/provider/open_ai_compatible.rs index 6e912765cd..5f546f5219 100644 --- a/crates/language_models/src/provider/open_ai_compatible.rs +++ b/crates/language_models/src/provider/open_ai_compatible.rs @@ -355,10 +355,13 @@ impl LanguageModel for OpenAiCompatibleLanguageModel { LanguageModelCompletionError, >, > { + let supports_parallel_tool_call = true; + let supports_prompt_cache_key = false; let request = into_open_ai( request, &self.model.name, - true, + supports_parallel_tool_call, + supports_prompt_cache_key, self.max_output_tokens(), None, ); diff --git a/crates/language_models/src/provider/vercel.rs b/crates/language_models/src/provider/vercel.rs index 57a89ba4aa..9f447cb68b 100644 --- a/crates/language_models/src/provider/vercel.rs +++ b/crates/language_models/src/provider/vercel.rs @@ -355,6 +355,7 @@ impl LanguageModel for VercelLanguageModel { request, self.model.id(), self.model.supports_parallel_tool_calls(), + self.model.supports_prompt_cache_key(), self.max_output_tokens(), None, ); diff --git a/crates/language_models/src/provider/x_ai.rs b/crates/language_models/src/provider/x_ai.rs index 5e7190ea96..fed6fe92bf 100644 --- a/crates/language_models/src/provider/x_ai.rs +++ b/crates/language_models/src/provider/x_ai.rs @@ -359,6 +359,7 @@ impl LanguageModel for XAiLanguageModel { request, self.model.id(), self.model.supports_parallel_tool_calls(), + self.model.supports_prompt_cache_key(), self.max_output_tokens(), None, ); diff --git a/crates/open_ai/src/open_ai.rs b/crates/open_ai/src/open_ai.rs index 8bbe858995..604e8fe622 100644 --- a/crates/open_ai/src/open_ai.rs +++ b/crates/open_ai/src/open_ai.rs @@ -236,6 +236,13 @@ impl Model { Self::O1 | Self::O3 | 
Self::O3Mini | Self::O4Mini | Model::Custom { .. } => false, } } + + /// Returns whether the given model supports the `prompt_cache_key` parameter. + /// + /// If this returns `false`, callers must leave `prompt_cache_key` unset on the request. + pub fn supports_prompt_cache_key(&self) -> bool { + true + } } #[derive(Debug, Serialize, Deserialize)] diff --git a/crates/vercel/src/vercel.rs b/crates/vercel/src/vercel.rs index 1ae22c5fef..8686fda53f 100644 --- a/crates/vercel/src/vercel.rs +++ b/crates/vercel/src/vercel.rs @@ -71,4 +71,8 @@ impl Model { Model::Custom { .. } => false, } } + + pub fn supports_prompt_cache_key(&self) -> bool { + false + } } diff --git a/crates/x_ai/src/x_ai.rs b/crates/x_ai/src/x_ai.rs index ac116b2f8f..23cd5b9320 100644 --- a/crates/x_ai/src/x_ai.rs +++ b/crates/x_ai/src/x_ai.rs @@ -105,6 +105,10 @@ impl Model { } } + pub fn supports_prompt_cache_key(&self) -> bool { + false + } + pub fn supports_tool(&self) -> bool { match self { Self::Grok2Vision