openai: Don't send prompt_cache_key for OpenAI-compatible models (#36231)

Some OpenAI-compatible APIs fail when they receive this parameter.

Closes #36215

Release Notes:

- Fixed OpenAI-compatible providers that don't support prompt caching
and/or reasoning
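
For context: `prompt_cache_key` is an OpenAI-specific parameter, and strict OpenAI-compatible servers may reject request bodies containing fields they don't recognize. Below is a minimal sketch of why mapping the key to `None` keeps it out of the request entirely; the struct and serde attribute are illustrative assumptions, not Zed's actual `open_ai::Request`.

```rust
use serde::Serialize;

// Illustrative request body: an optional field annotated so that `None`
// is omitted from the serialized JSON rather than being sent as `null`.
#[derive(Serialize)]
struct Request {
    model: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    prompt_cache_key: Option<String>,
}

fn main() {
    let with_key = Request {
        model: "gpt-4.1".into(),
        prompt_cache_key: Some("thread-123".into()),
    };
    let without_key = Request {
        model: "local-model".into(),
        prompt_cache_key: None,
    };
    // {"model":"gpt-4.1","prompt_cache_key":"thread-123"}
    println!("{}", serde_json::to_string(&with_key).unwrap());
    // {"model":"local-model"} -- the parameter never reaches the server.
    println!("{}", serde_json::to_string(&without_key).unwrap());
}
```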
Oleksiy Syvokon, 2025-08-15 13:54:24 +03:00, committed by GitHub
commit 2a57b160b0 (parent d891348442)
8 changed files with 29 additions and 2 deletions


@@ -941,6 +941,7 @@ impl LanguageModel for CloudLanguageModel {
             request,
             model.id(),
             model.supports_parallel_tool_calls(),
+            model.supports_prompt_cache_key(),
             None,
             None,
         );


@@ -370,6 +370,7 @@ impl LanguageModel for OpenAiLanguageModel {
             request,
             self.model.id(),
             self.model.supports_parallel_tool_calls(),
+            self.model.supports_prompt_cache_key(),
             self.max_output_tokens(),
             self.model.reasoning_effort(),
         );
@@ -386,6 +387,7 @@ pub fn into_open_ai(
     request: LanguageModelRequest,
     model_id: &str,
     supports_parallel_tool_calls: bool,
+    supports_prompt_cache_key: bool,
     max_output_tokens: Option<u64>,
     reasoning_effort: Option<ReasoningEffort>,
 ) -> open_ai::Request {
@@ -477,7 +479,11 @@ pub fn into_open_ai(
         } else {
             None
         },
-        prompt_cache_key: request.thread_id,
+        prompt_cache_key: if supports_prompt_cache_key {
+            request.thread_id
+        } else {
+            None
+        },
         tools: request
             .tools
             .into_iter()
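
The gate added above is an option-producing conditional. For illustration, an equivalent standalone form (a hypothetical helper, not part of the diff):

```rust
/// Hypothetical standalone equivalent of the inline `if` in `into_open_ai`:
/// forward the thread id as a cache key only when the model supports it.
fn prompt_cache_key(
    supports_prompt_cache_key: bool,
    thread_id: Option<String>,
) -> Option<String> {
    // Same result as `thread_id.filter(|_| supports_prompt_cache_key)`.
    if supports_prompt_cache_key {
        thread_id
    } else {
        None
    }
}
```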


@@ -355,10 +355,13 @@ impl LanguageModel for OpenAiCompatibleLanguageModel {
             LanguageModelCompletionError,
         >,
     > {
+        let supports_parallel_tool_call = true;
+        let supports_prompt_cache_key = false;
         let request = into_open_ai(
             request,
             &self.model.name,
-            true,
+            supports_parallel_tool_call,
+            supports_prompt_cache_key,
             self.max_output_tokens(),
             None,
         );


@@ -355,6 +355,7 @@ impl LanguageModel for VercelLanguageModel {
             request,
             self.model.id(),
             self.model.supports_parallel_tool_calls(),
+            self.model.supports_prompt_cache_key(),
             self.max_output_tokens(),
             None,
         );


@@ -359,6 +359,7 @@ impl LanguageModel for XAiLanguageModel {
             request,
             self.model.id(),
             self.model.supports_parallel_tool_calls(),
+            self.model.supports_prompt_cache_key(),
             self.max_output_tokens(),
             None,
         );


@@ -236,6 +236,13 @@ impl Model {
             Self::O1 | Self::O3 | Self::O3Mini | Self::O4Mini | Model::Custom { .. } => false,
         }
     }
+
+    /// Returns whether the given model supports the `prompt_cache_key` parameter.
+    ///
+    /// If the model does not support the parameter, it must not be sent.
+    pub fn supports_prompt_cache_key(&self) -> bool {
+        true
+    }
 }

 #[derive(Debug, Serialize, Deserialize)]


@@ -71,4 +71,8 @@ impl Model {
             Model::Custom { .. } => false,
         }
     }
+
+    pub fn supports_prompt_cache_key(&self) -> bool {
+        false
+    }
 }


@@ -105,6 +105,10 @@ impl Model {
         }
     }
+
+    pub fn supports_prompt_cache_key(&self) -> bool {
+        false
+    }

     pub fn supports_tool(&self) -> bool {
         match self {
             Self::Grok2Vision
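
A sanity check over the new capability methods might look like this (a hypothetical test: the `O3` and `Grok2Vision` variants appear in the hunks above, while the crate paths are assumptions):

```rust
#[cfg(test)]
mod tests {
    #[test]
    fn prompt_cache_key_capabilities() {
        // OpenAI models keep sending the key; xAI models suppress it.
        assert!(open_ai::Model::O3.supports_prompt_cache_key());
        assert!(!x_ai::Model::Grok2Vision.supports_prompt_cache_key());
    }
}
```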