Reuse conversation cache when streaming edits (#30245)

Release Notes:

- Improved latency when the agent applies edits.
This commit is contained in:
Antonio Scandurra 2025-05-08 14:36:34 +02:00 committed by GitHub
parent 032022e37b
commit 9f6809a28d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
50 changed files with 847 additions and 21557 deletions

View file

@ -12,7 +12,7 @@ use language_model::{
AuthenticateError, LanguageModel, LanguageModelCompletionError, LanguageModelCompletionEvent,
LanguageModelId, LanguageModelName, LanguageModelProvider, LanguageModelProviderId,
LanguageModelProviderName, LanguageModelProviderState, LanguageModelRequest,
LanguageModelToolUse, MessageContent, RateLimiter, Role, StopReason,
LanguageModelToolChoice, LanguageModelToolUse, MessageContent, RateLimiter, Role, StopReason,
};
use open_ai::{Model, ResponseStreamEvent, stream_completion};
use schemars::JsonSchema;
@ -295,6 +295,14 @@ impl LanguageModel for OpenAiLanguageModel {
true
}
fn supports_tool_choice(&self, choice: LanguageModelToolChoice) -> bool {
match choice {
LanguageModelToolChoice::Auto => true,
LanguageModelToolChoice::Any => true,
LanguageModelToolChoice::None => true,
}
}
fn telemetry_id(&self) -> String {
format!("openai/{}", self.model.id())
}
@ -417,7 +425,11 @@ pub fn into_open_ai(
},
})
.collect(),
tool_choice: None,
tool_choice: request.tool_choice.map(|choice| match choice {
LanguageModelToolChoice::Auto => open_ai::ToolChoice::Auto,
LanguageModelToolChoice::Any => open_ai::ToolChoice::Required,
LanguageModelToolChoice::None => open_ai::ToolChoice::None,
}),
}
}