Reuse conversation cache when streaming edits (#30245)

Release Notes:

- Improved latency when the agent applies edits.
This commit is contained in:
Antonio Scandurra 2025-05-08 14:36:34 +02:00 committed by GitHub
parent 032022e37b
commit 9f6809a28d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
50 changed files with 847 additions and 21557 deletions

View file

@ -1411,6 +1411,7 @@ impl ActiveThread {
mode: None,
messages: vec![request_message],
tools: vec![],
tool_choice: None,
stop: vec![],
temperature: AssistantSettings::temperature_for_model(
&configured_model.model,
@ -3256,7 +3257,7 @@ impl ActiveThread {
c.tool_use_id.clone(),
c.ui_text.clone(),
c.input.clone(),
&c.messages,
c.request.clone(),
c.tool.clone(),
configured.model,
Some(window.window_handle()),

View file

@ -466,6 +466,7 @@ impl CodegenAlternative {
prompt_id: None,
mode: None,
tools: Vec::new(),
tool_choice: None,
stop: Vec::new(),
temperature,
messages: vec![request_message],

View file

@ -4,7 +4,7 @@ use anyhow::{Result, anyhow, bail};
use assistant_tool::{ActionLog, Tool, ToolResult, ToolSource};
use context_server::{ContextServerId, types};
use gpui::{AnyWindowHandle, App, Entity, Task};
use language_model::{LanguageModel, LanguageModelRequestMessage, LanguageModelToolSchemaFormat};
use language_model::{LanguageModel, LanguageModelRequest, LanguageModelToolSchemaFormat};
use project::{Project, context_server_store::ContextServerStore};
use ui::IconName;
@ -72,7 +72,7 @@ impl Tool for ContextServerTool {
fn run(
self: Arc<Self>,
input: serde_json::Value,
_messages: &[LanguageModelRequestMessage],
_request: Arc<LanguageModelRequest>,
_project: Entity<Project>,
_action_log: Entity<ActionLog>,
_model: Arc<dyn LanguageModel>,

View file

@ -1245,6 +1245,7 @@ impl MessageEditor {
mode: None,
messages: vec![request_message],
tools: vec![],
tool_choice: None,
stop: vec![],
temperature: AssistantSettings::temperature_for_model(&model.model, cx),
};

View file

@ -293,6 +293,7 @@ impl TerminalInlineAssistant {
mode: None,
messages: vec![request_message],
tools: Vec::new(),
tool_choice: None,
stop: Vec::new(),
temperature,
}

View file

@ -1183,6 +1183,7 @@ impl Thread {
mode: None,
messages: vec![],
tools: Vec::new(),
tool_choice: None,
stop: Vec::new(),
temperature: AssistantSettings::temperature_for_model(&model, cx),
};
@ -1227,6 +1228,7 @@ impl Thread {
}));
}
let mut message_ix_to_cache = None;
for message in &self.messages {
let mut request_message = LanguageModelRequestMessage {
role: message.role,
@ -1263,19 +1265,57 @@ impl Thread {
};
}
self.tool_use
.attach_tool_uses(message.id, &mut request_message);
let mut cache_message = true;
let mut tool_results_message = LanguageModelRequestMessage {
role: Role::User,
content: Vec::new(),
cache: false,
};
for (tool_use, tool_result) in self.tool_use.tool_results(message.id) {
if let Some(tool_result) = tool_result {
request_message
.content
.push(MessageContent::ToolUse(tool_use.clone()));
tool_results_message
.content
.push(MessageContent::ToolResult(LanguageModelToolResult {
tool_use_id: tool_use.id.clone(),
tool_name: tool_result.tool_name.clone(),
is_error: tool_result.is_error,
content: if tool_result.content.is_empty() {
// Surprisingly, the API fails if we return an empty string here.
// It thinks we are sending a tool use without a tool result.
"<Tool returned an empty string>".into()
} else {
tool_result.content.clone()
},
output: None,
}));
} else {
cache_message = false;
log::debug!(
"skipped tool use {:?} because it is still pending",
tool_use
);
}
}
if cache_message {
message_ix_to_cache = Some(request.messages.len());
}
request.messages.push(request_message);
if let Some(tool_results_message) = self.tool_use.tool_results_message(message.id) {
if !tool_results_message.content.is_empty() {
if cache_message {
message_ix_to_cache = Some(request.messages.len());
}
request.messages.push(tool_results_message);
}
}
// https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching
if let Some(last) = request.messages.last_mut() {
last.cache = true;
if let Some(message_ix_to_cache) = message_ix_to_cache {
request.messages[message_ix_to_cache].cache = true;
}
self.attached_tracked_files_state(&mut request.messages, cx);
@ -1302,6 +1342,7 @@ impl Thread {
mode: None,
messages: vec![],
tools: Vec::new(),
tool_choice: None,
stop: Vec::new(),
temperature: AssistantSettings::temperature_for_model(model, cx),
};
@ -1918,8 +1959,7 @@ impl Thread {
model: Arc<dyn LanguageModel>,
) -> Vec<PendingToolUse> {
self.auto_capture_telemetry(cx);
let request = self.to_completion_request(model.clone(), cx);
let messages = Arc::new(request.messages);
let request = Arc::new(self.to_completion_request(model.clone(), cx));
let pending_tool_uses = self
.tool_use
.pending_tool_uses()
@ -1937,7 +1977,7 @@ impl Thread {
tool_use.id.clone(),
tool_use.ui_text.clone(),
tool_use.input.clone(),
messages.clone(),
request.clone(),
tool,
);
cx.emit(ThreadEvent::ToolConfirmationNeeded);
@ -1946,7 +1986,7 @@ impl Thread {
tool_use.id.clone(),
tool_use.ui_text.clone(),
tool_use.input.clone(),
&messages,
request.clone(),
tool,
model.clone(),
window,
@ -2041,21 +2081,14 @@ impl Thread {
tool_use_id: LanguageModelToolUseId,
ui_text: impl Into<SharedString>,
input: serde_json::Value,
messages: &[LanguageModelRequestMessage],
request: Arc<LanguageModelRequest>,
tool: Arc<dyn Tool>,
model: Arc<dyn LanguageModel>,
window: Option<AnyWindowHandle>,
cx: &mut Context<Thread>,
) {
let task = self.spawn_tool_use(
tool_use_id.clone(),
messages,
input,
tool,
model,
window,
cx,
);
let task =
self.spawn_tool_use(tool_use_id.clone(), request, input, tool, model, window, cx);
self.tool_use
.run_pending_tool(tool_use_id, ui_text.into(), task);
}
@ -2063,7 +2096,7 @@ impl Thread {
fn spawn_tool_use(
&mut self,
tool_use_id: LanguageModelToolUseId,
messages: &[LanguageModelRequestMessage],
request: Arc<LanguageModelRequest>,
input: serde_json::Value,
tool: Arc<dyn Tool>,
model: Arc<dyn LanguageModel>,
@ -2077,7 +2110,7 @@ impl Thread {
} else {
tool.run(
input,
messages,
request,
self.project.clone(),
self.action_log.clone(),
model,

View file

@ -7,8 +7,8 @@ use futures::FutureExt as _;
use futures::future::Shared;
use gpui::{App, Entity, SharedString, Task};
use language_model::{
ConfiguredModel, LanguageModel, LanguageModelRequestMessage, LanguageModelToolResult,
LanguageModelToolUse, LanguageModelToolUseId, MessageContent, Role,
ConfiguredModel, LanguageModel, LanguageModelRequest, LanguageModelToolResult,
LanguageModelToolUse, LanguageModelToolUseId, Role,
};
use project::Project;
use ui::{IconName, Window};
@ -354,7 +354,7 @@ impl ToolUseState {
tool_use_id: LanguageModelToolUseId,
ui_text: impl Into<Arc<str>>,
input: serde_json::Value,
messages: Arc<Vec<LanguageModelRequestMessage>>,
request: Arc<LanguageModelRequest>,
tool: Arc<dyn Tool>,
) {
if let Some(tool_use) = self.pending_tool_uses_by_id.get_mut(&tool_use_id) {
@ -363,7 +363,7 @@ impl ToolUseState {
let confirmation = Confirmation {
tool_use_id,
input,
messages,
request,
tool,
ui_text,
};
@ -449,72 +449,20 @@ impl ToolUseState {
}
}
pub fn attach_tool_uses(
&self,
message_id: MessageId,
request_message: &mut LanguageModelRequestMessage,
) {
if let Some(tool_uses) = self.tool_uses_by_assistant_message.get(&message_id) {
for tool_use in tool_uses {
if self.tool_results.contains_key(&tool_use.id) {
// Do not send tool uses until they are completed
request_message
.content
.push(MessageContent::ToolUse(tool_use.clone()));
} else {
log::debug!(
"skipped tool use {:?} because it is still pending",
tool_use
);
}
}
}
}
pub fn has_tool_results(&self, assistant_message_id: MessageId) -> bool {
self.tool_uses_by_assistant_message
.contains_key(&assistant_message_id)
}
pub fn tool_results_message(
pub fn tool_results(
&self,
assistant_message_id: MessageId,
) -> Option<LanguageModelRequestMessage> {
let tool_uses = self
.tool_uses_by_assistant_message
.get(&assistant_message_id)?;
if tool_uses.is_empty() {
return None;
}
let mut request_message = LanguageModelRequestMessage {
role: Role::User,
content: vec![],
cache: false,
};
for tool_use in tool_uses {
if let Some(tool_result) = self.tool_results.get(&tool_use.id) {
request_message
.content
.push(MessageContent::ToolResult(LanguageModelToolResult {
tool_use_id: tool_use.id.clone(),
tool_name: tool_result.tool_name.clone(),
is_error: tool_result.is_error,
content: if tool_result.content.is_empty() {
// Surprisingly, the API fails if we return an empty string here.
// It thinks we are sending a tool use without a tool result.
"<Tool returned an empty string>".into()
} else {
tool_result.content.clone()
},
output: None,
}));
}
}
Some(request_message)
) -> impl Iterator<Item = (&LanguageModelToolUse, Option<&LanguageModelToolResult>)> {
self.tool_uses_by_assistant_message
.get(&assistant_message_id)
.into_iter()
.flatten()
.map(|tool_use| (tool_use, self.tool_results.get(&tool_use.id)))
}
}
@ -535,7 +483,7 @@ pub struct Confirmation {
pub tool_use_id: LanguageModelToolUseId,
pub input: serde_json::Value,
pub ui_text: Arc<str>,
pub messages: Arc<Vec<LanguageModelRequestMessage>>,
pub request: Arc<LanguageModelRequest>,
pub tool: Arc<dyn Tool>,
}