Reuse conversation cache when streaming edits (#30245)

Release Notes:

- Improved latency when the agent applies edits.
This commit is contained in:
Antonio Scandurra 2025-05-08 14:36:34 +02:00 committed by GitHub
parent 032022e37b
commit 9f6809a28d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
50 changed files with 847 additions and 21557 deletions

View file

@ -1183,6 +1183,7 @@ impl Thread {
mode: None,
messages: vec![],
tools: Vec::new(),
tool_choice: None,
stop: Vec::new(),
temperature: AssistantSettings::temperature_for_model(&model, cx),
};
@ -1227,6 +1228,7 @@ impl Thread {
}));
}
let mut message_ix_to_cache = None;
for message in &self.messages {
let mut request_message = LanguageModelRequestMessage {
role: message.role,
@ -1263,19 +1265,57 @@ impl Thread {
};
}
self.tool_use
.attach_tool_uses(message.id, &mut request_message);
let mut cache_message = true;
let mut tool_results_message = LanguageModelRequestMessage {
role: Role::User,
content: Vec::new(),
cache: false,
};
for (tool_use, tool_result) in self.tool_use.tool_results(message.id) {
if let Some(tool_result) = tool_result {
request_message
.content
.push(MessageContent::ToolUse(tool_use.clone()));
tool_results_message
.content
.push(MessageContent::ToolResult(LanguageModelToolResult {
tool_use_id: tool_use.id.clone(),
tool_name: tool_result.tool_name.clone(),
is_error: tool_result.is_error,
content: if tool_result.content.is_empty() {
// Surprisingly, the API fails if we return an empty string here.
// It thinks we are sending a tool use without a tool result.
"<Tool returned an empty string>".into()
} else {
tool_result.content.clone()
},
output: None,
}));
} else {
cache_message = false;
log::debug!(
"skipped tool use {:?} because it is still pending",
tool_use
);
}
}
if cache_message {
message_ix_to_cache = Some(request.messages.len());
}
request.messages.push(request_message);
if let Some(tool_results_message) = self.tool_use.tool_results_message(message.id) {
if !tool_results_message.content.is_empty() {
if cache_message {
message_ix_to_cache = Some(request.messages.len());
}
request.messages.push(tool_results_message);
}
}
// https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching
if let Some(last) = request.messages.last_mut() {
last.cache = true;
if let Some(message_ix_to_cache) = message_ix_to_cache {
request.messages[message_ix_to_cache].cache = true;
}
self.attached_tracked_files_state(&mut request.messages, cx);
@ -1302,6 +1342,7 @@ impl Thread {
mode: None,
messages: vec![],
tools: Vec::new(),
tool_choice: None,
stop: Vec::new(),
temperature: AssistantSettings::temperature_for_model(model, cx),
};
@ -1918,8 +1959,7 @@ impl Thread {
model: Arc<dyn LanguageModel>,
) -> Vec<PendingToolUse> {
self.auto_capture_telemetry(cx);
let request = self.to_completion_request(model.clone(), cx);
let messages = Arc::new(request.messages);
let request = Arc::new(self.to_completion_request(model.clone(), cx));
let pending_tool_uses = self
.tool_use
.pending_tool_uses()
@ -1937,7 +1977,7 @@ impl Thread {
tool_use.id.clone(),
tool_use.ui_text.clone(),
tool_use.input.clone(),
messages.clone(),
request.clone(),
tool,
);
cx.emit(ThreadEvent::ToolConfirmationNeeded);
@ -1946,7 +1986,7 @@ impl Thread {
tool_use.id.clone(),
tool_use.ui_text.clone(),
tool_use.input.clone(),
&messages,
request.clone(),
tool,
model.clone(),
window,
@ -2041,21 +2081,14 @@ impl Thread {
tool_use_id: LanguageModelToolUseId,
ui_text: impl Into<SharedString>,
input: serde_json::Value,
messages: &[LanguageModelRequestMessage],
request: Arc<LanguageModelRequest>,
tool: Arc<dyn Tool>,
model: Arc<dyn LanguageModel>,
window: Option<AnyWindowHandle>,
cx: &mut Context<Thread>,
) {
let task = self.spawn_tool_use(
tool_use_id.clone(),
messages,
input,
tool,
model,
window,
cx,
);
let task =
self.spawn_tool_use(tool_use_id.clone(), request, input, tool, model, window, cx);
self.tool_use
.run_pending_tool(tool_use_id, ui_text.into(), task);
}
@ -2063,7 +2096,7 @@ impl Thread {
fn spawn_tool_use(
&mut self,
tool_use_id: LanguageModelToolUseId,
messages: &[LanguageModelRequestMessage],
request: Arc<LanguageModelRequest>,
input: serde_json::Value,
tool: Arc<dyn Tool>,
model: Arc<dyn LanguageModel>,
@ -2077,7 +2110,7 @@ impl Thread {
} else {
tool.run(
input,
messages,
request,
self.project.clone(),
self.action_log.clone(),
model,