Reuse conversation cache when streaming edits (#30245)
Release Notes: - Improved latency when the agent applies edits.
This commit is contained in:
parent
032022e37b
commit
9f6809a28d
50 changed files with 847 additions and 21557 deletions
|
@ -1411,6 +1411,7 @@ impl ActiveThread {
|
|||
mode: None,
|
||||
messages: vec![request_message],
|
||||
tools: vec![],
|
||||
tool_choice: None,
|
||||
stop: vec![],
|
||||
temperature: AssistantSettings::temperature_for_model(
|
||||
&configured_model.model,
|
||||
|
@ -3256,7 +3257,7 @@ impl ActiveThread {
|
|||
c.tool_use_id.clone(),
|
||||
c.ui_text.clone(),
|
||||
c.input.clone(),
|
||||
&c.messages,
|
||||
c.request.clone(),
|
||||
c.tool.clone(),
|
||||
configured.model,
|
||||
Some(window.window_handle()),
|
||||
|
|
|
@ -466,6 +466,7 @@ impl CodegenAlternative {
|
|||
prompt_id: None,
|
||||
mode: None,
|
||||
tools: Vec::new(),
|
||||
tool_choice: None,
|
||||
stop: Vec::new(),
|
||||
temperature,
|
||||
messages: vec![request_message],
|
||||
|
|
|
@ -4,7 +4,7 @@ use anyhow::{Result, anyhow, bail};
|
|||
use assistant_tool::{ActionLog, Tool, ToolResult, ToolSource};
|
||||
use context_server::{ContextServerId, types};
|
||||
use gpui::{AnyWindowHandle, App, Entity, Task};
|
||||
use language_model::{LanguageModel, LanguageModelRequestMessage, LanguageModelToolSchemaFormat};
|
||||
use language_model::{LanguageModel, LanguageModelRequest, LanguageModelToolSchemaFormat};
|
||||
use project::{Project, context_server_store::ContextServerStore};
|
||||
use ui::IconName;
|
||||
|
||||
|
@ -72,7 +72,7 @@ impl Tool for ContextServerTool {
|
|||
fn run(
|
||||
self: Arc<Self>,
|
||||
input: serde_json::Value,
|
||||
_messages: &[LanguageModelRequestMessage],
|
||||
_request: Arc<LanguageModelRequest>,
|
||||
_project: Entity<Project>,
|
||||
_action_log: Entity<ActionLog>,
|
||||
_model: Arc<dyn LanguageModel>,
|
||||
|
|
|
@ -1245,6 +1245,7 @@ impl MessageEditor {
|
|||
mode: None,
|
||||
messages: vec![request_message],
|
||||
tools: vec![],
|
||||
tool_choice: None,
|
||||
stop: vec![],
|
||||
temperature: AssistantSettings::temperature_for_model(&model.model, cx),
|
||||
};
|
||||
|
|
|
@ -293,6 +293,7 @@ impl TerminalInlineAssistant {
|
|||
mode: None,
|
||||
messages: vec![request_message],
|
||||
tools: Vec::new(),
|
||||
tool_choice: None,
|
||||
stop: Vec::new(),
|
||||
temperature,
|
||||
}
|
||||
|
|
|
@ -1183,6 +1183,7 @@ impl Thread {
|
|||
mode: None,
|
||||
messages: vec![],
|
||||
tools: Vec::new(),
|
||||
tool_choice: None,
|
||||
stop: Vec::new(),
|
||||
temperature: AssistantSettings::temperature_for_model(&model, cx),
|
||||
};
|
||||
|
@ -1227,6 +1228,7 @@ impl Thread {
|
|||
}));
|
||||
}
|
||||
|
||||
let mut message_ix_to_cache = None;
|
||||
for message in &self.messages {
|
||||
let mut request_message = LanguageModelRequestMessage {
|
||||
role: message.role,
|
||||
|
@ -1263,19 +1265,57 @@ impl Thread {
|
|||
};
|
||||
}
|
||||
|
||||
self.tool_use
|
||||
.attach_tool_uses(message.id, &mut request_message);
|
||||
let mut cache_message = true;
|
||||
let mut tool_results_message = LanguageModelRequestMessage {
|
||||
role: Role::User,
|
||||
content: Vec::new(),
|
||||
cache: false,
|
||||
};
|
||||
for (tool_use, tool_result) in self.tool_use.tool_results(message.id) {
|
||||
if let Some(tool_result) = tool_result {
|
||||
request_message
|
||||
.content
|
||||
.push(MessageContent::ToolUse(tool_use.clone()));
|
||||
tool_results_message
|
||||
.content
|
||||
.push(MessageContent::ToolResult(LanguageModelToolResult {
|
||||
tool_use_id: tool_use.id.clone(),
|
||||
tool_name: tool_result.tool_name.clone(),
|
||||
is_error: tool_result.is_error,
|
||||
content: if tool_result.content.is_empty() {
|
||||
// Surprisingly, the API fails if we return an empty string here.
|
||||
// It thinks we are sending a tool use without a tool result.
|
||||
"<Tool returned an empty string>".into()
|
||||
} else {
|
||||
tool_result.content.clone()
|
||||
},
|
||||
output: None,
|
||||
}));
|
||||
} else {
|
||||
cache_message = false;
|
||||
log::debug!(
|
||||
"skipped tool use {:?} because it is still pending",
|
||||
tool_use
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
if cache_message {
|
||||
message_ix_to_cache = Some(request.messages.len());
|
||||
}
|
||||
request.messages.push(request_message);
|
||||
|
||||
if let Some(tool_results_message) = self.tool_use.tool_results_message(message.id) {
|
||||
if !tool_results_message.content.is_empty() {
|
||||
if cache_message {
|
||||
message_ix_to_cache = Some(request.messages.len());
|
||||
}
|
||||
request.messages.push(tool_results_message);
|
||||
}
|
||||
}
|
||||
|
||||
// https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching
|
||||
if let Some(last) = request.messages.last_mut() {
|
||||
last.cache = true;
|
||||
if let Some(message_ix_to_cache) = message_ix_to_cache {
|
||||
request.messages[message_ix_to_cache].cache = true;
|
||||
}
|
||||
|
||||
self.attached_tracked_files_state(&mut request.messages, cx);
|
||||
|
@ -1302,6 +1342,7 @@ impl Thread {
|
|||
mode: None,
|
||||
messages: vec![],
|
||||
tools: Vec::new(),
|
||||
tool_choice: None,
|
||||
stop: Vec::new(),
|
||||
temperature: AssistantSettings::temperature_for_model(model, cx),
|
||||
};
|
||||
|
@ -1918,8 +1959,7 @@ impl Thread {
|
|||
model: Arc<dyn LanguageModel>,
|
||||
) -> Vec<PendingToolUse> {
|
||||
self.auto_capture_telemetry(cx);
|
||||
let request = self.to_completion_request(model.clone(), cx);
|
||||
let messages = Arc::new(request.messages);
|
||||
let request = Arc::new(self.to_completion_request(model.clone(), cx));
|
||||
let pending_tool_uses = self
|
||||
.tool_use
|
||||
.pending_tool_uses()
|
||||
|
@ -1937,7 +1977,7 @@ impl Thread {
|
|||
tool_use.id.clone(),
|
||||
tool_use.ui_text.clone(),
|
||||
tool_use.input.clone(),
|
||||
messages.clone(),
|
||||
request.clone(),
|
||||
tool,
|
||||
);
|
||||
cx.emit(ThreadEvent::ToolConfirmationNeeded);
|
||||
|
@ -1946,7 +1986,7 @@ impl Thread {
|
|||
tool_use.id.clone(),
|
||||
tool_use.ui_text.clone(),
|
||||
tool_use.input.clone(),
|
||||
&messages,
|
||||
request.clone(),
|
||||
tool,
|
||||
model.clone(),
|
||||
window,
|
||||
|
@ -2041,21 +2081,14 @@ impl Thread {
|
|||
tool_use_id: LanguageModelToolUseId,
|
||||
ui_text: impl Into<SharedString>,
|
||||
input: serde_json::Value,
|
||||
messages: &[LanguageModelRequestMessage],
|
||||
request: Arc<LanguageModelRequest>,
|
||||
tool: Arc<dyn Tool>,
|
||||
model: Arc<dyn LanguageModel>,
|
||||
window: Option<AnyWindowHandle>,
|
||||
cx: &mut Context<Thread>,
|
||||
) {
|
||||
let task = self.spawn_tool_use(
|
||||
tool_use_id.clone(),
|
||||
messages,
|
||||
input,
|
||||
tool,
|
||||
model,
|
||||
window,
|
||||
cx,
|
||||
);
|
||||
let task =
|
||||
self.spawn_tool_use(tool_use_id.clone(), request, input, tool, model, window, cx);
|
||||
self.tool_use
|
||||
.run_pending_tool(tool_use_id, ui_text.into(), task);
|
||||
}
|
||||
|
@ -2063,7 +2096,7 @@ impl Thread {
|
|||
fn spawn_tool_use(
|
||||
&mut self,
|
||||
tool_use_id: LanguageModelToolUseId,
|
||||
messages: &[LanguageModelRequestMessage],
|
||||
request: Arc<LanguageModelRequest>,
|
||||
input: serde_json::Value,
|
||||
tool: Arc<dyn Tool>,
|
||||
model: Arc<dyn LanguageModel>,
|
||||
|
@ -2077,7 +2110,7 @@ impl Thread {
|
|||
} else {
|
||||
tool.run(
|
||||
input,
|
||||
messages,
|
||||
request,
|
||||
self.project.clone(),
|
||||
self.action_log.clone(),
|
||||
model,
|
||||
|
|
|
@ -7,8 +7,8 @@ use futures::FutureExt as _;
|
|||
use futures::future::Shared;
|
||||
use gpui::{App, Entity, SharedString, Task};
|
||||
use language_model::{
|
||||
ConfiguredModel, LanguageModel, LanguageModelRequestMessage, LanguageModelToolResult,
|
||||
LanguageModelToolUse, LanguageModelToolUseId, MessageContent, Role,
|
||||
ConfiguredModel, LanguageModel, LanguageModelRequest, LanguageModelToolResult,
|
||||
LanguageModelToolUse, LanguageModelToolUseId, Role,
|
||||
};
|
||||
use project::Project;
|
||||
use ui::{IconName, Window};
|
||||
|
@ -354,7 +354,7 @@ impl ToolUseState {
|
|||
tool_use_id: LanguageModelToolUseId,
|
||||
ui_text: impl Into<Arc<str>>,
|
||||
input: serde_json::Value,
|
||||
messages: Arc<Vec<LanguageModelRequestMessage>>,
|
||||
request: Arc<LanguageModelRequest>,
|
||||
tool: Arc<dyn Tool>,
|
||||
) {
|
||||
if let Some(tool_use) = self.pending_tool_uses_by_id.get_mut(&tool_use_id) {
|
||||
|
@ -363,7 +363,7 @@ impl ToolUseState {
|
|||
let confirmation = Confirmation {
|
||||
tool_use_id,
|
||||
input,
|
||||
messages,
|
||||
request,
|
||||
tool,
|
||||
ui_text,
|
||||
};
|
||||
|
@ -449,72 +449,20 @@ impl ToolUseState {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn attach_tool_uses(
|
||||
&self,
|
||||
message_id: MessageId,
|
||||
request_message: &mut LanguageModelRequestMessage,
|
||||
) {
|
||||
if let Some(tool_uses) = self.tool_uses_by_assistant_message.get(&message_id) {
|
||||
for tool_use in tool_uses {
|
||||
if self.tool_results.contains_key(&tool_use.id) {
|
||||
// Do not send tool uses until they are completed
|
||||
request_message
|
||||
.content
|
||||
.push(MessageContent::ToolUse(tool_use.clone()));
|
||||
} else {
|
||||
log::debug!(
|
||||
"skipped tool use {:?} because it is still pending",
|
||||
tool_use
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn has_tool_results(&self, assistant_message_id: MessageId) -> bool {
|
||||
self.tool_uses_by_assistant_message
|
||||
.contains_key(&assistant_message_id)
|
||||
}
|
||||
|
||||
pub fn tool_results_message(
|
||||
pub fn tool_results(
|
||||
&self,
|
||||
assistant_message_id: MessageId,
|
||||
) -> Option<LanguageModelRequestMessage> {
|
||||
let tool_uses = self
|
||||
.tool_uses_by_assistant_message
|
||||
.get(&assistant_message_id)?;
|
||||
|
||||
if tool_uses.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let mut request_message = LanguageModelRequestMessage {
|
||||
role: Role::User,
|
||||
content: vec![],
|
||||
cache: false,
|
||||
};
|
||||
|
||||
for tool_use in tool_uses {
|
||||
if let Some(tool_result) = self.tool_results.get(&tool_use.id) {
|
||||
request_message
|
||||
.content
|
||||
.push(MessageContent::ToolResult(LanguageModelToolResult {
|
||||
tool_use_id: tool_use.id.clone(),
|
||||
tool_name: tool_result.tool_name.clone(),
|
||||
is_error: tool_result.is_error,
|
||||
content: if tool_result.content.is_empty() {
|
||||
// Surprisingly, the API fails if we return an empty string here.
|
||||
// It thinks we are sending a tool use without a tool result.
|
||||
"<Tool returned an empty string>".into()
|
||||
} else {
|
||||
tool_result.content.clone()
|
||||
},
|
||||
output: None,
|
||||
}));
|
||||
}
|
||||
}
|
||||
|
||||
Some(request_message)
|
||||
) -> impl Iterator<Item = (&LanguageModelToolUse, Option<&LanguageModelToolResult>)> {
|
||||
self.tool_uses_by_assistant_message
|
||||
.get(&assistant_message_id)
|
||||
.into_iter()
|
||||
.flatten()
|
||||
.map(|tool_use| (tool_use, self.tool_results.get(&tool_use.id)))
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -535,7 +483,7 @@ pub struct Confirmation {
|
|||
pub tool_use_id: LanguageModelToolUseId,
|
||||
pub input: serde_json::Value,
|
||||
pub ui_text: Arc<str>,
|
||||
pub messages: Arc<Vec<LanguageModelRequestMessage>>,
|
||||
pub request: Arc<LanguageModelRequest>,
|
||||
pub tool: Arc<dyn Tool>,
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue