agent: Do not create user messages for tool results in thread (#29354)

We used to insert empty user messages into the `Thread::messages` `Vec` when tools finished running and then we would attach the results when creating the request. This approach was very easy to mess up during state handling, leading to empty user messages displayed in the conversation and API failures. Instead, we will no longer insert actual user messages for tool results to the `Thread`, and will only do this on the fly when creating the model request. This simplifies a lot of code and show fix the mentioned errors. Release Notes: - agent: Improve reliability of LLM requests when including tool results --------- Co-authored-by: Bennet Bo Fenner <bennetbo@gmx.de> Co-authored-by: Oleksiy Syvokon <oleksiy.syvokon@gmail.com>
2025-04-24 13:30:15 -03:00 · 2025-04-24 13:30:15 -03:00 · f81e65ae7c
commit f81e65ae7c
parent 952fe34aaa
4 changed files with 144 additions and 113 deletions
--- a/crates/agent/src/thread.rs
+++ b/crates/agent/src/thread.rs
@ -391,8 +391,7 @@ impl Thread {
                .map(|message| message.id.0 + 1)
                .unwrap_or(0),
        );
-        let tool_use =
-            ToolUseState::from_serialized_messages(tools.clone(), &serialized.messages, |_| true);
+        let tool_use = ToolUseState::from_serialized_messages(tools.clone(), &serialized.messages);

        Self {
            id,
@ -524,7 +523,12 @@ impl Thread {
    }

    pub fn message(&self, id: MessageId) -> Option<&Message> {
-        self.messages.iter().find(|message| message.id == id)
+        let index = self
+            .messages
+            .binary_search_by(|message| message.id.cmp(&id))
+            .ok()?;
+
+        self.messages.get(index)
    }

    pub fn messages(&self) -> impl ExactSizeIterator<Item = &Message> {
@ -673,6 +677,32 @@ impl Thread {
            })
    }

+    pub fn is_turn_end(&self, ix: usize) -> bool {
+        if self.messages.is_empty() {
+            return false;
+        }
+
+        if !self.is_generating() && ix == self.messages.len() - 1 {
+            return true;
+        }
+
+        let Some(message) = self.messages.get(ix) else {
+            return false;
+        };
+
+        if message.role != Role::Assistant {
+            return false;
+        }
+
+        self.messages
+            .get(ix + 1)
+            .and_then(|message| {
+                self.message(message.id)
+                    .map(|next_message| next_message.role == Role::User)
+            })
+            .unwrap_or(false)
+    }
+
    /// Returns whether all of the tool uses have finished running.
    pub fn all_tools_finished(&self) -> bool {
        // If the only pending tool uses left are the ones with errors, then
@ -687,8 +717,11 @@ impl Thread {
        self.tool_use.tool_uses_for_message(id, cx)
    }

-    pub fn tool_results_for_message(&self, id: MessageId) -> Vec<&LanguageModelToolResult> {
-        self.tool_use.tool_results_for_message(id)
+    pub fn tool_results_for_message(
+        &self,
+        assistant_message_id: MessageId,
+    ) -> Vec<&LanguageModelToolResult> {
+        self.tool_use.tool_results_for_message(assistant_message_id)
    }

    pub fn tool_result(&self, id: &LanguageModelToolUseId) -> Option<&LanguageModelToolResult> {
@ -703,10 +736,6 @@ impl Thread {
        self.tool_use.tool_result_card(id).cloned()
    }

-    pub fn message_has_tool_results(&self, message_id: MessageId) -> bool {
-        self.tool_use.message_has_tool_results(message_id)
-    }
-
    /// Filter out contexts that have already been included in previous messages
    pub fn filter_new_context<'a>(
        &self,
@ -1051,9 +1080,6 @@ impl Thread {
                cache: false,
            };

-            self.tool_use
-                .attach_tool_results(message.id, &mut request_message);
-
            if !message.context.is_empty() {
                request_message
                    .content
@ -1104,6 +1130,10 @@ impl Thread {
                .attach_tool_uses(message.id, &mut request_message);

            request.messages.push(request_message);
+
+            if let Some(tool_results_message) = self.tool_use.tool_results_message(message.id) {
+                request.messages.push(tool_results_message);
+            }
        }

        // https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching
@ -1133,11 +1163,6 @@ impl Thread {
                cache: false,
            };

-            // Skip tool results during summarization.
-            if self.tool_use.message_has_tool_results(message.id) {
-                continue;
-            }
-
            for segment in &message.segments {
                match segment {
                    MessageSegment::Text(text) => request_message
@ -1272,7 +1297,9 @@ impl Thread {
                            LanguageModelCompletionEvent::Text(chunk) => {
                                cx.emit(ThreadEvent::ReceivedTextChunk);
                                if let Some(last_message) = thread.messages.last_mut() {
-                                    if last_message.role == Role::Assistant {
+                                    if last_message.role == Role::Assistant
+                                        && !thread.tool_use.has_tool_results(last_message.id)
+                                    {
                                        last_message.push_text(&chunk);
                                        cx.emit(ThreadEvent::StreamedAssistantText(
                                            last_message.id,
@ -1297,7 +1324,9 @@ impl Thread {
                                signature,
                            } => {
                                if let Some(last_message) = thread.messages.last_mut() {
-                                    if last_message.role == Role::Assistant {
+                                    if last_message.role == Role::Assistant
+                                        && !thread.tool_use.has_tool_results(last_message.id)
+                                    {
                                        last_message.push_thinking(&chunk, signature);
                                        cx.emit(ThreadEvent::StreamedAssistantThinking(
                                            last_message.id,
@ -1725,10 +1754,10 @@ impl Thread {
        if self.all_tools_finished() {
            let model_registry = LanguageModelRegistry::read_global(cx);
            if let Some(ConfiguredModel { model, .. }) = model_registry.default_model() {
-                self.attach_tool_results(cx);
                if !canceled {
                    self.send_to_model(model, window, cx);
                }
+                self.auto_capture_telemetry(cx);
            }
        }

@ -1738,14 +1767,6 @@ impl Thread {
        });
    }

-    /// Insert an empty message to be populated with tool results upon send.
-    pub fn attach_tool_results(&mut self, cx: &mut Context<Self>) {
-        // Tool results are assumed to be waiting on the next message id, so they will populate
-        // this empty message before sending to model. Would prefer this to be more straightforward.
-        self.insert_message(Role::User, vec![], cx);
-        self.auto_capture_telemetry(cx);
-    }
-
    /// Cancels the last pending completion, if there are any pending.
    ///
    /// Returns whether a completion was canceled.
@ -2050,7 +2071,7 @@ impl Thread {
            }

            for tool_result in self.tool_results_for_message(message.id) {
-                write!(markdown, "**Tool Results: {}", tool_result.tool_use_id)?;
+                write!(markdown, "\n**Tool Results: {}", tool_result.tool_use_id)?;
                if tool_result.is_error {
                    write!(markdown, " (Error)")?;
                }