agent: Fix issue with Anthropic thinking models (#33317)

cc @osyvokon We were seeing a bunch of errors in our backend when people were using Claude models with thinking enabled. In the logs we would see > an error occurred while interacting with the Anthropic API: invalid_request_error: messages.x.content.0.type: Expected `thinking` or `redacted_thinking`, but found `text`. When `thinking` is enabled, a final `assistant` message must start with a thinking block (preceeding the lastmost set of `tool_use` and `tool_result` blocks). We recommend you include thinking blocks from previous turns. To avoid this requirement, disable `thinking`. Please consult our documentation at https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking However, this issue did not occur frequently and was not easily reproducible. Turns out it was triggered by us not correctly handling [Redacted Thinking Blocks](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#thinking-redaction). I could consistently reproduce this issue by including this magic string: `ANTHROPIC_MAGIC_STRING_TRIGGER_REDACTED_THINKING_46C9A13E193C177646C7398A98432ECCCE4C1253D5E2D82641AC0E52CC2876CB ` in the request, which forces `claude-3-7-sonnet` to emit redacted thinking blocks (confusingly the magic string does not seem to be working for `claude-sonnet-4`). As soon as we hit a tool call Anthropic would return an error. Thanks to @osyvokon for pointing me in the right direction 😄! Release Notes: - agent: Fixed an issue where Anthropic models would sometimes return an error when thinking was enabled
2025-06-24 18:23:59 +02:00 · 2025-06-24 18:23:59 +02:00 · 7be57baef0
commit 7be57baef0
parent 39dc4b9040
7 changed files with 36 additions and 10 deletions
--- a/crates/agent/src/thread.rs
+++ b/crates/agent/src/thread.rs
@ -145,6 +145,10 @@ impl Message {
        }
    }

+    pub fn push_redacted_thinking(&mut self, data: String) {
+        self.segments.push(MessageSegment::RedactedThinking(data));
+    }
+
    pub fn push_text(&mut self, text: &str) {
        if let Some(MessageSegment::Text(segment)) = self.segments.last_mut() {
            segment.push_str(text);
@ -183,7 +187,7 @@ pub enum MessageSegment {
        text: String,
        signature: Option<String>,
    },
-    RedactedThinking(Vec<u8>),
+    RedactedThinking(String),
 }

 impl MessageSegment {
@ -1643,6 +1647,25 @@ impl Thread {
                                    };
                                }
                            }
+                            LanguageModelCompletionEvent::RedactedThinking {
+                                data
+                            } => {
+                                thread.received_chunk();
+
+                                if let Some(last_message) = thread.messages.last_mut() {
+                                    if last_message.role == Role::Assistant
+                                        && !thread.tool_use.has_tool_results(last_message.id)
+                                    {
+                                        last_message.push_redacted_thinking(data);
+                                    } else {
+                                        request_assistant_message_id =
+                                            Some(thread.insert_assistant_message(
+                                                vec![MessageSegment::RedactedThinking(data)],
+                                                cx,
+                                            ));
+                                    };
+                                }
+                            }
                            LanguageModelCompletionEvent::ToolUse(tool_use) => {
                                let last_assistant_message_id = request_assistant_message_id
                                    .unwrap_or_else(|| {
--- a/crates/agent/src/thread_store.rs
+++ b/crates/agent/src/thread_store.rs
@ -731,7 +731,7 @@ pub enum SerializedMessageSegment {
        signature: Option<String>,
    },
    RedactedThinking {
-        data: Vec<u8>,
+        data: String,
    },
 }

--- a/crates/assistant_context/src/assistant_context.rs
+++ b/crates/assistant_context/src/assistant_context.rs
@ -2117,6 +2117,7 @@ impl AssistantContext {
                                            );
                                        }
                                    }
+                                    LanguageModelCompletionEvent::RedactedThinking { .. } => {},
                                    LanguageModelCompletionEvent::Text(mut chunk) => {
                                        if let Some(start) = thought_process_stack.pop() {
                                            let end = buffer.anchor_before(message_old_end_offset);
--- a/crates/eval/src/instance.rs
+++ b/crates/eval/src/instance.rs
@ -1030,6 +1030,7 @@ pub fn response_events_to_markdown(
            Ok(LanguageModelCompletionEvent::Thinking { text, .. }) => {
                thinking_buffer.push_str(text);
            }
+            Ok(LanguageModelCompletionEvent::RedactedThinking { .. }) => {}
            Ok(LanguageModelCompletionEvent::Stop(reason)) => {
                flush_buffers(&mut response, &mut text_buffer, &mut thinking_buffer);
                response.push_str(&format!("**Stop**: {:?}\n\n", reason));
@ -1126,6 +1127,7 @@ impl ThreadDialog {

                // Skip these
                Ok(LanguageModelCompletionEvent::UsageUpdate(_))
+                | Ok(LanguageModelCompletionEvent::RedactedThinking { .. })
                | Ok(LanguageModelCompletionEvent::StatusUpdate { .. })
                | Ok(LanguageModelCompletionEvent::StartMessage { .. })
                | Ok(LanguageModelCompletionEvent::Stop(_)) => {}
--- a/crates/language_model/src/language_model.rs
+++ b/crates/language_model/src/language_model.rs
@ -67,6 +67,9 @@ pub enum LanguageModelCompletionEvent {
        text: String,
        signature: Option<String>,
    },
+    RedactedThinking {
+        data: String,
+    },
    ToolUse(LanguageModelToolUse),
    StartMessage {
        message_id: String,
@ -359,6 +362,7 @@ pub trait LanguageModel: Send + Sync {
                                Ok(LanguageModelCompletionEvent::StartMessage { .. }) => None,
                                Ok(LanguageModelCompletionEvent::Text(text)) => Some(Ok(text)),
                                Ok(LanguageModelCompletionEvent::Thinking { .. }) => None,
+                                Ok(LanguageModelCompletionEvent::RedactedThinking { .. }) => None,
                                Ok(LanguageModelCompletionEvent::Stop(_)) => None,
                                Ok(LanguageModelCompletionEvent::ToolUse(_)) => None,
                                Ok(LanguageModelCompletionEvent::UsageUpdate(token_usage)) => {
--- a/crates/language_model/src/request.rs
+++ b/crates/language_model/src/request.rs
@ -303,7 +303,7 @@ pub enum MessageContent {
        text: String,
        signature: Option<String>,
    },
-    RedactedThinking(Vec<u8>),
+    RedactedThinking(String),
    Image(LanguageModelImage),
    ToolUse(LanguageModelToolUse),
    ToolResult(LanguageModelToolResult),
--- a/crates/language_models/src/provider/anthropic.rs
+++ b/crates/language_models/src/provider/anthropic.rs
@ -554,9 +554,7 @@ pub fn into_anthropic(
                        }
                        MessageContent::RedactedThinking(data) => {
                            if !data.is_empty() {
-                                Some(anthropic::RequestContent::RedactedThinking {
-                                    data: String::from_utf8(data).ok()?,
-                                })
+                                Some(anthropic::RequestContent::RedactedThinking { data })
                            } else {
                                None
                            }
@ -730,10 +728,8 @@ impl AnthropicEventMapper {
                        signature: None,
                    })]
                }
-                ResponseContent::RedactedThinking { .. } => {
-                    // Redacted thinking is encrypted and not accessible to the user, see:
-                    // https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#suggestions-for-handling-redacted-thinking-in-production
-                    Vec::new()
+                ResponseContent::RedactedThinking { data } => {
+                    vec![Ok(LanguageModelCompletionEvent::RedactedThinking { data })]
                }
                ResponseContent::ToolUse { id, name, .. } => {
                    self.tool_uses_by_index.insert(