agent: Fix issue with Anthropic thinking models (#33317)
cc @osyvokon

We were seeing a bunch of errors in our backend when people were using Claude models with thinking enabled. In the logs we would see:

> an error occurred while interacting with the Anthropic API: invalid_request_error: messages.x.content.0.type: Expected `thinking` or `redacted_thinking`, but found `text`. When `thinking` is enabled, a final `assistant` message must start with a thinking block (preceeding the lastmost set of `tool_use` and `tool_result` blocks). We recommend you include thinking blocks from previous turns. To avoid this requirement, disable `thinking`. Please consult our documentation at https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking

However, this issue did not occur frequently and was not easily reproducible. It turns out it was triggered by us not correctly handling [Redacted Thinking Blocks](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#thinking-redaction).

I could consistently reproduce this issue by including this magic string: `ANTHROPIC_MAGIC_STRING_TRIGGER_REDACTED_THINKING_46C9A13E193C177646C7398A98432ECCCE4C1253D5E2D82641AC0E52CC2876CB` in the request, which forces `claude-3-7-sonnet` to emit redacted thinking blocks (confusingly, the magic string does not seem to work for `claude-sonnet-4`). As soon as we hit a tool call, Anthropic would return an error.

Thanks to @osyvokon for pointing me in the right direction 😄!

Release Notes:

- agent: Fixed an issue where Anthropic models would sometimes return an error when thinking was enabled
This commit is contained in:
parent
39dc4b9040
commit
7be57baef0
7 changed files with 36 additions and 10 deletions
|
@ -145,6 +145,10 @@ impl Message {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn push_redacted_thinking(&mut self, data: String) {
|
||||||
|
self.segments.push(MessageSegment::RedactedThinking(data));
|
||||||
|
}
|
||||||
|
|
||||||
pub fn push_text(&mut self, text: &str) {
|
pub fn push_text(&mut self, text: &str) {
|
||||||
if let Some(MessageSegment::Text(segment)) = self.segments.last_mut() {
|
if let Some(MessageSegment::Text(segment)) = self.segments.last_mut() {
|
||||||
segment.push_str(text);
|
segment.push_str(text);
|
||||||
|
@ -183,7 +187,7 @@ pub enum MessageSegment {
|
||||||
text: String,
|
text: String,
|
||||||
signature: Option<String>,
|
signature: Option<String>,
|
||||||
},
|
},
|
||||||
RedactedThinking(Vec<u8>),
|
RedactedThinking(String),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl MessageSegment {
|
impl MessageSegment {
|
||||||
|
@ -1643,6 +1647,25 @@ impl Thread {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
LanguageModelCompletionEvent::RedactedThinking {
|
||||||
|
data
|
||||||
|
} => {
|
||||||
|
thread.received_chunk();
|
||||||
|
|
||||||
|
if let Some(last_message) = thread.messages.last_mut() {
|
||||||
|
if last_message.role == Role::Assistant
|
||||||
|
&& !thread.tool_use.has_tool_results(last_message.id)
|
||||||
|
{
|
||||||
|
last_message.push_redacted_thinking(data);
|
||||||
|
} else {
|
||||||
|
request_assistant_message_id =
|
||||||
|
Some(thread.insert_assistant_message(
|
||||||
|
vec![MessageSegment::RedactedThinking(data)],
|
||||||
|
cx,
|
||||||
|
));
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
LanguageModelCompletionEvent::ToolUse(tool_use) => {
|
LanguageModelCompletionEvent::ToolUse(tool_use) => {
|
||||||
let last_assistant_message_id = request_assistant_message_id
|
let last_assistant_message_id = request_assistant_message_id
|
||||||
.unwrap_or_else(|| {
|
.unwrap_or_else(|| {
|
||||||
|
|
|
@ -731,7 +731,7 @@ pub enum SerializedMessageSegment {
|
||||||
signature: Option<String>,
|
signature: Option<String>,
|
||||||
},
|
},
|
||||||
RedactedThinking {
|
RedactedThinking {
|
||||||
data: Vec<u8>,
|
data: String,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -2117,6 +2117,7 @@ impl AssistantContext {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
LanguageModelCompletionEvent::RedactedThinking { .. } => {},
|
||||||
LanguageModelCompletionEvent::Text(mut chunk) => {
|
LanguageModelCompletionEvent::Text(mut chunk) => {
|
||||||
if let Some(start) = thought_process_stack.pop() {
|
if let Some(start) = thought_process_stack.pop() {
|
||||||
let end = buffer.anchor_before(message_old_end_offset);
|
let end = buffer.anchor_before(message_old_end_offset);
|
||||||
|
|
|
@ -1030,6 +1030,7 @@ pub fn response_events_to_markdown(
|
||||||
Ok(LanguageModelCompletionEvent::Thinking { text, .. }) => {
|
Ok(LanguageModelCompletionEvent::Thinking { text, .. }) => {
|
||||||
thinking_buffer.push_str(text);
|
thinking_buffer.push_str(text);
|
||||||
}
|
}
|
||||||
|
Ok(LanguageModelCompletionEvent::RedactedThinking { .. }) => {}
|
||||||
Ok(LanguageModelCompletionEvent::Stop(reason)) => {
|
Ok(LanguageModelCompletionEvent::Stop(reason)) => {
|
||||||
flush_buffers(&mut response, &mut text_buffer, &mut thinking_buffer);
|
flush_buffers(&mut response, &mut text_buffer, &mut thinking_buffer);
|
||||||
response.push_str(&format!("**Stop**: {:?}\n\n", reason));
|
response.push_str(&format!("**Stop**: {:?}\n\n", reason));
|
||||||
|
@ -1126,6 +1127,7 @@ impl ThreadDialog {
|
||||||
|
|
||||||
// Skip these
|
// Skip these
|
||||||
Ok(LanguageModelCompletionEvent::UsageUpdate(_))
|
Ok(LanguageModelCompletionEvent::UsageUpdate(_))
|
||||||
|
| Ok(LanguageModelCompletionEvent::RedactedThinking { .. })
|
||||||
| Ok(LanguageModelCompletionEvent::StatusUpdate { .. })
|
| Ok(LanguageModelCompletionEvent::StatusUpdate { .. })
|
||||||
| Ok(LanguageModelCompletionEvent::StartMessage { .. })
|
| Ok(LanguageModelCompletionEvent::StartMessage { .. })
|
||||||
| Ok(LanguageModelCompletionEvent::Stop(_)) => {}
|
| Ok(LanguageModelCompletionEvent::Stop(_)) => {}
|
||||||
|
|
|
@ -67,6 +67,9 @@ pub enum LanguageModelCompletionEvent {
|
||||||
text: String,
|
text: String,
|
||||||
signature: Option<String>,
|
signature: Option<String>,
|
||||||
},
|
},
|
||||||
|
RedactedThinking {
|
||||||
|
data: String,
|
||||||
|
},
|
||||||
ToolUse(LanguageModelToolUse),
|
ToolUse(LanguageModelToolUse),
|
||||||
StartMessage {
|
StartMessage {
|
||||||
message_id: String,
|
message_id: String,
|
||||||
|
@ -359,6 +362,7 @@ pub trait LanguageModel: Send + Sync {
|
||||||
Ok(LanguageModelCompletionEvent::StartMessage { .. }) => None,
|
Ok(LanguageModelCompletionEvent::StartMessage { .. }) => None,
|
||||||
Ok(LanguageModelCompletionEvent::Text(text)) => Some(Ok(text)),
|
Ok(LanguageModelCompletionEvent::Text(text)) => Some(Ok(text)),
|
||||||
Ok(LanguageModelCompletionEvent::Thinking { .. }) => None,
|
Ok(LanguageModelCompletionEvent::Thinking { .. }) => None,
|
||||||
|
Ok(LanguageModelCompletionEvent::RedactedThinking { .. }) => None,
|
||||||
Ok(LanguageModelCompletionEvent::Stop(_)) => None,
|
Ok(LanguageModelCompletionEvent::Stop(_)) => None,
|
||||||
Ok(LanguageModelCompletionEvent::ToolUse(_)) => None,
|
Ok(LanguageModelCompletionEvent::ToolUse(_)) => None,
|
||||||
Ok(LanguageModelCompletionEvent::UsageUpdate(token_usage)) => {
|
Ok(LanguageModelCompletionEvent::UsageUpdate(token_usage)) => {
|
||||||
|
|
|
@ -303,7 +303,7 @@ pub enum MessageContent {
|
||||||
text: String,
|
text: String,
|
||||||
signature: Option<String>,
|
signature: Option<String>,
|
||||||
},
|
},
|
||||||
RedactedThinking(Vec<u8>),
|
RedactedThinking(String),
|
||||||
Image(LanguageModelImage),
|
Image(LanguageModelImage),
|
||||||
ToolUse(LanguageModelToolUse),
|
ToolUse(LanguageModelToolUse),
|
||||||
ToolResult(LanguageModelToolResult),
|
ToolResult(LanguageModelToolResult),
|
||||||
|
|
|
@ -554,9 +554,7 @@ pub fn into_anthropic(
|
||||||
}
|
}
|
||||||
MessageContent::RedactedThinking(data) => {
|
MessageContent::RedactedThinking(data) => {
|
||||||
if !data.is_empty() {
|
if !data.is_empty() {
|
||||||
Some(anthropic::RequestContent::RedactedThinking {
|
Some(anthropic::RequestContent::RedactedThinking { data })
|
||||||
data: String::from_utf8(data).ok()?,
|
|
||||||
})
|
|
||||||
} else {
|
} else {
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
|
@ -730,10 +728,8 @@ impl AnthropicEventMapper {
|
||||||
signature: None,
|
signature: None,
|
||||||
})]
|
})]
|
||||||
}
|
}
|
||||||
ResponseContent::RedactedThinking { .. } => {
|
ResponseContent::RedactedThinking { data } => {
|
||||||
// Redacted thinking is encrypted and not accessible to the user, see:
|
vec![Ok(LanguageModelCompletionEvent::RedactedThinking { data })]
|
||||||
// https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#suggestions-for-handling-redacted-thinking-in-production
|
|
||||||
Vec::new()
|
|
||||||
}
|
}
|
||||||
ResponseContent::ToolUse { id, name, .. } => {
|
ResponseContent::ToolUse { id, name, .. } => {
|
||||||
self.tool_uses_by_index.insert(
|
self.tool_uses_by_index.insert(
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue