agent: Remove last turn after a refusal (#31220)

This is a follow-up to https://github.com/zed-industries/zed/pull/31217
that removes the last turn after we get a `refusal` stop reason, as
advised by the Anthropic docs.

Meant to include it in that PR, but accidentally merged it before
pushing these changes 🤦🏻‍♂️.

Release Notes:

- N/A
This commit is contained in:
Marshall Bowers 2025-05-22 17:38:33 -04:00 committed by GitHub
parent 24a108d876
commit ceb5164114
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -1698,7 +1698,34 @@ impl Thread {
project.set_agent_location(None, cx);
});
cx.emit (ThreadEvent::ShowError(ThreadError::Message {
// Remove the turn that was refused.
//
// https://docs.anthropic.com/en/docs/test-and-evaluate/strengthen-guardrails/handle-streaming-refusals#reset-context-after-refusal
{
let mut messages_to_remove = Vec::new();
for (ix, message) in thread.messages.iter().enumerate().rev() {
messages_to_remove.push(message.id);
if message.role == Role::User {
if ix == 0 {
break;
}
if let Some(prev_message) = thread.messages.get(ix - 1) {
if prev_message.role == Role::Assistant {
break;
}
}
}
}
for message_id in messages_to_remove {
thread.delete_message(message_id, cx);
}
}
cx.emit(ThreadEvent::ShowError(ThreadError::Message {
header: "Language model refusal".into(),
message: "Model refused to generate content for safety reasons.".into(),
}));