Have read_file support images (#30435)

This is very basic support for them. There are a number of other TODOs before this is really a first-class supported feature, so not adding any release notes for it; for now, this PR just makes it so that if read_file tries to read a PNG (which has come up in practice), it at least correctly sends it to Anthropic instead of messing up. This also lays the groundwork for future PRs for more first-class support for images in tool calls across more image file formats and LLM providers. Release Notes: - N/A --------- Co-authored-by: Agus Zubiaga <hi@aguz.me> Co-authored-by: Agus Zubiaga <agus@zed.dev>
2025-05-13 10:58:00 +02:00 · 2025-05-13 10:58:00 +02:00 · 8fdf309a4a
commit 8fdf309a4a
parent f01af006e1
30 changed files with 557 additions and 194 deletions
--- a/crates/language_models/src/provider/anthropic.rs
+++ b/crates/language_models/src/provider/anthropic.rs
@ -1,6 +1,9 @@
 use crate::AllLanguageModelSettings;
 use crate::ui::InstructionListItem;
-use anthropic::{AnthropicError, AnthropicModelMode, ContentDelta, Event, ResponseContent, Usage};
+use anthropic::{
+    AnthropicError, AnthropicModelMode, ContentDelta, Event, ResponseContent, ToolResultContent,
+    ToolResultPart, Usage,
+};
 use anyhow::{Context as _, Result, anyhow};
 use collections::{BTreeMap, HashMap};
 use credentials_provider::CredentialsProvider;
@ -15,8 +18,8 @@ use language_model::{
    AuthenticateError, LanguageModel, LanguageModelCacheConfiguration,
    LanguageModelCompletionError, LanguageModelId, LanguageModelKnownError, LanguageModelName,
    LanguageModelProvider, LanguageModelProviderId, LanguageModelProviderName,
-    LanguageModelProviderState, LanguageModelRequest, LanguageModelToolChoice, MessageContent,
-    RateLimiter, Role,
+    LanguageModelProviderState, LanguageModelRequest, LanguageModelToolChoice,
+    LanguageModelToolResultContent, MessageContent, RateLimiter, Role,
 };
 use language_model::{LanguageModelCompletionEvent, LanguageModelToolUse, StopReason};
 use schemars::JsonSchema;
@ -346,9 +349,14 @@ pub fn count_anthropic_tokens(
                    MessageContent::ToolUse(_tool_use) => {
                        // TODO: Estimate token usage from tool uses.
                    }
-                    MessageContent::ToolResult(tool_result) => {
-                        string_contents.push_str(&tool_result.content);
-                    }
+                    MessageContent::ToolResult(tool_result) => match &tool_result.content {
+                        LanguageModelToolResultContent::Text(txt) => {
+                            string_contents.push_str(txt);
+                        }
+                        LanguageModelToolResultContent::Image(image) => {
+                            tokens_from_images += image.estimate_tokens();
+                        }
+                    },
                }
            }

@ -421,6 +429,10 @@ impl LanguageModel for AnthropicModel {
        true
    }

+    fn supports_images(&self) -> bool {
+        true
+    }
+
    fn supports_tool_choice(&self, choice: LanguageModelToolChoice) -> bool {
        match choice {
            LanguageModelToolChoice::Auto
@ -575,7 +587,20 @@ pub fn into_anthropic(
                            Some(anthropic::RequestContent::ToolResult {
                                tool_use_id: tool_result.tool_use_id.to_string(),
                                is_error: tool_result.is_error,
-                                content: tool_result.content.to_string(),
+                                content: match tool_result.content {
+                                    LanguageModelToolResultContent::Text(text) => {
+                                        ToolResultContent::JustText(text.to_string())
+                                    }
+                                    LanguageModelToolResultContent::Image(image) => {
+                                        ToolResultContent::Multipart(vec![ToolResultPart::Image {
+                                            source: anthropic::ImageSource {
+                                                source_type: "base64".to_string(),
+                                                media_type: "image/png".to_string(),
+                                                data: image.source.to_string(),
+                                            },
+                                        }])
+                                    }
+                                },
                                cache_control,
                            })
                        }