Have read_file support images (#30435)
This adds very basic support for images in read_file. There are a number of other TODOs before this is really a first-class supported feature, so no release notes are being added for it yet. For now, this PR just makes it so that if read_file tries to read a PNG (which has come up in practice), the image is at least sent to Anthropic correctly instead of being mishandled. This also lays the groundwork for future PRs that add more first-class support for images in tool calls, across more image file formats and LLM providers.

Release Notes:

- N/A

---------

Co-authored-by: Agus Zubiaga <hi@aguz.me>
Co-authored-by: Agus Zubiaga <agus@zed.dev>
This commit is contained in:
parent
f01af006e1
commit
8fdf309a4a
30 changed files with 557 additions and 194 deletions
|
@ -1,6 +1,9 @@
|
|||
use crate::AllLanguageModelSettings;
|
||||
use crate::ui::InstructionListItem;
|
||||
use anthropic::{AnthropicError, AnthropicModelMode, ContentDelta, Event, ResponseContent, Usage};
|
||||
use anthropic::{
|
||||
AnthropicError, AnthropicModelMode, ContentDelta, Event, ResponseContent, ToolResultContent,
|
||||
ToolResultPart, Usage,
|
||||
};
|
||||
use anyhow::{Context as _, Result, anyhow};
|
||||
use collections::{BTreeMap, HashMap};
|
||||
use credentials_provider::CredentialsProvider;
|
||||
|
@ -15,8 +18,8 @@ use language_model::{
|
|||
AuthenticateError, LanguageModel, LanguageModelCacheConfiguration,
|
||||
LanguageModelCompletionError, LanguageModelId, LanguageModelKnownError, LanguageModelName,
|
||||
LanguageModelProvider, LanguageModelProviderId, LanguageModelProviderName,
|
||||
LanguageModelProviderState, LanguageModelRequest, LanguageModelToolChoice, MessageContent,
|
||||
RateLimiter, Role,
|
||||
LanguageModelProviderState, LanguageModelRequest, LanguageModelToolChoice,
|
||||
LanguageModelToolResultContent, MessageContent, RateLimiter, Role,
|
||||
};
|
||||
use language_model::{LanguageModelCompletionEvent, LanguageModelToolUse, StopReason};
|
||||
use schemars::JsonSchema;
|
||||
|
@ -346,9 +349,14 @@ pub fn count_anthropic_tokens(
|
|||
MessageContent::ToolUse(_tool_use) => {
|
||||
// TODO: Estimate token usage from tool uses.
|
||||
}
|
||||
MessageContent::ToolResult(tool_result) => {
|
||||
string_contents.push_str(&tool_result.content);
|
||||
}
|
||||
MessageContent::ToolResult(tool_result) => match &tool_result.content {
|
||||
LanguageModelToolResultContent::Text(txt) => {
|
||||
string_contents.push_str(txt);
|
||||
}
|
||||
LanguageModelToolResultContent::Image(image) => {
|
||||
tokens_from_images += image.estimate_tokens();
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -421,6 +429,10 @@ impl LanguageModel for AnthropicModel {
|
|||
true
|
||||
}
|
||||
|
||||
fn supports_images(&self) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
fn supports_tool_choice(&self, choice: LanguageModelToolChoice) -> bool {
|
||||
match choice {
|
||||
LanguageModelToolChoice::Auto
|
||||
|
@ -575,7 +587,20 @@ pub fn into_anthropic(
|
|||
Some(anthropic::RequestContent::ToolResult {
|
||||
tool_use_id: tool_result.tool_use_id.to_string(),
|
||||
is_error: tool_result.is_error,
|
||||
content: tool_result.content.to_string(),
|
||||
content: match tool_result.content {
|
||||
LanguageModelToolResultContent::Text(text) => {
|
||||
ToolResultContent::JustText(text.to_string())
|
||||
}
|
||||
LanguageModelToolResultContent::Image(image) => {
|
||||
ToolResultContent::Multipart(vec![ToolResultPart::Image {
|
||||
source: anthropic::ImageSource {
|
||||
source_type: "base64".to_string(),
|
||||
media_type: "image/png".to_string(),
|
||||
data: image.source.to_string(),
|
||||
},
|
||||
}])
|
||||
}
|
||||
},
|
||||
cache_control,
|
||||
})
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue