Have read_file support images (#30435)
This adds very basic support for images. A number of TODOs remain before this is a first-class supported feature, so no release notes are added for it. For now, this PR just makes it so that if read_file tries to read a PNG (which has come up in practice), the image is at least sent to Anthropic correctly instead of being mishandled.

This also lays the groundwork for future PRs that add more first-class support for images in tool calls, across more image file formats and LLM providers.

Release Notes:

- N/A

---------

Co-authored-by: Agus Zubiaga <hi@aguz.me>
Co-authored-by: Agus Zubiaga <agus@zed.dev>
This commit is contained in:
parent
f01af006e1
commit
8fdf309a4a
30 changed files with 557 additions and 194 deletions
|
@ -157,6 +157,10 @@ impl LanguageModel for FakeLanguageModel {
|
|||
false
|
||||
}
|
||||
|
||||
/// Reports that the fake model does not support images (always `false`).
fn supports_images(&self) -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
/// Returns the fixed string `"fake"` as this model's telemetry id.
fn telemetry_id(&self) -> String {
|
||||
"fake".to_string()
|
||||
}
|
||||
|
|
|
@ -243,6 +243,9 @@ pub trait LanguageModel: Send + Sync {
|
|||
LanguageModelAvailability::Public
|
||||
}
|
||||
|
||||
/// Whether this model supports images.
|
||||
fn supports_images(&self) -> bool;
|
||||
|
||||
/// Whether this model supports tools.
|
||||
fn supports_tools(&self) -> bool;
|
||||
|
||||
|
|
|
@ -21,6 +21,16 @@ pub struct LanguageModelImage {
|
|||
size: Size<DevicePixels>,
|
||||
}
|
||||
|
||||
// Size helpers that delegate to the image's `source` field.
impl LanguageModelImage {
|
||||
/// Returns `self.source.len()`.
///
/// NOTE(review): `source` is declared outside this hunk; presumably it holds
/// the encoded image data, making this the encoded length — confirm.
pub fn len(&self) -> usize {
|
||||
self.source.len()
|
||||
}
|
||||
|
||||
/// Returns `true` when `self.source` is empty.
pub fn is_empty(&self) -> bool {
|
||||
self.source.is_empty()
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for LanguageModelImage {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_struct("LanguageModelImage")
|
||||
|
@ -134,10 +144,45 @@ pub struct LanguageModelToolResult {
|
|||
pub tool_use_id: LanguageModelToolUseId,
|
||||
pub tool_name: Arc<str>,
|
||||
pub is_error: bool,
|
||||
pub content: Arc<str>,
|
||||
pub content: LanguageModelToolResultContent,
|
||||
pub output: Option<serde_json::Value>,
|
||||
}
|
||||
|
||||
/// Payload of a tool result: either text or an image.
///
/// Marked `#[serde(untagged)]`, so serde (de)serializes the inner value
/// directly, without a variant tag.
#[derive(Debug, Clone, Deserialize, Serialize, Eq, PartialEq, Hash)]
|
||||
#[serde(untagged)]
|
||||
pub enum LanguageModelToolResultContent {
|
||||
/// Textual tool output.
Text(Arc<str>),
|
||||
/// Image tool output.
Image(LanguageModelImage),
|
||||
}
|
||||
|
||||
impl LanguageModelToolResultContent {
|
||||
/// Returns the text of a `Text` result, or `None` for an `Image` result.
pub fn to_str(&self) -> Option<&str> {
|
||||
match self {
|
||||
Self::Text(text) => Some(&text),
|
||||
Self::Image(_) => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns `true` when this is a `Text` result whose characters are all
/// whitespace (which includes the empty string).
///
/// An `Image` result is never considered empty.
pub fn is_empty(&self) -> bool {
|
||||
match self {
|
||||
Self::Text(text) => text.chars().all(|c| c.is_whitespace()),
|
||||
Self::Image(_) => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts a string slice into a `Text` result by copying it into an `Arc<str>`.
impl From<&str> for LanguageModelToolResultContent {
|
||||
fn from(value: &str) -> Self {
|
||||
Self::Text(Arc::from(value))
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts an owned `String` into a `Text` result backed by an `Arc<str>`.
impl From<String> for LanguageModelToolResultContent {
|
||||
fn from(value: String) -> Self {
|
||||
Self::Text(Arc::from(value))
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq, Hash)]
|
||||
pub enum MessageContent {
|
||||
Text(String),
|
||||
|
@ -151,6 +196,29 @@ pub enum MessageContent {
|
|||
ToolResult(LanguageModelToolResult),
|
||||
}
|
||||
|
||||
impl MessageContent {
|
||||
/// Returns the textual form of this content, if it has one.
///
/// `Text` and `Thinking` yield their text; `ToolResult` defers to the tool
/// result content's `to_str()`; `RedactedThinking`, `ToolUse`, and `Image`
/// yield `None`.
pub fn to_str(&self) -> Option<&str> {
|
||||
match self {
|
||||
MessageContent::Text(text) => Some(text.as_str()),
|
||||
MessageContent::Thinking { text, .. } => Some(text.as_str()),
|
||||
MessageContent::RedactedThinking(_) => None,
|
||||
MessageContent::ToolResult(tool_result) => tool_result.content.to_str(),
|
||||
MessageContent::ToolUse(_) | MessageContent::Image(_) => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns `true` when this content carries nothing but whitespace.
///
/// `Text` and `Thinking` are empty when every character is whitespace;
/// `ToolResult` defers to the tool result content's `is_empty()`;
/// `RedactedThinking`, `ToolUse`, and `Image` are never considered empty.
pub fn is_empty(&self) -> bool {
|
||||
match self {
|
||||
MessageContent::Text(text) => text.chars().all(|c| c.is_whitespace()),
|
||||
MessageContent::Thinking { text, .. } => text.chars().all(|c| c.is_whitespace()),
|
||||
MessageContent::ToolResult(tool_result) => tool_result.content.is_empty(),
|
||||
MessageContent::RedactedThinking(_)
|
||||
| MessageContent::ToolUse(_)
|
||||
| MessageContent::Image(_) => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<String> for MessageContent {
|
||||
fn from(value: String) -> Self {
|
||||
MessageContent::Text(value)
|
||||
|
@ -173,13 +241,7 @@ pub struct LanguageModelRequestMessage {
|
|||
impl LanguageModelRequestMessage {
|
||||
pub fn string_contents(&self) -> String {
|
||||
let mut buffer = String::new();
|
||||
for string in self.content.iter().filter_map(|content| match content {
|
||||
MessageContent::Text(text) => Some(text.as_str()),
|
||||
MessageContent::Thinking { text, .. } => Some(text.as_str()),
|
||||
MessageContent::RedactedThinking(_) => None,
|
||||
MessageContent::ToolResult(tool_result) => Some(tool_result.content.as_ref()),
|
||||
MessageContent::ToolUse(_) | MessageContent::Image(_) => None,
|
||||
}) {
|
||||
for string in self.content.iter().filter_map(|content| content.to_str()) {
|
||||
buffer.push_str(string);
|
||||
}
|
||||
|
||||
|
@ -187,16 +249,7 @@ impl LanguageModelRequestMessage {
|
|||
}
|
||||
|
||||
/// Returns `true` when every item in `self.content` is empty.
///
/// NOTE(review): this hunk is rendered without +/- markers; it appears to show
/// the removed inline `match` followed by its replacement, the final
/// `all(|content| content.is_empty())` line — confirm against the raw diff.
pub fn contents_empty(&self) -> bool {
|
||||
self.content.iter().all(|content| match content {
|
||||
MessageContent::Text(text) => text.chars().all(|c| c.is_whitespace()),
|
||||
MessageContent::Thinking { text, .. } => text.chars().all(|c| c.is_whitespace()),
|
||||
MessageContent::ToolResult(tool_result) => {
|
||||
tool_result.content.chars().all(|c| c.is_whitespace())
|
||||
}
|
||||
MessageContent::RedactedThinking(_)
|
||||
| MessageContent::ToolUse(_)
|
||||
| MessageContent::Image(_) => false,
|
||||
})
|
||||
self.content.iter().all(|content| content.is_empty())
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue