Have read_file support images (#30435)

This is very basic support for them. There are a number of other TODOs
before this is really a first-class supported feature, so not adding any
release notes for it; for now, this PR just makes it so that if
read_file tries to read a PNG (which has come up in practice), it at
least correctly sends it to Anthropic instead of messing up.

This also lays the groundwork for future PRs for more first-class
support for images in tool calls across more image file formats and LLM
providers.

Release Notes:

- N/A

---------

Co-authored-by: Agus Zubiaga <hi@aguz.me>
Co-authored-by: Agus Zubiaga <agus@zed.dev>
This commit is contained in:
Richard Feldman 2025-05-13 10:58:00 +02:00 committed by GitHub
parent f01af006e1
commit 8fdf309a4a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
30 changed files with 557 additions and 194 deletions

View file

@ -1,13 +1,17 @@
use crate::schema::json_schema_for;
use anyhow::{Result, anyhow};
use assistant_tool::outline;
use assistant_tool::{ActionLog, Tool, ToolResult};
use assistant_tool::{ToolResultContent, outline};
use gpui::{AnyWindowHandle, App, Entity, Task};
use project::{ImageItem, image_store};
use assistant_tool::ToolResultOutput;
use indoc::formatdoc;
use itertools::Itertools;
use language::{Anchor, Point};
use language_model::{LanguageModel, LanguageModelRequest, LanguageModelToolSchemaFormat};
use language_model::{
LanguageModel, LanguageModelImage, LanguageModelRequest, LanguageModelToolSchemaFormat,
};
use project::{AgentLocation, Project};
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
@ -86,7 +90,7 @@ impl Tool for ReadFileTool {
_request: Arc<LanguageModelRequest>,
project: Entity<Project>,
action_log: Entity<ActionLog>,
_model: Arc<dyn LanguageModel>,
model: Arc<dyn LanguageModel>,
_window: Option<AnyWindowHandle>,
cx: &mut App,
) -> ToolResult {
@ -100,6 +104,42 @@ impl Tool for ReadFileTool {
};
let file_path = input.path.clone();
if image_store::is_image_file(&project, &project_path, cx) {
if !model.supports_images() {
return Task::ready(Err(anyhow!(
"Attempted to read an image, but Zed doesn't currently sending images to {}.",
model.name().0
)))
.into();
}
let task = cx.spawn(async move |cx| -> Result<ToolResultOutput> {
let image_entity: Entity<ImageItem> = cx
.update(|cx| {
project.update(cx, |project, cx| {
project.open_image(project_path.clone(), cx)
})
})?
.await?;
let image =
image_entity.read_with(cx, |image_item, _| Arc::clone(&image_item.image))?;
let language_model_image = cx
.update(|cx| LanguageModelImage::from_image(image, cx))?
.await
.ok_or_else(|| anyhow!("Failed to process image"))?;
Ok(ToolResultOutput {
content: ToolResultContent::Image(language_model_image),
output: None,
})
});
return task.into();
}
cx.spawn(async move |cx| {
let buffer = cx
.update(|cx| {
@ -282,7 +322,10 @@ mod test {
.output
})
.await;
assert_eq!(result.unwrap().content, "This is a small file content");
assert_eq!(
result.unwrap().content.as_str(),
Some("This is a small file content")
);
}
#[gpui::test]
@ -322,6 +365,7 @@ mod test {
})
.await;
let content = result.unwrap();
let content = content.as_str().unwrap();
assert_eq!(
content.lines().skip(4).take(6).collect::<Vec<_>>(),
vec![
@ -365,6 +409,8 @@ mod test {
.collect::<Vec<_>>();
pretty_assertions::assert_eq!(
content
.as_str()
.unwrap()
.lines()
.skip(4)
.take(expected_content.len())
@ -408,7 +454,10 @@ mod test {
.output
})
.await;
assert_eq!(result.unwrap().content, "Line 2\nLine 3\nLine 4");
assert_eq!(
result.unwrap().content.as_str(),
Some("Line 2\nLine 3\nLine 4")
);
}
#[gpui::test]
@ -448,7 +497,7 @@ mod test {
.output
})
.await;
assert_eq!(result.unwrap().content, "Line 1\nLine 2");
assert_eq!(result.unwrap().content.as_str(), Some("Line 1\nLine 2"));
// end_line of 0 should result in at least 1 line
let result = cx
@ -471,7 +520,7 @@ mod test {
.output
})
.await;
assert_eq!(result.unwrap().content, "Line 1");
assert_eq!(result.unwrap().content.as_str(), Some("Line 1"));
// when start_line > end_line, should still return at least 1 line
let result = cx
@ -494,7 +543,7 @@ mod test {
.output
})
.await;
assert_eq!(result.unwrap().content, "Line 3");
assert_eq!(result.unwrap().content.as_str(), Some("Line 3"));
}
fn init_test(cx: &mut TestAppContext) {