Compare commits


4 commits

Author           SHA1        Message                            Date
Richard Feldman  97c1abbeae  Improve docs for max image sizes   2025-07-02 16:23:20 -04:00
Richard Feldman  fe905f1fd2  wip                                2025-07-02 11:06:07 -04:00
Richard Feldman  4c442a66b0  Reject images that are too big     2025-06-30 18:27:17 -04:00
Richard Feldman  f38c6e9bd0  Introduce max_image_size           2025-06-30 16:44:02 -04:00
22 changed files with 1382 additions and 36 deletions

Cargo.lock (generated)
View file

@@ -20086,7 +20086,7 @@ dependencies = [
[[package]]
name = "zed_emmet"
version = "0.0.4"
version = "0.0.3"
dependencies = [
"zed_extension_api 0.1.0",
]

View file

@@ -819,6 +819,134 @@ impl LoadedContext {
}
}
}
pub fn add_to_request_message_with_model(
&self,
request_message: &mut LanguageModelRequestMessage,
model: &Arc<dyn language_model::LanguageModel>,
) {
if !self.text.is_empty() {
request_message
.content
.push(MessageContent::Text(self.text.to_string()));
}
if !self.images.is_empty() {
let max_image_size = model.max_image_size();
let mut images_added = false;
for image in &self.images {
let image_size = image.len() as u64;
if image_size > max_image_size {
if max_image_size == 0 {
log::warn!(
"Skipping image attachment: model {:?} does not support images",
model.name()
);
} else {
log::warn!(
"Skipping image attachment: size {} bytes exceeds model {:?} limit of {} bytes",
image_size,
model.name(),
max_image_size
);
}
continue;
}
// Some providers only support image parts after an initial text part
if !images_added && request_message.content.is_empty() {
request_message
.content
.push(MessageContent::Text("Images attached by user:".to_string()));
}
request_message
.content
.push(MessageContent::Image(image.clone()));
images_added = true;
}
}
}
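// A standalone sketch, separate from the diff, of the ordering rule above: some
// providers reject a request whose first content part is an image, so a text part
// is prepended exactly once before the first image. `SketchContent` stands in for
// the real `MessageContent` enum.
#[derive(Debug)]
enum SketchContent {
    Text(String),
    Image(&'static str),
}

fn push_images(content: &mut Vec<SketchContent>, images: &[&'static str]) {
    let mut images_added = false;
    for &img in images {
        if !images_added && content.is_empty() {
            content.push(SketchContent::Text("Images attached by user:".to_string()));
        }
        content.push(SketchContent::Image(img));
        images_added = true;
    }
}

fn main() {
    let mut content = Vec::new();
    push_images(&mut content, &["a.png", "b.png"]);
    assert_eq!(content.len(), 3); // one placeholder text part + two images
    assert!(matches!(content[0], SketchContent::Text(_)));
}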
/// Checks images against model size limits and returns information about rejected images
pub fn check_image_size_limits(
&self,
model: &Arc<dyn language_model::LanguageModel>,
) -> Vec<RejectedImage> {
let mut rejected_images = Vec::new();
if !self.images.is_empty() {
let max_image_size = model.max_image_size();
for image in &self.images {
let image_size = image.len() as u64;
if image_size > max_image_size {
rejected_images.push(RejectedImage {
size: image_size,
max_size: max_image_size,
model_name: model.name().0.to_string(),
});
}
}
}
rejected_images
}
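// A standalone sketch, not part of the diff, of the rejection scan above, using
// plain byte counts in place of `LanguageModelImage`; the fields mirror the
// `RejectedImage` struct introduced further down in this diff.
struct SketchRejection {
    size: u64,
    max_size: u64,
    model_name: String,
}

fn scan_for_rejections(image_sizes: &[u64], max_size: u64, model_name: &str) -> Vec<SketchRejection> {
    image_sizes
        .iter()
        .copied()
        .filter(|&size| size > max_size)
        .map(|size| SketchRejection {
            size,
            max_size,
            model_name: model_name.to_string(),
        })
        .collect()
}

fn main() {
    // Against a 1 MiB cap, only the 5 MB image is rejected; a cap of 0 rejects all.
    let rejected = scan_for_rejections(&[10, 100_000, 5_242_880], 1_048_576, "Test Model 1MB");
    assert_eq!(rejected.len(), 1);
    assert_eq!(rejected[0].size, 5_242_880);
}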
pub fn add_to_request_message_with_validation<F>(
&self,
request_message: &mut LanguageModelRequestMessage,
model: &Arc<dyn language_model::LanguageModel>,
mut on_image_rejected: F,
) where
F: FnMut(u64, u64, &str),
{
if !self.text.is_empty() {
request_message
.content
.push(MessageContent::Text(self.text.to_string()));
}
if !self.images.is_empty() {
let max_image_size = model.max_image_size();
let mut images_added = false;
for image in &self.images {
let image_size = image.len() as u64;
if image_size > max_image_size {
on_image_rejected(image_size, max_image_size, &model.name().0);
if max_image_size == 0 {
log::warn!(
"Skipping image attachment: model {:?} does not support images",
model.name()
);
} else {
log::warn!(
"Skipping image attachment: size {} bytes exceeds model {:?} limit of {} bytes",
image_size,
model.name(),
max_image_size
);
}
continue;
}
// Some providers only support image parts after an initial text part
if !images_added && request_message.content.is_empty() {
request_message
.content
.push(MessageContent::Text("Images attached by user:".to_string()));
}
request_message
.content
.push(MessageContent::Image(image.clone()));
images_added = true;
}
}
}
}
/// Loads and formats a collection of contexts.
@@ -1112,10 +1240,18 @@ impl Hash for AgentContextKey {
}
}
#[derive(Debug, Clone)]
pub struct RejectedImage {
pub size: u64,
pub max_size: u64,
pub model_name: String,
}
#[cfg(test)]
mod tests {
use super::*;
-use gpui::TestAppContext;
+use gpui::{AsyncApp, TestAppContext};
use language_model::{LanguageModelCacheConfiguration, LanguageModelId, LanguageModelName};
use project::{FakeFs, Project};
use serde_json::json;
use settings::SettingsStore;
@@ -1222,4 +1358,484 @@ mod tests {
})
.expect("Should have found a file context")
}
#[gpui::test]
async fn test_image_size_limit_filtering(_cx: &mut TestAppContext) {
use futures::stream::BoxStream;
use gpui::{AsyncApp, DevicePixels, SharedString};
use language_model::{
LanguageModelId, LanguageModelImage, LanguageModelName, LanguageModelProviderId,
LanguageModelProviderName, Role,
};
use std::sync::Arc;
// Create a mock image that's 10 bytes
let small_image = LanguageModelImage {
source: "small_data".into(),
size: gpui::size(DevicePixels(10), DevicePixels(10)),
};
// Create a mock image that's 1MB
let large_image_source = "x".repeat(1_048_576);
let large_image = LanguageModelImage {
source: large_image_source.into(),
size: gpui::size(DevicePixels(1024), DevicePixels(1024)),
};
let loaded_context = LoadedContext {
contexts: vec![],
text: "Some text".to_string(),
images: vec![small_image.clone(), large_image.clone()],
};
// Test with a model that supports images with 500KB limit
struct TestModel500KB;
impl language_model::LanguageModel for TestModel500KB {
fn id(&self) -> LanguageModelId {
LanguageModelId(SharedString::from("test-500kb"))
}
fn name(&self) -> LanguageModelName {
LanguageModelName(SharedString::from("Test Model 500KB"))
}
fn provider_id(&self) -> LanguageModelProviderId {
LanguageModelProviderId(SharedString::from("test"))
}
fn provider_name(&self) -> LanguageModelProviderName {
LanguageModelProviderName(SharedString::from("Test Provider"))
}
fn supports_tools(&self) -> bool {
false
}
fn supports_tool_choice(&self, _: language_model::LanguageModelToolChoice) -> bool {
false
}
fn max_image_size(&self) -> u64 {
512_000 // 500KB
}
fn telemetry_id(&self) -> String {
"test-500kb".to_string()
}
fn max_token_count(&self) -> u64 {
100_000
}
fn count_tokens(
&self,
_request: language_model::LanguageModelRequest,
_cx: &App,
) -> futures::future::BoxFuture<'static, anyhow::Result<u64>> {
Box::pin(async { Ok(0) })
}
fn stream_completion(
&self,
_request: language_model::LanguageModelRequest,
_cx: &AsyncApp,
) -> futures::future::BoxFuture<
'static,
Result<
BoxStream<
'static,
Result<
language_model::LanguageModelCompletionEvent,
language_model::LanguageModelCompletionError,
>,
>,
language_model::LanguageModelCompletionError,
>,
> {
use language_model::LanguageModelCompletionError;
Box::pin(async {
Err(LanguageModelCompletionError::Other(anyhow::anyhow!(
"Not implemented"
)))
})
}
}
let model_500kb: Arc<dyn language_model::LanguageModel> = Arc::new(TestModel500KB);
let mut request_message = LanguageModelRequestMessage {
role: Role::User,
content: vec![],
cache: false,
};
loaded_context.add_to_request_message_with_model(&mut request_message, &model_500kb);
// Should have text and only the small image
assert_eq!(request_message.content.len(), 2); // text + small image
assert!(
matches!(&request_message.content[0], MessageContent::Text(text) if text == "Some text")
);
assert!(matches!(
&request_message.content[1],
MessageContent::Image(_)
));
// Test with a model that doesn't support images
struct TestModelNoImages;
impl language_model::LanguageModel for TestModelNoImages {
fn id(&self) -> LanguageModelId {
LanguageModelId(SharedString::from("test-no-images"))
}
fn name(&self) -> LanguageModelName {
LanguageModelName(SharedString::from("Test Model No Images"))
}
fn provider_id(&self) -> LanguageModelProviderId {
LanguageModelProviderId(SharedString::from("test"))
}
fn provider_name(&self) -> LanguageModelProviderName {
LanguageModelProviderName(SharedString::from("Test Provider"))
}
fn supports_tools(&self) -> bool {
false
}
fn supports_tool_choice(&self, _: language_model::LanguageModelToolChoice) -> bool {
false
}
fn max_image_size(&self) -> u64 {
0 // No image support
}
fn telemetry_id(&self) -> String {
"test-no-images".to_string()
}
fn max_token_count(&self) -> u64 {
100_000
}
fn count_tokens(
&self,
_request: language_model::LanguageModelRequest,
_cx: &App,
) -> futures::future::BoxFuture<'static, anyhow::Result<u64>> {
Box::pin(async { Ok(0) })
}
fn stream_completion(
&self,
_request: language_model::LanguageModelRequest,
_cx: &AsyncApp,
) -> futures::future::BoxFuture<
'static,
Result<
BoxStream<
'static,
Result<
language_model::LanguageModelCompletionEvent,
language_model::LanguageModelCompletionError,
>,
>,
language_model::LanguageModelCompletionError,
>,
> {
use language_model::LanguageModelCompletionError;
Box::pin(async {
Err(LanguageModelCompletionError::Other(anyhow::anyhow!(
"Not implemented"
)))
})
}
}
let model_no_images: Arc<dyn language_model::LanguageModel> = Arc::new(TestModelNoImages);
let mut request_message_no_images = LanguageModelRequestMessage {
role: Role::User,
content: vec![],
cache: false,
};
loaded_context
.add_to_request_message_with_model(&mut request_message_no_images, &model_no_images);
// Should have only text, no images
assert_eq!(request_message_no_images.content.len(), 1);
assert!(
matches!(&request_message_no_images.content[0], MessageContent::Text(text) if text == "Some text")
);
}
#[gpui::test]
async fn test_check_image_size_limits() {
use gpui::DevicePixels;
use language_model::LanguageModelImage;
// Create test images of various sizes
let tiny_image = LanguageModelImage {
source: "tiny".into(),
size: gpui::size(DevicePixels(10), DevicePixels(10)),
};
let small_image = LanguageModelImage {
source: "x".repeat(100_000).into(), // 100KB
size: gpui::size(DevicePixels(100), DevicePixels(100)),
};
let medium_image = LanguageModelImage {
source: "x".repeat(500_000).into(), // 500KB
size: gpui::size(DevicePixels(500), DevicePixels(500)),
};
let large_image = LanguageModelImage {
source: "x".repeat(1_048_576).into(), // 1MB
size: gpui::size(DevicePixels(1024), DevicePixels(1024)),
};
let huge_image = LanguageModelImage {
source: "x".repeat(5_242_880).into(), // 5MB
size: gpui::size(DevicePixels(2048), DevicePixels(2048)),
};
// Test with model that has 1MB limit
let model_1mb = Arc::new(TestModel1MB);
let loaded_context = LoadedContext {
contexts: vec![],
text: String::new(),
images: vec![
tiny_image.clone(),
small_image.clone(),
medium_image.clone(),
large_image.clone(),
huge_image.clone(),
],
};
let rejected = loaded_context.check_image_size_limits(
&(model_1mb.clone() as Arc<dyn language_model::LanguageModel>),
);
assert_eq!(rejected.len(), 1);
assert_eq!(rejected[0].size, 5_242_880);
assert_eq!(rejected[0].max_size, 1_048_576);
assert_eq!(rejected[0].model_name, "Test Model 1MB");
// Test with model that doesn't support images
let model_no_images = Arc::new(TestModelNoImages);
let rejected = loaded_context.check_image_size_limits(
&(model_no_images.clone() as Arc<dyn language_model::LanguageModel>),
);
assert_eq!(rejected.len(), 5); // All images rejected
for rejected_image in &rejected {
assert_eq!(rejected_image.max_size, 0);
assert_eq!(rejected_image.model_name, "Test Model No Images");
}
// Test with empty image list
let empty_context = LoadedContext {
contexts: vec![],
text: String::new(),
images: vec![],
};
let rejected = empty_context.check_image_size_limits(
&(model_1mb.clone() as Arc<dyn language_model::LanguageModel>),
);
assert!(rejected.is_empty());
// Test with all images within limit
let small_context = LoadedContext {
contexts: vec![],
text: String::new(),
images: vec![tiny_image.clone(), small_image.clone()],
};
let rejected = small_context
.check_image_size_limits(&(model_1mb as Arc<dyn language_model::LanguageModel>));
assert!(rejected.is_empty());
}
#[gpui::test]
async fn test_add_to_request_message_with_validation() {
use gpui::DevicePixels;
use language_model::{LanguageModelImage, MessageContent, Role};
let small_image = LanguageModelImage {
source: "small".into(),
size: gpui::size(DevicePixels(10), DevicePixels(10)),
};
let large_image = LanguageModelImage {
source: "x".repeat(2_097_152).into(), // 2MB
size: gpui::size(DevicePixels(1024), DevicePixels(1024)),
};
let loaded_context = LoadedContext {
contexts: vec![],
text: "Test message".to_string(),
images: vec![small_image.clone(), large_image.clone()],
};
let model = Arc::new(TestModel1MB);
let mut request_message = LanguageModelRequestMessage {
role: Role::User,
content: Vec::new(),
cache: false,
};
let mut rejected_count = 0;
let mut rejected_sizes = Vec::new();
let mut rejected_model_names = Vec::new();
loaded_context.add_to_request_message_with_validation(
&mut request_message,
&(model.clone() as Arc<dyn language_model::LanguageModel>),
|size, max_size, model_name| {
rejected_count += 1;
rejected_sizes.push((size, max_size));
rejected_model_names.push(model_name.to_string());
},
);
// Verify callback was called for the large image
assert_eq!(rejected_count, 1);
assert_eq!(rejected_sizes[0], (2_097_152, 1_048_576));
assert_eq!(rejected_model_names[0], "Test Model 1MB");
// Verify the request message contains text and only the small image
assert_eq!(request_message.content.len(), 2); // text + small image
assert!(
matches!(&request_message.content[0], MessageContent::Text(text) if text == "Test message")
);
assert!(matches!(
&request_message.content[1],
MessageContent::Image(_)
));
}
// Helper test models
struct TestModel1MB;
impl language_model::LanguageModel for TestModel1MB {
fn id(&self) -> LanguageModelId {
LanguageModelId(SharedString::from("test-1mb"))
}
fn name(&self) -> LanguageModelName {
LanguageModelName(SharedString::from("Test Model 1MB"))
}
fn provider_id(&self) -> language_model::LanguageModelProviderId {
language_model::LanguageModelProviderId(SharedString::from("test"))
}
fn provider_name(&self) -> language_model::LanguageModelProviderName {
language_model::LanguageModelProviderName(SharedString::from("Test Provider"))
}
fn supports_tools(&self) -> bool {
false
}
fn supports_tool_choice(&self, _: language_model::LanguageModelToolChoice) -> bool {
false
}
fn max_image_size(&self) -> u64 {
1_048_576 // 1MB
}
fn telemetry_id(&self) -> String {
"test-1mb".to_string()
}
fn max_token_count(&self) -> u64 {
100_000
}
fn max_output_tokens(&self) -> Option<u64> {
Some(4096)
}
fn cache_configuration(&self) -> Option<LanguageModelCacheConfiguration> {
Some(LanguageModelCacheConfiguration {
max_cache_anchors: 0,
should_speculate: false,
min_total_token: 1024,
})
}
fn count_tokens(
&self,
_request: language_model::LanguageModelRequest,
_cx: &App,
) -> futures::future::BoxFuture<'static, anyhow::Result<u64>> {
Box::pin(async { Ok(0) })
}
fn stream_completion(
&self,
_request: language_model::LanguageModelRequest,
_cx: &AsyncApp,
) -> futures::future::BoxFuture<
'static,
Result<
futures::stream::BoxStream<
'static,
Result<
language_model::LanguageModelCompletionEvent,
language_model::LanguageModelCompletionError,
>,
>,
language_model::LanguageModelCompletionError,
>,
> {
use language_model::LanguageModelCompletionError;
Box::pin(async {
Err(LanguageModelCompletionError::Other(anyhow::anyhow!(
"Not implemented"
)))
})
}
}
struct TestModelNoImages;
impl language_model::LanguageModel for TestModelNoImages {
fn id(&self) -> LanguageModelId {
LanguageModelId(SharedString::from("test-no-images"))
}
fn name(&self) -> LanguageModelName {
LanguageModelName(SharedString::from("Test Model No Images"))
}
fn provider_id(&self) -> language_model::LanguageModelProviderId {
language_model::LanguageModelProviderId(SharedString::from("test"))
}
fn provider_name(&self) -> language_model::LanguageModelProviderName {
language_model::LanguageModelProviderName(SharedString::from("Test Provider"))
}
fn supports_tools(&self) -> bool {
false
}
fn supports_tool_choice(&self, _: language_model::LanguageModelToolChoice) -> bool {
false
}
fn max_image_size(&self) -> u64 {
0 // No image support
}
fn telemetry_id(&self) -> String {
"test-no-images".to_string()
}
fn max_token_count(&self) -> u64 {
100_000
}
fn max_output_tokens(&self) -> Option<u64> {
Some(4096)
}
fn cache_configuration(&self) -> Option<LanguageModelCacheConfiguration> {
Some(LanguageModelCacheConfiguration {
max_cache_anchors: 0,
should_speculate: false,
min_total_token: 1024,
})
}
fn count_tokens(
&self,
_request: language_model::LanguageModelRequest,
_cx: &App,
) -> futures::future::BoxFuture<'static, anyhow::Result<u64>> {
Box::pin(async { Ok(0) })
}
fn stream_completion(
&self,
_request: language_model::LanguageModelRequest,
_cx: &AsyncApp,
) -> futures::future::BoxFuture<
'static,
Result<
futures::stream::BoxStream<
'static,
Result<
language_model::LanguageModelCompletionEvent,
language_model::LanguageModelCompletionError,
>,
>,
language_model::LanguageModelCompletionError,
>,
> {
use language_model::LanguageModelCompletionError;
Box::pin(async {
Err(LanguageModelCompletionError::Other(anyhow::anyhow!(
"Not implemented"
)))
})
}
}
}

View file

@@ -1338,7 +1338,7 @@ impl Thread {
message
.loaded_context
-.add_to_request_message(&mut request_message);
+.add_to_request_message_with_model(&mut request_message, &model);
for segment in &message.segments {
match segment {
@@ -4108,6 +4108,10 @@ fn main() {{
self.inner.supports_images()
}
fn max_image_size(&self) -> u64 {
self.inner.max_image_size()
}
fn telemetry_id(&self) -> String {
self.inner.telemetry_id()
}
@@ -4617,6 +4621,10 @@ fn main() {{
self.inner.supports_images()
}
fn max_image_size(&self) -> u64 {
self.inner.max_image_size()
}
fn telemetry_id(&self) -> String {
self.inner.telemetry_id()
}
@@ -4782,6 +4790,10 @@ fn main() {{
self.inner.supports_images()
}
fn max_image_size(&self) -> u64 {
self.inner.max_image_size()
}
fn telemetry_id(&self) -> String {
self.inner.telemetry_id()
}
@@ -4939,6 +4951,10 @@ fn main() {{
self.inner.supports_images()
}
fn max_image_size(&self) -> u64 {
self.inner.max_image_size()
}
fn telemetry_id(&self) -> String {
self.inner.telemetry_id()
}
@@ -5382,4 +5398,192 @@ fn main() {{
Ok(buffer)
}
#[gpui::test]
async fn test_image_size_limit_in_thread(cx: &mut TestAppContext) {
use gpui::DevicePixels;
use language_model::{
LanguageModelImage,
fake_provider::{FakeLanguageModel, FakeLanguageModelProvider},
};
init_test_settings(cx);
let project = create_test_project(cx, serde_json::json!({})).await;
let (_, _, thread, _, _) = setup_test_environment(cx, project).await;
// Create a small image that's under the limit
let small_image = LanguageModelImage {
source: "small_data".into(),
size: gpui::size(DevicePixels(10), DevicePixels(10)),
};
// Create a large image that exceeds typical limits (10MB)
let large_image_source = "x".repeat(10_485_760); // 10MB
let large_image = LanguageModelImage {
source: large_image_source.into(),
size: gpui::size(DevicePixels(1024), DevicePixels(1024)),
};
// Create a loaded context with both images
let loaded_context = ContextLoadResult {
loaded_context: LoadedContext {
contexts: vec![],
text: "Test message".to_string(),
images: vec![small_image.clone(), large_image.clone()],
},
referenced_buffers: HashSet::default(),
};
// Insert a user message with the loaded context
thread.update(cx, |thread, cx| {
thread.insert_user_message("Test with images", loaded_context, None, vec![], cx);
});
// Create a fake provider and model
let _provider = Arc::new(FakeLanguageModelProvider);
let model = Arc::new(FakeLanguageModel::default());
// Note: FakeLanguageModel doesn't support images by default (max_image_size returns 0)
// so we'll test that images are excluded when the model doesn't support them
// Generate the completion request
let request = thread.update(cx, |thread, cx| {
thread.to_completion_request(model.clone(), CompletionIntent::UserPrompt, cx)
});
// Verify that no images were included (because FakeLanguageModel doesn't support images)
let mut image_count = 0;
let mut has_text = false;
for message in &request.messages {
for content in &message.content {
match content {
MessageContent::Text(text) => {
if text.contains("Test message") {
has_text = true;
}
}
MessageContent::Image(_) => {
image_count += 1;
}
_ => {}
}
}
}
assert!(has_text, "Text content should be included");
assert_eq!(
image_count, 0,
"No images should be included when model doesn't support them"
);
}
#[gpui::test]
async fn test_image_size_limit_with_anthropic_model(_cx: &mut TestAppContext) {
use gpui::{DevicePixels, SharedString};
use language_model::{
LanguageModelId, LanguageModelImage, LanguageModelName, LanguageModelProviderId,
LanguageModelProviderName,
};
// Test with a model that has specific size limits (like Anthropic's 5MB limit)
// We'll create a simple test to verify the logic works correctly
// Create test images
let small_image = LanguageModelImage {
source: "small".into(),
size: gpui::size(DevicePixels(100), DevicePixels(100)),
};
let large_image_source = "x".repeat(6_000_000); // 6MB - over Anthropic's 5MB limit
let large_image = LanguageModelImage {
source: large_image_source.into(),
size: gpui::size(DevicePixels(2000), DevicePixels(2000)),
};
let loaded_context = LoadedContext {
contexts: vec![],
text: "Test".to_string(),
images: vec![small_image.clone(), large_image.clone()],
};
// Test the add_to_request_message_with_model method directly
let mut request_message = LanguageModelRequestMessage {
role: Role::User,
content: vec![],
cache: false,
};
// Create a mock model with a 5MB limit, mirroring Anthropic's cap
struct TestModel5MB;
impl language_model::LanguageModel for TestModel5MB {
fn id(&self) -> LanguageModelId {
LanguageModelId(SharedString::from("test"))
}
fn name(&self) -> LanguageModelName {
LanguageModelName(SharedString::from("Test 5MB"))
}
fn provider_id(&self) -> LanguageModelProviderId {
LanguageModelProviderId(SharedString::from("test"))
}
fn provider_name(&self) -> LanguageModelProviderName {
LanguageModelProviderName(SharedString::from("Test"))
}
fn supports_tools(&self) -> bool {
false
}
fn supports_tool_choice(&self, _: language_model::LanguageModelToolChoice) -> bool {
false
}
fn max_image_size(&self) -> u64 {
5_242_880 // 5MB like Anthropic
}
fn telemetry_id(&self) -> String {
"test".to_string()
}
fn max_token_count(&self) -> u64 {
100_000
}
fn count_tokens(
&self,
_request: language_model::LanguageModelRequest,
_cx: &App,
) -> futures::future::BoxFuture<'static, anyhow::Result<u64>> {
Box::pin(async { Ok(0) })
}
fn stream_completion(
&self,
_request: language_model::LanguageModelRequest,
_cx: &gpui::AsyncApp,
) -> futures::future::BoxFuture<
'static,
Result<
futures::stream::BoxStream<
'static,
Result<
language_model::LanguageModelCompletionEvent,
language_model::LanguageModelCompletionError,
>,
>,
language_model::LanguageModelCompletionError,
>,
> {
Box::pin(async {
Err(language_model::LanguageModelCompletionError::Other(
anyhow::anyhow!("Not implemented"),
))
})
}
}
let model: Arc<dyn language_model::LanguageModel> = Arc::new(TestModel5MB);
loaded_context.add_to_request_message_with_model(&mut request_message, &model);
// Should have text and only the small image
let mut image_count = 0;
for content in &request_message.content {
if matches!(content, MessageContent::Image(_)) {
image_count += 1;
}
}
assert_eq!(image_count, 1, "Only the small image should be included");
}
}

View file

@@ -889,6 +889,46 @@ impl ActiveThread {
&self.text_thread_store
}
pub fn validate_image(&self, image: &Arc<gpui::Image>, cx: &App) -> Result<(), String> {
let image_size = image.bytes().len() as u64;
if let Some(model) = self.thread.read(cx).configured_model() {
let max_size = model.model.max_image_size();
if image_size > max_size {
if max_size == 0 {
Err(format!(
"{} does not support image attachments",
model.model.name().0
))
} else {
let size_mb = image_size as f64 / 1_048_576.0;
let max_size_mb = max_size as f64 / 1_048_576.0;
Err(format!(
"Image ({:.1} MB) exceeds {}'s {:.1} MB size limit",
size_mb,
model.model.name().0,
max_size_mb
))
}
} else {
Ok(())
}
} else {
// No model configured, use default 10MB limit
const DEFAULT_MAX_SIZE: u64 = 10 * 1024 * 1024;
if image_size > DEFAULT_MAX_SIZE {
let size_mb = image_size as f64 / 1_048_576.0;
Err(format!(
"Image ({:.1} MB) exceeds the 10 MB size limit",
size_mb
))
} else {
Ok(())
}
}
}
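// An illustrative sketch, separate from the diff, of the byte-to-MB conversion
// used in the error strings above, assuming the same 1_048_576-byte (1 MiB) divisor.
fn mb(bytes: u64) -> f64 {
    bytes as f64 / 1_048_576.0
}

fn main() {
    // A 12 MiB pasted image against the 10 MB fallback limit:
    let err = format!("Image ({:.1} MB) exceeds the 10 MB size limit", mb(12_582_912));
    assert_eq!(err, "Image (12.0 MB) exceeds the 10 MB size limit");
}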
fn push_rendered_message(&mut self, id: MessageId, rendered_message: RenderedMessage) {
let old_len = self.messages.len();
self.messages.push(id);
@@ -1522,7 +1562,7 @@ impl ActiveThread {
}
fn paste(&mut self, _: &Paste, _window: &mut Window, cx: &mut Context<Self>) {
-attach_pasted_images_as_context(&self.context_store, cx);
+attach_pasted_images_as_context_with_validation(&self.context_store, Some(self), cx);
}
fn cancel_editing_message(
@@ -3703,6 +3743,14 @@ pub(crate) fn open_context(
pub(crate) fn attach_pasted_images_as_context(
context_store: &Entity<ContextStore>,
cx: &mut App,
) -> bool {
attach_pasted_images_as_context_with_validation(context_store, None, cx)
}
pub(crate) fn attach_pasted_images_as_context_with_validation(
context_store: &Entity<ContextStore>,
active_thread: Option<&ActiveThread>,
cx: &mut App,
) -> bool {
let images = cx
.read_from_clipboard()
@@ -3724,9 +3772,67 @@ pub(crate) fn attach_pasted_images_as_context(
}
cx.stop_propagation();
// Try to find the workspace for showing toasts
let workspace = cx
.active_window()
.and_then(|window| window.downcast::<Workspace>());
context_store.update(cx, |store, cx| {
for image in images {
store.add_image_instance(Arc::new(image), cx);
let image_arc = Arc::new(image);
// Validate image if we have an active thread
let should_add = if let Some(thread) = active_thread {
match thread.validate_image(&image_arc, cx) {
Ok(()) => true,
Err(err) => {
// Show error toast if we have a workspace
if let Some(workspace) = workspace {
let _ = workspace.update(cx, |workspace, _, cx| {
use workspace::{Toast, notifications::NotificationId};
struct ImageRejectionToast;
workspace.show_toast(
Toast::new(
NotificationId::unique::<ImageRejectionToast>(),
err,
),
cx,
);
});
}
false
}
}
} else {
// No active thread, check against default limit
let image_size = image_arc.bytes().len() as u64;
const DEFAULT_MAX_SIZE: u64 = 10 * 1024 * 1024; // 10MB
if image_size > DEFAULT_MAX_SIZE {
let size_mb = image_size as f64 / 1_048_576.0;
let err = format!("Image ({:.1} MB) exceeds the 10 MB size limit", size_mb);
if let Some(workspace) = workspace {
let _ = workspace.update(cx, |workspace, _, cx| {
use workspace::{Toast, notifications::NotificationId};
struct ImageRejectionToast;
workspace.show_toast(
Toast::new(NotificationId::unique::<ImageRejectionToast>(), err),
cx,
);
});
}
false
} else {
true
}
};
if should_add {
store.add_image_instance(image_arc, cx);
}
}
});
true

View file

@@ -4,6 +4,8 @@ use std::rc::Rc;
use std::sync::Arc;
use std::time::Duration;
use gpui::{Image, ImageFormat};
use db::kvp::{Dismissable, KEY_VALUE_STORE};
use serde::{Deserialize, Serialize};
@@ -2932,29 +2934,215 @@ impl AgentPanel {
}),
)
.on_drop(cx.listener(move |this, paths: &ExternalPaths, window, cx| {
-let tasks = paths
-.paths()
-.into_iter()
-.map(|path| {
-Workspace::project_path_for_path(this.project.clone(), &path, false, cx)
-})
-.collect::<Vec<_>>();
-cx.spawn_in(window, async move |this, cx| {
-let mut paths = vec![];
-let mut added_worktrees = vec![];
-let opened_paths = futures::future::join_all(tasks).await;
-for entry in opened_paths {
-if let Some((worktree, project_path)) = entry.log_err() {
-added_worktrees.push(worktree);
-paths.push(project_path);
eprintln!("=== ON_DROP EXTERNAL_PATHS HANDLER ===");
eprintln!("Number of external paths: {}", paths.paths().len());
for (i, path) in paths.paths().iter().enumerate() {
eprintln!("External path {}: {:?}", i, path);
}
match &this.active_view {
ActiveView::Thread { thread, .. } => {
eprintln!("In ActiveView::Thread branch");
let thread = thread.clone();
let paths = paths.paths();
let workspace = this.workspace.clone();
for path in paths {
eprintln!("Processing path: {:?}", path);
// Check if it's an image file by extension
let is_image = path.extension()
.and_then(|ext| ext.to_str())
.map(|ext| {
matches!(
ext.to_lowercase().as_str(),
"jpg" | "jpeg" | "png" | "gif" | "webp" | "bmp" | "ico" | "svg" | "tiff" | "tif"
)
})
.unwrap_or(false);
eprintln!("Is image: {}", is_image);
if is_image {
let path = path.to_path_buf();
let thread = thread.clone();
let workspace = workspace.clone();
eprintln!("Spawning async task for image: {:?}", path);
cx.spawn_in(window, async move |_, cx| {
eprintln!("=== INSIDE ASYNC IMAGE TASK ===");
eprintln!("Image path: {:?}", path);
// Get file metadata first
let metadata = smol::fs::metadata(&path).await;
eprintln!("Metadata result: {:?}", metadata.is_ok());
if let Ok(metadata) = metadata {
let file_size = metadata.len();
eprintln!("File size: {} bytes", file_size);
// Get model limits
let (max_image_size, model_name) = thread
.update_in(cx, |thread, _window, cx| {
let model = thread.thread().read(cx).configured_model();
let max_size = model
.as_ref()
.map(|m| m.model.max_image_size())
.unwrap_or(10 * 1024 * 1024);
let name = model.as_ref().map(|m| m.model.name().0.to_string());
(max_size, name)
})
.ok()
.unwrap_or((10 * 1024 * 1024, None));
eprintln!("Max image size: {}, Model: {:?}", max_image_size, model_name);
eprintln!("File size: {:.2} MB, Limit: {:.2} MB",
file_size as f64 / 1_048_576.0,
max_image_size as f64 / 1_048_576.0);
if file_size > max_image_size {
eprintln!("FILE SIZE EXCEEDS LIMIT!");
let error_message = if let Some(model_name) = &model_name {
if max_image_size == 0 {
format!("{} does not support image attachments", model_name)
} else {
let size_mb = file_size as f64 / 1_048_576.0;
let max_size_mb = max_image_size as f64 / 1_048_576.0;
format!(
"Image ({:.1} MB) exceeds {}'s {:.1} MB size limit",
size_mb, model_name, max_size_mb
)
}
} else {
let size_mb = file_size as f64 / 1_048_576.0;
format!("Image ({:.1} MB) exceeds the 10 MB size limit", size_mb)
};
eprintln!("Showing error toast: {}", error_message);
cx.update(|_, cx| {
eprintln!("Inside cx.update for toast");
if let Some(workspace) = workspace.upgrade() {
eprintln!("Got workspace, showing toast!");
let _ = workspace.update(cx, |workspace, cx| {
use workspace::{Toast, notifications::NotificationId};
struct ImageRejectionToast;
workspace.show_toast(
Toast::new(
NotificationId::unique::<ImageRejectionToast>(),
error_message,
),
cx,
);
});
eprintln!("Toast command issued!");
} else {
eprintln!("FAILED to upgrade workspace!");
}
})
.log_err();
} else {
eprintln!("Image within size limits, loading file");
// Load the image file
match smol::fs::read(&path).await {
Ok(data) => {
eprintln!("Successfully read {} bytes", data.len());
// Determine image format from extension
let format = path.extension()
.and_then(|ext| ext.to_str())
.and_then(|ext| {
match ext.to_lowercase().as_str() {
"png" => Some(ImageFormat::Png),
"jpg" | "jpeg" => Some(ImageFormat::Jpeg),
"gif" => Some(ImageFormat::Gif),
"webp" => Some(ImageFormat::Webp),
"bmp" => Some(ImageFormat::Bmp),
"svg" => Some(ImageFormat::Svg),
"tiff" | "tif" => Some(ImageFormat::Tiff),
_ => None
}
})
.unwrap_or(ImageFormat::Png); // Default to PNG if unknown
// Create image from data
let image = Image::from_bytes(format, data);
let image_arc = Arc::new(image);
// Add to context store
thread
.update_in(cx, |thread, _window, cx| {
thread.context_store().update(cx, |store, cx| {
store.add_image_instance(image_arc, cx);
});
})
.log_err();
eprintln!("Image added to context store!");
}
Err(e) => {
log::error!("Failed to read image file: {}", e);
}
}
}
} else {
eprintln!("Failed to get file metadata!");
}
})
.detach();
eprintln!("Image task detached");
} else {
eprintln!("Not an image, using project path logic");
// For non-image files, use the existing project path logic
let project = this.project.clone();
let context_store = thread.read(cx).context_store().clone();
let path = path.to_path_buf();
cx.spawn_in(window, async move |_, cx| {
if let Some(task) = cx.update(|_, cx| {
Workspace::project_path_for_path(project.clone(), &path, false, cx)
}).ok() {
if let Some((_, project_path)) = task.await.log_err() {
context_store
.update(cx, |store, cx| {
store.add_file_from_path(project_path, false, cx).detach();
})
.ok();
}
}
})
.detach();
}
}
}
-this.update_in(cx, |this, window, cx| {
-this.handle_drop(paths, added_worktrees, window, cx);
-})
-.ok();
-})
-.detach();
ActiveView::TextThread { .. } => {
eprintln!("In ActiveView::TextThread branch");
// Keep existing behavior for text threads
let tasks = paths
.paths()
.into_iter()
.map(|path| {
Workspace::project_path_for_path(this.project.clone(), &path, false, cx)
})
.collect::<Vec<_>>();
cx.spawn_in(window, async move |this, cx| {
let mut paths = vec![];
let mut added_worktrees = vec![];
let opened_paths = futures::future::join_all(tasks).await;
for entry in opened_paths {
if let Some((worktree, project_path)) = entry.log_err() {
added_worktrees.push(worktree);
paths.push(project_path);
}
}
this.update_in(cx, |this, window, cx| {
this.handle_drop(paths, added_worktrees, window, cx);
})
.ok();
})
.detach();
}
_ => {
eprintln!("In unknown ActiveView branch");
}
}
}))
}
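// A standalone sketch, not part of the diff, of the extension checks in the drop
// handler above, with a plain enum standing in for `gpui::ImageFormat`. Unknown
// extensions fall back to PNG, matching the `unwrap_or(ImageFormat::Png)` above.
#[derive(Debug, PartialEq)]
enum SketchFormat { Png, Jpeg, Gif, Webp, Bmp, Svg, Tiff }

fn format_for_extension(ext: &str) -> Option<SketchFormat> {
    match ext.to_lowercase().as_str() {
        "png" => Some(SketchFormat::Png),
        "jpg" | "jpeg" => Some(SketchFormat::Jpeg),
        "gif" => Some(SketchFormat::Gif),
        "webp" => Some(SketchFormat::Webp),
        "bmp" => Some(SketchFormat::Bmp),
        "svg" => Some(SketchFormat::Svg),
        "tiff" | "tif" => Some(SketchFormat::Tiff),
        _ => None,
    }
}

fn main() {
    assert_eq!(format_for_extension("JPEG"), Some(SketchFormat::Jpeg));
    assert_eq!(format_for_extension("xyz").unwrap_or(SketchFormat::Png), SketchFormat::Png);
}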
@@ -2965,20 +3153,47 @@ impl AgentPanel {
window: &mut Window,
cx: &mut Context<Self>,
) {
// This method is now only used for non-image files and text threads
match &self.active_view {
ActiveView::Thread { thread, .. } => {
let context_store = thread.read(cx).context_store().clone();
// All paths here should be non-image files
context_store.update(cx, move |context_store, cx| {
let mut tasks = Vec::new();
-for project_path in &paths {
-tasks.push(context_store.add_file_from_path(
-project_path.clone(),
-false,
-cx,
-));
+for path in paths {
+tasks.push(context_store.add_file_from_path(path, false, cx));
}
-cx.background_spawn(async move {
-futures::future::join_all(tasks).await;
+cx.spawn(async move |_, cx| {
+let results = futures::future::join_all(tasks).await;
// Show error toasts for any file errors
for result in results {
if let Err(err) = result {
cx.update(|cx| {
if let Some(workspace) = cx
.active_window()
.and_then(|window| window.downcast::<Workspace>())
{
let _ = workspace.update(cx, |workspace, _, cx| {
use workspace::{Toast, notifications::NotificationId};
struct FileLoadErrorToast;
workspace.show_toast(
Toast::new(
NotificationId::unique::<FileLoadErrorToast>(),
err.to_string(),
),
cx,
);
});
}
})
.log_err();
}
}
// Need to hold onto the worktrees until they have already been used when
// opening the buffers.
drop(added_worktrees);
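// A standalone sketch, separate from the diff, of the join-then-report pattern above:
// every load task runs to completion and each failure is surfaced individually,
// rather than the batch aborting on the first error. Assumes the `futures` crate,
// which the diff already uses.
use futures::future::join_all;

fn main() {
    let tasks = vec!["a.png", "missing.txt"].into_iter().map(|path| async move {
        if path.ends_with(".png") {
            Ok(format!("loaded {path}"))
        } else {
            Err(format!("failed to load {path}"))
        }
    });
    let results = futures::executor::block_on(join_all(tasks));
    for result in results {
        if let Err(err) = result {
            eprintln!("{err}"); // each error is reported; successes are unaffected
        }
    }
}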

View file

@@ -9,6 +9,7 @@ mod context_picker;
mod context_server_configuration;
mod context_strip;
mod debug;
mod inline_assistant;
mod inline_prompt_editor;
mod language_model_selector;

View file

@@ -656,6 +656,10 @@ mod tests {
false
}
fn max_image_size(&self) -> u64 {
0
}
fn telemetry_id(&self) -> String {
format!("{}/{}", self.provider_id.0, self.name.0)
}

View file

@@ -27,6 +27,7 @@ use file_icons::FileIcons;
use fs::Fs;
use futures::future::Shared;
use futures::{FutureExt as _, future};
use gpui::AsyncApp;
use gpui::{
Animation, AnimationExt, App, Entity, EventEmitter, Focusable, Subscription, Task, TextStyle,
WeakEntity, linear_color_stop, linear_gradient, point, pulsating_between,
@@ -62,6 +63,7 @@ use agent::{
context_store::ContextStore,
thread_store::{TextThreadStore, ThreadStore},
};
use workspace::{Toast, notifications::NotificationId};
#[derive(RegisterComponent)]
pub struct MessageEditor {
@@ -380,11 +382,15 @@ impl MessageEditor {
let checkpoint = git_store.update(cx, |git_store, cx| git_store.checkpoint(cx));
let context_task = self.reload_context(cx);
let window_handle = window.window_handle();
let workspace = self.workspace.clone();
cx.spawn(async move |_this, cx| {
let (checkpoint, loaded_context) = future::join(checkpoint, context_task).await;
let loaded_context = loaded_context.unwrap_or_default();
// Check for rejected images and show notifications
Self::notify_rejected_images(&loaded_context, &model, &workspace, &cx);
thread
.update(cx, |thread, cx| {
thread.insert_user_message(
@@ -412,6 +418,80 @@ impl MessageEditor {
.detach();
}
fn notify_rejected_images(
loaded_context: &agent::context::ContextLoadResult,
model: &Arc<dyn language_model::LanguageModel>,
workspace: &WeakEntity<Workspace>,
cx: &AsyncApp,
) {
let rejected_images = loaded_context.loaded_context.check_image_size_limits(model);
if rejected_images.is_empty() {
return;
}
let workspace = workspace.clone();
let model_name = rejected_images[0].model_name.clone();
let max_size = model.max_image_size();
let count = rejected_images.len();
cx.update(|cx| {
if let Some(workspace) = workspace.upgrade() {
workspace.update(cx, |workspace, cx| {
let message = if max_size == 0 {
Self::format_unsupported_images_message(&model_name, count)
} else {
Self::format_size_limit_message(
&model_name,
count,
max_size,
&rejected_images,
)
};
struct ImageRejectionToast;
workspace.show_toast(
Toast::new(NotificationId::unique::<ImageRejectionToast>(), message),
cx,
);
});
}
})
.log_err();
}
fn format_unsupported_images_message(model_name: &str, count: usize) -> String {
let plural = if count > 1 { "s" } else { "" };
format!(
"{} does not support image attachments. {} image{} will be excluded from your message.",
model_name, count, plural
)
}
fn format_size_limit_message(
model_name: &str,
count: usize,
max_size: u64,
rejected_images: &[agent::context::RejectedImage],
) -> String {
let plural = if count > 1 { "s" } else { "" };
let max_size_mb = max_size as f64 / 1_048_576.0;
// If only one image, show its specific size
if count == 1 {
let image_size_mb = rejected_images[0].size as f64 / 1_048_576.0;
format!(
"Image ({:.1} MB) exceeds {}'s {:.1} MB size limit and will be excluded.",
image_size_mb, model_name, max_size_mb
)
} else {
format!(
"{} image{} exceeded {}'s {:.1} MB size limit and will be excluded.",
count, plural, model_name, max_size_mb
)
}
}
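// A standalone sketch, not part of the diff, of the pluralized toast text built by
// the helpers above; the signature is simplified for illustration.
fn unsupported_images_message(model_name: &str, count: usize) -> String {
    let plural = if count > 1 { "s" } else { "" };
    format!(
        "{} does not support image attachments. {} image{} will be excluded from your message.",
        model_name, count, plural
    )
}

fn main() {
    assert_eq!(
        unsupported_images_message("Test Model", 2),
        "Test Model does not support image attachments. 2 images will be excluded from your message."
    );
}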
fn stop_current_and_send_new_message(&mut self, window: &mut Window, cx: &mut Context<Self>) {
self.thread.update(cx, |thread, cx| {
thread.cancel_editing(cx);

View file

@@ -165,6 +165,10 @@ impl LanguageModel for FakeLanguageModel {
false
}
fn max_image_size(&self) -> u64 {
0 // No image support
}
fn telemetry_id(&self) -> String {
"fake".to_string()
}

View file

@@ -284,8 +284,13 @@ pub trait LanguageModel: Send + Sync {
None
}
-/// Whether this model supports images
-fn supports_images(&self) -> bool;
+/// Whether this model supports images. This is determined by whether self.max_image_size() is positive.
+fn supports_images(&self) -> bool {
+self.max_image_size() > 0
+}
+/// The maximum image size the model accepts, in bytes. (Zero means images are unsupported.)
+fn max_image_size(&self) -> u64;
/// Whether this model supports tools.
fn supports_tools(&self) -> bool;
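// A standalone sketch, separate from the diff, of the default-method pattern this
// hunk introduces: implementors provide only `max_image_size()`, and
// `supports_images()` is derived from it. `VisionLimits` is a simplified stand-in
// for the `LanguageModel` trait.
trait VisionLimits {
    fn max_image_size(&self) -> u64;
    fn supports_images(&self) -> bool {
        self.max_image_size() > 0
    }
}

struct NoVision;
impl VisionLimits for NoVision {
    fn max_image_size(&self) -> u64 { 0 }
}

struct FiveMib;
impl VisionLimits for FiveMib {
    fn max_image_size(&self) -> u64 { 5 * 1024 * 1024 }
}

fn main() {
    assert!(!NoVision.supports_images());
    assert!(FiveMib.supports_images());
}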

View file

@@ -437,6 +437,13 @@ impl LanguageModel for AnthropicModel {
true
}
fn max_image_size(&self) -> u64 {
// Anthropic documentation: https://docs.anthropic.com/en/docs/build-with-claude/vision#faq
// FAQ section: "Is there a limit to the image file size I can upload?"
// "API: Maximum 5MB per image"
5_242_880 // 5 MiB - Anthropic's stated maximum
}
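// A quick standalone check, not part of the diff, of the constant above and of how
// it gates attachments: the documented "5MB" is encoded as 5 MiB, and an image
// passes only when its byte count is at or under the cap.
fn main() {
    const ANTHROPIC_MAX: u64 = 5_242_880;
    assert_eq!(5 * 1024 * 1024, ANTHROPIC_MAX);
    let allowed = |bytes: u64| bytes <= ANTHROPIC_MAX;
    assert!(allowed(1_048_576)); // a 1 MiB image is accepted
    assert!(!allowed(6_000_000)); // the 6 MB test image in this diff is rejected
}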
fn supports_tool_choice(&self, choice: LanguageModelToolChoice) -> bool {
match choice {
LanguageModelToolChoice::Auto

View file

@@ -504,6 +504,10 @@ impl LanguageModel for BedrockModel {
false
}
fn max_image_size(&self) -> u64 {
0 // Bedrock models don't currently support images in this implementation
}
fn supports_tool_choice(&self, choice: LanguageModelToolChoice) -> bool {
match choice {
LanguageModelToolChoice::Auto | LanguageModelToolChoice::Any => {

View file

@@ -699,6 +699,18 @@ impl LanguageModel for CloudLanguageModel {
self.model.supports_max_mode
}
fn max_image_size(&self) -> u64 {
if self.model.supports_images {
// Use a conservative limit that works across all providers
// Anthropic has the smallest limit at 5 MiB
// Anthropic documentation: https://docs.anthropic.com/en/docs/build-with-claude/vision#faq
// "API: Maximum 5MB per image"
5_242_880 // 5 MiB
} else {
0
}
}
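// An illustrative sketch, separate from the diff, of the "smallest common cap"
// reasoning in the comment above: the cloud model may route to several upstream
// providers, so it advertises the minimum of their per-image limits (values as
// cited elsewhere in this diff).
fn conservative_limit(provider_limits: &[u64]) -> u64 {
    provider_limits.iter().copied().min().unwrap_or(0)
}

fn main() {
    let limits = [5_242_880, 20_971_520, 20_971_520]; // Anthropic, OpenAI, Google
    assert_eq!(conservative_limit(&limits), 5_242_880);
}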
fn telemetry_id(&self) -> String {
format!("zed.dev/{}", self.model.id)
}

View file

@@ -216,6 +216,17 @@ impl LanguageModel for CopilotChatLanguageModel {
self.model.supports_vision()
}
fn max_image_size(&self) -> u64 {
if self.model.supports_vision() {
// OpenAI documentation: https://help.openai.com/en/articles/8983719-what-are-the-file-upload-size-restrictions
// "For images, there's a limit of 20MB per image."
// GitHub Copilot uses OpenAI models under the hood
20_971_520 // 20 MB
} else {
0
}
}
fn tool_input_format(&self) -> LanguageModelToolSchemaFormat {
match self.model.vendor() {
ModelVendor::OpenAI | ModelVendor::Anthropic => {

View file

@@ -302,6 +302,10 @@ impl LanguageModel for DeepSeekLanguageModel {
false
}
fn max_image_size(&self) -> u64 {
0 // DeepSeek models don't currently support images
}
fn telemetry_id(&self) -> String {
format!("deepseek/{}", self.model.id())
}

View file

@@ -349,6 +349,17 @@ impl LanguageModel for GoogleLanguageModel {
self.model.supports_images()
}
fn max_image_size(&self) -> u64 {
if self.model.supports_images() {
// Google Gemini documentation: https://ai.google.dev/gemini-api/docs/image-understanding
// "Note: Inline image data limits your total request size (text prompts, system instructions, and inline bytes) to 20MB."
// "For larger requests, upload image files using the File API."
20_971_520 // 20 MB - Google Gemini's file API limit
} else {
0
}
}
fn supports_tool_choice(&self, choice: LanguageModelToolChoice) -> bool {
match choice {
LanguageModelToolChoice::Auto

View file

@@ -410,6 +410,18 @@ impl LanguageModel for LmStudioLanguageModel {
self.model.supports_images
}
fn max_image_size(&self) -> u64 {
if self.model.supports_images {
// LM Studio documentation: https://lmstudio.ai/docs/typescript/llm-prediction/image-input
// LM Studio does not document a limit explicitly, so we assume the standard 20MB
// per-image cap that OpenAI documents: https://help.openai.com/en/articles/8983719-what-are-the-file-upload-size-restrictions
// "For images, there's a limit of 20MB per image."
20_971_520 // 20 MB - Default limit for local models
} else {
0
}
}
fn telemetry_id(&self) -> String {
format!("lmstudio/{}", self.model.id())
}

View file

@@ -317,6 +317,18 @@ impl LanguageModel for MistralLanguageModel {
self.model.supports_images()
}
fn max_image_size(&self) -> u64 {
if self.model.supports_images() {
// Mistral's API limit, as reported by InfoQ: https://www.infoq.com/news/2025/03/mistral-ai-ocr-api/
// "The API is currently limited to files that do not exceed 50MB in size or 1,000 pages"
// Also confirmed in https://github.com/everaldo/mcp-mistral-ocr/blob/master/README.md
// "Maximum file size: 50MB (enforced by Mistral API)"
52_428_800 // 50 MB - Mistral's OCR API limit
} else {
0
}
}
fn telemetry_id(&self) -> String {
format!("mistral/{}", self.model.id())
}

View file

@@ -365,6 +365,16 @@ impl LanguageModel for OllamaLanguageModel {
self.model.supports_vision.unwrap_or(false)
}
fn max_image_size(&self) -> u64 {
if self.model.supports_vision.unwrap_or(false) {
// Ollama documentation: https://github.com/ollama/ollama/releases/tag/v0.1.15
// "Images up to 100MB in size are supported."
104_857_600 // 100 MB - Ollama's documented API limit
} else {
0
}
}
fn supports_tool_choice(&self, choice: LanguageModelToolChoice) -> bool {
match choice {
LanguageModelToolChoice::Auto => false,

View file

@@ -302,6 +302,14 @@ impl LanguageModel for OpenAiLanguageModel {
false
}
fn max_image_size(&self) -> u64 {
// OpenAI documentation: https://help.openai.com/en/articles/8983719-what-are-the-file-upload-size-restrictions
// "For images, there's a limit of 20MB per image."
// Note: OpenAI models don't currently support images in this implementation
// When enabled, OpenAI supports up to 20MB (20_971_520 bytes)
0
}
fn supports_tool_choice(&self, choice: LanguageModelToolChoice) -> bool {
match choice {
LanguageModelToolChoice::Auto => true,

View file

@@ -407,6 +407,18 @@ impl LanguageModel for OpenRouterLanguageModel {
self.model.supports_images.unwrap_or(false)
}
fn max_image_size(&self) -> u64 {
if self.model.supports_images.unwrap_or(false) {
// OpenRouter documentation: https://openrouter.ai/docs/features/images-and-pdfs
// While not explicitly stated, OpenRouter appears to follow OpenAI's standard
// which is documented at: https://help.openai.com/en/articles/8983719-what-are-the-file-upload-size-restrictions
// "For images, there's a limit of 20MB per image."
20_971_520 // 20 MB - OpenRouter's default limit
} else {
0
}
}
fn count_tokens(
&self,
request: LanguageModelRequest,

View file

@@ -305,6 +305,14 @@ impl LanguageModel for VercelLanguageModel {
true
}
fn max_image_size(&self) -> u64 {
// Vercel AI SDK uses standard provider limits. Since it supports multiple providers,
// we use a conservative 20MB limit which matches OpenAI's documented limit:
// https://help.openai.com/en/articles/8983719-what-are-the-file-upload-size-restrictions
// "For images, there's a limit of 20MB per image."
20_971_520 // 20 MB - Default limit for Vercel AI SDK
}
fn supports_tool_choice(&self, choice: LanguageModelToolChoice) -> bool {
match choice {
LanguageModelToolChoice::Auto