agent: Support pasting images as context (#29177)

https://github.com/user-attachments/assets/d6a27b05-3590-4f40-a820-f6f99f6bd581

Release Notes:

- agent: Added support for pasting images as context

---------

Co-authored-by: Danilo Leal <daniloleal09@gmail.com>
This commit is contained in:
Bennet Bo Fenner 2025-04-22 11:01:01 +02:00 committed by GitHub
parent 3357736aea
commit eca6d5a04e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 407 additions and 99 deletions

1
assets/icons/image.svg Normal file
View file

@ -0,0 +1 @@
<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-image-icon lucide-image"><rect width="18" height="18" x="3" y="3" rx="2" ry="2"/><circle cx="9" cy="9" r="2"/><path d="m21 15-3.086-3.086a2 2 0 0 0-2.828 0L6 21"/></svg>

After

Width:  |  Height:  |  Size: 372 B

View file

@ -3344,6 +3344,7 @@ pub(crate) fn open_context(
}),
cx,
),
AssistantContext::Image(_) => {}
}
}

View file

@ -4,9 +4,10 @@ use std::{
sync::Arc,
};
use gpui::{App, Entity, SharedString};
use futures::{FutureExt, future::Shared};
use gpui::{App, Entity, SharedString, Task};
use language::Buffer;
use language_model::LanguageModelRequestMessage;
use language_model::{LanguageModelImage, LanguageModelRequestMessage};
use project::{ProjectEntryId, ProjectPath, Worktree};
use prompt_store::UserPromptId;
use rope::Point;
@ -36,6 +37,7 @@ pub enum ContextKind {
FetchedUrl,
Thread,
Rules,
Image,
}
impl ContextKind {
@ -48,6 +50,7 @@ impl ContextKind {
ContextKind::FetchedUrl => IconName::Globe,
ContextKind::Thread => IconName::MessageBubbles,
ContextKind::Rules => RULES_ICON,
ContextKind::Image => IconName::Image,
}
}
}
@ -61,6 +64,7 @@ pub enum AssistantContext {
Thread(ThreadContext),
Excerpt(ExcerptContext),
Rules(RulesContext),
Image(ImageContext),
}
impl AssistantContext {
@ -73,6 +77,7 @@ impl AssistantContext {
Self::Thread(thread) => thread.id,
Self::Excerpt(excerpt) => excerpt.id,
Self::Rules(rules) => rules.id,
Self::Image(image) => image.id,
}
}
}
@ -140,6 +145,31 @@ impl ThreadContext {
}
}
/// An image attached to the conversation as context.
///
/// The image is converted into a [`LanguageModelImage`] by `image_task`; the
/// accessors below inspect that task without blocking.
#[derive(Debug, Clone)]
pub struct ImageContext {
    /// Identifier of this context entry.
    pub id: ContextId,
    /// The image exactly as it was handed to the context store.
    pub original_image: Arc<gpui::Image>,
    /// Shared conversion task; a `None` output is reported as an error by
    /// [`ImageContext::is_error`].
    pub image_task: Shared<Task<Option<LanguageModelImage>>>,
}

impl ImageContext {
    /// Polls a clone of the shared task once.
    ///
    /// The outer `Option` is `None` while the task is still pending; the inner
    /// `Option` is the task's own output.
    fn poll_image_task(&self) -> Option<Option<LanguageModelImage>> {
        self.image_task.clone().now_or_never()
    }

    /// Returns the converted image, or `None` if the task is still pending or
    /// produced no image.
    pub fn image(&self) -> Option<LanguageModelImage> {
        self.poll_image_task().flatten()
    }

    /// Returns `true` while the conversion task has not completed yet.
    pub fn is_loading(&self) -> bool {
        self.poll_image_task().is_none()
    }

    /// Returns `true` once the conversion task has completed without
    /// producing an image.
    pub fn is_error(&self) -> bool {
        matches!(self.poll_image_task(), Some(None))
    }
}
#[derive(Clone)]
pub struct ContextBuffer {
pub id: BufferId,
@ -227,6 +257,7 @@ pub fn format_context_as_string<'a>(
AssistantContext::FetchedUrl(context) => fetch_context.push(context),
AssistantContext::Thread(context) => thread_context.push(context),
AssistantContext::Rules(context) => rules_context.push(context),
AssistantContext::Image(_) => {}
}
}

View file

@ -6,8 +6,9 @@ use anyhow::{Context as _, Result, anyhow};
use collections::{BTreeMap, HashMap, HashSet};
use futures::future::join_all;
use futures::{self, Future, FutureExt, future};
use gpui::{App, AppContext as _, Context, Entity, SharedString, Task, WeakEntity};
use gpui::{App, AppContext as _, Context, Entity, Image, SharedString, Task, WeakEntity};
use language::Buffer;
use language_model::LanguageModelImage;
use project::{Project, ProjectEntryId, ProjectItem, ProjectPath, Worktree};
use prompt_store::UserPromptId;
use rope::{Point, Rope};
@ -17,7 +18,8 @@ use util::{ResultExt as _, maybe};
use crate::ThreadStore;
use crate::context::{
AssistantContext, ContextBuffer, ContextId, ContextSymbol, ContextSymbolId, DirectoryContext,
ExcerptContext, FetchedUrlContext, FileContext, RulesContext, SymbolContext, ThreadContext,
ExcerptContext, FetchedUrlContext, FileContext, ImageContext, RulesContext, SymbolContext,
ThreadContext,
};
use crate::context_strip::SuggestedContext;
use crate::thread::{Thread, ThreadId};
@ -448,6 +450,32 @@ impl ContextStore {
cx.notify();
}
/// Adds `image` to the store as a new image context.
///
/// Conversion into a [`LanguageModelImage`] is started immediately and kept as
/// a shared task on the new entry; observers are notified once the entry has
/// been pushed.
pub fn add_image(&mut self, image: Arc<Image>, cx: &mut Context<ContextStore>) {
    let image_task = LanguageModelImage::from_image(Arc::clone(&image), cx).shared();
    let image_context = ImageContext {
        id: self.next_context_id.post_inc(),
        original_image: image,
        image_task,
    };
    self.context.push(AssistantContext::Image(image_context));
    cx.notify();
}
/// Returns a task that resolves once every image conversion task currently
/// held by the store has completed.
///
/// Only the image contexts present at call time are awaited; their outputs
/// are discarded.
pub fn wait_for_images(&self, cx: &App) -> Task<()> {
    let mut pending = Vec::new();
    for context in &self.context {
        if let AssistantContext::Image(image_context) = context {
            pending.push(image_context.image_task.clone());
        }
    }

    cx.spawn(async move |_cx| {
        join_all(pending).await;
    })
}
pub fn add_excerpt(
&mut self,
range: Range<Anchor>,
@ -545,6 +573,7 @@ impl ContextStore {
AssistantContext::Rules(RulesContext { prompt_id, .. }) => {
self.user_rules.remove(&prompt_id);
}
AssistantContext::Image(_) => {}
}
cx.notify();
@ -673,7 +702,8 @@ impl ContextStore {
| AssistantContext::Excerpt(_)
| AssistantContext::FetchedUrl(_)
| AssistantContext::Thread(_)
| AssistantContext::Rules(_) => None,
| AssistantContext::Rules(_)
| AssistantContext::Image(_) => None,
})
.collect()
}
@ -907,6 +937,7 @@ pub fn refresh_context_store_text(
let context_store = context_store.clone();
return Some(refresh_user_rules(context_store, user_rules_context, cx));
}
AssistantContext::Image(_) => {}
}
None

View file

@ -6,7 +6,7 @@ use crate::context::{AssistantContext, format_context_as_string};
use crate::tool_compatibility::{IncompatibleToolsState, IncompatibleToolsTooltip};
use buffer_diff::BufferDiff;
use collections::HashSet;
use editor::actions::MoveUp;
use editor::actions::{MoveUp, Paste};
use editor::{
ContextMenuOptions, ContextMenuPlacement, Editor, EditorElement, EditorEvent, EditorMode,
EditorStyle, MultiBuffer,
@ -14,8 +14,8 @@ use editor::{
use file_icons::FileIcons;
use fs::Fs;
use gpui::{
Animation, AnimationExt, App, Entity, EventEmitter, Focusable, Subscription, Task, TextStyle,
WeakEntity, linear_color_stop, linear_gradient, point, pulsating_between,
Animation, AnimationExt, App, ClipboardEntry, Entity, EventEmitter, Focusable, Subscription,
Task, TextStyle, WeakEntity, linear_color_stop, linear_gradient, point, pulsating_between,
};
use language::{Buffer, Language};
use language_model::{ConfiguredModel, LanguageModelRegistry, LanguageModelRequestMessage};
@ -271,6 +271,7 @@ impl MessageEditor {
let refresh_task =
refresh_context_store_text(self.context_store.clone(), &HashSet::default(), cx);
let wait_for_images = self.context_store.read(cx).wait_for_images(cx);
let thread = self.thread.clone();
let context_store = self.context_store.clone();
@ -280,6 +281,7 @@ impl MessageEditor {
cx.spawn(async move |this, cx| {
let checkpoint = checkpoint.await.ok();
refresh_task.await;
wait_for_images.await;
thread
.update(cx, |thread, cx| {
@ -293,7 +295,12 @@ impl MessageEditor {
let excerpt_ids = context_store
.context()
.iter()
.filter(|ctx| matches!(ctx, AssistantContext::Excerpt(_)))
.filter(|ctx| {
matches!(
ctx,
AssistantContext::Excerpt(_) | AssistantContext::Image(_)
)
})
.map(|ctx| ctx.id())
.collect::<Vec<_>>();
@ -370,6 +377,34 @@ impl MessageEditor {
}
}
/// Handles the `Paste` action by turning clipboard images into context.
///
/// If the clipboard is empty or holds no image entries, this returns without
/// stopping propagation of the action. Otherwise propagation is stopped and
/// every image entry is added to the context store.
fn paste(&mut self, _: &Paste, _: &mut Window, cx: &mut Context<Self>) {
    let Some(clipboard) = cx.read_from_clipboard() else {
        return;
    };

    let pasted_images: Vec<_> = clipboard
        .into_entries()
        .filter_map(|entry| match entry {
            ClipboardEntry::Image(image) => Some(image),
            _ => None,
        })
        .collect();
    if pasted_images.is_empty() {
        return;
    }

    // The clipboard contained at least one image: consume the action here.
    cx.stop_propagation();

    self.context_store.update(cx, |store, cx| {
        for image in pasted_images.into_iter().map(Arc::new) {
            store.add_image(image, cx);
        }
    });
}
fn handle_review_click(&self, window: &mut Window, cx: &mut Context<Self>) {
AgentDiff::deploy(self.thread.clone(), self.workspace.clone(), window, cx).log_err();
}
@ -445,6 +480,7 @@ impl MessageEditor {
.on_action(cx.listener(Self::move_up))
.on_action(cx.listener(Self::toggle_chat_mode))
.on_action(cx.listener(Self::expand_message_editor))
.capture_action(cx.listener(Self::paste))
.gap_2()
.p_2()
.bg(editor_bg_color)

View file

@ -16,7 +16,7 @@ use git::repository::DiffType;
use gpui::{App, AppContext, Context, Entity, EventEmitter, SharedString, Task, WeakEntity};
use language_model::{
ConfiguredModel, LanguageModel, LanguageModelCompletionEvent, LanguageModelId,
LanguageModelKnownError, LanguageModelRegistry, LanguageModelRequest,
LanguageModelImage, LanguageModelKnownError, LanguageModelRegistry, LanguageModelRequest,
LanguageModelRequestMessage, LanguageModelRequestTool, LanguageModelToolResult,
LanguageModelToolUseId, MaxMonthlySpendReachedError, MessageContent,
ModelRequestLimitReachedError, PaymentRequiredError, RequestUsage, Role, StopReason,
@ -97,6 +97,7 @@ pub struct Message {
pub role: Role,
pub segments: Vec<MessageSegment>,
pub context: String,
pub images: Vec<LanguageModelImage>,
}
impl Message {
@ -415,6 +416,7 @@ impl Thread {
})
.collect(),
context: message.context,
images: Vec::new(),
})
.collect(),
next_message_id,
@ -747,6 +749,19 @@ impl Thread {
}
}
if let Some(message) = self.messages.iter_mut().find(|m| m.id == message_id) {
message.images = new_context
.iter()
.filter_map(|context| {
if let AssistantContext::Image(image_context) = context {
image_context.image_task.clone().now_or_never().flatten()
} else {
None
}
})
.collect::<Vec<_>>();
}
self.action_log.update(cx, |log, cx| {
// Track all buffers added as context
for ctx in &new_context {
@ -773,7 +788,8 @@ impl Thread {
}
AssistantContext::FetchedUrl(_)
| AssistantContext::Thread(_)
| AssistantContext::Rules(_) => {}
| AssistantContext::Rules(_)
| AssistantContext::Image(_) => {}
}
}
});
@ -814,6 +830,7 @@ impl Thread {
role,
segments,
context: String::new(),
images: Vec::new(),
});
self.touch_updated_at();
cx.emit(ThreadEvent::MessageAdded(id));
@ -1037,6 +1054,21 @@ impl Thread {
.push(MessageContent::Text(message.context.to_string()));
}
if !message.images.is_empty() {
// Some providers only support image parts after an initial text part
if request_message.content.is_empty() {
request_message
.content
.push(MessageContent::Text("Images attached by user:".to_string()));
}
for image in &message.images {
request_message
.content
.push(MessageContent::Image(image.clone()))
}
}
for segment in &message.segments {
match segment {
MessageSegment::Text(text) => {

View file

@ -1,11 +1,14 @@
use std::sync::Arc;
use std::{rc::Rc, time::Duration};
use file_icons::FileIcons;
use gpui::ClickEvent;
use gpui::{Animation, AnimationExt as _, pulsating_between};
use ui::{IconButtonShape, Tooltip, prelude::*};
use futures::FutureExt;
use gpui::{Animation, AnimationExt as _, AnyView, Image, MouseButton, pulsating_between};
use gpui::{ClickEvent, Task};
use language_model::LanguageModelImage;
use ui::{IconButtonShape, Tooltip, prelude::*, tooltip_container};
use crate::context::{AssistantContext, ContextId, ContextKind};
use crate::context::{AssistantContext, ContextId, ContextKind, ImageContext};
#[derive(IntoElement)]
pub enum ContextPill {
@ -120,74 +123,95 @@ impl RenderOnce for ContextPill {
on_remove,
focused,
on_click,
} => base_pill
.bg(color.element_background)
.border_color(if *focused {
color.border_focused
} else {
color.border.opacity(0.5)
})
.pr(if on_remove.is_some() { px(2.) } else { px(4.) })
.child(
h_flex()
.id("context-data")
.gap_1()
.child(
div().max_w_64().child(
Label::new(context.name.clone())
.size(LabelSize::Small)
.truncate(),
),
)
.when_some(context.parent.as_ref(), |element, parent_name| {
if *dupe_name {
element.child(
Label::new(parent_name.clone())
.size(LabelSize::XSmall)
.color(Color::Muted),
)
} else {
element
}
})
.when_some(context.tooltip.as_ref(), |element, tooltip| {
element.tooltip(Tooltip::text(tooltip.clone()))
}),
)
.when_some(on_remove.as_ref(), |element, on_remove| {
element.child(
IconButton::new(("remove", context.id.0), IconName::Close)
.shape(IconButtonShape::Square)
.icon_size(IconSize::XSmall)
.tooltip(Tooltip::text("Remove Context"))
.on_click({
let on_remove = on_remove.clone();
move |event, window, cx| on_remove(event, window, cx)
} => {
let status_is_error = matches!(context.status, ContextStatus::Error { .. });
base_pill
.pr(if on_remove.is_some() { px(2.) } else { px(4.) })
.map(|pill| {
if status_is_error {
pill.bg(cx.theme().status().error_background)
.border_color(cx.theme().status().error_border)
} else if *focused {
pill.bg(color.element_background)
.border_color(color.border_focused)
} else {
pill.bg(color.element_background)
.border_color(color.border.opacity(0.5))
}
})
.child(
h_flex()
.id("context-data")
.gap_1()
.child(
div().max_w_64().child(
Label::new(context.name.clone())
.size(LabelSize::Small)
.truncate(),
),
)
.when_some(context.parent.as_ref(), |element, parent_name| {
if *dupe_name {
element.child(
Label::new(parent_name.clone())
.size(LabelSize::XSmall)
.color(Color::Muted),
)
} else {
element
}
})
.when_some(context.tooltip.as_ref(), |element, tooltip| {
element.tooltip(Tooltip::text(tooltip.clone()))
})
.map(|element| match &context.status {
ContextStatus::Ready => element
.when_some(
context.show_preview.as_ref(),
|element, show_preview| {
element.hoverable_tooltip({
let show_preview = show_preview.clone();
move |window, cx| show_preview(window, cx)
})
},
)
.into_any(),
ContextStatus::Loading { message } => element
.tooltip(ui::Tooltip::text(message.clone()))
.with_animation(
"pulsating-ctx-pill",
Animation::new(Duration::from_secs(2))
.repeat()
.with_easing(pulsating_between(0.4, 0.8)),
|label, delta| label.opacity(delta),
)
.into_any_element(),
ContextStatus::Error { message } => element
.tooltip(ui::Tooltip::text(message.clone()))
.into_any_element(),
}),
)
})
.when_some(on_click.as_ref(), |element, on_click| {
let on_click = on_click.clone();
element
.cursor_pointer()
.on_click(move |event, window, cx| on_click(event, window, cx))
})
.map(|element| {
if context.summarizing {
.when_some(on_remove.as_ref(), |element, on_remove| {
element.child(
IconButton::new(("remove", context.id.0), IconName::Close)
.shape(IconButtonShape::Square)
.icon_size(IconSize::XSmall)
.tooltip(Tooltip::text("Remove Context"))
.on_click({
let on_remove = on_remove.clone();
move |event, window, cx| on_remove(event, window, cx)
}),
)
})
.when_some(on_click.as_ref(), |element, on_click| {
let on_click = on_click.clone();
element
.tooltip(ui::Tooltip::text("Summarizing..."))
.with_animation(
"pulsating-ctx-pill",
Animation::new(Duration::from_secs(2))
.repeat()
.with_easing(pulsating_between(0.4, 0.8)),
|label, delta| label.opacity(delta),
)
.into_any_element()
} else {
element.into_any()
}
}),
.cursor_pointer()
.on_click(move |event, window, cx| on_click(event, window, cx))
})
.into_any_element()
}
ContextPill::Suggested {
name,
icon_path: _,
@ -198,15 +222,15 @@ impl RenderOnce for ContextPill {
.cursor_pointer()
.pr_1()
.border_dashed()
.border_color(if *focused {
color.border_focused
} else {
color.border
.map(|pill| {
if *focused {
pill.border_color(color.border_focused)
.bg(color.element_background.opacity(0.5))
} else {
pill.border_color(color.border)
}
})
.hover(|style| style.bg(color.element_hover.opacity(0.5)))
.when(*focused, |this| {
this.bg(color.element_background.opacity(0.5))
})
.child(
div().max_w_64().child(
Label::new(name.clone())
@ -227,6 +251,13 @@ impl RenderOnce for ContextPill {
}
}
/// Display state of an added context pill.
pub enum ContextStatus {
/// The context is available; the pill may offer a hover preview.
Ready,
/// The context is still being prepared; `message` is shown as the pill's
/// tooltip while the pill pulsates.
Loading { message: SharedString },
/// Preparing the context failed; `message` is shown as the pill's tooltip
/// and the pill is drawn with the theme's error colors.
Error { message: SharedString },
}
#[derive(RegisterComponent)]
pub struct AddedContext {
pub id: ContextId,
pub kind: ContextKind,
@ -234,7 +265,8 @@ pub struct AddedContext {
pub parent: Option<SharedString>,
pub tooltip: Option<SharedString>,
pub icon_path: Option<SharedString>,
pub summarizing: bool,
pub status: ContextStatus,
pub show_preview: Option<Rc<dyn Fn(&mut Window, &mut App) -> AnyView + 'static>>,
}
impl AddedContext {
@ -259,7 +291,8 @@ impl AddedContext {
parent,
tooltip: Some(full_path_string),
icon_path: FileIcons::get_icon(&full_path, cx),
summarizing: false,
status: ContextStatus::Ready,
show_preview: None,
}
}
@ -289,7 +322,8 @@ impl AddedContext {
parent,
tooltip: Some(full_path_string),
icon_path: None,
summarizing: false,
status: ContextStatus::Ready,
show_preview: None,
}
}
@ -300,7 +334,8 @@ impl AddedContext {
parent: None,
tooltip: None,
icon_path: None,
summarizing: false,
status: ContextStatus::Ready,
show_preview: None,
},
AssistantContext::Excerpt(excerpt_context) => {
@ -327,12 +362,13 @@ impl AddedContext {
AddedContext {
id: excerpt_context.id,
kind: ContextKind::File, // Use File icon for excerpts
kind: ContextKind::File,
name: name.into(),
parent,
tooltip: Some(full_path_string.into()),
icon_path: FileIcons::get_icon(&full_path, cx),
summarizing: false,
status: ContextStatus::Ready,
show_preview: None,
}
}
@ -343,7 +379,8 @@ impl AddedContext {
parent: None,
tooltip: None,
icon_path: None,
summarizing: false,
status: ContextStatus::Ready,
show_preview: None,
},
AssistantContext::Thread(thread_context) => AddedContext {
@ -353,10 +390,18 @@ impl AddedContext {
parent: None,
tooltip: None,
icon_path: None,
summarizing: thread_context
status: if thread_context
.thread
.read(cx)
.is_generating_detailed_summary(),
.is_generating_detailed_summary()
{
ContextStatus::Loading {
message: "Summarizing…".into(),
}
} else {
ContextStatus::Ready
},
show_preview: None,
},
AssistantContext::Rules(user_rules_context) => AddedContext {
@ -366,8 +411,122 @@ impl AddedContext {
parent: None,
tooltip: None,
icon_path: None,
summarizing: false,
status: ContextStatus::Ready,
show_preview: None,
},
AssistantContext::Image(image_context) => AddedContext {
id: image_context.id,
kind: ContextKind::Image,
name: "Image".into(),
parent: None,
tooltip: None,
icon_path: None,
status: if image_context.is_loading() {
ContextStatus::Loading {
message: "Loading…".into(),
}
} else if image_context.is_error() {
ContextStatus::Error {
message: "Failed to load image".into(),
}
} else {
ContextStatus::Ready
},
show_preview: Some(Rc::new({
let image = image_context.original_image.clone();
move |_, cx| {
cx.new(|_| ImagePreview {
image: image.clone(),
})
.into()
}
})),
},
}
}
}
/// View rendered as the hover preview of an image context pill.
struct ImagePreview {
    /// The image to display inside the tooltip container.
    image: Arc<Image>,
}

impl Render for ImagePreview {
    /// Renders the image, capped at `max_w_96` / `max_h_96`, inside a tooltip
    /// container that occludes what is beneath it and stops mouse-move and
    /// left-mouse-down events from propagating.
    fn render(&mut self, window: &mut Window, cx: &mut Context<Self>) -> impl IntoElement {
        let picture = gpui::img(self.image.clone()).max_w_96().max_h_96();

        tooltip_container(window, cx, move |container, _, _| {
            container
                .occlude()
                .on_mouse_move(|_, _, cx| cx.stop_propagation())
                .on_mouse_down(MouseButton::Left, |_, _, cx| cx.stop_propagation())
                .child(picture)
        })
    }
}
impl Component for AddedContext {
    /// Lists this component under the agent scope.
    fn scope() -> ComponentScope {
        ComponentScope::Agent
    }

    /// Name used to sort this component among the registered components.
    fn sort_name() -> &'static str {
        "AddedContext"
    }

    /// Builds a preview showing an image context pill in each of its three
    /// states: ready, loading and error.
    fn preview(_window: &mut Window, cx: &mut App) -> Option<AnyElement> {
        // Produces the pill data for an image context backed by `image_task`.
        let image_pill = |id, image_task, cx: &App| {
            AddedContext::new(
                &AssistantContext::Image(ImageContext {
                    id: ContextId(id),
                    original_image: Arc::new(Image::empty()),
                    image_task,
                }),
                cx,
            )
        };

        let ready = image_pill(
            0,
            Task::ready(Some(LanguageModelImage::empty())).shared(),
            &*cx,
        );

        // A five minute timer keeps this task pending so the pill is built
        // in its loading state.
        let slow_task = cx
            .background_spawn(async move {
                smol::Timer::after(Duration::from_secs(60 * 5)).await;
                Some(LanguageModelImage::empty())
            })
            .shared();
        let loading = image_pill(1, slow_task, &*cx);

        // A task that already resolved to `None` is reported as an error.
        let failed = image_pill(2, Task::ready(None).shared(), &*cx);

        let examples = [("Ready", ready), ("Loading", loading), ("Error", failed)]
            .into_iter()
            .map(|(label, context)| {
                single_example(
                    label,
                    ContextPill::added(context, false, false, None).into_any_element(),
                )
            });

        Some(v_flex().gap_6().children(examples).into_any())
    }
}

View file

@ -2089,7 +2089,7 @@ impl ContextEditor {
continue;
};
let image_id = image.id();
let image_task = LanguageModelImage::from_image(image, cx).shared();
let image_task = LanguageModelImage::from_image(Arc::new(image), cx).shared();
for image_position in image_positions.iter() {
context.insert_content(

View file

@ -1497,6 +1497,15 @@ impl Hash for Image {
}
impl Image {
/// An empty image containing no data
///
/// The returned image has its format set to PNG, an empty byte buffer and
/// an ID of `0`.
pub fn empty() -> Self {
Self {
format: ImageFormat::Png,
bytes: Vec::new(),
id: 0,
}
}
/// Get this image's ID
pub fn id(&self) -> u64 {
self.id

View file

@ -139,6 +139,7 @@ pub enum IconName {
Globe,
Hash,
HistoryRerun,
Image,
Indicator,
Info,
InlayHint,

View file

@ -32,7 +32,14 @@ impl std::fmt::Debug for LanguageModelImage {
const ANTHROPIC_SIZE_LIMT: f32 = 1568.;
impl LanguageModelImage {
pub fn from_image(data: Image, cx: &mut App) -> Task<Option<Self>> {
/// Creates an image with an empty `source` and a size of zero by zero
/// device pixels.
pub fn empty() -> Self {
Self {
source: "".into(),
size: size(DevicePixels(0), DevicePixels(0)),
}
}
pub fn from_image(data: Arc<Image>, cx: &mut App) -> Task<Option<Self>> {
cx.background_spawn(async move {
match data.format() {
gpui::ImageFormat::Png