language_models: Add vision support for Copilot Chat models (#30155)

Problem Statement:
Support for image analysis (vision) is currently restricted to Anthropic
and Gemini models. This limits users who wish to leverage vision
capabilities available in other models, such as Copilot, for tasks like
attaching image context within the agent message editor.

Proposed Change:
This PR extends vision support to include Copilot models that are
already equipped with vision capabilities. This integration will allow
users within VS Code to attach and analyze images using supported
Copilot models via the agent message editor.

Scope Limitation:

This PR does not implement controls within the message editor to ensure
that image context (e.g., through copy-paste or attachment) is enabled
or prompted only when a vision-supported model is active. Long term, the
message editor should have access to each model's vision capability and
stop users from attaching images, either by greying out the image
context with a note that it is not supported, or by disabling it for
both copy-paste and file/directory search.

Closes #30076 

Release Notes:

- Add vision support for Copilot Chat models

---------

Co-authored-by: Bennet Bo Fenner <bennet@zed.dev>
This commit is contained in:
Umesh Yadav 2025-05-12 18:41:38 +05:30 committed by GitHub
parent 5a38bbbd22
commit a6c3d49bb9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 80 additions and 22 deletions

View file

@ -96,6 +96,10 @@ struct ModelSupportedFeatures {
streaming: bool,
#[serde(default)]
tool_calls: bool,
#[serde(default)]
parallel_tool_calls: bool,
#[serde(default)]
vision: bool,
}
#[derive(Clone, Copy, Serialize, Deserialize, Debug, Eq, PartialEq)]
@ -107,6 +111,20 @@ pub enum ModelVendor {
Anthropic,
}
/// One piece of a chat message's content: either plain text or an image reference.
///
/// Serialized as an internally tagged object whose `type` field is `"text"`
/// or `"image_url"`, with the variant's fields alongside the tag.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum ChatMessageContent {
    /// Textual content, tagged `"text"`.
    #[serde(rename = "text")]
    Text {
        text: String,
    },
    /// Image content, tagged `"image_url"`; the image is described by an [`ImageUrl`].
    #[serde(rename = "image_url")]
    Image {
        image_url: ImageUrl,
    },
}
/// Payload of an image content part: a single `url` string.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct ImageUrl {
    /// The URL string sent for the image.
    pub url: String,
}
impl Model {
pub fn uses_streaming(&self) -> bool {
self.capabilities.supports.streaming
@ -131,6 +149,14 @@ impl Model {
/// Returns the vendor recorded on this model.
pub fn vendor(&self) -> ModelVendor {
    // `ModelVendor` is `Copy`, so the field is handed back by value.
    let vendor = self.vendor;
    vendor
}
/// Returns the `vision` flag from this model's supported-feature set.
pub fn supports_vision(&self) -> bool {
    let features = &self.capabilities.supports;
    features.vision
}
/// Returns the `parallel_tool_calls` flag from this model's supported-feature set.
pub fn supports_parallel_tool_calls(&self) -> bool {
    let features = &self.capabilities.supports;
    features.parallel_tool_calls
}
}
#[derive(Serialize, Deserialize)]
@ -177,7 +203,7 @@ pub enum ChatMessage {
tool_calls: Vec<ToolCall>,
},
User {
content: String,
content: Vec<ChatMessageContent>,
},
System {
content: String,
@ -536,7 +562,8 @@ async fn stream_completion(
)
.header("Authorization", format!("Bearer {}", api_key))
.header("Content-Type", "application/json")
.header("Copilot-Integration-Id", "vscode-chat");
.header("Copilot-Integration-Id", "vscode-chat")
.header("Copilot-Vision-Request", "true");
let is_streaming = request.stream;