language_models: Add image support to OpenRouter models (#32012)
- [x] Manual Testing(Tested this with Qwen2.5 VL 32B Instruct (free) and Llama 4 Scout (free), Llama 4 Maverick (free). Llama models have some issues in write profile due to one of the in built tools schema, so I tested it with minimal profile. Closes #ISSUE Release Notes: - Add image support to OpenRouter models --------- Signed-off-by: Umesh Yadav <umesh4257@gmail.com> Co-authored-by: Ben Brandt <benjamin.j.brandt@gmail.com>
This commit is contained in:
parent
47ac01842b
commit
0852912fd6
2 changed files with 187 additions and 23 deletions
|
@ -43,6 +43,8 @@ pub struct AvailableModel {
|
||||||
pub max_tokens: usize,
|
pub max_tokens: usize,
|
||||||
pub max_output_tokens: Option<u32>,
|
pub max_output_tokens: Option<u32>,
|
||||||
pub max_completion_tokens: Option<u32>,
|
pub max_completion_tokens: Option<u32>,
|
||||||
|
pub supports_tools: Option<bool>,
|
||||||
|
pub supports_images: Option<bool>,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct OpenRouterLanguageModelProvider {
|
pub struct OpenRouterLanguageModelProvider {
|
||||||
|
@ -227,7 +229,8 @@ impl LanguageModelProvider for OpenRouterLanguageModelProvider {
|
||||||
name: model.name.clone(),
|
name: model.name.clone(),
|
||||||
display_name: model.display_name.clone(),
|
display_name: model.display_name.clone(),
|
||||||
max_tokens: model.max_tokens,
|
max_tokens: model.max_tokens,
|
||||||
supports_tools: Some(false),
|
supports_tools: model.supports_tools,
|
||||||
|
supports_images: model.supports_images,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -345,7 +348,7 @@ impl LanguageModel for OpenRouterLanguageModel {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn supports_images(&self) -> bool {
|
fn supports_images(&self) -> bool {
|
||||||
false
|
self.model.supports_images.unwrap_or(false)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn count_tokens(
|
fn count_tokens(
|
||||||
|
@ -386,20 +389,26 @@ pub fn into_open_router(
|
||||||
max_output_tokens: Option<u32>,
|
max_output_tokens: Option<u32>,
|
||||||
) -> open_router::Request {
|
) -> open_router::Request {
|
||||||
let mut messages = Vec::new();
|
let mut messages = Vec::new();
|
||||||
for req_message in request.messages {
|
for message in request.messages {
|
||||||
for content in req_message.content {
|
for content in message.content {
|
||||||
match content {
|
match content {
|
||||||
MessageContent::Text(text) | MessageContent::Thinking { text, .. } => messages
|
MessageContent::Text(text) | MessageContent::Thinking { text, .. } => {
|
||||||
.push(match req_message.role {
|
add_message_content_part(
|
||||||
Role::User => open_router::RequestMessage::User { content: text },
|
open_router::MessagePart::Text { text: text },
|
||||||
Role::Assistant => open_router::RequestMessage::Assistant {
|
message.role,
|
||||||
content: Some(text),
|
&mut messages,
|
||||||
tool_calls: Vec::new(),
|
)
|
||||||
},
|
}
|
||||||
Role::System => open_router::RequestMessage::System { content: text },
|
|
||||||
}),
|
|
||||||
MessageContent::RedactedThinking(_) => {}
|
MessageContent::RedactedThinking(_) => {}
|
||||||
MessageContent::Image(_) => {}
|
MessageContent::Image(image) => {
|
||||||
|
add_message_content_part(
|
||||||
|
open_router::MessagePart::Image {
|
||||||
|
image_url: image.to_base64_url(),
|
||||||
|
},
|
||||||
|
message.role,
|
||||||
|
&mut messages,
|
||||||
|
);
|
||||||
|
}
|
||||||
MessageContent::ToolUse(tool_use) => {
|
MessageContent::ToolUse(tool_use) => {
|
||||||
let tool_call = open_router::ToolCall {
|
let tool_call = open_router::ToolCall {
|
||||||
id: tool_use.id.to_string(),
|
id: tool_use.id.to_string(),
|
||||||
|
@ -425,16 +434,20 @@ pub fn into_open_router(
|
||||||
}
|
}
|
||||||
MessageContent::ToolResult(tool_result) => {
|
MessageContent::ToolResult(tool_result) => {
|
||||||
let content = match &tool_result.content {
|
let content = match &tool_result.content {
|
||||||
LanguageModelToolResultContent::Text(text) => {
|
LanguageModelToolResultContent::Text(text) => {
|
||||||
text.to_string()
|
vec![open_router::MessagePart::Text {
|
||||||
|
text: text.to_string(),
|
||||||
|
}]
|
||||||
}
|
}
|
||||||
LanguageModelToolResultContent::Image(_) => {
|
LanguageModelToolResultContent::Image(image) => {
|
||||||
"[Tool responded with an image, but Zed doesn't support these in Open AI models yet]".to_string()
|
vec![open_router::MessagePart::Image {
|
||||||
|
image_url: image.to_base64_url(),
|
||||||
|
}]
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
messages.push(open_router::RequestMessage::Tool {
|
messages.push(open_router::RequestMessage::Tool {
|
||||||
content: content,
|
content: content.into(),
|
||||||
tool_call_id: tool_result.tool_use_id.to_string(),
|
tool_call_id: tool_result.tool_use_id.to_string(),
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
@ -473,6 +486,42 @@ pub fn into_open_router(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn add_message_content_part(
|
||||||
|
new_part: open_router::MessagePart,
|
||||||
|
role: Role,
|
||||||
|
messages: &mut Vec<open_router::RequestMessage>,
|
||||||
|
) {
|
||||||
|
match (role, messages.last_mut()) {
|
||||||
|
(Role::User, Some(open_router::RequestMessage::User { content }))
|
||||||
|
| (Role::System, Some(open_router::RequestMessage::System { content })) => {
|
||||||
|
content.push_part(new_part);
|
||||||
|
}
|
||||||
|
(
|
||||||
|
Role::Assistant,
|
||||||
|
Some(open_router::RequestMessage::Assistant {
|
||||||
|
content: Some(content),
|
||||||
|
..
|
||||||
|
}),
|
||||||
|
) => {
|
||||||
|
content.push_part(new_part);
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
messages.push(match role {
|
||||||
|
Role::User => open_router::RequestMessage::User {
|
||||||
|
content: open_router::MessageContent::from(vec![new_part]),
|
||||||
|
},
|
||||||
|
Role::Assistant => open_router::RequestMessage::Assistant {
|
||||||
|
content: Some(open_router::MessageContent::from(vec![new_part])),
|
||||||
|
tool_calls: Vec::new(),
|
||||||
|
},
|
||||||
|
Role::System => open_router::RequestMessage::System {
|
||||||
|
content: open_router::MessageContent::from(vec![new_part]),
|
||||||
|
},
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub struct OpenRouterEventMapper {
|
pub struct OpenRouterEventMapper {
|
||||||
tool_calls_by_index: HashMap<usize, RawToolCall>,
|
tool_calls_by_index: HashMap<usize, RawToolCall>,
|
||||||
}
|
}
|
||||||
|
|
|
@ -52,6 +52,7 @@ pub struct Model {
|
||||||
pub display_name: Option<String>,
|
pub display_name: Option<String>,
|
||||||
pub max_tokens: usize,
|
pub max_tokens: usize,
|
||||||
pub supports_tools: Option<bool>,
|
pub supports_tools: Option<bool>,
|
||||||
|
pub supports_images: Option<bool>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Model {
|
impl Model {
|
||||||
|
@ -61,6 +62,7 @@ impl Model {
|
||||||
Some("Auto Router"),
|
Some("Auto Router"),
|
||||||
Some(2000000),
|
Some(2000000),
|
||||||
Some(true),
|
Some(true),
|
||||||
|
Some(false),
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -73,12 +75,14 @@ impl Model {
|
||||||
display_name: Option<&str>,
|
display_name: Option<&str>,
|
||||||
max_tokens: Option<usize>,
|
max_tokens: Option<usize>,
|
||||||
supports_tools: Option<bool>,
|
supports_tools: Option<bool>,
|
||||||
|
supports_images: Option<bool>,
|
||||||
) -> Self {
|
) -> Self {
|
||||||
Self {
|
Self {
|
||||||
name: name.to_owned(),
|
name: name.to_owned(),
|
||||||
display_name: display_name.map(|s| s.to_owned()),
|
display_name: display_name.map(|s| s.to_owned()),
|
||||||
max_tokens: max_tokens.unwrap_or(2000000),
|
max_tokens: max_tokens.unwrap_or(2000000),
|
||||||
supports_tools,
|
supports_tools,
|
||||||
|
supports_images,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -154,22 +158,118 @@ pub struct FunctionDefinition {
|
||||||
#[serde(tag = "role", rename_all = "lowercase")]
|
#[serde(tag = "role", rename_all = "lowercase")]
|
||||||
pub enum RequestMessage {
|
pub enum RequestMessage {
|
||||||
Assistant {
|
Assistant {
|
||||||
content: Option<String>,
|
content: Option<MessageContent>,
|
||||||
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
||||||
tool_calls: Vec<ToolCall>,
|
tool_calls: Vec<ToolCall>,
|
||||||
},
|
},
|
||||||
User {
|
User {
|
||||||
content: String,
|
content: MessageContent,
|
||||||
},
|
},
|
||||||
System {
|
System {
|
||||||
content: String,
|
content: MessageContent,
|
||||||
},
|
},
|
||||||
Tool {
|
Tool {
|
||||||
content: String,
|
content: MessageContent,
|
||||||
tool_call_id: String,
|
tool_call_id: String,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize, Debug, Eq, PartialEq)]
|
||||||
|
#[serde(untagged)]
|
||||||
|
pub enum MessageContent {
|
||||||
|
Plain(String),
|
||||||
|
Multipart(Vec<MessagePart>),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl MessageContent {
|
||||||
|
pub fn empty() -> Self {
|
||||||
|
Self::Plain(String::new())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn push_part(&mut self, part: MessagePart) {
|
||||||
|
match self {
|
||||||
|
Self::Plain(text) if text.is_empty() => {
|
||||||
|
*self = Self::Multipart(vec![part]);
|
||||||
|
}
|
||||||
|
Self::Plain(text) => {
|
||||||
|
let text_part = MessagePart::Text {
|
||||||
|
text: std::mem::take(text),
|
||||||
|
};
|
||||||
|
*self = Self::Multipart(vec![text_part, part]);
|
||||||
|
}
|
||||||
|
Self::Multipart(parts) => parts.push(part),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<Vec<MessagePart>> for MessageContent {
|
||||||
|
fn from(parts: Vec<MessagePart>) -> Self {
|
||||||
|
if parts.len() == 1 {
|
||||||
|
if let MessagePart::Text { text } = &parts[0] {
|
||||||
|
return Self::Plain(text.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Self::Multipart(parts)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<String> for MessageContent {
|
||||||
|
fn from(text: String) -> Self {
|
||||||
|
Self::Plain(text)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<&str> for MessageContent {
|
||||||
|
fn from(text: &str) -> Self {
|
||||||
|
Self::Plain(text.to_string())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl MessageContent {
|
||||||
|
pub fn as_text(&self) -> Option<&str> {
|
||||||
|
match self {
|
||||||
|
Self::Plain(text) => Some(text),
|
||||||
|
Self::Multipart(parts) if parts.len() == 1 => {
|
||||||
|
if let MessagePart::Text { text } = &parts[0] {
|
||||||
|
Some(text)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn to_text(&self) -> String {
|
||||||
|
match self {
|
||||||
|
Self::Plain(text) => text.clone(),
|
||||||
|
Self::Multipart(parts) => parts
|
||||||
|
.iter()
|
||||||
|
.filter_map(|part| {
|
||||||
|
if let MessagePart::Text { text } = part {
|
||||||
|
Some(text.as_str())
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
.join(""),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize, Debug, Eq, PartialEq)]
|
||||||
|
#[serde(tag = "type", rename_all = "snake_case")]
|
||||||
|
pub enum MessagePart {
|
||||||
|
Text {
|
||||||
|
text: String,
|
||||||
|
},
|
||||||
|
#[serde(rename = "image_url")]
|
||||||
|
Image {
|
||||||
|
image_url: String,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Serialize, Deserialize, Debug, Eq, PartialEq)]
|
#[derive(Serialize, Deserialize, Debug, Eq, PartialEq)]
|
||||||
pub struct ToolCall {
|
pub struct ToolCall {
|
||||||
pub id: String,
|
pub id: String,
|
||||||
|
@ -266,6 +366,14 @@ pub struct ModelEntry {
|
||||||
pub context_length: Option<usize>,
|
pub context_length: Option<usize>,
|
||||||
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
||||||
pub supported_parameters: Vec<String>,
|
pub supported_parameters: Vec<String>,
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub architecture: Option<ModelArchitecture>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Default, Debug, Clone, PartialEq, Deserialize)]
|
||||||
|
pub struct ModelArchitecture {
|
||||||
|
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
||||||
|
pub input_modalities: Vec<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn complete(
|
pub async fn complete(
|
||||||
|
@ -470,6 +578,13 @@ pub async fn list_models(client: &dyn HttpClient, api_url: &str) -> Result<Vec<M
|
||||||
),
|
),
|
||||||
max_tokens: entry.context_length.unwrap_or(2000000),
|
max_tokens: entry.context_length.unwrap_or(2000000),
|
||||||
supports_tools: Some(entry.supported_parameters.contains(&"tools".to_string())),
|
supports_tools: Some(entry.supported_parameters.contains(&"tools".to_string())),
|
||||||
|
supports_images: Some(
|
||||||
|
entry
|
||||||
|
.architecture
|
||||||
|
.as_ref()
|
||||||
|
.map(|arch| arch.input_modalities.contains(&"image".to_string()))
|
||||||
|
.unwrap_or(false),
|
||||||
|
),
|
||||||
})
|
})
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue