Copy/paste images into editors (Mac only) (#15782)

For future reference: WIP branch of copy/pasting a mixture of images and
text: https://github.com/zed-industries/zed/tree/copy-paste-images -
we'll come back to that one after landing this one.

Release Notes:

- You can now paste images into the Assistant Panel to include them as
context. Currently works only on Mac, and with Anthropic models. Future
support is planned for more models, operating systems, and image
clipboard operations.

---------

Co-authored-by: Antonio <antonio@zed.dev>
Co-authored-by: Mikayla <mikayla@zed.dev>
Co-authored-by: Jason <jason@zed.dev>
Co-authored-by: Kyle <kylek@zed.dev>
This commit is contained in:
Richard Feldman 2024-08-13 13:18:25 -04:00 committed by GitHub
parent e3b0de5dda
commit b1a581e81b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
58 changed files with 2983 additions and 1708 deletions

View file

@@ -221,24 +221,44 @@ pub fn count_anthropic_tokens(
) -> BoxFuture<'static, Result<usize>> {
cx.background_executor()
.spawn(async move {
let messages = request
.messages
.into_iter()
.map(|message| tiktoken_rs::ChatCompletionRequestMessage {
role: match message.role {
Role::User => "user".into(),
Role::Assistant => "assistant".into(),
Role::System => "system".into(),
},
content: Some(message.content),
name: None,
function_call: None,
})
.collect::<Vec<_>>();
let messages = request.messages;
let mut tokens_from_images = 0;
let mut string_messages = Vec::with_capacity(messages.len());
for message in messages {
use crate::MessageContent;
let mut string_contents = String::new();
for content in message.content {
match content {
MessageContent::Text(string) => {
string_contents.push_str(&string);
}
MessageContent::Image(image) => {
tokens_from_images += image.estimate_tokens();
}
}
}
if !string_contents.is_empty() {
string_messages.push(tiktoken_rs::ChatCompletionRequestMessage {
role: match message.role {
Role::User => "user".into(),
Role::Assistant => "assistant".into(),
Role::System => "system".into(),
},
content: Some(string_contents),
name: None,
function_call: None,
});
}
}
// Tiktoken doesn't yet support these models, so we manually use the
// same tokenizer as GPT-4.
tiktoken_rs::num_tokens_from_messages("gpt-4", &messages)
tiktoken_rs::num_tokens_from_messages("gpt-4", &string_messages)
.map(|tokens| tokens + tokens_from_images)
})
.boxed()
}

View file

@@ -193,7 +193,7 @@ impl LanguageModel for CopilotChatLanguageModel {
cx: &AsyncAppContext,
) -> BoxFuture<'static, Result<BoxStream<'static, Result<String>>>> {
if let Some(message) = request.messages.last() {
if message.content.trim().is_empty() {
if message.contents_empty() {
const EMPTY_PROMPT_MSG: &str =
"Empty prompts aren't allowed. Please provide a non-empty prompt.";
return futures::future::ready(Err(anyhow::anyhow!(EMPTY_PROMPT_MSG))).boxed();
@@ -270,7 +270,7 @@ impl CopilotChatLanguageModel {
Role::Assistant => CopilotChatRole::Assistant,
Role::System => CopilotChatRole::System,
},
content: msg.content,
content: msg.string_contents(),
})
.collect(),
)

View file

@@ -182,14 +182,14 @@ impl OllamaLanguageModel {
.into_iter()
.map(|msg| match msg.role {
Role::User => ChatMessage::User {
content: msg.content,
content: msg.string_contents(),
},
Role::Assistant => ChatMessage::Assistant {
content: msg.content,
content: msg.string_contents(),
tool_calls: None,
},
Role::System => ChatMessage::System {
content: msg.content,
content: msg.string_contents(),
},
})
.collect(),
@@ -257,7 +257,7 @@ impl LanguageModel for OllamaLanguageModel {
let token_count = request
.messages
.iter()
.map(|msg| msg.content.chars().count())
.map(|msg| msg.string_contents().chars().count())
.sum::<usize>()
/ 4;

View file

@@ -363,7 +363,7 @@ pub fn count_open_ai_tokens(
Role::Assistant => "assistant".into(),
Role::System => "system".into(),
},
content: Some(message.content),
content: Some(message.string_contents()),
name: None,
function_call: None,
})