Allow AI interactions to be proxied through Zed's server so you don't need an API key (#7367)

Co-authored-by: Antonio <antonio@zed.dev>

Resurrected this from some assistant work I did in the spring of 2023.
- [x] Resurrect streaming responses
- [x] Use streaming responses to enable AI via Zed's servers by default
(but preserve the API key option for now; see the sketch after this list)
- [x] Simplify protobuf
- [x] Proxy to OpenAI on zed.dev
- [x] Proxy to Gemini on zed.dev
- [x] Improve UX for switching between OpenAI and Google models
  - We currently disallow cycling when setting a custom model, but we need a
    better solution to keep OpenAI models available while testing the Google ones
- [x] Show remaining tokens correctly for Google models
- [x] Remove semantic index
- [x] Delete `ai` crate
- [x] CloudFront so we can block abuse
- [x] Rate-limiting
- [x] Fix panic when using inline assistant
- [x] Double-check that the upgraded `AssistantSettings` are
backwards-compatible
- [x] Add hosted LLM interaction behind a `language-models` feature
flag.

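The core of the change (visible in the diff below) is a single global `CompletionProvider` that either proxies requests through zed.dev or talks to OpenAI directly with a user-supplied key. Here is a minimal, self-contained sketch of that routing shape; the variant fields, the zed.dev path, and the synchronous return type are illustrative assumptions, not the real implementation:

```rust
// Illustrative only: the real CompletionProvider lives in the assistant crate,
// is stored as a gpui global, and returns streaming responses. Field names and
// the zed.dev path below are assumptions made to keep the sketch short.

#[derive(Default)]
pub struct LanguageModelRequest {
    pub model: String,
    pub messages: Vec<String>,
}

pub enum CompletionProvider {
    /// Proxied through Zed's server; no API key required on the client.
    ZedDotDev { server_url: String },
    /// Direct OpenAI access, preserved for users who supply their own key.
    OpenAi { api_key: String },
}

impl CompletionProvider {
    /// Pick the endpoint a completion request should be sent to.
    pub fn endpoint(&self, request: &LanguageModelRequest) -> String {
        match self {
            Self::ZedDotDev { server_url } => {
                // Hypothetical path on zed.dev that forwards to the upstream model.
                format!("{server_url}/completion?model={}", request.model)
            }
            Self::OpenAi { .. } => {
                // Direct call; the user's key is attached by the caller.
                "https://api.openai.com/v1/chat/completions".to_string()
            }
        }
    }
}
```

In the tests below, the provider is swapped for a fake via `cx.set_global(CompletionProvider::Fake(provider.clone()))`, which is why `Codegen::new` no longer takes a provider argument.
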
Release Notes:

- We are temporarily removing the semantic index in order to redesign it
from scratch.

---------

Co-authored-by: Antonio <antonio@zed.dev>
Co-authored-by: Antonio Scandurra <me@as-cii.com>
Co-authored-by: Thorsten <thorsten@zed.dev>
Co-authored-by: Max <max@zed.dev>

87 changed files with 3647 additions and 8937 deletions


@@ -1,12 +1,13 @@
use crate::streaming_diff::{Hunk, StreamingDiff};
use ai::completion::{CompletionProvider, CompletionRequest};
use crate::{
streaming_diff::{Hunk, StreamingDiff},
CompletionProvider, LanguageModelRequest,
};
use anyhow::Result;
use editor::{Anchor, MultiBuffer, MultiBufferSnapshot, ToOffset, ToPoint};
use futures::{channel::mpsc, SinkExt, Stream, StreamExt};
use gpui::{EventEmitter, Model, ModelContext, Task};
use language::{Rope, TransactionId};
use multi_buffer;
use std::{cmp, future, ops::Range, sync::Arc};
use std::{cmp, future, ops::Range};
pub enum Event {
Finished,
@@ -20,7 +21,6 @@ pub enum CodegenKind {
}
pub struct Codegen {
provider: Arc<dyn CompletionProvider>,
buffer: Model<MultiBuffer>,
snapshot: MultiBufferSnapshot,
kind: CodegenKind,
@@ -35,15 +35,9 @@ pub struct Codegen {
impl EventEmitter<Event> for Codegen {}
impl Codegen {
pub fn new(
buffer: Model<MultiBuffer>,
kind: CodegenKind,
provider: Arc<dyn CompletionProvider>,
cx: &mut ModelContext<Self>,
) -> Self {
pub fn new(buffer: Model<MultiBuffer>, kind: CodegenKind, cx: &mut ModelContext<Self>) -> Self {
let snapshot = buffer.read(cx).snapshot(cx);
Self {
provider,
buffer: buffer.clone(),
snapshot,
kind,
@@ -94,7 +88,7 @@ impl Codegen {
self.error.as_ref()
}
pub fn start(&mut self, prompt: Box<dyn CompletionRequest>, cx: &mut ModelContext<Self>) {
pub fn start(&mut self, prompt: LanguageModelRequest, cx: &mut ModelContext<Self>) {
let range = self.range();
let snapshot = self.snapshot.clone();
let selected_text = snapshot
@@ -108,7 +102,7 @@
.next()
.unwrap_or_else(|| snapshot.indent_size_for_line(selection_start.row));
let response = self.provider.complete(prompt);
let response = CompletionProvider::global(cx).complete(prompt);
self.generation = cx.spawn(|this, mut cx| {
async move {
let generate = async {
@@ -305,7 +299,7 @@ fn strip_invalid_spans_from_codeblock(
}
if first_line {
if buffer == "" || buffer == "`" || buffer == "``" {
if buffer.is_empty() || buffer == "`" || buffer == "``" {
return future::ready(None);
} else if buffer.starts_with("```") {
starts_with_markdown_codeblock = true;
@@ -360,8 +354,9 @@ fn strip_invalid_spans_from_codeblock(
mod tests {
use std::sync::Arc;
use crate::FakeCompletionProvider;
use super::*;
use ai::test::FakeCompletionProvider;
use futures::stream::{self};
use gpui::{Context, TestAppContext};
use indoc::indoc;
@@ -378,15 +373,11 @@ mod tests {
pub name: String,
}
impl CompletionRequest for DummyCompletionRequest {
fn data(&self) -> serde_json::Result<String> {
serde_json::to_string(self)
}
}
#[gpui::test(iterations = 10)]
async fn test_transform_autoindent(cx: &mut TestAppContext, mut rng: StdRng) {
let provider = FakeCompletionProvider::default();
cx.set_global(cx.update(SettingsStore::test));
cx.set_global(CompletionProvider::Fake(provider.clone()));
cx.update(language_settings::init);
let text = indoc! {"
@@ -405,19 +396,10 @@ mod tests {
let snapshot = buffer.snapshot(cx);
snapshot.anchor_before(Point::new(1, 0))..snapshot.anchor_after(Point::new(4, 5))
});
let provider = Arc::new(FakeCompletionProvider::new());
let codegen = cx.new_model(|cx| {
Codegen::new(
buffer.clone(),
CodegenKind::Transform { range },
provider.clone(),
cx,
)
});
let codegen =
cx.new_model(|cx| Codegen::new(buffer.clone(), CodegenKind::Transform { range }, cx));
let request = Box::new(DummyCompletionRequest {
name: "test".to_string(),
});
let request = LanguageModelRequest::default();
codegen.update(cx, |codegen, cx| codegen.start(request, cx));
let mut new_text = concat!(
@@ -430,8 +412,7 @@ mod tests {
let max_len = cmp::min(new_text.len(), 10);
let len = rng.gen_range(1..=max_len);
let (chunk, suffix) = new_text.split_at(len);
println!("CHUNK: {:?}", &chunk);
provider.send_completion(chunk);
provider.send_completion(chunk.into());
new_text = suffix;
cx.background_executor.run_until_parked();
}
@@ -456,6 +437,8 @@ mod tests {
cx: &mut TestAppContext,
mut rng: StdRng,
) {
let provider = FakeCompletionProvider::default();
cx.set_global(CompletionProvider::Fake(provider.clone()));
cx.set_global(cx.update(SettingsStore::test));
cx.update(language_settings::init);
@@ -472,19 +455,10 @@ mod tests {
let snapshot = buffer.snapshot(cx);
snapshot.anchor_before(Point::new(1, 6))
});
let provider = Arc::new(FakeCompletionProvider::new());
let codegen = cx.new_model(|cx| {
Codegen::new(
buffer.clone(),
CodegenKind::Generate { position },
provider.clone(),
cx,
)
});
let codegen =
cx.new_model(|cx| Codegen::new(buffer.clone(), CodegenKind::Generate { position }, cx));
let request = Box::new(DummyCompletionRequest {
name: "test".to_string(),
});
let request = LanguageModelRequest::default();
codegen.update(cx, |codegen, cx| codegen.start(request, cx));
let mut new_text = concat!(
@@ -497,7 +471,7 @@ mod tests {
let max_len = cmp::min(new_text.len(), 10);
let len = rng.gen_range(1..=max_len);
let (chunk, suffix) = new_text.split_at(len);
provider.send_completion(chunk);
provider.send_completion(chunk.into());
new_text = suffix;
cx.background_executor.run_until_parked();
}
@@ -522,6 +496,8 @@ mod tests {
cx: &mut TestAppContext,
mut rng: StdRng,
) {
let provider = FakeCompletionProvider::default();
cx.set_global(CompletionProvider::Fake(provider.clone()));
cx.set_global(cx.update(SettingsStore::test));
cx.update(language_settings::init);
@@ -538,19 +514,10 @@ mod tests {
let snapshot = buffer.snapshot(cx);
snapshot.anchor_before(Point::new(1, 2))
});
let provider = Arc::new(FakeCompletionProvider::new());
let codegen = cx.new_model(|cx| {
Codegen::new(
buffer.clone(),
CodegenKind::Generate { position },
provider.clone(),
cx,
)
});
let codegen =
cx.new_model(|cx| Codegen::new(buffer.clone(), CodegenKind::Generate { position }, cx));
let request = Box::new(DummyCompletionRequest {
name: "test".to_string(),
});
let request = LanguageModelRequest::default();
codegen.update(cx, |codegen, cx| codegen.start(request, cx));
let mut new_text = concat!(
@@ -563,8 +530,7 @@ mod tests {
let max_len = cmp::min(new_text.len(), 10);
let len = rng.gen_range(1..=max_len);
let (chunk, suffix) = new_text.split_at(len);
println!("{:?}", &chunk);
provider.send_completion(chunk);
provider.send_completion(chunk.into());
new_text = suffix;
cx.background_executor.run_until_parked();
}
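
For context on the test changes above: the fake provider is installed as the global `CompletionProvider`, and each `provider.send_completion(chunk.into())` call pushes one chunk into the stream that `Codegen` is consuming, so the test controls the pacing of the "streamed" response. Below is a self-contained sketch of how such a fake could be wired up; the real `FakeCompletionProvider` lives in the assistant crate, and the names and channel-based design here are assumptions.

```rust
use std::sync::{Arc, Mutex};

use futures::{channel::mpsc, stream::BoxStream, StreamExt};

/// Sketch of a hand-driven fake: `complete` hands back a stream, and the test
/// feeds chunks into it with `send_completion`. Not the real implementation.
#[derive(Clone, Default)]
pub struct FakeCompletionProvider {
    tx: Arc<Mutex<Option<mpsc::UnboundedSender<String>>>>,
}

impl FakeCompletionProvider {
    /// Returns the stream of completion chunks that Codegen would consume.
    pub fn complete(&self) -> BoxStream<'static, String> {
        let (tx, rx) = mpsc::unbounded();
        *self.tx.lock().unwrap() = Some(tx);
        rx.boxed()
    }

    /// Called from the test to emit the next chunk of the fake completion.
    pub fn send_completion(&self, chunk: String) {
        self.tx
            .lock()
            .unwrap()
            .as_ref()
            .expect("complete() must be called before send_completion()")
            .unbounded_send(chunk)
            .unwrap();
    }

    /// Dropping the sender ends the stream, signalling that the response is done.
    pub fn finish_completion(&self) {
        self.tx.lock().unwrap().take();
    }
}
```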