Allow AI interactions to be proxied through Zed's server so you don't need an API key (#7367)

Co-authored-by: Antonio <antonio@zed.dev>

Resurrected this from some assistant work I did in the spring of 2023.
- [x] Resurrect streaming responses
- [x] Use streaming responses to enable AI via Zed's servers by default
(but preserve the API key option for now; see the relay sketch after
this list)
- [x] Simplify protobuf
- [x] Proxy to OpenAI on zed.dev
- [x] Proxy to Gemini on zed.dev
- [x] Improve UX for switching between OpenAI and Google models
- We currently disallow cycling when setting a custom model, but we need
a better solution to keep OpenAI models available while testing the
Google ones
- [x] Show remaining tokens correctly for Google models
- [x] Remove semantic index
- [x] Delete `ai` crate
- [x] CloudFront so we can ban abuse
- [x] Rate-limiting (see the rate-limiter sketch after this list)
- [x] Fix panic when using inline assistant
- [x] Double check the upgraded `AssistantSettings` are
backwards-compatible
- [x] Add hosted LLM interaction behind a `language-models` feature
flag.
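
Two of the items above carry most of the technical weight, so here are
hedged sketches of both. First, the streaming relay at the heart of the
proxy: the server consumes a chunk stream from the upstream provider
(OpenAI or Gemini) and forwards each chunk to the editor as it arrives.
This is a minimal illustration under assumed names (`relay_stream`,
`send`), not the collab server's actual handler:

```rust
use anyhow::Result;
use futures::{Stream, StreamExt};

// Forward each upstream chunk to the client as soon as it arrives, so the
// assistant can render tokens incrementally. `send` stands in for whatever
// transport carries responses back to the editor (hypothetical).
async fn relay_stream<C>(
    mut upstream: impl Stream<Item = Result<C>> + Unpin,
    mut send: impl FnMut(C) -> Result<()>,
) -> Result<()> {
    while let Some(chunk) = upstream.next().await {
        // A mid-stream upstream error fails the rest of the request.
        send(chunk?)?;
    }
    Ok(())
}
```

Second, rate-limiting: the simplest shape such a limiter can take is a
fixed-window counter per user. The real implementation in this change may
differ; this sketch assumes only std:

```rust
use std::collections::HashMap;
use std::time::{Duration, Instant};

// Fixed-window rate limiter: allow at most `max_requests` per `window`
// for each user, resetting the count once the window elapses.
struct RateLimiter {
    max_requests: usize,
    window: Duration,
    counts: HashMap<u64, (Instant, usize)>, // user_id -> (window start, count)
}

impl RateLimiter {
    fn check(&mut self, user_id: u64) -> bool {
        let now = Instant::now();
        let entry = self.counts.entry(user_id).or_insert((now, 0));
        if now.duration_since(entry.0) > self.window {
            *entry = (now, 0); // start a fresh window
        }
        entry.1 += 1;
        entry.1 <= self.max_requests
    }
}
```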

Release Notes:

- We are temporarily removing the semantic index in order to redesign it
from scratch.

---------

Co-authored-by: Antonio <antonio@zed.dev>
Co-authored-by: Antonio Scandurra <me@as-cii.com>
Co-authored-by: Thorsten <thorsten@zed.dev>
Co-authored-by: Max <max@zed.dev>

Nathan Sobo 2024-03-19 12:22:26 -06:00 committed by GitHub
parent 905a24079a
commit 8ae5a3b61a
87 changed files with 3647 additions and 8937 deletions

@@ -13,7 +13,7 @@ use async_tungstenite::tungstenite::{
 use clock::SystemClock;
 use collections::HashMap;
 use futures::{
-    channel::oneshot, future::LocalBoxFuture, AsyncReadExt, FutureExt, SinkExt, StreamExt,
+    channel::oneshot, future::LocalBoxFuture, AsyncReadExt, FutureExt, SinkExt, Stream, StreamExt,
     TryFutureExt as _, TryStreamExt,
 };
 use gpui::{
use gpui::{
@@ -36,7 +36,10 @@ use std::{
     future::Future,
     marker::PhantomData,
     path::PathBuf,
-    sync::{atomic::AtomicU64, Arc, Weak},
+    sync::{
+        atomic::{AtomicU64, Ordering},
+        Arc, Weak,
+    },
     time::{Duration, Instant},
 };
 use telemetry::Telemetry;
@@ -442,7 +445,7 @@ impl Client {
     }

     pub fn id(&self) -> u64 {
-        self.id.load(std::sync::atomic::Ordering::SeqCst)
+        self.id.load(Ordering::SeqCst)
     }

     pub fn http_client(&self) -> Arc<HttpClientWithUrl> {
@@ -450,7 +453,7 @@ impl Client {
     }

     pub fn set_id(&self, id: u64) -> &Self {
-        self.id.store(id, std::sync::atomic::Ordering::SeqCst);
+        self.id.store(id, Ordering::SeqCst);
         self
     }
@@ -1260,6 +1263,30 @@ impl Client {
             .map_ok(|envelope| envelope.payload)
     }

+    pub fn request_stream<T: RequestMessage>(
+        &self,
+        request: T,
+    ) -> impl Future<Output = Result<impl Stream<Item = Result<T::Response>>>> {
+        let client_id = self.id.load(Ordering::SeqCst);
+        log::debug!(
+            "rpc request start. client_id:{}. name:{}",
+            client_id,
+            T::NAME
+        );
+        let response = self
+            .connection_id()
+            .map(|conn_id| self.peer.request_stream(conn_id, request));
+        async move {
+            let response = response?.await;
+            log::debug!(
+                "rpc request finish. client_id:{}. name:{}",
+                client_id,
+                T::NAME
+            );
+            response
+        }
+    }
+
     pub fn request_envelope<T: RequestMessage>(
         &self,
         request: T,
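
For orientation, a hedged sketch of how a caller might consume the new
`request_stream` API above. `proto::CompleteWithLanguageModel` and the
`text` field on its response are illustrative assumptions, not necessarily
the exact protobuf messages in this change:

```rust
use anyhow::Result;
use futures::StreamExt;
use std::sync::Arc;

// Accumulate a completion from streamed chunks (names are hypothetical).
async fn complete(
    client: Arc<Client>,
    request: proto::CompleteWithLanguageModel,
) -> Result<String> {
    let stream = client.request_stream(request).await?;
    futures::pin_mut!(stream); // the returned stream is not Unpin
    let mut completion = String::new();
    while let Some(chunk) = stream.next().await {
        // Each item is a Result; errors can arrive mid-stream.
        completion.push_str(&chunk?.text);
    }
    Ok(completion)
}
```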

@@ -261,7 +261,7 @@ impl Telemetry {
         self: &Arc<Self>,
         conversation_id: Option<String>,
         kind: AssistantKind,
-        model: &str,
+        model: String,
     ) {
         let event = Event::Assistant(AssistantEvent {
             conversation_id,
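
A hedged note on this last hunk: with the parameter now an owned `String`,
call sites hand over ownership so the telemetry event can store the model
name without borrowing. The hunk cuts off above the signature, so the
method name below is an assumption:

```rust
// Hypothetical call site; `report_assistant_event` is assumed from context.
telemetry.report_assistant_event(conversation_id, AssistantKind::Panel, model.to_string());
```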