assistant: Limit amount of concurrent completion requests (#13856)

This PR refactors the completion providers so that only a maximum number
of completion requests are processed at a time.
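The limiting mechanism itself isn't visible in the diff excerpt below, so as a rough illustration of the idea, here is a minimal sketch that caps in-flight requests with a counting semaphore built only on `std` primitives. The names `RequestLimiter` and `MAX_CONCURRENT_COMPLETION_REQUESTS`, and the limit of 4, are hypothetical; the actual change builds on Zed's own async infrastructure rather than blocking OS threads.

```rust
use std::sync::{Arc, Condvar, Mutex};

// Hypothetical limit; the real value and mechanism are not shown in this excerpt.
const MAX_CONCURRENT_COMPLETION_REQUESTS: usize = 4;

/// A minimal counting semaphore: at most `MAX_CONCURRENT_COMPLETION_REQUESTS`
/// completion requests run at once; the rest wait for a free slot.
struct RequestLimiter {
    in_flight: Mutex<usize>,
    slot_freed: Condvar,
}

impl RequestLimiter {
    fn new() -> Arc<Self> {
        Arc::new(Self {
            in_flight: Mutex::new(0),
            slot_freed: Condvar::new(),
        })
    }

    /// Blocks until a slot is free, then counts this request as in flight.
    fn acquire(&self) {
        let mut count = self.in_flight.lock().unwrap();
        while *count >= MAX_CONCURRENT_COMPLETION_REQUESTS {
            count = self.slot_freed.wait(count).unwrap();
        }
        *count += 1;
    }

    /// Frees a slot and wakes one waiting request.
    fn release(&self) {
        *self.in_flight.lock().unwrap() -= 1;
        self.slot_freed.notify_one();
    }
}

fn main() {
    let limiter = RequestLimiter::new();
    let handles: Vec<_> = (0..8)
        .map(|i| {
            let limiter = Arc::clone(&limiter);
            std::thread::spawn(move || {
                limiter.acquire();
                // Stand-in for an actual completion request to a provider.
                println!("request {i} running");
                std::thread::sleep(std::time::Duration::from_millis(50));
                limiter.release();
            })
        })
        .collect();
    for handle in handles {
        handle.join().unwrap();
    }
}
```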

It also starts refactoring the language model providers to use traits,
making it easier to support multiple providers in the future.
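As a loose sketch of what a trait-based provider abstraction enables (the trait `CompletionProvider` and its methods here are made up for illustration and are not the `LanguageModel` API referenced in the diff), different backends can live behind one trait object and be selected or combined at runtime:

```rust
/// Illustrative trait; the real abstraction in the codebase differs in detail.
trait CompletionProvider {
    fn name(&self) -> &str;
    fn complete(&self, prompt: &str) -> String;
}

struct AnthropicProvider;
struct OllamaProvider;

impl CompletionProvider for AnthropicProvider {
    fn name(&self) -> &str {
        "anthropic"
    }
    fn complete(&self, prompt: &str) -> String {
        // Placeholder; a real provider would call the Anthropic API here.
        format!("[anthropic completion for: {prompt}]")
    }
}

impl CompletionProvider for OllamaProvider {
    fn name(&self) -> &str {
        "ollama"
    }
    fn complete(&self, prompt: &str) -> String {
        // Placeholder; a real provider would call a local Ollama server here.
        format!("[ollama completion for: {prompt}]")
    }
}

fn main() {
    // Because every provider implements the same trait, supporting another
    // backend later is just another `Box<dyn CompletionProvider>` in this list.
    let providers: Vec<Box<dyn CompletionProvider>> =
        vec![Box::new(AnthropicProvider), Box::new(OllamaProvider)];
    for provider in &providers {
        println!("{}: {}", provider.name(), provider.complete("hello"));
    }
}
```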

Release Notes:

- N/A
Bennet Bo Fenner, 2024-07-05 14:52:45 +02:00, committed by GitHub
parent f2711b2fca
commit c4dbe32f20
11 changed files with 693 additions and 532 deletions

@@ -1,5 +1,6 @@
 use std::fmt;
+use crate::{preprocess_anthropic_request, LanguageModel, LanguageModelRequest};
 pub use anthropic::Model as AnthropicModel;
 use gpui::Pixels;
 pub use ollama::Model as OllamaModel;
@@ -15,8 +16,6 @@ use serde::{
 use settings::{Settings, SettingsSources};
 use strum::{EnumIter, IntoEnumIterator};
-use crate::{preprocess_anthropic_request, LanguageModel, LanguageModelRequest};
 #[derive(Clone, Debug, Default, PartialEq, EnumIter)]
 pub enum CloudModel {
     Gpt3Point5Turbo,