assistant: Limit amount of concurrent completion requests (#13856)

This PR refactors the completion providers to only process a maximum
amount of completion requests at a time.

Also started refactoring language model providers to use traits, so it's
easier to allow specifying multiple providers in the future.

Release Notes:

- N/A
This commit is contained in:
Bennet Bo Fenner 2024-07-05 14:52:45 +02:00 committed by GitHub
parent f2711b2fca
commit c4dbe32f20
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 693 additions and 532 deletions

View file

@ -1026,9 +1026,10 @@ impl Codegen {
let telemetry = self.telemetry.clone();
let model_telemetry_id = prompt.model.telemetry_id();
let response = CompletionProvider::global(cx).complete(prompt);
let response = CompletionProvider::global(cx).complete(prompt, cx);
self.generation = cx.spawn(|this, mut cx| async move {
let response = response.await;
let generate = async {
let (mut hunks_tx, mut hunks_rx) = mpsc::channel(1);
@ -1036,7 +1037,7 @@ impl Codegen {
let mut response_latency = None;
let request_start = Instant::now();
let task = async {
let mut response = response.await?;
let mut response = response.inner.await?;
while let Some(chunk) = response.next().await {
if response_latency.is_none() {
response_latency = Some(request_start.elapsed());