assistant: Limit amount of concurrent completion requests (#13856)
This PR refactors the completion providers so that only a limited number of completion requests are processed concurrently. It also starts refactoring the language model providers to use traits, which will make it easier to support multiple providers in the future.

Release Notes:

- N/A
parent f2711b2fca
commit c4dbe32f20

11 changed files with 693 additions and 532 deletions
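The core idea is to cap the number of in-flight completion requests: a request only starts once a slot is free, and finishing a request frees its slot. Below is a minimal, hypothetical sketch of that pattern in Rust; the names (CompletionBackend, RateLimited) and the use of tokio's Semaphore are illustrative assumptions for the sketch, not Zed's actual implementation.

use tokio::sync::Semaphore;

// Illustrative provider trait (hypothetical, not Zed's actual API):
// each implementation performs one completion request.
trait CompletionBackend {
    async fn complete(&self, prompt: String) -> String;
}

// Wraps any backend and caps how many requests may be in flight at once.
struct RateLimited<P> {
    inner: P,
    semaphore: Semaphore,
}

impl<P: CompletionBackend> RateLimited<P> {
    fn new(inner: P, max_concurrent: usize) -> Self {
        Self {
            inner,
            semaphore: Semaphore::new(max_concurrent),
        }
    }

    // Waits until a slot is free, then forwards the request to the wrapped backend.
    async fn complete(&self, prompt: String) -> String {
        // The permit is held for the duration of the request and released on drop.
        let _permit = self
            .semaphore
            .acquire()
            .await
            .expect("semaphore is never closed");
        self.inner.complete(prompt).await
    }
}

Because the permit is released when it is dropped at the end of complete, a cancelled or panicking request cannot leak capacity.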
@@ -1026,9 +1026,10 @@ impl Codegen {
         let telemetry = self.telemetry.clone();
         let model_telemetry_id = prompt.model.telemetry_id();
-        let response = CompletionProvider::global(cx).complete(prompt);
+        let response = CompletionProvider::global(cx).complete(prompt, cx);
 
         self.generation = cx.spawn(|this, mut cx| async move {
+            let response = response.await;
             let generate = async {
                 let (mut hunks_tx, mut hunks_rx) = mpsc::channel(1);
 
@@ -1036,7 +1037,7 @@ impl Codegen {
                 let mut response_latency = None;
                 let request_start = Instant::now();
                 let task = async {
-                    let mut response = response.await?;
+                    let mut response = response.inner.await?;
                     while let Some(chunk) = response.next().await {
                         if response_latency.is_none() {
                             response_latency = Some(request_start.elapsed());
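Reading the two hunks together, complete(prompt, cx) now appears to return a value that is awaited in two stages: the outer await presumably resolves once the concurrency limiter admits the request, and the inner field then resolves to the actual response stream. A rough sketch of that shape, with hypothetical names and types (StreamingResponse, a futures mpsc receiver of strings) and assuming the futures and anyhow crates:

use futures::{channel::mpsc, future::BoxFuture, StreamExt};

struct StreamingResponse {
    // Second stage: resolves to the token stream once the provider starts responding.
    inner: BoxFuture<'static, anyhow::Result<mpsc::Receiver<String>>>,
}

async fn consume(
    response: impl std::future::Future<Output = StreamingResponse>,
) -> anyhow::Result<()> {
    // First await: resolves once the rate limiter admits this request.
    let response = response.await;
    // Second await: resolves when streaming begins; then read chunks as they arrive.
    let mut chunks = response.inner.await?;
    while let Some(chunk) = chunks.next().await {
        print!("{chunk}");
    }
    Ok(())
}

Note that in the hunk request_start is taken after the outer await, so the recorded response_latency appears to exclude any time spent waiting for a free request slot.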