assistant: Remove low_speed_timeout (#20681)

This removes the `low_speed_timeout` setting from all providers in
response to issue #19509.

The original `low_speed_timeout` was only added as part of #9913
because users wanted to _get rid of timeouts_: they wanted to bump the
default timeout from 5 seconds to something much higher.

Then, in #19055, the meaning of `low_speed_timeout` changed: it was
turned into a normal `timeout`, which is a different thing and breaks
slower LLMs that don't deliver a complete response within the
configured timeout.
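
For context, the two options behave very differently. Here is a minimal
sketch of that difference using the `isahc` crate directly (illustrative
only; not the code path Zed's `http_client` wrapper actually uses):

```rust
use std::time::Duration;

use isahc::{config::Configurable, HttpClient};

fn main() -> Result<(), isahc::Error> {
    // A *low-speed* timeout (curl's CURLOPT_LOW_SPEED_LIMIT/TIME) only
    // aborts when throughput stays below a rate (here 1 byte/s) for the
    // given window. A response that keeps trickling in is never cut off.
    let _low_speed_client = HttpClient::builder()
        .low_speed_timeout(1, Duration::from_secs(5))
        .build()?;

    // A plain `timeout` caps the *entire* request, so an LLM that needs
    // longer than this to finish streaming its answer gets cut off.
    let _total_timeout_client = HttpClient::builder()
        .timeout(Duration::from_secs(5))
        .build()?;

    Ok(())
}
```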

So we figured: let's remove the whole thing and replace it with a
default _connect_ timeout, to make sure that we can connect to a server
within 10 seconds, but then give the server as long as it needs to
complete its response.
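
Roughly, the new default behaves like this (again sketched directly
against `isahc` as an illustration rather than Zed's actual wrapper
code; the 10 seconds is the connect timeout described above):

```rust
use std::time::Duration;

use isahc::{config::Configurable, HttpClient};

fn main() -> Result<(), isahc::Error> {
    let _client = HttpClient::builder()
        // Fail fast if we can't even reach the server within 10 seconds...
        .connect_timeout(Duration::from_secs(10))
        // ...but set no overall or read timeout, so a slow LLM can take as
        // long as it needs to stream back its full response.
        .build()?;

    Ok(())
}
```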

Closes #19509

Release Notes:

- Removed the `low_speed_timeout` setting from LLM provider settings.
It was only ever used to _increase_ the timeout to give LLMs more time,
and since we have no other use for it, we removed the setting entirely
and now give LLMs as long as they need to respond.

---------

Co-authored-by: Antonio <antonio@zed.dev>
Co-authored-by: Peter Tripp <peter@zed.dev>
Thorsten Ball 2024-11-15 07:37:31 +01:00 committed by GitHub
parent c9546070ac
commit aee01f2c50
19 changed files with 109 additions and 345 deletions

View file

@@ -20,7 +20,7 @@ use serde::{Deserialize, Serialize};
use settings::{Settings, SettingsStore};
use std::pin::Pin;
use std::str::FromStr;
use std::{sync::Arc, time::Duration};
use std::sync::Arc;
use strum::IntoEnumIterator;
use theme::ThemeSettings;
use ui::{prelude::*, Icon, IconName, Tooltip};
@@ -32,7 +32,6 @@ const PROVIDER_NAME: &str = "Anthropic";
#[derive(Default, Clone, Debug, PartialEq)]
pub struct AnthropicSettings {
pub api_url: String,
pub low_speed_timeout: Option<Duration>,
/// Extend Zed's list of Anthropic models.
pub available_models: Vec<AvailableModel>,
pub needs_setting_migration: bool,
@@ -309,26 +308,17 @@ impl AnthropicModel {
{
let http_client = self.http_client.clone();
let Ok((api_key, api_url, low_speed_timeout)) = cx.read_model(&self.state, |state, cx| {
let Ok((api_key, api_url)) = cx.read_model(&self.state, |state, cx| {
let settings = &AllLanguageModelSettings::get_global(cx).anthropic;
(
state.api_key.clone(),
settings.api_url.clone(),
settings.low_speed_timeout,
)
(state.api_key.clone(), settings.api_url.clone())
}) else {
return futures::future::ready(Err(anyhow!("App state dropped"))).boxed();
};
async move {
let api_key = api_key.ok_or_else(|| anyhow!("Missing Anthropic API Key"))?;
let request = anthropic::stream_completion(
http_client.as_ref(),
&api_url,
&api_key,
request,
low_speed_timeout,
);
let request =
anthropic::stream_completion(http_client.as_ref(), &api_url, &api_key, request);
request.await.context("failed to stream completion")
}
.boxed()

View file

@@ -21,7 +21,7 @@ use gpui::{
AnyElement, AnyView, AppContext, AsyncAppContext, EventEmitter, FontWeight, Global, Model,
ModelContext, ReadGlobal, Subscription, Task,
};
use http_client::{AsyncBody, HttpClient, HttpRequestExt, Method, Response, StatusCode};
use http_client::{AsyncBody, HttpClient, Method, Response, StatusCode};
use proto::TypedEnvelope;
use schemars::JsonSchema;
use serde::{de::DeserializeOwned, Deserialize, Serialize};
@@ -32,7 +32,6 @@ use smol::{
lock::{RwLock, RwLockUpgradableReadGuard, RwLockWriteGuard},
};
use std::fmt;
use std::time::Duration;
use std::{
future,
sync::{Arc, LazyLock},
@@ -63,7 +62,6 @@ fn zed_cloud_provider_additional_models() -> &'static [AvailableModel] {
#[derive(Default, Clone, Debug, PartialEq)]
pub struct ZedDotDevSettings {
pub available_models: Vec<AvailableModel>,
pub low_speed_timeout: Option<Duration>,
}
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, JsonSchema)]
@@ -475,7 +473,6 @@ impl CloudLanguageModel {
client: Arc<Client>,
llm_api_token: LlmApiToken,
body: PerformCompletionParams,
low_speed_timeout: Option<Duration>,
) -> Result<Response<AsyncBody>> {
let http_client = &client.http_client();
@@ -483,10 +480,7 @@ impl CloudLanguageModel {
let mut did_retry = false;
let response = loop {
let mut request_builder = http_client::Request::builder();
if let Some(low_speed_timeout) = low_speed_timeout {
request_builder = request_builder.read_timeout(low_speed_timeout);
};
let request_builder = http_client::Request::builder();
let request = request_builder
.method(Method::POST)
.uri(http_client.build_zed_llm_url("/completion", &[])?.as_ref())
@@ -607,11 +601,8 @@ impl LanguageModel for CloudLanguageModel {
fn stream_completion(
&self,
request: LanguageModelRequest,
cx: &AsyncAppContext,
_cx: &AsyncAppContext,
) -> BoxFuture<'static, Result<BoxStream<'static, Result<LanguageModelCompletionEvent>>>> {
let openai_low_speed_timeout =
AllLanguageModelSettings::try_read_global(cx, |s| s.openai.low_speed_timeout.unwrap());
match &self.model {
CloudModel::Anthropic(model) => {
let request = request.into_anthropic(
@@ -632,7 +623,6 @@ impl LanguageModel for CloudLanguageModel {
&request,
)?)?,
},
None,
)
.await?;
Ok(map_to_language_model_completion_events(Box::pin(
@@ -656,7 +646,6 @@ impl LanguageModel for CloudLanguageModel {
&request,
)?)?,
},
openai_low_speed_timeout,
)
.await?;
Ok(open_ai::extract_text_from_events(response_lines(response)))
@@ -684,7 +673,6 @@ impl LanguageModel for CloudLanguageModel {
&request,
)?)?,
},
None,
)
.await?;
Ok(google_ai::extract_text_from_events(response_lines(
@@ -741,7 +729,6 @@ impl LanguageModel for CloudLanguageModel {
&request,
)?)?,
},
None,
)
.await?;
@@ -786,7 +773,6 @@ impl LanguageModel for CloudLanguageModel {
&request,
)?)?,
},
None,
)
.await?;

View file

@@ -14,7 +14,7 @@ use gpui::{
percentage, svg, Animation, AnimationExt, AnyView, AppContext, AsyncAppContext, Model, Render,
Subscription, Task, Transformation,
};
use settings::{Settings, SettingsStore};
use settings::SettingsStore;
use std::time::Duration;
use strum::IntoEnumIterator;
use ui::{
@@ -23,7 +23,6 @@ use ui::{
ViewContext, VisualContext, WindowContext,
};
use crate::settings::AllLanguageModelSettings;
use crate::{
LanguageModel, LanguageModelId, LanguageModelName, LanguageModelProvider,
LanguageModelProviderId, LanguageModelProviderName, LanguageModelRequest, RateLimiter, Role,
@@ -37,9 +36,7 @@ const PROVIDER_ID: &str = "copilot_chat";
const PROVIDER_NAME: &str = "GitHub Copilot Chat";
#[derive(Default, Clone, Debug, PartialEq)]
pub struct CopilotChatSettings {
pub low_speed_timeout: Option<Duration>,
}
pub struct CopilotChatSettings {}
pub struct CopilotChatLanguageModelProvider {
state: Model<State>,
@@ -218,17 +215,10 @@ impl LanguageModel for CopilotChatLanguageModel {
let copilot_request = self.to_copilot_chat_request(request);
let is_streaming = copilot_request.stream;
let Ok(low_speed_timeout) = cx.update(|cx| {
AllLanguageModelSettings::get_global(cx)
.copilot_chat
.low_speed_timeout
}) else {
return futures::future::ready(Err(anyhow::anyhow!("App state dropped"))).boxed();
};
let request_limiter = self.request_limiter.clone();
let future = cx.spawn(|cx| async move {
let response = CopilotChat::stream_completion(copilot_request, low_speed_timeout, cx);
let response = CopilotChat::stream_completion(copilot_request, cx);
request_limiter.stream(async move {
let response = response.await?;
let stream = response

View file

@@ -11,7 +11,7 @@ use http_client::HttpClient;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use settings::{Settings, SettingsStore};
use std::{future, sync::Arc, time::Duration};
use std::{future, sync::Arc};
use strum::IntoEnumIterator;
use theme::ThemeSettings;
use ui::{prelude::*, Icon, IconName, Tooltip};
@@ -30,7 +30,6 @@ const PROVIDER_NAME: &str = "Google AI";
#[derive(Default, Clone, Debug, PartialEq)]
pub struct GoogleSettings {
pub api_url: String,
pub low_speed_timeout: Option<Duration>,
pub available_models: Vec<AvailableModel>,
}
@@ -262,7 +261,6 @@ impl LanguageModel for GoogleLanguageModel {
let settings = &AllLanguageModelSettings::get_global(cx).google;
let api_url = settings.api_url.clone();
let low_speed_timeout = settings.low_speed_timeout;
async move {
let api_key = api_key.ok_or_else(|| anyhow!("Missing Google API key"))?;
@@ -273,7 +271,6 @@ impl LanguageModel for GoogleLanguageModel {
google_ai::CountTokensRequest {
contents: request.contents,
},
low_speed_timeout,
)
.await?;
Ok(response.total_tokens)
@@ -292,26 +289,17 @@ impl LanguageModel for GoogleLanguageModel {
let request = request.into_google(self.model.id().to_string());
let http_client = self.http_client.clone();
let Ok((api_key, api_url, low_speed_timeout)) = cx.read_model(&self.state, |state, cx| {
let Ok((api_key, api_url)) = cx.read_model(&self.state, |state, cx| {
let settings = &AllLanguageModelSettings::get_global(cx).google;
(
state.api_key.clone(),
settings.api_url.clone(),
settings.low_speed_timeout,
)
(state.api_key.clone(), settings.api_url.clone())
}) else {
return futures::future::ready(Err(anyhow!("App state dropped"))).boxed();
};
let future = self.rate_limiter.stream(async move {
let api_key = api_key.ok_or_else(|| anyhow!("Missing Google API Key"))?;
let response = stream_generate_content(
http_client.as_ref(),
&api_url,
&api_key,
request,
low_speed_timeout,
);
let response =
stream_generate_content(http_client.as_ref(), &api_url, &api_key, request);
let events = response.await?;
Ok(google_ai::extract_text_from_events(events).boxed())
});

View file

@@ -9,7 +9,7 @@ use ollama::{
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use settings::{Settings, SettingsStore};
use std::{collections::BTreeMap, sync::Arc, time::Duration};
use std::{collections::BTreeMap, sync::Arc};
use ui::{prelude::*, ButtonLike, Indicator};
use util::ResultExt;
@@ -30,7 +30,6 @@ const PROVIDER_NAME: &str = "Ollama";
#[derive(Default, Debug, Clone, PartialEq)]
pub struct OllamaSettings {
pub api_url: String,
pub low_speed_timeout: Option<Duration>,
pub available_models: Vec<AvailableModel>,
}
@@ -327,17 +326,15 @@ impl LanguageModel for OllamaLanguageModel {
let request = self.to_ollama_request(request);
let http_client = self.http_client.clone();
let Ok((api_url, low_speed_timeout)) = cx.update(|cx| {
let Ok(api_url) = cx.update(|cx| {
let settings = &AllLanguageModelSettings::get_global(cx).ollama;
(settings.api_url.clone(), settings.low_speed_timeout)
settings.api_url.clone()
}) else {
return futures::future::ready(Err(anyhow!("App state dropped"))).boxed();
};
let future = self.request_limiter.stream(async move {
let response =
stream_chat_completion(http_client.as_ref(), &api_url, request, low_speed_timeout)
.await?;
let response = stream_chat_completion(http_client.as_ref(), &api_url, request).await?;
let stream = response
.filter_map(|response| async move {
match response {

View file

@@ -13,7 +13,7 @@ use open_ai::{
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use settings::{Settings, SettingsStore};
use std::{sync::Arc, time::Duration};
use std::sync::Arc;
use strum::IntoEnumIterator;
use theme::ThemeSettings;
use ui::{prelude::*, Icon, IconName, Tooltip};
@@ -32,7 +32,6 @@ const PROVIDER_NAME: &str = "OpenAI";
#[derive(Default, Clone, Debug, PartialEq)]
pub struct OpenAiSettings {
pub api_url: String,
pub low_speed_timeout: Option<Duration>,
pub available_models: Vec<AvailableModel>,
pub needs_setting_migration: bool,
}
@@ -229,26 +228,16 @@ impl OpenAiLanguageModel {
) -> BoxFuture<'static, Result<futures::stream::BoxStream<'static, Result<ResponseStreamEvent>>>>
{
let http_client = self.http_client.clone();
let Ok((api_key, api_url, low_speed_timeout)) = cx.read_model(&self.state, |state, cx| {
let Ok((api_key, api_url)) = cx.read_model(&self.state, |state, cx| {
let settings = &AllLanguageModelSettings::get_global(cx).openai;
(
state.api_key.clone(),
settings.api_url.clone(),
settings.low_speed_timeout,
)
(state.api_key.clone(), settings.api_url.clone())
}) else {
return futures::future::ready(Err(anyhow!("App state dropped"))).boxed();
};
let future = self.request_limiter.stream(async move {
let api_key = api_key.ok_or_else(|| anyhow!("Missing OpenAI API Key"))?;
let request = stream_completion(
http_client.as_ref(),
&api_url,
&api_key,
request,
low_speed_timeout,
);
let request = stream_completion(http_client.as_ref(), &api_url, &api_key, request);
let response = request.await?;
Ok(response)
});

View file

@@ -1,4 +1,4 @@
use std::{sync::Arc, time::Duration};
use std::sync::Arc;
use anyhow::Result;
use gpui::AppContext;
@@ -87,7 +87,6 @@ impl AnthropicSettingsContent {
AnthropicSettingsContent::Legacy(content) => (
AnthropicSettingsContentV1 {
api_url: content.api_url,
low_speed_timeout_in_seconds: content.low_speed_timeout_in_seconds,
available_models: content.available_models.map(|models| {
models
.into_iter()
@@ -132,7 +131,6 @@ impl AnthropicSettingsContent {
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, JsonSchema)]
pub struct LegacyAnthropicSettingsContent {
pub api_url: Option<String>,
pub low_speed_timeout_in_seconds: Option<u64>,
pub available_models: Option<Vec<anthropic::Model>>,
}
@@ -146,14 +144,12 @@ pub enum VersionedAnthropicSettingsContent {
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, JsonSchema)]
pub struct AnthropicSettingsContentV1 {
pub api_url: Option<String>,
pub low_speed_timeout_in_seconds: Option<u64>,
pub available_models: Option<Vec<provider::anthropic::AvailableModel>>,
}
#[derive(Default, Clone, Debug, Serialize, Deserialize, PartialEq, JsonSchema)]
pub struct OllamaSettingsContent {
pub api_url: Option<String>,
pub low_speed_timeout_in_seconds: Option<u64>,
pub available_models: Option<Vec<provider::ollama::AvailableModel>>,
}
@@ -170,7 +166,6 @@ impl OpenAiSettingsContent {
OpenAiSettingsContent::Legacy(content) => (
OpenAiSettingsContentV1 {
api_url: content.api_url,
low_speed_timeout_in_seconds: content.low_speed_timeout_in_seconds,
available_models: content.available_models.map(|models| {
models
.into_iter()
@@ -205,7 +200,6 @@ impl OpenAiSettingsContent {
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, JsonSchema)]
pub struct LegacyOpenAiSettingsContent {
pub api_url: Option<String>,
pub low_speed_timeout_in_seconds: Option<u64>,
pub available_models: Option<Vec<open_ai::Model>>,
}
@@ -219,27 +213,22 @@ pub enum VersionedOpenAiSettingsContent {
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, JsonSchema)]
pub struct OpenAiSettingsContentV1 {
pub api_url: Option<String>,
pub low_speed_timeout_in_seconds: Option<u64>,
pub available_models: Option<Vec<provider::open_ai::AvailableModel>>,
}
#[derive(Default, Clone, Debug, Serialize, Deserialize, PartialEq, JsonSchema)]
pub struct GoogleSettingsContent {
pub api_url: Option<String>,
pub low_speed_timeout_in_seconds: Option<u64>,
pub available_models: Option<Vec<provider::google::AvailableModel>>,
}
#[derive(Default, Clone, Debug, Serialize, Deserialize, PartialEq, JsonSchema)]
pub struct ZedDotDevSettingsContent {
available_models: Option<Vec<cloud::AvailableModel>>,
pub low_speed_timeout_in_seconds: Option<u64>,
}
#[derive(Default, Clone, Debug, Serialize, Deserialize, PartialEq, JsonSchema)]
pub struct CopilotChatSettingsContent {
low_speed_timeout_in_seconds: Option<u64>,
}
pub struct CopilotChatSettingsContent {}
impl settings::Settings for AllLanguageModelSettings {
const KEY: Option<&'static str> = Some("language_models");
@@ -272,13 +261,6 @@ impl settings::Settings for AllLanguageModelSettings {
&mut settings.anthropic.api_url,
anthropic.as_ref().and_then(|s| s.api_url.clone()),
);
if let Some(low_speed_timeout_in_seconds) = anthropic
.as_ref()
.and_then(|s| s.low_speed_timeout_in_seconds)
{
settings.anthropic.low_speed_timeout =
Some(Duration::from_secs(low_speed_timeout_in_seconds));
}
merge(
&mut settings.anthropic.available_models,
anthropic.as_ref().and_then(|s| s.available_models.clone()),
@@ -291,14 +273,6 @@ impl settings::Settings for AllLanguageModelSettings {
&mut settings.ollama.api_url,
value.ollama.as_ref().and_then(|s| s.api_url.clone()),
);
if let Some(low_speed_timeout_in_seconds) = value
.ollama
.as_ref()
.and_then(|s| s.low_speed_timeout_in_seconds)
{
settings.ollama.low_speed_timeout =
Some(Duration::from_secs(low_speed_timeout_in_seconds));
}
merge(
&mut settings.ollama.available_models,
ollama.as_ref().and_then(|s| s.available_models.clone()),
@@ -318,17 +292,10 @@ impl settings::Settings for AllLanguageModelSettings {
&mut settings.openai.api_url,
openai.as_ref().and_then(|s| s.api_url.clone()),
);
if let Some(low_speed_timeout_in_seconds) =
openai.as_ref().and_then(|s| s.low_speed_timeout_in_seconds)
{
settings.openai.low_speed_timeout =
Some(Duration::from_secs(low_speed_timeout_in_seconds));
}
merge(
&mut settings.openai.available_models,
openai.as_ref().and_then(|s| s.available_models.clone()),
);
merge(
&mut settings.zed_dot_dev.available_models,
value
@@ -336,27 +303,10 @@ impl settings::Settings for AllLanguageModelSettings {
.as_ref()
.and_then(|s| s.available_models.clone()),
);
if let Some(low_speed_timeout_in_seconds) = value
.zed_dot_dev
.as_ref()
.and_then(|s| s.low_speed_timeout_in_seconds)
{
settings.zed_dot_dev.low_speed_timeout =
Some(Duration::from_secs(low_speed_timeout_in_seconds));
}
merge(
&mut settings.google.api_url,
value.google.as_ref().and_then(|s| s.api_url.clone()),
);
if let Some(low_speed_timeout_in_seconds) = value
.google
.as_ref()
.and_then(|s| s.low_speed_timeout_in_seconds)
{
settings.google.low_speed_timeout =
Some(Duration::from_secs(low_speed_timeout_in_seconds));
}
merge(
&mut settings.google.available_models,
value
@@ -364,15 +314,6 @@ impl settings::Settings for AllLanguageModelSettings {
.as_ref()
.and_then(|s| s.available_models.clone()),
);
if let Some(low_speed_timeout) = value
.copilot_chat
.as_ref()
.and_then(|s| s.low_speed_timeout_in_seconds)
{
settings.copilot_chat.low_speed_timeout =
Some(Duration::from_secs(low_speed_timeout));
}
}
Ok(settings)