Default to fast model for thread summaries and titles + don't include system prompt / context / thinking segments (#29102)

* Adds a fast / cheaper model to providers and defaults thread
summarization to this model. The initial motivation for this was that
https://github.com/zed-industries/zed/pull/29099 would cause these
requests to fail when used with a thinking model. It doesn't seem
correct to use a thinking model for summarization anyway.

* Skips system prompt, context, and thinking segments.

* If tool use is in progress, allows 2 tool uses plus one more agent
response before summarizing.

A downside of this change is that it removes some potential for prefix
cache reuse, especially for title summarization (thread summarization
already omitted tool results and so would not share a prefix for those).
This seems fine, as these requests should typically be fairly small. Even
for full thread summarization, skipping all tool use / context should
greatly reduce token use.

Release Notes:

- N/A
This commit is contained in:
Michael Sloan 2025-04-19 17:26:29 -06:00 committed by GitHub
parent d48152d958
commit fbf7caf93e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
25 changed files with 270 additions and 205 deletions

View file

@ -201,7 +201,7 @@ impl AnthropicLanguageModelProvider {
state: self.state.clone(),
http_client: self.http_client.clone(),
request_limiter: RateLimiter::new(4),
}) as Arc<dyn LanguageModel>
})
}
}
@ -227,14 +227,11 @@ impl LanguageModelProvider for AnthropicLanguageModelProvider {
}
fn default_model(&self, _cx: &App) -> Option<Arc<dyn LanguageModel>> {
let model = anthropic::Model::default();
Some(Arc::new(AnthropicModel {
id: LanguageModelId::from(model.id().to_string()),
model,
state: self.state.clone(),
http_client: self.http_client.clone(),
request_limiter: RateLimiter::new(4),
}))
Some(self.create_language_model(anthropic::Model::default()))
}
fn default_fast_model(&self, _cx: &App) -> Option<Arc<dyn LanguageModel>> {
Some(self.create_language_model(anthropic::Model::default_fast()))
}
fn recommended_models(&self, _cx: &App) -> Vec<Arc<dyn LanguageModel>> {

View file

@ -286,6 +286,18 @@ impl BedrockLanguageModelProvider {
state,
}
}
fn create_language_model(&self, model: bedrock::Model) -> Arc<dyn LanguageModel> {
Arc::new(BedrockModel {
id: LanguageModelId::from(model.id().to_string()),
model,
http_client: self.http_client.clone(),
handler: self.handler.clone(),
state: self.state.clone(),
client: OnceCell::new(),
request_limiter: RateLimiter::new(4),
})
}
}
impl LanguageModelProvider for BedrockLanguageModelProvider {
@ -302,16 +314,11 @@ impl LanguageModelProvider for BedrockLanguageModelProvider {
}
fn default_model(&self, _cx: &App) -> Option<Arc<dyn LanguageModel>> {
let model = bedrock::Model::default();
Some(Arc::new(BedrockModel {
id: LanguageModelId::from(model.id().to_string()),
model,
http_client: self.http_client.clone(),
handler: self.handler.clone(),
state: self.state.clone(),
client: OnceCell::new(),
request_limiter: RateLimiter::new(4),
}))
Some(self.create_language_model(bedrock::Model::default()))
}
fn default_fast_model(&self, _cx: &App) -> Option<Arc<dyn LanguageModel>> {
Some(self.create_language_model(bedrock::Model::default_fast()))
}
fn provided_models(&self, cx: &App) -> Vec<Arc<dyn LanguageModel>> {
@ -343,17 +350,7 @@ impl LanguageModelProvider for BedrockLanguageModelProvider {
models
.into_values()
.map(|model| {
Arc::new(BedrockModel {
id: LanguageModelId::from(model.id().to_string()),
model,
http_client: self.http_client.clone(),
handler: self.handler.clone(),
state: self.state.clone(),
client: OnceCell::new(),
request_limiter: RateLimiter::new(4),
}) as Arc<dyn LanguageModel>
})
.map(|model| self.create_language_model(model))
.collect()
}

View file

@ -242,7 +242,7 @@ impl CloudLanguageModelProvider {
llm_api_token: llm_api_token.clone(),
client: self.client.clone(),
request_limiter: RateLimiter::new(4),
}) as Arc<dyn LanguageModel>
})
}
}
@ -270,13 +270,13 @@ impl LanguageModelProvider for CloudLanguageModelProvider {
fn default_model(&self, cx: &App) -> Option<Arc<dyn LanguageModel>> {
let llm_api_token = self.state.read(cx).llm_api_token.clone();
let model = CloudModel::Anthropic(anthropic::Model::default());
Some(Arc::new(CloudLanguageModel {
id: LanguageModelId::from(model.id().to_string()),
model,
llm_api_token: llm_api_token.clone(),
client: self.client.clone(),
request_limiter: RateLimiter::new(4),
}))
Some(self.create_language_model(model, llm_api_token))
}
fn default_fast_model(&self, cx: &App) -> Option<Arc<dyn LanguageModel>> {
let llm_api_token = self.state.read(cx).llm_api_token.clone();
let model = CloudModel::Anthropic(anthropic::Model::default_fast());
Some(self.create_language_model(model, llm_api_token))
}
fn recommended_models(&self, cx: &App) -> Vec<Arc<dyn LanguageModel>> {

View file

@ -70,6 +70,13 @@ impl CopilotChatLanguageModelProvider {
Self { state }
}
fn create_language_model(&self, model: CopilotChatModel) -> Arc<dyn LanguageModel> {
Arc::new(CopilotChatLanguageModel {
model,
request_limiter: RateLimiter::new(4),
})
}
}
impl LanguageModelProviderState for CopilotChatLanguageModelProvider {
@ -94,21 +101,16 @@ impl LanguageModelProvider for CopilotChatLanguageModelProvider {
}
fn default_model(&self, _cx: &App) -> Option<Arc<dyn LanguageModel>> {
let model = CopilotChatModel::default();
Some(Arc::new(CopilotChatLanguageModel {
model,
request_limiter: RateLimiter::new(4),
}) as Arc<dyn LanguageModel>)
Some(self.create_language_model(CopilotChatModel::default()))
}
fn default_fast_model(&self, _cx: &App) -> Option<Arc<dyn LanguageModel>> {
Some(self.create_language_model(CopilotChatModel::default_fast()))
}
fn provided_models(&self, _cx: &App) -> Vec<Arc<dyn LanguageModel>> {
CopilotChatModel::iter()
.map(|model| {
Arc::new(CopilotChatLanguageModel {
model,
request_limiter: RateLimiter::new(4),
}) as Arc<dyn LanguageModel>
})
.map(|model| self.create_language_model(model))
.collect()
}

View file

@ -140,6 +140,16 @@ impl DeepSeekLanguageModelProvider {
Self { http_client, state }
}
fn create_language_model(&self, model: deepseek::Model) -> Arc<dyn LanguageModel> {
Arc::new(DeepSeekLanguageModel {
id: LanguageModelId::from(model.id().to_string()),
model,
state: self.state.clone(),
http_client: self.http_client.clone(),
request_limiter: RateLimiter::new(4),
}) as Arc<dyn LanguageModel>
}
}
impl LanguageModelProviderState for DeepSeekLanguageModelProvider {
@ -164,14 +174,11 @@ impl LanguageModelProvider for DeepSeekLanguageModelProvider {
}
fn default_model(&self, _cx: &App) -> Option<Arc<dyn LanguageModel>> {
let model = deepseek::Model::Chat;
Some(Arc::new(DeepSeekLanguageModel {
id: LanguageModelId::from(model.id().to_string()),
model,
state: self.state.clone(),
http_client: self.http_client.clone(),
request_limiter: RateLimiter::new(4),
}))
Some(self.create_language_model(deepseek::Model::default()))
}
fn default_fast_model(&self, _cx: &App) -> Option<Arc<dyn LanguageModel>> {
Some(self.create_language_model(deepseek::Model::default_fast()))
}
fn provided_models(&self, cx: &App) -> Vec<Arc<dyn LanguageModel>> {
@ -198,15 +205,7 @@ impl LanguageModelProvider for DeepSeekLanguageModelProvider {
models
.into_values()
.map(|model| {
Arc::new(DeepSeekLanguageModel {
id: LanguageModelId::from(model.id().to_string()),
model,
state: self.state.clone(),
http_client: self.http_client.clone(),
request_limiter: RateLimiter::new(4),
}) as Arc<dyn LanguageModel>
})
.map(|model| self.create_language_model(model))
.collect()
}

View file

@ -150,6 +150,16 @@ impl GoogleLanguageModelProvider {
Self { http_client, state }
}
fn create_language_model(&self, model: google_ai::Model) -> Arc<dyn LanguageModel> {
Arc::new(GoogleLanguageModel {
id: LanguageModelId::from(model.id().to_string()),
model,
state: self.state.clone(),
http_client: self.http_client.clone(),
request_limiter: RateLimiter::new(4),
})
}
}
impl LanguageModelProviderState for GoogleLanguageModelProvider {
@ -174,14 +184,11 @@ impl LanguageModelProvider for GoogleLanguageModelProvider {
}
fn default_model(&self, _cx: &App) -> Option<Arc<dyn LanguageModel>> {
let model = google_ai::Model::default();
Some(Arc::new(GoogleLanguageModel {
id: LanguageModelId::from(model.id().to_string()),
model,
state: self.state.clone(),
http_client: self.http_client.clone(),
request_limiter: RateLimiter::new(4),
}))
Some(self.create_language_model(google_ai::Model::default()))
}
fn default_fast_model(&self, _cx: &App) -> Option<Arc<dyn LanguageModel>> {
Some(self.create_language_model(google_ai::Model::default_fast()))
}
fn provided_models(&self, cx: &App) -> Vec<Arc<dyn LanguageModel>> {

View file

@ -157,6 +157,10 @@ impl LanguageModelProvider for LmStudioLanguageModelProvider {
self.provided_models(cx).into_iter().next()
}
fn default_fast_model(&self, cx: &App) -> Option<Arc<dyn LanguageModel>> {
self.default_model(cx)
}
fn provided_models(&self, cx: &App) -> Vec<Arc<dyn LanguageModel>> {
let mut models: BTreeMap<String, lmstudio::Model> = BTreeMap::default();

View file

@ -144,6 +144,16 @@ impl MistralLanguageModelProvider {
Self { http_client, state }
}
fn create_language_model(&self, model: mistral::Model) -> Arc<dyn LanguageModel> {
Arc::new(MistralLanguageModel {
id: LanguageModelId::from(model.id().to_string()),
model,
state: self.state.clone(),
http_client: self.http_client.clone(),
request_limiter: RateLimiter::new(4),
})
}
}
impl LanguageModelProviderState for MistralLanguageModelProvider {
@ -168,14 +178,11 @@ impl LanguageModelProvider for MistralLanguageModelProvider {
}
fn default_model(&self, _cx: &App) -> Option<Arc<dyn LanguageModel>> {
let model = mistral::Model::default();
Some(Arc::new(MistralLanguageModel {
id: LanguageModelId::from(model.id().to_string()),
model,
state: self.state.clone(),
http_client: self.http_client.clone(),
request_limiter: RateLimiter::new(4),
}))
Some(self.create_language_model(mistral::Model::default()))
}
fn default_fast_model(&self, _cx: &App) -> Option<Arc<dyn LanguageModel>> {
Some(self.create_language_model(mistral::Model::default_fast()))
}
fn provided_models(&self, cx: &App) -> Vec<Arc<dyn LanguageModel>> {

View file

@ -162,6 +162,10 @@ impl LanguageModelProvider for OllamaLanguageModelProvider {
self.provided_models(cx).into_iter().next()
}
fn default_fast_model(&self, cx: &App) -> Option<Arc<dyn LanguageModel>> {
self.default_model(cx)
}
fn provided_models(&self, cx: &App) -> Vec<Arc<dyn LanguageModel>> {
let mut models: BTreeMap<String, ollama::Model> = BTreeMap::default();

View file

@ -148,6 +148,16 @@ impl OpenAiLanguageModelProvider {
Self { http_client, state }
}
fn create_language_model(&self, model: open_ai::Model) -> Arc<dyn LanguageModel> {
Arc::new(OpenAiLanguageModel {
id: LanguageModelId::from(model.id().to_string()),
model,
state: self.state.clone(),
http_client: self.http_client.clone(),
request_limiter: RateLimiter::new(4),
})
}
}
impl LanguageModelProviderState for OpenAiLanguageModelProvider {
@ -172,14 +182,11 @@ impl LanguageModelProvider for OpenAiLanguageModelProvider {
}
fn default_model(&self, _cx: &App) -> Option<Arc<dyn LanguageModel>> {
let model = open_ai::Model::default();
Some(Arc::new(OpenAiLanguageModel {
id: LanguageModelId::from(model.id().to_string()),
model,
state: self.state.clone(),
http_client: self.http_client.clone(),
request_limiter: RateLimiter::new(4),
}))
Some(self.create_language_model(open_ai::Model::default()))
}
fn default_fast_model(&self, _cx: &App) -> Option<Arc<dyn LanguageModel>> {
Some(self.create_language_model(open_ai::Model::default_fast()))
}
fn provided_models(&self, cx: &App) -> Vec<Arc<dyn LanguageModel>> {
@ -211,15 +218,7 @@ impl LanguageModelProvider for OpenAiLanguageModelProvider {
models
.into_values()
.map(|model| {
Arc::new(OpenAiLanguageModel {
id: LanguageModelId::from(model.id().to_string()),
model,
state: self.state.clone(),
http_client: self.http_client.clone(),
request_limiter: RateLimiter::new(4),
}) as Arc<dyn LanguageModel>
})
.map(|model| self.create_language_model(model))
.collect()
}