Default to fast model for thread summaries and titles + don't include system prompt / context / thinking segments (#29102)
* Adds a fast / cheaper model to providers and defaults thread summarization to this model. The initial motivation was that https://github.com/zed-industries/zed/pull/29099 would cause these requests to fail when used with a thinking model, and it doesn't seem correct to use a thinking model for summarization anyway.

* Skips system prompt, context, and thinking segments when building the summarization request (illustrated by the sketch below).

* If tool use is happening, allows 2 tool uses plus one more agent response before summarizing.

The downside is that there was previously some potential for prefix cache reuse, especially for title summarization (thread summarization omitted tool results and so would not share a prefix for those). This seems fine, as these requests should typically be fairly small. Even for full thread summarization, skipping all tool use / context should greatly reduce token use.

Release Notes:

- N/A
Parent: d48152d958
Commit: fbf7caf93e
25 changed files with 270 additions and 205 deletions
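The second bullet above describes filtering out system prompt, context, and thinking segments before requesting a summary. A minimal, hypothetical sketch of that kind of filtering; the `Segment` enum and `text_for_summarization` helper are illustrative names, not the actual types in the Zed codebase:

```rust
// Hypothetical illustration of skipping thinking segments when assembling
// the text sent to the summarization model; names are illustrative only.
enum Segment {
    Text(String),
    Thinking(String),
}

fn text_for_summarization(segments: &[Segment]) -> String {
    segments
        .iter()
        .filter_map(|segment| match segment {
            // Plain text segments contribute to the summary request.
            Segment::Text(text) => Some(text.as_str()),
            // Thinking segments (and, in the real change, the system prompt
            // and attached context) are skipped entirely.
            Segment::Thinking(_) => None,
        })
        .collect::<Vec<_>>()
        .join("\n")
}

fn main() {
    let segments = vec![
        Segment::Thinking("internal reasoning".to_string()),
        Segment::Text("Here is the fix for the bug.".to_string()),
    ];
    assert_eq!(text_for_summarization(&segments), "Here is the fix for the bug.");
}
```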
```diff
@@ -5,6 +5,7 @@ use crate::{
 use collections::BTreeMap;
 use gpui::{App, Context, Entity, EventEmitter, Global, prelude::*};
 use std::sync::Arc;
+use util::maybe;
 
 pub fn init(cx: &mut App) {
     let registry = cx.new(|_cx| LanguageModelRegistry::default());
@@ -18,6 +19,7 @@ impl Global for GlobalLanguageModelRegistry {}
 #[derive(Default)]
 pub struct LanguageModelRegistry {
     default_model: Option<ConfiguredModel>,
+    default_fast_model: Option<ConfiguredModel>,
     inline_assistant_model: Option<ConfiguredModel>,
     commit_message_model: Option<ConfiguredModel>,
     thread_summary_model: Option<ConfiguredModel>,
@@ -202,6 +204,14 @@ impl LanguageModelRegistry {
             (None, None) => {}
             _ => cx.emit(Event::DefaultModelChanged),
         }
+        self.default_fast_model = maybe!({
+            let provider = &model.as_ref()?.provider;
+            let fast_model = provider.default_fast_model(cx)?;
+            Some(ConfiguredModel {
+                provider: provider.clone(),
+                model: fast_model,
+            })
+        });
         self.default_model = model;
     }
 
```
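The `maybe!` block added above comes from the `util` crate (see the new `use util::maybe;` import); it lets `?` short-circuit to `None` within a statement instead of returning from the enclosing function. A self-contained sketch of the same pattern, written with an immediately invoked closure and placeholder types, assuming `maybe!` expands to roughly this shape:

```rust
// Placeholder types standing in for the real provider/model objects; this
// only demonstrates the control flow of a maybe!-style block.
#[derive(Clone, Debug, PartialEq)]
struct Model(&'static str);

struct Provider {
    fast_model: Option<Model>,
}

fn pick_default_fast_model(provider: Option<&Provider>) -> Option<Model> {
    // Comparable in spirit to: maybe!({ provider?.fast_model.clone() })
    // The closure lets `?` bail out with None without leaving the caller.
    (|| {
        let provider = provider?;
        provider.fast_model.clone()
    })()
}

fn main() {
    let provider = Provider {
        fast_model: Some(Model("fast")),
    };
    assert_eq!(pick_default_fast_model(Some(&provider)), Some(Model("fast")));
    assert_eq!(pick_default_fast_model(None), None);
}
```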
```diff
@@ -254,21 +264,37 @@ impl LanguageModelRegistry {
     }
 
     pub fn inline_assistant_model(&self) -> Option<ConfiguredModel> {
+        #[cfg(debug_assertions)]
+        if std::env::var("ZED_SIMULATE_NO_LLM_PROVIDER").is_ok() {
+            return None;
+        }
+
         self.inline_assistant_model
             .clone()
-            .or_else(|| self.default_model())
+            .or_else(|| self.default_model.clone())
     }
 
     pub fn commit_message_model(&self) -> Option<ConfiguredModel> {
+        #[cfg(debug_assertions)]
+        if std::env::var("ZED_SIMULATE_NO_LLM_PROVIDER").is_ok() {
+            return None;
+        }
+
         self.commit_message_model
             .clone()
-            .or_else(|| self.default_model())
+            .or_else(|| self.default_model.clone())
     }
 
     pub fn thread_summary_model(&self) -> Option<ConfiguredModel> {
+        #[cfg(debug_assertions)]
+        if std::env::var("ZED_SIMULATE_NO_LLM_PROVIDER").is_ok() {
+            return None;
+        }
+
         self.thread_summary_model
             .clone()
-            .or_else(|| self.default_model())
+            .or_else(|| self.default_fast_model.clone())
+            .or_else(|| self.default_model.clone())
     }
 
     /// The models to use for inline assists. Returns the union of the active
```
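The change in `thread_summary_model` above is the core of the new behavior: an explicitly configured thread-summary model wins, then the provider's fast model, then the regular default model. A simplified, self-contained sketch of how that `or_else` chain resolves; the field types and model names are placeholders, and the real registry stores `ConfiguredModel` and also honors the `ZED_SIMULATE_NO_LLM_PROVIDER` check shown in the diff:

```rust
// Simplified registry showing only the fallback order; the real struct
// stores Option<ConfiguredModel> fields plus the debug-only env check.
#[derive(Clone, Debug, PartialEq)]
struct Model(&'static str);

#[derive(Default)]
struct Registry {
    thread_summary_model: Option<Model>,
    default_fast_model: Option<Model>,
    default_model: Option<Model>,
}

impl Registry {
    fn thread_summary_model(&self) -> Option<Model> {
        self.thread_summary_model
            .clone()
            // Prefer the cheaper fast model over the full default model.
            .or_else(|| self.default_fast_model.clone())
            .or_else(|| self.default_model.clone())
    }
}

fn main() {
    // No explicit summary model configured: the provider's fast model wins.
    let registry = Registry {
        thread_summary_model: None,
        default_fast_model: Some(Model("fast-model")),
        default_model: Some(Model("default-model")),
    };
    assert_eq!(registry.thread_summary_model(), Some(Model("fast-model")));

    // No fast model available: fall back to the regular default model.
    let registry = Registry {
        default_model: Some(Model("default-model")),
        ..Default::default()
    };
    assert_eq!(registry.thread_summary_model(), Some(Model("default-model")));
}
```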