Add thinking budget for Gemini custom models (#31251)

Closes #31243

As described in my issue, the [thinking
budget](https://ai.google.dev/gemini-api/docs/thinking) is
automatically chosen by Gemini unless it is explicitly set.
To get fast responses (e.g., for the inline assistant), I prefer
to set it to 0.

Release Notes:

- ai: Added `thinking` mode for custom Google models with configurable
token budget

---------

Co-authored-by: Ben Brandt <benjamin.j.brandt@gmail.com>
This commit is contained in:
90aca 2025-06-03 13:40:20 +02:00 committed by GitHub
parent b74477d12e
commit cf931247d0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 82 additions and 5 deletions

View file

@@ -4,6 +4,7 @@ use client::{Client, UserStore, zed_urls};
use futures::{
AsyncBufReadExt, FutureExt, Stream, StreamExt, future::BoxFuture, stream::BoxStream,
};
use google_ai::GoogleModelMode;
use gpui::{
AnyElement, AnyView, App, AsyncApp, Context, Entity, SemanticVersion, Subscription, Task,
};
@@ -750,7 +751,8 @@ impl LanguageModel for CloudLanguageModel {
let client = self.client.clone();
let llm_api_token = self.llm_api_token.clone();
let model_id = self.model.id.to_string();
let generate_content_request = into_google(request, model_id.clone());
let generate_content_request =
into_google(request, model_id.clone(), GoogleModelMode::Default);
async move {
let http_client = &client.http_client();
let token = llm_api_token.acquire(&client).await?;
@@ -922,7 +924,8 @@ impl LanguageModel for CloudLanguageModel {
}
zed_llm_client::LanguageModelProvider::Google => {
let client = self.client.clone();
let request = into_google(request, self.model.id.to_string());
let request =
into_google(request, self.model.id.to_string(), GoogleModelMode::Default);
let llm_api_token = self.llm_api_token.clone();
let future = self.request_limiter.stream(async move {
let PerformLlmCompletionResponse {