From 52fa7ababb7feff1a3434e03e94d7a26b34b3d24 Mon Sep 17 00:00:00 2001 From: Elijah McMorris Date: Fri, 6 Jun 2025 13:21:23 -0700 Subject: [PATCH] lmstudio: Fill max_tokens using the response from /models (#25606) The info for `max_tokens` for the model is included in `{api_url}/models` I don't think this needs to be `.clamp` like in `crates/ollama/src/ollama.rs` `get_max_tokens`, but it might need to be ## Before: Every model shows 2k ![image](https://github.com/user-attachments/assets/676075c8-0ceb-44b1-ae27-72ed6a6d783c) ## After: ![image](https://github.com/user-attachments/assets/8291535b-976e-4601-b617-1a508bf44e12) ### Json from `{api_url}/models` with model not loaded ```json { "id": "qwen2.5-coder-1.5b-instruct-mlx", "object": "model", "type": "llm", "publisher": "lmstudio-community", "arch": "qwen2", "compatibility_type": "mlx", "quantization": "4bit", "state": "not-loaded", "max_context_length": 32768 }, ``` ## Notes The response from `{api_url}/models` seems to return the `max_tokens` for the model, not the currently configured context length, but I think showing the `max_tokens` for the model is better than setting 2k for everything `loaded_context_length` exists, but only if the model is loaded at the startup of zed, which usually isn't the case maybe `fetch_models` should be rerun when swapping lmstudio models ### Currently configured context this isn't shown in `{api_url}/models` ![image](https://github.com/user-attachments/assets/8511cb9d-914b-4065-9eba-c0b086ad253b) ### Json from `{api_url}/models` with model loaded ```json { "id": "qwen2.5-coder-1.5b-instruct-mlx", "object": "model", "type": "llm", "publisher": "lmstudio-community", "arch": "qwen2", "compatibility_type": "mlx", "quantization": "4bit", "state": "loaded", "max_context_length": 32768, "loaded_context_length": 4096 }, ``` Release Notes: - lmstudio: Fixed showing `max_tokens` in the assistant panel --------- Co-authored-by: Peter Tripp --- 
crates/language_models/src/provider/lmstudio.rs | 4 +++- crates/lmstudio/src/lmstudio.rs | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/crates/language_models/src/provider/lmstudio.rs b/crates/language_models/src/provider/lmstudio.rs index 6840f30fca..a9129027d6 100644 --- a/crates/language_models/src/provider/lmstudio.rs +++ b/crates/language_models/src/provider/lmstudio.rs @@ -84,7 +84,9 @@ impl State { lmstudio::Model::new( &model.id, None, - None, + model + .loaded_context_length + .or_else(|| model.max_context_length), model.capabilities.supports_tool_calls(), ) }) diff --git a/crates/lmstudio/src/lmstudio.rs b/crates/lmstudio/src/lmstudio.rs index 1c4a902b93..b62909fe31 100644 --- a/crates/lmstudio/src/lmstudio.rs +++ b/crates/lmstudio/src/lmstudio.rs @@ -243,8 +243,8 @@ pub struct ModelEntry { pub compatibility_type: CompatibilityType, pub quantization: Option<String>, pub state: ModelState, - pub max_context_length: Option<u64>, - pub loaded_context_length: Option<u64>, + pub max_context_length: Option<usize>, + pub loaded_context_length: Option<usize>, #[serde(default)] pub capabilities: Capabilities, }