lmstudio: Fill max_tokens using the response from /models (#25606)
The info for `max_tokens` for the model is included in `{api_url}/models`. I don't think this needs to be `.clamp` like `get_max_tokens` in `crates/ollama/src/ollama.rs`, but it might need to be.

## Before

Every model shows 2k.

## After

### JSON from `{api_url}/models` with the model not loaded

```json
{
  "id": "qwen2.5-coder-1.5b-instruct-mlx",
  "object": "model",
  "type": "llm",
  "publisher": "lmstudio-community",
  "arch": "qwen2",
  "compatibility_type": "mlx",
  "quantization": "4bit",
  "state": "not-loaded",
  "max_context_length": 32768
},
```

## Notes

The response from `{api_url}/models` seems to return the `max_tokens` for the model, not the currently configured context length, but I think showing the `max_tokens` for the model is better than setting 2k for everything. `loaded_context_length` exists, but only if the model is loaded at the startup of Zed, which usually isn't the case; maybe `fetch_models` should be rerun when swapping LM Studio models.

### Currently configured context

This isn't shown in `{api_url}/models`.

### JSON from `{api_url}/models` with the model loaded

```json
{
  "id": "qwen2.5-coder-1.5b-instruct-mlx",
  "object": "model",
  "type": "llm",
  "publisher": "lmstudio-community",
  "arch": "qwen2",
  "compatibility_type": "mlx",
  "quantization": "4bit",
  "state": "loaded",
  "max_context_length": 32768,
  "loaded_context_length": 4096
},
```

Release Notes:

- lmstudio: Fixed showing `max_tokens` in the assistant panel

---------

Co-authored-by: Peter Tripp <peter@zed.dev>
This commit is contained in:
parent
5ad51ca48e
commit
52fa7ababb
2 changed files with 5 additions and 3 deletions
|
@@ -84,7 +84,9 @@ impl State {
|
|||
lmstudio::Model::new(
|
||||
&model.id,
|
||||
None,
|
||||
None,
|
||||
model
|
||||
.loaded_context_length
|
||||
.or_else(|| model.max_context_length),
|
||||
model.capabilities.supports_tool_calls(),
|
||||
)
|
||||
})
|
||||
|
|
|
@@ -243,8 +243,8 @@ pub struct ModelEntry {
|
|||
pub compatibility_type: CompatibilityType,
|
||||
pub quantization: Option<String>,
|
||||
pub state: ModelState,
|
||||
pub max_context_length: Option<u32>,
|
||||
pub loaded_context_length: Option<u32>,
|
||||
pub max_context_length: Option<usize>,
|
||||
pub loaded_context_length: Option<usize>,
|
||||
#[serde(default)]
|
||||
pub capabilities: Capabilities,
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue