Add ollama phi4 context size defaults (#23036)

Add the `phi4` maximum context length (128K).
By default this is clamped to `16384`, but if you have enough video memory
you can set it higher, or connect to a non-local machine, via settings:

```json
"language_models": {
  "ollama": {
    "api_url": "http://localhost:11434",
    "available_models": [
      {
        "name": "phi4",
        "display_name": "Phi4 64K",
        "max_tokens": 65536
      }
    ]
  }
}
```

Release Notes:

- Improve support for Phi4 with Ollama.
This commit is contained in:
Peter Tripp 2025-01-15 10:44:15 -05:00 committed by GitHub
parent 0e4a619c9f
commit 67525cca71
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -83,8 +83,8 @@ fn get_max_tokens(name: &str) -> usize {
"codellama" | "starcoder2" => 16384,
"mistral" | "codestral" | "mixstral" | "llava" | "qwen2" | "qwen2.5-coder"
| "dolphin-mixtral" => 32768,
"llama3.1" | "phi3" | "phi3.5" | "command-r" | "deepseek-coder-v2" | "yi-coder"
| "llama3.2" => 128000,
"llama3.1" | "phi3" | "phi3.5" | "phi4" | "command-r" | "deepseek-coder-v2"
| "yi-coder" | "llama3.2" => 128000,
_ => DEFAULT_TOKENS,
}
.clamp(1, MAXIMUM_TOKENS)