Fix inaccurate Ollama context length for qwen2.5 models (#20933)
Since Ollama/llama.cpp do not currently support YaRN for context length extension, the context length is limited to `32768`. This can be confirmed on the Ollama model card. See the corresponding issue in the Ollama repo: https://github.com/ollama/ollama/issues/6865

Co-authored-by: Patrick Samson <1416027+patricksamson@users.noreply.github.com>
parent d5f2bca382
commit b4659bb44e
1 changed file with 3 additions and 2 deletions
@@ -81,9 +81,10 @@ fn get_max_tokens(name: &str) -> usize {
         "llama2" | "yi" | "vicuna" | "stablelm2" => 4096,
         "llama3" | "gemma2" | "gemma" | "codegemma" | "starcoder" | "aya" => 8192,
         "codellama" | "starcoder2" => 16384,
-        "mistral" | "codestral" | "mixstral" | "llava" | "qwen2" | "dolphin-mixtral" => 32768,
+        "mistral" | "codestral" | "mixstral" | "llava" | "qwen2" | "qwen2.5-coder"
+            | "dolphin-mixtral" => 32768,
         "llama3.1" | "phi3" | "phi3.5" | "command-r" | "deepseek-coder-v2" | "yi-coder"
-            | "llama3.2" | "qwen2.5-coder" => 128000,
+            | "llama3.2" => 128000,
         _ => DEFAULT_TOKENS,
     }
     .clamp(1, MAXIMUM_TOKENS)
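For readability, here is a minimal, self-contained sketch of what `get_max_tokens` looks like after this change. The values of `DEFAULT_TOKENS` and `MAXIMUM_TOKENS`, and the exact match scrutinee, are not visible in this hunk, so placeholders are assumed; treat this as an illustration of the new arm layout, not the exact source.

// Sketch of the post-commit function. DEFAULT_TOKENS and MAXIMUM_TOKENS
// are placeholder values; the real constants are defined elsewhere in the
// crate and are not shown in this diff.
const DEFAULT_TOKENS: usize = 2048; // assumed placeholder
const MAXIMUM_TOKENS: usize = 64000; // assumed placeholder

fn get_max_tokens(name: &str) -> usize {
    match name {
        "llama2" | "yi" | "vicuna" | "stablelm2" => 4096,
        "llama3" | "gemma2" | "gemma" | "codegemma" | "starcoder" | "aya" => 8192,
        "codellama" | "starcoder2" => 16384,
        // qwen2.5-coder moves here: without YaRN support in Ollama/llama.cpp,
        // its usable context length is capped at 32768.
        "mistral" | "codestral" | "mixstral" | "llava" | "qwen2" | "qwen2.5-coder"
        | "dolphin-mixtral" => 32768,
        "llama3.1" | "phi3" | "phi3.5" | "command-r" | "deepseek-coder-v2" | "yi-coder"
        | "llama3.2" => 128000,
        _ => DEFAULT_TOKENS,
    }
    .clamp(1, MAXIMUM_TOKENS)
}

fn main() {
    // After this commit, qwen2.5-coder reports 32768 rather than 128000.
    assert_eq!(get_max_tokens("qwen2.5-coder"), 32768);
    assert_eq!(get_max_tokens("llama3.2"), 128000);
}

Keeping `qwen2.5-coder` on the same line as `qwen2` makes the 32768 grouping explicit, so any future YaRN support only needs to move the pattern back to the 128000 arm.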