diff --git a/assets/settings/default.json b/assets/settings/default.json
index 09ca4b850e..5df2c32f57 100644
--- a/assets/settings/default.json
+++ b/assets/settings/default.json
@@ -895,7 +895,8 @@
       "api_url": "https://generativelanguage.googleapis.com"
     },
     "ollama": {
-      "api_url": "http://localhost:11434"
+      "api_url": "http://localhost:11434",
+      "low_speed_timeout_in_seconds": 60
     },
     "openai": {
       "version": "1",
diff --git a/crates/ollama/src/ollama.rs b/crates/ollama/src/ollama.rs
index b7359acf6f..145adc36a3 100644
--- a/crates/ollama/src/ollama.rs
+++ b/crates/ollama/src/ollama.rs
@@ -70,11 +70,36 @@ pub struct Model {
     pub keep_alive: Option<KeepAlive>,
 }
 
+// This could be dynamically retrieved via the API (1 call per model)
+// curl -s http://localhost:11434/api/show -d '{"model": "llama3.1:latest"}' | jq '.model_info."llama.context_length"'
+fn get_max_tokens(name: &str) -> usize {
+    match name {
+        "dolphin-llama3:8b-256k" => 262144, // 256K
+        _ => match name.split(':').next().unwrap() {
+            "mistral-nemo" => 1024000, // 1M
+            "deepseek-coder-v2" => 163840, // 160K
+            "llama3.1" | "phi3" | "command-r" | "command-r-plus" => 131072, // 128K
+            "codeqwen" => 65536, // 64K
+            "mistral" | "mistral-large" | "dolphin-mistral" | "codestral" // 32K
+            | "mistral-openorca" | "dolphin-mixtral" | "mixtral" | "llava"
+            | "qwen" | "qwen2" | "wizardlm2" | "wizard-math" => 32768,
+            "codellama" | "stable-code" | "deepseek-coder" | "starcoder2" // 16K
+            | "wizardcoder" => 16384,
+            "llama3" | "gemma2" | "gemma" | "codegemma" | "dolphin-llama3" // 8K
+            | "llava-llama3" | "starcoder" | "openchat" | "aya" => 8192,
+            "llama2" | "yi" | "llama2-chinese" | "vicuna" | "nous-hermes2" // 4K
+            | "stablelm2" => 4096,
+            "phi" | "orca-mini" | "tinyllama" | "granite-code" => 2048, // 2K
+            _ => 2048, // 2K (default)
+        },
+    }
+}
+
 impl Model {
     pub fn new(name: &str) -> Self {
         Self {
             name: name.to_owned(),
-            max_tokens: 2048,
+            max_tokens: get_max_tokens(name),
             keep_alive: Some(KeepAlive::indefinite()),
         }
     }