This commit is contained in:
Oliver Azevedo Barnes 2025-08-26 15:59:15 -04:00 committed by GitHub
commit 76c84f07e3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
15 changed files with 2862 additions and 16 deletions

18
Cargo.lock generated
View file

@ -5001,6 +5001,7 @@ version = "0.1.0"
dependencies = [
"anyhow",
"client",
"clock",
"cloud_llm_client",
"copilot",
"edit_prediction",
@ -5009,9 +5010,14 @@ dependencies = [
"fs",
"futures 0.3.31",
"gpui",
"gpui_tokio",
"http_client",
"indoc",
"language",
"language_model",
"language_models",
"lsp",
"ollama",
"paths",
"project",
"regex",
@ -11065,11 +11071,22 @@ name = "ollama"
version = "0.1.0"
dependencies = [
"anyhow",
"client",
"edit_prediction",
"editor",
"futures 0.3.31",
"gpui",
"http_client",
"language",
"log",
"project",
"schemars",
"serde",
"serde_json",
"settings",
"text",
"theme",
"workspace",
"workspace-hack",
]
@ -20479,6 +20496,7 @@ dependencies = [
"nix 0.29.0",
"node_runtime",
"notifications",
"ollama",
"onboarding",
"outline",
"outline_panel",

View file

@ -21,9 +21,13 @@ editor.workspace = true
feature_flags.workspace = true
fs.workspace = true
gpui.workspace = true
http_client.workspace = true
indoc.workspace = true
edit_prediction.workspace = true
language.workspace = true
language_models.workspace = true
ollama.workspace = true
paths.workspace = true
project.workspace = true
regex.workspace = true
@ -37,11 +41,18 @@ zed_actions.workspace = true
zeta.workspace = true
[dev-dependencies]
clock.workspace = true
client = { workspace = true, features = ["test-support"] }
copilot = { workspace = true, features = ["test-support"] }
editor = { workspace = true, features = ["test-support"] }
futures.workspace = true
http_client = { workspace = true, features = ["test-support"] }
indoc.workspace = true
language_model = { workspace = true, features = ["test-support"] }
lsp = { workspace = true, features = ["test-support"] }
ollama = { workspace = true, features = ["test-support"] }
project = { workspace = true, features = ["test-support"] }
serde_json.workspace = true
settings = { workspace = true, features = ["test-support"] }
theme = { workspace = true, features = ["test-support"] }
gpui_tokio.workspace = true

File diff suppressed because it is too large Load diff

View file

@ -562,3 +562,36 @@ impl EditPredictionProvider for FakeNonZedEditPredictionProvider {
self.completion.clone()
}
}
/// Verifies that accepting a *partial* edit prediction inserts only the text
/// up to the first word boundary ("hello" out of "hello world"), leaving the
/// cursor at the end of the inserted word.
#[gpui::test]
async fn test_partial_accept_edit_prediction(cx: &mut gpui::TestAppContext) {
init_test(cx, |_| {});
let mut cx = EditorTestContext::new(cx).await;
let provider = cx.new(|_| FakeEditPredictionProvider::default());
assign_editor_completion_provider(provider.clone(), &mut cx);
// "ˇ" marks the cursor position in the editor-state DSL.
cx.set_state("let x = ˇ;");
// Propose a completion with multiple words
propose_edits(
&provider,
vec![(Point::new(0, 8)..Point::new(0, 8), "hello world")],
&mut cx,
);
// Pull the proposed edits from the provider into the editor's visible prediction.
cx.update_editor(|editor, window, cx| editor.update_visible_edit_prediction(window, cx));
// Verify the completion is shown
// (buffer text is unchanged while the prediction is merely displayed)
cx.assert_editor_state("let x = ˇ;");
cx.editor(|editor, _, _| {
assert!(editor.has_active_edit_prediction());
});
// Accept partial completion - should accept first word
cx.update_editor(|editor, window, cx| {
editor.accept_partial_edit_prediction(&Default::default(), window, cx);
});
// Only "hello" (first word) was inserted; "world" remains unaccepted.
cx.assert_editor_state("let x = helloˇ;");
}

View file

@ -9157,6 +9157,7 @@ impl Editor {
) -> IconName {
match provider {
Some(provider) => match provider.provider.name() {
"ollama" => IconName::AiOllama,
"copilot" => IconName::Copilot,
"supermaven" => IconName::Supermaven,
_ => IconName::ZedPredict,
@ -9206,6 +9207,7 @@ impl Editor {
use text::ToPoint as _;
if target.text_anchor.to_point(snapshot).row > cursor_point.row
{
// For move predictions, still use directional icons
Icon::new(IconName::ZedPredictDown)
} else {
Icon::new(IconName::ZedPredictUp)

View file

@ -215,6 +215,7 @@ pub enum EditPredictionProvider {
Copilot,
Supermaven,
Zed,
Ollama,
}
impl EditPredictionProvider {
@ -223,7 +224,8 @@ impl EditPredictionProvider {
EditPredictionProvider::Zed => true,
EditPredictionProvider::None
| EditPredictionProvider::Copilot
| EditPredictionProvider::Supermaven => false,
| EditPredictionProvider::Supermaven
| EditPredictionProvider::Ollama => false,
}
}
}

View file

@ -1,7 +1,7 @@
use anyhow::{Result, anyhow};
use futures::{FutureExt, StreamExt, future::BoxFuture, stream::BoxStream};
use futures::{Stream, TryFutureExt, stream};
use gpui::{AnyView, App, AsyncApp, Context, Subscription, Task};
use gpui::{AnyView, App, AsyncApp, Context, Entity, Global, Subscription, Task};
use http_client::HttpClient;
use language_model::{
AuthenticateError, LanguageModel, LanguageModelCompletionError, LanguageModelCompletionEvent,
@ -81,7 +81,7 @@ impl State {
// As a proxy for the server being "authenticated", we'll check if its up by fetching the models
cx.spawn(async move |this, cx| {
let models = get_models(http_client.as_ref(), &api_url, None).await?;
let models = get_models(http_client.as_ref(), &api_url, None, None).await?;
let tasks = models
.into_iter()
@ -94,7 +94,8 @@ impl State {
let api_url = api_url.clone();
async move {
let name = model.name.as_str();
let capabilities = show_model(http_client.as_ref(), &api_url, name).await?;
let capabilities =
show_model(http_client.as_ref(), &api_url, None, name).await?;
let ollama_model = ollama::Model::new(
name,
None,
@ -141,6 +142,29 @@ impl State {
}
impl OllamaLanguageModelProvider {
/// Returns the process-wide provider instance, if one was registered via
/// [`Self::set_global`]; `None` before initialization.
pub fn global(cx: &App) -> Option<Entity<Self>> {
cx.try_global::<GlobalOllamaLanguageModelProvider>()
.map(|provider| provider.0.clone())
}
/// Registers `provider` as the process-wide instance, replacing any
/// previously registered one.
pub fn set_global(provider: Entity<Self>, cx: &mut App) {
cx.set_global(GlobalOllamaLanguageModelProvider(provider));
}
/// Snapshot of the models currently known to the provider state
/// (cloned, so the caller owns the list).
pub fn available_models_for_completion(&self, cx: &App) -> Vec<ollama::Model> {
self.state.read(cx).available_models.clone()
}
/// The HTTP client this provider talks to the Ollama server with
/// (cheap `Arc` clone).
pub fn http_client(&self) -> Arc<dyn HttpClient> {
self.http_client.clone()
}
/// Re-runs model discovery against the Ollama server by restarting the
/// state's fetch task.
pub fn refresh_models(&self, cx: &mut App) {
self.state.update(cx, |state, cx| {
state.restart_fetch_models_task(cx);
});
}
pub fn new(http_client: Arc<dyn HttpClient>, cx: &mut App) -> Self {
let this = Self {
http_client: http_client.clone(),
@ -676,6 +700,10 @@ impl Render for ConfigurationView {
}
}
/// Newtype wrapper so the provider entity can be stored as a gpui global
/// (see `OllamaLanguageModelProvider::global` / `set_global`).
struct GlobalOllamaLanguageModelProvider(Entity<OllamaLanguageModelProvider>);
impl Global for GlobalOllamaLanguageModelProvider {}
fn tool_into_ollama(tool: LanguageModelRequestTool) -> ollama::OllamaTool {
ollama::OllamaTool::Function {
function: OllamaFunctionTool {

View file

@ -9,17 +9,42 @@ license = "GPL-3.0-or-later"
workspace = true
[lib]
path = "src/ollama.rs"
path = "src/lib.rs"
[features]
default = []
schemars = ["dep:schemars"]
test-support = [
"gpui/test-support",
"http_client/test-support",
"language/test-support",
]
[dependencies]
anyhow.workspace = true
futures.workspace = true
gpui.workspace = true
http_client.workspace = true
edit_prediction.workspace = true
language.workspace = true
log.workspace = true
project.workspace = true
settings.workspace = true
schemars = { workspace = true, optional = true }
serde.workspace = true
serde_json.workspace = true
text.workspace = true
workspace-hack.workspace = true
[dev-dependencies]
client = { workspace = true, features = ["test-support"] }
editor = { workspace = true, features = ["test-support"] }
gpui = { workspace = true, features = ["test-support"] }
http_client = { workspace = true, features = ["test-support"] }
language = { workspace = true, features = ["test-support"] }
project = { workspace = true, features = ["test-support"] }
settings = { workspace = true, features = ["test-support"] }
theme = { workspace = true, features = ["test-support"] }
workspace = { workspace = true, features = ["test-support"] }

8
crates/ollama/src/lib.rs Normal file
View file

@ -0,0 +1,8 @@
//! Crate root for the `ollama` crate: re-exports the Ollama HTTP API client
//! (`ollama`) and the edit-prediction completion provider
//! (`ollama_completion_provider`). The `fake` test doubles are only exported
//! for tests or with the `test-support` feature.
mod ollama;
mod ollama_completion_provider;
pub use ollama::*;
pub use ollama_completion_provider::*;
#[cfg(any(test, feature = "test-support"))]
pub use ollama::fake;

View file

@ -6,6 +6,7 @@ use serde_json::Value;
use std::time::Duration;
pub const OLLAMA_API_URL: &str = "http://localhost:11434";
pub const OLLAMA_API_KEY_VAR: &str = "OLLAMA_API_KEY";
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Serialize, Deserialize, Debug, Eq, PartialEq)]
@ -99,6 +100,39 @@ impl Model {
}
}
/// Request body for Ollama's `/api/generate` endpoint.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Debug, Serialize, Deserialize)]
pub struct GenerateRequest {
/// Model name to run, e.g. "qwen2.5-coder:32b".
pub model: String,
/// Text to complete (for edit prediction: the text before the cursor).
pub prompt: String,
/// Text after the insertion point, enabling fill-in-the-middle completion.
pub suffix: Option<String>,
/// When false, the server returns a single final response instead of a stream.
pub stream: bool,
/// Sampling/generation parameters; server defaults apply when `None`.
pub options: Option<GenerateOptions>,
/// How long the model stays loaded after the request.
pub keep_alive: Option<KeepAlive>,
// Opaque conversation context tokens from a previous
// `GenerateResponse::context` — presumably used to continue a session;
// TODO confirm against Ollama API docs.
pub context: Option<Vec<i64>>,
}
/// Generation parameters for `/api/generate` (subset of Ollama's model options).
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Debug, Serialize, Deserialize)]
pub struct GenerateOptions {
// Maximum number of tokens to generate — NOTE(review): assumed to match
// Ollama's `num_predict` option semantics; confirm against API docs.
pub num_predict: Option<i32>,
pub temperature: Option<f32>,
pub top_p: Option<f32>,
/// Stop sequences that terminate generation (e.g. "<EOT>").
pub stop: Option<Vec<String>>,
}
/// Response body from Ollama's `/api/generate` endpoint
/// (non-streaming; `done` is true for the final message).
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Debug, Serialize, Deserialize)]
pub struct GenerateResponse {
/// The generated completion text.
pub response: String,
/// True when generation has finished.
pub done: bool,
/// Context tokens that can be echoed back in `GenerateRequest::context`.
pub context: Option<Vec<i64>>,
// Timing/accounting fields reported by the server (durations presumably
// in nanoseconds per Ollama convention — TODO confirm).
pub total_duration: Option<u64>,
pub load_duration: Option<u64>,
pub prompt_eval_count: Option<i32>,
pub eval_count: Option<i32>,
}
#[derive(Serialize, Deserialize, Debug)]
#[serde(tag = "role", rename_all = "lowercase")]
pub enum ChatMessage {
@ -309,14 +343,19 @@ pub async fn stream_chat_completion(
pub async fn get_models(
client: &dyn HttpClient,
api_url: &str,
api_key: Option<String>,
_: Option<Duration>,
) -> Result<Vec<LocalModelListing>> {
let uri = format!("{api_url}/api/tags");
let request_builder = HttpRequest::builder()
let mut request_builder = HttpRequest::builder()
.method(Method::GET)
.uri(uri)
.header("Accept", "application/json");
if let Some(api_key) = api_key {
request_builder = request_builder.header("Authorization", format!("Bearer {api_key}"))
}
let request = request_builder.body(AsyncBody::default())?;
let mut response = client.send(request).await?;
@ -336,15 +375,25 @@ pub async fn get_models(
}
/// Fetch details of a model, used to determine model capabilities
pub async fn show_model(client: &dyn HttpClient, api_url: &str, model: &str) -> Result<ModelShow> {
pub async fn show_model(
client: &dyn HttpClient,
api_url: &str,
api_key: Option<String>,
model: &str,
) -> Result<ModelShow> {
let uri = format!("{api_url}/api/show");
let request = HttpRequest::builder()
let mut request_builder = HttpRequest::builder()
.method(Method::POST)
.uri(uri)
.header("Content-Type", "application/json")
.body(AsyncBody::from(
serde_json::json!({ "model": model }).to_string(),
))?;
.header("Content-Type", "application/json");
if let Some(api_key) = api_key {
request_builder = request_builder.header("Authorization", format!("Bearer {api_key}"));
}
let request = request_builder.body(AsyncBody::from(
serde_json::json!({ "model": model }).to_string(),
))?;
let mut response = client.send(request).await?;
let mut body = String::new();
@ -360,10 +409,198 @@ pub async fn show_model(client: &dyn HttpClient, api_url: &str, model: &str) ->
Ok(details)
}
/// Issues a request against Ollama's `/api/generate` endpoint and parses the
/// result.
///
/// When `api_key` is present it is forwarded as a `Bearer` Authorization
/// header (useful when a proxy in front of Ollama enforces authentication).
/// Transport failures are logged before being returned; non-2xx responses are
/// turned into an error carrying the status and body text.
pub async fn generate(
    client: &dyn HttpClient,
    api_url: &str,
    api_key: Option<String>,
    request: GenerateRequest,
) -> Result<GenerateResponse> {
    let payload = serde_json::to_string(&request)?;

    let mut builder = HttpRequest::builder()
        .method(Method::POST)
        .uri(format!("{api_url}/api/generate"))
        .header("Content-Type", "application/json");
    if let Some(key) = api_key {
        builder = builder.header("Authorization", format!("Bearer {key}"));
    }
    let http_request = builder.body(AsyncBody::from(payload))?;

    // Log transport-level failures (server down, refused connection, …)
    // before propagating them to the caller.
    let mut response = match client.send(http_request).await {
        Ok(response) => response,
        Err(err) => {
            log::error!("Ollama server unavailable at {}: {}", api_url, err);
            return Err(err);
        }
    };

    let mut body = String::new();
    response.body_mut().read_to_string(&mut body).await?;

    anyhow::ensure!(
        response.status().is_success(),
        "Failed to connect to Ollama API: {} {}",
        response.status(),
        body,
    );

    serde_json::from_str::<GenerateResponse>(&body)
        .context("Unable to parse Ollama generate response")
}
/// Test doubles for the Ollama HTTP API, available in tests and behind the
/// `test-support` feature.
#[cfg(any(test, feature = "test-support"))]
pub mod fake {
use super::*;
use crate::ollama_completion_provider::OllamaCompletionProvider;
use gpui::AppContext;
use http_client::{AsyncBody, Response, Url};
use std::collections::HashMap;
use std::sync::{Arc, Mutex};
/// In-memory `HttpClient` that serves canned responses keyed by URL path
/// and records each request it receives.
pub struct FakeHttpClient {
// path -> canned JSON response body
responses: Arc<Mutex<HashMap<String, String>>>,
// NOTE(review): `send` always records an empty body (see below), so the
// second tuple element is currently never populated.
requests: Arc<Mutex<Vec<(String, String)>>>, // (path, body)
}
impl FakeHttpClient {
/// Creates a client with no canned responses; any request errors until a
/// response is registered via `set_response`.
pub fn new() -> Self {
Self {
responses: Arc::new(Mutex::new(HashMap::new())),
requests: Arc::new(Mutex::new(Vec::new())),
}
}
/// Registers `response` as the body returned for requests to `path`.
pub fn set_response(&self, path: &str, response: String) {
self.responses
.lock()
.unwrap()
.insert(path.to_string(), response);
}
/// Convenience: cans a successful `/api/generate` response whose
/// `response` field is `completion_text`.
pub fn set_generate_response(&self, completion_text: &str) {
let response = serde_json::json!({
"response": completion_text,
"done": true,
"context": [],
"total_duration": 1000000_u64,
"load_duration": 1000000_u64,
"prompt_eval_count": 10,
"prompt_eval_duration": 1000000_u64,
"eval_count": 20,
"eval_duration": 1000000_u64
});
self.set_response("/api/generate", response.to_string());
}
/// Makes subsequent requests to `path` fail by removing its canned response.
pub fn set_error(&self, path: &str) {
// Remove any existing response to force an error
self.responses.lock().unwrap().remove(path);
}
/// Returns a copy of all requests recorded so far.
pub fn get_requests(&self) -> Vec<(String, String)> {
self.requests.lock().unwrap().clone()
}
/// Clears the recorded request log.
pub fn clear_requests(&self) {
self.requests.lock().unwrap().clear();
}
}
impl HttpClient for FakeHttpClient {
fn type_name(&self) -> &'static str {
"FakeHttpClient"
}
fn user_agent(&self) -> Option<&http::HeaderValue> {
None
}
fn proxy(&self) -> Option<&Url> {
None
}
// Answers from the canned-response table; errors for unknown paths.
fn send(
&self,
req: http_client::Request<AsyncBody>,
) -> futures::future::BoxFuture<'static, Result<Response<AsyncBody>, anyhow::Error>>
{
let path = req.uri().path().to_string();
let responses = Arc::clone(&self.responses);
let requests = Arc::clone(&self.requests);
Box::pin(async move {
// Store the request
// NOTE(review): the request body is discarded here — an empty
// string is recorded instead of the actual payload.
requests.lock().unwrap().push((path.clone(), String::new()));
let responses = responses.lock().unwrap();
if let Some(response_body) = responses.get(&path).cloned() {
let response = Response::builder()
.status(200)
.header("content-type", "application/json")
.body(AsyncBody::from(response_body))
.unwrap();
Ok(response)
} else {
Err(anyhow::anyhow!("No mock response set for {}", path))
}
})
}
}
/// Factory namespace for constructing a fake provider/client pair in tests.
pub struct Ollama;
impl Ollama {
/// Builds an `OllamaCompletionProvider` for tests alongside a
/// `FakeHttpClient`.
// NOTE(review): the returned `fake_client` is not passed to the provider
// here — presumably the provider resolves its HTTP client elsewhere
// (e.g. a global); verify callers actually exercise the fake.
pub fn fake(
cx: &mut gpui::TestAppContext,
) -> (
gpui::Entity<OllamaCompletionProvider>,
std::sync::Arc<FakeHttpClient>,
) {
let fake_client = std::sync::Arc::new(FakeHttpClient::new());
let provider =
cx.new(|cx| OllamaCompletionProvider::new("qwencoder".to_string(), None, cx));
(provider, fake_client)
}
}
}
#[cfg(test)]
mod tests {
use super::*;
/// A fill-in-the-middle request (prompt + suffix) survives a JSON round trip
/// with all fields intact.
#[test]
fn test_generate_request_with_suffix_serialization() {
    let original = GenerateRequest {
        model: "qwen2.5-coder:32b".to_string(),
        prompt: "def fibonacci(n):".to_string(),
        suffix: Some(" return result".to_string()),
        stream: false,
        options: Some(GenerateOptions {
            num_predict: Some(150),
            temperature: Some(0.1),
            top_p: Some(0.95),
            stop: None,
        }),
        keep_alive: None,
        context: None,
    };

    // Serialize then deserialize, and compare the fields we care about.
    let serialized = serde_json::to_string(&original).unwrap();
    let round_tripped: GenerateRequest = serde_json::from_str(&serialized).unwrap();

    assert_eq!(round_tripped.model, "qwen2.5-coder:32b");
    assert_eq!(round_tripped.prompt, "def fibonacci(n):");
    assert_eq!(round_tripped.suffix, Some(" return result".to_string()));
    assert!(!round_tripped.stream);
    assert!(round_tripped.options.is_some());
}
#[test]
fn parse_completion() {
let response = serde_json::json!({
@ -585,4 +822,64 @@ mod tests {
assert_eq!(message_images.len(), 1);
assert_eq!(message_images[0].as_str().unwrap(), base64_image);
}
/// The serialized request contains no credential material: the API key is a
/// parameter of `generate` (sent as an Authorization header), never a field
/// of `GenerateRequest`. This round trip documents that the JSON body is
/// identical whether or not a key is in use.
#[test]
fn test_generate_request_with_api_key_serialization() {
    let request = GenerateRequest {
        model: "qwen2.5-coder:32b".to_string(),
        prompt: "def fibonacci(n):".to_string(),
        suffix: Some(" return result".to_string()),
        stream: false,
        options: Some(GenerateOptions {
            num_predict: Some(150),
            temperature: Some(0.1),
            top_p: Some(0.95),
            stop: None,
        }),
        keep_alive: None,
        context: None,
    };

    let json = serde_json::to_string(&request).unwrap();
    let decoded: GenerateRequest = serde_json::from_str(&json).unwrap();

    assert_eq!(decoded.model, "qwen2.5-coder:32b");
    assert_eq!(decoded.prompt, "def fibonacci(n):");
    assert_eq!(decoded.suffix, Some(" return result".to_string()));
    assert!(!decoded.stream);
    assert!(decoded.options.is_some());
}
/// Stop sequences inside `GenerateOptions` round-trip through JSON unchanged.
#[test]
fn test_generate_request_with_stop_tokens() {
    let request = GenerateRequest {
        model: "codellama:7b-code".to_string(),
        prompt: "def fibonacci(n):".to_string(),
        suffix: Some(" return result".to_string()),
        stream: false,
        options: Some(GenerateOptions {
            num_predict: Some(150),
            temperature: Some(0.1),
            top_p: Some(0.95),
            stop: Some(vec!["<EOT>".to_string()]),
        }),
        keep_alive: None,
        context: None,
    };

    let encoded = serde_json::to_string(&request).unwrap();
    let decoded: GenerateRequest = serde_json::from_str(&encoded).unwrap();

    assert_eq!(decoded.model, "codellama:7b-code");
    assert_eq!(decoded.prompt, "def fibonacci(n):");
    assert_eq!(decoded.suffix, Some(" return result".to_string()));
    assert!(!decoded.stream);

    // The stop token list must survive intact.
    let options = decoded.options.expect("options should round-trip");
    assert_eq!(options.stop, Some(vec!["<EOT>".to_string()]));
}
}

File diff suppressed because it is too large Load diff

View file

@ -82,6 +82,7 @@ image_viewer.workspace = true
indoc.workspace = true
edit_prediction_button.workspace = true
inspector_ui.workspace = true
ollama.workspace = true
install_cli.workspace = true
jj_ui.workspace = true
journal.workspace = true

View file

@ -3,8 +3,11 @@ use collections::HashMap;
use copilot::{Copilot, CopilotCompletionProvider};
use editor::Editor;
use gpui::{AnyWindowHandle, App, AppContext as _, Context, Entity, WeakEntity};
use language::language_settings::{EditPredictionProvider, all_language_settings};
use settings::SettingsStore;
use language_models::AllLanguageModelSettings;
use ollama::{OLLAMA_API_KEY_VAR, OllamaCompletionProvider, SettingsModel, State};
use settings::{Settings as _, SettingsStore};
use std::{cell::RefCell, rc::Rc, sync::Arc};
use supermaven::{Supermaven, SupermavenCompletionProvider};
use ui::Window;
@ -12,6 +15,33 @@ use workspace::Workspace;
use zeta::{ProviderDataCollection, ZetaEditPredictionProvider};
pub fn init(client: Arc<Client>, user_store: Entity<UserStore>, cx: &mut App) {
// Initialize global Ollama service
let (api_url, settings_models) = {
let settings = &AllLanguageModelSettings::get_global(cx).ollama;
let api_url = settings.api_url.clone();
let settings_models: Vec<SettingsModel> = settings
.available_models
.iter()
.map(|model| SettingsModel {
name: model.name.clone(),
display_name: model.display_name.clone(),
max_tokens: model.max_tokens,
supports_tools: model.supports_tools,
supports_images: model.supports_images,
supports_thinking: model.supports_thinking,
})
.collect();
(api_url, settings_models)
};
let ollama_service = State::new(client.http_client(), api_url, None, cx);
ollama_service.update(cx, |service, cx| {
service.set_settings_models(settings_models, cx);
});
State::set_global(ollama_service, cx);
let editors: Rc<RefCell<HashMap<WeakEntity<Editor>, AnyWindowHandle>>> = Rc::default();
cx.observe_new({
let editors = editors.clone();
@ -89,6 +119,27 @@ pub fn init(client: Arc<Client>, user_store: Entity<UserStore>, cx: &mut App) {
user_store.clone(),
cx,
);
} else if provider == EditPredictionProvider::Ollama {
// Update global Ollama service when settings change
let settings = &AllLanguageModelSettings::get_global(cx).ollama;
if let Some(service) = State::global(cx) {
let settings_models: Vec<SettingsModel> = settings
.available_models
.iter()
.map(|model| SettingsModel {
name: model.name.clone(),
display_name: model.display_name.clone(),
max_tokens: model.max_tokens,
supports_tools: model.supports_tools,
supports_images: model.supports_images,
supports_thinking: model.supports_thinking,
})
.collect();
service.update(cx, |service, cx| {
service.set_settings_models(settings_models, cx);
});
}
}
}
})
@ -229,5 +280,81 @@ fn assign_edit_prediction_provider(
editor.set_edit_prediction_provider(Some(provider), window, cx);
}
}
EditPredictionProvider::Ollama => {
let settings = &AllLanguageModelSettings::get_global(cx).ollama;
let api_key = std::env::var(OLLAMA_API_KEY_VAR).ok();
// Get model from settings or use discovered models
let model = if let Some(first_model) = settings.available_models.first() {
Some(first_model.name.clone())
} else if let Some(service) = State::global(cx) {
// Use first discovered model
service
.read(cx)
.available_models()
.first()
.map(|m| m.name.clone())
} else {
None
};
if let Some(model) = model {
let provider = cx.new(|cx| OllamaCompletionProvider::new(model, api_key, cx));
editor.set_edit_prediction_provider(Some(provider), window, cx);
} else {
log::error!(
"No Ollama models available. Please configure models in settings or pull models using 'ollama pull <model-name>'"
);
editor.set_edit_prediction_provider::<OllamaCompletionProvider>(None, window, cx);
}
}
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::zed::tests::init_test;
    use editor::{Editor, MultiBuffer};
    use gpui::TestAppContext;
    use language::Buffer;
    use language_models::{AllLanguageModelSettings, provider::ollama::OllamaSettings};

    /// When Ollama is the configured edit-prediction provider but no models
    /// are available (neither in settings nor discovered), assigning the
    /// provider must degrade gracefully instead of panicking.
    #[gpui::test]
    async fn test_assign_edit_prediction_provider_with_no_ollama_models(cx: &mut TestAppContext) {
        let app_state = init_test(cx);

        let buffer = cx.new(|cx| Buffer::local("test content", cx));
        let multibuffer = cx.new(|cx| MultiBuffer::singleton(buffer, cx));
        let (editor, cx) =
            cx.add_window_view(|window, cx| Editor::for_multibuffer(multibuffer, None, window, cx));

        // Override settings to have an empty available_models list.
        cx.update(|_, cx| {
            let new_settings = AllLanguageModelSettings {
                ollama: OllamaSettings {
                    api_url: "http://localhost:11434".to_string(),
                    available_models: vec![], // Empty models list
                },
                ..Default::default()
            };
            AllLanguageModelSettings::override_global(new_settings, cx);
        });

        // This must complete without panicking even when no models are
        // available. (The previous `assert_eq!(result, ())` compared unit
        // values, which asserts nothing — the non-panicking call itself is
        // the behavior under test.)
        editor.update_in(cx, |editor, window, cx| {
            assign_edit_prediction_provider(
                editor,
                language::language_settings::EditPredictionProvider::Ollama,
                &app_state.client,
                app_state.user_store.clone(),
                window,
                cx,
            )
        });
    }
}

View file

@ -44,7 +44,7 @@ On Linux, `alt-tab` is often used by the window manager for switching windows, s
{#action editor::AcceptPartialEditPrediction} ({#kb editor::AcceptPartialEditPrediction}) can be used to accept the current edit prediction up to the next word boundary.
See the [Configuring GitHub Copilot](#github-copilot) and [Configuring Supermaven](#supermaven) sections below for configuration of other providers. Only text insertions at the current cursor are supported for these providers, whereas the Zeta model provides multiple predictions including deletions.
See the [Configuring GitHub Copilot](#github-copilot), [Configuring Supermaven](#supermaven), and [Configuring Ollama](#ollama) sections below for configuration of other providers. Only text insertions at the current cursor are supported for these providers, whereas the Zeta model provides multiple predictions including deletions.
## Configuring Edit Prediction Keybindings {#edit-predictions-keybinding}
@ -304,6 +304,74 @@ To use Supermaven as your provider, set this within `settings.json`:
You should be able to sign-in to Supermaven by clicking on the Supermaven icon in the status bar and following the setup instructions.
## Configuring Ollama {#ollama}
To use Ollama as your edit prediction provider, set this within `settings.json`:
```json
{
"features": {
"edit_prediction_provider": "ollama"
}
}
```
### Setup
1. Download and install Ollama from [ollama.com/download](https://ollama.com/download)
2. Pull completion-capable models, for example:
```sh
ollama pull qwen2.5-coder:3b
ollama pull codellama:7b
```
3. Ensure Ollama is running:
```sh
ollama serve
```
4. Configure the model in your language model settings
The Edit Prediction menu will automatically detect available models. When one is newly selected in the menu, it will be added to your `settings.json`, and put at the top of the list. You can then manually configure it in the settings file if you need more control.
```json
{
"language_models": {
"ollama": {
"api_url": "http://localhost:11434",
"available_models": [
{
"name": "qwen2.5-coder:3b",
"display_name": "Qwen 2.5 Coder 3B",
"max_tokens": 8192
},
{
"name": "codellama:7b",
"display_name": "CodeLlama 7B",
"max_tokens": 8192
}
]
}
}
}
```
You can also switch between them in the menu, and the order of the models in the settings file will be updated behind the scenes.
The settings also allow configuring Ollama's API URL, so you can use Ollama either locally or hosted remotely. The Edit Prediction menu includes a shortcut that opens the settings file where the URL is set.
### Authentication
Ollama itself doesn't require an API key, but when running it remotely it is common practice to set up an authenticating proxy server in front of it. When sending edit prediction requests, Zed forwards the API key as an authorization header so the proxy can authenticate the request:
```bash
export OLLAMA_API_KEY=your_api_key_here
```
## See also
You may also use the [Agent Panel](./agent-panel.md) or the [Inline Assistant](./inline-assistant.md) to interact with language models, see the [AI documentation](./overview.md) for more information on the other AI features in Zed.

View file

@ -3,7 +3,7 @@
Zed supports two sources for completions:
1. "Code Completions" provided by Language Servers (LSPs) automatically installed by Zed or via [Zed Language Extensions](languages.md).
2. "Edit Predictions" provided by Zed's own Zeta model or by external providers like [GitHub Copilot](#github-copilot) or [Supermaven](#supermaven).
2. "Edit Predictions" provided by Zed's own Zeta model or by external providers like [GitHub Copilot](#github-copilot), [Supermaven](#supermaven), or [Ollama](#ollama).
## Language Server Code Completions {#code-completions}