This commit is contained in:
Oliver Azevedo Barnes 2025-08-26 15:59:15 -04:00 committed by GitHub
commit 76c84f07e3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
15 changed files with 2862 additions and 16 deletions

18
Cargo.lock generated
View file

@ -5001,6 +5001,7 @@ version = "0.1.0"
dependencies = [
"anyhow",
"client",
"clock",
"cloud_llm_client",
"copilot",
"edit_prediction",
@ -5009,9 +5010,14 @@ dependencies = [
"fs",
"futures 0.3.31",
"gpui",
"gpui_tokio",
"http_client",
"indoc",
"language",
"language_model",
"language_models",
"lsp",
"ollama",
"paths",
"project",
"regex",
@ -11065,11 +11071,22 @@ name = "ollama"
version = "0.1.0"
dependencies = [
"anyhow",
"client",
"edit_prediction",
"editor",
"futures 0.3.31",
"gpui",
"http_client",
"language",
"log",
"project",
"schemars",
"serde",
"serde_json",
"settings",
"text",
"theme",
"workspace",
"workspace-hack",
]
@ -20479,6 +20496,7 @@ dependencies = [
"nix 0.29.0",
"node_runtime",
"notifications",
"ollama",
"onboarding",
"outline",
"outline_panel",

View file

@ -21,9 +21,13 @@ editor.workspace = true
feature_flags.workspace = true
fs.workspace = true
gpui.workspace = true
http_client.workspace = true
indoc.workspace = true
edit_prediction.workspace = true
language.workspace = true
language_models.workspace = true
ollama.workspace = true
paths.workspace = true
project.workspace = true
regex.workspace = true
@ -37,11 +41,18 @@ zed_actions.workspace = true
zeta.workspace = true
[dev-dependencies]
clock.workspace = true
client = { workspace = true, features = ["test-support"] }
copilot = { workspace = true, features = ["test-support"] }
editor = { workspace = true, features = ["test-support"] }
futures.workspace = true
http_client = { workspace = true, features = ["test-support"] }
indoc.workspace = true
language_model = { workspace = true, features = ["test-support"] }
lsp = { workspace = true, features = ["test-support"] }
ollama = { workspace = true, features = ["test-support"] }
project = { workspace = true, features = ["test-support"] }
serde_json.workspace = true
settings = { workspace = true, features = ["test-support"] }
theme = { workspace = true, features = ["test-support"] }
gpui_tokio.workspace = true

File diff suppressed because it is too large Load diff

View file

@ -562,3 +562,36 @@ impl EditPredictionProvider for FakeNonZedEditPredictionProvider {
self.completion.clone()
}
}
/// Verifies that accepting a *partial* edit prediction inserts only the text
/// up to the first word boundary ("hello" out of "hello world"), leaving the
/// cursor at the end of the inserted word.
#[gpui::test]
async fn test_partial_accept_edit_prediction(cx: &mut gpui::TestAppContext) {
init_test(cx, |_| {});
let mut cx = EditorTestContext::new(cx).await;
let provider = cx.new(|_| FakeEditPredictionProvider::default());
assign_editor_completion_provider(provider.clone(), &mut cx);
// "ˇ" marks the cursor position in the editor-state DSL.
cx.set_state("let x = ˇ;");
// Propose a completion with multiple words
propose_edits(
&provider,
vec![(Point::new(0, 8)..Point::new(0, 8), "hello world")],
&mut cx,
);
// Pull the proposed edits from the provider into the editor's visible prediction.
cx.update_editor(|editor, window, cx| editor.update_visible_edit_prediction(window, cx));
// Verify the completion is shown
// (buffer text is unchanged while the prediction is merely displayed)
cx.assert_editor_state("let x = ˇ;");
cx.editor(|editor, _, _| {
assert!(editor.has_active_edit_prediction());
});
// Accept partial completion - should accept first word
cx.update_editor(|editor, window, cx| {
editor.accept_partial_edit_prediction(&Default::default(), window, cx);
});
// Only "hello" (first word) was inserted; "world" remains unaccepted.
cx.assert_editor_state("let x = helloˇ;");
}

View file

@ -9157,6 +9157,7 @@ impl Editor {
) -> IconName {
match provider {
Some(provider) => match provider.provider.name() {
"ollama" => IconName::AiOllama,
"copilot" => IconName::Copilot,
"supermaven" => IconName::Supermaven,
_ => IconName::ZedPredict,
@ -9206,6 +9207,7 @@ impl Editor {
use text::ToPoint as _;
if target.text_anchor.to_point(snapshot).row > cursor_point.row
{
// For move predictions, still use directional icons
Icon::new(IconName::ZedPredictDown)
} else {
Icon::new(IconName::ZedPredictUp)

View file

@ -215,6 +215,7 @@ pub enum EditPredictionProvider {
Copilot,
Supermaven,
Zed,
Ollama,
}
impl EditPredictionProvider {
@ -223,7 +224,8 @@ impl EditPredictionProvider {
EditPredictionProvider::Zed => true,
EditPredictionProvider::None
| EditPredictionProvider::Copilot
| EditPredictionProvider::Supermaven => false,
| EditPredictionProvider::Supermaven
| EditPredictionProvider::Ollama => false,
}
}
}

View file

@ -1,7 +1,7 @@
use anyhow::{Result, anyhow};
use futures::{FutureExt, StreamExt, future::BoxFuture, stream::BoxStream};
use futures::{Stream, TryFutureExt, stream};
use gpui::{AnyView, App, AsyncApp, Context, Subscription, Task};
use gpui::{AnyView, App, AsyncApp, Context, Entity, Global, Subscription, Task};
use http_client::HttpClient;
use language_model::{
AuthenticateError, LanguageModel, LanguageModelCompletionError, LanguageModelCompletionEvent,
@ -81,7 +81,7 @@ impl State {
// As a proxy for the server being "authenticated", we'll check if its up by fetching the models
cx.spawn(async move |this, cx| {
let models = get_models(http_client.as_ref(), &api_url, None).await?;
let models = get_models(http_client.as_ref(), &api_url, None, None).await?;
let tasks = models
.into_iter()
@ -94,7 +94,8 @@ impl State {
let api_url = api_url.clone();
async move {
let name = model.name.as_str();
let capabilities = show_model(http_client.as_ref(), &api_url, name).await?;
let capabilities =
show_model(http_client.as_ref(), &api_url, None, name).await?;
let ollama_model = ollama::Model::new(
name,
None,
@ -141,6 +142,29 @@ impl State {
}
impl OllamaLanguageModelProvider {
/// Returns the process-wide provider instance, if one was registered via
/// [`Self::set_global`]; `None` before initialization.
pub fn global(cx: &App) -> Option<Entity<Self>> {
cx.try_global::<GlobalOllamaLanguageModelProvider>()
.map(|provider| provider.0.clone())
}
/// Registers `provider` as the process-wide instance, replacing any
/// previously registered one.
pub fn set_global(provider: Entity<Self>, cx: &mut App) {
cx.set_global(GlobalOllamaLanguageModelProvider(provider));
}
/// Snapshot of the models currently known to the provider state
/// (cloned, so the caller owns the list).
pub fn available_models_for_completion(&self, cx: &App) -> Vec<ollama::Model> {
self.state.read(cx).available_models.clone()
}
/// The HTTP client this provider talks to the Ollama server with
/// (cheap `Arc` clone).
pub fn http_client(&self) -> Arc<dyn HttpClient> {
self.http_client.clone()
}
/// Re-runs model discovery against the Ollama server by restarting the
/// state's fetch task.
pub fn refresh_models(&self, cx: &mut App) {
self.state.update(cx, |state, cx| {
state.restart_fetch_models_task(cx);
});
}
pub fn new(http_client: Arc<dyn HttpClient>, cx: &mut App) -> Self {
let this = Self {
http_client: http_client.clone(),
@ -676,6 +700,10 @@ impl Render for ConfigurationView {
}
}
/// Newtype wrapper so the provider entity can be stored as a gpui global
/// (see `OllamaLanguageModelProvider::global` / `set_global`).
struct GlobalOllamaLanguageModelProvider(Entity<OllamaLanguageModelProvider>);
impl Global for GlobalOllamaLanguageModelProvider {}
fn tool_into_ollama(tool: LanguageModelRequestTool) -> ollama::OllamaTool {
ollama::OllamaTool::Function {
function: OllamaFunctionTool {

View file

@ -9,17 +9,42 @@ license = "GPL-3.0-or-later"
workspace = true
[lib]
path = "src/ollama.rs"
path = "src/lib.rs"
[features]
default = []
schemars = ["dep:schemars"]
test-support = [
"gpui/test-support",
"http_client/test-support",
"language/test-support",
]
[dependencies]
anyhow.workspace = true
futures.workspace = true
gpui.workspace = true
http_client.workspace = true
edit_prediction.workspace = true
language.workspace = true
log.workspace = true
project.workspace = true
settings.workspace = true
schemars = { workspace = true, optional = true }
serde.workspace = true
serde_json.workspace = true
text.workspace = true
workspace-hack.workspace = true
[dev-dependencies]
client = { workspace = true, features = ["test-support"] }
editor = { workspace = true, features = ["test-support"] }
gpui = { workspace = true, features = ["test-support"] }
http_client = { workspace = true, features = ["test-support"] }
language = { workspace = true, features = ["test-support"] }
project = { workspace = true, features = ["test-support"] }
settings = { workspace = true, features = ["test-support"] }
theme = { workspace = true, features = ["test-support"] }
workspace = { workspace = true, features = ["test-support"] }

8
crates/ollama/src/lib.rs Normal file
View file

@ -0,0 +1,8 @@
//! Crate root for the `ollama` crate: re-exports the Ollama HTTP API client
//! (`ollama`) and the edit-prediction completion provider
//! (`ollama_completion_provider`). The `fake` test doubles are only exported
//! for tests or with the `test-support` feature.
mod ollama;
mod ollama_completion_provider;
pub use ollama::*;
pub use ollama_completion_provider::*;
#[cfg(any(test, feature = "test-support"))]
pub use ollama::fake;

View file

@ -6,6 +6,7 @@ use serde_json::Value;
use std::time::Duration;
pub const OLLAMA_API_URL: &str = "http://localhost:11434";
pub const OLLAMA_API_KEY_VAR: &str = "OLLAMA_API_KEY";
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Serialize, Deserialize, Debug, Eq, PartialEq)]
@ -99,6 +100,39 @@ impl Model {
}
}
/// Request body for Ollama's `/api/generate` endpoint.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Debug, Serialize, Deserialize)]
pub struct GenerateRequest {
/// Model name to run, e.g. "qwen2.5-coder:32b".
pub model: String,
/// Text to complete (for edit prediction: the text before the cursor).
pub prompt: String,
/// Text after the insertion point, enabling fill-in-the-middle completion.
pub suffix: Option<String>,
/// When false, the server returns a single final response instead of a stream.
pub stream: bool,
/// Sampling/generation parameters; server defaults apply when `None`.
pub options: Option<GenerateOptions>,
/// How long the model stays loaded after the request.
pub keep_alive: Option<KeepAlive>,
// Opaque conversation context tokens from a previous
// `GenerateResponse::context` — presumably used to continue a session;
// TODO confirm against Ollama API docs.
pub context: Option<Vec<i64>>,
}
/// Generation parameters for `/api/generate` (subset of Ollama's model options).
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Debug, Serialize, Deserialize)]
pub struct GenerateOptions {
// Maximum number of tokens to generate — NOTE(review): assumed to match
// Ollama's `num_predict` option semantics; confirm against API docs.
pub num_predict: Option<i32>,
pub temperature: Option<f32>,
pub top_p: Option<f32>,
/// Stop sequences that terminate generation (e.g. "<EOT>").
pub stop: Option<Vec<String>>,
}
/// Response body from Ollama's `/api/generate` endpoint
/// (non-streaming; `done` is true for the final message).
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Debug, Serialize, Deserialize)]
pub struct GenerateResponse {
/// The generated completion text.
pub response: String,
/// True when generation has finished.
pub done: bool,
/// Context tokens that can be echoed back in `GenerateRequest::context`.
pub context: Option<Vec<i64>>,
// Timing/accounting fields reported by the server (durations presumably
// in nanoseconds per Ollama convention — TODO confirm).
pub total_duration: Option<u64>,
pub load_duration: Option<u64>,
pub prompt_eval_count: Option<i32>,
pub eval_count: Option<i32>,
}
#[derive(Serialize, Deserialize, Debug)]
#[serde(tag = "role", rename_all = "lowercase")]
pub enum ChatMessage {
@ -309,14 +343,19 @@ pub async fn stream_chat_completion(
pub async fn get_models(
client: &dyn HttpClient,
api_url: &str,
api_key: Option<String>,
_: Option<Duration>,
) -> Result<Vec<LocalModelListing>> {
let uri = format!("{api_url}/api/tags");
let request_builder = HttpRequest::builder()
let mut request_builder = HttpRequest::builder()
.method(Method::GET)
.uri(uri)
.header("Accept", "application/json");
if let Some(api_key) = api_key {
request_builder = request_builder.header("Authorization", format!("Bearer {api_key}"))
}
let request = request_builder.body(AsyncBody::default())?;
let mut response = client.send(request).await?;
@ -336,15 +375,25 @@ pub async fn get_models(
}
/// Fetch details of a model, used to determine model capabilities
pub async fn show_model(client: &dyn HttpClient, api_url: &str, model: &str) -> Result<ModelShow> {
pub async fn show_model(
client: &dyn HttpClient,
api_url: &str,
api_key: Option<String>,
model: &str,
) -> Result<ModelShow> {
let uri = format!("{api_url}/api/show");
let request = HttpRequest::builder()
let mut request_builder = HttpRequest::builder()
.method(Method::POST)
.uri(uri)
.header("Content-Type", "application/json")
.body(AsyncBody::from(
serde_json::json!({ "model": model }).to_string(),
))?;
.header("Content-Type", "application/json");
if let Some(api_key) = api_key {
request_builder = request_builder.header("Authorization", format!("Bearer {api_key}"));
}
let request = request_builder.body(AsyncBody::from(
serde_json::json!({ "model": model }).to_string(),
))?;
let mut response = client.send(request).await?;
let mut body = String::new();
@ -360,10 +409,198 @@ pub async fn show_model(client: &dyn HttpClient, api_url: &str, model: &str) ->
Ok(details)
}
/// Issues a request against Ollama's `/api/generate` endpoint and parses the
/// result.
///
/// When `api_key` is present it is forwarded as a `Bearer` Authorization
/// header (useful when a proxy in front of Ollama enforces authentication).
/// Transport failures are logged before being returned; non-2xx responses are
/// turned into an error carrying the status and body text.
pub async fn generate(
    client: &dyn HttpClient,
    api_url: &str,
    api_key: Option<String>,
    request: GenerateRequest,
) -> Result<GenerateResponse> {
    let payload = serde_json::to_string(&request)?;

    let mut builder = HttpRequest::builder()
        .method(Method::POST)
        .uri(format!("{api_url}/api/generate"))
        .header("Content-Type", "application/json");
    if let Some(key) = api_key {
        builder = builder.header("Authorization", format!("Bearer {key}"));
    }
    let http_request = builder.body(AsyncBody::from(payload))?;

    // Log transport-level failures (server down, refused connection, …)
    // before propagating them to the caller.
    let mut response = match client.send(http_request).await {
        Ok(response) => response,
        Err(err) => {
            log::error!("Ollama server unavailable at {}: {}", api_url, err);
            return Err(err);
        }
    };

    let mut body = String::new();
    response.body_mut().read_to_string(&mut body).await?;

    anyhow::ensure!(
        response.status().is_success(),
        "Failed to connect to Ollama API: {} {}",
        response.status(),
        body,
    );

    serde_json::from_str::<GenerateResponse>(&body)
        .context("Unable to parse Ollama generate response")
}
/// Test doubles for the Ollama HTTP API, available in tests and behind the
/// `test-support` feature.
#[cfg(any(test, feature = "test-support"))]
pub mod fake {
use super::*;
use crate::ollama_completion_provider::OllamaCompletionProvider;
use gpui::AppContext;
use http_client::{AsyncBody, Response, Url};
use std::collections::HashMap;
use std::sync::{Arc, Mutex};
/// In-memory `HttpClient` that serves canned responses keyed by URL path
/// and records each request it receives.
pub struct FakeHttpClient {
// path -> canned JSON response body
responses: Arc<Mutex<HashMap<String, String>>>,
// NOTE(review): `send` always records an empty body (see below), so the
// second tuple element is currently never populated.
requests: Arc<Mutex<Vec<(String, String)>>>, // (path, body)
}
impl FakeHttpClient {
/// Creates a client with no canned responses; any request errors until a
/// response is registered via `set_response`.
pub fn new() -> Self {
Self {
responses: Arc::new(Mutex::new(HashMap::new())),
requests: Arc::new(Mutex::new(Vec::new())),
}
}
/// Registers `response` as the body returned for requests to `path`.
pub fn set_response(&self, path: &str, response: String) {
self.responses
.lock()
.unwrap()
.insert(path.to_string(), response);
}
/// Convenience: cans a successful `/api/generate` response whose
/// `response` field is `completion_text`.
pub fn set_generate_response(&self, completion_text: &str) {
let response = serde_json::json!({
"response": completion_text,
"done": true,
"context": [],
"total_duration": 1000000_u64,
"load_duration": 1000000_u64,
"prompt_eval_count": 10,
"prompt_eval_duration": 1000000_u64,
"eval_count": 20,
"eval_duration": 1000000_u64
});
self.set_response("/api/generate", response.to_string());
}
/// Makes subsequent requests to `path` fail by removing its canned response.
pub fn set_error(&self, path: &str) {
// Remove any existing response to force an error
self.responses.lock().unwrap().remove(path);
}
/// Returns a copy of all requests recorded so far.
pub fn get_requests(&self) -> Vec<(String, String)> {
self.requests.lock().unwrap().clone()
}
/// Clears the recorded request log.
pub fn clear_requests(&self) {
self.requests.lock().unwrap().clear();
}
}
impl HttpClient for FakeHttpClient {
fn type_name(&self) -> &'static str {
"FakeHttpClient"
}
fn user_agent(&self) -> Option<&http::HeaderValue> {
None
}
fn proxy(&self) -> Option<&Url> {
None
}
// Answers from the canned-response table; errors for unknown paths.
fn send(
&self,
req: http_client::Request<AsyncBody>,
) -> futures::future::BoxFuture<'static, Result<Response<AsyncBody>, anyhow::Error>>
{
let path = req.uri().path().to_string();
let responses = Arc::clone(&self.responses);
let requests = Arc::clone(&self.requests);
Box::pin(async move {
// Store the request
// NOTE(review): the request body is discarded here — an empty
// string is recorded instead of the actual payload.
requests.lock().unwrap().push((path.clone(), String::new()));
let responses = responses.lock().unwrap();
if let Some(response_body) = responses.get(&path).cloned() {
let response = Response::builder()
.status(200)
.header("content-type", "application/json")
.body(AsyncBody::from(response_body))
.unwrap();
Ok(response)
} else {
Err(anyhow::anyhow!("No mock response set for {}", path))
}
})
}
}
/// Factory namespace for constructing a fake provider/client pair in tests.
pub struct Ollama;
impl Ollama {
/// Builds an `OllamaCompletionProvider` for tests alongside a
/// `FakeHttpClient`.
// NOTE(review): the returned `fake_client` is not passed to the provider
// here — presumably the provider resolves its HTTP client elsewhere
// (e.g. a global); verify callers actually exercise the fake.
pub fn fake(
cx: &mut gpui::TestAppContext,
) -> (
gpui::Entity<OllamaCompletionProvider>,
std::sync::Arc<FakeHttpClient>,
) {
let fake_client = std::sync::Arc::new(FakeHttpClient::new());
let provider =
cx.new(|cx| OllamaCompletionProvider::new("qwencoder".to_string(), None, cx));
(provider, fake_client)
}
}
}
#[cfg(test)]
mod tests {
use super::*;
/// A fill-in-the-middle request (prompt + suffix) survives a JSON round trip
/// with all fields intact.
#[test]
fn test_generate_request_with_suffix_serialization() {
    let original = GenerateRequest {
        model: "qwen2.5-coder:32b".to_string(),
        prompt: "def fibonacci(n):".to_string(),
        suffix: Some(" return result".to_string()),
        stream: false,
        options: Some(GenerateOptions {
            num_predict: Some(150),
            temperature: Some(0.1),
            top_p: Some(0.95),
            stop: None,
        }),
        keep_alive: None,
        context: None,
    };

    // Serialize then deserialize, and compare the fields we care about.
    let serialized = serde_json::to_string(&original).unwrap();
    let round_tripped: GenerateRequest = serde_json::from_str(&serialized).unwrap();

    assert_eq!(round_tripped.model, "qwen2.5-coder:32b");
    assert_eq!(round_tripped.prompt, "def fibonacci(n):");
    assert_eq!(round_tripped.suffix, Some(" return result".to_string()));
    assert!(!round_tripped.stream);
    assert!(round_tripped.options.is_some());
}
#[test]
fn parse_completion() {
let response = serde_json::json!({
@ -585,4 +822,64 @@ mod tests {
assert_eq!(message_images.len(), 1);
assert_eq!(message_images[0].as_str().unwrap(), base64_image);
}
/// The serialized request contains no credential material: the API key is a
/// parameter of `generate` (sent as an Authorization header), never a field
/// of `GenerateRequest`. This round trip documents that the JSON body is
/// identical whether or not a key is in use.
#[test]
fn test_generate_request_with_api_key_serialization() {
    let request = GenerateRequest {
        model: "qwen2.5-coder:32b".to_string(),
        prompt: "def fibonacci(n):".to_string(),
        suffix: Some(" return result".to_string()),
        stream: false,
        options: Some(GenerateOptions {
            num_predict: Some(150),
            temperature: Some(0.1),
            top_p: Some(0.95),
            stop: None,
        }),
        keep_alive: None,
        context: None,
    };

    let json = serde_json::to_string(&request).unwrap();
    let decoded: GenerateRequest = serde_json::from_str(&json).unwrap();

    assert_eq!(decoded.model, "qwen2.5-coder:32b");
    assert_eq!(decoded.prompt, "def fibonacci(n):");
    assert_eq!(decoded.suffix, Some(" return result".to_string()));
    assert!(!decoded.stream);
    assert!(decoded.options.is_some());
}
/// Stop sequences inside `GenerateOptions` round-trip through JSON unchanged.
#[test]
fn test_generate_request_with_stop_tokens() {
    let request = GenerateRequest {
        model: "codellama:7b-code".to_string(),
        prompt: "def fibonacci(n):".to_string(),
        suffix: Some(" return result".to_string()),
        stream: false,
        options: Some(GenerateOptions {
            num_predict: Some(150),
            temperature: Some(0.1),
            top_p: Some(0.95),
            stop: Some(vec!["<EOT>".to_string()]),
        }),
        keep_alive: None,
        context: None,
    };

    let encoded = serde_json::to_string(&request).unwrap();
    let decoded: GenerateRequest = serde_json::from_str(&encoded).unwrap();

    assert_eq!(decoded.model, "codellama:7b-code");
    assert_eq!(decoded.prompt, "def fibonacci(n):");
    assert_eq!(decoded.suffix, Some(" return result".to_string()));
    assert!(!decoded.stream);

    // The stop token list must survive intact.
    let options = decoded.options.expect("options should round-trip");
    assert_eq!(options.stop, Some(vec!["<EOT>".to_string()]));
}
}

File diff suppressed because it is too large Load diff

View file

@ -82,6 +82,7 @@ image_viewer.workspace = true
indoc.workspace = true
edit_prediction_button.workspace = true
inspector_ui.workspace = true
ollama.workspace = true
install_cli.workspace = true
jj_ui.workspace = true
journal.workspace = true

View file

@ -3,8 +3,11 @@ use collections::HashMap;
use copilot::{Copilot, CopilotCompletionProvider};
use editor::Editor;
use gpui::{AnyWindowHandle, App, AppContext as _, Context, Entity, WeakEntity};
use language::language_settings::{EditPredictionProvider, all_language_settings};
use settings::SettingsStore;
use language_models::AllLanguageModelSettings;
use ollama::{OLLAMA_API_KEY_VAR, OllamaCompletionProvider, SettingsModel, State};
use settings::{Settings as _, SettingsStore};
use std::{cell::RefCell, rc::Rc, sync::Arc};
use supermaven::{Supermaven, SupermavenCompletionProvider};
use ui::Window;
@ -12,6 +15,33 @@ use workspace::Workspace;
use zeta::{ProviderDataCollection, ZetaEditPredictionProvider};
pub fn init(client: Arc<Client>, user_store: Entity<UserStore>, cx: &mut App) {
// Initialize global Ollama service
let (api_url, settings_models) = {
let settings = &AllLanguageModelSettings::get_global(cx).ollama;
let api_url = settings.api_url.clone();
let settings_models: Vec<SettingsModel> = settings
.available_models
.iter()
.map(|model| SettingsModel {
name: model.name.clone(),
display_name: model.display_name.clone(),
max_tokens: model.max_tokens,
supports_tools: model.supports_tools,
supports_images: model.supports_images,
supports_thinking: model.supports_thinking,
})
.collect();
(api_url, settings_models)
};
let ollama_service = State::new(client.http_client(), api_url, None, cx);
ollama_service.update(cx, |service, cx| {
service.set_settings_models(settings_models, cx);
});
State::set_global(ollama_service, cx);
let editors: Rc<RefCell<HashMap<WeakEntity<Editor>, AnyWindowHandle>>> = Rc::default();
cx.observe_new({
let editors = editors.clone();
@ -89,6 +119,27 @@ pub fn init(client: Arc<Client>, user_store: Entity<UserStore>, cx: &mut App) {
user_store.clone(),
cx,
);
} else if provider == EditPredictionProvider::Ollama {
// Update global Ollama service when settings change
let settings = &AllLanguageModelSettings::get_global(cx).ollama;
if let Some(service) = State::global(cx) {
let settings_models: Vec<SettingsModel> = settings
.available_models
.iter()
.map(|model| SettingsModel {
name: model.name.clone(),
display_name: model.display_name.clone(),
max_tokens: model.max_tokens,
supports_tools: model.supports_tools,
supports_images: model.supports_images,
supports_thinking: model.supports_thinking,
})
.collect();
service.update(cx, |service, cx| {
service.set_settings_models(settings_models, cx);
});
}
}
}
})
@ -229,5 +280,81 @@ fn assign_edit_prediction_provider(
editor.set_edit_prediction_provider(Some(provider), window, cx);
}
}
EditPredictionProvider::Ollama => {
let settings = &AllLanguageModelSettings::get_global(cx).ollama;
let api_key = std::env::var(OLLAMA_API_KEY_VAR).ok();
// Get model from settings or use discovered models
let model = if let Some(first_model) = settings.available_models.first() {
Some(first_model.name.clone())
} else if let Some(service) = State::global(cx) {
// Use first discovered model
service
.read(cx)
.available_models()
.first()
.map(|m| m.name.clone())
} else {
None
};
if let Some(model) = model {
let provider = cx.new(|cx| OllamaCompletionProvider::new(model, api_key, cx));
editor.set_edit_prediction_provider(Some(provider), window, cx);
} else {
log::error!(
"No Ollama models available. Please configure models in settings or pull models using 'ollama pull <model-name>'"
);
editor.set_edit_prediction_provider::<OllamaCompletionProvider>(None, window, cx);
}
}
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::zed::tests::init_test;
    use editor::{Editor, MultiBuffer};
    use gpui::TestAppContext;
    use language::Buffer;
    use language_models::{AllLanguageModelSettings, provider::ollama::OllamaSettings};

    /// When Ollama is the configured edit-prediction provider but no models
    /// are available (neither in settings nor discovered), assigning the
    /// provider must degrade gracefully instead of panicking.
    #[gpui::test]
    async fn test_assign_edit_prediction_provider_with_no_ollama_models(cx: &mut TestAppContext) {
        let app_state = init_test(cx);

        let buffer = cx.new(|cx| Buffer::local("test content", cx));
        let multibuffer = cx.new(|cx| MultiBuffer::singleton(buffer, cx));
        let (editor, cx) =
            cx.add_window_view(|window, cx| Editor::for_multibuffer(multibuffer, None, window, cx));

        // Override settings to have an empty available_models list.
        cx.update(|_, cx| {
            let new_settings = AllLanguageModelSettings {
                ollama: OllamaSettings {
                    api_url: "http://localhost:11434".to_string(),
                    available_models: vec![], // Empty models list
                },
                ..Default::default()
            };
            AllLanguageModelSettings::override_global(new_settings, cx);
        });

        // This must complete without panicking even when no models are
        // available. (The previous `assert_eq!(result, ())` compared unit
        // values, which asserts nothing — the non-panicking call itself is
        // the behavior under test.)
        editor.update_in(cx, |editor, window, cx| {
            assign_edit_prediction_provider(
                editor,
                language::language_settings::EditPredictionProvider::Ollama,
                &app_state.client,
                app_state.user_store.clone(),
                window,
                cx,
            )
        });
    }
}

View file

@ -44,7 +44,7 @@ On Linux, `alt-tab` is often used by the window manager for switching windows, s
{#action editor::AcceptPartialEditPrediction} ({#kb editor::AcceptPartialEditPrediction}) can be used to accept the current edit prediction up to the next word boundary.
See the [Configuring GitHub Copilot](#github-copilot) and [Configuring Supermaven](#supermaven) sections below for configuration of other providers. Only text insertions at the current cursor are supported for these providers, whereas the Zeta model provides multiple predictions including deletions.
See the [Configuring GitHub Copilot](#github-copilot), [Configuring Supermaven](#supermaven), and [Configuring Ollama](#ollama) sections below for configuration of other providers. Only text insertions at the current cursor are supported for these providers, whereas the Zeta model provides multiple predictions including deletions.
## Configuring Edit Prediction Keybindings {#edit-predictions-keybinding}
@ -304,6 +304,74 @@ To use Supermaven as your provider, set this within `settings.json`:
You should be able to sign-in to Supermaven by clicking on the Supermaven icon in the status bar and following the setup instructions.
## Configuring Ollama {#ollama}
To use Ollama as your edit prediction provider, set this within `settings.json`:
```json
{
"features": {
"edit_prediction_provider": "ollama"
}
}
```
### Setup
1. Download and install Ollama from [ollama.com/download](https://ollama.com/download)
2. Pull completion-capable models, for example:
```sh
ollama pull qwen2.5-coder:3b
ollama pull codellama:7b
```
3. Ensure Ollama is running:
```sh
ollama serve
```
4. Configure the model in your language model settings
The Edit Prediction menu will automatically detect available models. When one is newly selected in the menu, it will be added to your `settings.json`, and put at the top of the list. You can then manually configure it in the settings file if you need more control.
```json
{
"language_models": {
"ollama": {
"api_url": "http://localhost:11434",
"available_models": [
{
"name": "qwen2.5-coder:3b",
"display_name": "Qwen 2.5 Coder 3B",
"max_tokens": 8192
},
{
"name": "codellama:7b",
"display_name": "CodeLlama 7B",
"max_tokens": 8192
}
]
}
}
}
```
You can also switch between them in the menu, and the order of the models in the settings file will be updated behind the scenes.
The settings also allow configuring Ollama's API URL, so you can use Ollama either locally or hosted remotely. The Edit Prediction menu includes a shortcut that opens the settings file where the URL is set.
### Authentication
Ollama itself doesn't require an API key, but when running it remotely it is common practice to set up an authenticating proxy server in front of it. When sending edit prediction requests, Zed forwards the API key as an authorization header so the proxy can authenticate the request:
```bash
export OLLAMA_API_KEY=your_api_key_here
```
## See also
You may also use the [Agent Panel](./agent-panel.md) or the [Inline Assistant](./inline-assistant.md) to interact with language models, see the [AI documentation](./overview.md) for more information on the other AI features in Zed.

View file

@ -3,7 +3,7 @@
Zed supports two sources for completions:
1. "Code Completions" provided by Language Servers (LSPs) automatically installed by Zed or via [Zed Language Extensions](languages.md).
2. "Edit Predictions" provided by Zed's own Zeta model or by external providers like [GitHub Copilot](#github-copilot) or [Supermaven](#supermaven).
2. "Edit Predictions" provided by Zed's own Zeta model or by external providers like [GitHub Copilot](#github-copilot), [Supermaven](#supermaven), or [Ollama](#ollama).
## Language Server Code Completions {#code-completions}