Add LM Studio support to the Assistant (#23097)

#### Release Notes:

- Added support for [LM Studio](https://lmstudio.ai/) to the Assistant.

#### Quick demo:

https://github.com/user-attachments/assets/af58fc13-1abc-4898-9747-3511016da86a

#### Future enhancements:

- wire up tool calling (new in [LM Studio 0.3.6](https://lmstudio.ai/blog/lmstudio-v0.3.6))

---------

Co-authored-by: Marshall Bowers <elliott.codes@gmail.com>
2025-01-14 15:41:58 -05:00 · 2025-01-14 15:41:58 -05:00 · c038696aa8
commit c038696aa8
parent 4445679f3c
24 changed files with 1153 additions and 2 deletions
--- a/crates/lmstudio/Cargo.toml
+++ b/crates/lmstudio/Cargo.toml
@ -0,0 +1,24 @@
+[package]
+name = "lmstudio"
+version = "0.1.0"
+edition = "2021"
+publish = false
+license = "GPL-3.0-or-later"
+
+[lints]
+workspace = true
+
+[lib]
+path = "src/lmstudio.rs"
+
+[features]
+default = []
+schemars = ["dep:schemars"]
+
+[dependencies]
+anyhow.workspace = true
+futures.workspace = true
+http_client.workspace = true
+schemars = { workspace = true, optional = true }
+serde.workspace = true
+serde_json.workspace = true
--- a/crates/lmstudio/LICENSE-GPL
+++ b/crates/lmstudio/LICENSE-GPL
@ -0,0 +1 @@
+../../LICENSE-GPL
--- a/crates/lmstudio/src/lmstudio.rs
+++ b/crates/lmstudio/src/lmstudio.rs
@ -0,0 +1,369 @@
+use anyhow::{anyhow, Context, Result};
+use futures::{io::BufReader, stream::BoxStream, AsyncBufReadExt, AsyncReadExt, StreamExt};
+use http_client::{http, AsyncBody, HttpClient, Method, Request as HttpRequest};
+use serde::{Deserialize, Serialize};
+use serde_json::{value::RawValue, Value};
+use std::{convert::TryFrom, sync::Arc, time::Duration};
+
+/// Base URL of an LM Studio HTTP API on `localhost:1234` under `/api/v0`.
+///
+/// The request helpers in this file append endpoint paths (for example
+/// `/models` or `/chat/completions`) to whatever `api_url` they are given;
+/// presumably this constant is the default value for that argument — confirm
+/// against callers.
+pub const LMSTUDIO_API_URL: &str = "http://localhost:1234/api/v0";
+
+/// Role attached to a chat message.
+///
+/// Serialized and deserialized using the lowercase variant name
+/// (`"user"`, `"assistant"`, `"system"`, `"tool"`).
+#[derive(Clone, Copy, Serialize, Deserialize, Debug, Eq, PartialEq)]
+#[serde(rename_all = "lowercase")]
+pub enum Role {
+    User,
+    Assistant,
+    System,
+    Tool,
+}
+
+/// Parses a role from its lowercase name.
+///
+/// Accepts exactly `"user"`, `"assistant"`, `"system"` and `"tool"`; any other
+/// string (including different casing) yields an `invalid role` error.
+impl TryFrom<String> for Role {
+    type Error = anyhow::Error;
+
+    fn try_from(value: String) -> Result<Self> {
+        let role = match value.as_str() {
+            "user" => Self::User,
+            "assistant" => Self::Assistant,
+            "system" => Self::System,
+            "tool" => Self::Tool,
+            _ => return Err(anyhow!("invalid role '{value}'")),
+        };
+        Ok(role)
+    }
+}
+
+/// Converts a role into its lowercase name as an owned `String`.
+impl From<Role> for String {
+    fn from(val: Role) -> Self {
+        let name = match val {
+            Role::User => "user",
+            Role::Assistant => "assistant",
+            Role::System => "system",
+            Role::Tool => "tool",
+        };
+        name.to_owned()
+    }
+}
+
+/// A model description: identifier, optional display name, and token limit.
+///
+/// Also derives `schemars::JsonSchema` when the `schemars` feature is enabled.
+#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
+#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
+pub struct Model {
+    /// Model name; also returned by [`Model::id`].
+    pub name: String,
+    /// Optional human-readable name; [`Model::display_name`] falls back to
+    /// `name` when this is `None`.
+    pub display_name: Option<String>,
+    /// Token limit reported by [`Model::max_token_count`].
+    pub max_tokens: usize,
+}
+
+impl Model {
+    /// Builds a model from borrowed strings.
+    ///
+    /// When `max_tokens` is `None`, the limit is set to 2048.
+    pub fn new(name: &str, display_name: Option<&str>, max_tokens: Option<usize>) -> Self {
+        let max_tokens = max_tokens.unwrap_or(2048);
+        Self {
+            name: name.to_owned(),
+            display_name: display_name.map(str::to_owned),
+            max_tokens,
+        }
+    }
+
+    /// Returns the model name, which doubles as its identifier.
+    pub fn id(&self) -> &str {
+        self.name.as_str()
+    }
+
+    /// Returns the display name if one is set, otherwise the model name.
+    pub fn display_name(&self) -> &str {
+        match &self.display_name {
+            Some(display_name) => display_name,
+            None => &self.name,
+        }
+    }
+
+    /// Returns the stored `max_tokens` value.
+    pub fn max_token_count(&self) -> usize {
+        self.max_tokens
+    }
+}
+/// A single chat message, tagged in JSON by a `role` field whose value is the
+/// lowercase variant name (`"assistant"`, `"user"`, `"system"`).
+#[derive(Serialize, Deserialize, Debug)]
+#[serde(tag = "role", rename_all = "lowercase")]
+pub enum ChatMessage {
+    /// Assistant message. Both fields become `None` when missing from the
+    /// input during deserialization.
+    Assistant {
+        #[serde(default)]
+        content: Option<String>,
+        #[serde(default)]
+        tool_calls: Option<Vec<LmStudioToolCall>>,
+    },
+    /// User message with required text content.
+    User {
+        content: String,
+    },
+    /// System message with required text content.
+    System {
+        content: String,
+    },
+}
+
+/// A tool call attached to an assistant message.
+///
+/// Uses serde's default (externally tagged) enum representation with
+/// lowercase variant names, so the only variant is keyed as `"function"`.
+#[derive(Serialize, Deserialize, Debug)]
+#[serde(rename_all = "lowercase")]
+pub enum LmStudioToolCall {
+    Function(LmStudioFunctionCall),
+}
+
+/// A function invocation: the function name plus its arguments.
+#[derive(Serialize, Deserialize, Debug)]
+pub struct LmStudioFunctionCall {
+    /// Name of the function being called.
+    pub name: String,
+    /// Arguments kept as raw, unparsed JSON text.
+    pub arguments: Box<RawValue>,
+}
+
+/// Declaration of a function that can be offered as a tool.
+#[derive(Serialize, Deserialize, Debug, Eq, PartialEq)]
+pub struct LmStudioFunctionTool {
+    /// Function name.
+    pub name: String,
+    /// Optional free-text description.
+    pub description: Option<String>,
+    /// Optional arbitrary JSON describing the parameters
+    /// (presumably a JSON Schema object — confirm against the LM Studio API).
+    pub parameters: Option<Value>,
+}
+
+/// A tool entry for a chat completion request.
+///
+/// Tagged in JSON by a `type` field holding the lowercase variant name, so
+/// the only variant is written as `"type": "function"` alongside a
+/// `function` object.
+#[derive(Serialize, Deserialize, Debug, Eq, PartialEq)]
+#[serde(tag = "type", rename_all = "lowercase")]
+pub enum LmStudioTool {
+    Function { function: LmStudioFunctionTool },
+}
+
+/// Request body serialized to JSON by [`complete`] and
+/// [`stream_chat_completion`].
+///
+/// No field is skipped during serialization: `None` options are written as
+/// JSON `null` and `tools` is written even when empty.
+#[derive(Serialize, Debug)]
+pub struct ChatCompletionRequest {
+    /// Name of the model to run.
+    pub model: String,
+    /// Messages making up the conversation.
+    pub messages: Vec<ChatMessage>,
+    /// Value of the `stream` flag sent to the server.
+    pub stream: bool,
+    /// Optional `max_tokens` value.
+    pub max_tokens: Option<i32>,
+    /// Optional list of stop strings.
+    pub stop: Option<Vec<String>>,
+    /// Optional sampling temperature.
+    pub temperature: Option<f32>,
+    /// Tools offered to the model.
+    pub tools: Vec<LmStudioTool>,
+}
+
+/// Chat completion payload.
+///
+/// [`complete`] deserializes the whole response body into this type, and
+/// [`stream_chat_completion`] deserializes each streamed `data: ` line into it.
+/// All fields are required when deserializing.
+#[derive(Serialize, Deserialize, Debug)]
+pub struct ChatResponse {
+    pub id: String,
+    pub object: String,
+    /// Creation time (presumably a Unix timestamp in seconds — confirm).
+    pub created: u64,
+    pub model: String,
+    pub choices: Vec<ChoiceDelta>,
+}
+
+/// One entry of a response's `choices` array.
+#[derive(Serialize, Deserialize, Debug)]
+pub struct ChoiceDelta {
+    /// Position of this choice within the response.
+    pub index: u32,
+    /// Untyped JSON delta; becomes `Value::Null` when the field is absent.
+    #[serde(default)]
+    pub delta: serde_json::Value,
+    /// Reason generation stopped, if provided.
+    pub finish_reason: Option<String>,
+}
+
+/// A piece of a tool call as it arrives while streaming.
+#[derive(Serialize, Deserialize, Debug, Eq, PartialEq)]
+pub struct ToolCallChunk {
+    /// Index of the tool call this chunk belongs to.
+    pub index: usize,
+    /// Tool call identifier, when present in this chunk.
+    pub id: Option<String>,
+
+    // There is also an optional `type` field that would determine if a
+    // function is there. Sometimes this streams in with the `function` before
+    // it streams in the `type`
+    pub function: Option<FunctionChunk>,
+}
+
+/// Function portion of a [`ToolCallChunk`]; both fields are optional.
+#[derive(Serialize, Deserialize, Debug, Eq, PartialEq)]
+pub struct FunctionChunk {
+    /// Function name, when present in this chunk.
+    pub name: Option<String>,
+    /// Argument text, when present in this chunk.
+    pub arguments: Option<String>,
+}
+
+/// Token usage counters carried by a [`ResponseStreamEvent`].
+#[derive(Serialize, Deserialize, Debug)]
+pub struct Usage {
+    pub prompt_tokens: u32,
+    pub completion_tokens: u32,
+    pub total_tokens: u32,
+}
+
+/// Either a stream event or an error object.
+///
+/// The enum is untagged: deserialization tries `Ok` (a
+/// [`ResponseStreamEvent`]) first and falls back to `Err`, which expects an
+/// object with a string `error` field.
+#[derive(Serialize, Deserialize, Debug)]
+#[serde(untagged)]
+pub enum ResponseStreamResult {
+    Ok(ResponseStreamEvent),
+    Err { error: String },
+}
+
+/// A streamed response event with its choices and optional usage data.
+#[derive(Serialize, Deserialize, Debug)]
+pub struct ResponseStreamEvent {
+    // NOTE(review): `created` is `u32` here but `u64` in `ChatResponse`;
+    // confirm whether the narrower type is intentional.
+    pub created: u32,
+    pub model: String,
+    pub choices: Vec<ChoiceDelta>,
+    /// Token usage, when the event includes it.
+    pub usage: Option<Usage>,
+}
+
+/// Body of the `/models` response; [`get_models`] returns its `data` field.
+#[derive(Serialize, Deserialize)]
+pub struct ListModelsResponse {
+    pub data: Vec<ModelEntry>,
+}
+
+/// One model as listed in a [`ListModelsResponse`].
+#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
+pub struct ModelEntry {
+    pub id: String,
+    pub object: String,
+    /// Kind of model; the raw identifier maps to the JSON key `type`.
+    pub r#type: ModelType,
+    pub publisher: String,
+    /// Architecture name, when reported.
+    pub arch: Option<String>,
+    pub compatibility_type: CompatibilityType,
+    pub quantization: String,
+    /// Load state of the model.
+    pub state: ModelState,
+    /// Maximum context length, when reported.
+    pub max_context_length: Option<u32>,
+    /// Loaded context length, when reported.
+    pub loaded_context_length: Option<u32>,
+}
+
+/// Kind of model, written in JSON as `"llm"`, `"embeddings"` or `"vlm"`.
+#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
+#[serde(rename_all = "lowercase")]
+pub enum ModelType {
+    Llm,
+    Embeddings,
+    Vlm,
+}
+
+/// Load state of a model, written in JSON in kebab-case:
+/// `"loaded"`, `"loading"` or `"not-loaded"`.
+#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
+#[serde(rename_all = "kebab-case")]
+pub enum ModelState {
+    Loaded,
+    Loading,
+    NotLoaded,
+}
+
+/// Compatibility type of a model, written in JSON as `"gguf"` or `"mlx"`.
+#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
+#[serde(rename_all = "lowercase")]
+pub enum CompatibilityType {
+    Gguf,
+    Mlx,
+}
+
+/// Sends a chat completion request and returns the parsed response.
+///
+/// POSTs `request` as JSON to `{api_url}/chat/completions`, reads the entire
+/// response body, and deserializes it into a [`ChatResponse`].
+///
+/// # Errors
+///
+/// Returns an error if the request cannot be serialized or sent, if the body
+/// cannot be read, if a successful response is not a valid [`ChatResponse`],
+/// or if the server answers with a non-success status. In the last case the
+/// error carries the status and the response body.
+pub async fn complete(
+    client: &dyn HttpClient,
+    api_url: &str,
+    request: ChatCompletionRequest,
+) -> Result<ChatResponse> {
+    let uri = format!("{api_url}/chat/completions");
+    let request_builder = HttpRequest::builder()
+        .method(Method::POST)
+        .uri(uri)
+        .header("Content-Type", "application/json");
+
+    let serialized_request = serde_json::to_string(&request)?;
+    let request = request_builder.body(AsyncBody::from(serialized_request))?;
+
+    let mut response = client.send(request).await?;
+    let mut body = Vec::new();
+    response.body_mut().read_to_end(&mut body).await?;
+
+    if response.status().is_success() {
+        serde_json::from_slice(&body)
+            .context("Unable to parse LM Studio chat completion response")
+    } else {
+        // Decode lossily so a non-UTF-8 error body cannot replace the HTTP
+        // status with an unrelated UTF-8 decoding error.
+        Err(anyhow!(
+            "Failed to connect to LM Studio API: {} {}",
+            response.status(),
+            String::from_utf8_lossy(&body),
+        ))
+    }
+}
+
+/// Starts a chat completion and returns a stream of parsed response chunks.
+///
+/// POSTs `request` as JSON to `{api_url}/chat/completions`. On a successful
+/// status the body is read line by line:
+///
+/// * lines without a `data: ` prefix are skipped,
+/// * the `data: [DONE]` line is skipped,
+/// * every other `data: ` line is parsed as a [`ChatResponse`]; parse and
+///   read failures are printed to stderr and also yielded as `Err` items.
+///
+/// On a non-success status the body is read and returned inside the error.
+pub async fn stream_chat_completion(
+    client: &dyn HttpClient,
+    api_url: &str,
+    request: ChatCompletionRequest,
+) -> Result<BoxStream<'static, Result<ChatResponse>>> {
+    let payload = AsyncBody::from(serde_json::to_string(&request)?);
+    let http_request = http::Request::builder()
+        .method(Method::POST)
+        .uri(format!("{api_url}/chat/completions"))
+        .header("Content-Type", "application/json")
+        .body(payload)?;
+
+    let mut response = client.send(http_request).await?;
+
+    // Handle the failure case first so the streaming path reads straight through.
+    if !response.status().is_success() {
+        let mut body = String::new();
+        response.body_mut().read_to_string(&mut body).await?;
+
+        return Err(anyhow!(
+            "Failed to connect to LM Studio API: {} {}",
+            response.status(),
+            body,
+        ));
+    }
+
+    Ok(BufReader::new(response.into_body())
+        .lines()
+        .filter_map(|line| async move {
+            let line = match line {
+                Ok(line) => line,
+                Err(e) => {
+                    eprintln!("Error reading line: {e}");
+                    return Some(Err(e.into()));
+                }
+            };
+
+            // Only `data: ` lines carry payloads; anything else is dropped.
+            let line = line.strip_prefix("data: ")?;
+            if line == "[DONE]" {
+                return None;
+            }
+
+            let result: Result<ChatResponse> = serde_json::from_str(line)
+                .context("Unable to parse chat completions response");
+            if let Err(ref e) = result {
+                eprintln!("Error parsing line: {e}\nLine content: '{line}'");
+            }
+            Some(result)
+        })
+        .boxed())
+}
+
+/// Fetches the list of models from `{api_url}/models`.
+///
+/// The third argument (an optional `Duration`) is accepted but unused.
+///
+/// # Errors
+///
+/// Fails if the request cannot be built or sent, if the body cannot be read
+/// as a string, if the server answers with a non-success status (the status
+/// and body are included in the error), or if a successful body does not
+/// parse as a [`ListModelsResponse`].
+pub async fn get_models(
+    client: &dyn HttpClient,
+    api_url: &str,
+    _: Option<Duration>,
+) -> Result<Vec<ModelEntry>> {
+    let request = HttpRequest::builder()
+        .method(Method::GET)
+        .uri(format!("{api_url}/models"))
+        .header("Accept", "application/json")
+        .body(AsyncBody::default())?;
+
+    let mut response = client.send(request).await?;
+
+    // The body is read before checking the status because both outcomes use it.
+    let mut body = String::new();
+    response.body_mut().read_to_string(&mut body).await?;
+
+    if !response.status().is_success() {
+        return Err(anyhow!(
+            "Failed to connect to LM Studio API: {} {}",
+            response.status(),
+            body,
+        ));
+    }
+
+    let models: ListModelsResponse =
+        serde_json::from_str(&body).context("Unable to parse LM Studio models response")?;
+    Ok(models.data)
+}
+
+/// Sends an empty request to LM Studio to trigger loading the model.
+///
+/// POSTs a JSON body naming `model`, with an empty `messages` array,
+/// `stream: false` and `max_tokens: 0`, to `{api_url}/completions`.
+/// A successful status yields `Ok(())`; otherwise the status and response
+/// body are returned in the error.
+///
+/// NOTE(review): the body uses `messages` but targets `/completions` rather
+/// than `/chat/completions` — confirm this is what the server expects.
+pub async fn preload_model(client: Arc<dyn HttpClient>, api_url: &str, model: &str) -> Result<()> {
+    let payload = serde_json::json!({
+        "model": model,
+        "messages": [],
+        "stream": false,
+        "max_tokens": 0,
+    });
+
+    let request = HttpRequest::builder()
+        .method(Method::POST)
+        .uri(format!("{api_url}/completions"))
+        .header("Content-Type", "application/json")
+        .body(AsyncBody::from(serde_json::to_string(&payload)?))?;
+
+    let mut response = client.send(request).await?;
+    if response.status().is_success() {
+        return Ok(());
+    }
+
+    let mut body = String::new();
+    response.body_mut().read_to_string(&mut body).await?;
+
+    Err(anyhow!(
+        "Failed to connect to LM Studio API: {} {}",
+        response.status(),
+        body,
+    ))
+}