From 0f232e0ce2c7e50ef91b0daf9b8618c81f0ec33d Mon Sep 17 00:00:00 2001 From: KCaverly Date: Mon, 26 Jun 2023 10:35:56 -0400 Subject: [PATCH] added file metadata retrieval from db --- crates/vector_store/src/db.rs | 87 ++++++++++++++++++++++++----------- 1 file changed, 60 insertions(+), 27 deletions(-) diff --git a/crates/vector_store/src/db.rs b/crates/vector_store/src/db.rs index 54f0292d1f..bc5a7fd497 100644 --- a/crates/vector_store/src/db.rs +++ b/crates/vector_store/src/db.rs @@ -7,9 +7,8 @@ use rusqlite::{ types::{FromSql, FromSqlResult, ValueRef}, Connection, }; -use util::ResultExt; -use crate::{Document, IndexedFile}; +use crate::IndexedFile; // This is saving to a local database store within the users dev zed path // Where do we want this to sit? @@ -19,14 +18,22 @@ const VECTOR_DB_URL: &str = "embeddings_db"; // Note this is not an appropriate document #[derive(Debug)] pub struct DocumentRecord { - id: usize, - offset: usize, - name: String, - embedding: Embedding, + pub id: usize, + pub file_id: usize, + pub offset: usize, + pub name: String, + pub embedding: Embedding, } #[derive(Debug)] -struct Embedding(Vec); +pub struct FileRecord { + pub id: usize, + pub path: String, + pub sha1: String, +} + +#[derive(Debug)] +pub struct Embedding(pub Vec); impl FromSql for Embedding { fn column_result(value: ValueRef) -> FromSqlResult { @@ -101,21 +108,16 @@ impl VectorDatabase { Ok(()) } - pub fn get_documents(&self) -> Result> { - // Should return a HashMap in which the key is the id, and the value is the finished document - - // Get Data from Database + pub fn get_files(&self) -> Result> { let db = rusqlite::Connection::open(VECTOR_DB_URL)?; - fn query(db: Connection) -> rusqlite::Result> { - let mut query_statement = - db.prepare("SELECT id, offset, name, embedding FROM documents LIMIT 10")?; + fn query(db: Connection) -> rusqlite::Result> { + let mut query_statement = db.prepare("SELECT id, path, sha1 FROM files")?; let result_iter = query_statement.query_map([], |row| { - Ok(DocumentRecord { + Ok(FileRecord { id: row.get(0)?, - offset: row.get(1)?, - name: row.get(2)?, - embedding: row.get(3)?, + path: row.get(1)?, + sha1: row.get(2)?, }) })?; @@ -127,18 +129,49 @@ impl VectorDatabase { return Ok(results); } - let mut documents: HashMap = HashMap::new(); + let mut pages: HashMap = HashMap::new(); let result_iter = query(db); if result_iter.is_ok() { for result in result_iter.unwrap() { - documents.insert( - result.id, - Document { - offset: result.offset, - name: result.name, - embedding: result.embedding.0, - }, - ); + pages.insert(result.id, result); + } + } + + return Ok(pages); + } + + pub fn get_documents(&self) -> Result> { + // Should return a HashMap in which the key is the id, and the value is the finished document + + // Get Data from Database + let db = rusqlite::Connection::open(VECTOR_DB_URL)?; + + fn query(db: Connection) -> rusqlite::Result> { + let mut query_statement = + db.prepare("SELECT id, file_id, offset, name, embedding FROM documents")?; + let result_iter = query_statement.query_map([], |row| { + Ok(DocumentRecord { + id: row.get(0)?, + file_id: row.get(1)?, + offset: row.get(2)?, + name: row.get(3)?, + embedding: row.get(4)?, + }) + })?; + + let mut results = vec![]; + for result in result_iter { + results.push(result?); + } + + return Ok(results); + } + + let mut documents: HashMap = HashMap::new(); + let result_iter = query(db); + if result_iter.is_ok() { + for result in result_iter.unwrap() { + documents.insert(result.id, result); } }