Removed BLAS and increased batch size for vector search

This commit is contained in:
KCaverly 2023-09-27 09:43:23 -04:00
parent e75f56a0f2
commit abefa2738b
4 changed files with 9 additions and 109 deletions

View file

@ -39,9 +39,7 @@ rand.workspace = true
schemars.workspace = true
globset.workspace = true
sha1 = "0.10.5"
ndarray = { version = "0.15.0", features = ["blas"] }
blas-src = { version = "0.8", features = ["openblas"] }
openblas-src = { version = "0.10", features = ["cblas", "system"] }
ndarray = { version = "0.15.0" }
[dev-dependencies]
collections = { path = "../collections", features = ["test-support"] }

View file

@ -1,5 +1,3 @@
extern crate blas_src;
use crate::{
parsing::{Span, SpanDigest},
SEMANTIC_INDEX_VERSION,
@ -440,25 +438,24 @@ impl VectorDatabase {
.filter_map(|row| row.ok())
.collect::<Vec<(usize, Embedding)>>();
let batch_n = 250;
let batch_n = 1000;
let mut batches = Vec::new();
let mut batch_ids = Vec::new();
let mut batch_embeddings: Vec<f32> = Vec::new();
deserialized_rows.iter().for_each(|(id, embedding)| {
batch_ids.push(id);
batch_embeddings.extend(&embedding.0);
if batch_ids.len() == batch_n {
let array =
Array2::from_shape_vec((batch_ids.len(), 1536), batch_embeddings.clone());
let embeddings = std::mem::take(&mut batch_embeddings);
let ids = std::mem::take(&mut batch_ids);
let array = Array2::from_shape_vec((batch_ids.len(), 1536), embeddings);
match array {
Ok(array) => {
batches.push((batch_ids.clone(), array));
batches.push((ids, array));
}
Err(err) => log::error!("Failed to deserialize to ndarray: {:?}", err),
}
batch_ids = Vec::new();
batch_embeddings = Vec::new();
}
});

View file

@ -711,7 +711,7 @@ impl SemanticIndex {
.await?
.pop()
.ok_or_else(|| anyhow!("could not embed query"))?;
log::trace!("Embedding Search Query: {:?}", t0.elapsed().as_millis());
log::trace!("Embedding Search Query: {:?}ms", t0.elapsed().as_millis());
let search_start = Instant::now();
let modified_buffer_results = this.update(&mut cx, |this, cx| {