Fix some semantic index issues (#11216)

* [x] Fixed an issue where embeddings would be assigned incorrectly to
files if a subset of embedding batches failed
* [x] Added a command to debug which paths are present in the semantic
index
* [x] Determine why so many paths are often missing from the semantic
index
* we were erroring out if an embedding batch contained multiple texts that
were the same, which can happen if a worktree contains multiple copies
of the same text (e.g. a license).

Release Notes:

- N/A

---------

Co-authored-by: Marshall <marshall@zed.dev>
Co-authored-by: Nathan <nathan@zed.dev>
Co-authored-by: Kyle <kylek@zed.dev>
Co-authored-by: Kyle Kelley <rgbkrk@gmail.com>
This commit is contained in:
Max Brunsfeld 2024-04-30 10:55:38 -07:00 committed by GitHub
parent d01428e69c
commit 38b9d5cc36
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 237 additions and 61 deletions

View file

@ -72,10 +72,11 @@ impl EmbeddingProvider for CloudEmbeddingProvider {
texts
.iter()
.map(|to_embed| {
let dimensions = embeddings.remove(&to_embed.digest).with_context(|| {
format!("server did not return an embedding for {:?}", to_embed)
})?;
Ok(Embedding::new(dimensions))
let embedding =
embeddings.get(&to_embed.digest).cloned().with_context(|| {
format!("server did not return an embedding for {:?}", to_embed)
})?;
Ok(Embedding::new(embedding))
})
.collect()
}