Updated vector store indexing to only use languages with an embedding.scm Tree-sitter query
Co-authored-by: maxbrunsfeld <max@zed.dev>
This commit is contained in:
parent
0db0876289
commit
36907bb4dc
5 changed files with 98 additions and 7 deletions
|
@ -350,6 +350,7 @@ pub struct LanguageQueries {
|
||||||
pub brackets: Option<Cow<'static, str>>,
|
pub brackets: Option<Cow<'static, str>>,
|
||||||
pub indents: Option<Cow<'static, str>>,
|
pub indents: Option<Cow<'static, str>>,
|
||||||
pub outline: Option<Cow<'static, str>>,
|
pub outline: Option<Cow<'static, str>>,
|
||||||
|
pub embedding: Option<Cow<'static, str>>,
|
||||||
pub injections: Option<Cow<'static, str>>,
|
pub injections: Option<Cow<'static, str>>,
|
||||||
pub overrides: Option<Cow<'static, str>>,
|
pub overrides: Option<Cow<'static, str>>,
|
||||||
}
|
}
|
||||||
|
@ -495,6 +496,7 @@ pub struct Grammar {
|
||||||
pub(crate) brackets_config: Option<BracketConfig>,
|
pub(crate) brackets_config: Option<BracketConfig>,
|
||||||
pub(crate) indents_config: Option<IndentConfig>,
|
pub(crate) indents_config: Option<IndentConfig>,
|
||||||
pub outline_config: Option<OutlineConfig>,
|
pub outline_config: Option<OutlineConfig>,
|
||||||
|
pub embedding_config: Option<EmbeddingConfig>,
|
||||||
pub(crate) injection_config: Option<InjectionConfig>,
|
pub(crate) injection_config: Option<InjectionConfig>,
|
||||||
pub(crate) override_config: Option<OverrideConfig>,
|
pub(crate) override_config: Option<OverrideConfig>,
|
||||||
pub(crate) highlight_map: Mutex<HighlightMap>,
|
pub(crate) highlight_map: Mutex<HighlightMap>,
|
||||||
|
@ -516,6 +518,15 @@ pub struct OutlineConfig {
|
||||||
pub extra_context_capture_ix: Option<u32>,
|
pub extra_context_capture_ix: Option<u32>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A grammar's embedding query together with the capture indices used to read its matches.
///
/// Built by `Language::with_embedding_query`, which only stores a value when both the
/// `item` and `name` capture indices were found for the query.
#[derive(Debug)]
|
||||||
|
pub struct EmbeddingConfig {
|
||||||
|
/// The query constructed from the embedding query source.
pub query: Query,
|
||||||
|
/// Capture index registered under the name `item` (required).
pub item_capture_ix: u32,
|
||||||
|
/// Capture index registered under the name `name` (required).
pub name_capture_ix: u32,
|
||||||
|
/// Capture index registered under the name `context`, if one was found.
pub context_capture_ix: Option<u32>,
|
||||||
|
/// Capture index registered under the name `context.extra`, if one was found.
pub extra_context_capture_ix: Option<u32>,
|
||||||
|
}
|
||||||
|
|
||||||
struct InjectionConfig {
|
struct InjectionConfig {
|
||||||
query: Query,
|
query: Query,
|
||||||
content_capture_ix: u32,
|
content_capture_ix: u32,
|
||||||
|
@ -1145,6 +1156,7 @@ impl Language {
|
||||||
highlights_query: None,
|
highlights_query: None,
|
||||||
brackets_config: None,
|
brackets_config: None,
|
||||||
outline_config: None,
|
outline_config: None,
|
||||||
|
embedding_config: None,
|
||||||
indents_config: None,
|
indents_config: None,
|
||||||
injection_config: None,
|
injection_config: None,
|
||||||
override_config: None,
|
override_config: None,
|
||||||
|
@ -1181,6 +1193,9 @@ impl Language {
|
||||||
if let Some(query) = queries.outline {
|
if let Some(query) = queries.outline {
|
||||||
self = self.with_outline_query(query.as_ref())?;
|
self = self.with_outline_query(query.as_ref())?;
|
||||||
}
|
}
|
||||||
|
if let Some(query) = queries.embedding {
|
||||||
|
self = self.with_embedding_query(query.as_ref())?;
|
||||||
|
}
|
||||||
if let Some(query) = queries.injections {
|
if let Some(query) = queries.injections {
|
||||||
self = self.with_injection_query(query.as_ref())?;
|
self = self.with_injection_query(query.as_ref())?;
|
||||||
}
|
}
|
||||||
|
@ -1189,6 +1204,7 @@ impl Language {
|
||||||
}
|
}
|
||||||
Ok(self)
|
Ok(self)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn with_highlights_query(mut self, source: &str) -> Result<Self> {
|
pub fn with_highlights_query(mut self, source: &str) -> Result<Self> {
|
||||||
let grammar = self.grammar_mut();
|
let grammar = self.grammar_mut();
|
||||||
grammar.highlights_query = Some(Query::new(grammar.ts_language, source)?);
|
grammar.highlights_query = Some(Query::new(grammar.ts_language, source)?);
|
||||||
|
@ -1223,6 +1239,34 @@ impl Language {
|
||||||
Ok(self)
|
Ok(self)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Builds the embedding query from `source` and, when it has the required captures,
/// stores it on the grammar as an `EmbeddingConfig`.
///
/// The `item` and `name` captures are both required; `context` and `context.extra`
/// are optional. If either required capture is missing, `embedding_config` is left
/// untouched and the call still returns `Ok(self)`.
///
/// # Errors
///
/// Returns the error produced by `Query::new` when `source` cannot be turned into a query.
pub fn with_embedding_query(mut self, source: &str) -> Result<Self> {
|
||||||
|
let grammar = self.grammar_mut();
|
||||||
|
let query = Query::new(grammar.ts_language, source)?;
|
||||||
|
// One slot per capture name of interest; each stays `None` unless filled below.
let mut item_capture_ix = None;
|
||||||
|
let mut name_capture_ix = None;
|
||||||
|
let mut context_capture_ix = None;
|
||||||
|
let mut extra_context_capture_ix = None;
|
||||||
|
// NOTE(review): `get_capture_indices` is defined elsewhere; presumably it writes the
// index of each named capture found in `query` into the paired slot — confirm.
get_capture_indices(
|
||||||
|
&query,
|
||||||
|
&mut [
|
||||||
|
("item", &mut item_capture_ix),
|
||||||
|
("name", &mut name_capture_ix),
|
||||||
|
("context", &mut context_capture_ix),
|
||||||
|
("context.extra", &mut extra_context_capture_ix),
|
||||||
|
],
|
||||||
|
);
|
||||||
|
// `zip` yields `Some` only when both required indices are present.
if let Some((item_capture_ix, name_capture_ix)) = item_capture_ix.zip(name_capture_ix) {
|
||||||
|
grammar.embedding_config = Some(EmbeddingConfig {
|
||||||
|
query,
|
||||||
|
item_capture_ix,
|
||||||
|
name_capture_ix,
|
||||||
|
context_capture_ix,
|
||||||
|
extra_context_capture_ix,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
Ok(self)
|
||||||
|
}
|
||||||
|
|
||||||
pub fn with_brackets_query(mut self, source: &str) -> Result<Self> {
|
pub fn with_brackets_query(mut self, source: &str) -> Result<Self> {
|
||||||
let grammar = self.grammar_mut();
|
let grammar = self.grammar_mut();
|
||||||
let query = Query::new(grammar.ts_language, source)?;
|
let query = Query::new(grammar.ts_language, source)?;
|
||||||
|
|
|
@ -136,8 +136,8 @@ impl VectorStore {
|
||||||
content: String,
|
content: String,
|
||||||
) -> Result<IndexedFile> {
|
) -> Result<IndexedFile> {
|
||||||
let grammar = language.grammar().ok_or_else(|| anyhow!("no grammar"))?;
|
let grammar = language.grammar().ok_or_else(|| anyhow!("no grammar"))?;
|
||||||
let outline_config = grammar
|
let embedding_config = grammar
|
||||||
.outline_config
|
.embedding_config
|
||||||
.as_ref()
|
.as_ref()
|
||||||
.ok_or_else(|| anyhow!("no outline query"))?;
|
.ok_or_else(|| anyhow!("no outline query"))?;
|
||||||
|
|
||||||
|
@ -148,13 +148,17 @@ impl VectorStore {
|
||||||
|
|
||||||
let mut documents = Vec::new();
|
let mut documents = Vec::new();
|
||||||
let mut context_spans = Vec::new();
|
let mut context_spans = Vec::new();
|
||||||
for mat in cursor.matches(&outline_config.query, tree.root_node(), content.as_bytes()) {
|
for mat in cursor.matches(
|
||||||
|
&embedding_config.query,
|
||||||
|
tree.root_node(),
|
||||||
|
content.as_bytes(),
|
||||||
|
) {
|
||||||
let mut item_range = None;
|
let mut item_range = None;
|
||||||
let mut name_range = None;
|
let mut name_range = None;
|
||||||
for capture in mat.captures {
|
for capture in mat.captures {
|
||||||
if capture.index == outline_config.item_capture_ix {
|
if capture.index == embedding_config.item_capture_ix {
|
||||||
item_range = Some(capture.node.byte_range());
|
item_range = Some(capture.node.byte_range());
|
||||||
} else if capture.index == outline_config.name_capture_ix {
|
} else if capture.index == embedding_config.name_capture_ix {
|
||||||
name_range = Some(capture.node.byte_range());
|
name_range = Some(capture.node.byte_range());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -266,7 +270,11 @@ impl VectorStore {
|
||||||
.language_for_file(&absolute_path, None)
|
.language_for_file(&absolute_path, None)
|
||||||
.await
|
.await
|
||||||
{
|
{
|
||||||
if language.name().as_ref() != "Rust" {
|
if language
|
||||||
|
.grammar()
|
||||||
|
.and_then(|grammar| grammar.embedding_config.as_ref())
|
||||||
|
.is_none()
|
||||||
|
{
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -359,6 +367,8 @@ impl VectorStore {
|
||||||
this.worktree_db_ids.extend(worktree_db_ids);
|
this.worktree_db_ids.extend(worktree_db_ids);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
log::info!("Semantic Indexing Complete!");
|
||||||
|
|
||||||
anyhow::Ok(())
|
anyhow::Ok(())
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
|
@ -46,7 +46,7 @@ async fn test_vector_store(cx: &mut TestAppContext) {
|
||||||
},
|
},
|
||||||
Some(tree_sitter_rust::language()),
|
Some(tree_sitter_rust::language()),
|
||||||
)
|
)
|
||||||
.with_outline_query(
|
.with_embedding_query(
|
||||||
r#"
|
r#"
|
||||||
(function_item
|
(function_item
|
||||||
name: (identifier) @name
|
name: (identifier) @name
|
||||||
|
|
|
@ -170,6 +170,7 @@ fn load_queries(name: &str) -> LanguageQueries {
|
||||||
brackets: load_query(name, "/brackets"),
|
brackets: load_query(name, "/brackets"),
|
||||||
indents: load_query(name, "/indents"),
|
indents: load_query(name, "/indents"),
|
||||||
outline: load_query(name, "/outline"),
|
outline: load_query(name, "/outline"),
|
||||||
|
embedding: load_query(name, "/embedding"),
|
||||||
injections: load_query(name, "/injections"),
|
injections: load_query(name, "/injections"),
|
||||||
overrides: load_query(name, "/overrides"),
|
overrides: load_query(name, "/overrides"),
|
||||||
}
|
}
|
||||||
|
|
36
crates/zed/src/languages/rust/embedding.scm
Normal file
36
crates/zed/src/languages/rust/embedding.scm
Normal file
|
@ -0,0 +1,36 @@
|
||||||
|
; Struct declarations: the optional visibility modifier and the `struct` keyword are
; captured as @context, the name as @name, and the whole declaration as @item.
(struct_item
|
||||||
|
(visibility_modifier)? @context
|
||||||
|
"struct" @context
|
||||||
|
name: (_) @name) @item
|
||||||
|
|
||||||
|
; Enum declarations: the optional visibility modifier and the `enum` keyword are
; captured as @context, the name as @name, and the whole declaration as @item.
(enum_item
|
||||||
|
(visibility_modifier)? @context
|
||||||
|
"enum" @context
|
||||||
|
name: (_) @name) @item
|
||||||
|
|
||||||
|
; Impl blocks: `impl` and the optional `for` keyword are captured as @context; both the
; optional trait and the implemented type are captured as @name, so a single match may
; carry two @name captures. The whole block is captured as @item.
(impl_item
|
||||||
|
"impl" @context
|
||||||
|
trait: (_)? @name
|
||||||
|
"for"? @context
|
||||||
|
type: (_) @name) @item
|
||||||
|
|
||||||
|
; Trait declarations: the optional visibility modifier and the `trait` keyword are
; captured as @context, the name as @name, and the whole declaration as @item.
(trait_item
|
||||||
|
(visibility_modifier)? @context
|
||||||
|
"trait" @context
|
||||||
|
name: (_) @name) @item
|
||||||
|
|
||||||
|
; Function definitions: the optional visibility modifier, the optional function
; modifiers, and the `fn` keyword are captured as @context, the name as @name, and the
; whole function as @item.
(function_item
|
||||||
|
(visibility_modifier)? @context
|
||||||
|
(function_modifiers)? @context
|
||||||
|
"fn" @context
|
||||||
|
name: (_) @name) @item
|
||||||
|
|
||||||
|
; Function signature items: same capture layout as `function_item` — the optional
; visibility modifier, optional function modifiers, and `fn` are @context, the name is
; @name, and the whole signature is @item.
(function_signature_item
|
||||||
|
(visibility_modifier)? @context
|
||||||
|
(function_modifiers)? @context
|
||||||
|
"fn" @context
|
||||||
|
name: (_) @name) @item
|
||||||
|
|
||||||
|
; Macro definitions: the `macro_rules!` token (written with a leading `.` anchor) is
; captured as @context, the macro name as @name, and the whole definition as @item.
(macro_definition
|
||||||
|
. "macro_rules!" @context
|
||||||
|
name: (_) @name) @item
|
Loading…
Add table
Add a link
Reference in a new issue