More fixes to the semantic index's chunking (#11376)
This fixes a tricky intermittent issue I was seeing, where we failed to chunk certain files correctly because of the way we reuse Tree-sitter `Parser` instances across parses. I've also accounted for leading comments in chunk boundaries, so that items are grouped with their leading comments whenever possible. Finally, we've changed the `debug project index` action so that it opens a simple debug view in a pane instead of printing paths to the console; this lets you click into a path and see how it was chunked.

Release Notes:

- N/A

---------

Co-authored-by: Marshall <marshall@zed.dev>
This commit is contained in:
parent 335c307b93
commit 6964302d89

11 changed files with 532 additions and 171 deletions
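For context on the parser-reuse pitfall the commit message mentions: a Tree-sitter `Parser` keeps whatever configuration it was last given (language, included ranges) across calls to `parse`, so a pooled parser that isn't fully reconfigured can silently parse a file with stale settings. The sketch below is a minimal illustration of that general property, not Zed's actual fix; the `with_parser` helper mirrors the name used in the diff, and the thread-local pool here is an assumption.

```rust
use std::cell::RefCell;
use tree_sitter::{Language, Parser, Tree};

thread_local! {
    // A reused parser: cheap to keep around, but it remembers its last configuration.
    static PARSER: RefCell<Parser> = RefCell::new(Parser::new());
}

fn with_parser<T>(f: impl FnOnce(&mut Parser) -> T) -> T {
    PARSER.with(|parser| f(&mut parser.borrow_mut()))
}

fn parse(text: &str, language: &Language) -> Option<Tree> {
    with_parser(|parser| {
        // Reconfigure before every parse; a parser last used for a different
        // language (or restricted to injection ranges) would otherwise produce
        // a tree for the wrong grammar or the wrong spans.
        parser.set_language(language).ok()?;
        parser.parse(text, None)
    })
}
```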
@@ -37,9 +37,12 @@ serde.workspace = true
 serde_json.workspace = true
 sha2.workspace = true
 smol.workspace = true
+theme.workspace = true
 tree-sitter.workspace = true
+ui.workspace = true
 util.workspace = true
 unindent.workspace = true
+workspace.workspace = true
 worktree.workspace = true

 [dev-dependencies]
@@ -54,3 +57,4 @@ project = { workspace = true, features = ["test-support"] }
 tempfile.workspace = true
 util = { workspace = true, features = ["test-support"] }
 worktree = { workspace = true, features = ["test-support"] }
+workspace = { workspace = true, features = ["test-support"] }
@@ -1,9 +1,10 @@
-use language::{with_parser, with_query_cursor, Grammar};
+use language::{with_parser, with_query_cursor, Language};
 use serde::{Deserialize, Serialize};
 use sha2::{Digest, Sha256};
 use std::{
     cmp::{self, Reverse},
     ops::Range,
+    path::Path,
     sync::Arc,
 };
 use tree_sitter::QueryCapture;

@@ -26,52 +27,95 @@ pub struct Chunk {
     pub digest: [u8; 32],
 }

-pub fn chunk_text(text: &str, grammar: Option<&Arc<Grammar>>) -> Vec<Chunk> {
-    chunk_text_with_size_range(text, grammar, CHUNK_SIZE_RANGE)
+pub fn chunk_text(text: &str, language: Option<&Arc<Language>>, path: &Path) -> Vec<Chunk> {
+    chunk_text_with_size_range(text, language, path, CHUNK_SIZE_RANGE)
 }

 fn chunk_text_with_size_range(
     text: &str,
-    grammar: Option<&Arc<Grammar>>,
+    language: Option<&Arc<Language>>,
+    path: &Path,
     size_config: ChunkSizeRange,
 ) -> Vec<Chunk> {
-    let mut syntactic_ranges = Vec::new();
+    let ranges = syntactic_ranges(text, language, path).unwrap_or_default();
+    chunk_text_with_syntactic_ranges(text, &ranges, size_config)
+}

-    if let Some(grammar) = grammar {
-        if let Some(outline) = grammar.outline_config.as_ref() {
-            let tree = with_parser(|parser| {
-                parser.set_language(&grammar.ts_language).log_err()?;
-                parser.parse(&text, None)
-            });
+fn syntactic_ranges(
+    text: &str,
+    language: Option<&Arc<Language>>,
+    path: &Path,
+) -> Option<Vec<Range<usize>>> {
+    let language = language?;
+    let grammar = language.grammar()?;
+    let outline = grammar.outline_config.as_ref()?;
+    let tree = with_parser(|parser| {
+        parser.set_language(&grammar.ts_language).log_err()?;
+        parser.parse(&text, None)
+    });

-            if let Some(tree) = tree {
-                with_query_cursor(|cursor| {
-                    // Retrieve a list of ranges of outline items (types, functions, etc) in the document.
-                    // Omit single-line outline items (e.g. struct fields, constant declarations), because
-                    // we'll already be attempting to split on lines.
-                    syntactic_ranges = cursor
-                        .matches(&outline.query, tree.root_node(), text.as_bytes())
-                        .filter_map(|mat| {
-                            mat.captures
-                                .iter()
-                                .find_map(|QueryCapture { node, index }| {
-                                    if *index == outline.item_capture_ix {
-                                        if node.end_position().row > node.start_position().row {
-                                            return Some(node.byte_range());
-                                        }
-                                    }
-                                    None
-                                })
-                        })
-                        .collect::<Vec<_>>();
-                    syntactic_ranges
-                        .sort_unstable_by_key(|range| (range.start, Reverse(range.end)));
-                });
-            }
-        }
+    let Some(tree) = tree else {
+        log::error!("failed to parse file {path:?} for chunking");
+        return None;
+    };

+    struct RowInfo {
+        offset: usize,
+        is_comment: bool,
+    }

-    chunk_text_with_syntactic_ranges(text, &syntactic_ranges, size_config)
+    let scope = language.default_scope();
+    let line_comment_prefixes = scope.line_comment_prefixes();
+    let row_infos = text
+        .split('\n')
+        .map({
+            let mut offset = 0;
+            move |line| {
+                let line = line.trim_start();
+                let is_comment = line_comment_prefixes
+                    .iter()
+                    .any(|prefix| line.starts_with(prefix.as_ref()));
+                let result = RowInfo { offset, is_comment };
+                offset += line.len() + 1;
+                result
+            }
+        })
+        .collect::<Vec<_>>();

+    // Retrieve a list of ranges of outline items (types, functions, etc) in the document.
+    // Omit single-line outline items (e.g. struct fields, constant declarations), because
+    // we'll already be attempting to split on lines.
+    let mut ranges = with_query_cursor(|cursor| {
+        cursor
+            .matches(&outline.query, tree.root_node(), text.as_bytes())
+            .filter_map(|mat| {
+                mat.captures
+                    .iter()
+                    .find_map(|QueryCapture { node, index }| {
+                        if *index == outline.item_capture_ix {
+                            let mut start_offset = node.start_byte();
+                            let mut start_row = node.start_position().row;
+                            let end_offset = node.end_byte();
+                            let end_row = node.end_position().row;

+                            // Expand the range to include any preceding comments.
+                            while start_row > 0 && row_infos[start_row - 1].is_comment {
+                                start_offset = row_infos[start_row - 1].offset;
+                                start_row -= 1;
+                            }

+                            if end_row > start_row {
+                                return Some(start_offset..end_offset);
+                            }
+                        }
+                        None
+                    })
+            })
+            .collect::<Vec<_>>()
+    });

+    ranges.sort_unstable_by_key(|range| (range.start, Reverse(range.end)));
+    Some(ranges)
 }

 fn chunk_text_with_syntactic_ranges(
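To make the new comment-grouping behavior concrete: given a couple of `//` lines directly above a function item, the expansion loop in `syntactic_ranges` above walks `start_row` back to the first comment line, so the chunk boundary lands at the comment rather than at `fn`. Here is a distilled, standalone version of that step; the input text and the `expand_to_leading_comments` helper are illustrative assumptions, not code from this PR.

```rust
struct RowInfo {
    offset: usize,
    is_comment: bool,
}

fn expand_to_leading_comments(row_infos: &[RowInfo], mut start_row: usize) -> usize {
    // Walk upward while the previous row is a line comment.
    while start_row > 0 && row_infos[start_row - 1].is_comment {
        start_row -= 1;
    }
    row_infos[start_row].offset
}

fn main() {
    let text = "fn a() {}\n\n// Returns the first name\n// something something\nfn first_name() {}\n";
    let mut offset = 0;
    let row_infos: Vec<RowInfo> = text
        .split('\n')
        .map(|line| {
            let info = RowInfo {
                offset,
                is_comment: line.trim_start().starts_with("//"),
            };
            offset += line.len() + 1;
            info
        })
        .collect();
    // The item on row 4 (`fn first_name`) is pulled back to row 2, its first leading comment.
    assert_eq!(expand_to_leading_comments(&row_infos, 4), row_infos[2].offset);
}
```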
@@ -148,7 +192,7 @@ fn chunk_text_with_syntactic_ranges(
         if !range.is_empty() {
             chunks.push(Chunk {
                 range: range.clone(),
-                digest: Sha256::digest(&text[range.clone()]).into(),
+                digest: Sha256::digest(&text[range]).into(),
             });
         }
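The `digest` change above works because `Range<usize>` is not `Copy`: indexing with `&text[range]` moves the range, which is fine once this is its last use, so the `.clone()` became unnecessary. A minimal illustration:

```rust
fn main() {
    let text = "hello world";
    let range = 0..5;
    // Indexing a str by Range<usize> takes the range by value (it is not Copy),
    // so this moves `range`; no clone is needed when it isn't used again.
    let slice = &text[range];
    assert_eq!(slice, "hello");
}
```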
@@ -177,6 +221,8 @@ mod tests {
            Self { first_name, last_name, age }
        }

+        /// Returns the first name
+        /// something something something
        fn first_name(&self) -> &str {
            &self.first_name
        }
@@ -185,8 +231,8 @@ mod tests {
            &self.last_name
        }

-        fn age(&self) -> usize {
-            self.ages
+        fn age(&self) -> u32 {
+            self.age
        }
    }
    "
@@ -194,7 +240,8 @@ mod tests {

        let chunks = chunk_text_with_size_range(
            &text,
-            language.grammar(),
+            Some(&language),
+            Path::new("lib.rs"),
            ChunkSizeRange {
                min: text.find('}').unwrap(),
                max: text.find("Self {").unwrap(),
@@ -209,8 +256,8 @@ mod tests {
            &[
                "struct Person {", // ...
                "impl Person {",
-                " fn first_name",
-                " fn age",
+                " /// Returns the first name",
+                " fn last_name",
            ],
        );
@@ -227,7 +274,8 @@ mod tests {

        let chunks = chunk_text_with_size_range(
            &text,
-            language.grammar(),
+            Some(&language),
+            Path::new("lib.rs"),
            ChunkSizeRange {
                min: text.find('{').unwrap(),
                max: text.find('V').unwrap(),
@@ -263,7 +311,8 @@ mod tests {

        let chunks = chunk_text_with_size_range(
            &text,
-            language.grammar(),
+            Some(&language),
+            Path::new("lib.rs"),
            ChunkSizeRange { min: 32, max: 64 },
        );
@@ -331,33 +380,35 @@ mod tests {
     #[test]
     fn test_chunk_text() {
         let text = "a\n".repeat(1000);
-        let chunks = chunk_text(&text, None);
+        let chunks = chunk_text(&text, None, Path::new("lib.rs"));
         assert_eq!(
             chunks.len(),
             ((2000_f64) / (CHUNK_SIZE_RANGE.max as f64)).ceil() as usize
         );
     }

-    fn rust_language() -> Language {
-        Language::new(
-            LanguageConfig {
-                name: "Rust".into(),
-                matcher: LanguageMatcher {
-                    path_suffixes: vec!["rs".to_string()],
+    fn rust_language() -> Arc<Language> {
+        Arc::new(
+            Language::new(
+                LanguageConfig {
+                    name: "Rust".into(),
+                    matcher: LanguageMatcher {
+                        path_suffixes: vec!["rs".to_string()],
+                        ..Default::default()
+                    },
                     ..Default::default()
                 },
-                ..Default::default()
-            },
-            Some(tree_sitter_rust::language()),
-        )
-        .with_outline_query(
-            "
+                Some(tree_sitter_rust::language()),
+            )
+            .with_outline_query(
+                "
             (function_item name: (_) @name) @item
             (impl_item type: (_) @name) @item
             (struct_item name: (_) @name) @item
             (field_declaration name: (_) @name) @item
-            ",
-        )
-        .unwrap()
+                ",
+            )
+            .unwrap(),
+        )
     }
 }
crates/semantic_index/src/project_index_debug_view.rs (new file, +300 lines)

@@ -0,0 +1,300 @@
use crate::ProjectIndex;
use gpui::{
    canvas, div, list, uniform_list, AnyElement, AppContext, CursorStyle, EventEmitter,
    FocusHandle, FocusableView, IntoElement, ListOffset, ListState, Model, MouseMoveEvent, Render,
    UniformListScrollHandle, View,
};
use project::WorktreeId;
use settings::Settings;
use std::{path::Path, sync::Arc};
use theme::ThemeSettings;
use ui::prelude::*;
use workspace::item::{Item, TabContentParams};

pub struct ProjectIndexDebugView {
    index: Model<ProjectIndex>,
    rows: Vec<Row>,
    selected_path: Option<PathState>,
    hovered_row_ix: Option<usize>,
    focus_handle: FocusHandle,
    list_scroll_handle: UniformListScrollHandle,
    _subscription: gpui::Subscription,
}

struct PathState {
    path: Arc<Path>,
    chunks: Vec<SharedString>,
    list_state: ListState,
}

enum Row {
    Worktree(Arc<Path>),
    Entry(WorktreeId, Arc<Path>),
}

impl ProjectIndexDebugView {
    pub fn new(index: Model<ProjectIndex>, cx: &mut ViewContext<Self>) -> Self {
        let mut this = Self {
            rows: Vec::new(),
            list_scroll_handle: UniformListScrollHandle::new(),
            selected_path: None,
            hovered_row_ix: None,
            focus_handle: cx.focus_handle(),
            _subscription: cx.subscribe(&index, |this, _, _, cx| this.update_rows(cx)),
            index,
        };
        this.update_rows(cx);
        this
    }

    fn update_rows(&mut self, cx: &mut ViewContext<Self>) {
        let worktree_indices = self.index.read(cx).worktree_indices(cx);
        cx.spawn(|this, mut cx| async move {
            let mut rows = Vec::new();

            for index in worktree_indices {
                let (root_path, worktree_id, worktree_paths) =
                    index.read_with(&cx, |index, cx| {
                        let worktree = index.worktree.read(cx);
                        (worktree.abs_path(), worktree.id(), index.paths(cx))
                    })?;
                rows.push(Row::Worktree(root_path));
                rows.extend(
                    worktree_paths
                        .await?
                        .into_iter()
                        .map(|path| Row::Entry(worktree_id, path)),
                );
            }

            this.update(&mut cx, |this, cx| {
                this.rows = rows;
                cx.notify();
            })
        })
        .detach();
    }

    fn handle_path_click(
        &mut self,
        worktree_id: WorktreeId,
        file_path: Arc<Path>,
        cx: &mut ViewContext<Self>,
    ) -> Option<()> {
        let project_index = self.index.read(cx);
        let fs = project_index.fs.clone();
        let worktree_index = project_index.worktree_index(worktree_id, cx)?.read(cx);
        let root_path = worktree_index.worktree.read(cx).abs_path();
        let chunks = worktree_index.chunks_for_path(file_path.clone(), cx);

        cx.spawn(|this, mut cx| async move {
            let chunks = chunks.await?;
            let content = fs.load(&root_path.join(&file_path)).await?;
            let chunks = chunks
                .into_iter()
                .map(|chunk| {
                    let mut start = chunk.chunk.range.start.min(content.len());
                    let mut end = chunk.chunk.range.end.min(content.len());
                    while !content.is_char_boundary(start) {
                        start += 1;
                    }
                    while !content.is_char_boundary(end) {
                        end -= 1;
                    }
                    content[start..end].to_string().into()
                })
                .collect::<Vec<_>>();

            this.update(&mut cx, |this, cx| {
                let view = cx.view().downgrade();
                this.selected_path = Some(PathState {
                    path: file_path,
                    list_state: ListState::new(
                        chunks.len(),
                        gpui::ListAlignment::Top,
                        px(100.),
                        move |ix, cx| {
                            if let Some(view) = view.upgrade() {
                                view.update(cx, |view, cx| view.render_chunk(ix, cx))
                            } else {
                                div().into_any()
                            }
                        },
                    ),
                    chunks,
                });
                cx.notify();
            })
        })
        .detach();
        None
    }

    fn render_chunk(&mut self, ix: usize, cx: &mut ViewContext<Self>) -> AnyElement {
        let buffer_font = ThemeSettings::get_global(cx).buffer_font.family.clone();
        let Some(state) = &self.selected_path else {
            return div().into_any();
        };

        let colors = cx.theme().colors();
        let chunk = &state.chunks[ix];

        div()
            .text_ui(cx)
            .w_full()
            .font_family(buffer_font)
            .child(
                h_flex()
                    .justify_between()
                    .child(format!(
                        "chunk {} of {}. length: {}",
                        ix + 1,
                        state.chunks.len(),
                        chunk.len(),
                    ))
                    .child(
                        h_flex()
                            .child(
                                Button::new(("prev", ix), "prev")
                                    .disabled(ix == 0)
                                    .on_click(cx.listener(move |this, _, _| {
                                        this.scroll_to_chunk(ix.saturating_sub(1))
                                    })),
                            )
                            .child(
                                Button::new(("next", ix), "next")
                                    .disabled(ix + 1 == state.chunks.len())
                                    .on_click(
                                        cx.listener(move |this, _, _| this.scroll_to_chunk(ix + 1)),
                                    ),
                            ),
                    ),
            )
            .child(
                div()
                    .bg(colors.editor_background)
                    .text_xs()
                    .child(chunk.clone()),
            )
            .into_any_element()
    }

    fn scroll_to_chunk(&mut self, ix: usize) {
        if let Some(state) = self.selected_path.as_mut() {
            state.list_state.scroll_to(ListOffset {
                item_ix: ix,
                offset_in_item: px(0.),
            })
        }
    }
}

impl Render for ProjectIndexDebugView {
    fn render(&mut self, cx: &mut gpui::ViewContext<'_, Self>) -> impl IntoElement {
        if let Some(selected_path) = self.selected_path.as_ref() {
            v_flex()
                .child(
                    div()
                        .id("selected-path-name")
                        .child(
                            h_flex()
                                .justify_between()
                                .child(selected_path.path.to_string_lossy().to_string())
                                .child("x"),
                        )
                        .border_b_1()
                        .border_color(cx.theme().colors().border)
                        .cursor(CursorStyle::PointingHand)
                        .on_click(cx.listener(|this, _, cx| {
                            this.selected_path.take();
                            cx.notify();
                        })),
                )
                .child(list(selected_path.list_state.clone()).size_full())
                .size_full()
                .into_any_element()
        } else {
            let mut list = uniform_list(
                cx.view().clone(),
                "ProjectIndexDebugView",
                self.rows.len(),
                move |this, range, cx| {
                    this.rows[range]
                        .iter()
                        .enumerate()
                        .map(|(ix, row)| match row {
                            Row::Worktree(root_path) => div()
                                .id(ix)
                                .child(Label::new(root_path.to_string_lossy().to_string())),
                            Row::Entry(worktree_id, file_path) => div()
                                .id(ix)
                                .pl_8()
                                .child(Label::new(file_path.to_string_lossy().to_string()))
                                .on_mouse_move(cx.listener(move |this, _: &MouseMoveEvent, cx| {
                                    if this.hovered_row_ix != Some(ix) {
                                        this.hovered_row_ix = Some(ix);
                                        cx.notify();
                                    }
                                }))
                                .cursor(CursorStyle::PointingHand)
                                .on_click(cx.listener({
                                    let worktree_id = *worktree_id;
                                    let file_path = file_path.clone();
                                    move |this, _, cx| {
                                        this.handle_path_click(worktree_id, file_path.clone(), cx);
                                    }
                                })),
                        })
                        .collect()
                },
            )
            .track_scroll(self.list_scroll_handle.clone())
            .size_full()
            .text_bg(cx.theme().colors().background)
            .into_any_element();

            canvas(
                move |bounds, cx| {
                    list.prepaint_as_root(bounds.origin, bounds.size.into(), cx);
                    list
                },
                |_, mut list, cx| list.paint(cx),
            )
            .size_full()
            .into_any_element()
        }
    }
}

impl EventEmitter<()> for ProjectIndexDebugView {}

impl Item for ProjectIndexDebugView {
    type Event = ();

    fn tab_content(&self, params: TabContentParams, _: &WindowContext<'_>) -> AnyElement {
        Label::new("Project Index (Debug)")
            .color(if params.selected {
                Color::Default
            } else {
                Color::Muted
            })
            .into_any_element()
    }

    fn clone_on_split(
        &self,
        _: workspace::WorkspaceId,
        cx: &mut ViewContext<Self>,
    ) -> Option<View<Self>>
    where
        Self: Sized,
    {
        Some(cx.new_view(|cx| Self::new(self.index.clone(), cx)))
    }
}

impl FocusableView for ProjectIndexDebugView {
    fn focus_handle(&self, _: &AppContext) -> gpui::FocusHandle {
        self.focus_handle.clone()
    }
}
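A note on the chunk rendering above: chunk ranges are byte offsets into the file as it was indexed, so the view clamps them to the file's current length and then nudges them onto UTF-8 character boundaries before slicing. Below is a distilled, standalone version of that clamping step; the helper name is mine, not from the code above.

```rust
/// Clamp a byte range to valid char boundaries in `content`, moving the
/// start forward and the end backward so the result is always sliceable.
fn clamp_to_char_boundaries(content: &str, range: std::ops::Range<usize>) -> (usize, usize) {
    let mut start = range.start.min(content.len());
    let mut end = range.end.min(content.len());
    while !content.is_char_boundary(start) {
        start += 1;
    }
    while !content.is_char_boundary(end) {
        end -= 1;
    }
    (start, end)
}

fn main() {
    let content = "héllo"; // 'é' occupies bytes 1..3
    let (start, end) = clamp_to_char_boundaries(content, 2..10);
    assert_eq!(&content[start..end], "llo");
}
```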
@@ -1,5 +1,6 @@
 mod chunking;
 mod embedding;
+mod project_index_debug_view;

 use anyhow::{anyhow, Context as _, Result};
 use chunking::{chunk_text, Chunk};
@@ -31,6 +32,8 @@ use std::{
 use util::ResultExt;
 use worktree::LocalSnapshot;

+pub use project_index_debug_view::ProjectIndexDebugView;
+
 pub struct SemanticIndex {
     embedding_provider: Arc<dyn EmbeddingProvider>,
     db_connection: heed::Env,
@@ -397,26 +400,35 @@ impl ProjectIndex {
         Ok(result)
     }

-    pub fn debug(&self, cx: &mut ModelContext<Self>) -> Task<Result<()>> {
-        let indices = self
+    pub(crate) fn worktree_index(
+        &self,
+        worktree_id: WorktreeId,
+        cx: &AppContext,
+    ) -> Option<Model<WorktreeIndex>> {
+        for index in self.worktree_indices.values() {
+            if let WorktreeIndexHandle::Loaded { index, .. } = index {
+                if index.read(cx).worktree.read(cx).id() == worktree_id {
+                    return Some(index.clone());
+                }
+            }
+        }
+        None
+    }
+
+    pub(crate) fn worktree_indices(&self, cx: &AppContext) -> Vec<Model<WorktreeIndex>> {
+        let mut result = self
             .worktree_indices
             .values()
-            .filter_map(|worktree_index| {
-                if let WorktreeIndexHandle::Loaded { index, .. } = worktree_index {
+            .filter_map(|index| {
+                if let WorktreeIndexHandle::Loaded { index, .. } = index {
                     Some(index.clone())
                 } else {
                     None
                 }
             })
             .collect::<Vec<_>>();

-        cx.spawn(|_, mut cx| async move {
-            eprintln!("semantic index contents:");
-            for index in indices {
-                index.update(&mut cx, |index, cx| index.debug(cx))?.await?
-            }
-            Ok(())
-        })
+        result.sort_by_key(|index| index.read(cx).worktree.read(cx).id());
+        result
     }
 }
@@ -726,10 +738,8 @@ impl WorktreeIndex {
                 .language_for_file_path(&entry.path)
                 .await
                 .ok();
-            let grammar =
-                language.as_ref().and_then(|language| language.grammar());
             let chunked_file = ChunkedFile {
-                chunks: chunk_text(&text, grammar),
+                chunks: chunk_text(&text, language.as_ref(), &entry.path),
                 handle,
                 path: entry.path,
                 mtime: entry.mtime,
@@ -861,7 +871,6 @@ impl WorktreeIndex {
                 db.put(&mut txn, &key, file)?;
             }
             txn.commit()?;
-            eprintln!("committed {:?}", embedded_files.len());

             drop(embedded_files);
             log::debug!("committed");
@@ -871,18 +880,38 @@ impl WorktreeIndex {
         })
     }

-    fn debug(&mut self, cx: &mut ModelContext<Self>) -> Task<Result<()>> {
+    fn paths(&self, cx: &AppContext) -> Task<Result<Vec<Arc<Path>>>> {
         let connection = self.db_connection.clone();
         let db = self.db;
         cx.background_executor().spawn(async move {
             let tx = connection
                 .read_txn()
                 .context("failed to create read transaction")?;
-            for record in db.iter(&tx)? {
-                let (key, _) = record?;
-                eprintln!("{}", path_for_db_key(key));
-            }
-            Ok(())
+            let result = db
+                .iter(&tx)?
+                .map(|entry| Ok(entry?.1.path.clone()))
+                .collect::<Result<Vec<Arc<Path>>>>();
+            drop(tx);
+            result
         })
     }

+    fn chunks_for_path(
+        &self,
+        path: Arc<Path>,
+        cx: &AppContext,
+    ) -> Task<Result<Vec<EmbeddedChunk>>> {
+        let connection = self.db_connection.clone();
+        let db = self.db;
+        cx.background_executor().spawn(async move {
+            let tx = connection
+                .read_txn()
+                .context("failed to create read transaction")?;
+            Ok(db
+                .get(&tx, &db_key_for_path(&path))?
+                .ok_or_else(|| anyhow!("no such path"))?
+                .chunks
+                .clone())
+        })
+    }
@@ -927,7 +956,7 @@ struct EmbeddedFile {
     chunks: Vec<EmbeddedChunk>,
 }

-#[derive(Debug, Serialize, Deserialize)]
+#[derive(Clone, Debug, Serialize, Deserialize)]
 struct EmbeddedChunk {
     chunk: Chunk,
     embedding: Embedding,
@@ -981,10 +1010,6 @@ fn db_key_for_path(path: &Arc<Path>) -> String {
     path.to_string_lossy().replace('/', "\0")
 }

-fn path_for_db_key(key: &str) -> String {
-    key.replace('\0', "/")
-}
-
 #[cfg(test)]
 mod tests {
     use super::*;