More fixes to the semantic index's chunking (#11376)

This fixes a tricky intermittent issue I was seeing, where we failed to
chunk certain files correctly because of the way we reuse Tree-sitter
`Parser` instances across parses.

I've also accounted for leading comments in chunk boundaries, so that
items are grouped with their leading comments whenever possible when
chunking.

Finally, we've changed the `debug project index` action so that it opens
a simple debug view in a pane, instead of printing paths to the console.
This lets you click into a path and see how it was chunked.

Release Notes:

- N/A

---------

Co-authored-by: Marshall <marshall@zed.dev>
This commit is contained in:
Max Brunsfeld 2024-05-03 19:00:18 -07:00 committed by GitHub
parent 335c307b93
commit 6964302d89
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 532 additions and 171 deletions

View file

@ -1,10 +1,11 @@
#[cfg(test)]
mod syntax_map_tests;
use crate::{Grammar, InjectionConfig, Language, LanguageId, LanguageRegistry};
use crate::{
with_parser, Grammar, InjectionConfig, Language, LanguageId, LanguageRegistry, QUERY_CURSORS,
};
use collections::HashMap;
use futures::FutureExt;
use parking_lot::Mutex;
use std::{
borrow::Cow,
cmp::{self, Ordering, Reverse},
@ -17,10 +18,6 @@ use sum_tree::{Bias, SeekTarget, SumTree};
use text::{Anchor, BufferSnapshot, OffsetRangeExt, Point, Rope, ToOffset, ToPoint};
use tree_sitter::{Node, Query, QueryCapture, QueryCaptures, QueryCursor, QueryMatches, Tree};
use super::PARSER;
static QUERY_CURSORS: Mutex<Vec<QueryCursor>> = Mutex::new(vec![]);
#[derive(Default)]
pub struct SyntaxMap {
snapshot: SyntaxSnapshot,
@ -1177,8 +1174,7 @@ fn parse_text(
ranges: Vec<tree_sitter::Range>,
old_tree: Option<Tree>,
) -> anyhow::Result<Tree> {
PARSER.with(|parser| {
let mut parser = parser.borrow_mut();
with_parser(|parser| {
let mut chunks = text.chunks_in_range(start_byte..text.len());
parser.set_included_ranges(&ranges)?;
parser.set_language(&grammar.ts_language)?;