Compare commits

...
Sign in to create a new pull request.

10 commits

Author SHA1 Message Date
Joseph T. Lyons
bcd474d920 v0.103.x stable 2023-09-13 12:21:55 -04:00
Max Brunsfeld
69ab1c014a Make toolbar horizontal padding more consistent (#2944)
* increase horizontal padding of toolbar itself, remove padding that was
added to individual toolbar items like feedback button.
* make feedback info text and breadcrumbs have the same additional
padding as quick action buttons.

Release Notes:

- Fixed some inconsistencies in the layout of toolbars.
2023-09-07 13:24:21 -07:00
Kyle Caverly
fd373517c7 update semantic search to show no results if search query is blank (#2942)
Update semantic search to show no search results if search query is
blank
2023-09-07 14:58:49 -04:00
Joseph T. Lyons
a61d639b67 zed 0.103.1 2023-09-07 13:54:48 -04:00
Antonio Scandurra
eb8cf7c36e Start indexing right away when project was already indexed before (#2941)
Release Notes:
- Improved semantic search indexing to start in the background if the
project was already indexed before.
2023-09-07 19:47:40 +02:00
Antonio Scandurra
fedec2d21c Rework how we track projects and worktrees in semantic index (#2938)
This pull request introduces several improvements to the semantic search
experience. We're still missing collaboration and searching modified
buffers, which we'll tackle after we take a detour into reducing the
number of tokens used to generate embeddings.

Release Notes:

- Fixed a bug that could prevent semantic search from working when
deploying right after opening a project.
- Fixed a panic that could sometimes occur when using semantic search
while simultaneously changing a file.
- Fixed a bug that prevented semantic search from including new
worktrees when adding them to a project.
2023-09-07 19:47:12 +02:00
Kyle Caverly
f685265cad Token count fix (#2935)
Fix token count for OpenAIEmbeddings

Release Notes (Preview Only):

- update token count calculation for truncated OpenAIEmbeddings
- increased request timeout for OpenAI
2023-09-07 19:46:29 +02:00
Joseph T. Lyons
3b173b6090 Allow call events to be logged without a room id (#2937)
Prior to this PR, we assumed that all call events needed a room_id, but
we now have call-based actions that don't need a room_id - for instance,
you can right click a channel and view the notes while not in a call. In
this case, there is no room_id. We want to be able to track these
events, which requires removing the restriction that requires a room_id.

Release Notes:

- N/A
2023-09-06 23:10:58 -04:00
Joseph T. Lyons
d88dd417bc v0.103.x preview 2023-09-06 12:52:35 -04:00
Joseph T. Lyons
9701b3ddbf x 2023-09-06 12:52:23 -04:00
20 changed files with 815 additions and 683 deletions

3
Cargo.lock generated
View file

@ -6721,6 +6721,7 @@ dependencies = [
"anyhow",
"async-trait",
"bincode",
"collections",
"ctor",
"editor",
"env_logger 0.9.3",
@ -9761,7 +9762,7 @@ dependencies = [
[[package]]
name = "zed"
version = "0.103.0"
version = "0.103.1"
dependencies = [
"activity_indicator",
"ai",

View file

@ -273,7 +273,13 @@ impl ActiveCall {
.borrow_mut()
.take()
.ok_or_else(|| anyhow!("no incoming call"))?;
Self::report_call_event_for_room("decline incoming", call.room_id, None, &self.client, cx);
Self::report_call_event_for_room(
"decline incoming",
Some(call.room_id),
None,
&self.client,
cx,
);
self.client.send(proto::DeclineCall {
room_id: call.room_id,
})?;
@ -403,22 +409,20 @@ impl ActiveCall {
&self.pending_invites
}
pub fn report_call_event(&self, operation: &'static str, cx: &AppContext) {
if let Some(room) = self.room() {
let room = room.read(cx);
Self::report_call_event_for_room(
operation,
room.id(),
room.channel_id(),
&self.client,
cx,
)
}
fn report_call_event(&self, operation: &'static str, cx: &AppContext) {
let (room_id, channel_id) = match self.room() {
Some(room) => {
let room = room.read(cx);
(Some(room.id()), room.channel_id())
}
None => (None, None),
};
Self::report_call_event_for_room(operation, room_id, channel_id, &self.client, cx)
}
pub fn report_call_event_for_room(
operation: &'static str,
room_id: u64,
room_id: Option<u64>,
channel_id: Option<u64>,
client: &Arc<Client>,
cx: &AppContext,

View file

@ -73,7 +73,7 @@ pub enum ClickhouseEvent {
},
Call {
operation: &'static str,
room_id: u64,
room_id: Option<u64>,
channel_id: Option<u64>,
},
}

View file

@ -2240,7 +2240,8 @@ impl CollabPanel {
fn open_channel_buffer(&mut self, action: &OpenChannelBuffer, cx: &mut ViewContext<Self>) {
if let Some(workspace) = self.workspace.upgrade(cx) {
let pane = workspace.read(cx).active_pane().clone();
let channel_view = ChannelView::open(action.channel_id, pane.clone(), workspace, cx);
let channel_id = action.channel_id;
let channel_view = ChannelView::open(channel_id, pane.clone(), workspace, cx);
cx.spawn(|_, mut cx| async move {
let channel_view = channel_view.await?;
pane.update(&mut cx, |pane, cx| {
@ -2249,9 +2250,18 @@ impl CollabPanel {
anyhow::Ok(())
})
.detach();
ActiveCall::global(cx).update(cx, |call, cx| {
call.report_call_event("open channel notes", cx)
});
let room_id = ActiveCall::global(cx)
.read(cx)
.room()
.map(|room| room.read(cx).id());
ActiveCall::report_call_event_for_room(
"open channel notes",
room_id,
Some(channel_id),
&self.client,
cx,
);
}
}

View file

@ -49,7 +49,7 @@ pub fn toggle_screen_sharing(_: &ToggleScreenSharing, cx: &mut AppContext) {
if room.is_screen_sharing() {
ActiveCall::report_call_event_for_room(
"disable screen share",
room.id(),
Some(room.id()),
room.channel_id(),
&client,
cx,
@ -58,7 +58,7 @@ pub fn toggle_screen_sharing(_: &ToggleScreenSharing, cx: &mut AppContext) {
} else {
ActiveCall::report_call_event_for_room(
"enable screen share",
room.id(),
Some(room.id()),
room.channel_id(),
&client,
cx,
@ -78,7 +78,7 @@ pub fn toggle_mute(_: &ToggleMute, cx: &mut AppContext) {
if room.is_muted(cx) {
ActiveCall::report_call_event_for_room(
"enable microphone",
room.id(),
Some(room.id()),
room.channel_id(),
&client,
cx,
@ -86,7 +86,7 @@ pub fn toggle_mute(_: &ToggleMute, cx: &mut AppContext) {
} else {
ActiveCall::report_call_event_for_room(
"disable microphone",
room.id(),
Some(room.id()),
room.channel_id(),
&client,
cx,

View file

@ -42,14 +42,14 @@ impl View for FeedbackInfoText {
)
.with_child(
MouseEventHandler::new::<OpenZedCommunityRepo, _>(0, cx, |state, _| {
let contained_text = if state.hovered() {
let style = if state.hovered() {
&theme.feedback.link_text_hover
} else {
&theme.feedback.link_text_default
};
Label::new("community repo", contained_text.text.clone())
Label::new("community repo", style.text.clone())
.contained()
.with_style(style.container)
.aligned()
.left()
.clipped()
@ -64,6 +64,8 @@ impl View for FeedbackInfoText {
.with_soft_wrap(false)
.aligned(),
)
.contained()
.with_style(theme.feedback.info_text_default.container)
.aligned()
.left()
.clipped()

View file

@ -42,8 +42,8 @@
"repositoryURL": "https://github.com/apple/swift-protobuf.git",
"state": {
"branch": null,
"revision": "ce20dc083ee485524b802669890291c0d8090170",
"version": "1.22.1"
"revision": "0af9125c4eae12a4973fb66574c53a54962a9e1e",
"version": "1.21.0"
}
}
]

View file

@ -12,22 +12,19 @@ use editor::{
SelectAll, MAX_TAB_TITLE_LEN,
};
use futures::StreamExt;
use gpui::platform::PromptLevel;
use gpui::{
actions, elements::*, platform::MouseButton, Action, AnyElement, AnyViewHandle, AppContext,
Entity, ModelContext, ModelHandle, Subscription, Task, View, ViewContext, ViewHandle,
WeakModelHandle, WeakViewHandle,
actions,
elements::*,
platform::{MouseButton, PromptLevel},
Action, AnyElement, AnyViewHandle, AppContext, Entity, ModelContext, ModelHandle, Subscription,
Task, View, ViewContext, ViewHandle, WeakModelHandle, WeakViewHandle,
};
use menu::Confirm;
use postage::stream::Stream;
use project::{
search::{PathMatcher, SearchInputs, SearchQuery},
Entry, Project,
};
use semantic_index::SemanticIndex;
use semantic_index::{SemanticIndex, SemanticIndexStatus};
use smallvec::SmallVec;
use std::{
any::{Any, TypeId},
@ -118,7 +115,7 @@ pub struct ProjectSearchView {
model: ModelHandle<ProjectSearch>,
query_editor: ViewHandle<Editor>,
results_editor: ViewHandle<Editor>,
semantic_state: Option<SemanticSearchState>,
semantic_state: Option<SemanticState>,
semantic_permissioned: Option<bool>,
search_options: SearchOptions,
panels_with_errors: HashSet<InputPanel>,
@ -131,10 +128,9 @@ pub struct ProjectSearchView {
current_mode: SearchMode,
}
struct SemanticSearchState {
file_count: usize,
outstanding_file_count: usize,
_progress_task: Task<()>,
struct SemanticState {
index_status: SemanticIndexStatus,
_subscription: Subscription,
}
pub struct ProjectSearchBar {
@ -233,7 +229,7 @@ impl ProjectSearch {
self.search_id += 1;
self.match_ranges.clear();
self.search_history.add(inputs.as_str().to_string());
self.no_results = Some(true);
self.no_results = None;
self.pending_search = Some(cx.spawn(|this, mut cx| async move {
let results = search?.await.log_err()?;
let matches = results
@ -241,9 +237,10 @@ impl ProjectSearch {
.map(|result| (result.buffer, vec![result.range.start..result.range.start]));
this.update(&mut cx, |this, cx| {
this.no_results = Some(true);
this.excerpts.update(cx, |excerpts, cx| {
excerpts.clear(cx);
})
});
});
for (buffer, ranges) in matches {
let mut match_ranges = this.update(&mut cx, |this, cx| {
@ -318,19 +315,20 @@ impl View for ProjectSearchView {
}
};
let semantic_status = if let Some(semantic) = &self.semantic_state {
if semantic.outstanding_file_count > 0 {
format!(
"Indexing: {} of {}...",
semantic.file_count - semantic.outstanding_file_count,
semantic.file_count
)
} else {
"Indexing complete".to_string()
let semantic_status = self.semantic_state.as_ref().and_then(|semantic| {
let status = semantic.index_status;
match status {
SemanticIndexStatus::Indexed => Some("Indexing complete".to_string()),
SemanticIndexStatus::Indexing { remaining_files } => {
if remaining_files == 0 {
Some(format!("Indexing..."))
} else {
Some(format!("Remaining files to index: {}", remaining_files))
}
}
SemanticIndexStatus::NotIndexed => None,
}
} else {
"Indexing: ...".to_string()
};
});
let minor_text = if let Some(no_results) = model.no_results {
if model.pending_search.is_none() && no_results {
@ -340,12 +338,16 @@ impl View for ProjectSearchView {
}
} else {
match current_mode {
SearchMode::Semantic => vec![
"".to_owned(),
semantic_status,
"Simply explain the code you are looking to find.".to_owned(),
"ex. 'prompt user for permissions to index their project'".to_owned(),
],
SearchMode::Semantic => {
let mut minor_text = Vec::new();
minor_text.push("".into());
minor_text.extend(semantic_status);
minor_text.push("Simply explain the code you are looking to find.".into());
minor_text.push(
"ex. 'prompt user for permissions to index their project'".into(),
);
minor_text
}
_ => vec![
"".to_owned(),
"Include/exclude specific paths with the filter option.".to_owned(),
@ -641,40 +643,29 @@ impl ProjectSearchView {
let project = self.model.read(cx).project.clone();
let index_task = semantic_index.update(cx, |semantic_index, cx| {
semantic_index.index_project(project, cx)
semantic_index.update(cx, |semantic_index, cx| {
semantic_index
.index_project(project.clone(), cx)
.detach_and_log_err(cx);
});
cx.spawn(|search_view, mut cx| async move {
let (files_to_index, mut files_remaining_rx) = index_task.await?;
self.semantic_state = Some(SemanticState {
index_status: semantic_index.read(cx).status(&project),
_subscription: cx.observe(&semantic_index, Self::semantic_index_changed),
});
cx.notify();
}
}
search_view.update(&mut cx, |search_view, cx| {
cx.notify();
search_view.semantic_state = Some(SemanticSearchState {
file_count: files_to_index,
outstanding_file_count: files_to_index,
_progress_task: cx.spawn(|search_view, mut cx| async move {
while let Some(count) = files_remaining_rx.recv().await {
search_view
.update(&mut cx, |search_view, cx| {
if let Some(semantic_search_state) =
&mut search_view.semantic_state
{
semantic_search_state.outstanding_file_count = count;
cx.notify();
if count == 0 {
return;
}
}
})
.ok();
}
}),
});
})?;
anyhow::Ok(())
})
.detach_and_log_err(cx);
fn semantic_index_changed(
&mut self,
semantic_index: ModelHandle<SemanticIndex>,
cx: &mut ViewContext<Self>,
) {
let project = self.model.read(cx).project.clone();
if let Some(semantic_state) = self.semantic_state.as_mut() {
semantic_state.index_status = semantic_index.read(cx).status(&project);
cx.notify();
}
}
@ -873,7 +864,7 @@ impl ProjectSearchView {
SemanticIndex::global(cx)
.map(|semantic| {
let project = self.model.read(cx).project.clone();
semantic.update(cx, |this, cx| this.project_previously_indexed(project, cx))
semantic.update(cx, |this, cx| this.project_previously_indexed(&project, cx))
})
.unwrap_or(Task::ready(Ok(false)))
}
@ -958,11 +949,7 @@ impl ProjectSearchView {
let mode = self.current_mode;
match mode {
SearchMode::Semantic => {
if let Some(semantic) = &mut self.semantic_state {
if semantic.outstanding_file_count > 0 {
return;
}
if self.semantic_state.is_some() {
if let Some(query) = self.build_search_query(cx) {
self.model
.update(cx, |model, cx| model.semantic_search(query.as_inner(), cx));

View file

@ -9,6 +9,7 @@ path = "src/semantic_index.rs"
doctest = false
[dependencies]
collections = { path = "../collections" }
gpui = { path = "../gpui" }
language = { path = "../language" }
project = { path = "../project" }
@ -42,6 +43,7 @@ sha1 = "0.10.5"
parse_duration = "2.1.1"
[dev-dependencies]
collections = { path = "../collections", features = ["test-support"] }
gpui = { path = "../gpui", features = ["test-support"] }
language = { path = "../language", features = ["test-support"] }
project = { path = "../project", features = ["test-support"] }

View file

@ -1,9 +1,10 @@
use crate::{
embedding::Embedding,
parsing::{Document, DocumentDigest},
parsing::{Span, SpanDigest},
SEMANTIC_INDEX_VERSION,
};
use anyhow::{anyhow, Context, Result};
use collections::HashMap;
use futures::channel::oneshot;
use gpui::executor;
use project::{search::PathMatcher, Fs};
@ -12,13 +13,12 @@ use rusqlite::params;
use rusqlite::types::Value;
use std::{
cmp::Ordering,
collections::HashMap,
future::Future,
ops::Range,
path::{Path, PathBuf},
rc::Rc,
sync::Arc,
time::{Instant, SystemTime},
time::SystemTime,
};
use util::TryFutureExt;
@ -124,8 +124,12 @@ impl VectorDatabase {
}
log::trace!("vector database schema out of date. updating...");
// We renamed the `documents` table to `spans`, so we want to drop
// `documents` without recreating it if it exists.
db.execute("DROP TABLE IF EXISTS documents", [])
.context("failed to drop 'documents' table")?;
db.execute("DROP TABLE IF EXISTS spans", [])
.context("failed to drop 'spans' table")?;
db.execute("DROP TABLE IF EXISTS files", [])
.context("failed to drop 'files' table")?;
db.execute("DROP TABLE IF EXISTS worktrees", [])
@ -174,7 +178,7 @@ impl VectorDatabase {
)?;
db.execute(
"CREATE TABLE documents (
"CREATE TABLE spans (
id INTEGER PRIMARY KEY AUTOINCREMENT,
file_id INTEGER NOT NULL,
start_byte INTEGER NOT NULL,
@ -195,7 +199,7 @@ impl VectorDatabase {
pub fn delete_file(
&self,
worktree_id: i64,
delete_path: PathBuf,
delete_path: Arc<Path>,
) -> impl Future<Output = Result<()>> {
self.transact(move |db| {
db.execute(
@ -209,9 +213,9 @@ impl VectorDatabase {
pub fn insert_file(
&self,
worktree_id: i64,
path: PathBuf,
path: Arc<Path>,
mtime: SystemTime,
documents: Vec<Document>,
spans: Vec<Span>,
) -> impl Future<Output = Result<()>> {
self.transact(move |db| {
// Return the existing ID, if both the file and mtime match
@ -228,27 +232,22 @@ impl VectorDatabase {
let file_id = db.last_insert_rowid();
let t0 = Instant::now();
let mut query = db.prepare(
"
INSERT INTO documents
INSERT INTO spans
(file_id, start_byte, end_byte, name, embedding, digest)
VALUES (?1, ?2, ?3, ?4, ?5, ?6)
",
)?;
log::trace!(
"Preparing Query Took: {:?} milliseconds",
t0.elapsed().as_millis()
);
for document in documents {
for span in spans {
query.execute(params![
file_id,
document.range.start.to_string(),
document.range.end.to_string(),
document.name,
document.embedding,
document.digest
span.range.start.to_string(),
span.range.end.to_string(),
span.name,
span.embedding,
span.digest
])?;
}
@ -278,17 +277,17 @@ impl VectorDatabase {
pub fn embeddings_for_files(
&self,
worktree_id_file_paths: HashMap<i64, Vec<Arc<Path>>>,
) -> impl Future<Output = Result<HashMap<DocumentDigest, Embedding>>> {
) -> impl Future<Output = Result<HashMap<SpanDigest, Embedding>>> {
self.transact(move |db| {
let mut query = db.prepare(
"
SELECT digest, embedding
FROM documents
LEFT JOIN files ON files.id = documents.file_id
FROM spans
LEFT JOIN files ON files.id = spans.file_id
WHERE files.worktree_id = ? AND files.relative_path IN rarray(?)
",
)?;
let mut embeddings_by_digest = HashMap::new();
let mut embeddings_by_digest = HashMap::default();
for (worktree_id, file_paths) in worktree_id_file_paths {
let file_paths = Rc::new(
file_paths
@ -297,10 +296,7 @@ impl VectorDatabase {
.collect::<Vec<_>>(),
);
let rows = query.query_map(params![worktree_id, file_paths], |row| {
Ok((
row.get::<_, DocumentDigest>(0)?,
row.get::<_, Embedding>(1)?,
))
Ok((row.get::<_, SpanDigest>(0)?, row.get::<_, Embedding>(1)?))
})?;
for row in rows {
@ -316,7 +312,7 @@ impl VectorDatabase {
pub fn find_or_create_worktree(
&self,
worktree_root_path: PathBuf,
worktree_root_path: Arc<Path>,
) -> impl Future<Output = Result<i64>> {
self.transact(move |db| {
let mut worktree_query =
@ -351,7 +347,7 @@ impl VectorDatabase {
WHERE worktree_id = ?1
ORDER BY relative_path",
)?;
let mut result: HashMap<PathBuf, SystemTime> = HashMap::new();
let mut result: HashMap<PathBuf, SystemTime> = HashMap::default();
for row in statement.query_map(params![worktree_id], |row| {
Ok((
row.get::<_, String>(0)?.into(),
@ -379,7 +375,7 @@ impl VectorDatabase {
let file_ids = file_ids.to_vec();
self.transact(move |db| {
let mut results = Vec::<(i64, f32)>::with_capacity(limit + 1);
Self::for_each_document(db, &file_ids, |id, embedding| {
Self::for_each_span(db, &file_ids, |id, embedding| {
let similarity = embedding.similarity(&query_embedding);
let ix = match results.binary_search_by(|(_, s)| {
similarity.partial_cmp(&s).unwrap_or(Ordering::Equal)
@ -434,7 +430,7 @@ impl VectorDatabase {
})
}
fn for_each_document(
fn for_each_span(
db: &rusqlite::Connection,
file_ids: &[i64],
mut f: impl FnMut(i64, Embedding),
@ -444,7 +440,7 @@ impl VectorDatabase {
SELECT
id, embedding
FROM
documents
spans
WHERE
file_id IN rarray(?)
",
@ -459,7 +455,7 @@ impl VectorDatabase {
Ok(())
}
pub fn get_documents_by_ids(
pub fn spans_for_ids(
&self,
ids: &[i64],
) -> impl Future<Output = Result<Vec<(i64, PathBuf, Range<usize>)>>> {
@ -468,16 +464,16 @@ impl VectorDatabase {
let mut statement = db.prepare(
"
SELECT
documents.id,
spans.id,
files.worktree_id,
files.relative_path,
documents.start_byte,
documents.end_byte
spans.start_byte,
spans.end_byte
FROM
documents, files
spans, files
WHERE
documents.file_id = files.id AND
documents.id in rarray(?)
spans.file_id = files.id AND
spans.id in rarray(?)
",
)?;
@ -500,7 +496,7 @@ impl VectorDatabase {
for id in &ids {
let value = values_by_id
.remove(id)
.ok_or(anyhow!("missing document id {}", id))?;
.ok_or(anyhow!("missing span id {}", id))?;
results.push(value);
}

View file

@ -181,18 +181,17 @@ impl EmbeddingProvider for OpenAIEmbeddings {
fn truncate(&self, span: &str) -> (String, usize) {
let mut tokens = OPENAI_BPE_TOKENIZER.encode_with_special_tokens(span);
let token_count = tokens.len();
let output = if token_count > OPENAI_INPUT_LIMIT {
let output = if tokens.len() > OPENAI_INPUT_LIMIT {
tokens.truncate(OPENAI_INPUT_LIMIT);
OPENAI_BPE_TOKENIZER
.decode(tokens)
.decode(tokens.clone())
.ok()
.unwrap_or_else(|| span.to_string())
} else {
span.to_string()
};
(output, token_count)
(output, tokens.len())
}
async fn embed_batch(&self, spans: Vec<String>) -> Result<Vec<Embedding>> {
@ -204,7 +203,7 @@ impl EmbeddingProvider for OpenAIEmbeddings {
.ok_or_else(|| anyhow!("no api key"))?;
let mut request_number = 0;
let mut request_timeout: u64 = 10;
let mut request_timeout: u64 = 15;
let mut response: Response<AsyncBody>;
while request_number < MAX_RETRIES {
response = self

View file

@ -1,15 +1,15 @@
use crate::{embedding::EmbeddingProvider, parsing::Document, JobHandle};
use crate::{embedding::EmbeddingProvider, parsing::Span, JobHandle};
use gpui::executor::Background;
use parking_lot::Mutex;
use smol::channel;
use std::{mem, ops::Range, path::PathBuf, sync::Arc, time::SystemTime};
use std::{mem, ops::Range, path::Path, sync::Arc, time::SystemTime};
#[derive(Clone)]
pub struct FileToEmbed {
pub worktree_id: i64,
pub path: PathBuf,
pub path: Arc<Path>,
pub mtime: SystemTime,
pub documents: Vec<Document>,
pub spans: Vec<Span>,
pub job_handle: JobHandle,
}
@ -19,7 +19,7 @@ impl std::fmt::Debug for FileToEmbed {
.field("worktree_id", &self.worktree_id)
.field("path", &self.path)
.field("mtime", &self.mtime)
.field("document", &self.documents)
.field("spans", &self.spans)
.finish_non_exhaustive()
}
}
@ -29,13 +29,13 @@ impl PartialEq for FileToEmbed {
self.worktree_id == other.worktree_id
&& self.path == other.path
&& self.mtime == other.mtime
&& self.documents == other.documents
&& self.spans == other.spans
}
}
pub struct EmbeddingQueue {
embedding_provider: Arc<dyn EmbeddingProvider>,
pending_batch: Vec<FileToEmbedFragment>,
pending_batch: Vec<FileFragmentToEmbed>,
executor: Arc<Background>,
pending_batch_token_count: usize,
finished_files_tx: channel::Sender<FileToEmbed>,
@ -43,9 +43,9 @@ pub struct EmbeddingQueue {
}
#[derive(Clone)]
pub struct FileToEmbedFragment {
pub struct FileFragmentToEmbed {
file: Arc<Mutex<FileToEmbed>>,
document_range: Range<usize>,
span_range: Range<usize>,
}
impl EmbeddingQueue {
@ -62,43 +62,40 @@ impl EmbeddingQueue {
}
pub fn push(&mut self, file: FileToEmbed) {
if file.documents.is_empty() {
if file.spans.is_empty() {
self.finished_files_tx.try_send(file).unwrap();
return;
}
let file = Arc::new(Mutex::new(file));
self.pending_batch.push(FileToEmbedFragment {
self.pending_batch.push(FileFragmentToEmbed {
file: file.clone(),
document_range: 0..0,
span_range: 0..0,
});
let mut fragment_range = &mut self.pending_batch.last_mut().unwrap().document_range;
let mut saved_tokens = 0;
for (ix, document) in file.lock().documents.iter().enumerate() {
let document_token_count = if document.embedding.is_none() {
document.token_count
let mut fragment_range = &mut self.pending_batch.last_mut().unwrap().span_range;
for (ix, span) in file.lock().spans.iter().enumerate() {
let span_token_count = if span.embedding.is_none() {
span.token_count
} else {
saved_tokens += document.token_count;
0
};
let next_token_count = self.pending_batch_token_count + document_token_count;
let next_token_count = self.pending_batch_token_count + span_token_count;
if next_token_count > self.embedding_provider.max_tokens_per_batch() {
let range_end = fragment_range.end;
self.flush();
self.pending_batch.push(FileToEmbedFragment {
self.pending_batch.push(FileFragmentToEmbed {
file: file.clone(),
document_range: range_end..range_end,
span_range: range_end..range_end,
});
fragment_range = &mut self.pending_batch.last_mut().unwrap().document_range;
fragment_range = &mut self.pending_batch.last_mut().unwrap().span_range;
}
fragment_range.end = ix + 1;
self.pending_batch_token_count += document_token_count;
self.pending_batch_token_count += span_token_count;
}
log::trace!("Saved Tokens: {:?}", saved_tokens);
}
pub fn flush(&mut self) {
@ -111,60 +108,55 @@ impl EmbeddingQueue {
let finished_files_tx = self.finished_files_tx.clone();
let embedding_provider = self.embedding_provider.clone();
self.executor.spawn(async move {
let mut spans = Vec::new();
let mut document_count = 0;
for fragment in &batch {
let file = fragment.file.lock();
document_count += file.documents[fragment.document_range.clone()].len();
spans.extend(
{
file.documents[fragment.document_range.clone()]
.iter().filter(|d| d.embedding.is_none())
.map(|d| d.content.clone())
}
);
}
log::trace!("Documents Length: {:?}", document_count);
log::trace!("Span Length: {:?}", spans.clone().len());
// If spans is empty, just send the fragment to the finished files if it's the last one.
if spans.len() == 0 {
for fragment in batch.clone() {
if let Some(file) = Arc::into_inner(fragment.file) {
finished_files_tx.try_send(file.into_inner()).unwrap();
}
self.executor
.spawn(async move {
let mut spans = Vec::new();
for fragment in &batch {
let file = fragment.file.lock();
spans.extend(
file.spans[fragment.span_range.clone()]
.iter()
.filter(|d| d.embedding.is_none())
.map(|d| d.content.clone()),
);
}
return;
};
match embedding_provider.embed_batch(spans).await {
Ok(embeddings) => {
let mut embeddings = embeddings.into_iter();
for fragment in batch {
for document in
&mut fragment.file.lock().documents[fragment.document_range.clone()].iter_mut().filter(|d| d.embedding.is_none())
{
if let Some(embedding) = embeddings.next() {
document.embedding = Some(embedding);
} else {
//
log::error!("number of embeddings returned different from number of documents");
}
}
// If spans is empty, just send the fragment to the finished files if it's the last one.
if spans.is_empty() {
for fragment in batch.clone() {
if let Some(file) = Arc::into_inner(fragment.file) {
finished_files_tx.try_send(file.into_inner()).unwrap();
}
}
return;
};
match embedding_provider.embed_batch(spans).await {
Ok(embeddings) => {
let mut embeddings = embeddings.into_iter();
for fragment in batch {
for span in &mut fragment.file.lock().spans[fragment.span_range.clone()]
.iter_mut()
.filter(|d| d.embedding.is_none())
{
if let Some(embedding) = embeddings.next() {
span.embedding = Some(embedding);
} else {
log::error!("number of embeddings != number of documents");
}
}
if let Some(file) = Arc::into_inner(fragment.file) {
finished_files_tx.try_send(file.into_inner()).unwrap();
}
}
}
Err(error) => {
log::error!("{:?}", error);
}
}
Err(error) => {
log::error!("{:?}", error);
}
}
})
.detach();
})
.detach();
}
pub fn finished_files(&self) -> channel::Receiver<FileToEmbed> {

View file

@ -16,9 +16,9 @@ use std::{
use tree_sitter::{Parser, QueryCursor};
#[derive(Debug, PartialEq, Eq, Clone, Hash)]
pub struct DocumentDigest([u8; 20]);
pub struct SpanDigest([u8; 20]);
impl FromSql for DocumentDigest {
impl FromSql for SpanDigest {
fn column_result(value: ValueRef) -> FromSqlResult<Self> {
let blob = value.as_blob()?;
let bytes =
@ -27,17 +27,17 @@ impl FromSql for DocumentDigest {
expected_size: 20,
blob_size: blob.len(),
})?;
return Ok(DocumentDigest(bytes));
return Ok(SpanDigest(bytes));
}
}
impl ToSql for DocumentDigest {
impl ToSql for SpanDigest {
fn to_sql(&self) -> rusqlite::Result<ToSqlOutput> {
self.0.to_sql()
}
}
impl From<&'_ str> for DocumentDigest {
impl From<&'_ str> for SpanDigest {
fn from(value: &'_ str) -> Self {
let mut sha1 = Sha1::new();
sha1.update(value);
@ -46,12 +46,12 @@ impl From<&'_ str> for DocumentDigest {
}
#[derive(Debug, PartialEq, Clone)]
pub struct Document {
pub struct Span {
pub name: String,
pub range: Range<usize>,
pub content: String,
pub embedding: Option<Embedding>,
pub digest: DocumentDigest,
pub digest: SpanDigest,
pub token_count: usize,
}
@ -97,14 +97,14 @@ impl CodeContextRetriever {
relative_path: &Path,
language_name: Arc<str>,
content: &str,
) -> Result<Vec<Document>> {
) -> Result<Vec<Span>> {
let document_span = ENTIRE_FILE_TEMPLATE
.replace("<path>", relative_path.to_string_lossy().as_ref())
.replace("<language>", language_name.as_ref())
.replace("<item>", &content);
let digest = DocumentDigest::from(document_span.as_str());
let digest = SpanDigest::from(document_span.as_str());
let (document_span, token_count) = self.embedding_provider.truncate(&document_span);
Ok(vec![Document {
Ok(vec![Span {
range: 0..content.len(),
content: document_span,
embedding: Default::default(),
@ -114,13 +114,13 @@ impl CodeContextRetriever {
}])
}
fn parse_markdown_file(&self, relative_path: &Path, content: &str) -> Result<Vec<Document>> {
fn parse_markdown_file(&self, relative_path: &Path, content: &str) -> Result<Vec<Span>> {
let document_span = MARKDOWN_CONTEXT_TEMPLATE
.replace("<path>", relative_path.to_string_lossy().as_ref())
.replace("<item>", &content);
let digest = DocumentDigest::from(document_span.as_str());
let digest = SpanDigest::from(document_span.as_str());
let (document_span, token_count) = self.embedding_provider.truncate(&document_span);
Ok(vec![Document {
Ok(vec![Span {
range: 0..content.len(),
content: document_span,
embedding: None,
@ -191,32 +191,32 @@ impl CodeContextRetriever {
relative_path: &Path,
content: &str,
language: Arc<Language>,
) -> Result<Vec<Document>> {
) -> Result<Vec<Span>> {
let language_name = language.name();
if PARSEABLE_ENTIRE_FILE_TYPES.contains(&language_name.as_ref()) {
return self.parse_entire_file(relative_path, language_name, &content);
} else if &language_name.to_string() == &"Markdown".to_string() {
} else if language_name.as_ref() == "Markdown" {
return self.parse_markdown_file(relative_path, &content);
}
let mut documents = self.parse_file(content, language)?;
for document in &mut documents {
let mut spans = self.parse_file(content, language)?;
for span in &mut spans {
let document_content = CODE_CONTEXT_TEMPLATE
.replace("<path>", relative_path.to_string_lossy().as_ref())
.replace("<language>", language_name.as_ref())
.replace("item", &document.content);
.replace("item", &span.content);
let (document_content, token_count) =
self.embedding_provider.truncate(&document_content);
document.content = document_content;
document.token_count = token_count;
span.content = document_content;
span.token_count = token_count;
}
Ok(documents)
Ok(spans)
}
pub fn parse_file(&mut self, content: &str, language: Arc<Language>) -> Result<Vec<Document>> {
pub fn parse_file(&mut self, content: &str, language: Arc<Language>) -> Result<Vec<Span>> {
let grammar = language
.grammar()
.ok_or_else(|| anyhow!("no grammar for language"))?;
@ -227,7 +227,7 @@ impl CodeContextRetriever {
let language_scope = language.default_scope();
let placeholder = language_scope.collapsed_placeholder();
let mut documents = Vec::new();
let mut spans = Vec::new();
let mut collapsed_ranges_within = Vec::new();
let mut parsed_name_ranges = HashSet::new();
for (i, context_match) in matches.iter().enumerate() {
@ -267,22 +267,22 @@ impl CodeContextRetriever {
collapsed_ranges_within.sort_by_key(|r| (r.start, Reverse(r.end)));
let mut document_content = String::new();
let mut span_content = String::new();
for context_range in &context_match.context_ranges {
add_content_from_range(
&mut document_content,
&mut span_content,
content,
context_range.clone(),
context_match.start_col,
);
document_content.push_str("\n");
span_content.push_str("\n");
}
let mut offset = item_range.start;
for collapsed_range in &collapsed_ranges_within {
if collapsed_range.start > offset {
add_content_from_range(
&mut document_content,
&mut span_content,
content,
offset..collapsed_range.start,
context_match.start_col,
@ -291,24 +291,24 @@ impl CodeContextRetriever {
}
if collapsed_range.end > offset {
document_content.push_str(placeholder);
span_content.push_str(placeholder);
offset = collapsed_range.end;
}
}
if offset < item_range.end {
add_content_from_range(
&mut document_content,
&mut span_content,
content,
offset..item_range.end,
context_match.start_col,
);
}
let sha1 = DocumentDigest::from(document_content.as_str());
documents.push(Document {
let sha1 = SpanDigest::from(span_content.as_str());
spans.push(Span {
name,
content: document_content,
content: span_content,
range: item_range.clone(),
embedding: None,
digest: sha1,
@ -316,7 +316,7 @@ impl CodeContextRetriever {
})
}
return Ok(documents);
return Ok(spans);
}
}

File diff suppressed because it is too large Load diff

View file

@ -1,7 +1,7 @@
use crate::{
embedding::{DummyEmbeddings, Embedding, EmbeddingProvider},
embedding_queue::EmbeddingQueue,
parsing::{subtract_ranges, CodeContextRetriever, Document, DocumentDigest},
parsing::{subtract_ranges, CodeContextRetriever, Span, SpanDigest},
semantic_index_settings::SemanticIndexSettings,
FileToEmbed, JobHandle, SearchResult, SemanticIndex, EMBEDDING_QUEUE_FLUSH_TIMEOUT,
};
@ -87,34 +87,24 @@ async fn test_semantic_index(deterministic: Arc<Deterministic>, cx: &mut TestApp
let project = Project::test(fs.clone(), ["/the-root".as_ref()], cx).await;
let _ = semantic_index
.update(cx, |store, cx| {
store.initialize_project(project.clone(), cx)
})
.await;
let (file_count, outstanding_file_count) = semantic_index
.update(cx, |store, cx| store.index_project(project.clone(), cx))
.await
.unwrap();
assert_eq!(file_count, 3);
let search_results = semantic_index.update(cx, |store, cx| {
store.search_project(
project.clone(),
"aaaaaabbbbzz".to_string(),
5,
vec![],
vec![],
cx,
)
});
let pending_file_count =
semantic_index.read_with(cx, |index, _| index.pending_file_count(&project).unwrap());
deterministic.run_until_parked();
assert_eq!(*pending_file_count.borrow(), 3);
deterministic.advance_clock(EMBEDDING_QUEUE_FLUSH_TIMEOUT);
assert_eq!(*outstanding_file_count.borrow(), 0);
let search_results = semantic_index
.update(cx, |store, cx| {
store.search_project(
project.clone(),
"aaaaaabbbbzz".to_string(),
5,
vec![],
vec![],
cx,
)
})
.await
.unwrap();
assert_eq!(*pending_file_count.borrow(), 0);
let search_results = search_results.await.unwrap();
assert_search_results(
&search_results,
&[
@ -191,14 +181,12 @@ async fn test_semantic_index(deterministic: Arc<Deterministic>, cx: &mut TestApp
deterministic.advance_clock(EMBEDDING_QUEUE_FLUSH_TIMEOUT);
let prev_embedding_count = embedding_provider.embedding_count();
let (file_count, outstanding_file_count) = semantic_index
.update(cx, |store, cx| store.index_project(project.clone(), cx))
.await
.unwrap();
assert_eq!(file_count, 1);
let index = semantic_index.update(cx, |store, cx| store.index_project(project.clone(), cx));
deterministic.run_until_parked();
assert_eq!(*pending_file_count.borrow(), 1);
deterministic.advance_clock(EMBEDDING_QUEUE_FLUSH_TIMEOUT);
assert_eq!(*outstanding_file_count.borrow(), 0);
assert_eq!(*pending_file_count.borrow(), 0);
index.await.unwrap();
assert_eq!(
embedding_provider.embedding_count() - prev_embedding_count,
@ -214,17 +202,17 @@ async fn test_embedding_batching(cx: &mut TestAppContext, mut rng: StdRng) {
let files = (1..=3)
.map(|file_ix| FileToEmbed {
worktree_id: 5,
path: format!("path-{file_ix}").into(),
path: Path::new(&format!("path-{file_ix}")).into(),
mtime: SystemTime::now(),
documents: (0..rng.gen_range(4..22))
spans: (0..rng.gen_range(4..22))
.map(|document_ix| {
let content_len = rng.gen_range(10..100);
let content = RandomCharIter::new(&mut rng)
.with_simple_text()
.take(content_len)
.collect::<String>();
let digest = DocumentDigest::from(content.as_str());
Document {
let digest = SpanDigest::from(content.as_str());
Span {
range: 0..10,
embedding: None,
name: format!("document {document_ix}"),
@ -257,7 +245,7 @@ async fn test_embedding_batching(cx: &mut TestAppContext, mut rng: StdRng) {
.iter()
.map(|file| {
let mut file = file.clone();
for doc in &mut file.documents {
for doc in &mut file.spans {
doc.embedding = Some(embedding_provider.embed_sync(doc.content.as_ref()));
}
file
@ -449,7 +437,7 @@ async fn test_code_context_retrieval_json() {
}
fn assert_documents_eq(
documents: &[Document],
documents: &[Span],
expected_contents_and_start_offsets: &[(String, usize)],
) {
assert_eq!(

View file

@ -3,7 +3,7 @@ authors = ["Nathan Sobo <nathansobo@gmail.com>"]
description = "The fast, collaborative code editor."
edition = "2021"
name = "zed"
version = "0.103.0"
version = "0.103.1"
publish = false
[lib]

View file

@ -1 +1 @@
dev
stable

View file

@ -31,7 +31,7 @@ preview_tag_name="v${major}.${minor}.${patch}-pre"
git fetch origin ${prev_minor_branch_name}:${prev_minor_branch_name}
git fetch origin --tags
cargo check -q
# cargo check -q
function cleanup {
git checkout -q main
@ -89,7 +89,7 @@ git checkout -q main
git clean -q -dff
old_main_sha=$(git rev-parse HEAD)
cargo set-version --package zed --bump minor
cargo check -q
# cargo check -q
git commit -q --all --message "${next_minor_branch_name} dev"
cat <<MESSAGE

View file

@ -12,9 +12,6 @@ export default function feedback(): any {
background: background(theme.highest, "on"),
corner_radius: 6,
border: border(theme.highest, "on"),
margin: {
right: 4,
},
padding: {
bottom: 2,
left: 10,
@ -41,9 +38,15 @@ export default function feedback(): any {
},
}),
button_margin: 8,
info_text_default: text(theme.highest, "sans", "default", {
size: "xs",
}),
info_text_default: {
padding: {
left: 4,
right: 4,
},
...text(theme.highest, "sans", "default", {
size: "xs",
})
},
link_text_default: text(theme.highest, "sans", "default", {
size: "xs",
underline: true,

View file

@ -2,14 +2,14 @@ import { useTheme } from "../common"
import { toggleable_icon_button } from "../component/icon_button"
import { interactive, toggleable } from "../element"
import { background, border, foreground, text } from "./components"
import { text_button } from "../component";
import { text_button } from "../component"
export const toolbar = () => {
const theme = useTheme()
return {
height: 42,
padding: { left: 4, right: 4 },
padding: { left: 8, right: 8 },
background: background(theme.highest),
border: border(theme.highest, { bottom: true }),
item_spacing: 4,
@ -24,9 +24,9 @@ export const toolbar = () => {
...text(theme.highest, "sans", "variant"),
corner_radius: 6,
padding: {
left: 6,
right: 6,
},
left: 4,
right: 4,
}
},
state: {
hovered: {