diff --git a/Cargo.lock b/Cargo.lock
index c0e2741a88..52963c5f15 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -670,7 +670,6 @@ dependencies = [
  "indexed_docs",
  "language",
  "language_model",
- "log",
  "pretty_assertions",
  "project",
  "prompt_store",
diff --git a/crates/assistant/src/assistant.rs b/crates/assistant/src/assistant.rs
index 828520fe41..4e53f225f3 100644
--- a/crates/assistant/src/assistant.rs
+++ b/crates/assistant/src/assistant.rs
@@ -242,18 +242,6 @@ fn register_slash_commands(prompt_builder: Option<Arc<PromptBuilder>>, cx: &mut
         .detach();
     }
 
-    cx.observe_flag::<feature_flags::AutoCommand, _>({
-        let slash_command_registry = slash_command_registry.clone();
-        move |is_enabled, _cx| {
-            if is_enabled {
-                // [#auto-staff-ship] TODO remove this when /auto is no longer staff-shipped
-                slash_command_registry
-                    .register_command(assistant_slash_commands::AutoCommand, true);
-            }
-        }
-    })
-    .detach();
-
     cx.observe_flag::({
         let slash_command_registry = slash_command_registry.clone();
         move |is_enabled, _cx| {
diff --git a/crates/assistant_slash_commands/Cargo.toml b/crates/assistant_slash_commands/Cargo.toml
index 376ae53b50..b2d98a88f4 100644
--- a/crates/assistant_slash_commands/Cargo.toml
+++ b/crates/assistant_slash_commands/Cargo.toml
@@ -30,7 +30,6 @@ http_client.workspace = true
 indexed_docs.workspace = true
 language.workspace = true
 language_model.workspace = true
-log.workspace = true
 project.workspace = true
 prompt_store.workspace = true
 rope.workspace = true
diff --git a/crates/assistant_slash_commands/src/assistant_slash_commands.rs b/crates/assistant_slash_commands/src/assistant_slash_commands.rs
index ea1ae5ab5a..31b874fa0b 100644
--- a/crates/assistant_slash_commands/src/assistant_slash_commands.rs
+++ b/crates/assistant_slash_commands/src/assistant_slash_commands.rs
@@ -1,4 +1,3 @@
-mod auto_command;
 mod cargo_workspace_command;
 mod context_server_command;
 mod default_command;
@@ -21,7 +20,6 @@ use gpui::App;
 use language::{CodeLabel, HighlightId};
 use ui::ActiveTheme as _;
 
-pub use crate::auto_command::*;
 pub use crate::cargo_workspace_command::*;
 pub use crate::context_server_command::*;
 pub use crate::default_command::*;
diff --git a/crates/assistant_slash_commands/src/auto_command.rs b/crates/assistant_slash_commands/src/auto_command.rs
deleted file mode 100644
index 1db397557e..0000000000
--- a/crates/assistant_slash_commands/src/auto_command.rs
+++ /dev/null
@@ -1,371 +0,0 @@
-use anyhow::{anyhow, Result};
-use assistant_slash_command::{
-    ArgumentCompletion, SlashCommand, SlashCommandOutput, SlashCommandOutputSection,
-    SlashCommandResult,
-};
-use feature_flags::FeatureFlag;
-use futures::StreamExt;
-use gpui::{App, AsyncApp, Task, WeakEntity, Window};
-use language::{CodeLabel, LspAdapterDelegate};
-use language_model::{
-    LanguageModelCompletionEvent, LanguageModelRegistry, LanguageModelRequest,
-    LanguageModelRequestMessage, Role,
-};
-use semantic_index::{FileSummary, SemanticDb};
-use smol::channel;
-use std::sync::{atomic::AtomicBool, Arc};
-use ui::{prelude::*, BorrowAppContext};
-use util::ResultExt;
-use workspace::Workspace;
-
-use crate::create_label_for_command;
-
-pub struct AutoSlashCommandFeatureFlag;
-
-impl FeatureFlag for AutoSlashCommandFeatureFlag {
-    const NAME: &'static str = "auto-slash-command";
-}
-
-pub struct AutoCommand;
-
-impl SlashCommand for AutoCommand {
-    fn name(&self) -> String {
-        "auto".into()
-    }
-
-    fn description(&self) -> String {
-        "Automatically infer what context to add".into()
-    }
-
-    fn icon(&self) -> IconName {
-        IconName::Wand
-    }
-
-    fn menu_text(&self) -> String {
-        self.description()
-    }
-
-    fn label(&self, cx: &App) -> CodeLabel {
-        create_label_for_command("auto", &["--prompt"], cx)
-    }
-
-    fn complete_argument(
-        self: Arc<Self>,
-        _arguments: &[String],
-        _cancel: Arc<AtomicBool>,
-        workspace: Option<WeakEntity<Workspace>>,
-        _window: &mut Window,
-        cx: &mut App,
-    ) -> Task<Result<Vec<ArgumentCompletion>>> {
-        // There's no autocomplete for a prompt, since it's arbitrary text.
-        // However, we can use this opportunity to kick off a drain of the backlog.
-        // That way, it can hopefully be done resummarizing by the time we've actually
-        // typed out our prompt. This re-runs on every keystroke during autocomplete,
-        // but in the future, we could instead do it only once, when /auto is first entered.
-        let Some(workspace) = workspace.and_then(|ws| ws.upgrade()) else {
-            log::warn!("workspace was dropped or unavailable during /auto autocomplete");
-
-            return Task::ready(Ok(Vec::new()));
-        };
-
-        let project = workspace.read(cx).project().clone();
-        let Some(project_index) =
-            cx.update_global(|index: &mut SemanticDb, cx| index.project_index(project, cx))
-        else {
-            return Task::ready(Err(anyhow!("No project indexer, cannot use /auto")));
-        };
-
-        let cx: &mut App = cx;
-
-        cx.spawn(async move |cx| {
-            let task = project_index.read_with(cx, |project_index, cx| {
-                project_index.flush_summary_backlogs(cx)
-            })?;
-
-            cx.background_spawn(task).await;
-
-            anyhow::Ok(Vec::new())
-        })
-    }
-
-    fn requires_argument(&self) -> bool {
-        true
-    }
-
-    fn run(
-        self: Arc<Self>,
-        arguments: &[String],
-        _context_slash_command_output_sections: &[SlashCommandOutputSection<language::Anchor>],
-        _context_buffer: language::BufferSnapshot,
-        workspace: WeakEntity<Workspace>,
-        _delegate: Option<Arc<dyn LspAdapterDelegate>>,
-        window: &mut Window,
-        cx: &mut App,
-    ) -> Task<SlashCommandResult> {
-        let Some(workspace) = workspace.upgrade() else {
-            return Task::ready(Err(anyhow::anyhow!("workspace was dropped")));
-        };
-        if arguments.is_empty() {
-            return Task::ready(Err(anyhow!("missing prompt")));
-        };
-        let argument = arguments.join(" ");
-        let original_prompt = argument.to_string();
-        let project = workspace.read(cx).project().clone();
-        let Some(project_index) =
-            cx.update_global(|index: &mut SemanticDb, cx| index.project_index(project, cx))
-        else {
-            return Task::ready(Err(anyhow!("no project indexer")));
-        };
-
-        let task = window.spawn(cx, async move |cx| {
-            let summaries = project_index
-                .read_with(cx, |project_index, cx| project_index.all_summaries(cx))?
-                .await?;
-
-            commands_for_summaries(&summaries, &original_prompt, &cx).await
-        });
-
-        // As a convenience, append /auto's argument to the end of the prompt
-        // so you don't have to write it again.
-        let original_prompt = argument.to_string();
-
-        cx.background_spawn(async move {
-            let commands = task.await?;
-            let mut prompt = String::new();
-
-            log::info!(
-                "Translating this response into slash-commands: {:?}",
-                commands
-            );
-
-            for command in commands {
-                prompt.push('/');
-                prompt.push_str(&command.name);
-                prompt.push(' ');
-                prompt.push_str(&command.arg);
-                prompt.push('\n');
-            }
-
-            prompt.push('\n');
-            prompt.push_str(&original_prompt);
-
-            Ok(SlashCommandOutput {
-                text: prompt,
-                sections: Vec::new(),
-                run_commands_in_text: true,
-            }
-            .to_event_stream())
-        })
-    }
-}
-
-const PROMPT_INSTRUCTIONS_BEFORE_SUMMARY: &str = include_str!("prompt_before_summary.txt");
-const PROMPT_INSTRUCTIONS_AFTER_SUMMARY: &str = include_str!("prompt_after_summary.txt");
-
-fn summaries_prompt(summaries: &[FileSummary], original_prompt: &str) -> String {
-    let json_summaries = serde_json::to_string(summaries).unwrap();
-
-    format!("{PROMPT_INSTRUCTIONS_BEFORE_SUMMARY}\n{json_summaries}\n{PROMPT_INSTRUCTIONS_AFTER_SUMMARY}\n{original_prompt}")
-}
-
-/// The slash commands that the model is told about, and which we look for in the inference response.
-const SUPPORTED_SLASH_COMMANDS: &[&str] = &["search", "file"];
-
-#[derive(Debug, Clone)]
-struct CommandToRun {
-    name: String,
-    arg: String,
-}
-
-/// Given the pre-indexed file summaries for this project, as well as the original prompt
-/// string passed to `/auto`, get a list of slash commands to run, along with their arguments.
-///
-/// The prompt's output does not include the slashes (to reduce the chance that it makes a mistake),
-/// so taking one of these returned Strings and turning it into a real slash-command-with-argument
-/// involves prepending a slash to it.
-///
-/// This function will validate that each of the returned lines begins with one of SUPPORTED_SLASH_COMMANDS.
-/// Any other lines it encounters will be discarded, with a warning logged.
-async fn commands_for_summaries(
-    summaries: &[FileSummary],
-    original_prompt: &str,
-    cx: &AsyncApp,
-) -> Result<Vec<CommandToRun>> {
-    if summaries.is_empty() {
-        log::warn!("Inferring no context because there were no summaries available.");
-        return Ok(Vec::new());
-    }
-
-    // Use the globally configured model to translate the summaries into slash-commands,
-    // because Qwen2-7B-Instruct has not done a good job at that task.
-    let Some(model) = cx.update(|cx| LanguageModelRegistry::read_global(cx).active_model())? else {
-        log::warn!("Can't infer context because there's no active model.");
-        return Ok(Vec::new());
-    };
-
-    // Only go up to 90% of the actual max token count, to reduce chances of
-    // exceeding the token count due to inaccuracies in the token counting heuristic.
-    let max_token_count = (model.max_token_count() * 9) / 10;
-
-    // Rather than recursing (which would require this async function use a pinned box),
-    // we use an explicit stack of arguments and answers for when we need to "recurse."
-    let mut stack = vec![summaries];
-    let mut final_response = Vec::new();
-    let mut prompts = Vec::new();
-
-    // TODO We only need to create multiple Requests because we currently
-    // don't have the ability to tell if a CompletionProvider::complete response
-    // was a "too many tokens in this request" error. If we had that, then
-    // we could try the request once, instead of having to make separate requests
-    // to check the token count and then afterwards to run the actual prompt.
-    let make_request = |prompt: String| LanguageModelRequest {
-        messages: vec![LanguageModelRequestMessage {
-            role: Role::User,
-            content: vec![prompt.into()],
-            // Nothing in here will benefit from caching
-            cache: false,
-        }],
-        tools: Vec::new(),
-        stop: Vec::new(),
-        temperature: None,
-    };
-
-    while let Some(current_summaries) = stack.pop() {
-        // The split can result in one slice being empty and the other having one element.
-        // Whenever that happens, skip the empty one.
-        if current_summaries.is_empty() {
-            continue;
-        }
-
-        log::info!(
-            "Inferring prompt context using {} file summaries",
-            current_summaries.len()
-        );
-
-        let prompt = summaries_prompt(&current_summaries, original_prompt);
-        let start = std::time::Instant::now();
-        // Per OpenAI, 1 token ~= 4 chars in English (we go with 4.5 to overestimate a bit, because failed API requests cost a lot of perf)
-        // Verifying this against an actual model.count_tokens() confirms that it's usually within ~5% of the correct answer, whereas
-        // getting the correct answer from tiktoken takes hundreds of milliseconds (compared to this arithmetic being ~free).
-        // source: https://help.openai.com/en/articles/4936856-what-are-tokens-and-how-to-count-them
-        let token_estimate = prompt.len() * 2 / 9;
-        let duration = start.elapsed();
-        log::info!(
-            "Time taken to count tokens for prompt of length {:?}B: {:?}",
-            prompt.len(),
-            duration
-        );
-
-        if token_estimate < max_token_count {
-            prompts.push(prompt);
-        } else if current_summaries.len() == 1 {
-            log::warn!("Inferring context for a single file's summary failed because the prompt's token length exceeded the model's token limit.");
-        } else {
-            log::info!(
-                "Context inference using file summaries resulted in a prompt containing {token_estimate} tokens, which exceeded the model's max of {max_token_count}. Retrying as two separate prompts, each including half the number of summaries.",
-            );
-            let (left, right) = current_summaries.split_at(current_summaries.len() / 2);
-            stack.push(right);
-            stack.push(left);
-        }
-    }
-
-    let all_start = std::time::Instant::now();
-
-    let (tx, rx) = channel::bounded(1024);
-
-    let completion_streams = prompts
-        .into_iter()
-        .map(|prompt| {
-            let request = make_request(prompt.clone());
-            let model = model.clone();
-            let tx = tx.clone();
-            let stream = model.stream_completion(request, &cx);
-
-            (stream, tx)
-        })
-        .collect::<Vec<_>>();
-
-    cx.background_spawn(async move {
-        let futures = completion_streams
-            .into_iter()
-            .enumerate()
-            .map(|(ix, (stream, tx))| async move {
-                let start = std::time::Instant::now();
-                let events = stream.await?;
-                log::info!("Time taken for awaiting /auto chunk stream #{ix}: {:?}", start.elapsed());
-
-                let completion: String = events
-                    .filter_map(|event| async {
-                        if let Ok(LanguageModelCompletionEvent::Text(text)) = event {
-                            Some(text)
-                        } else {
-                            None
-                        }
-                    })
-                    .collect()
-                    .await;
-
-                log::info!("Time taken for all /auto chunks to come back for #{ix}: {:?}", start.elapsed());
-
-                for line in completion.split('\n') {
-                    if let Some(first_space) = line.find(' ') {
-                        let command = &line[..first_space].trim();
-                        let arg = &line[first_space..].trim();
-
-                        tx.send(CommandToRun {
-                            name: command.to_string(),
-                            arg: arg.to_string(),
-                        })
-                        .await?;
-                    } else if !line.trim().is_empty() {
-                        // All slash-commands currently supported in context inference need a space for the argument.
-                        log::warn!(
-                            "Context inference returned a non-blank line that contained no spaces (meaning no argument for the slash command): {:?}",
-                            line
-                        );
-                    }
-                }
-
-                anyhow::Ok(())
-            })
-            .collect::<Vec<_>>();
-
-        let _ = futures::future::try_join_all(futures).await.log_err();
-
-        let duration = all_start.elapsed();
-        eprintln!("All futures completed in {:?}", duration);
-    })
-    .await;
-
-    drop(tx); // Close the channel so that rx.collect() won't hang. This is safe because all futures have completed.
-
-    let results = rx.collect::<Vec<_>>().await;
-    eprintln!(
-        "Finished collecting from the channel with {} results",
-        results.len()
-    );
-
-    for command in results {
-        // Don't return empty or duplicate commands
-        if !command.name.is_empty()
-            && !final_response
-                .iter()
-                .any(|cmd: &CommandToRun| cmd.name == command.name && cmd.arg == command.arg)
-        {
-            if SUPPORTED_SLASH_COMMANDS
-                .iter()
-                .any(|supported| &command.name == supported)
-            {
-                final_response.push(command);
-            } else {
-                log::warn!(
-                    "Context inference returned an unrecognized slash command: {:?}",
-                    command
-                );
-            }
-        }
-    }
-
-    // Sort the commands by name (reversed just so that /search appears before /file)
-    final_response.sort_by(|cmd1, cmd2| cmd1.name.cmp(&cmd2.name).reverse());
-
-    Ok(final_response)
-}
diff --git a/crates/assistant_slash_commands/src/prompt_after_summary.txt b/crates/assistant_slash_commands/src/prompt_after_summary.txt
deleted file mode 100644
index fc139a1fcb..0000000000
--- a/crates/assistant_slash_commands/src/prompt_after_summary.txt
+++ /dev/null
@@ -1,24 +0,0 @@
-Actions have a cost, so only include actions that you think
-will be helpful to you in doing a great job answering the
-prompt in the future.
-
-You must respond ONLY with a list of actions you would like to
-perform. Each action should be on its own line: the action name, followed by a space and then its parameter.
-
-Actions can be performed more than once with different parameters.
-Here is an example valid response:
-
-```
-file path/to/my/file.txt
-file path/to/another/file.txt
-search something to search for
-search something else to search for
-```
-
-Once again, do not forget: you must respond ONLY in the format of
-one action per line, and the action name should be followed by
-its parameter. Your response must not include anything other
-than a list of actions, with one action per line, in this format.
-It is extremely important that you do not deviate from this format even slightly!
-
-This is the end of my instructions for how to respond. The rest is the prompt:
diff --git a/crates/assistant_slash_commands/src/prompt_before_summary.txt b/crates/assistant_slash_commands/src/prompt_before_summary.txt
deleted file mode 100644
index 5d8db1b8f7..0000000000
--- a/crates/assistant_slash_commands/src/prompt_before_summary.txt
+++ /dev/null
@@ -1,31 +0,0 @@
-I'm going to give you a prompt. I don't want you to respond
-to the prompt itself. I want you to figure out which of the following
-actions on my project, if any, would help you answer the prompt.
-
-Here are the actions:
-
-## file
-
-This action's parameter is a file path to one of the files
-in the project. If you ask for this action, I will tell you
-the full contents of the file, so you can learn all the
-details of the file.
-
-## search
-
-This action's parameter is a string to do a semantic search for
-across the files in the project. (You will have a JSON summary
-of all the files in the project.) It will tell you which files this string
-(or similar strings; it is a semantic search) appear in,
-as well as some context of the lines surrounding each result.
-It's very important that you only use this action when you think
-that searching across the specific files in this project for the query
-in question will be useful. For example, don't use this command to search
-for queries you might put into a general Web search engine, because those
-will be too general to give useful results in this project-specific search.
-
----
-
-That was the end of the list of actions.
-
-Here is a JSON summary of each of the files in my project:
diff --git a/crates/feature_flags/src/feature_flags.rs b/crates/feature_flags/src/feature_flags.rs
index a3a02f6922..ebc48d16c2 100644
--- a/crates/feature_flags/src/feature_flags.rs
+++ b/crates/feature_flags/src/feature_flags.rs
@@ -101,15 +101,6 @@ impl FeatureFlag for NotebookFeatureFlag {
     const NAME: &'static str = "notebooks";
 }
 
-pub struct AutoCommand {}
-impl FeatureFlag for AutoCommand {
-    const NAME: &'static str = "auto-command";
-
-    fn enabled_for_staff() -> bool {
-        false
-    }
-}
-
 pub struct Debugger {}
 impl FeatureFlag for Debugger {
     const NAME: &'static str = "debugger";
diff --git a/crates/semantic_index/src/worktree_index.rs b/crates/semantic_index/src/worktree_index.rs
index f4d4650757..84b932d296 100644
--- a/crates/semantic_index/src/worktree_index.rs
+++ b/crates/semantic_index/src/worktree_index.rs
@@ -3,7 +3,6 @@ use crate::embedding_index::EmbeddingIndex;
 use crate::indexing::IndexingEntrySet;
 use crate::summary_index::SummaryIndex;
 use anyhow::Result;
-use feature_flags::{AutoCommand, FeatureFlagAppExt};
 use fs::Fs;
 use futures::future::Shared;
 use gpui::{App, AppContext as _, AsyncApp, Context, Entity, Subscription, Task, WeakEntity};
@@ -170,30 +169,21 @@ impl WorktreeIndex {
         updated_entries: channel::Receiver<UpdatedEntriesSet>,
         cx: &mut AsyncApp,
     ) -> Result<()> {
-        let is_auto_available = cx.update(|cx| cx.wait_for_flag::<AutoCommand>())?.await;
         let index = this.update(cx, |this, cx| {
             futures::future::try_join(
                 this.embedding_index.index_entries_changed_on_disk(cx),
-                this.summary_index
-                    .index_entries_changed_on_disk(is_auto_available, cx),
+                this.summary_index.index_entries_changed_on_disk(false, cx),
             )
         })?;
        index.await.log_err();
 
        while let Ok(updated_entries) = updated_entries.recv().await {
-            let is_auto_available = cx
-                .update(|cx| cx.has_flag::<AutoCommand>())
-                .unwrap_or(false);
-
            let index = this.update(cx, |this, cx| {
                futures::future::try_join(
                    this.embedding_index
                        .index_updated_entries(updated_entries.clone(), cx),
-                    this.summary_index.index_updated_entries(
-                        updated_entries,
-                        is_auto_available,
-                        cx,
-                    ),
+                    this.summary_index
+                        .index_updated_entries(updated_entries, false, cx),
                )
            })?;
            index.await.log_err();
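
For reference, the heart of the removed `commands_for_summaries` is a fit-or-split loop: estimate each prompt's token count with cheap arithmetic, and when a batch of summaries exceeds the model's budget, split the batch in half and retry each half via an explicit stack. The sketch below reconstructs just that loop in isolation; `Summary`, `render_prompt`, and the 8192-token budget are illustrative stand-ins, not Zed APIs.

```rust
// Fit-or-split batching, as used by the removed /auto implementation.
// `Summary` and `render_prompt` are hypothetical stand-ins for Zed's
// `FileSummary` and `summaries_prompt`.

struct Summary {
    path: String,
    text: String,
}

/// Cheap token estimate from the removed code: ~1 token per 4.5 chars,
/// i.e. len * 2 / 9, overestimating slightly so requests rarely exceed the limit.
fn estimate_tokens(prompt: &str) -> usize {
    prompt.len() * 2 / 9
}

fn render_prompt(batch: &[Summary]) -> String {
    batch
        .iter()
        .map(|s| format!("{}: {}", s.path, s.text))
        .collect::<Vec<_>>()
        .join("\n")
}

/// Produce one prompt per batch of summaries, halving any batch whose
/// estimated size exceeds `max_tokens`. An explicit stack replaces recursion.
fn prompts_within_budget(summaries: &[Summary], max_tokens: usize) -> Vec<String> {
    let mut stack = vec![summaries];
    let mut prompts = Vec::new();

    while let Some(batch) = stack.pop() {
        // Splitting a one-element slice yields an empty half; skip it.
        if batch.is_empty() {
            continue;
        }
        let prompt = render_prompt(batch);
        if estimate_tokens(&prompt) < max_tokens {
            prompts.push(prompt);
        } else if batch.len() == 1 {
            // A lone summary that exceeds the budget cannot be split further.
            eprintln!("summary for {} exceeds the token budget; skipping", batch[0].path);
        } else {
            let (left, right) = batch.split_at(batch.len() / 2);
            stack.push(right);
            stack.push(left);
        }
    }
    prompts
}

fn main() {
    let summaries = vec![
        Summary { path: "src/main.rs".into(), text: "entry point".into() },
        Summary { path: "src/lib.rs".into(), text: "core library".into() },
    ];
    // Mirror the removed code's 10% safety margin on a hypothetical 8192-token window.
    let budget = 8192 * 9 / 10;
    for prompt in prompts_within_budget(&summaries, budget) {
        println!("--- {} est. tokens ---\n{prompt}", estimate_tokens(&prompt));
    }
}
```

The explicit stack mirrors the original code's stated rationale: recursing in an async fn would have required a pinned boxed future, whereas pushing slice halves onto a Vec does not.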