diff --git a/Cargo.lock b/Cargo.lock index 18b606eb24..8c0d73d9b6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3643,6 +3643,7 @@ dependencies = [ "dirs 4.0.0", "edit_prediction", "editor", + "encoding", "fs", "futures 0.3.31", "gpui", @@ -20107,6 +20108,7 @@ dependencies = [ "anyhow", "clock", "collections", + "encoding", "fs", "futures 0.3.31", "fuzzy", diff --git a/crates/copilot/Cargo.toml b/crates/copilot/Cargo.toml index 0fc119f311..4a3a6b5c8e 100644 --- a/crates/copilot/Cargo.toml +++ b/crates/copilot/Cargo.toml @@ -53,6 +53,8 @@ util.workspace = true workspace.workspace = true workspace-hack.workspace = true itertools.workspace = true +encoding = "0.2.33" + [target.'cfg(windows)'.dependencies] async-std = { version = "1.12.0", features = ["unstable"] } diff --git a/crates/copilot/src/copilot.rs b/crates/copilot/src/copilot.rs index b7d8423fd7..355df6b41e 100644 --- a/crates/copilot/src/copilot.rs +++ b/crates/copilot/src/copilot.rs @@ -1193,6 +1193,7 @@ async fn get_copilot_lsp(fs: Arc, node_runtime: NodeRuntime) -> anyhow:: #[cfg(test)] mod tests { use super::*; + use encoding::Encoding; use gpui::TestAppContext; use util::path; @@ -1406,6 +1407,10 @@ mod tests { fn load_bytes(&self, _cx: &App) -> Task>> { unimplemented!() } + + fn load_with_encoding(&self, _: &App, _: &'static dyn Encoding) -> Task> { + unimplemented!() + } } } diff --git a/crates/encodings/src/lib.rs b/crates/encodings/src/lib.rs index 169b4bf350..5315515998 100644 --- a/crates/encodings/src/lib.rs +++ b/crates/encodings/src/lib.rs @@ -14,10 +14,11 @@ use workspace::{ItemHandle, StatusItemView, Workspace}; use crate::selectors::save_or_reopen::{EncodingSaveOrReopenSelector, get_current_encoding}; +/// A status bar item that shows the current file encoding and allows changing it. pub struct EncodingIndicator { pub encoding: Option<&'static dyn Encoding>, pub workspace: WeakEntity, - observe: Option, + observe: Option, // Subscription to observe changes in the active editor } pub mod selectors; @@ -93,6 +94,7 @@ impl StatusItemView for EncodingIndicator { } } +/// Get a human-readable name for the given encoding. pub fn encoding_name(encoding: &'static dyn Encoding) -> String { let name = encoding.name(); @@ -140,6 +142,8 @@ pub fn encoding_name(encoding: &'static dyn Encoding) -> String { .to_string() } +/// Get an encoding from its index in the predefined list. +/// If the index is out of range, UTF-8 is returned as a default. pub fn encoding_from_index(index: usize) -> &'static dyn Encoding { match index { 0 => UTF_8, diff --git a/crates/encodings/src/selectors.rs b/crates/encodings/src/selectors.rs index cdc0702e53..c25b56be56 100644 --- a/crates/encodings/src/selectors.rs +++ b/crates/encodings/src/selectors.rs @@ -19,6 +19,8 @@ pub mod save_or_reopen { use crate::selectors::encoding::{Action, EncodingSelector, EncodingSelectorDelegate}; + /// A modal view that allows the user to select between saving with a different encoding or + /// reopening with a different encoding. pub struct EncodingSaveOrReopenSelector { picker: Entity>, pub current_selection: usize, @@ -43,6 +45,8 @@ pub mod save_or_reopen { } } + /// Toggle the modal view for selecting between saving with a different encoding or + /// reopening with a different encoding. pub fn toggle(workspace: &mut Workspace, window: &mut Window, cx: &mut Context) { let weak_workspace = workspace.weak_handle(); workspace.toggle_modal(window, cx, |window, cx| { @@ -100,6 +104,7 @@ pub mod save_or_reopen { (&self.actions[0].string, &self.actions[1].string) } + /// Handle the action selected by the user. pub fn post_selection( &self, cx: &mut Context>, @@ -281,6 +286,7 @@ pub mod encoding { use crate::encoding_from_index; + /// A modal view that allows the user to select an encoding from a list of encodings. pub struct EncodingSelector { picker: Entity>, action: Action, @@ -459,6 +465,7 @@ pub mod encoding { } } + /// The action to perform after selecting an encoding. pub enum Action { Save, Reopen, diff --git a/crates/fs/src/encodings.rs b/crates/fs/src/encodings.rs index 8fb38ff24f..b0a1264a14 100644 --- a/crates/fs/src/encodings.rs +++ b/crates/fs/src/encodings.rs @@ -1,21 +1,40 @@ +use anyhow::{Error, Result}; + use encoding::Encoding; -pub enum CharacterEncoding { - Utf8, - Iso8859_1, - Cp865, -} +/// A wrapper around `encoding::Encoding` to implement `Send` and `Sync`. +/// Since the reference is static, it is safe to send it across threads. +pub struct EncodingWrapper(&'static dyn Encoding); -pub fn to_utf8<'a>(input: Vec, encoding: &'a impl encoding::Encoding) -> String { - match encoding.decode(&input, encoding::DecoderTrap::Strict) { - Ok(v) => return v, - Err(_) => panic!(), +unsafe impl Send for EncodingWrapper {} +unsafe impl Sync for EncodingWrapper {} + +impl EncodingWrapper { + pub fn new(encoding: &'static dyn Encoding) -> EncodingWrapper { + EncodingWrapper(encoding) + } + + pub async fn decode(&self, input: Vec) -> Result { + match self.0.decode(&input, encoding::DecoderTrap::Replace) { + Ok(v) => Ok(v), + Err(e) => Err(Error::msg(e.to_string())), + } + } + + pub async fn encode(&self, input: String) -> Result> { + match self.0.encode(&input, encoding::EncoderTrap::Replace) { + Ok(v) => Ok(v), + Err(e) => Err(Error::msg(e.to_string())), + } } } -pub fn to<'a>(input: String, target: &'a impl encoding::Encoding) -> Vec { - match target.encode(&input, encoding::EncoderTrap::Strict) { - Ok(v) => v, - Err(_) => panic!(), - } +/// Convert a byte vector from a specified encoding to a UTF-8 string. +pub async fn to_utf8<'a>(input: Vec, encoding: EncodingWrapper) -> Result { + Ok(encoding.decode(input).await?) +} + +/// Convert a UTF-8 string to a byte vector in a specified encoding. +pub async fn from_utf8<'a>(input: String, target: EncodingWrapper) -> Result> { + Ok(target.encode(input).await?) } diff --git a/crates/fs/src/fs.rs b/crates/fs/src/fs.rs index 75312c5c0c..cb3b649b76 100644 --- a/crates/fs/src/fs.rs +++ b/crates/fs/src/fs.rs @@ -1,6 +1,7 @@ #[cfg(target_os = "macos")] mod mac_watcher; +pub mod encodings; #[cfg(not(target_os = "macos"))] pub mod fs_watcher; @@ -54,6 +55,8 @@ use smol::io::AsyncReadExt; #[cfg(any(test, feature = "test-support"))] use std::ffi::OsStr; +use crate::encodings::EncodingWrapper; + pub trait Watcher: Send + Sync { fn add(&self, path: &Path) -> Result<()>; fn remove(&self, path: &Path) -> Result<()>; @@ -108,6 +111,16 @@ pub trait Fs: Send + Sync { async fn load(&self, path: &Path) -> Result { Ok(String::from_utf8(self.load_bytes(path).await?)?) } + + /// Load a file with the specified encoding, returning a UTF-8 string. + async fn load_with_encoding( + &self, + path: PathBuf, + encoding: EncodingWrapper, + ) -> anyhow::Result { + Ok(encodings::to_utf8(self.load_bytes(path.as_path()).await?, encoding).await?) + } + async fn load_bytes(&self, path: &Path) -> Result>; async fn atomic_write(&self, path: PathBuf, text: String) -> Result<()>; async fn save(&self, path: &Path, text: &Rope, line_ending: LineEnding) -> Result<()>; @@ -539,8 +552,12 @@ impl Fs for RealFs { async fn load(&self, path: &Path) -> Result { let path = path.to_path_buf(); - let text = smol::unblock(|| std::fs::read_to_string(path)).await?; - Ok(text) + let encoding = EncodingWrapper::new(encoding::all::UTF_8); + let text = + smol::unblock(async || Ok(encodings::to_utf8(std::fs::read(path)?, encoding).await?)) + .await + .await; + text } async fn load_bytes(&self, path: &Path) -> Result> { let path = path.to_path_buf(); diff --git a/crates/language/src/buffer.rs b/crates/language/src/buffer.rs index 610e3f4aaf..44a5dacc2d 100644 --- a/crates/language/src/buffer.rs +++ b/crates/language/src/buffer.rs @@ -21,7 +21,8 @@ use anyhow::{Context as _, Result}; pub use clock::ReplicaId; use clock::{AGENT_REPLICA_ID, Lamport}; use collections::HashMap; -use fs::MTime; +use encoding::Encoding; +use fs::{Fs, MTime, RealFs}; use futures::channel::oneshot; use gpui::{ App, AppContext as _, Context, Entity, EventEmitter, HighlightStyle, SharedString, StyledText, @@ -401,6 +402,10 @@ pub trait LocalFile: File { /// Loads the file's contents from disk. fn load_bytes(&self, cx: &App) -> Task>>; + + /// Loads the file contents from disk, decoding them with the given encoding. + fn load_with_encoding(&self, cx: &App, encoding: &'static dyn Encoding) + -> Task>; } /// The auto-indent behavior associated with an editing operation. @@ -1276,12 +1281,15 @@ impl Buffer { /// Reloads the contents of the buffer from disk. pub fn reload(&mut self, cx: &Context) -> oneshot::Receiver> { let (tx, rx) = futures::channel::oneshot::channel(); + let encoding = self.encoding.clone(); let prev_version = self.text.version(); self.reload_task = Some(cx.spawn(async move |this, cx| { let Some((new_mtime, new_text)) = this.update(cx, |this, cx| { let file = this.file.as_ref()?.as_local()?; - - Some((file.disk_state().mtime(), file.load(cx))) + Some(( + file.disk_state().mtime(), + file.load_with_encoding(cx, encoding), + )) })? else { return Ok(()); @@ -4967,6 +4975,14 @@ impl LocalFile for TestFile { fn load_bytes(&self, _cx: &App) -> Task>> { unimplemented!() } + + fn load_with_encoding( + &self, + cx: &App, + encoding: &'static dyn Encoding, + ) -> Task> { + unimplemented!() + } } pub(crate) fn contiguous_ranges( diff --git a/crates/worktree/Cargo.toml b/crates/worktree/Cargo.toml index db264fe3aa..6dd398dfc8 100644 --- a/crates/worktree/Cargo.toml +++ b/crates/worktree/Cargo.toml @@ -48,6 +48,8 @@ sum_tree.workspace = true text.workspace = true util.workspace = true workspace-hack.workspace = true +encoding = "0.2.33" + [dev-dependencies] clock = { workspace = true, features = ["test-support"] } diff --git a/crates/worktree/src/worktree.rs b/crates/worktree/src/worktree.rs index cf61ee2669..c255877b3a 100644 --- a/crates/worktree/src/worktree.rs +++ b/crates/worktree/src/worktree.rs @@ -7,7 +7,11 @@ use ::ignore::gitignore::{Gitignore, GitignoreBuilder}; use anyhow::{Context as _, Result, anyhow}; use clock::ReplicaId; use collections::{HashMap, HashSet, VecDeque}; -use fs::{Fs, MTime, PathEvent, RemoveOptions, Watcher, copy_recursive, read_dir_items}; +use encoding::Encoding; +use fs::{ + Fs, MTime, PathEvent, RemoveOptions, Watcher, copy_recursive, encodings::EncodingWrapper, + read_dir_items, +}; use futures::{ FutureExt as _, Stream, StreamExt, channel::{ @@ -3361,6 +3365,19 @@ impl language::LocalFile for File { let fs = worktree.fs.clone(); cx.background_spawn(async move { fs.load_bytes(&abs_path?).await }) } + + fn load_with_encoding( + &self, + cx: &App, + encoding: &'static dyn Encoding, + ) -> Task> { + let worktree = self.worktree.read(cx).as_local().unwrap(); + let path = worktree.absolutize(&self.path); + let fs = worktree.fs.clone(); + + let encoding = EncodingWrapper::new(encoding); + cx.background_spawn(async move { fs.load_with_encoding(path?, encoding).await }) + } } impl File {