Add a new load_with_encoding function to handle files with various encodings.

Modify `Buffer::reload` in `buffer.rs` to use this new function, so that Zed
can open a file in any encoding by decoding its contents to UTF-8 on load.
Bytes that cannot be decoded are replaced with the U+FFFD replacement
character (�).

Add comments and documentation.
This commit is contained in:
R Aadarsh 2025-08-25 12:39:12 +05:30
parent 43357f689c
commit 0e1f9f689c
10 changed files with 112 additions and 21 deletions

2
Cargo.lock generated
View file

@ -3643,6 +3643,7 @@ dependencies = [
"dirs 4.0.0", "dirs 4.0.0",
"edit_prediction", "edit_prediction",
"editor", "editor",
"encoding",
"fs", "fs",
"futures 0.3.31", "futures 0.3.31",
"gpui", "gpui",
@ -20107,6 +20108,7 @@ dependencies = [
"anyhow", "anyhow",
"clock", "clock",
"collections", "collections",
"encoding",
"fs", "fs",
"futures 0.3.31", "futures 0.3.31",
"fuzzy", "fuzzy",

View file

@ -53,6 +53,8 @@ util.workspace = true
workspace.workspace = true workspace.workspace = true
workspace-hack.workspace = true workspace-hack.workspace = true
itertools.workspace = true itertools.workspace = true
encoding = "0.2.33"
[target.'cfg(windows)'.dependencies] [target.'cfg(windows)'.dependencies]
async-std = { version = "1.12.0", features = ["unstable"] } async-std = { version = "1.12.0", features = ["unstable"] }

View file

@ -1193,6 +1193,7 @@ async fn get_copilot_lsp(fs: Arc<dyn Fs>, node_runtime: NodeRuntime) -> anyhow::
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
use encoding::Encoding;
use gpui::TestAppContext; use gpui::TestAppContext;
use util::path; use util::path;
@ -1406,6 +1407,10 @@ mod tests {
fn load_bytes(&self, _cx: &App) -> Task<Result<Vec<u8>>> { fn load_bytes(&self, _cx: &App) -> Task<Result<Vec<u8>>> {
unimplemented!() unimplemented!()
} }
/// Stub for `load_with_encoding`; this test double does not implement
/// decoding, so calling it panics via `unimplemented!()`.
fn load_with_encoding(
    &self,
    _cx: &App,
    _encoding: &'static dyn Encoding,
) -> Task<Result<String>> {
    unimplemented!()
}
} }
} }

View file

@ -14,10 +14,11 @@ use workspace::{ItemHandle, StatusItemView, Workspace};
use crate::selectors::save_or_reopen::{EncodingSaveOrReopenSelector, get_current_encoding}; use crate::selectors::save_or_reopen::{EncodingSaveOrReopenSelector, get_current_encoding};
/// A status bar item that shows the current file encoding and allows changing it.
pub struct EncodingIndicator { pub struct EncodingIndicator {
pub encoding: Option<&'static dyn Encoding>, pub encoding: Option<&'static dyn Encoding>,
pub workspace: WeakEntity<Workspace>, pub workspace: WeakEntity<Workspace>,
observe: Option<Subscription>, observe: Option<Subscription>, // Subscription to observe changes in the active editor
} }
pub mod selectors; pub mod selectors;
@ -93,6 +94,7 @@ impl StatusItemView for EncodingIndicator {
} }
} }
/// Get a human-readable name for the given encoding.
pub fn encoding_name(encoding: &'static dyn Encoding) -> String { pub fn encoding_name(encoding: &'static dyn Encoding) -> String {
let name = encoding.name(); let name = encoding.name();
@ -140,6 +142,8 @@ pub fn encoding_name(encoding: &'static dyn Encoding) -> String {
.to_string() .to_string()
} }
/// Get an encoding from its index in the predefined list.
/// If the index is out of range, UTF-8 is returned as a default.
pub fn encoding_from_index(index: usize) -> &'static dyn Encoding { pub fn encoding_from_index(index: usize) -> &'static dyn Encoding {
match index { match index {
0 => UTF_8, 0 => UTF_8,

View file

@ -19,6 +19,8 @@ pub mod save_or_reopen {
use crate::selectors::encoding::{Action, EncodingSelector, EncodingSelectorDelegate}; use crate::selectors::encoding::{Action, EncodingSelector, EncodingSelectorDelegate};
/// A modal view that allows the user to select between saving with a different encoding or
/// reopening with a different encoding.
pub struct EncodingSaveOrReopenSelector { pub struct EncodingSaveOrReopenSelector {
picker: Entity<Picker<EncodingSaveOrReopenDelegate>>, picker: Entity<Picker<EncodingSaveOrReopenDelegate>>,
pub current_selection: usize, pub current_selection: usize,
@ -43,6 +45,8 @@ pub mod save_or_reopen {
} }
} }
/// Toggle the modal view for selecting between saving with a different encoding or
/// reopening with a different encoding.
pub fn toggle(workspace: &mut Workspace, window: &mut Window, cx: &mut Context<Workspace>) { pub fn toggle(workspace: &mut Workspace, window: &mut Window, cx: &mut Context<Workspace>) {
let weak_workspace = workspace.weak_handle(); let weak_workspace = workspace.weak_handle();
workspace.toggle_modal(window, cx, |window, cx| { workspace.toggle_modal(window, cx, |window, cx| {
@ -100,6 +104,7 @@ pub mod save_or_reopen {
(&self.actions[0].string, &self.actions[1].string) (&self.actions[0].string, &self.actions[1].string)
} }
/// Handle the action selected by the user.
pub fn post_selection( pub fn post_selection(
&self, &self,
cx: &mut Context<Picker<EncodingSaveOrReopenDelegate>>, cx: &mut Context<Picker<EncodingSaveOrReopenDelegate>>,
@ -281,6 +286,7 @@ pub mod encoding {
use crate::encoding_from_index; use crate::encoding_from_index;
/// A modal view that allows the user to select an encoding from a list of encodings.
pub struct EncodingSelector { pub struct EncodingSelector {
picker: Entity<Picker<EncodingSelectorDelegate>>, picker: Entity<Picker<EncodingSelectorDelegate>>,
action: Action, action: Action,
@ -459,6 +465,7 @@ pub mod encoding {
} }
} }
/// The action to perform after selecting an encoding.
pub enum Action { pub enum Action {
Save, Save,
Reopen, Reopen,

View file

@ -1,21 +1,40 @@
use anyhow::{Error, Result};
use encoding::Encoding; use encoding::Encoding;
pub enum CharacterEncoding { /// A wrapper around `encoding::Encoding` to implement `Send` and `Sync`.
Utf8, /// Since the reference is static, it is safe to send it across threads.
Iso8859_1, pub struct EncodingWrapper(&'static dyn Encoding);
Cp865,
}
pub fn to_utf8<'a>(input: Vec<u8>, encoding: &'a impl encoding::Encoding) -> String { unsafe impl Send for EncodingWrapper {}
match encoding.decode(&input, encoding::DecoderTrap::Strict) { unsafe impl Sync for EncodingWrapper {}
Ok(v) => return v,
Err(_) => panic!(), impl EncodingWrapper {
pub fn new(encoding: &'static dyn Encoding) -> EncodingWrapper {
EncodingWrapper(encoding)
}
pub async fn decode(&self, input: Vec<u8>) -> Result<String> {
match self.0.decode(&input, encoding::DecoderTrap::Replace) {
Ok(v) => Ok(v),
Err(e) => Err(Error::msg(e.to_string())),
}
}
pub async fn encode(&self, input: String) -> Result<Vec<u8>> {
match self.0.encode(&input, encoding::EncoderTrap::Replace) {
Ok(v) => Ok(v),
Err(e) => Err(Error::msg(e.to_string())),
}
} }
} }
pub fn to<'a>(input: String, target: &'a impl encoding::Encoding) -> Vec<u8> { /// Convert a byte vector from a specified encoding to a UTF-8 string.
match target.encode(&input, encoding::EncoderTrap::Strict) { pub async fn to_utf8<'a>(input: Vec<u8>, encoding: EncodingWrapper) -> Result<String> {
Ok(v) => v, Ok(encoding.decode(input).await?)
Err(_) => panic!(), }
}
/// Convert a UTF-8 string to a byte vector in a specified encoding.
pub async fn from_utf8<'a>(input: String, target: EncodingWrapper) -> Result<Vec<u8>> {
Ok(target.encode(input).await?)
} }

View file

@ -1,6 +1,7 @@
#[cfg(target_os = "macos")] #[cfg(target_os = "macos")]
mod mac_watcher; mod mac_watcher;
pub mod encodings;
#[cfg(not(target_os = "macos"))] #[cfg(not(target_os = "macos"))]
pub mod fs_watcher; pub mod fs_watcher;
@ -54,6 +55,8 @@ use smol::io::AsyncReadExt;
#[cfg(any(test, feature = "test-support"))] #[cfg(any(test, feature = "test-support"))]
use std::ffi::OsStr; use std::ffi::OsStr;
use crate::encodings::EncodingWrapper;
pub trait Watcher: Send + Sync { pub trait Watcher: Send + Sync {
fn add(&self, path: &Path) -> Result<()>; fn add(&self, path: &Path) -> Result<()>;
fn remove(&self, path: &Path) -> Result<()>; fn remove(&self, path: &Path) -> Result<()>;
@ -108,6 +111,16 @@ pub trait Fs: Send + Sync {
async fn load(&self, path: &Path) -> Result<String> { async fn load(&self, path: &Path) -> Result<String> {
Ok(String::from_utf8(self.load_bytes(path).await?)?) Ok(String::from_utf8(self.load_bytes(path).await?)?)
} }
/// Reads the file at `path` as raw bytes via `load_bytes` and decodes them
/// with `encoding`, yielding the contents as a UTF-8 `String`.
///
/// Returns an error if the bytes cannot be loaded or if decoding fails.
async fn load_with_encoding(
    &self,
    path: PathBuf,
    encoding: EncodingWrapper,
) -> anyhow::Result<String> {
    let raw_bytes = self.load_bytes(path.as_path()).await?;
    encodings::to_utf8(raw_bytes, encoding).await
}
async fn load_bytes(&self, path: &Path) -> Result<Vec<u8>>; async fn load_bytes(&self, path: &Path) -> Result<Vec<u8>>;
async fn atomic_write(&self, path: PathBuf, text: String) -> Result<()>; async fn atomic_write(&self, path: PathBuf, text: String) -> Result<()>;
async fn save(&self, path: &Path, text: &Rope, line_ending: LineEnding) -> Result<()>; async fn save(&self, path: &Path, text: &Rope, line_ending: LineEnding) -> Result<()>;
@ -539,8 +552,12 @@ impl Fs for RealFs {
async fn load(&self, path: &Path) -> Result<String> { async fn load(&self, path: &Path) -> Result<String> {
let path = path.to_path_buf(); let path = path.to_path_buf();
let text = smol::unblock(|| std::fs::read_to_string(path)).await?; let encoding = EncodingWrapper::new(encoding::all::UTF_8);
Ok(text) let text =
smol::unblock(async || Ok(encodings::to_utf8(std::fs::read(path)?, encoding).await?))
.await
.await;
text
} }
async fn load_bytes(&self, path: &Path) -> Result<Vec<u8>> { async fn load_bytes(&self, path: &Path) -> Result<Vec<u8>> {
let path = path.to_path_buf(); let path = path.to_path_buf();

View file

@ -21,7 +21,8 @@ use anyhow::{Context as _, Result};
pub use clock::ReplicaId; pub use clock::ReplicaId;
use clock::{AGENT_REPLICA_ID, Lamport}; use clock::{AGENT_REPLICA_ID, Lamport};
use collections::HashMap; use collections::HashMap;
use fs::MTime; use encoding::Encoding;
use fs::{Fs, MTime, RealFs};
use futures::channel::oneshot; use futures::channel::oneshot;
use gpui::{ use gpui::{
App, AppContext as _, Context, Entity, EventEmitter, HighlightStyle, SharedString, StyledText, App, AppContext as _, Context, Entity, EventEmitter, HighlightStyle, SharedString, StyledText,
@ -401,6 +402,10 @@ pub trait LocalFile: File {
/// Loads the file's contents from disk. /// Loads the file's contents from disk.
fn load_bytes(&self, cx: &App) -> Task<Result<Vec<u8>>>; fn load_bytes(&self, cx: &App) -> Task<Result<Vec<u8>>>;
/// Loads the file contents from disk, decoding them with the given encoding.
fn load_with_encoding(&self, cx: &App, encoding: &'static dyn Encoding)
-> Task<Result<String>>;
} }
/// The auto-indent behavior associated with an editing operation. /// The auto-indent behavior associated with an editing operation.
@ -1276,12 +1281,15 @@ impl Buffer {
/// Reloads the contents of the buffer from disk. /// Reloads the contents of the buffer from disk.
pub fn reload(&mut self, cx: &Context<Self>) -> oneshot::Receiver<Option<Transaction>> { pub fn reload(&mut self, cx: &Context<Self>) -> oneshot::Receiver<Option<Transaction>> {
let (tx, rx) = futures::channel::oneshot::channel(); let (tx, rx) = futures::channel::oneshot::channel();
let encoding = self.encoding.clone();
let prev_version = self.text.version(); let prev_version = self.text.version();
self.reload_task = Some(cx.spawn(async move |this, cx| { self.reload_task = Some(cx.spawn(async move |this, cx| {
let Some((new_mtime, new_text)) = this.update(cx, |this, cx| { let Some((new_mtime, new_text)) = this.update(cx, |this, cx| {
let file = this.file.as_ref()?.as_local()?; let file = this.file.as_ref()?.as_local()?;
Some((
Some((file.disk_state().mtime(), file.load(cx))) file.disk_state().mtime(),
file.load_with_encoding(cx, encoding),
))
})? })?
else { else {
return Ok(()); return Ok(());
@ -4967,6 +4975,14 @@ impl LocalFile for TestFile {
fn load_bytes(&self, _cx: &App) -> Task<Result<Vec<u8>>> { fn load_bytes(&self, _cx: &App) -> Task<Result<Vec<u8>>> {
unimplemented!() unimplemented!()
} }
/// Stub for `load_with_encoding`; `TestFile` does not implement decoding,
/// so calling it panics via `unimplemented!()`.
fn load_with_encoding(
    &self,
    _cx: &App,
    _encoding: &'static dyn Encoding,
) -> Task<Result<String>> {
    // Parameters are underscore-prefixed (matching `load_bytes`'s `_cx`) so
    // the unused arguments do not trigger warnings.
    unimplemented!()
}
} }
pub(crate) fn contiguous_ranges( pub(crate) fn contiguous_ranges(

View file

@ -48,6 +48,8 @@ sum_tree.workspace = true
text.workspace = true text.workspace = true
util.workspace = true util.workspace = true
workspace-hack.workspace = true workspace-hack.workspace = true
encoding = "0.2.33"
[dev-dependencies] [dev-dependencies]
clock = { workspace = true, features = ["test-support"] } clock = { workspace = true, features = ["test-support"] }

View file

@ -7,7 +7,11 @@ use ::ignore::gitignore::{Gitignore, GitignoreBuilder};
use anyhow::{Context as _, Result, anyhow}; use anyhow::{Context as _, Result, anyhow};
use clock::ReplicaId; use clock::ReplicaId;
use collections::{HashMap, HashSet, VecDeque}; use collections::{HashMap, HashSet, VecDeque};
use fs::{Fs, MTime, PathEvent, RemoveOptions, Watcher, copy_recursive, read_dir_items}; use encoding::Encoding;
use fs::{
Fs, MTime, PathEvent, RemoveOptions, Watcher, copy_recursive, encodings::EncodingWrapper,
read_dir_items,
};
use futures::{ use futures::{
FutureExt as _, Stream, StreamExt, FutureExt as _, Stream, StreamExt,
channel::{ channel::{
@ -3361,6 +3365,19 @@ impl language::LocalFile for File {
let fs = worktree.fs.clone(); let fs = worktree.fs.clone();
cx.background_spawn(async move { fs.load_bytes(&abs_path?).await }) cx.background_spawn(async move { fs.load_bytes(&abs_path?).await })
} }
/// Loads this file from its worktree's filesystem on a background task,
/// decoding the contents with `encoding`.
///
/// Panics if the worktree is not local (`as_local()` is unwrapped). A failure
/// to absolutize the path is surfaced through the returned task's `Result`.
fn load_with_encoding(
    &self,
    cx: &App,
    encoding: &'static dyn Encoding,
) -> Task<Result<String>> {
    let local_worktree = self.worktree.read(cx).as_local().unwrap();
    let abs_path = local_worktree.absolutize(&self.path);
    let fs = local_worktree.fs.clone();
    // Wrap the encoding so it can be moved into the background future.
    let wrapped = EncodingWrapper::new(encoding);
    cx.background_spawn(async move { fs.load_with_encoding(abs_path?, wrapped).await })
}
} }
impl File { impl File {