Add a new load_with_encoding function to handle files with various encodings.

Modify `Buffer::reload` in `buffer.rs` to use this new function, allowing Zed
to open files of any encoding in UTF-8 mode. Byte sequences that are not
valid UTF-8 are replaced with the Unicode replacement character (�, U+FFFD).

Add comments and documentation.
This commit is contained in:
R Aadarsh 2025-08-25 12:39:12 +05:30
parent 43357f689c
commit 0e1f9f689c
10 changed files with 112 additions and 21 deletions

2
Cargo.lock generated
View file

@ -3643,6 +3643,7 @@ dependencies = [
"dirs 4.0.0",
"edit_prediction",
"editor",
"encoding",
"fs",
"futures 0.3.31",
"gpui",
@ -20107,6 +20108,7 @@ dependencies = [
"anyhow",
"clock",
"collections",
"encoding",
"fs",
"futures 0.3.31",
"fuzzy",

View file

@ -53,6 +53,8 @@ util.workspace = true
workspace.workspace = true
workspace-hack.workspace = true
itertools.workspace = true
encoding = "0.2.33"
[target.'cfg(windows)'.dependencies]
async-std = { version = "1.12.0", features = ["unstable"] }

View file

@ -1193,6 +1193,7 @@ async fn get_copilot_lsp(fs: Arc<dyn Fs>, node_runtime: NodeRuntime) -> anyhow::
#[cfg(test)]
mod tests {
use super::*;
use encoding::Encoding;
use gpui::TestAppContext;
use util::path;
@ -1406,6 +1407,10 @@ mod tests {
/// Stub implementation: calling it panics via `unimplemented!()`.
fn load_bytes(&self, _cx: &App) -> Task<Result<Vec<u8>>> {
unimplemented!()
}
/// Stub implementation: both arguments are ignored and calling it panics via
/// `unimplemented!()`.
fn load_with_encoding(&self, _: &App, _: &'static dyn Encoding) -> Task<Result<String>> {
unimplemented!()
}
}
}

View file

@ -14,10 +14,11 @@ use workspace::{ItemHandle, StatusItemView, Workspace};
use crate::selectors::save_or_reopen::{EncodingSaveOrReopenSelector, get_current_encoding};
/// A status bar item that shows the current file encoding and allows changing it.
pub struct EncodingIndicator {
pub encoding: Option<&'static dyn Encoding>,
pub workspace: WeakEntity<Workspace>,
observe: Option<Subscription>,
observe: Option<Subscription>, // Subscription to observe changes in the active editor
}
pub mod selectors;
@ -93,6 +94,7 @@ impl StatusItemView for EncodingIndicator {
}
}
/// Get a human-readable name for the given encoding.
pub fn encoding_name(encoding: &'static dyn Encoding) -> String {
let name = encoding.name();
@ -140,6 +142,8 @@ pub fn encoding_name(encoding: &'static dyn Encoding) -> String {
.to_string()
}
/// Get an encoding from its index in the predefined list.
/// If the index is out of range, UTF-8 is returned as a default.
pub fn encoding_from_index(index: usize) -> &'static dyn Encoding {
match index {
0 => UTF_8,

View file

@ -19,6 +19,8 @@ pub mod save_or_reopen {
use crate::selectors::encoding::{Action, EncodingSelector, EncodingSelectorDelegate};
/// A modal view that allows the user to select between saving with a different encoding or
/// reopening with a different encoding.
pub struct EncodingSaveOrReopenSelector {
picker: Entity<Picker<EncodingSaveOrReopenDelegate>>,
pub current_selection: usize,
@ -43,6 +45,8 @@ pub mod save_or_reopen {
}
}
/// Toggle the modal view for selecting between saving with a different encoding or
/// reopening with a different encoding.
pub fn toggle(workspace: &mut Workspace, window: &mut Window, cx: &mut Context<Workspace>) {
let weak_workspace = workspace.weak_handle();
workspace.toggle_modal(window, cx, |window, cx| {
@ -100,6 +104,7 @@ pub mod save_or_reopen {
(&self.actions[0].string, &self.actions[1].string)
}
/// Handle the action selected by the user.
pub fn post_selection(
&self,
cx: &mut Context<Picker<EncodingSaveOrReopenDelegate>>,
@ -281,6 +286,7 @@ pub mod encoding {
use crate::encoding_from_index;
/// A modal view that allows the user to select an encoding from a list of encodings.
pub struct EncodingSelector {
picker: Entity<Picker<EncodingSelectorDelegate>>,
action: Action,
@ -459,6 +465,7 @@ pub mod encoding {
}
}
/// The action to perform after selecting an encoding.
pub enum Action {
Save,
Reopen,

View file

@ -1,21 +1,40 @@
use anyhow::{Error, Result};
use encoding::Encoding;
pub enum CharacterEncoding {
Utf8,
Iso8859_1,
Cp865,
/// A wrapper around a `&'static dyn Encoding` that asserts `Send` and `Sync` for it.
///
/// NOTE(review): a `'static` lifetime alone does not make a reference safe to share
/// across threads; `&T` is `Send` only when `T: Sync`, and `dyn Encoding` as written
/// here carries no such bound. Soundness therefore depends on every encoding that is
/// ever wrapped being safe to use from several threads at once — confirm.
/// Presumably the `encoding` crate's `EncodingRef` alias
/// (`&'static (Encoding + Send + Sync)`) could replace this wrapper and remove the
/// `unsafe` impls entirely — TODO verify against the crate version in use.
pub struct EncodingWrapper(&'static dyn Encoding);
// SAFETY: NOTE(review) — not enforced by the type system; relies on the wrapped
// encoding object being safe to access from another thread (see the struct docs).
unsafe impl Send for EncodingWrapper {}
// SAFETY: NOTE(review) — same unchecked assumption as the `Send` impl above.
unsafe impl Sync for EncodingWrapper {}
impl EncodingWrapper {
pub fn new(encoding: &'static dyn Encoding) -> EncodingWrapper {
EncodingWrapper(encoding)
}
pub fn to_utf8<'a>(input: Vec<u8>, encoding: &'a impl encoding::Encoding) -> String {
match encoding.decode(&input, encoding::DecoderTrap::Strict) {
Ok(v) => return v,
Err(_) => panic!(),
pub async fn decode(&self, input: Vec<u8>) -> Result<String> {
match self.0.decode(&input, encoding::DecoderTrap::Replace) {
Ok(v) => Ok(v),
Err(e) => Err(Error::msg(e.to_string())),
}
}
pub fn to<'a>(input: String, target: &'a impl encoding::Encoding) -> Vec<u8> {
match target.encode(&input, encoding::EncoderTrap::Strict) {
Ok(v) => v,
Err(_) => panic!(),
pub async fn encode(&self, input: String) -> Result<Vec<u8>> {
match self.0.encode(&input, encoding::EncoderTrap::Replace) {
Ok(v) => Ok(v),
Err(e) => Err(Error::msg(e.to_string())),
}
}
}
/// Convert a byte vector from a specified encoding to a UTF-8 string.
///
/// Thin free-function form of `EncodingWrapper::decode`.
///
/// # Errors
/// Propagates whatever error `EncodingWrapper::decode` returns.
pub async fn to_utf8(input: Vec<u8>, encoding: EncodingWrapper) -> Result<String> {
    // `decode` already yields the function's return type, so no `Ok(..?)` re-wrap is needed.
    encoding.decode(input).await
}
/// Convert a UTF-8 string to a byte vector in a specified encoding.
///
/// Thin free-function form of `EncodingWrapper::encode`.
///
/// # Errors
/// Propagates whatever error `EncodingWrapper::encode` returns.
pub async fn from_utf8(input: String, target: EncodingWrapper) -> Result<Vec<u8>> {
    // `encode` already yields the function's return type, so no `Ok(..?)` re-wrap is needed.
    target.encode(input).await
}

View file

@ -1,6 +1,7 @@
#[cfg(target_os = "macos")]
mod mac_watcher;
pub mod encodings;
#[cfg(not(target_os = "macos"))]
pub mod fs_watcher;
@ -54,6 +55,8 @@ use smol::io::AsyncReadExt;
#[cfg(any(test, feature = "test-support"))]
use std::ffi::OsStr;
use crate::encodings::EncodingWrapper;
pub trait Watcher: Send + Sync {
fn add(&self, path: &Path) -> Result<()>;
fn remove(&self, path: &Path) -> Result<()>;
@ -108,6 +111,16 @@ pub trait Fs: Send + Sync {
async fn load(&self, path: &Path) -> Result<String> {
Ok(String::from_utf8(self.load_bytes(path).await?)?)
}
/// Load a file with the specified encoding, returning a UTF-8 string.
///
/// Reads the raw bytes through `load_bytes` and hands them to `encodings::to_utf8`.
/// Errors from either step are returned to the caller.
async fn load_with_encoding(
    &self,
    path: PathBuf,
    encoding: EncodingWrapper,
) -> anyhow::Result<String> {
    let raw_bytes = self.load_bytes(&path).await?;
    encodings::to_utf8(raw_bytes, encoding).await
}
async fn load_bytes(&self, path: &Path) -> Result<Vec<u8>>;
async fn atomic_write(&self, path: PathBuf, text: String) -> Result<()>;
async fn save(&self, path: &Path, text: &Rope, line_ending: LineEnding) -> Result<()>;
@ -539,8 +552,12 @@ impl Fs for RealFs {
/// Loads the file at `path` and decodes its bytes with the UTF-8 encoding via
/// `encodings::to_utf8`.
///
/// # Errors
/// Returns the I/O error if the file cannot be read, or the error reported by
/// `encodings::to_utf8`.
async fn load(&self, path: &Path) -> Result<String> {
    let path = path.to_path_buf();
    let encoding = EncodingWrapper::new(encoding::all::UTF_8);
    // Only the blocking read goes through `smol::unblock`. Passing an `async` closure
    // would merely construct a future there, and the blocking `std::fs::read` would
    // then run wherever that future is awaited (i.e. on the caller's task).
    let bytes = smol::unblock(move || std::fs::read(path)).await?;
    encodings::to_utf8(bytes, encoding).await
}
async fn load_bytes(&self, path: &Path) -> Result<Vec<u8>> {
let path = path.to_path_buf();

View file

@ -21,7 +21,8 @@ use anyhow::{Context as _, Result};
pub use clock::ReplicaId;
use clock::{AGENT_REPLICA_ID, Lamport};
use collections::HashMap;
use fs::MTime;
use encoding::Encoding;
use fs::{Fs, MTime, RealFs};
use futures::channel::oneshot;
use gpui::{
App, AppContext as _, Context, Entity, EventEmitter, HighlightStyle, SharedString, StyledText,
@ -401,6 +402,10 @@ pub trait LocalFile: File {
/// Loads the file's contents from disk.
fn load_bytes(&self, cx: &App) -> Task<Result<Vec<u8>>>;
/// Loads the file contents from disk, decoding them with the given encoding.
fn load_with_encoding(&self, cx: &App, encoding: &'static dyn Encoding)
-> Task<Result<String>>;
}
/// The auto-indent behavior associated with an editing operation.
@ -1276,12 +1281,15 @@ impl Buffer {
/// Reloads the contents of the buffer from disk.
pub fn reload(&mut self, cx: &Context<Self>) -> oneshot::Receiver<Option<Transaction>> {
let (tx, rx) = futures::channel::oneshot::channel();
let encoding = self.encoding.clone();
let prev_version = self.text.version();
self.reload_task = Some(cx.spawn(async move |this, cx| {
let Some((new_mtime, new_text)) = this.update(cx, |this, cx| {
let file = this.file.as_ref()?.as_local()?;
Some((file.disk_state().mtime(), file.load(cx)))
Some((
file.disk_state().mtime(),
file.load_with_encoding(cx, encoding),
))
})?
else {
return Ok(());
@ -4967,6 +4975,14 @@ impl LocalFile for TestFile {
/// Stub implementation: calling it panics via `unimplemented!()`.
fn load_bytes(&self, _cx: &App) -> Task<Result<Vec<u8>>> {
unimplemented!()
}
/// Stub implementation: both arguments are ignored and calling it panics via
/// `unimplemented!()`.
fn load_with_encoding(
    &self,
    _cx: &App,
    _encoding: &'static dyn Encoding,
) -> Task<Result<String>> {
    unimplemented!()
}
}
pub(crate) fn contiguous_ranges(

View file

@ -48,6 +48,8 @@ sum_tree.workspace = true
text.workspace = true
util.workspace = true
workspace-hack.workspace = true
encoding = "0.2.33"
[dev-dependencies]
clock = { workspace = true, features = ["test-support"] }

View file

@ -7,7 +7,11 @@ use ::ignore::gitignore::{Gitignore, GitignoreBuilder};
use anyhow::{Context as _, Result, anyhow};
use clock::ReplicaId;
use collections::{HashMap, HashSet, VecDeque};
use fs::{Fs, MTime, PathEvent, RemoveOptions, Watcher, copy_recursive, read_dir_items};
use encoding::Encoding;
use fs::{
Fs, MTime, PathEvent, RemoveOptions, Watcher, copy_recursive, encodings::EncodingWrapper,
read_dir_items,
};
use futures::{
FutureExt as _, Stream, StreamExt,
channel::{
@ -3361,6 +3365,19 @@ impl language::LocalFile for File {
let fs = worktree.fs.clone();
cx.background_spawn(async move { fs.load_bytes(&abs_path?).await })
}
/// Loads this file's contents on a background task, decoding them with `encoding`.
///
/// The file's path is made absolute through the worktree. A failure to do so is
/// reported through the returned task rather than at the call site.
///
/// # Panics
/// Panics if the worktree is not local (`as_local()` returns `None`).
fn load_with_encoding(
    &self,
    cx: &App,
    encoding: &'static dyn Encoding,
) -> Task<Result<String>> {
    let local_worktree = self.worktree.read(cx).as_local().unwrap();
    let abs_path = local_worktree.absolutize(&self.path);
    let fs = local_worktree.fs.clone();
    let wrapped = EncodingWrapper::new(encoding);
    cx.background_spawn(async move {
        let abs_path = abs_path?;
        fs.load_with_encoding(abs_path, wrapped).await
    })
}
}
impl File {