Add a new `load_with_encoding`
function to handle files with various encodings.
Modify `Buffer::reload` in `buffer.rs` to use this new function, allowing Zed to open files with any encoding in UTF-8 mode. Files containing bytes that are invalid in UTF-8 will have those bytes replaced with the � (U+FFFD) replacement character. Add comments and documentation.
This commit is contained in:
parent
43357f689c
commit
0e1f9f689c
10 changed files with 112 additions and 21 deletions
2
Cargo.lock
generated
2
Cargo.lock
generated
|
@ -3643,6 +3643,7 @@ dependencies = [
|
|||
"dirs 4.0.0",
|
||||
"edit_prediction",
|
||||
"editor",
|
||||
"encoding",
|
||||
"fs",
|
||||
"futures 0.3.31",
|
||||
"gpui",
|
||||
|
@ -20107,6 +20108,7 @@ dependencies = [
|
|||
"anyhow",
|
||||
"clock",
|
||||
"collections",
|
||||
"encoding",
|
||||
"fs",
|
||||
"futures 0.3.31",
|
||||
"fuzzy",
|
||||
|
|
|
@ -53,6 +53,8 @@ util.workspace = true
|
|||
workspace.workspace = true
|
||||
workspace-hack.workspace = true
|
||||
itertools.workspace = true
|
||||
encoding = "0.2.33"
|
||||
|
||||
|
||||
[target.'cfg(windows)'.dependencies]
|
||||
async-std = { version = "1.12.0", features = ["unstable"] }
|
||||
|
|
|
@ -1193,6 +1193,7 @@ async fn get_copilot_lsp(fs: Arc<dyn Fs>, node_runtime: NodeRuntime) -> anyhow::
|
|||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use encoding::Encoding;
|
||||
use gpui::TestAppContext;
|
||||
use util::path;
|
||||
|
||||
|
@ -1406,6 +1407,10 @@ mod tests {
|
|||
fn load_bytes(&self, _cx: &App) -> Task<Result<Vec<u8>>> {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
/// Stub implementation of `load_with_encoding`.
///
/// Both arguments are ignored and any call panics via `unimplemented!()`.
fn load_with_encoding(&self, _: &App, _: &'static dyn Encoding) -> Task<Result<String>> {
|
||||
unimplemented!()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -14,10 +14,11 @@ use workspace::{ItemHandle, StatusItemView, Workspace};
|
|||
|
||||
use crate::selectors::save_or_reopen::{EncodingSaveOrReopenSelector, get_current_encoding};
|
||||
|
||||
/// A status bar item that shows the current file encoding and allows changing it.
|
||||
pub struct EncodingIndicator {
|
||||
pub encoding: Option<&'static dyn Encoding>,
|
||||
pub workspace: WeakEntity<Workspace>,
|
||||
observe: Option<Subscription>,
|
||||
observe: Option<Subscription>, // Subscription to observe changes in the active editor
|
||||
}
|
||||
|
||||
pub mod selectors;
|
||||
|
@ -93,6 +94,7 @@ impl StatusItemView for EncodingIndicator {
|
|||
}
|
||||
}
|
||||
|
||||
/// Get a human-readable name for the given encoding.
|
||||
pub fn encoding_name(encoding: &'static dyn Encoding) -> String {
|
||||
let name = encoding.name();
|
||||
|
||||
|
@ -140,6 +142,8 @@ pub fn encoding_name(encoding: &'static dyn Encoding) -> String {
|
|||
.to_string()
|
||||
}
|
||||
|
||||
/// Get an encoding from its index in the predefined list.
|
||||
/// If the index is out of range, UTF-8 is returned as a default.
|
||||
pub fn encoding_from_index(index: usize) -> &'static dyn Encoding {
|
||||
match index {
|
||||
0 => UTF_8,
|
||||
|
|
|
@ -19,6 +19,8 @@ pub mod save_or_reopen {
|
|||
|
||||
use crate::selectors::encoding::{Action, EncodingSelector, EncodingSelectorDelegate};
|
||||
|
||||
/// A modal view that allows the user to select between saving with a different encoding or
|
||||
/// reopening with a different encoding.
|
||||
pub struct EncodingSaveOrReopenSelector {
|
||||
picker: Entity<Picker<EncodingSaveOrReopenDelegate>>,
|
||||
pub current_selection: usize,
|
||||
|
@ -43,6 +45,8 @@ pub mod save_or_reopen {
|
|||
}
|
||||
}
|
||||
|
||||
/// Toggle the modal view for selecting between saving with a different encoding or
|
||||
/// reopening with a different encoding.
|
||||
pub fn toggle(workspace: &mut Workspace, window: &mut Window, cx: &mut Context<Workspace>) {
|
||||
let weak_workspace = workspace.weak_handle();
|
||||
workspace.toggle_modal(window, cx, |window, cx| {
|
||||
|
@ -100,6 +104,7 @@ pub mod save_or_reopen {
|
|||
(&self.actions[0].string, &self.actions[1].string)
|
||||
}
|
||||
|
||||
/// Handle the action selected by the user.
|
||||
pub fn post_selection(
|
||||
&self,
|
||||
cx: &mut Context<Picker<EncodingSaveOrReopenDelegate>>,
|
||||
|
@ -281,6 +286,7 @@ pub mod encoding {
|
|||
|
||||
use crate::encoding_from_index;
|
||||
|
||||
/// A modal view that allows the user to select an encoding from a list of encodings.
|
||||
pub struct EncodingSelector {
|
||||
picker: Entity<Picker<EncodingSelectorDelegate>>,
|
||||
action: Action,
|
||||
|
@ -459,6 +465,7 @@ pub mod encoding {
|
|||
}
|
||||
}
|
||||
|
||||
/// The action to perform after selecting an encoding.
|
||||
pub enum Action {
|
||||
Save,
|
||||
Reopen,
|
||||
|
|
|
@ -1,21 +1,40 @@
|
|||
use anyhow::{Error, Result};
|
||||
|
||||
use encoding::Encoding;
|
||||
|
||||
pub enum CharacterEncoding {
|
||||
Utf8,
|
||||
Iso8859_1,
|
||||
Cp865,
|
||||
}
|
||||
/// A wrapper around `encoding::Encoding` to implement `Send` and `Sync`.
|
||||
/// Since the reference is static, it is safe to send it across threads.
|
||||
pub struct EncodingWrapper(&'static dyn Encoding);
|
||||
|
||||
pub fn to_utf8<'a>(input: Vec<u8>, encoding: &'a impl encoding::Encoding) -> String {
|
||||
match encoding.decode(&input, encoding::DecoderTrap::Strict) {
|
||||
Ok(v) => return v,
|
||||
Err(_) => panic!(),
|
||||
// SAFETY: `EncodingWrapper` holds only a `&'static dyn Encoding`, so the
// reference itself can never dangle when moved to another thread.
// NOTE(review): soundness also requires that every `Encoding` implementor
// passed in is safe to use from multiple threads. The trait object carries
// no `Send + Sync` bound, so the compiler cannot check this - confirm. The
// `encoding` crate's `EncodingRef` alias appears to add those bounds and
// might make these manual impls unnecessary.
unsafe impl Send for EncodingWrapper {}
|
||||
// SAFETY: same reasoning as the `Send` impl above; only shared access to
// the wrapped `&'static dyn Encoding` is ever handed out.
unsafe impl Sync for EncodingWrapper {}
|
||||
|
||||
impl EncodingWrapper {
|
||||
/// Wraps a static encoding reference in an `EncodingWrapper`.
pub fn new(encoding: &'static dyn Encoding) -> EncodingWrapper {
|
||||
EncodingWrapper(encoding)
|
||||
}
|
||||
|
||||
/// Decodes `input` with the wrapped encoding and returns the resulting `String`.
///
/// Decoding uses `DecoderTrap::Replace`, so malformed input is presumably
/// substituted rather than rejected - verify against the `encoding` crate docs.
///
/// # Errors
///
/// Returns an error built from the decoder's error message if decoding fails.
///
/// NOTE(review): declared `async` but the body contains no `.await`.
pub async fn decode(&self, input: Vec<u8>) -> Result<String> {
|
||||
match self.0.decode(&input, encoding::DecoderTrap::Replace) {
|
||||
Ok(v) => Ok(v),
|
||||
// Convert the decoder's error into the `Error` type via its string form.
Err(e) => Err(Error::msg(e.to_string())),
|
||||
}
|
||||
}
|
||||
|
||||
/// Encodes `input` into bytes using the wrapped encoding.
///
/// Encoding uses `EncoderTrap::Replace`, so unrepresentable characters are
/// presumably substituted rather than rejected - verify against the
/// `encoding` crate docs.
///
/// # Errors
///
/// Returns an error built from the encoder's error message if encoding fails.
///
/// NOTE(review): declared `async` but the body contains no `.await`.
pub async fn encode(&self, input: String) -> Result<Vec<u8>> {
|
||||
match self.0.encode(&input, encoding::EncoderTrap::Replace) {
|
||||
Ok(v) => Ok(v),
|
||||
// Convert the encoder's error into the `Error` type via its string form.
Err(e) => Err(Error::msg(e.to_string())),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to<'a>(input: String, target: &'a impl encoding::Encoding) -> Vec<u8> {
|
||||
match target.encode(&input, encoding::EncoderTrap::Strict) {
|
||||
Ok(v) => v,
|
||||
Err(_) => panic!(),
|
||||
}
|
||||
/// Convert a byte vector from a specified encoding to a UTF-8 string.
///
/// Thin wrapper that forwards `input` to `encoding.decode(..)` and returns
/// its result.
///
/// # Errors
///
/// Propagates any error returned by `EncodingWrapper::decode`.
///
/// NOTE(review): the lifetime parameter `'a` is not used by the signature,
/// and `Ok(expr?)` re-wraps a value that is already a `Result`.
|
||||
pub async fn to_utf8<'a>(input: Vec<u8>, encoding: EncodingWrapper) -> Result<String> {
|
||||
Ok(encoding.decode(input).await?)
|
||||
}
|
||||
|
||||
/// Convert a UTF-8 string to a byte vector in a specified encoding.
///
/// Thin wrapper that forwards `input` to `target.encode(..)` and returns
/// its result.
///
/// # Errors
///
/// Propagates any error returned by `EncodingWrapper::encode`.
///
/// NOTE(review): the lifetime parameter `'a` is not used by the signature,
/// and `Ok(expr?)` re-wraps a value that is already a `Result`.
|
||||
pub async fn from_utf8<'a>(input: String, target: EncodingWrapper) -> Result<Vec<u8>> {
|
||||
Ok(target.encode(input).await?)
|
||||
}
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
#[cfg(target_os = "macos")]
|
||||
mod mac_watcher;
|
||||
|
||||
pub mod encodings;
|
||||
#[cfg(not(target_os = "macos"))]
|
||||
pub mod fs_watcher;
|
||||
|
||||
|
@ -54,6 +55,8 @@ use smol::io::AsyncReadExt;
|
|||
#[cfg(any(test, feature = "test-support"))]
|
||||
use std::ffi::OsStr;
|
||||
|
||||
use crate::encodings::EncodingWrapper;
|
||||
|
||||
pub trait Watcher: Send + Sync {
|
||||
fn add(&self, path: &Path) -> Result<()>;
|
||||
fn remove(&self, path: &Path) -> Result<()>;
|
||||
|
@ -108,6 +111,16 @@ pub trait Fs: Send + Sync {
|
|||
async fn load(&self, path: &Path) -> Result<String> {
|
||||
Ok(String::from_utf8(self.load_bytes(path).await?)?)
|
||||
}
|
||||
|
||||
/// Load a file with the specified encoding, returning a UTF-8 string.
///
/// Default implementation: reads the raw bytes with `load_bytes` and then
/// decodes them with `encodings::to_utf8` using `encoding`.
///
/// # Errors
///
/// Propagates a failure from either the byte load or the decode step.
|
||||
async fn load_with_encoding(
|
||||
&self,
|
||||
path: PathBuf,
|
||||
encoding: EncodingWrapper,
|
||||
) -> anyhow::Result<String> {
|
||||
// Read first, decode second; `?` returns early if either step fails.
Ok(encodings::to_utf8(self.load_bytes(path.as_path()).await?, encoding).await?)
|
||||
}
|
||||
|
||||
async fn load_bytes(&self, path: &Path) -> Result<Vec<u8>>;
|
||||
async fn atomic_write(&self, path: PathBuf, text: String) -> Result<()>;
|
||||
async fn save(&self, path: &Path, text: &Rope, line_ending: LineEnding) -> Result<()>;
|
||||
|
@ -539,8 +552,12 @@ impl Fs for RealFs {
|
|||
|
||||
async fn load(&self, path: &Path) -> Result<String> {
|
||||
let path = path.to_path_buf();
|
||||
let text = smol::unblock(|| std::fs::read_to_string(path)).await?;
|
||||
Ok(text)
|
||||
let encoding = EncodingWrapper::new(encoding::all::UTF_8);
|
||||
let text =
|
||||
smol::unblock(async || Ok(encodings::to_utf8(std::fs::read(path)?, encoding).await?))
|
||||
.await
|
||||
.await;
|
||||
text
|
||||
}
|
||||
async fn load_bytes(&self, path: &Path) -> Result<Vec<u8>> {
|
||||
let path = path.to_path_buf();
|
||||
|
|
|
@ -21,7 +21,8 @@ use anyhow::{Context as _, Result};
|
|||
pub use clock::ReplicaId;
|
||||
use clock::{AGENT_REPLICA_ID, Lamport};
|
||||
use collections::HashMap;
|
||||
use fs::MTime;
|
||||
use encoding::Encoding;
|
||||
use fs::{Fs, MTime, RealFs};
|
||||
use futures::channel::oneshot;
|
||||
use gpui::{
|
||||
App, AppContext as _, Context, Entity, EventEmitter, HighlightStyle, SharedString, StyledText,
|
||||
|
@ -401,6 +402,10 @@ pub trait LocalFile: File {
|
|||
|
||||
/// Loads the file's contents from disk.
|
||||
fn load_bytes(&self, cx: &App) -> Task<Result<Vec<u8>>>;
|
||||
|
||||
/// Loads the file contents from disk, decoding them with the given encoding.
|
||||
fn load_with_encoding(&self, cx: &App, encoding: &'static dyn Encoding)
|
||||
-> Task<Result<String>>;
|
||||
}
|
||||
|
||||
/// The auto-indent behavior associated with an editing operation.
|
||||
|
@ -1276,12 +1281,15 @@ impl Buffer {
|
|||
/// Reloads the contents of the buffer from disk.
|
||||
pub fn reload(&mut self, cx: &Context<Self>) -> oneshot::Receiver<Option<Transaction>> {
|
||||
let (tx, rx) = futures::channel::oneshot::channel();
|
||||
let encoding = self.encoding.clone();
|
||||
let prev_version = self.text.version();
|
||||
self.reload_task = Some(cx.spawn(async move |this, cx| {
|
||||
let Some((new_mtime, new_text)) = this.update(cx, |this, cx| {
|
||||
let file = this.file.as_ref()?.as_local()?;
|
||||
|
||||
Some((file.disk_state().mtime(), file.load(cx)))
|
||||
Some((
|
||||
file.disk_state().mtime(),
|
||||
file.load_with_encoding(cx, encoding),
|
||||
))
|
||||
})?
|
||||
else {
|
||||
return Ok(());
|
||||
|
@ -4967,6 +4975,14 @@ impl LocalFile for TestFile {
|
|||
fn load_bytes(&self, _cx: &App) -> Task<Result<Vec<u8>>> {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
/// Stub implementation of `load_with_encoding`.
///
/// Neither `cx` nor `encoding` is read; any call panics via `unimplemented!()`.
fn load_with_encoding(
|
||||
&self,
|
||||
cx: &App,
|
||||
encoding: &'static dyn Encoding,
|
||||
) -> Task<Result<String>> {
|
||||
unimplemented!()
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn contiguous_ranges(
|
||||
|
|
|
@ -48,6 +48,8 @@ sum_tree.workspace = true
|
|||
text.workspace = true
|
||||
util.workspace = true
|
||||
workspace-hack.workspace = true
|
||||
encoding = "0.2.33"
|
||||
|
||||
|
||||
[dev-dependencies]
|
||||
clock = { workspace = true, features = ["test-support"] }
|
||||
|
|
|
@ -7,7 +7,11 @@ use ::ignore::gitignore::{Gitignore, GitignoreBuilder};
|
|||
use anyhow::{Context as _, Result, anyhow};
|
||||
use clock::ReplicaId;
|
||||
use collections::{HashMap, HashSet, VecDeque};
|
||||
use fs::{Fs, MTime, PathEvent, RemoveOptions, Watcher, copy_recursive, read_dir_items};
|
||||
use encoding::Encoding;
|
||||
use fs::{
|
||||
Fs, MTime, PathEvent, RemoveOptions, Watcher, copy_recursive, encodings::EncodingWrapper,
|
||||
read_dir_items,
|
||||
};
|
||||
use futures::{
|
||||
FutureExt as _, Stream, StreamExt,
|
||||
channel::{
|
||||
|
@ -3361,6 +3365,19 @@ impl language::LocalFile for File {
|
|||
let fs = worktree.fs.clone();
|
||||
cx.background_spawn(async move { fs.load_bytes(&abs_path?).await })
|
||||
}
|
||||
|
||||
/// Loads this file's contents through the worktree's `fs`, decoding them
/// with `encoding`, on a task spawned via `cx.background_spawn`.
///
/// # Panics
///
/// Panics if `as_local()` returns `None` for this file's worktree, because
/// the result is unwrapped.
fn load_with_encoding(
|
||||
&self,
|
||||
cx: &App,
|
||||
encoding: &'static dyn Encoding,
|
||||
) -> Task<Result<String>> {
|
||||
let worktree = self.worktree.read(cx).as_local().unwrap();
|
||||
// The result of `absolutize` is not checked here; it is unwrapped with `?`
// inside the spawned task, so a failure surfaces as the task's error.
let path = worktree.absolutize(&self.path);
|
||||
let fs = worktree.fs.clone();
|
||||
|
||||
// Wrap the encoding reference so it can be moved into the `async move`
// block below (the wrapper is declared `Send`/`Sync` in `fs::encodings`).
let encoding = EncodingWrapper::new(encoding);
|
||||
cx.background_spawn(async move { fs.load_with_encoding(path?, encoding).await })
|
||||
}
|
||||
}
|
||||
|
||||
impl File {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue