Add a new `load_with_encoding` function to handle files with various encodings.
Modify `Buffer::reload` in `buffer.rs` to use this new function, allowing Zed to open files with any encoding in UTF-8 mode. Files containing bytes that are invalid in UTF-8 will have those bytes replaced with the � (U+FFFD replacement) character. Add comments and documentation.
This commit is contained in:
parent
43357f689c
commit
0e1f9f689c
10 changed files with 112 additions and 21 deletions
2
Cargo.lock
generated
2
Cargo.lock
generated
|
@ -3643,6 +3643,7 @@ dependencies = [
|
||||||
"dirs 4.0.0",
|
"dirs 4.0.0",
|
||||||
"edit_prediction",
|
"edit_prediction",
|
||||||
"editor",
|
"editor",
|
||||||
|
"encoding",
|
||||||
"fs",
|
"fs",
|
||||||
"futures 0.3.31",
|
"futures 0.3.31",
|
||||||
"gpui",
|
"gpui",
|
||||||
|
@ -20107,6 +20108,7 @@ dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"clock",
|
"clock",
|
||||||
"collections",
|
"collections",
|
||||||
|
"encoding",
|
||||||
"fs",
|
"fs",
|
||||||
"futures 0.3.31",
|
"futures 0.3.31",
|
||||||
"fuzzy",
|
"fuzzy",
|
||||||
|
|
|
@ -53,6 +53,8 @@ util.workspace = true
|
||||||
workspace.workspace = true
|
workspace.workspace = true
|
||||||
workspace-hack.workspace = true
|
workspace-hack.workspace = true
|
||||||
itertools.workspace = true
|
itertools.workspace = true
|
||||||
|
encoding = "0.2.33"
|
||||||
|
|
||||||
|
|
||||||
[target.'cfg(windows)'.dependencies]
|
[target.'cfg(windows)'.dependencies]
|
||||||
async-std = { version = "1.12.0", features = ["unstable"] }
|
async-std = { version = "1.12.0", features = ["unstable"] }
|
||||||
|
|
|
@ -1193,6 +1193,7 @@ async fn get_copilot_lsp(fs: Arc<dyn Fs>, node_runtime: NodeRuntime) -> anyhow::
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
use encoding::Encoding;
|
||||||
use gpui::TestAppContext;
|
use gpui::TestAppContext;
|
||||||
use util::path;
|
use util::path;
|
||||||
|
|
||||||
|
@ -1406,6 +1407,10 @@ mod tests {
|
||||||
fn load_bytes(&self, _cx: &App) -> Task<Result<Vec<u8>>> {
|
fn load_bytes(&self, _cx: &App) -> Task<Result<Vec<u8>>> {
|
||||||
unimplemented!()
|
unimplemented!()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn load_with_encoding(&self, _: &App, _: &'static dyn Encoding) -> Task<Result<String>> {
|
||||||
|
unimplemented!()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -14,10 +14,11 @@ use workspace::{ItemHandle, StatusItemView, Workspace};
|
||||||
|
|
||||||
use crate::selectors::save_or_reopen::{EncodingSaveOrReopenSelector, get_current_encoding};
|
use crate::selectors::save_or_reopen::{EncodingSaveOrReopenSelector, get_current_encoding};
|
||||||
|
|
||||||
|
/// A status bar item that shows the current file encoding and allows changing it.
|
||||||
pub struct EncodingIndicator {
|
pub struct EncodingIndicator {
|
||||||
pub encoding: Option<&'static dyn Encoding>,
|
pub encoding: Option<&'static dyn Encoding>,
|
||||||
pub workspace: WeakEntity<Workspace>,
|
pub workspace: WeakEntity<Workspace>,
|
||||||
observe: Option<Subscription>,
|
observe: Option<Subscription>, // Subscription to observe changes in the active editor
|
||||||
}
|
}
|
||||||
|
|
||||||
pub mod selectors;
|
pub mod selectors;
|
||||||
|
@ -93,6 +94,7 @@ impl StatusItemView for EncodingIndicator {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Get a human-readable name for the given encoding.
|
||||||
pub fn encoding_name(encoding: &'static dyn Encoding) -> String {
|
pub fn encoding_name(encoding: &'static dyn Encoding) -> String {
|
||||||
let name = encoding.name();
|
let name = encoding.name();
|
||||||
|
|
||||||
|
@ -140,6 +142,8 @@ pub fn encoding_name(encoding: &'static dyn Encoding) -> String {
|
||||||
.to_string()
|
.to_string()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Get an encoding from its index in the predefined list.
|
||||||
|
/// If the index is out of range, UTF-8 is returned as a default.
|
||||||
pub fn encoding_from_index(index: usize) -> &'static dyn Encoding {
|
pub fn encoding_from_index(index: usize) -> &'static dyn Encoding {
|
||||||
match index {
|
match index {
|
||||||
0 => UTF_8,
|
0 => UTF_8,
|
||||||
|
|
|
@ -19,6 +19,8 @@ pub mod save_or_reopen {
|
||||||
|
|
||||||
use crate::selectors::encoding::{Action, EncodingSelector, EncodingSelectorDelegate};
|
use crate::selectors::encoding::{Action, EncodingSelector, EncodingSelectorDelegate};
|
||||||
|
|
||||||
|
/// A modal view that allows the user to select between saving with a different encoding or
|
||||||
|
/// reopening with a different encoding.
|
||||||
pub struct EncodingSaveOrReopenSelector {
|
pub struct EncodingSaveOrReopenSelector {
|
||||||
picker: Entity<Picker<EncodingSaveOrReopenDelegate>>,
|
picker: Entity<Picker<EncodingSaveOrReopenDelegate>>,
|
||||||
pub current_selection: usize,
|
pub current_selection: usize,
|
||||||
|
@ -43,6 +45,8 @@ pub mod save_or_reopen {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Toggle the modal view for selecting between saving with a different encoding or
|
||||||
|
/// reopening with a different encoding.
|
||||||
pub fn toggle(workspace: &mut Workspace, window: &mut Window, cx: &mut Context<Workspace>) {
|
pub fn toggle(workspace: &mut Workspace, window: &mut Window, cx: &mut Context<Workspace>) {
|
||||||
let weak_workspace = workspace.weak_handle();
|
let weak_workspace = workspace.weak_handle();
|
||||||
workspace.toggle_modal(window, cx, |window, cx| {
|
workspace.toggle_modal(window, cx, |window, cx| {
|
||||||
|
@ -100,6 +104,7 @@ pub mod save_or_reopen {
|
||||||
(&self.actions[0].string, &self.actions[1].string)
|
(&self.actions[0].string, &self.actions[1].string)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Handle the action selected by the user.
|
||||||
pub fn post_selection(
|
pub fn post_selection(
|
||||||
&self,
|
&self,
|
||||||
cx: &mut Context<Picker<EncodingSaveOrReopenDelegate>>,
|
cx: &mut Context<Picker<EncodingSaveOrReopenDelegate>>,
|
||||||
|
@ -281,6 +286,7 @@ pub mod encoding {
|
||||||
|
|
||||||
use crate::encoding_from_index;
|
use crate::encoding_from_index;
|
||||||
|
|
||||||
|
/// A modal view that allows the user to select an encoding from a list of encodings.
|
||||||
pub struct EncodingSelector {
|
pub struct EncodingSelector {
|
||||||
picker: Entity<Picker<EncodingSelectorDelegate>>,
|
picker: Entity<Picker<EncodingSelectorDelegate>>,
|
||||||
action: Action,
|
action: Action,
|
||||||
|
@ -459,6 +465,7 @@ pub mod encoding {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// The action to perform after selecting an encoding.
|
||||||
pub enum Action {
|
pub enum Action {
|
||||||
Save,
|
Save,
|
||||||
Reopen,
|
Reopen,
|
||||||
|
|
|
@ -1,21 +1,40 @@
|
||||||
|
use anyhow::{Error, Result};
|
||||||
|
|
||||||
use encoding::Encoding;
|
use encoding::Encoding;
|
||||||
|
|
||||||
pub enum CharacterEncoding {
|
/// A wrapper around `encoding::Encoding` to implement `Send` and `Sync`.
|
||||||
Utf8,
|
/// Since the reference is static, it is safe to send it across threads.
|
||||||
Iso8859_1,
|
pub struct EncodingWrapper(&'static dyn Encoding);
|
||||||
Cp865,
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn to_utf8<'a>(input: Vec<u8>, encoding: &'a impl encoding::Encoding) -> String {
|
unsafe impl Send for EncodingWrapper {}
|
||||||
match encoding.decode(&input, encoding::DecoderTrap::Strict) {
|
unsafe impl Sync for EncodingWrapper {}
|
||||||
Ok(v) => return v,
|
|
||||||
Err(_) => panic!(),
|
impl EncodingWrapper {
|
||||||
|
pub fn new(encoding: &'static dyn Encoding) -> EncodingWrapper {
|
||||||
|
EncodingWrapper(encoding)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn decode(&self, input: Vec<u8>) -> Result<String> {
|
||||||
|
match self.0.decode(&input, encoding::DecoderTrap::Replace) {
|
||||||
|
Ok(v) => Ok(v),
|
||||||
|
Err(e) => Err(Error::msg(e.to_string())),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn encode(&self, input: String) -> Result<Vec<u8>> {
|
||||||
|
match self.0.encode(&input, encoding::EncoderTrap::Replace) {
|
||||||
|
Ok(v) => Ok(v),
|
||||||
|
Err(e) => Err(Error::msg(e.to_string())),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn to<'a>(input: String, target: &'a impl encoding::Encoding) -> Vec<u8> {
|
/// Convert a byte vector from a specified encoding to a UTF-8 string.
|
||||||
match target.encode(&input, encoding::EncoderTrap::Strict) {
|
pub async fn to_utf8<'a>(input: Vec<u8>, encoding: EncodingWrapper) -> Result<String> {
|
||||||
Ok(v) => v,
|
Ok(encoding.decode(input).await?)
|
||||||
Err(_) => panic!(),
|
}
|
||||||
}
|
|
||||||
|
/// Convert a UTF-8 string to a byte vector in a specified encoding.
|
||||||
|
pub async fn from_utf8<'a>(input: String, target: EncodingWrapper) -> Result<Vec<u8>> {
|
||||||
|
Ok(target.encode(input).await?)
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
#[cfg(target_os = "macos")]
|
#[cfg(target_os = "macos")]
|
||||||
mod mac_watcher;
|
mod mac_watcher;
|
||||||
|
|
||||||
|
pub mod encodings;
|
||||||
#[cfg(not(target_os = "macos"))]
|
#[cfg(not(target_os = "macos"))]
|
||||||
pub mod fs_watcher;
|
pub mod fs_watcher;
|
||||||
|
|
||||||
|
@ -54,6 +55,8 @@ use smol::io::AsyncReadExt;
|
||||||
#[cfg(any(test, feature = "test-support"))]
|
#[cfg(any(test, feature = "test-support"))]
|
||||||
use std::ffi::OsStr;
|
use std::ffi::OsStr;
|
||||||
|
|
||||||
|
use crate::encodings::EncodingWrapper;
|
||||||
|
|
||||||
pub trait Watcher: Send + Sync {
|
pub trait Watcher: Send + Sync {
|
||||||
fn add(&self, path: &Path) -> Result<()>;
|
fn add(&self, path: &Path) -> Result<()>;
|
||||||
fn remove(&self, path: &Path) -> Result<()>;
|
fn remove(&self, path: &Path) -> Result<()>;
|
||||||
|
@ -108,6 +111,16 @@ pub trait Fs: Send + Sync {
|
||||||
async fn load(&self, path: &Path) -> Result<String> {
|
async fn load(&self, path: &Path) -> Result<String> {
|
||||||
Ok(String::from_utf8(self.load_bytes(path).await?)?)
|
Ok(String::from_utf8(self.load_bytes(path).await?)?)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Load a file with the specified encoding, returning a UTF-8 string.
|
||||||
|
async fn load_with_encoding(
|
||||||
|
&self,
|
||||||
|
path: PathBuf,
|
||||||
|
encoding: EncodingWrapper,
|
||||||
|
) -> anyhow::Result<String> {
|
||||||
|
Ok(encodings::to_utf8(self.load_bytes(path.as_path()).await?, encoding).await?)
|
||||||
|
}
|
||||||
|
|
||||||
async fn load_bytes(&self, path: &Path) -> Result<Vec<u8>>;
|
async fn load_bytes(&self, path: &Path) -> Result<Vec<u8>>;
|
||||||
async fn atomic_write(&self, path: PathBuf, text: String) -> Result<()>;
|
async fn atomic_write(&self, path: PathBuf, text: String) -> Result<()>;
|
||||||
async fn save(&self, path: &Path, text: &Rope, line_ending: LineEnding) -> Result<()>;
|
async fn save(&self, path: &Path, text: &Rope, line_ending: LineEnding) -> Result<()>;
|
||||||
|
@ -539,8 +552,12 @@ impl Fs for RealFs {
|
||||||
|
|
||||||
async fn load(&self, path: &Path) -> Result<String> {
|
async fn load(&self, path: &Path) -> Result<String> {
|
||||||
let path = path.to_path_buf();
|
let path = path.to_path_buf();
|
||||||
let text = smol::unblock(|| std::fs::read_to_string(path)).await?;
|
let encoding = EncodingWrapper::new(encoding::all::UTF_8);
|
||||||
Ok(text)
|
let text =
|
||||||
|
smol::unblock(async || Ok(encodings::to_utf8(std::fs::read(path)?, encoding).await?))
|
||||||
|
.await
|
||||||
|
.await;
|
||||||
|
text
|
||||||
}
|
}
|
||||||
async fn load_bytes(&self, path: &Path) -> Result<Vec<u8>> {
|
async fn load_bytes(&self, path: &Path) -> Result<Vec<u8>> {
|
||||||
let path = path.to_path_buf();
|
let path = path.to_path_buf();
|
||||||
|
|
|
@ -21,7 +21,8 @@ use anyhow::{Context as _, Result};
|
||||||
pub use clock::ReplicaId;
|
pub use clock::ReplicaId;
|
||||||
use clock::{AGENT_REPLICA_ID, Lamport};
|
use clock::{AGENT_REPLICA_ID, Lamport};
|
||||||
use collections::HashMap;
|
use collections::HashMap;
|
||||||
use fs::MTime;
|
use encoding::Encoding;
|
||||||
|
use fs::{Fs, MTime, RealFs};
|
||||||
use futures::channel::oneshot;
|
use futures::channel::oneshot;
|
||||||
use gpui::{
|
use gpui::{
|
||||||
App, AppContext as _, Context, Entity, EventEmitter, HighlightStyle, SharedString, StyledText,
|
App, AppContext as _, Context, Entity, EventEmitter, HighlightStyle, SharedString, StyledText,
|
||||||
|
@ -401,6 +402,10 @@ pub trait LocalFile: File {
|
||||||
|
|
||||||
/// Loads the file's contents from disk.
|
/// Loads the file's contents from disk.
|
||||||
fn load_bytes(&self, cx: &App) -> Task<Result<Vec<u8>>>;
|
fn load_bytes(&self, cx: &App) -> Task<Result<Vec<u8>>>;
|
||||||
|
|
||||||
|
/// Loads the file contents from disk, decoding them with the given encoding.
|
||||||
|
fn load_with_encoding(&self, cx: &App, encoding: &'static dyn Encoding)
|
||||||
|
-> Task<Result<String>>;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// The auto-indent behavior associated with an editing operation.
|
/// The auto-indent behavior associated with an editing operation.
|
||||||
|
@ -1276,12 +1281,15 @@ impl Buffer {
|
||||||
/// Reloads the contents of the buffer from disk.
|
/// Reloads the contents of the buffer from disk.
|
||||||
pub fn reload(&mut self, cx: &Context<Self>) -> oneshot::Receiver<Option<Transaction>> {
|
pub fn reload(&mut self, cx: &Context<Self>) -> oneshot::Receiver<Option<Transaction>> {
|
||||||
let (tx, rx) = futures::channel::oneshot::channel();
|
let (tx, rx) = futures::channel::oneshot::channel();
|
||||||
|
let encoding = self.encoding.clone();
|
||||||
let prev_version = self.text.version();
|
let prev_version = self.text.version();
|
||||||
self.reload_task = Some(cx.spawn(async move |this, cx| {
|
self.reload_task = Some(cx.spawn(async move |this, cx| {
|
||||||
let Some((new_mtime, new_text)) = this.update(cx, |this, cx| {
|
let Some((new_mtime, new_text)) = this.update(cx, |this, cx| {
|
||||||
let file = this.file.as_ref()?.as_local()?;
|
let file = this.file.as_ref()?.as_local()?;
|
||||||
|
Some((
|
||||||
Some((file.disk_state().mtime(), file.load(cx)))
|
file.disk_state().mtime(),
|
||||||
|
file.load_with_encoding(cx, encoding),
|
||||||
|
))
|
||||||
})?
|
})?
|
||||||
else {
|
else {
|
||||||
return Ok(());
|
return Ok(());
|
||||||
|
@ -4967,6 +4975,14 @@ impl LocalFile for TestFile {
|
||||||
fn load_bytes(&self, _cx: &App) -> Task<Result<Vec<u8>>> {
|
fn load_bytes(&self, _cx: &App) -> Task<Result<Vec<u8>>> {
|
||||||
unimplemented!()
|
unimplemented!()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn load_with_encoding(
|
||||||
|
&self,
|
||||||
|
cx: &App,
|
||||||
|
encoding: &'static dyn Encoding,
|
||||||
|
) -> Task<Result<String>> {
|
||||||
|
unimplemented!()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn contiguous_ranges(
|
pub(crate) fn contiguous_ranges(
|
||||||
|
|
|
@ -48,6 +48,8 @@ sum_tree.workspace = true
|
||||||
text.workspace = true
|
text.workspace = true
|
||||||
util.workspace = true
|
util.workspace = true
|
||||||
workspace-hack.workspace = true
|
workspace-hack.workspace = true
|
||||||
|
encoding = "0.2.33"
|
||||||
|
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
clock = { workspace = true, features = ["test-support"] }
|
clock = { workspace = true, features = ["test-support"] }
|
||||||
|
|
|
@ -7,7 +7,11 @@ use ::ignore::gitignore::{Gitignore, GitignoreBuilder};
|
||||||
use anyhow::{Context as _, Result, anyhow};
|
use anyhow::{Context as _, Result, anyhow};
|
||||||
use clock::ReplicaId;
|
use clock::ReplicaId;
|
||||||
use collections::{HashMap, HashSet, VecDeque};
|
use collections::{HashMap, HashSet, VecDeque};
|
||||||
use fs::{Fs, MTime, PathEvent, RemoveOptions, Watcher, copy_recursive, read_dir_items};
|
use encoding::Encoding;
|
||||||
|
use fs::{
|
||||||
|
Fs, MTime, PathEvent, RemoveOptions, Watcher, copy_recursive, encodings::EncodingWrapper,
|
||||||
|
read_dir_items,
|
||||||
|
};
|
||||||
use futures::{
|
use futures::{
|
||||||
FutureExt as _, Stream, StreamExt,
|
FutureExt as _, Stream, StreamExt,
|
||||||
channel::{
|
channel::{
|
||||||
|
@ -3361,6 +3365,19 @@ impl language::LocalFile for File {
|
||||||
let fs = worktree.fs.clone();
|
let fs = worktree.fs.clone();
|
||||||
cx.background_spawn(async move { fs.load_bytes(&abs_path?).await })
|
cx.background_spawn(async move { fs.load_bytes(&abs_path?).await })
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn load_with_encoding(
|
||||||
|
&self,
|
||||||
|
cx: &App,
|
||||||
|
encoding: &'static dyn Encoding,
|
||||||
|
) -> Task<Result<String>> {
|
||||||
|
let worktree = self.worktree.read(cx).as_local().unwrap();
|
||||||
|
let path = worktree.absolutize(&self.path);
|
||||||
|
let fs = worktree.fs.clone();
|
||||||
|
|
||||||
|
let encoding = EncodingWrapper::new(encoding);
|
||||||
|
cx.background_spawn(async move { fs.load_with_encoding(path?, encoding).await })
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl File {
|
impl File {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue