Add a new `load_with_encoding`
function to handle files with various encodings.
Modify `Buffer::reload` in `buffer.rs` to use this new function, allowing Zed to open files with any encoding in UTF-8 mode. In files containing bytes that are invalid in UTF-8, those bytes are replaced with the � (U+FFFD replacement) character. Add comments and documentation.
This commit is contained in:
parent
43357f689c
commit
0e1f9f689c
10 changed files with 112 additions and 21 deletions
|
@ -1,21 +1,40 @@
|
|||
use anyhow::{Error, Result};
|
||||
|
||||
use encoding::Encoding;
|
||||
|
||||
/// Character encodings enumerated by this module.
///
/// NOTE(review): the variant names suggest UTF-8, ISO 8859-1 and CP865, but
/// nothing in the visible part of the file reads this enum — confirm whether
/// it is still in use.
pub enum CharacterEncoding {
|
||||
Utf8,
|
||||
Iso8859_1,
|
||||
Cp865,
|
||||
}
|
||||
/// A wrapper around a `&'static dyn Encoding` for which this file declares `Send` and `Sync`.
|
||||
/// Stated rationale: since the reference is static, it is safe to send it across threads.
///
/// NOTE(review): a `'static` lifetime by itself does not make the referent
/// thread-safe; the `unsafe impl`s are only sound if every wrapped encoding
/// can be shared between threads — confirm against the `encoding` crate.
|
||||
pub struct EncodingWrapper(&'static dyn Encoding);
|
||||
|
||||
pub fn to_utf8<'a>(input: Vec<u8>, encoding: &'a impl encoding::Encoding) -> String {
|
||||
match encoding.decode(&input, encoding::DecoderTrap::Strict) {
|
||||
Ok(v) => return v,
|
||||
Err(_) => panic!(),
|
||||
// SAFETY: the only field is a `&'static dyn Encoding`.
// NOTE(review): these impls assert thread safety that the compiler does not
// derive for `dyn Encoding`; they presumably rely on the wrapped encodings
// being immutable, shareable values — confirm before wrapping anything else.
unsafe impl Send for EncodingWrapper {}
|
||||
unsafe impl Sync for EncodingWrapper {}
|
||||
|
||||
impl EncodingWrapper {
|
||||
/// Wraps the given static encoding reference.
pub fn new(encoding: &'static dyn Encoding) -> EncodingWrapper {
|
||||
EncodingWrapper(encoding)
|
||||
}
|
||||
|
||||
/// Decodes `input` from the wrapped encoding into a `String`.
///
/// Decoding uses `DecoderTrap::Replace` rather than a strict trap.
/// Any error reported by the decoder is converted to an `anyhow::Error`
/// carrying the decoder's message.
///
/// The body contains no await point; the decoding runs synchronously when
/// the returned future is polled.
pub async fn decode(&self, input: Vec<u8>) -> Result<String> {
|
||||
match self.0.decode(&input, encoding::DecoderTrap::Replace) {
|
||||
Ok(v) => Ok(v),
|
||||
// Re-wrap the decoder's message as an `anyhow::Error`.
Err(e) => Err(Error::msg(e.to_string())),
|
||||
}
|
||||
}
|
||||
|
||||
/// Encodes `input` into bytes of the wrapped encoding.
///
/// Encoding uses `EncoderTrap::Replace` rather than a strict trap.
/// Any error reported by the encoder is converted to an `anyhow::Error`
/// carrying the encoder's message.
///
/// The body contains no await point; the encoding runs synchronously when
/// the returned future is polled.
pub async fn encode(&self, input: String) -> Result<Vec<u8>> {
|
||||
match self.0.encode(&input, encoding::EncoderTrap::Replace) {
|
||||
Ok(v) => Ok(v),
|
||||
// Re-wrap the encoder's message as an `anyhow::Error`.
Err(e) => Err(Error::msg(e.to_string())),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to<'a>(input: String, target: &'a impl encoding::Encoding) -> Vec<u8> {
|
||||
match target.encode(&input, encoding::EncoderTrap::Strict) {
|
||||
Ok(v) => v,
|
||||
Err(_) => panic!(),
|
||||
}
|
||||
/// Convert a byte vector from a specified encoding to a UTF-8 string.
///
/// Delegates to `EncodingWrapper::decode` on `encoding` and propagates its
/// error unchanged. The lifetime parameter `'a` is not used by the signature.
|
||||
pub async fn to_utf8<'a>(input: Vec<u8>, encoding: EncodingWrapper) -> Result<String> {
|
||||
Ok(encoding.decode(input).await?)
|
||||
}
|
||||
|
||||
/// Convert a UTF-8 string to a byte vector in a specified encoding.
///
/// Delegates to `EncodingWrapper::encode` on `target` and propagates its
/// error unchanged. The lifetime parameter `'a` is not used by the signature.
|
||||
pub async fn from_utf8<'a>(input: String, target: EncodingWrapper) -> Result<Vec<u8>> {
|
||||
Ok(target.encode(input).await?)
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue