melib: add more encodings
Add more encodings already supported by the `encoding` crate: iso-8859-3, iso-8859-4, iso-8859-5, iso-8859-6, iso-8859-8, iso-8859-10, iso-8859-13, iso-8859-14, iso-8859-16, gb-2312, big-5, iso-2022-jp, euc-jp, koi8-r, koi8-u, utf-16.
jmap-eventsource
parent
afa74ccfb5
commit
9a9c876f4a
|
@ -31,8 +31,17 @@ pub enum Charset {
|
|||
UTF16,
|
||||
ISO8859_1,
|
||||
ISO8859_2,
|
||||
ISO8859_3,
|
||||
ISO8859_4,
|
||||
ISO8859_5,
|
||||
ISO8859_6,
|
||||
ISO8859_7,
|
||||
ISO8859_8,
|
||||
ISO8859_10,
|
||||
ISO8859_13,
|
||||
ISO8859_14,
|
||||
ISO8859_15,
|
||||
ISO8859_16,
|
||||
Windows1250,
|
||||
Windows1251,
|
||||
Windows1252,
|
||||
|
@ -41,6 +50,9 @@ pub enum Charset {
|
|||
GB2312,
|
||||
BIG5,
|
||||
ISO2022JP,
|
||||
EUCJP,
|
||||
KOI8R,
|
||||
KOI8U,
|
||||
}
|
||||
|
||||
impl Default for Charset {
|
||||
|
@ -67,14 +79,49 @@ impl<'a> From<&'a [u8]> for Charset {
|
|||
b if b.eq_ignore_ascii_case(b"iso-8859-2") || b.eq_ignore_ascii_case(b"iso8859-2") => {
|
||||
Charset::ISO8859_2
|
||||
}
|
||||
b if b.eq_ignore_ascii_case(b"iso-8859-3") || b.eq_ignore_ascii_case(b"iso8859-3") => {
|
||||
Charset::ISO8859_3
|
||||
}
|
||||
b if b.eq_ignore_ascii_case(b"iso-8859-4") || b.eq_ignore_ascii_case(b"iso8859-4") => {
|
||||
Charset::ISO8859_4
|
||||
}
|
||||
b if b.eq_ignore_ascii_case(b"iso-8859-5") || b.eq_ignore_ascii_case(b"iso8859-5") => {
|
||||
Charset::ISO8859_5
|
||||
}
|
||||
b if b.eq_ignore_ascii_case(b"iso-8859-6") || b.eq_ignore_ascii_case(b"iso8859-6") => {
|
||||
Charset::ISO8859_6
|
||||
}
|
||||
b if b.eq_ignore_ascii_case(b"iso-8859-7") || b.eq_ignore_ascii_case(b"iso8859-7") => {
|
||||
Charset::ISO8859_7
|
||||
}
|
||||
b if b.eq_ignore_ascii_case(b"iso-8859-8") || b.eq_ignore_ascii_case(b"iso8859-8") => {
|
||||
Charset::ISO8859_8
|
||||
}
|
||||
b if b.eq_ignore_ascii_case(b"iso-8859-10")
|
||||
|| b.eq_ignore_ascii_case(b"iso8859-10") =>
|
||||
{
|
||||
Charset::ISO8859_10
|
||||
}
|
||||
b if b.eq_ignore_ascii_case(b"iso-8859-13")
|
||||
|| b.eq_ignore_ascii_case(b"iso8859-13") =>
|
||||
{
|
||||
Charset::ISO8859_13
|
||||
}
|
||||
b if b.eq_ignore_ascii_case(b"iso-8859-14")
|
||||
|| b.eq_ignore_ascii_case(b"iso8859-14") =>
|
||||
{
|
||||
Charset::ISO8859_14
|
||||
}
|
||||
b if b.eq_ignore_ascii_case(b"iso-8859-15")
|
||||
|| b.eq_ignore_ascii_case(b"iso8859-15") =>
|
||||
{
|
||||
Charset::ISO8859_15
|
||||
}
|
||||
b if b.eq_ignore_ascii_case(b"iso-8859-16")
|
||||
|| b.eq_ignore_ascii_case(b"iso8859-16") =>
|
||||
{
|
||||
Charset::ISO8859_16
|
||||
}
|
||||
b if b.eq_ignore_ascii_case(b"windows-1250")
|
||||
|| b.eq_ignore_ascii_case(b"windows1250") =>
|
||||
{
|
||||
|
@ -101,6 +148,9 @@ impl<'a> From<&'a [u8]> for Charset {
|
|||
}
|
||||
b if b.eq_ignore_ascii_case(b"big5") => Charset::BIG5,
|
||||
b if b.eq_ignore_ascii_case(b"iso-2022-jp") => Charset::ISO2022JP,
|
||||
b if b.eq_ignore_ascii_case(b"euc-jp") => Charset::EUCJP,
|
||||
b if b.eq_ignore_ascii_case(b"koi8-r") => Charset::KOI8R,
|
||||
b if b.eq_ignore_ascii_case(b"koi8-u") => Charset::KOI8U,
|
||||
_ => {
|
||||
debug!("unknown tag is {:?}", str::from_utf8(b));
|
||||
Charset::Ascii
|
||||
|
@ -117,16 +167,28 @@ impl Display for Charset {
|
|||
Charset::UTF16 => write!(f, "utf-16"),
|
||||
Charset::ISO8859_1 => write!(f, "iso-8859-1"),
|
||||
Charset::ISO8859_2 => write!(f, "iso-8859-2"),
|
||||
Charset::ISO8859_3 => write!(f, "iso-8859-3"),
|
||||
Charset::ISO8859_4 => write!(f, "iso-8859-4"),
|
||||
Charset::ISO8859_5 => write!(f, "iso-8859-5"),
|
||||
Charset::ISO8859_6 => write!(f, "iso-8859-6"),
|
||||
Charset::ISO8859_7 => write!(f, "iso-8859-7"),
|
||||
Charset::ISO8859_8 => write!(f, "iso-8859-8"),
|
||||
Charset::ISO8859_10 => write!(f, "iso-8859-10"),
|
||||
Charset::ISO8859_13 => write!(f, "iso-8859-13"),
|
||||
Charset::ISO8859_14 => write!(f, "iso-8859-14"),
|
||||
Charset::ISO8859_15 => write!(f, "iso-8859-15"),
|
||||
Charset::ISO8859_16 => write!(f, "iso-8859-16"),
|
||||
Charset::Windows1250 => write!(f, "windows-1250"),
|
||||
Charset::Windows1251 => write!(f, "windows-1251"),
|
||||
Charset::Windows1252 => write!(f, "windows-1252"),
|
||||
Charset::Windows1253 => write!(f, "windows-1253"),
|
||||
Charset::GBK => write!(f, "GBK"),
|
||||
Charset::GBK => write!(f, "gbk"),
|
||||
Charset::GB2312 => write!(f, "gb2312"),
|
||||
Charset::BIG5 => write!(f, "BIG5"),
|
||||
Charset::ISO2022JP => write!(f, "ISO-2022-JP"),
|
||||
Charset::BIG5 => write!(f, "big5"),
|
||||
Charset::ISO2022JP => write!(f, "iso-2022-jp"),
|
||||
Charset::EUCJP => write!(f, "euc-jp"),
|
||||
Charset::KOI8R => write!(f, "koi8-r"),
|
||||
Charset::KOI8U => write!(f, "koi8-u"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1877,18 +1877,33 @@ pub mod encodings {
|
|||
Charset::UTF8 | Charset::Ascii => Ok(String::from_utf8_lossy(s).to_string()),
|
||||
Charset::ISO8859_1 => Ok(ISO_8859_1.decode(s, DecoderTrap::Strict)?),
|
||||
Charset::ISO8859_2 => Ok(ISO_8859_2.decode(s, DecoderTrap::Strict)?),
|
||||
Charset::ISO8859_3 => Ok(ISO_8859_3.decode(s, DecoderTrap::Strict)?),
|
||||
Charset::ISO8859_4 => Ok(ISO_8859_4.decode(s, DecoderTrap::Strict)?),
|
||||
Charset::ISO8859_5 => Ok(ISO_8859_5.decode(s, DecoderTrap::Strict)?),
|
||||
Charset::ISO8859_6 => Ok(ISO_8859_6.decode(s, DecoderTrap::Strict)?),
|
||||
Charset::ISO8859_7 => Ok(ISO_8859_7.decode(s, DecoderTrap::Strict)?),
|
||||
Charset::ISO8859_8 => Ok(ISO_8859_8.decode(s, DecoderTrap::Strict)?),
|
||||
Charset::ISO8859_10 => Ok(ISO_8859_10.decode(s, DecoderTrap::Strict)?),
|
||||
Charset::ISO8859_13 => Ok(ISO_8859_13.decode(s, DecoderTrap::Strict)?),
|
||||
Charset::ISO8859_14 => Ok(ISO_8859_14.decode(s, DecoderTrap::Strict)?),
|
||||
Charset::ISO8859_15 => Ok(ISO_8859_15.decode(s, DecoderTrap::Strict)?),
|
||||
Charset::ISO8859_16 => Ok(ISO_8859_16.decode(s, DecoderTrap::Strict)?),
|
||||
Charset::GBK => Ok(GBK.decode(s, DecoderTrap::Strict)?),
|
||||
Charset::Windows1250 => Ok(WINDOWS_1250.decode(s, DecoderTrap::Strict)?),
|
||||
Charset::Windows1251 => Ok(WINDOWS_1251.decode(s, DecoderTrap::Strict)?),
|
||||
Charset::Windows1252 => Ok(WINDOWS_1252.decode(s, DecoderTrap::Strict)?),
|
||||
Charset::Windows1253 => Ok(WINDOWS_1253.decode(s, DecoderTrap::Strict)?),
|
||||
// Unimplemented:
|
||||
Charset::GB2312 => Ok(String::from_utf8_lossy(s).to_string()),
|
||||
Charset::UTF16 => Ok(String::from_utf8_lossy(s).to_string()),
|
||||
Charset::BIG5 => Ok(String::from_utf8_lossy(s).to_string()),
|
||||
Charset::ISO2022JP => Ok(String::from_utf8_lossy(s).to_string()),
|
||||
Charset::KOI8R => Ok(KOI8_R.decode(s, DecoderTrap::Strict)?),
|
||||
Charset::KOI8U => Ok(KOI8_U.decode(s, DecoderTrap::Strict)?),
|
||||
Charset::BIG5 => Ok(BIG5_2003.decode(s, DecoderTrap::Strict)?),
|
||||
Charset::GB2312 => {
|
||||
Ok(encoding::codec::simpchinese::GBK_ENCODING.decode(s, DecoderTrap::Strict)?)
|
||||
}
|
||||
Charset::UTF16 => {
|
||||
Ok(encoding::codec::utf_16::UTF_16LE_ENCODING.decode(s, DecoderTrap::Strict)?)
|
||||
}
|
||||
Charset::ISO2022JP => Ok(ISO_2022_JP.decode(s, DecoderTrap::Strict)?),
|
||||
Charset::EUCJP => Ok(EUC_JP.decode(s, DecoderTrap::Strict)?),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue