melib: add more encodings

Add more encodings already supported by `encoding` crate:

  - iso-8859-3,
  - iso-8859-4,
  - iso-8859-5,
  - iso-8859-6,
  - iso-8859-8,
  - iso-8859-10,
  - iso-8859-13,
  - iso-8859-14,
  - iso-8859-16,
  - gb-2312
  - big-5
  - iso-2022-jp
  - euc-jp
  - koi8-r
  - koi8-u
  - utf-16
jmap-eventsource
Manos Pitsidianakis 2020-10-26 21:23:54 +02:00
parent afa74ccfb5
commit 9a9c876f4a
Signed by: Manos Pitsidianakis
GPG Key ID: 73627C2F690DF710
2 changed files with 85 additions and 8 deletions

View File

@ -31,8 +31,17 @@ pub enum Charset {
UTF16,
ISO8859_1,
ISO8859_2,
ISO8859_3,
ISO8859_4,
ISO8859_5,
ISO8859_6,
ISO8859_7,
ISO8859_8,
ISO8859_10,
ISO8859_13,
ISO8859_14,
ISO8859_15,
ISO8859_16,
Windows1250,
Windows1251,
Windows1252,
@ -41,6 +50,9 @@ pub enum Charset {
GB2312,
BIG5,
ISO2022JP,
EUCJP,
KOI8R,
KOI8U,
}
impl Default for Charset {
@ -67,14 +79,49 @@ impl<'a> From<&'a [u8]> for Charset {
b if b.eq_ignore_ascii_case(b"iso-8859-2") || b.eq_ignore_ascii_case(b"iso8859-2") => {
Charset::ISO8859_2
}
b if b.eq_ignore_ascii_case(b"iso-8859-3") || b.eq_ignore_ascii_case(b"iso8859-3") => {
Charset::ISO8859_3
}
b if b.eq_ignore_ascii_case(b"iso-8859-4") || b.eq_ignore_ascii_case(b"iso8859-4") => {
Charset::ISO8859_4
}
b if b.eq_ignore_ascii_case(b"iso-8859-5") || b.eq_ignore_ascii_case(b"iso8859-5") => {
Charset::ISO8859_5
}
b if b.eq_ignore_ascii_case(b"iso-8859-6") || b.eq_ignore_ascii_case(b"iso8859-6") => {
Charset::ISO8859_6
}
b if b.eq_ignore_ascii_case(b"iso-8859-7") || b.eq_ignore_ascii_case(b"iso8859-7") => {
Charset::ISO8859_7
}
b if b.eq_ignore_ascii_case(b"iso-8859-8") || b.eq_ignore_ascii_case(b"iso8859-8") => {
Charset::ISO8859_8
}
b if b.eq_ignore_ascii_case(b"iso-8859-10")
|| b.eq_ignore_ascii_case(b"iso8859-10") =>
{
Charset::ISO8859_10
}
b if b.eq_ignore_ascii_case(b"iso-8859-13")
|| b.eq_ignore_ascii_case(b"iso8859-13") =>
{
Charset::ISO8859_13
}
b if b.eq_ignore_ascii_case(b"iso-8859-14")
|| b.eq_ignore_ascii_case(b"iso8859-14") =>
{
Charset::ISO8859_14
}
b if b.eq_ignore_ascii_case(b"iso-8859-15")
|| b.eq_ignore_ascii_case(b"iso8859-15") =>
{
Charset::ISO8859_15
}
b if b.eq_ignore_ascii_case(b"iso-8859-16")
|| b.eq_ignore_ascii_case(b"iso8859-16") =>
{
Charset::ISO8859_16
}
b if b.eq_ignore_ascii_case(b"windows-1250")
|| b.eq_ignore_ascii_case(b"windows1250") =>
{
@ -101,6 +148,9 @@ impl<'a> From<&'a [u8]> for Charset {
}
b if b.eq_ignore_ascii_case(b"big5") => Charset::BIG5,
b if b.eq_ignore_ascii_case(b"iso-2022-jp") => Charset::ISO2022JP,
b if b.eq_ignore_ascii_case(b"euc-jp") => Charset::EUCJP,
b if b.eq_ignore_ascii_case(b"koi8-r") => Charset::KOI8R,
b if b.eq_ignore_ascii_case(b"koi8-u") => Charset::KOI8U,
_ => {
debug!("unknown tag is {:?}", str::from_utf8(b));
Charset::Ascii
@ -117,16 +167,28 @@ impl Display for Charset {
Charset::UTF16 => write!(f, "utf-16"),
Charset::ISO8859_1 => write!(f, "iso-8859-1"),
Charset::ISO8859_2 => write!(f, "iso-8859-2"),
Charset::ISO8859_3 => write!(f, "iso-8859-3"),
Charset::ISO8859_4 => write!(f, "iso-8859-4"),
Charset::ISO8859_5 => write!(f, "iso-8859-5"),
Charset::ISO8859_6 => write!(f, "iso-8859-6"),
Charset::ISO8859_7 => write!(f, "iso-8859-7"),
Charset::ISO8859_8 => write!(f, "iso-8859-8"),
Charset::ISO8859_10 => write!(f, "iso-8859-10"),
Charset::ISO8859_13 => write!(f, "iso-8859-13"),
Charset::ISO8859_14 => write!(f, "iso-8859-14"),
Charset::ISO8859_15 => write!(f, "iso-8859-15"),
Charset::ISO8859_16 => write!(f, "iso-8859-16"),
Charset::Windows1250 => write!(f, "windows-1250"),
Charset::Windows1251 => write!(f, "windows-1251"),
Charset::Windows1252 => write!(f, "windows-1252"),
Charset::Windows1253 => write!(f, "windows-1253"),
Charset::GBK => write!(f, "GBK"),
Charset::GBK => write!(f, "gbk"),
Charset::GB2312 => write!(f, "gb2312"),
Charset::BIG5 => write!(f, "BIG5"),
Charset::ISO2022JP => write!(f, "ISO-2022-JP"),
Charset::BIG5 => write!(f, "big5"),
Charset::ISO2022JP => write!(f, "iso-2022-jp"),
Charset::EUCJP => write!(f, "euc-jp"),
Charset::KOI8R => write!(f, "koi8-r"),
Charset::KOI8U => write!(f, "koi8-u"),
}
}
}

View File

@ -1877,18 +1877,33 @@ pub mod encodings {
Charset::UTF8 | Charset::Ascii => Ok(String::from_utf8_lossy(s).to_string()),
Charset::ISO8859_1 => Ok(ISO_8859_1.decode(s, DecoderTrap::Strict)?),
Charset::ISO8859_2 => Ok(ISO_8859_2.decode(s, DecoderTrap::Strict)?),
Charset::ISO8859_3 => Ok(ISO_8859_3.decode(s, DecoderTrap::Strict)?),
Charset::ISO8859_4 => Ok(ISO_8859_4.decode(s, DecoderTrap::Strict)?),
Charset::ISO8859_5 => Ok(ISO_8859_5.decode(s, DecoderTrap::Strict)?),
Charset::ISO8859_6 => Ok(ISO_8859_6.decode(s, DecoderTrap::Strict)?),
Charset::ISO8859_7 => Ok(ISO_8859_7.decode(s, DecoderTrap::Strict)?),
Charset::ISO8859_8 => Ok(ISO_8859_8.decode(s, DecoderTrap::Strict)?),
Charset::ISO8859_10 => Ok(ISO_8859_10.decode(s, DecoderTrap::Strict)?),
Charset::ISO8859_13 => Ok(ISO_8859_13.decode(s, DecoderTrap::Strict)?),
Charset::ISO8859_14 => Ok(ISO_8859_14.decode(s, DecoderTrap::Strict)?),
Charset::ISO8859_15 => Ok(ISO_8859_15.decode(s, DecoderTrap::Strict)?),
Charset::ISO8859_16 => Ok(ISO_8859_16.decode(s, DecoderTrap::Strict)?),
Charset::GBK => Ok(GBK.decode(s, DecoderTrap::Strict)?),
Charset::Windows1250 => Ok(WINDOWS_1250.decode(s, DecoderTrap::Strict)?),
Charset::Windows1251 => Ok(WINDOWS_1251.decode(s, DecoderTrap::Strict)?),
Charset::Windows1252 => Ok(WINDOWS_1252.decode(s, DecoderTrap::Strict)?),
Charset::Windows1253 => Ok(WINDOWS_1253.decode(s, DecoderTrap::Strict)?),
// Unimplemented:
Charset::GB2312 => Ok(String::from_utf8_lossy(s).to_string()),
Charset::UTF16 => Ok(String::from_utf8_lossy(s).to_string()),
Charset::BIG5 => Ok(String::from_utf8_lossy(s).to_string()),
Charset::ISO2022JP => Ok(String::from_utf8_lossy(s).to_string()),
Charset::KOI8R => Ok(KOI8_R.decode(s, DecoderTrap::Strict)?),
Charset::KOI8U => Ok(KOI8_U.decode(s, DecoderTrap::Strict)?),
Charset::BIG5 => Ok(BIG5_2003.decode(s, DecoderTrap::Strict)?),
Charset::GB2312 => {
Ok(encoding::codec::simpchinese::GBK_ENCODING.decode(s, DecoderTrap::Strict)?)
}
Charset::UTF16 => {
Ok(encoding::codec::utf_16::UTF_16LE_ENCODING.decode(s, DecoderTrap::Strict)?)
}
Charset::ISO2022JP => Ok(ISO_2022_JP.decode(s, DecoderTrap::Strict)?),
Charset::EUCJP => Ok(EUC_JP.decode(s, DecoderTrap::Strict)?),
}
}