add 'GB18030' charset

pull/144/head
Alex.F 2021-03-19 22:40:26 +08:00 committed by Manos Pitsidianakis
parent 13c5798c7b
commit 69916f267b
Signed by: Manos Pitsidianakis
GPG Key ID: 73627C2F690DF710
2 changed files with 14 additions and 0 deletions

View File

@@ -48,6 +48,7 @@ pub enum Charset {
Windows1253,
GBK,
GB2312,
GB18030,
BIG5,
ISO2022JP,
EUCJP,
@@ -143,6 +144,9 @@ impl<'a> From<&'a [u8]> for Charset {
Charset::Windows1253
}
b if b.eq_ignore_ascii_case(b"gbk") => Charset::GBK,
b if b.eq_ignore_ascii_case(b"gb18030") || b.eq_ignore_ascii_case(b"gb-18030") => {
Charset::GB18030
}
b if b.eq_ignore_ascii_case(b"gb2312") || b.eq_ignore_ascii_case(b"gb-2312") => {
Charset::GB2312
}
@@ -184,6 +188,7 @@ impl Display for Charset {
Charset::Windows1253 => write!(f, "windows-1253"),
Charset::GBK => write!(f, "gbk"),
Charset::GB2312 => write!(f, "gb2312"),
Charset::GB18030 => write!(f, "gb18030"),
Charset::BIG5 => write!(f, "big5"),
Charset::ISO2022JP => write!(f, "iso-2022-jp"),
Charset::EUCJP => write!(f, "euc-jp"),

View File

@@ -1899,6 +1899,9 @@ pub mod encodings {
Charset::GB2312 => {
Ok(encoding::codec::simpchinese::GBK_ENCODING.decode(s, DecoderTrap::Strict)?)
}
Charset::GB18030 => Ok(
encoding::codec::simpchinese::GB18030_ENCODING.decode(s, DecoderTrap::Strict)?
),
Charset::UTF16 => {
Ok(encoding::codec::utf_16::UTF_16LE_ENCODING.decode(s, DecoderTrap::Strict)?)
}
@@ -2552,6 +2555,12 @@ mod tests {
"Re: Climate crisis reality check \u{a0}EcoHustler",
std::str::from_utf8(&phrase(words.as_bytes(), false).unwrap().1).unwrap()
);
let words = r#"=?gb18030?B?zNrRtsbz0rXTys/k19S2r9eqt6LR6dak08q8/g==?="#;
assert_eq!(
"腾讯企业邮箱自动转发验证邮件",
std::str::from_utf8(&phrase(words.as_bytes(), false).unwrap().1).unwrap()
);
}
#[test]