Add Charset type and Charset based decoding

embed
Manos Pitsidianakis 2018-08-08 10:41:25 +03:00
parent f16fd889e4
commit a8fed3a042
Signed by: Manos Pitsidianakis
GPG Key ID: 73627C2F690DF710
10 changed files with 244 additions and 129 deletions

View File

@ -51,6 +51,9 @@ impl Folder {
children: children,
}
}
/// Returns the `u64` hash stored in this folder's `hash` field.
pub fn hash(&self) -> u64 {
self.hash
}
/// Returns this folder's `path` field borrowed as a string slice.
pub fn path(&self) -> &str {
&self.path
}

View File

@ -27,6 +27,7 @@ use std::error::Error;
use std::fmt;
use std::io;
use std::result;
use std::borrow::Cow;
use nom;
@ -74,6 +75,13 @@ impl From<nom::IError> for MeliError {
}
}
/// Converts a borrowed-or-owned string message into a `MeliError`.
///
/// This lets `?` be used on results whose error type is `Cow<str>`.
impl<'a> From<Cow<'a, str>> for MeliError {
    #[inline]
    fn from(kind: Cow<'a, str>) -> MeliError {
        // Use the message text verbatim. Formatting it with `{:?}` would wrap
        // it in double quotes and escape its contents, which then leaks into
        // every place the error is displayed.
        MeliError::new(kind.into_owned())
    }
}
//use std::option;
//impl From<option::NoneError> for MeliError {
// #[inline]

View File

@ -172,6 +172,7 @@ impl MailBackend for MaildirType {
if MaildirType::is_valid(&f).is_err() {
continue;
}
eprintln!("watching {}", f.path());
let mut p = PathBuf::from(&f.path());
p.push("cur");
watcher.watch(&p, RecursiveMode::NonRecursive).unwrap();
@ -182,8 +183,17 @@ impl MailBackend for MaildirType {
loop {
match rx.recv() {
Ok(event) => match event {
DebouncedEvent::Create(pathbuf) => {
let path = pathbuf.parent().unwrap().to_str().unwrap();
DebouncedEvent::Create(mut pathbuf) | DebouncedEvent::Remove(mut pathbuf) => {
let path = if pathbuf.is_dir() {
if pathbuf.ends_with("cur") | pathbuf.ends_with("new") {
pathbuf.pop();
}
pathbuf.to_str().unwrap()
} else {
pathbuf.pop();
pathbuf.parent().unwrap().to_str().unwrap()
};
eprintln!(" got event in {}", path);
let mut hasher = DefaultHasher::new();
hasher.write(path.as_bytes());

View File

@ -0,0 +1,107 @@
use std::fmt::{Display, Formatter, Result as FmtResult};
/// Character sets that can be named by a `charset` parameter.
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum Charset {
    Ascii,
    UTF8,
    UTF16,
    ISO8859_1,
    ISO8859_2,
    ISO8859_7,
    Windows1252,
    Windows1253,
    GBK,
    GB2312,
}

impl Default for Charset {
    /// The default charset is UTF-8.
    fn default() -> Self {
        Charset::UTF8
    }
}

impl<'a> From<&'a [u8]> for Charset {
    /// Maps a charset label to its `Charset` variant.
    ///
    /// The comparison ignores ASCII case, so `UTF-8`, `utf-8` and `Utf-8` are
    /// all accepted. Labels that are not recognised fall back to
    /// `Charset::Ascii`.
    fn from(b: &'a [u8]) -> Self {
        // Normalise once so every label only has to be listed in lower case.
        match b.to_ascii_lowercase().as_slice() {
            b"us-ascii" | b"ascii" => Charset::Ascii,
            b"utf-8" => Charset::UTF8,
            b"utf-16" => Charset::UTF16,
            b"iso-8859-1" => Charset::ISO8859_1,
            b"iso-8859-2" => Charset::ISO8859_2,
            b"iso-8859-7" => Charset::ISO8859_7,
            b"windows-1252" => Charset::Windows1252,
            b"windows-1253" => Charset::Windows1253,
            b"gbk" => Charset::GBK,
            b"gb2312" => Charset::GB2312,
            _ => Charset::Ascii,
        }
    }
}
/// Subtype of a `multipart/*` content type.
#[derive(Clone, Debug, PartialEq)]
pub enum MultipartType {
    Mixed,
    Alternative,
    Digest,
    /// Any other subtype, kept as the bytes of its tag.
    Unsupported { tag: Vec<u8> },
}

impl Display for MultipartType {
    /// Writes the full `multipart/<subtype>` name; an unsupported tag is
    /// rendered lossily as UTF-8.
    fn fmt(&self, f: &mut Formatter) -> FmtResult {
        let full_name = match *self {
            MultipartType::Mixed => "multipart/mixed",
            MultipartType::Alternative => "multipart/alternative",
            MultipartType::Digest => "multipart/digest",
            MultipartType::Unsupported { ref tag } => {
                return write!(f, "multipart/{}", String::from_utf8_lossy(tag));
            }
        };
        f.write_str(full_name)
    }
}
/// Top-level content type of a message part.
#[derive(Clone, Debug)]
pub enum ContentType {
    /// `text/*`, together with the charset of the body.
    Text { charset: Charset },
    /// `multipart/*`, together with the boundary bytes.
    Multipart { boundary: Vec<u8> },
    /// Any other type, kept as the bytes of its tag.
    Unsupported { tag: Vec<u8> },
}

impl Default for ContentType {
    /// Defaults to text in UTF-8.
    fn default() -> Self {
        let charset = Charset::UTF8;
        ContentType::Text { charset }
    }
}

impl Display for ContentType {
    /// Writes only the top-level type name; an unsupported tag is rendered
    /// lossily as UTF-8.
    fn fmt(&self, f: &mut Formatter) -> FmtResult {
        let type_name = match *self {
            ContentType::Text { .. } => "text",
            ContentType::Multipart { .. } => "multipart",
            ContentType::Unsupported { ref tag } => {
                return write!(f, "{}", String::from_utf8_lossy(tag));
            }
        };
        f.write_str(type_name)
    }
}
/// Subtype half of a content type.
#[derive(Clone, Debug, PartialEq)]
pub enum ContentSubType {
    Plain,
    /// Any other subtype, kept as the bytes of its tag.
    Other { tag: Vec<u8> },
}

impl Display for ContentSubType {
    /// Writes `plain`, or the tag rendered lossily as UTF-8.
    fn fmt(&self, f: &mut Formatter) -> FmtResult {
        let text = match *self {
            ContentSubType::Plain => "plain".into(),
            ContentSubType::Other { ref tag } => String::from_utf8_lossy(tag),
        };
        f.write_str(&text)
    }
}
/// Content-transfer-encoding attached to an attachment.
///
/// The decoding code elsewhere in this change base64-decodes `Base64`,
/// runs a quoted-printable parser for `QuotedPrintable`, and returns the
/// bytes of the remaining variants without a transfer-decoding step.
#[derive(Clone, Debug)]
pub enum ContentTransferEncoding {
_8Bit,
_7Bit,
Base64,
QuotedPrintable,
// Presumably the name of an encoding that was not recognised, kept as raw
// bytes -- TODO confirm against the code that constructs it.
Other { tag: Vec<u8> },
}

View File

@ -18,41 +18,12 @@
* You should have received a copy of the GNU General Public License
* along with meli. If not, see <http://www.gnu.org/licenses/>.
*/
use mailbox::email::parser;
use mailbox::email::parser::BytesExt;
use std::fmt::{Display, Formatter, Result as FmtResult};
use std::str;
use data_encoding::BASE64_MIME;
use mailbox::email::parser;
/*
*
* Data
* Text { content: Vec<u8> }
* Multipart
*/
/// Subtype of a `multipart/*` content type.
#[derive(Clone, Debug, PartialEq)]
pub enum MultipartType {
    Mixed,
    Alternative,
    Digest,
    /// Any other subtype, kept as the bytes of its tag.
    Unsupported { tag: Vec<u8> },
}

impl Display for MultipartType {
    /// Writes the full `multipart/<subtype>` name; an unsupported tag is
    /// rendered lossily as UTF-8.
    fn fmt(&self, f: &mut Formatter) -> FmtResult {
        let full_name = match *self {
            MultipartType::Mixed => "multipart/mixed",
            MultipartType::Alternative => "multipart/alternative",
            MultipartType::Digest => "multipart/digest",
            MultipartType::Unsupported { ref tag } => {
                return write!(f, "multipart/{}", String::from_utf8_lossy(tag));
            }
        };
        f.write_str(full_name)
    }
}
pub use mailbox::email::attachment_types::*;
#[derive(Clone, Debug)]
pub enum AttachmentType {
@ -68,6 +39,30 @@ pub enum AttachmentType {
},
}
/*
*
* Data
* Text { content: Vec<u8> }
* Multipart
*/
/// Collects the content type, content-transfer-encoding and raw bytes of a
/// message part; `build` consumes it and returns an `Attachment`.
///
/// TODO: Add example.
pub struct AttachmentBuilder {
// (type, subtype) pair.
content_type: (ContentType, ContentSubType),
// `new` initialises this to `_7Bit`.
content_transfer_encoding: ContentTransferEncoding,
// Copy of the content bytes handed to `new`; not decoded at that point.
raw: Vec<u8>,
}
/// A message part as returned by `AttachmentBuilder::build`.
#[derive(Clone, Debug)]
pub struct Attachment {
// (type, subtype) pair.
content_type: (ContentType, ContentSubType),
content_transfer_encoding: ContentTransferEncoding,
// Presumably the part's bytes as they appeared in the message, before any
// decoding -- TODO confirm against `build`.
raw: Vec<u8>,
attachment_type: AttachmentType,
}
impl Display for AttachmentType {
fn fmt(&self, f: &mut Formatter) -> FmtResult {
match self {
@ -77,57 +72,11 @@ impl Display for AttachmentType {
}
}
}
/// Top-level content type of a message part.
#[derive(Clone, Debug)]
pub enum ContentType {
    Text,
    /// `multipart/*`, together with the boundary bytes.
    Multipart { boundary: Vec<u8> },
    /// Any other type, kept as the bytes of its tag.
    Unsupported { tag: Vec<u8> },
}

impl Display for ContentType {
    /// Writes only the top-level type name; an unsupported tag is rendered
    /// lossily as UTF-8.
    fn fmt(&self, f: &mut Formatter) -> FmtResult {
        let type_name = match *self {
            ContentType::Text => "text",
            ContentType::Multipart { .. } => "multipart",
            ContentType::Unsupported { ref tag } => {
                return write!(f, "{}", String::from_utf8_lossy(tag));
            }
        };
        f.write_str(type_name)
    }
}
/// Subtype half of a content type.
#[derive(Clone, Debug, PartialEq)]
pub enum ContentSubType {
    Plain,
    /// Any other subtype, kept as the bytes of its tag.
    Other { tag: Vec<u8> },
}

impl Display for ContentSubType {
    /// Writes `plain`, or the tag rendered lossily as UTF-8.
    fn fmt(&self, f: &mut Formatter) -> FmtResult {
        let text = match *self {
            ContentSubType::Plain => "plain".into(),
            ContentSubType::Other { ref tag } => String::from_utf8_lossy(tag),
        };
        f.write_str(&text)
    }
}
/// Content-transfer-encoding attached to an attachment.
///
/// The decoding code in this file base64-decodes `Base64`, runs a
/// quoted-printable parser for `QuotedPrintable`, and returns the bytes of
/// the remaining variants without a transfer-decoding step.
#[derive(Clone, Debug)]
pub enum ContentTransferEncoding {
_8Bit,
_7Bit,
Base64,
QuotedPrintable,
// Presumably the name of an encoding that was not recognised, kept as raw
// bytes -- TODO confirm against the code that constructs it.
Other { tag: Vec<u8> },
}
/// Collects the content type, content-transfer-encoding and raw bytes of a
/// message part; `build` consumes it and returns an `Attachment`.
///
/// TODO: Add example.
pub struct AttachmentBuilder {
// (type, subtype) pair.
content_type: (ContentType, ContentSubType),
// `new` initialises this to `_7Bit`.
content_transfer_encoding: ContentTransferEncoding,
// Copy of the content bytes handed to `new`; not decoded at that point.
raw: Vec<u8>,
}
impl AttachmentBuilder {
pub fn new(content: &[u8]) -> Self {
AttachmentBuilder {
content_type: (ContentType::Text, ContentSubType::Plain),
content_type: (Default::default() , ContentSubType::Plain),
content_transfer_encoding: ContentTransferEncoding::_7Bit,
raw: content.to_vec(),
}
@ -152,7 +101,13 @@ impl AttachmentBuilder {
};
self.content_type.1 = ContentSubType::Other { tag: cst.into() };
} else if ct.eq_ignore_ascii_case(b"text") {
self.content_type.0 = ContentType::Text;
self.content_type.0 = Default::default();
for (n, v) in params {
if n.eq_ignore_ascii_case(b"charset") {
self.content_type.0 = ContentType::Text { charset: Charset::from(v) };
break;
}
}
if !cst.eq_ignore_ascii_case(b"plain") {
self.content_type.1 = ContentSubType::Other {
tag: cst.to_ascii_lowercase(),
@ -189,42 +144,29 @@ impl AttachmentBuilder {
self
}
fn decode(&self) -> Vec<u8> {
// TODO: Use charset for decoding
let charset = match self.content_type.0 {
ContentType::Text{ charset: c } => c,
_ => Default::default(),
};
let decoded_result = parser::decode_charset(&self.raw, charset);
let b: &[u8] = decoded_result.as_ref().map(|v| v.as_bytes()).unwrap_or_else(|_| &self.raw);
match self.content_transfer_encoding {
ContentTransferEncoding::Base64 => match BASE64_MIME.decode(
str::from_utf8(&self.raw)
.unwrap()
.trim()
.lines()
.fold(String::with_capacity(self.raw.len()), |mut acc, x| {
acc.push_str(x);
acc
})
.as_bytes(),
) {
Ok(ref s) => {
let s: Vec<u8> = s.clone();
{
let slice = &s[..];
if slice.find(b"\r\n").is_some() {
s.replace(b"\r\n", b"\n");
}
}
s
}
_ => self.raw.clone(),
ContentTransferEncoding::Base64 => match BASE64_MIME.decode(b) {
Ok(v) => v,
_ => b.to_vec(),
},
ContentTransferEncoding::QuotedPrintable => parser::quoted_printable_text(&self.raw)
ContentTransferEncoding::QuotedPrintable => parser::quoted_printable_text(b)
.to_full_result()
.unwrap(),
ContentTransferEncoding::_7Bit
| ContentTransferEncoding::_8Bit
| ContentTransferEncoding::Other { .. } => self.raw.clone(),
ContentTransferEncoding::_7Bit
| ContentTransferEncoding::_8Bit
| ContentTransferEncoding::Other { .. } => b.to_vec(),
}
}
pub fn build(self) -> Attachment {
let attachment_type = match self.content_type.0 {
ContentType::Text => AttachmentType::Text {
ContentType::Text { .. } => AttachmentType::Text {
content: self.decode(),
},
ContentType::Multipart { boundary: ref b } => {
@ -295,15 +237,6 @@ impl AttachmentBuilder {
}
}
/// A message part as returned by `AttachmentBuilder::build`.
#[derive(Clone, Debug)]
pub struct Attachment {
// (type, subtype) pair.
content_type: (ContentType, ContentSubType),
content_transfer_encoding: ContentTransferEncoding,
// Presumably the part's bytes as they appeared in the message, before any
// decoding -- TODO confirm against `build`.
raw: Vec<u8>,
attachment_type: AttachmentType,
}
impl Display for Attachment {
fn fmt(&self, f: &mut Formatter) -> FmtResult {
@ -409,17 +342,23 @@ pub fn interpret_format_flowed(_t: &str) -> String {
}
pub fn decode(a: &Attachment) -> Vec<u8> {
// TODO: Use charset for decoding
let charset = match a.content_type.0 {
ContentType::Text{ charset: c } => c,
_ => Default::default(),
};
let decoded_result = parser::decode_charset(a.bytes(), charset);
let b: &[u8] = decoded_result.as_ref().map(|v| v.as_bytes()).unwrap_or_else(|_| a.bytes());
match a.content_transfer_encoding {
ContentTransferEncoding::Base64 => match BASE64_MIME.decode(a.bytes()) {
ContentTransferEncoding::Base64 => match BASE64_MIME.decode(b) {
Ok(v) => v,
_ => a.bytes().to_vec(),
_ => b.to_vec(),
},
ContentTransferEncoding::QuotedPrintable => parser::quoted_printed_bytes(&a.bytes())
ContentTransferEncoding::QuotedPrintable => parser::quoted_printed_bytes(b)
.to_full_result()
.unwrap(),
ContentTransferEncoding::_7Bit
| ContentTransferEncoding::_8Bit
| ContentTransferEncoding::Other { .. } => a.bytes().to_vec(),
| ContentTransferEncoding::Other { .. } => b.to_vec(),
}
}

View File

@ -22,13 +22,14 @@
/*!
* Email parsing, handling, sending etc.
*/
mod attachment_types;
pub mod attachments;
pub mod parser;
pub use self::attachments::*;
pub mod parser;
use parser::BytesExt;
use error::{MeliError, Result};
use mailbox::backends::BackendOpGenerator;
use parser::BytesExt;
use std::borrow::Cow;
use std::cmp::Ordering;

View File

@ -166,7 +166,7 @@ named!(pub attachment<(std::vec::Vec<(&[u8], &[u8])>, &[u8])>,
/* TODO: make a map of encodings and decoding functions so that they can be reused and easily
* extended */
use encoding::all::{ISO_8859_1, ISO_8859_2, ISO_8859_7, WINDOWS_1253, GBK};
use encoding::all::{ISO_8859_1, ISO_8859_2, ISO_8859_7, WINDOWS_1253, WINDOWS_1252, GBK};
fn encoded_word(input: &[u8]) -> IResult<&[u8], Vec<u8>> {
if input.len() < 5 {
@ -264,8 +264,41 @@ fn encoded_word(input: &[u8]) -> IResult<&[u8], Vec<u8>> {
IResult::Error(error_code!(ErrorKind::Custom(43)))
}
/// Converts the bytes `s`, encoded in `charset`, into a `String`.
///
/// # Errors
///
/// Returns an error when `s` is not valid in `charset`, and for
/// `Charset::UTF16`, which is not supported yet. The input comes from
/// message data, so no case panics.
pub fn decode_charset(s: &[u8], charset: Charset) -> Result<String> {
    match charset {
        Charset::UTF8 | Charset::Ascii => {
            // Report invalid UTF-8 through the same `Cow<str>` error path the
            // other decoders use, instead of unwrapping.
            Ok(String::from_utf8(s.to_vec())
                .map_err(|e| ::std::borrow::Cow::<str>::Owned(e.to_string()))?)
        }
        Charset::ISO8859_7 => Ok(ISO_8859_7.decode(s, DecoderTrap::Strict)?),
        Charset::ISO8859_1 => Ok(ISO_8859_1.decode(s, DecoderTrap::Strict)?),
        Charset::ISO8859_2 => Ok(ISO_8859_2.decode(s, DecoderTrap::Strict)?),
        // GBK is a superset of GB2312, so one decoder serves both labels.
        Charset::GBK | Charset::GB2312 => Ok(GBK.decode(s, DecoderTrap::Strict)?),
        Charset::Windows1252 => Ok(WINDOWS_1252.decode(s, DecoderTrap::Strict)?),
        Charset::Windows1253 => Ok(WINDOWS_1253.decode(s, DecoderTrap::Strict)?),
        Charset::UTF16 => {
            Err(::std::borrow::Cow::Borrowed("UTF-16 decoding is not supported yet").into())
        }
    }
}
// Parser that matches a single "_" and yields the space byte b' ' in its place.
named!(qp_underscore_header<u8>, do_parse!(tag!("_") >> ({ b' ' })));
/// For atoms in Header values.
named!(
pub quoted_printed_bytes<Vec<u8>>,
many0!(alt_complete!(

View File

@ -184,6 +184,7 @@ fn main() {
},
ThreadEvent::RefreshMailbox { hash : h } => {
eprintln!("got refresh mailbox hash {:x}", h);
state.hash_to_folder(h);
//state.rcv_event(UIEvent { id: 0, event_type: UIEventType::Notification(n.clone())});
state.redraw();
/* Don't handle this yet. */

View File

@ -292,7 +292,7 @@ impl Component for MailView {
let envelope: &Envelope = &mailbox.collection[envelope_idx];
if let Some(u) = envelope.body().attachments().get(lidx) {
match u.content_type().0 {
ContentType::Text => {
ContentType::Text { .. } => {
self.mode = ViewMode::Attachment(lidx);
self.dirty = true;
}

View File

@ -41,6 +41,7 @@ use termion::{clear, cursor, style};
/// A context container for loaded settings, accounts, UI changes, etc.
pub struct Context {
pub accounts: Vec<Account>,
mailbox_hashes: FnvHashMap<u64, (usize, usize)>,
pub settings: Settings,
pub runtime_settings: Settings,
@ -151,6 +152,8 @@ impl State<std::io::Stdout> {
context: Context {
accounts,
mailbox_hashes: FnvHashMap::with_capacity_and_hasher(1, Default::default()),
_backends: backends,
settings: settings.clone(),
runtime_settings: settings,
@ -172,14 +175,24 @@ impl State<std::io::Stdout> {
cursor::Goto(1, 1)
).unwrap();
s.flush();
for account in &mut s.context.accounts {
for (x, account) in s.context.accounts.iter_mut().enumerate() {
for (y, folder) in account.settings.folders.iter().enumerate() {
s.context.mailbox_hashes.insert(folder.hash(), (x, y));
}
let sender = s.sender.clone();
account.watch(RefreshEventConsumer::new(Box::new(move |r| {
sender.send(ThreadEvent::from(r));
})));
}
for (k, v) in &s.context.mailbox_hashes {
eprintln!("{:x} -> {:?}", k, v);
}
s
}
/// Logs the `(account index, folder index)` pair registered in
/// `mailbox_hashes` for the mailbox `hash`.
///
/// A hash with no registered entry is logged as unknown. Indexing the map
/// directly would panic in that case.
pub fn hash_to_folder(&self, hash: u64) {
    match self.context.mailbox_hashes.get(&hash) {
        Some(indices) => eprintln!("got refresh {:?}", indices),
        None => eprintln!("got refresh for unknown mailbox hash {:x}", hash),
    }
}
/// If an owned thread returns a `ThreadEvent::ThreadJoin` event to `State` then it must remove
/// the thread from its list and `join` it.