From 64a2af377756f5525eb0b31fe1d4e872761943d7 Mon Sep 17 00:00:00 2001 From: Manos Pitsidianakis Date: Wed, 16 Sep 2020 18:09:24 +0300 Subject: [PATCH] melib/email: smarter attachment detection Look for Content-Disposition: attachment to detect attachments --- melib/src/backends/imap/protocol_parser.rs | 148 +++++++++++++++------ melib/src/email/attachments.rs | 62 +++++++-- melib/src/email/parser.rs | 24 +++- 3 files changed, 177 insertions(+), 57 deletions(-) diff --git a/melib/src/backends/imap/protocol_parser.rs b/melib/src/backends/imap/protocol_parser.rs index fb9ff9a43..f467b96de 100644 --- a/melib/src/backends/imap/protocol_parser.rs +++ b/melib/src/backends/imap/protocol_parser.rs @@ -21,7 +21,10 @@ use super::*; use crate::email::address::{Address, MailboxAddress}; -use crate::email::parser::{BytesExt, IResult}; +use crate::email::parser::{ + generic::{byte_in_range, byte_in_slice}, + BytesExt, IResult, +}; use crate::error::ResultIntoMeliError; use crate::get_path_hash; use nom::{ @@ -30,7 +33,7 @@ use nom::{ character::complete::digit1, character::is_digit, combinator::{map, map_res, opt}, - multi::{fold_many1, length_data, many0, separated_nonempty_list}, + multi::{fold_many1, length_data, many0, many1, separated_nonempty_list}, sequence::{delimited, preceded}, }; use std::convert::TryFrom; @@ -602,38 +605,10 @@ pub fn fetch_response(input: &[u8]) -> ImapParseResult> { } } else if input[i..].starts_with(b"BODYSTRUCTURE ") { i += b"BODYSTRUCTURE ".len(); - let mut struct_ptr = i; - let mut parenth_level = 0; - let mut inside_quote = false; - while struct_ptr != input.len() { - if !inside_quote { - if input[struct_ptr] == b'(' { - parenth_level += 1; - } else if input[struct_ptr] == b')' { - if parenth_level == 0 { - return debug!(Err(MeliError::new(format!( - "Unexpected input while parsing UID FETCH response. Got: `{:.40}`", - String::from_utf8_lossy(&input[struct_ptr..]) - )))); - } - parenth_level -= 1; - if parenth_level == 0 { - struct_ptr += 1; - break; - } - } else if input[struct_ptr] == b'"' { - inside_quote = true; - } - } else if input[struct_ptr] == b'\"' - && (struct_ptr == 0 || (input[struct_ptr - 1] != b'\\')) - { - inside_quote = false; - } - struct_ptr += 1; - } - has_attachments = bodystructure_has_attachments(&input[i..struct_ptr]); - i = struct_ptr; + let (rest, _has_attachments) = bodystructure_has_attachments(&input[i..])?; + has_attachments = _has_attachments; + i += input[i..].len() - rest.len(); } else if input[i..].starts_with(b")\r\n") { i += b")\r\n".len(); break; @@ -1427,11 +1402,10 @@ pub fn uid_fetch_envelopes_response( let (input, _) = tag(" ENVELOPE ")(input)?; let (input, env) = envelope(input.ltrim())?; let (input, _) = tag("BODYSTRUCTURE ")(input)?; - let (input, bodystructure) = take_until(")\r\n")(input)?; + let (input, has_attachments) = bodystructure_has_attachments(input)?; let (input, _) = tag(")\r\n")(input)?; Ok((input, { let mut env = env; - let has_attachments = bodystructure_has_attachments(bodystructure); env.set_has_attachments(has_attachments); (uid_flags.0, uid_flags.1, env) })) @@ -1439,8 +1413,43 @@ pub fn uid_fetch_envelopes_response( )(input) } -pub fn bodystructure_has_attachments(input: &[u8]) -> bool { - input.rfind(b" \"mixed\" ").is_some() || input.rfind(b" \"MIXED\" ").is_some() +pub fn bodystructure_has_attachments(input: &[u8]) -> IResult<&[u8], bool> { + let (input, _) = eat_whitespace(input)?; + let (input, _) = tag("(")(input)?; + let (mut input, _) = eat_whitespace(input)?; + let mut has_attachments = false; + let mut first_in_line = true; + while !input.is_empty() && !input.starts_with(b")") { + if input.starts_with(b"\"") || input[0].is_ascii_alphanumeric() { + let (_input, token) = astring_token(input)?; + input = _input; + if first_in_line { + has_attachments |= token.eq_ignore_ascii_case(b"attachment"); + } + } else if input.starts_with(b"(") { + let (_input, _has_attachments) = bodystructure_has_attachments(input)?; + has_attachments |= _has_attachments; + input = _input; + } + let (_input, _) = eat_whitespace(input)?; + input = _input; + first_in_line = false; + } + let (input, _) = tag(")")(input)?; + Ok((input, has_attachments)) +} + +fn eat_whitespace(mut input: &[u8]) -> IResult<&[u8], ()> { + while !input.is_empty() { + if input[0] == b' ' || input[0] == b'\n' || input[0] == b'\t' { + input = &input[1..]; + } else if input.starts_with(b"\r\n") { + input = &input[2..]; + } else { + break; + } + } + return Ok((input, ())); } #[derive(Debug, Default, Clone)] @@ -1525,7 +1534,7 @@ pub fn mailbox_token<'i>(input: &'i [u8]) -> IResult<&'i [u8], std::borrow::Cow< // astring = 1*ASTRING-CHAR / string fn astring_token(input: &[u8]) -> IResult<&[u8], &[u8]> { - alt((string_token, astring_char_tokens))(input) + alt((string_token, astring_char))(input) } // string = quoted / literal @@ -1554,9 +1563,66 @@ fn string_token(input: &[u8]) -> IResult<&[u8], &[u8]> { // atom = 1*ATOM-CHAR // ATOM-CHAR = // atom-specials = "(" / ")" / "{" / SP / CTL / list-wildcards / quoted-specials / resp-specials -fn astring_char_tokens(input: &[u8]) -> IResult<&[u8], &[u8]> { - // FIXME - is_not(" \r\n")(input) +fn astring_char(input: &[u8]) -> IResult<&[u8], &[u8]> { + let (rest, chars) = many1(atom_char)(input)?; + Ok((rest, &input[0..chars.len()])) +} + +fn atom_char(mut input: &[u8]) -> IResult<&[u8], u8> { + if input.is_empty() { + return Err(nom::Err::Error( + (input, "astring_char_tokens(): EOF").into(), + )); + } + if atom_specials(input).is_ok() { + return Err(nom::Err::Error( + (input, "astring_char_tokens(): invalid input").into(), + )); + } + let ret = input[0]; + input = &input[1..]; + Ok((input, ret)) +} + +#[inline(always)] +fn atom_specials(input: &[u8]) -> IResult<&[u8], u8> { + alt(( + raw_chars, + ctl, + list_wildcards, + quoted_specials, + resp_specials, + ))(input) +} + +#[inline(always)] +fn raw_chars(input: &[u8]) -> IResult<&[u8], u8> { + byte_in_slice(&[b'(', b')', b'{', b' '])(input) +} + +#[inline(always)] +fn list_wildcards(input: &[u8]) -> IResult<&[u8], u8> { + byte_in_slice(&[b'%', b'*'])(input) +} + +#[inline(always)] +fn quoted_specials(input: &[u8]) -> IResult<&[u8], u8> { + byte_in_slice(&[b'"', b'\\'])(input) +} + +#[inline(always)] +fn resp_specials(input: &[u8]) -> IResult<&[u8], u8> { + byte_in_slice(&[b']'])(input) +} + +#[inline(always)] +fn ctl(input: &[u8]) -> IResult<&[u8], u8> { + //U+0000—U+001F (C0 controls), U+007F (delete), and U+0080—U+009F (C1 controls + alt(( + byte_in_range(0, 0x1f), + byte_in_range(0x7f, 0x7f), + byte_in_range(0x80, 0x9f), + ))(input) } pub fn generate_envelope_hash(mailbox_path: &str, uid: &UID) -> EnvelopeHash { diff --git a/melib/src/email/attachments.rs b/melib/src/email/attachments.rs index f286ccbe9..a5a1a7c80 100644 --- a/melib/src/email/attachments.rs +++ b/melib/src/email/attachments.rs @@ -27,6 +27,7 @@ use crate::email::{ use core::fmt; use core::str; use data_encoding::BASE64_MIME; +use smallvec::SmallVec; use crate::email::attachment_types::*; @@ -146,15 +147,19 @@ impl AttachmentBuilder { break; } } - assert!(boundary.is_some()); - let boundary = boundary.unwrap().to_vec(); - let parts = Self::parts(self.body(), &boundary); + if let Some(boundary) = boundary { + let parts = Self::parts(self.body(), &boundary); - self.content_type = ContentType::Multipart { - boundary, - kind: MultipartType::from(cst), - parts, - }; + let boundary = boundary.to_vec(); + self.content_type = ContentType::Multipart { + boundary, + kind: MultipartType::from(cst), + parts, + }; + } else { + self.content_type = ContentType::default(); + return self; + } } else if ct.eq_ignore_ascii_case(b"text") { self.content_type = ContentType::default(); for (n, v) in params { @@ -465,15 +470,48 @@ impl Attachment { if bytes.is_empty() { return false; } - // FIXME: check if any part is multipart/mixed as well match parser::attachments::multipart_parts(bytes, boundary) { Ok((_, parts)) => { for p in parts { - for (n, v) in - crate::email::parser::generic::HeaderIterator(p.display_bytes(bytes)) + let (body, headers) = match parser::headers::headers_raw(p.display_bytes(bytes)) { - if !n.eq_ignore_ascii_case(b"content-type") && !v.starts_with(b"text/") { + Ok(v) => v, + Err(_err) => return false, + }; + let headers = crate::email::parser::generic::HeaderIterator(headers) + .collect::>(); + let disposition = headers + .iter() + .find(|(n, _)| n.eq_ignore_ascii_case(b"content-disposition")) + .map(|(_, v)| ContentDisposition::from(*v)) + .unwrap_or_default(); + if disposition.kind.is_attachment() { + return true; + } + if let Some(boundary) = headers + .iter() + .find(|(n, _)| n.eq_ignore_ascii_case(b"content-type")) + .and_then(|(_, v)| { + match parser::attachments::content_type(v) { + Ok((_, (ct, _cst, params))) => { + if ct.eq_ignore_ascii_case(b"multipart") { + let mut boundary = None; + for (n, v) in params { + if n.eq_ignore_ascii_case(b"boundary") { + boundary = Some(v); + break; + } + } + return boundary; + } + } + _ => {} + } + None + }) + { + if Attachment::check_if_has_attachments_quick(body, boundary) { return true; } } diff --git a/melib/src/email/parser.rs b/melib/src/email/parser.rs index be9407837..08e15cf4d 100644 --- a/melib/src/email/parser.rs +++ b/melib/src/email/parser.rs @@ -506,7 +506,22 @@ pub mod dates { pub mod generic { use super::*; - fn byte_in_range<'a>(a: u8, b: u8) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], u8> { + #[inline(always)] + pub fn byte_in_slice<'a>(slice: &'static [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], u8> { + move |input| { + if input.is_empty() { + return Err(nom::Err::Error((input, "empty input").into())); + } + if slice.contains(&input[0]) { + Ok((&input[1..], input[0])) + } else { + Err(nom::Err::Error((input, "out of range").into())) + } + } + } + + #[inline(always)] + pub fn byte_in_range<'a>(a: u8, b: u8) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], u8> { move |input| { if input.is_empty() { return Err(nom::Err::Error((input, "empty input").into())); @@ -1678,10 +1693,11 @@ pub mod attachments { pub fn content_disposition(input: &[u8]) -> IResult<&[u8], ContentDisposition> { let (input, kind) = alt((take_until(";"), take_while(|_| true)))(input.trim())?; let mut ret = ContentDisposition { - kind: if kind.trim().eq_ignore_ascii_case(b"attachment") { - ContentDispositionKind::Attachment - } else { + /* RFC2183 Content-Disposition: "Unrecognized disposition types should be treated as `attachment'." */ + kind: if kind.trim().eq_ignore_ascii_case(b"inline") { ContentDispositionKind::Inline + } else { + ContentDispositionKind::Attachment }, ..ContentDisposition::default() };