meli/melib/src/email/parser.rs

2909 lines
104 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

/*
* meli - parser module
*
* Copyright 2017 - 2020 Manos Pitsidianakis
*
* This file is part of meli.
*
* meli is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* meli is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with meli. If not, see <http://www.gnu.org/licenses/>.
*/
/*! Parsers for email. See submodules */
use crate::error::{MeliError, Result, ResultIntoMeliError};
use nom::{
branch::alt,
bytes::complete::{is_a, is_not, tag, take, take_until, take_while, take_while1},
character::{is_alphabetic, is_digit, is_hex_digit},
combinator::peek,
combinator::{map, opt},
error::{context, ErrorKind},
multi::{many0, many1, separated_nonempty_list},
number::complete::le_u8,
sequence::{delimited, pair, preceded, separated_pair, terminated},
};
use smallvec::SmallVec;
use std::borrow::Cow;
/// Reinterprets a byte slice expression as `&str` without validating it.
// SAFETY: `from_utf8_unchecked` performs no validation, so every caller must
// guarantee that the bytes passed in are valid UTF-8; anything else is
// undefined behaviour.
macro_rules! to_str {
($l:expr) => {{
unsafe { std::str::from_utf8_unchecked($l) }
}};
}
/// Error produced by the parsers in this module.
///
/// `input` is the input position the error refers to and `error` is a
/// human-readable description of what went wrong.
#[derive(Eq, PartialEq)]
pub struct ParsingError<I> {
    pub input: I,
    pub error: Cow<'static, str>,
}

impl core::fmt::Debug for ParsingError<&'_ [u8]> {
    /// Formats the error with its input rendered as text.
    ///
    /// The input is arbitrary bytes, so it is decoded lossily: invalid
    /// sequences are shown as U+FFFD instead of being reinterpreted as
    /// `str` without validation (which would be undefined behaviour).
    fn fmt(&self, fmt: &mut core::fmt::Formatter) -> core::fmt::Result {
        fmt.debug_struct("ParsingError")
            .field("input", &String::from_utf8_lossy(self.input))
            .field("error", &self.error)
            .finish()
    }
}
struct DebugOkWrapper<'r, I, R: AsRef<[u8]>>(&'r IResult<I, R>);
impl<R: AsRef<[u8]> + core::fmt::Debug> core::fmt::Debug for DebugOkWrapper<'_, &'_ [u8], R> {
fn fmt(&self, fmt: &mut core::fmt::Formatter) -> core::fmt::Result {
if let Ok((a, b)) = self.0 {
write!(fmt, "Ok({}, {})", &to_str!(a), &to_str!(b.as_ref()))
} else {
write!(fmt, "{:?}", self.0)
}
}
}
/// Result type of the parsers in this module: on success the remaining input
/// paired with the parsed value, on failure a `nom::Err` wrapping `E`
/// (a `ParsingError` over the input type unless overridden).
pub type IResult<I, O, E = ParsingError<I>> = std::result::Result<(I, O), nom::Err<E>>;
impl<'i> ParsingError<&'i str> {
    /// Converts an error over `&str` input into the same error over the
    /// string's underlying bytes; the message is carried over unchanged.
    pub fn as_bytes(self) -> ParsingError<&'i [u8]> {
        let Self { input, error } = self;
        ParsingError {
            input: input.as_bytes(),
            error,
        }
    }
}
impl<I> From<(I, &'static str)> for ParsingError<I> {
fn from((input, error): (I, &'static str)) -> Self {
Self {
input,
error: error.into(),
}
}
}
impl<I> From<(I, String)> for ParsingError<I> {
fn from((input, error): (I, String)) -> Self {
Self {
input,
error: error.into(),
}
}
}
impl<I> nom::error::ParseError<I> for ParsingError<I> {
fn from_error_kind(input: I, kind: ErrorKind) -> Self {
Self {
input,
error: kind.description().to_string().into(),
}
}
fn append(input: I, kind: ErrorKind, other: Self) -> Self {
Self {
input,
error: format!("{}, {}", kind.description(), other.error).into(),
}
}
}
impl<'i> From<ParsingError<&'i [u8]>> for MeliError {
fn from(val: ParsingError<&'i [u8]>) -> MeliError {
MeliError::new("Parsing error").set_summary(format!(
r#"In input: "{}...",
Error: {}"#,
String::from_utf8_lossy(val.input)
.chars()
.take(30)
.collect::<String>(),
val.error
))
}
}
impl<'i> From<ParsingError<&'i str>> for MeliError {
fn from(val: ParsingError<&'i str>) -> MeliError {
MeliError::new("Parsing error").set_summary(format!(
r#"In input: "{}...",
Error: {}"#,
val.input.chars().take(30).collect::<String>(),
val.error
))
}
}
impl<'i> From<nom::Err<ParsingError<&'i [u8]>>> for MeliError {
fn from(val: nom::Err<ParsingError<&'i [u8]>>) -> MeliError {
match val {
nom::Err::Incomplete(_) => MeliError::new("Parsing Error: Incomplete"),
nom::Err::Error(err) | nom::Err::Failure(err) => err.into(),
}
}
}
impl<'i> From<nom::Err<ParsingError<&'i str>>> for MeliError {
fn from(val: nom::Err<ParsingError<&'i str>>) -> MeliError {
match val {
nom::Err::Incomplete(_) => MeliError::new("Parsing Error: Incomplete"),
nom::Err::Error(err) | nom::Err::Failure(err) => err.into(),
}
}
}
// Expands to a boolean expression that is true when the byte is below 33
// (ASCII control characters and SPACE) or equal to 127 (DEL).
// The argument is substituted twice, so pass a cheap, side-effect free
// expression. Both arms expand to the same test.
macro_rules! is_ctl_or_space {
($var:ident) => {
/* <any ASCII control character, SPACE, and DEL> */
$var < 33 || $var == 127
};
($var:expr) => {
/* <any ASCII control character, SPACE, and DEL> */
$var < 33 || $var == 127
};
}
// Expands to a boolean expression that is true for SPACE, TAB, LF and CR.
// The argument is substituted four times, so it should be side-effect free.
macro_rules! is_whitespace {
    ($var:ident) => {
        $var == b' ' || $var == b'\t' || $var == b'\n' || $var == b'\r'
    };
    ($var:expr) => {
        $var == b' ' || $var == b'\t' || $var == b'\n' || $var == b'\r'
    };
}

/// String-like convenience methods for byte slices.
pub trait BytesExt {
    /// Strips trailing SPACE / TAB / LF / CR bytes.
    fn rtrim(&self) -> &Self;
    /// Strips leading SPACE / TAB / LF / CR bytes.
    fn ltrim(&self) -> &Self;
    /// Alias of [`BytesExt::ltrim`].
    fn trim_start(&self) -> &Self {
        self.ltrim()
    }
    /// Alias of [`BytesExt::rtrim`].
    fn trim_end(&self) -> &Self {
        self.rtrim()
    }
    /// Strips whitespace on both ends.
    fn trim(&self) -> &Self;
    /// Index of the first occurrence of `needle`; `None` if absent or if
    /// `needle` is empty.
    fn find<T: AsRef<[u8]>>(&self, needle: T) -> Option<usize>;
    /// Whether `needle` occurs anywhere in `self` (always `false` for an
    /// empty needle).
    fn contains_subsequence<T: AsRef<[u8]>>(&self, needle: T) -> bool {
        self.find(needle.as_ref()).is_some()
    }
    /// Index of the last occurrence of `needle`; `None` if absent or if
    /// `needle` is empty.
    fn rfind<T: AsRef<[u8]>>(&self, needle: T) -> Option<usize>;
    /// Returns a copy of `self` in which the FIRST occurrence of `from` (if
    /// any) is replaced by `to`. Later occurrences are left untouched.
    fn replace(&self, from: &[u8], to: &[u8]) -> Vec<u8>;
    /// Whether the slice is at least two bytes long and both starts and
    /// ends with a double quote.
    fn is_quoted(&self) -> bool;
}

impl BytesExt for [u8] {
    fn rtrim(&self) -> &Self {
        match self.iter().rposition(|b| !is_whitespace!(*b)) {
            Some(last_kept) => &self[..=last_kept],
            // Empty or whitespace-only input.
            None => &[],
        }
    }

    fn ltrim(&self) -> &Self {
        match self.iter().position(|b| !is_whitespace!(*b)) {
            Some(first_kept) => &self[first_kept..],
            // Empty or whitespace-only input.
            None => &[],
        }
    }

    fn trim(&self) -> &[u8] {
        let without_tail = self.rtrim();
        without_tail.ltrim()
    }

    // Naive window scan, see https://stackoverflow.com/a/35907071
    fn find<T: AsRef<[u8]>>(&self, needle: T) -> Option<usize> {
        match needle.as_ref() {
            // `windows(0)` would panic, and an empty needle is defined as
            // "not found".
            [] => None,
            pattern => self
                .windows(pattern.len())
                .position(|candidate| candidate == pattern),
        }
    }

    fn rfind<T: AsRef<[u8]>>(&self, needle: T) -> Option<usize> {
        match needle.as_ref() {
            [] => None,
            pattern => self
                .windows(pattern.len())
                .rposition(|candidate| candidate == pattern),
        }
    }

    fn replace(&self, from: &[u8], to: &[u8]) -> Vec<u8> {
        match self.find(from) {
            None => self.to_vec(),
            Some(at) => {
                let mut out = Vec::with_capacity(self.len() - from.len() + to.len());
                out.extend_from_slice(&self[..at]);
                out.extend_from_slice(to);
                out.extend_from_slice(&self[at + from.len()..]);
                out
            }
        }
    }

    fn is_quoted(&self) -> bool {
        self.len() > 1 && self.starts_with(b"\"") && self.ends_with(b"\"")
    }
}
/// Joining support for iterators over byte slices.
pub trait BytesIterExt {
    /// Concatenates the remaining items, inserting `sep` before an item
    /// whenever the output built so far is non-empty.
    fn join(&mut self, sep: u8) -> Vec<u8>;
}

impl<'a, P: for<'r> FnMut(&'r u8) -> bool> BytesIterExt for std::slice::Split<'a, u8, P> {
    fn join(&mut self, sep: u8) -> Vec<u8> {
        let mut joined = Vec::new();
        for segment in self {
            // Note: the check is on the accumulated output, so leading empty
            // segments do not produce a separator.
            if !joined.is_empty() {
                joined.push(sep);
            }
            joined.extend_from_slice(segment);
        }
        joined
    }
}
//fn parser(input: I) -> IResult<I, O, E>;
/// Splits a raw message into its header fields and its body.
///
/// Accepts either headers, one empty line (`\n` or `\r\n`) and the rest of
/// the input as body, or headers immediately followed by end of input (in
/// which case the body is empty).
pub fn mail(input: &[u8]) -> Result<(Vec<(&[u8], &[u8])>, &[u8])> {
    let parsed = alt((
        separated_pair(
            headers::headers,
            alt((tag(b"\n"), tag(b"\r\n"))),
            take_while(|_| true),
        ),
        pair(headers::headers, generic::eof),
    ))(input)
    .chain_err_summary(|| "Could not parse mail")?;
    match parsed {
        (rest, result) if rest.is_empty() => Ok(result),
        _ => Err(MeliError::new("Got leftover bytes after parsing mail")),
    }
}
pub mod dates {
/*! Date values in headers */
use super::generic::*;
use super::*;
use crate::datetime::UnixTimestamp;
fn take_n_digits(n: usize) -> impl Fn(&[u8]) -> IResult<&[u8], &[u8]> {
move |input: &[u8]| {
let (input, ret) = take(n)(input)?;
if !ret.iter().all(|c| is_digit(*c)) {
return Err(nom::Err::Error(
(input, "take_n_digits(): not digits").into(),
));
}
Ok((input, ret))
}
}
///Parses an obsolete (alphabetic) time zone and returns it as a
///`(sign, four-digit offset)` pair. Alternatives are tried in the order
///listed; any other run of one or more alphabetic bytes maps to `-0000`.
///
///In the obsolete time zone, "UT" and "GMT" are indications of
///"Universal Time" and "Greenwich Mean Time", respectively, and are
///both semantically identical to "+0000".
///The remaining three character zones are the US time zones. The first
///letter, "E", "C", "M", or "P" stands for "Eastern", "Central",
///"Mountain", and "Pacific". The second letter is either "S" for
///"Standard" time, or "D" for "Daylight Savings" (or summer) time.
///Their interpretations are as follows:
/// EDT is semantically equivalent to -0400
/// EST is semantically equivalent to -0500
/// CDT is semantically equivalent to -0500
/// CST is semantically equivalent to -0600
/// MDT is semantically equivalent to -0600
/// MST is semantically equivalent to -0700
/// PDT is semantically equivalent to -0700
/// PST is semantically equivalent to -0800
///The 1 character military time zones were defined in a non-standard
///way in [RFC0822] and are therefore unpredictable in their meaning.
///The original definitions of the military zones "A" through "I" are
///equivalent to "+0100" through "+0900", respectively; "K", "L", and
///"M" are equivalent to "+1000", "+1100", and "+1200", respectively;
///"N" through "Y" are equivalent to "-0100" through "-1200",
///respectively; and "Z" is equivalent to "+0000". However, because of
///the error in [RFC0822], they SHOULD all be considered equivalent to
///"-0000" unless there is out-of-band information confirming their
///meaning.
///Other multi-character (usually between 3 and 5) alphabetic time zones
///have been used in Internet messages. Any such time zone whose
///meaning is not known SHOULD be considered equivalent to "-0000"
///unless there is out-of-band information confirming their meaning.
fn obs_zone(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> {
alt((
// `tag` matches a prefix only, so "UT" also matches the start of longer
// names such as "UTC" and leaves the remaining letters unconsumed.
map(tag("UT"), |_| (&b"+"[..], &b"0000"[..])),
map(tag("GMT"), |_| (&b"+"[..], &b"0000"[..])),
map(tag("EDT"), |_| (&b"-"[..], &b"0400"[..])),
map(tag("EST"), |_| (&b"-"[..], &b"0500"[..])),
map(tag("CDT"), |_| (&b"-"[..], &b"0500"[..])),
map(tag("CST"), |_| (&b"-"[..], &b"0600"[..])),
map(tag("MDT"), |_| (&b"-"[..], &b"0600"[..])),
map(tag("MST"), |_| (&b"-"[..], &b"0700"[..])),
map(tag("PDT"), |_| (&b"-"[..], &b"0700"[..])),
map(tag("PST"), |_| (&b"-"[..], &b"0800"[..])),
// Military and unknown alphabetic zones.
map(take_while1(is_alphabetic), |_| (&b"-"[..], &b"0000"[..])),
))(input)
}
///zone = (FWS ( "+" / "-" ) 4DIGIT) / obs-zone
///
///Returns the zone as a `(sign, four-digit offset)` pair. The leading FWS
///is not consumed here.
fn zone(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> {
    /// `( "+" / "-" ) 4DIGIT`
    fn numeric_zone(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> {
        let (input, sign) = alt((tag("+"), tag("-")))(input)?;
        let (input, offset) = take_n_digits(4)(input)?;
        Ok((input, (sign, offset)))
    }
    alt((numeric_zone, obs_zone))(input)
}
///date-time = [ day-of-week "," ] date time [CFWS]
///date = day month year
///time = time-of-day zone
///time-of-day = hour ":" minute [ ":" second ]
///hour = 2DIGIT / obs-hour
///minute = 2DIGIT / obs-minute
///second = 2DIGIT / obs-second
fn date_time(input: &[u8]) -> IResult<&[u8], UnixTimestamp> {
let orig_input = input;
let mut accum: SmallVec<[u8; 32]> = SmallVec::new();
let (input, day_of_week) = opt(terminated(day_of_week, tag(",")))(input)?;
let (input, day) = day(input)?;
let (input, month) = month(input)?;
let (input, year) = year(input)?;
let (input, hour) = take_n_digits(2)(input)?;
let (input, _) = tag(":")(input)?;
let (input, minute) = take_n_digits(2)(input)?;
let (input, second) = opt(preceded(tag(":"), take_n_digits(2)))(input)?;
let (input, _) = fws(input)?;
let (input, (sign, zone)) = zone(input)?;
let (input, _) = opt(cfws)(input)?;
if let Some(day_of_week) = day_of_week {
accum.extend_from_slice(&day_of_week);
accum.extend_from_slice(b", ");
}
accum.extend_from_slice(&day);
accum.extend_from_slice(b" ");
accum.extend_from_slice(&month);
accum.extend_from_slice(b" ");
accum.extend_from_slice(&year);
accum.extend_from_slice(b" ");
accum.extend_from_slice(&hour);
accum.extend_from_slice(b":");
accum.extend_from_slice(&minute);
if let Some(second) = second {
accum.extend_from_slice(b":");
accum.extend_from_slice(&second);
}
accum.extend_from_slice(b" ");
accum.extend_from_slice(&sign);
accum.extend_from_slice(&zone);
match crate::datetime::rfc822_to_timestamp(accum.to_vec()) {
Ok(t) => Ok((input, t)),
Err(_err) => Err(nom::Err::Error(
(
orig_input,
"date_time(): could not convert date from rfc822",
)
.into(),
)),
}
}
///`day-of-week = ([FWS] day-name) / obs-day-of-week`
///day-name = "Mon" / "Tue" / "Wed" / "Thu" /
/// "Fri" / "Sat" / "Sun"
///
///Only the bare day-name is matched; leading FWS is not consumed.
fn day_of_week(input: &[u8]) -> IResult<&[u8], Cow<'_, [u8]>> {
    let (rest, day_name) = alt((
        tag("Mon"),
        tag("Tue"),
        tag("Wed"),
        tag("Thu"),
        tag("Fri"),
        tag("Sat"),
        tag("Sun"),
    ))(input)?;
    let borrowed_name: Cow<'_, [u8]> = Cow::Borrowed(day_name);
    Ok((rest, borrowed_name))
}
///day = ([FWS] 1*2DIGIT FWS) / obs-day
fn day(input: &[u8]) -> IResult<&[u8], &[u8]> {
let (input, _) = opt(fws)(input)?;
let (input, ret) = alt((take_n_digits(2), take_n_digits(1)))(input)?;
let (input, _) = fws(input)?;
Ok((input, ret))
}
///month = "Jan" / "Feb" / "Mar" / "Apr" /
/// "May" / "Jun" / "Jul" / "Aug" /
/// "Sep" / "Oct" / "Nov" / "Dec"
///
///Matches one of the twelve three-letter month names exactly as written
///(case-sensitive) and returns the matched bytes.
fn month(input: &[u8]) -> IResult<&[u8], &[u8]> {
alt((
tag("Jan"),
tag("Feb"),
tag("Mar"),
tag("Apr"),
tag("May"),
tag("Jun"),
tag("Jul"),
tag("Aug"),
tag("Sep"),
tag("Oct"),
tag("Nov"),
tag("Dec"),
))(input)
}
///year = (FWS 4*DIGIT FWS) / obs-year
fn year(input: &[u8]) -> IResult<&[u8], &[u8]> {
let (input, _) = fws(input)?;
let (input, ret) = take_n_digits(4)(input)?;
let (input, _) = fws(input)?;
Ok((input, ret))
}
pub fn rfc5322_date(input: &[u8]) -> Result<crate::datetime::UnixTimestamp> {
date_time(input)
.or_else(|_| {
//let (_, mut parsed_result) = encodings::phrase(&eat_comments(input), false)?;
let (rest, parsed_result) = encodings::phrase(input, false)?;
let (_, ret) = match date_time(&parsed_result) {
Ok(v) => v,
Err(_) => {
return Err(nom::Err::Error(
(rest, "rfc5322_date(): invalid input").into(),
));
}
};
Ok((rest, ret))
})
.map(|(_, r)| r)
.map_err(|err: nom::Err<ParsingError<_>>| err.into())
/*
}
if let Some(pos) = parsed_result.find(b"-0000") {
parsed_result[pos] = b'+';
}
crate::datetime::rfc822_to_timestamp(parsed_result.trim())
*/
}
// Checks that a date with a trailing comment, the same date without it and
// an encoded-word form of it all yield the same timestamp, and that a
// year-0001 date yields 0.
#[test]
fn test_date_new() {
let s = b"Thu, 31 Aug 2017 13:43:37 +0000 (UTC)";
let _s = b"Thu, 31 Aug 2017 13:43:37 +0000";
let __s = b"=?utf-8?q?Thu=2C_31_Aug_2017_13=3A43=3A37_-0000?=";
assert_eq!(rfc5322_date(s).unwrap(), rfc5322_date(_s).unwrap());
assert_eq!(rfc5322_date(_s).unwrap(), rfc5322_date(__s).unwrap());
let val = b"Fri, 23 Dec 0001 21:20:36 -0800 (PST)";
assert_eq!(rfc5322_date(val).unwrap(), 0);
}
}
pub mod generic {
/*! Generally useful parser combinators */
use super::*;
#[inline(always)]
pub fn byte_in_slice<'a>(slice: &'static [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], u8> {
move |input| {
if input.is_empty() {
return Err(nom::Err::Error((input, "empty input").into()));
}
if slice.contains(&input[0]) {
Ok((&input[1..], input[0]))
} else {
Err(nom::Err::Error((input, "out of range").into()))
}
}
}
#[inline(always)]
pub fn byte_in_range<'a>(a: u8, b: u8) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], u8> {
move |input| {
if input.is_empty() {
return Err(nom::Err::Error((input, "empty input").into()));
}
if input[0] >= a && input[0] <= b {
Ok((&input[1..], input[0]))
} else {
Err(nom::Err::Error((input, "out of range").into()))
}
}
}
///Matches exactly one multi-byte (2, 3 or 4 byte) UTF-8 sequence at the
///start of `input` and returns its bytes.
///
///UTF-8 characters can be defined in terms of octets using the
///following ABNF [RFC5234], taken from [RFC3629]:
///UTF8-non-ascii = UTF8-2 / UTF8-3 / UTF8-4
fn utf8_non_ascii(input: &[u8]) -> IResult<&[u8], Cow<'_, [u8]>> {
/// UTF8-2 = %xC2-DF UTF8-tail
fn utf8_2(input: &[u8]) -> IResult<&[u8], &[u8]> {
let (rest, _) = byte_in_range(0xc2, 0xdf)(input)?;
let (rest, _) = utf8_tail(rest)?;
// Both bytes were matched above, so the two-byte slice is in bounds.
Ok((rest, &input[0..2]))
}
/// UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) / %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
fn utf8_3<'a>(input: &'a [u8]) -> IResult<&'a [u8], &'a [u8]> {
alt((
// %xE0 %xA0-BF UTF8-tail
|input: &'a [u8]| -> IResult<&'a [u8], &'a [u8]> {
let (rest, _) = byte_in_range(0xe0, 0xe0)(input)?;
let (rest, _) = byte_in_range(0xa0, 0xbf)(rest)?;
let (rest, _) = utf8_tail(rest)?;
Ok((rest, &input[0..3]))
},
// %xE1-EC 2( UTF8-tail )
|input: &'a [u8]| -> IResult<&'a [u8], &'a [u8]> {
let (rest, _) = byte_in_range(0xe1, 0xec)(input)?;
let (rest, _) = utf8_tail(rest)?;
let (rest, _) = utf8_tail(rest)?;
Ok((rest, &input[0..3]))
},
// %xED %x80-9F UTF8-tail
|input: &'a [u8]| -> IResult<&'a [u8], &'a [u8]> {
let (rest, _) = byte_in_range(0xed, 0xed)(input)?;
let (rest, _) = byte_in_range(0x80, 0x9f)(rest)?;
let (rest, _) = utf8_tail(rest)?;
Ok((rest, &input[0..3]))
},
// %xEE-EF 2( UTF8-tail )
|input: &'a [u8]| -> IResult<&'a [u8], &'a [u8]> {
let (rest, _) = byte_in_range(0xee, 0xef)(input)?;
let (rest, _) = utf8_tail(rest)?;
let (rest, _) = utf8_tail(rest)?;
Ok((rest, &input[0..3]))
},
))(input)
}
/// UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) / %xF4 %x80-8F 2( UTF8-tail )
fn utf8_4<'a>(input: &'a [u8]) -> IResult<&'a [u8], &'a [u8]> {
alt((
// %xF0 %x90-BF 2( UTF8-tail )
|input: &'a [u8]| -> IResult<&'a [u8], &'a [u8]> {
let (rest, _) = byte_in_range(0xf0, 0xf0)(input)?;
let (rest, _) = byte_in_range(0x90, 0xbf)(rest)?;
let (rest, _) = utf8_tail(rest)?;
let (rest, _) = utf8_tail(rest)?;
Ok((rest, &input[0..4]))
},
// %xF1-F3 3( UTF8-tail )
|input: &'a [u8]| {
let (rest, _) = byte_in_range(0xf1, 0xf3)(input)?;
let (rest, _) = utf8_tail(rest)?;
let (rest, _) = utf8_tail(rest)?;
let (rest, _) = utf8_tail(rest)?;
Ok((rest, &input[0..4]))
},
// %xF4 %x80-8F 2( UTF8-tail )
|input: &'a [u8]| {
let (rest, _) = byte_in_range(0xf4, 0xf4)(input)?;
let (rest, _) = byte_in_range(0x80, 0x8f)(rest)?;
let (rest, _) = utf8_tail(rest)?;
let (rest, _) = utf8_tail(rest)?;
Ok((rest, &input[0..4]))
},
))(input)
}
/// UTF8-tail = %x80-BF
fn utf8_tail(input: &[u8]) -> IResult<&[u8], &[u8]> {
let (rest, _) = byte_in_range(0x80, 0xbf)(input)?;
Ok((rest, &input[0..1]))
}
// Shorter encodings are tried first; the matched bytes are returned borrowed.
let (rest, ret) = alt((utf8_2, utf8_3, utf8_4))(input)?;
Ok((rest, ret.into()))
}
///`VCHAR = %x21-7E` (visible ASCII, `!` through `~`).
/// RFC6532 adds: `VCHAR =/ UTF8-non-ascii`; this parser only matches the
/// single-byte ASCII range.
fn vchar(input: &[u8]) -> IResult<&[u8], u8> {
    let visible_ascii = byte_in_range(b'!', b'~');
    visible_ascii(input)
}
///`quoted-pair = ("\" (VCHAR / WSP)) / obs-qp`
///
///Consumes the backslash and returns the escaped unit that follows it: a
///UTF-8 non-ASCII sequence, a visible ASCII byte, or a WSP byte.
fn quoted_pair(input: &[u8]) -> IResult<&[u8], Cow<'_, [u8]>> {
    let (escaped, _backslash) = tag("\\")(input)?;
    alt((
        utf8_non_ascii,
        map(vchar, |byte| Cow::Owned(vec![byte])),
        map(wsp, |byte| Cow::Owned(vec![byte])),
    ))(escaped)
}
///```text
///ctext = %d33-39 / ; Printable US-ASCII
/// %d42-91 / ; characters not including
/// %d93-126 / ; "(", ")", or "\"
/// obs-ctext
///```
///
///A UTF-8 non-ASCII sequence is accepted as well. The matched bytes are
///discarded.
fn ctext(input: &[u8]) -> IResult<&[u8], ()> {
    let printable_ascii = alt((
        byte_in_range(33, 39),
        byte_in_range(42, 91),
        byte_in_range(93, 126),
    ));
    alt((
        map(printable_ascii, |_byte| ()),
        map(utf8_non_ascii, |_sequence| ()),
    ))(input)
}
///```text
///ctext = %d33-39 / ; Printable US-ASCII
/// %d42-91 / ; characters not including
/// %d93-126 / ; "(", ")", or "\"
/// obs-ctext
///ccontent = ctext / quoted-pair / comment
///comment = "(" *([FWS] ccontent) [FWS] ")"
///```
///
///Consumes one parenthesised comment, including any comments nested inside
///it, and returns the input that follows the closing parenthesis.
///
///Fails if the input does not start with `(`, if the comment is not closed
///before the end of input, or if it contains a byte that is not valid
///comment content.
pub fn comment(input: &[u8]) -> IResult<&[u8], ()> {
    if !input.starts_with(b"(") {
        return Err(nom::Err::Error(
            (input, "comment(): not starting with '('").into(),
        ));
    }
    let mut input = &input[1..];
    let mut comment_level = 1;
    while comment_level > 0 {
        if input.is_empty() {
            return Err(nom::Err::Error(
                (input, "comment(): unclosed comment").into(),
            ));
        }
        let remaining_before = input.len();
        input = context("comment()", opt(fws))(input)?.0;
        while let Ok((_input, _)) =
            context("comment()", alt((ctext, map(quoted_pair, |_| ()))))(input)
        {
            input = _input;
        }
        if input.starts_with(b")") {
            comment_level -= 1;
            input = &input[1..];
        } else if input.starts_with(b"(") {
            comment_level += 1;
            input = &input[1..];
        } else {
            input = context("comment()", opt(fws))(input)?.0;
        }
        // Every sub-parser above is optional. If none of them consumed
        // anything, the next byte can never be consumed (e.g. a control
        // character or a line break that is not folding whitespace), so
        // bail out instead of spinning forever on the same position.
        if input.len() == remaining_before {
            return Err(nom::Err::Error(
                (input, "comment(): invalid byte in comment").into(),
            ));
        }
    }
    Ok((input, ()))
}
// A comment containing a nested comment must be consumed completely.
#[test]
fn test_parser_comment() {
let s = b"(recursive (comment) block)";
assert_eq!(comment(s), Ok((&b""[..], ())));
}
///`FWS = ([*WSP CRLF] 1*WSP) / obs-FWS`
pub fn fws(input: &[u8]) -> IResult<&[u8], Cow<'_, [u8]>> {
if let Ok((rest, ws)) = terminated(many0(wsp), crlf)(input) {
let mut v: Vec<u8> = ws.into_iter().fold(vec![], |mut acc, x| {
acc.push(x);
acc
});
let mut width = 0;
let mut input = rest;
while let Ok((input_, w)) = wsp(input) {
v.push(w);
width += 1;
input = input_;
}
if width == 0 {
Err(nom::Err::Error((input, "fws(): no WSP").into()))
} else {
Ok((input, Cow::Owned(v)))
}
} else {
let orig_input = input;
let mut input = input;
let mut width = 0;
while let Ok((input_, _)) = wsp(input) {
width += 1;
input = input_;
}
if width == 0 {
Err(nom::Err::Error((input, "fws(): no WSP").into()))
} else {
Ok((input, Cow::Borrowed(&orig_input[..width])))
}
}
}
///`WSP = SP / HTAB ; white space`
pub fn wsp(input: &[u8]) -> IResult<&[u8], u8> {
if input.starts_with(b" ") || input.starts_with(b"\t") {
Ok((&input[1..], input[0]))
} else {
Err(nom::Err::Error((input, "wsp(): not whitespace").into()))
}
}
pub fn crlf(input: &[u8]) -> IResult<&[u8], ()> {
if input.starts_with(b"\n") {
Ok((&input[1..], ()))
} else if input.starts_with(b"\r\n") {
Ok((&input[2..], ()))
} else {
Err(nom::Err::Error((input, "crlf(): not whitespace").into()))
}
}
///`CFWS = (1*([FWS] comment) [FWS]) / FWS`
pub fn cfws(input: &[u8]) -> IResult<&[u8], Cow<'_, [u8]>> {
alt((
|input| {
let (input, pr) = many1(terminated(opt(fws), comment))(input)?;
let (input, end) = opt(fws)(input)?;
let mut pr = pr.into_iter().filter_map(|s| s).fold(vec![], |mut acc, x| {
acc.extend_from_slice(&x);
acc
});
if pr.is_empty() {
Ok((input, end.unwrap_or((&b""[..]).into())))
} else {
if let Some(end) = end {
pr.extend_from_slice(&end);
}
Ok((input, pr.into()))
}
},
fws,
))(input)
}
///`unstructured = (*([FWS] VCHAR) *WSP) / obs-unstruct`
///
///Parses the whole input as unstructured text and returns it with folding
///line breaks removed. Fails if any input is left unmatched (for example
///bytes outside the visible ASCII range).
pub fn unstructured(input: &[u8]) -> Result<String> {
    let (input, r): (_, Vec<(Option<Cow<'_, [u8]>>, u8)>) =
        many0(pair(opt(fws), vchar))(input)?;
    let (input, rest_wsp): (_, Vec<u8>) = many0(wsp)(input)?;
    let mut ret_s = Vec::new();
    for (opt_slice, b) in r {
        if let Some(slice) = opt_slice {
            ret_s.extend_from_slice(&slice);
        }
        ret_s.push(b);
    }
    ret_s.extend_from_slice(&rest_wsp);
    let ret_s = String::from_utf8_lossy(&ret_s).into_owned();
    if !input.is_empty() {
        // The leftover is exactly the part that did not match, so it is
        // frequently not valid UTF-8: decode it lossily instead of using an
        // unchecked conversion.
        Err(MeliError::from(format!(
            "unstructured(): unmatched input: {} while result is {}",
            String::from_utf8_lossy(input),
            ret_s
        )))
    } else {
        Ok(ret_s)
    }
}
use crate::email::address::Address;
use crate::email::mailto::Mailto;
/// Parses a `mailto:` URL of the form `mailto:<address>[?tag=value[&tag=value]...]`.
///
/// The part up to the first `?` (or the end of input) must parse as an
/// address. Recognised tags are `subject`, `cc`, `bcc` and `body`, each
/// accepted at most once; any other or repeated tag is an error.
///
/// On success, returns the input left at the cursor together with the
/// `Mailto` value.
pub fn mailto(mut input: &[u8]) -> IResult<&[u8], Mailto> {
if !input.starts_with(b"mailto:") {
return Err(nom::Err::Error(
(input, "mailto(): input doesn't start with `mailto:`").into(),
));
}
input = &input[b"mailto:".len()..];
// The address ends at the first '?' or at the end of input.
let end = input.iter().position(|e| *e == b'?').unwrap_or(input.len());
let address: Address;
if let Ok((_, addr)) = crate::email::parser::address::address(&input[..end]) {
address = addr;
// Skip the '?' separator when there is one.
input = if input[end..].is_empty() {
&input[end..]
} else {
&input[end + 1..]
};
} else {
return Err(nom::Err::Error(
(input, "mailto(): address not found in input").into(),
));
}
let mut subject = None;
let mut cc = None;
let mut bcc = None;
let mut body = None;
while !input.is_empty() {
// Everything up to '=' is the tag name; the cursor moves past the '='.
let tag = if let Some(tag_pos) = input.iter().position(|e| *e == b'=') {
let ret = &input[0..tag_pos];
input = &input[tag_pos + 1..];
ret
} else {
return Err(nom::Err::Error(
(input, "mailto(): extra characters found in input").into(),
));
};
// The value runs up to the next '&' or the end of input.
let value_end = input.iter().position(|e| *e == b'&').unwrap_or(input.len());
let value = String::from_utf8_lossy(&input[..value_end]).to_string();
match tag {
// The `is_none()` guards make a repeated tag fall through to the
// error arm below.
b"subject" if subject.is_none() => {
subject = Some(value);
}
b"cc" if cc.is_none() => {
cc = Some(value);
}
b"bcc" if bcc.is_none() => {
bcc = Some(value);
}
b"body" if body.is_none() => {
/* FIXME:
* Parse escaped characters properly.
*/
// Only `%20` and `%0A` are decoded, and only for the body.
body = Some(value.replace("%20", " ").replace("%0A", "\n"));
}
_ => {
return Err(nom::Err::Error(
(input, "mailto(): unknown tag in input").into(),
));
}
}
// NOTE(review): when the last pair has no trailing '&' the loop breaks
// before advancing, so the returned remainder still contains that last
// value. Confirm whether callers rely on the remainder.
if input[value_end..].is_empty() {
break;
}
input = &input[value_end + 1..];
}
Ok((
input,
Mailto {
address,
subject,
cc,
bcc,
body,
},
))
}
/// Iterator over the header fields at the start of a byte slice, yielding
/// `(name, value)` pairs.
pub struct HeaderIterator<'a>(pub &'a [u8]);

impl<'a> Iterator for HeaderIterator<'a> {
    type Item = (&'a [u8], &'a [u8]);

    /// Parses the next header field. Iteration ends when the input is
    /// exhausted or when a field fails to parse; in the latter case the
    /// remaining input is discarded.
    fn next(&mut self) -> Option<(&'a [u8], &'a [u8])> {
        if self.0.is_empty() {
            return None;
        }
        if let Ok((rest, field)) = super::headers::header(self.0) {
            self.0 = rest;
            return Some(field);
        }
        self.0 = &[];
        None
    }
}
pub fn eof(input: &[u8]) -> IResult<&[u8], &[u8]> {
if input.is_empty() {
Ok((input, input))
} else {
Err(nom::Err::Error((input, "expected EOF").into()))
}
}
// Exercises unfolding in `unstructured()`: a line break followed by
// whitespace is dropped while the whitespace itself is kept.
// NOTE(review): the first case needs whitespace at the start of the
// continuation line to be valid folding; indentation looks stripped in this
// copy of the file, so confirm the raw string against the upstream source.
#[test]
fn test_parser_cfws() {
let s = r#"This
is a test"#;
assert_eq!(&unstructured(s.as_bytes()).unwrap(), "This is a test",);
assert_eq!(&unstructured(s.as_bytes()).unwrap(), "This is a test",);
let s = "this is\n\ta folded name";
assert_eq!(
&unstructured(s.as_bytes()).unwrap(),
"this is\ta folded name",
);
}
///`atom = [CFWS] 1*atext [CFWS]`
///
///Returns the atom together with the whitespace of any surrounding CFWS.
///The atom extends up to the first special character, line break or double
///quote; trailing SPACE / TAB bytes are handed to the following CFWS.
///
///Fails if no atom byte is found.
pub fn atom(input: &[u8]) -> IResult<&[u8], Cow<'_, [u8]>> {
    let (input, opt_space) = opt(cfws)(input)?;

    let ends_atom = |byte: &u8| match *byte {
        b'(' | b')' | b'<' | b'>' | b'[' | b']' | b':' | b';' | b'@' | b'\\' | b','
        | b'.' | b'\r' | b'\n' | b'"' => true,
        _ => false,
    };
    let mut i = input.iter().position(ends_atom).unwrap_or(input.len());
    if i == 0 {
        return Err(nom::Err::Error(
            (input, "atom(): starts with whitespace or empty").into(),
        ));
    }
    // Give trailing whitespace back to the CFWS parser below. The `i > 0`
    // bound matters: the run may consist of whitespace only (e.g. " \n"),
    // and indexing `input[i - 1]` at `i == 0` would underflow and panic.
    while i > 0 && (input[i - 1] == b' ' || input[i - 1] == b'\t') {
        i -= 1;
    }
    if i == 0 {
        return Err(nom::Err::Error(
            (input, "atom(): starts with whitespace or empty").into(),
        ));
    }

    let (rest, opt_space2) = opt(cfws)(&input[i..])?;
    let ret = if opt_space.is_some() || opt_space2.is_some() {
        let mut ret = Vec::with_capacity(i + 2);
        if let Some(opt_space) = opt_space {
            ret.extend_from_slice(&opt_space);
        }
        ret.extend_from_slice(&input[..i]);
        if let Some(opt_space) = opt_space2 {
            ret.extend_from_slice(&opt_space);
        }
        Cow::Owned(ret)
    } else {
        Cow::Borrowed(&input[..i])
    };
    Ok((rest, ret))
}
///`quoted-string = [CFWS] DQUOTE *([FWS] qcontent) [FWS] DQUOTE [CFWS]`
pub fn quoted_string(input: &[u8]) -> IResult<&[u8], Cow<'_, [u8]>> {
let (input, opt_space) = opt(cfws)(input)?;
if !input.starts_with(b"\"") {
return Err(nom::Err::Error(
(input, "quoted_string(): doesn't start with DQUOTE").into(),
));
}
let input = &input[1..];
let mut i = 0;
while i < input.len() && input[i] != b'"' {
if opt_space.is_some() || (input[i..].starts_with(b"\\") && i + 1 < input.len()) {
let mut ret = if let Some(opt_space) = opt_space {
let mut r = Vec::with_capacity(2 * i);
r.extend_from_slice(&opt_space);
r
} else {
Vec::with_capacity(2 * i)
};
ret.extend_from_slice(&input[..i]);
i += 1;
ret.push(input[i]);
i += 1;
while i < input.len() && input[i] != b'"' {
if input[i..].starts_with(b"\\") && i + 1 < input.len() {
i += 1;
}
ret.push(input[i]);
i += 1;
}
if i < input.len() {
// skip DQUOTE
i += 1;
} else {
return Err(nom::Err::Error(
(input, "quoted_string(): unclosed DQUOTE").into(),
));
}
let (rest, opt_sp) = opt(cfws)(&input[i..])?;
if let Some(opt_sp) = opt_sp {
ret.extend_from_slice(&opt_sp);
}
let ret = Cow::Owned(ret);
return Ok((rest, ret));
}
i += 1;
}
let ret = Cow::Borrowed(&input[..i]);
if i < input.len() {
// skip DQUOTE
i += 1;
} else {
return Err(nom::Err::Error(
(input, "quoted_string(): unclosed DQUOTE").into(),
));
}
let (rest, opt_sp) = opt(cfws)(&input[i..])?;
if let Some(opt_sp) = opt_sp {
let mut ret = ret.to_vec();
ret.extend_from_slice(&opt_sp);
Ok((rest, Cow::Owned(ret)))
} else {
Ok((rest, ret))
}
}
///`word = atom / quoted-string`
///
///`quoted_string` is attempted first and `atom` second.
pub fn word(input: &[u8]) -> IResult<&[u8], Cow<'_, [u8]>> {
alt((quoted_string, atom))(input)
}
///`phrase = 1*word / obs-phrase`
pub fn phrase2(input: &[u8]) -> IResult<&[u8], Vec<u8>> {
let (rest, words) = many1(word)(input)?;
let len = words.iter().map(|v| v.len()).sum::<usize>();
let mut ret = words
.into_iter()
.fold(Vec::with_capacity(len), |mut acc, el| {
acc.extend_from_slice(&el);
acc
});
let right_wsp_padding = ret.len() - ret.rtrim().len();
for _ in 0..right_wsp_padding {
ret.pop();
}
Ok((rest, ret))
}
// A quoted string with escaped double quotes must come out with the
// backslashes removed and the inner quotes kept.
#[test]
fn test_phrase() {
let s = b"\"Jeffrey \\\"fejj\\\" Stedfast\""; // <fejj@helixcode.com>"
assert_eq!(to_str!(&phrase2(s).unwrap().1), "Jeffrey \"fejj\" Stedfast");
}
///dot-atom-text = 1*atext *("." 1*atext)
///
///Returns the matched text (including the dots) as an owned buffer.
pub fn dot_atom_text(mut input: &[u8]) -> IResult<&[u8], Cow<'_, [u8]>> {
    /// Consumes a run of `atext`, appending it to `out`. Returns the
    /// remaining input and whether at least one `atext` was matched.
    fn atext_run<'i>(mut input: &'i [u8], out: &mut Vec<u8>) -> (&'i [u8], bool) {
        let mut matched_any = false;
        while let Ok((rest, piece)) = atext(input) {
            matched_any = true;
            out.extend_from_slice(&piece);
            input = rest;
        }
        (input, matched_any)
    }

    let mut ret = vec![];
    let (after_first, matched_any) = atext_run(input, &mut ret);
    input = after_first;
    if !matched_any {
        return Err(nom::Err::Error(
            (input, "dot_atom(): starts with at least one atext").into(),
        ));
    }
    while input.starts_with(b".") {
        ret.push(b'.');
        let (after_label, matched_any) = atext_run(&input[1..], &mut ret);
        input = after_label;
        if !matched_any {
            return Err(nom::Err::Error(
                (input, "dot_atom(): DOT followed with at least one atext").into(),
            ));
        }
    }
    Ok((input, ret.into()))
}
///`atext = ALPHA / DIGIT / ; Printable US-ASCII "!" / "#" / ; characters not including "$" / "%" / ; specials. Used for atoms. "&" / "'" / "*" / "+" / "-" / "/" / "=" / "?" / "^" / "_" / "`" / "{" / "|" / "}" / "~"`
pub fn atext_ascii(input: &[u8]) -> IResult<&[u8], Cow<'_, [u8]>> {
if input.is_empty() {
return Err(nom::Err::Error((input, "atext(): empty input").into()));
}
if input[0].is_ascii_alphanumeric()
|| [
b'!', b'#', b'$', b'%', b'&', b'\'', b'*', b'+', b'-', b'/', b'=', b'?', b'^',
b'_', b'`', b'{', b'|', b'}', b'~',
]
.contains(&input[0])
{
Ok((&input[1..], input[0..1].into()))
} else {
return Err(nom::Err::Error((input, "atext(): invalid byte").into()));
}
}
/// Matches one `atext` unit: a single ASCII `atext` byte (tried first) or
/// one UTF-8 non-ASCII sequence.
pub fn atext(input: &[u8]) -> IResult<&[u8], Cow<'_, [u8]>> {
alt((atext_ascii, utf8_non_ascii))(input)
}
///dot-atom = [CFWS] dot-atom-text [CFWS]
pub fn dot_atom(input: &[u8]) -> IResult<&[u8], Cow<'_, [u8]>> {
let (input, _) = opt(cfws)(input)?;
let (input, ret) = dot_atom_text(input)?;
let (input, _) = opt(cfws)(input)?;
Ok((input, ret.into()))
}
///```text
///dtext = %d33-90 / ; Printable US-ASCII
/// %d94-126 / ; characters not including
/// obs-dtext ; "[", "]", or "\"
///```
///
///Consumes a single `dtext` byte and returns it.
pub fn dtext(input: &[u8]) -> IResult<&[u8], u8> {
    // The upper range ends at 126 (`~`) inclusive, as in the grammar above.
    alt((byte_in_range(33, 90), byte_in_range(94, 126)))(input)
}
}
pub mod mailing_lists {
//! Mailing lists headers.
//!
//! Implemented RFCs:
//!
//! - [RFC2369 "The Use of URLs as Meta-Syntax for Core Mail List Commands and their Transport through Message Header Fields"](https://tools.ietf.org/html/rfc2369)
use super::*;
use generic::cfws;
///Parse the value of headers defined in RFC2369 "The Use of URLs as Meta-Syntax for Core
///Mail List Commands and their Transport through Message Header Fields"
///
///Leading and trailing CFWS is skipped. The value is then either a list of
///`<...>` entries separated by commas and/or spaces (with optional CFWS
///around each separator), a single `<...>` entry, or the literal `NO`.
///Returns the text inside each pair of angle brackets, or `[b"NO"]`.
pub fn rfc_2369_list_headers_action_list(input: &[u8]) -> IResult<&[u8], Vec<&[u8]>> {
let (input, _) = opt(cfws)(input)?;
let (input, ret) = alt((
// One or more `<...>` entries.
separated_nonempty_list(
delimited(
map(opt(cfws), |_| ()),
map(is_a(", "), |_| ()),
map(opt(cfws), |_| ()),
),
delimited(tag("<"), take_until(">"), tag(">")),
),
// A single `<...>` entry.
map(delimited(tag("<"), take_until(">"), tag(">")), |el| {
vec![el]
}),
// The literal "NO", optionally surrounded by CFWS.
map(
delimited(
map(opt(cfws), |_| ()),
map(tag("NO"), |_| ()),
map(opt(cfws), |_| ()),
),
|_| vec![&b"NO"[..]],
),
))(input)?;
let (input, _) = opt(cfws)(input)?;
Ok((input, ret))
}
// Parses a block of List-* headers and checks that every header value is
// consumed completely by `rfc_2369_list_headers_action_list`.
#[test]
fn test_parser_rfc_2369_list() {
let s = r#"List-Help: <mailto:list@host.com?subject=help> (List Instructions)
List-Help: <mailto:list-manager@host.com?body=info>
List-Help: <mailto:list-info@host.com> (Info about the list)
List-Help: <http://www.host.com/list/>, <mailto:list-info@host.com>
List-Help: <ftp://ftp.host.com/list.txt> (FTP),
<mailto:list@host.com?subject=help>
List-Post: <mailto:list@host.com>
List-Post: <mailto:moderator@host.com> (Postings are Moderated)
List-Post: <mailto:moderator@host.com?subject=list%20posting>
List-Post: NO (posting not allowed on this list)
List-Archive: <mailto:archive@host.com?subject=index%20list>
List-Archive: <ftp://ftp.host.com/pub/list/archive/>
List-Archive: <http://www.host.com/list/archive/> (Web Archive)
"#;
let (rest, headers) = headers::headers(s.as_bytes()).unwrap();
assert!(rest.is_empty());
for (_h, v) in headers {
let (rest, _action_list) = rfc_2369_list_headers_action_list(v).unwrap();
assert!(rest.is_empty());
}
}
}
pub mod headers {
/*! Email headers */
use super::*;
/// Parses one or more consecutive header fields into `(name, value)` pairs.
/// Fails if not even one field can be parsed.
pub fn headers(input: &[u8]) -> IResult<&[u8], Vec<(&[u8], &[u8])>> {
many1(header)(input)
}
/// Parses a single header field as a `(name, value)` pair, trying
/// `header_without_val` first and `header_with_val` second.
pub fn header(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> {
alt((header_without_val, header_with_val))(input)
}
/// Parses a header field whose value is empty, i.e. a field name followed by
/// a colon, optional spaces and a line break, where the next line does not
/// begin with a space or tab.
///
/// On success returns the input that follows the line break together with
/// `(field-name, b"")`. Every failure is reported as `nom::Err::Error`.
pub fn header_without_val(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> {
if input.is_empty() {
return Err(nom::Err::Error(
(input, "header_without_val(): input is empty").into(),
));
} else if input.starts_with(b"\n") || input.starts_with(b"\r\n") {
return Err(nom::Err::Error(
(
input,
"header_without_val(): input starts with folding whitespace",
)
.into(),
));
}
// `ptr` is the cursor into `input`; `name` stays empty until a terminator
// for the field name has been found.
let mut ptr = 0;
let mut name: &[u8] = &[];
let mut has_colon = false;
/* field-name = 1*<any CHAR, excluding CTLs, SPACE, and ":"> */
for (i, x) in input.iter().enumerate() {
if input[i..].starts_with(b"\r\n") {
// Name ended by CRLF: the cursor is placed *after* the line break.
name = &input[0..i];
ptr = i + 2;
break;
} else if *x == b':' {
// Name ended by the colon: the cursor stays *on* the colon.
name = &input[0..i];
has_colon = true;
ptr = i;
break;
} else if *x == b'\n' {
// Name ended by LF: the cursor stays *on* the line feed.
name = &input[0..i];
ptr = i;
break;
} else if is_ctl_or_space!(*x) {
return Err(nom::Err::Error((
input,
r#"header_without_val(): field-name should contain "any CHAR, excluding CTLs, SPACE, and ":""#,
).into()));
}
}
// Also reached when the loop ran to the end without finding a terminator.
if name.is_empty() || input.len() <= ptr {
return Err(nom::Err::Error(
(input, "header_without_val(): not enough input").into(),
));
}
// Step over the colon (if the cursor is on one).
if input[ptr] == b':' {
ptr += 1;
has_colon = true;
if ptr >= input.len() {
return Err(nom::Err::Error(
(input, "header_without_val(): EOF after colon").into(),
));
}
}
if !has_colon {
return Err(nom::Err::Error(
(input, "header_without_val(): no colon found").into(),
));
}
// Skip spaces between the colon and the line break (tabs are not skipped here).
while input[ptr] == b' ' {
ptr += 1;
if ptr >= input.len() {
return Err(nom::Err::Error(
(
input,
"header_without_val(): expected start of next field, found EOF",
)
.into(),
));
}
}
if input[ptr..].starts_with(b"\n") {
ptr += 1;
if ptr >= input.len() {
return Err(nom::Err::Error(
(
input,
"header_without_val(): expected folding whitespace, found EOF",
)
.into(),
));
}
// The value is empty only if the next line is not a continuation line.
if input.len() > ptr && input[ptr] != b' ' && input[ptr] != b'\t' {
Ok((&input[ptr..], (name, b"")))
} else {
// NOTE(review): this branch is taken when the next byte IS a space or
// tab (the length was already checked above), so the "found EOF"
// wording of the message does not match the condition.
Err(nom::Err::Error(
(
input,
"header_without_val(): expected folding whitespace, found EOF",
)
.into(),
))
}
} else if input[ptr..].starts_with(b"\r\n") {
ptr += 2;
// NOTE(review): `ptr` was just advanced over two matched bytes, so it
// cannot exceed `input.len()` here; EOF is instead caught by the `else`
// branch below.
if ptr > input.len() {
return Err(nom::Err::Error(
(
input,
"header_without_val(): expected folding whitespace, found EOF",
)
.into(),
));
}
if input.len() > ptr && input[ptr] != b' ' && input[ptr] != b'\t' {
Ok((&input[ptr..], (name, b"")))
} else {
// Reached both at end of input and when a continuation line follows.
Err(nom::Err::Error(
(
&input[ptr..],
"header_without_val(): expected folding whitespace, found EOF",
)
.into(),
))
}
} else {
// Something other than a line break follows the colon: the header has a value.
Err(nom::Err::Error(
(
&input[ptr..],
"header_without_val(): expected folding whitespace (newline)",
)
.into(),
))
}
}
/// Extracts a header value, up to but excluding the line ending that
/// terminates it, and returns the input after that line ending.
///
/// A header can span multiple lines, eg:
///
/// ```text
/// Received: from -------------------- (-------------------------)
///  by --------------------- (--------------------- [------------------]) (-----------------------)
///  with ESMTP id ------------ for <------------------->;
///  Tue, 5 Jan 2016 21:30:44 +0100 (CET)
/// ```
///
/// A `\n` or `\r\n` therefore only ends the value when it is the last thing
/// in `input` or when the byte after it is neither a space nor a tab.
pub fn header_value(input: &[u8]) -> IResult<&[u8], &[u8]> {
    // `true` unless the byte at `idx` exists and starts a continuation line.
    let ends_field = |idx: usize| match input.get(idx) {
        Some(&b' ') | Some(&b'\t') => false,
        _ => true,
    };
    for (pos, &byte) in input.iter().enumerate() {
        let newline_len = if byte == b'\n' {
            1
        } else if input[pos..].starts_with(b"\r\n") {
            2
        } else {
            continue;
        };
        if ends_field(pos + newline_len) {
            return Ok((&input[pos + newline_len..], &input[..pos]));
        }
    }
    Err(nom::Err::Error(
        (
            input,
            "header_value(): expected new line after header value",
        )
            .into(),
    ))
}
/// Parses a single header field that carries a value into a `(name, value)`
/// tuple.
///
/// The value may start on the line after the colon; leading spaces and tabs
/// are skipped before the value is handed to [`header_value`].
pub fn header_with_val(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> {
    if input.is_empty() {
        return Err(nom::Err::Error(
            (input, "header_with_val(): empty input").into(),
        ));
    }
    if input.starts_with(b"\n") || input.starts_with(b"\r\n") {
        return Err(nom::Err::Error(
            (input, "header_with_val(): field name starts with new line").into(),
        ));
    }
    /* field-name = 1*<any CHAR, excluding CTLs, SPACE, and ":"> */
    // Find the first byte that either ends the name or is not allowed in it.
    let stop = input
        .iter()
        .position(|x| *x == b':' || is_ctl_or_space!(*x));
    let colon_idx = match stop {
        Some(idx) if input[idx] == b':' => Some(idx),
        Some(idx) => {
            return Err(nom::Err::Error(
                (
                    &input[idx..],
                    format!(
                        "header_with_val(): invalid character: {:?}",
                        input[idx] as char
                    ),
                )
                    .into(),
            ));
        }
        None => None,
    };
    // A missing colon and a colon at position zero both mean "no name".
    let (name, mut value) = match colon_idx {
        Some(idx) if idx > 0 => (&input[..idx], &input[idx + 1..]),
        _ => {
            return Err(nom::Err::Error(
                (input, "header_with_val(): found empty header name ").into(),
            ));
        }
    };
    if value.is_empty() {
        return Err(nom::Err::Error(
            (input, "header_with_val(): found EOF").into(),
        ));
    }
    // The value may begin on the following line.
    if value[0] == b'\n' {
        value = &value[1..];
    } else if value.starts_with(b"\r\n") {
        value = &value[2..];
    }
    let blanks = value
        .iter()
        .take_while(|b| **b == b' ' || **b == b'\t')
        .count();
    value = &value[blanks..];
    if value.is_empty() {
        return Err(nom::Err::Error(
            (input, "header_with_val(): found EOF").into(),
        ));
    }
    header_value(value).map(|(rest, value)| (rest, (name, value)))
}
/// Splits a raw message at the first empty line.
///
/// Returns `(rest, headers)` where `headers` is the header section including
/// the line ending of its last header, and `rest` begins with the line
/// ending that forms the empty separator line. Both `\n\n` and `\r\n\r\n`
/// separators are recognised.
///
/// Fails on empty input or when no empty line is found.
pub fn headers_raw(input: &[u8]) -> IResult<&[u8], &[u8]> {
    if input.is_empty() {
        return Err(nom::Err::Error(
            (input, "headers_raw(): input is empty").into(),
        ));
    }
    for i in 0..input.len() {
        if input[i..].starts_with(b"\n\n") {
            return Ok((&input[(i + 1)..], &input[0..=i]));
        } else if input[i..].starts_with(b"\r\n\r\n") {
            // Keep the complete `\r\n` of the last header in the header
            // slice; cutting after the `\r` would leave the last header
            // without a line ending.
            return Ok((&input[(i + 2)..], &input[0..i + 2]));
        }
    }
    Err(nom::Err::Error(
        (input, "headers_raw(): got EOF while looking for new line").into(),
    ))
}
}
pub mod attachments {
/*! Email attachments */
use super::*;
use crate::email::address::*;
use crate::email::attachment_types::{ContentDisposition, ContentDispositionKind};
/// Splits an attachment into its header fields and its raw body.
///
/// Two layouts are accepted: zero or more headers, an empty line and then
/// the body (everything that remains); or one or more headers that run up
/// to the end of the input.
pub fn attachment(input: &[u8]) -> IResult<&[u8], (std::vec::Vec<(&[u8], &[u8])>, &[u8])> {
    let (rest, parsed) = alt((
        separated_pair(
            many0(headers::header),
            alt((tag(b"\n"), tag(b"\r\n"))),
            take_while(|_| true),
        ),
        pair(headers::headers, generic::eof),
    ))(input)?;
    Ok((rest, parsed))
}
/// Locates the parts of a multipart body that are delimited by `boundary`.
///
/// Each part is returned as a [`StrBuilder`] whose `offset` is relative to
/// the start of the original `input`. The text before the first delimiter
/// line (`--boundary` followed by `\n` or `\r\n`) is skipped. The line
/// ending that precedes a delimiter is not counted as part of the part.
///
/// Errors are returned when no opening delimiter can be found, when a
/// boundary occurrence inside the body is not preceded by `--`, or when the
/// input ends prematurely.
pub fn multipart_parts<'a>(
    input: &'a [u8],
    boundary: &[u8],
) -> IResult<&'a [u8], Vec<StrBuilder>> {
    let mut ret: Vec<_> = Vec::new();
    let mut input = input;
    let mut offset = 0;
    // Skip the preamble: look for `--boundary` followed by a line ending.
    loop {
        let b_start = match input.find(boundary) {
            Some(v) => v,
            None => {
                return Err(nom::Err::Error(
                    (input, "multipart_parts(): could not find starting boundary").into(),
                ));
            }
        };
        if b_start < 2 {
            return Err(nom::Err::Error(
                (input, "multipart_parts(): malformed boundary").into(),
            ));
        }
        offset += b_start - 2;
        input = &input[b_start - 2..];
        let is_delimiter = input.starts_with(b"--");
        // Always step over this occurrence so that the next search makes
        // progress, even when the occurrence is not a delimiter.
        offset += 2 + boundary.len();
        input = &input[2 + boundary.len()..];
        if !is_delimiter {
            continue;
        }
        if input.starts_with(b"\n") {
            offset += 1;
            input = &input[1..];
            break;
        }
        if input.starts_with(b"\r\n") {
            offset += 2;
            input = &input[2..];
            break;
        }
        // `--boundary` not followed by a line ending: keep looking.
    }
    loop {
        if input.len() < boundary.len() + 4 {
            return Err(nom::Err::Error(
                (input, "multipart_parts(): found EOF").into(),
            ));
        }
        if let Some(end) = input.find(boundary) {
            if !input[..end].ends_with(b"--") {
                return Err(nom::Err::Error(
                    (input, "multipart_parts(): malformed boundary").into(),
                ));
            }
            // Everything before the `--`; drop the line ending that belongs
            // to the delimiter rather than to the part.
            let body = &input[..end - 2];
            let length = if body.ends_with(b"\r\n") {
                body.len() - 2
            } else {
                body.len().saturating_sub(1)
            };
            ret.push(StrBuilder { offset, length });
            offset += end + boundary.len();
            input = &input[end + boundary.len()..];
            // Stop unless the delimiter is followed by a line ending (either
            // flavour); this also covers the closing `--boundary--`.
            if input.len() < 2
                || (input[0] != b'\n' && &input[0..2] != b"\r\n")
                || &input[0..2] == b"--"
            {
                break;
            }
            if input[0] == b'\n' {
                offset += 1;
                input = &input[1..];
            } else if input[0..].starts_with(b"\r\n") {
                offset += 2;
                input = &input[2..];
            }
        } else {
            // No closing delimiter: the remainder is the last part.
            ret.push(StrBuilder {
                offset,
                length: input.len(),
            });
            break;
        }
    }
    Ok((input, ret))
}
/// Builds a parser that splits a multipart body into the raw bytes of each
/// part delimited by `boundary`.
///
/// The text before the first delimiter line (`--boundary` followed by `\n`
/// or `\r\n`) is skipped. The line ending that precedes a delimiter is not
/// included in the part. The returned parser fails when no opening
/// delimiter is found, when a boundary occurrence inside the body is not
/// preceded by `--`, or when the input ends prematurely.
fn parts_f(boundary: &[u8]) -> impl Fn(&[u8]) -> IResult<&[u8], Vec<&[u8]>> + '_ {
    move |input: &[u8]| -> IResult<&[u8], Vec<&[u8]>> {
        let mut ret: Vec<&[u8]> = Vec::new();
        let mut input = input;
        // Skip the preamble: look for `--boundary` followed by a line ending.
        loop {
            let b_start = match input.find(boundary) {
                Some(v) => v,
                None => {
                    return Err(nom::Err::Error(
                        (input, "parts_f(): could not find starting boundary").into(),
                    ));
                }
            };
            if b_start < 2 {
                return Err(nom::Err::Error(
                    (input, "parts_f(): malformed boundary").into(),
                ));
            }
            input = &input[b_start - 2..];
            let is_delimiter = input.starts_with(b"--");
            // Always step over this occurrence so that the next search makes
            // progress, even when the occurrence is not a delimiter.
            input = &input[2 + boundary.len()..];
            if !is_delimiter {
                continue;
            }
            if input.starts_with(b"\n") {
                input = &input[1..];
                break;
            }
            if input.starts_with(b"\r\n") {
                input = &input[2..];
                break;
            }
            // `--boundary` not followed by a line ending: keep looking.
        }
        loop {
            if input.len() < boundary.len() + 4 {
                return Err(nom::Err::Error((input, "parts_f(): found EOF").into()));
            }
            if let Some(end) = input.find(boundary) {
                if !input[..end].ends_with(b"--") {
                    return Err(nom::Err::Error(
                        (input, "parts_f(): malformed boundary").into(),
                    ));
                }
                // Everything before the `--`; drop the line ending that
                // belongs to the delimiter rather than to the part.
                let body = &input[..end - 2];
                let body_len = if body.ends_with(b"\r\n") {
                    body.len() - 2
                } else {
                    body.len().saturating_sub(1)
                };
                ret.push(&body[..body_len]);
                input = &input[end + boundary.len()..];
                // Stop unless the delimiter is followed by a line ending;
                // this also covers the closing `--boundary--`.
                if input.len() < 2
                    || (input[0] != b'\n' && &input[0..2] != b"\r\n")
                    || &input[0..2] == b"--"
                {
                    break;
                }
                if input[0] == b'\n' {
                    input = &input[1..];
                } else if input[0..].starts_with(b"\r\n") {
                    input = &input[2..];
                }
            } else {
                // No closing delimiter: the remainder is the last part.
                ret.push(input);
                break;
            }
        }
        Ok((input, ret))
    }
}
/// Splits a multipart body into the raw bytes of each of its parts.
///
/// When the regular splitter fails, the fallback advances to the first `--`
/// and then to the first occurrence of `boundary`, and yields an empty list
/// of parts.
pub fn parts<'a>(input: &'a [u8], boundary: &[u8]) -> IResult<&'a [u8], Vec<&'a [u8]>> {
    let no_parts = |input: &'a [u8]| -> IResult<&'a [u8], Vec<&'a [u8]>> {
        let (input, _) = take_until(&b"--"[..])(input)?;
        let (input, _) = take_until(boundary)(input)?;
        Ok((input, Vec::<&[u8]>::new()))
    };
    alt((parts_f(boundary), no_parts))(input)
}
/// Parses one `; name=value` parameter of a `Content-Type` header.
///
/// The value is either a double-quoted string (returned without the quotes)
/// or everything up to the next `;`. Leading whitespace is skipped before
/// each of the three components.
///
/// Caution: values should be passed through `phrase()`.
pub fn content_type_parameter(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> {
    let (after_semicolon, _) = tag(";")(input.ltrim())?;
    let (after_equals, name) =
        terminated(take_until("="), tag("="))(after_semicolon.ltrim())?;
    let (rest, value) = alt((
        delimited(tag("\""), take_until("\""), tag("\"")),
        is_not(";"),
    ))(after_equals.ltrim())?;
    Ok((rest, (name, value)))
}
/// Parses the value of a `Content-Type` header into
/// `(type, subtype, parameters)`.
///
/// The type is everything before the first `/` (after skipping leading
/// whitespace), the subtype is everything up to the first `;`, and the
/// parameters are zero or more [`content_type_parameter`]s.
pub fn content_type(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8], Vec<(&[u8], &[u8])>)> {
    let (rest, main_type) = take_until("/")(input.ltrim())?;
    let (rest, _) = tag("/")(rest)?;
    let (rest, sub_type) = is_not(";")(rest)?;
    let (rest, params) = many0(content_type_parameter)(rest)?;
    Ok((rest, (main_type, sub_type, params)))
}
/* Caution: values should be passed through phrase() */
pub fn content_disposition_parameter(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> {
let (input, _) = tag(";")(input.ltrim())?;
let (input, name) = terminated(take_until("="), tag("="))(input.ltrim())?;
let (input, value) = alt((
delimited(tag("\""), take_until("\""), tag("\"")),
is_not(";"),
))(input.ltrim())?;
Ok((input, (name, value)))
}
/// Parses the value of a `Content-Disposition` header.
///
/// The disposition kind is everything before the first `;` (or the whole
/// trimmed input). The recognised parameters `filename`, `size`,
/// `creation-date`, `modification-date` and `read-date` are stored in their
/// dedicated fields; the value of any other parameter is appended to
/// `parameter`. All values are decoded with `phrase()` first.
pub fn content_disposition(input: &[u8]) -> IResult<&[u8], ContentDisposition> {
    let (input, kind) = alt((take_until(";"), take_while(|_| true)))(input.trim())?;
    /* RFC2183 Content-Disposition: "Unrecognized disposition types should be treated as `attachment'." */
    let kind = if kind.trim().eq_ignore_ascii_case(b"inline") {
        ContentDispositionKind::Inline
    } else {
        ContentDispositionKind::Attachment
    };
    let mut ret = ContentDisposition {
        kind,
        ..ContentDisposition::default()
    };
    if input.is_empty() {
        return Ok((input, ret));
    }
    let (input, parameters) = many0(content_disposition_parameter)(input.ltrim())?;
    for (key, raw_value) in parameters {
        // Every branch below needs the decoded value, so decode it once.
        let (_, decoded) = super::encodings::phrase(raw_value, false)?;
        let value = String::from_utf8_lossy(&decoded);
        if key.eq_ignore_ascii_case(b"filename") {
            ret.filename = Some(value.into());
        } else if key.eq_ignore_ascii_case(b"size") {
            ret.size = Some(value.into());
        } else if key.eq_ignore_ascii_case(b"creation-date") {
            ret.creation_date = Some(value.into());
        } else if key.eq_ignore_ascii_case(b"modification-date") {
            ret.modification_date = Some(value.into());
        } else if key.eq_ignore_ascii_case(b"read-date") {
            ret.read_date = Some(value.into());
        } else {
            ret.parameter.push(value.into());
        }
    }
    Ok((input, ret))
}
}
pub mod encodings {
/*! Email encodings (quoted printable, MIME) */
use super::*;
use crate::email::attachment_types::Charset;
use data_encoding::BASE64_MIME;
use encoding::all::*;
use encoding::{DecoderTrap, Encoding};
pub fn quoted_printable_byte(input: &[u8]) -> IResult<&[u8], u8> {
if input.len() < 3 {
Err(nom::Err::Error(
(
input,
"quoted_printable_byte(): input too short to be quoted_printable",
)
.into(),
))
} else if input[0] == b'=' && is_hex_digit(input[1]) && is_hex_digit(input[2]) {
let a = if input[1] < b':' {
input[1] - 48
} else if input[1] < b'[' {
input[1] - 55
} else {
input[1] - 87
};
let b = if input[2] < b':' {
input[2] - 48
} else if input[2] < b'[' {
input[2] - 55
} else {
input[2] - 87
};
Ok((&input[3..], a * 16 + b))
} else if input.starts_with(b"\r\n") {
Ok((&input[2..], b'\n'))
} else {
Err(nom::Err::Error(
(input, "quoted_printable_byte(): invalid input").into(),
))
}
}
/// Decodes one encoded word of the form
/// `=?charset?encoding?encoded text?=` from the start of `input`.
///
/// The encoding is `b`/`B` (base64; undecodable text is kept as is) or
/// `q`/`Q` (quoted-printable, header flavour). Unless the charset is UTF-8
/// the decoded bytes are additionally converted with [`decode_charset`].
/// Empty input succeeds with an empty result.
fn encoded_word(input: &[u8]) -> IResult<&[u8], Vec<u8>> {
    if input.is_empty() {
        return Ok((&[], Vec::with_capacity(0)));
    }
    if input.len() < 5 {
        return Err(nom::Err::Error(
            (input, "encoded_word(): input too short to be encoded_word").into(),
        ));
    }
    if !input.starts_with(b"=?") {
        return Err(nom::Err::Error(
            (input, "encoded_word(): invalid input").into(),
        ));
    }
    /* find end of Charset tag:
     * =?charset?encoding?encoded text?=
     * ---------^
     */
    let tag_end_idx = match input[2..].iter().position(|b| *b == b'?') {
        Some(idx) => idx + 2,
        None => {
            return Err(nom::Err::Error(
                (input, "encoded_word(): expected end tag").into(),
            ));
        }
    };
    // The single encoding character must itself be followed by a `?`.
    if tag_end_idx + 2 >= input.len() || input[2 + tag_end_idx] != b'?' {
        return Err(nom::Err::Error(
            (input, "encoded_word(): expected valid end tag").into(),
        ));
    }
    /* See if input ends with "?=" and get ending index
     * =?charset?encoding?encoded text?=
     * -------------------------------^
     */
    let text_start = 3 + tag_end_idx;
    let encoded_end_idx = match input[text_start..]
        .windows(2)
        .position(|pair| pair == b"?=")
    {
        Some(idx) => idx + text_start,
        None => {
            return Err(nom::Err::Error(
                (input, "encoded_word(): expected input after end tag").into(),
            ));
        }
    };
    let encoded_text = &input[text_start..encoded_end_idx];
    let rest = &input[encoded_end_idx + 2..];
    let s: Vec<u8> = match input[tag_end_idx + 1] {
        b'b' | b'B' => BASE64_MIME
            .decode(encoded_text)
            .unwrap_or_else(|_| encoded_text.to_vec()),
        b'q' | b'Q' => match quoted_printable_bytes_header(encoded_text) {
            Ok((b"", s)) => s,
            _ => {
                return Err(nom::Err::Error(
                    (input, "encoded_word(): invalid quoted_printable").into(),
                ))
            }
        },
        _ => {
            return Err(nom::Err::Error(
                (input, "encoded_word(): expected 'b|q'").into(),
            ))
        }
    };
    let charset = Charset::from(&input[2..tag_end_idx]);
    if let Charset::UTF8 = charset {
        return Ok((rest, s));
    }
    match decode_charset(&s, charset) {
        Ok(v) => Ok((rest, v.into_bytes())),
        _ => Err(nom::Err::Error(
            (
                input,
                format!("encoded_word(): unknown charset {:?}", charset),
            )
                .into(),
        )),
    }
}
pub fn decode_charset(s: &[u8], charset: Charset) -> Result<String> {
match charset {
Charset::UTF8 | Charset::Ascii => Ok(String::from_utf8_lossy(s).to_string()),
Charset::ISO8859_1 => Ok(ISO_8859_1.decode(s, DecoderTrap::Strict)?),
Charset::ISO8859_2 => Ok(ISO_8859_2.decode(s, DecoderTrap::Strict)?),
Charset::ISO8859_3 => Ok(ISO_8859_3.decode(s, DecoderTrap::Strict)?),
Charset::ISO8859_4 => Ok(ISO_8859_4.decode(s, DecoderTrap::Strict)?),
Charset::ISO8859_5 => Ok(ISO_8859_5.decode(s, DecoderTrap::Strict)?),
Charset::ISO8859_6 => Ok(ISO_8859_6.decode(s, DecoderTrap::Strict)?),
Charset::ISO8859_7 => Ok(ISO_8859_7.decode(s, DecoderTrap::Strict)?),
Charset::ISO8859_8 => Ok(ISO_8859_8.decode(s, DecoderTrap::Strict)?),
Charset::ISO8859_10 => Ok(ISO_8859_10.decode(s, DecoderTrap::Strict)?),
Charset::ISO8859_13 => Ok(ISO_8859_13.decode(s, DecoderTrap::Strict)?),
Charset::ISO8859_14 => Ok(ISO_8859_14.decode(s, DecoderTrap::Strict)?),
Charset::ISO8859_15 => Ok(ISO_8859_15.decode(s, DecoderTrap::Strict)?),
Charset::ISO8859_16 => Ok(ISO_8859_16.decode(s, DecoderTrap::Strict)?),
Charset::GBK => Ok(GBK.decode(s, DecoderTrap::Strict)?),
Charset::Windows1250 => Ok(WINDOWS_1250.decode(s, DecoderTrap::Strict)?),
Charset::Windows1251 => Ok(WINDOWS_1251.decode(s, DecoderTrap::Strict)?),
Charset::Windows1252 => Ok(WINDOWS_1252.decode(s, DecoderTrap::Strict)?),
Charset::Windows1253 => Ok(WINDOWS_1253.decode(s, DecoderTrap::Strict)?),
Charset::KOI8R => Ok(KOI8_R.decode(s, DecoderTrap::Strict)?),
Charset::KOI8U => Ok(KOI8_U.decode(s, DecoderTrap::Strict)?),
Charset::BIG5 => Ok(BIG5_2003.decode(s, DecoderTrap::Strict)?),
Charset::GB2312 => {
Ok(encoding::codec::simpchinese::GBK_ENCODING.decode(s, DecoderTrap::Strict)?)
}
Charset::GB18030 => Ok(
encoding::codec::simpchinese::GB18030_ENCODING.decode(s, DecoderTrap::Strict)?
),
Charset::UTF16 => {
Ok(encoding::codec::utf_16::UTF_16LE_ENCODING.decode(s, DecoderTrap::Strict)?)
}
Charset::ISO2022JP => Ok(ISO_2022_JP.decode(s, DecoderTrap::Strict)?),
Charset::EUCJP => Ok(EUC_JP.decode(s, DecoderTrap::Strict)?),
}
}
fn quoted_printable_soft_break(input: &[u8]) -> IResult<&[u8], u8> {
if input.starts_with(b"=\n") {
Ok((&input[2..], input[1])) // `=\n` is an escaped space character.
} else if input.starts_with(b"=\r\n") {
Ok((&input[3..], input[2])) // `=\r\n` is an escaped space character.
} else {
Err(nom::Err::Error(
(input, "quoted_printable_soft_break(): invalid input").into(),
))
}
}
pub fn qp_underscore_header(input: &[u8]) -> IResult<&[u8], u8> {
let (rest, _) = tag(b"_")(input)?;
Ok((rest, 0x20))
}
/// Decodes quoted-printable text in header context.
///
/// With MIME, headers in quoted printable format can contain underscores
/// that represent spaces. In non-header context, an underscore is just a
/// plain underscore. Bytes that are neither an escape nor an underscore are
/// passed through unchanged.
pub fn quoted_printable_bytes_header(input: &[u8]) -> IResult<&[u8], Vec<u8>> {
    let (rest, decoded) =
        many0(alt((quoted_printable_byte, qp_underscore_header, le_u8)))(input)?;
    Ok((rest, decoded))
}
/// Decodes quoted-printable text outside of header context.
///
/// The alternatives are tried in this order for every output byte:
/// a soft line break followed by a line ending or by the end of input
/// (yields the break's `\n`), a soft line break followed by an escape or by
/// any byte (the break is dropped), an `=XY` escape, and finally any single
/// byte unchanged.
pub fn quoted_printable_bytes(input: &[u8]) -> IResult<&[u8], Vec<u8>> {
    let (rest, decoded) = many0(alt((
        terminated(quoted_printable_soft_break, tag("\n")),
        terminated(quoted_printable_soft_break, tag("\r\n")),
        terminated(quoted_printable_soft_break, generic::eof),
        preceded(quoted_printable_soft_break, quoted_printable_byte),
        preceded(quoted_printable_soft_break, le_u8),
        quoted_printable_byte,
        le_u8,
    )))(input)?;
    Ok((rest, decoded))
}
pub fn space(input: &[u8]) -> IResult<&[u8], ()> {
let (rest, _) =
take_while(|c: u8| c == b' ' || c == b'\t' || c == b'\r' || c == b'\n')(input)?;
Ok((rest, ()))
//eat_separator!());
}
/// Parses one or more encoded words separated by whitespace and returns
/// their decoded bytes concatenated, without any separator.
pub fn encoded_word_list(input: &[u8]) -> IResult<&[u8], SmallVec<[u8; 64]>> {
    let (input, words) = separated_nonempty_list(space, encoded_word)(input)?;
    let total_len: usize = words.iter().map(|word| word.len()).sum();
    let mut joined = SmallVec::with_capacity(total_len);
    for word in &words {
        joined.extend(word.iter().cloned());
    }
    Ok((input, joined))
}
/// Takes the plain (non-encoded) text at the start of `input`.
///
/// The text ends before a space that is directly followed by a valid
/// encoded word; without one, the whole input is taken.
pub fn ascii_token(input: &[u8]) -> IResult<&[u8], SmallVec<[u8; 64]>> {
    let (rest, plain) = alt((
        terminated(take_until(" =?"), peek(preceded(tag(b" "), encoded_word))),
        take_while(|_| true),
    ))(input)?;
    Ok((rest, SmallVec::from(plain)))
}
/// Decodes a header phrase that may mix encoded words
/// (`=?charset?encoding?text?=`) with plain text, returning the decoded
/// bytes.
///
/// When `multiline` is `true` the whitespace that trails a plain token is
/// kept as part of that token; otherwise the token ends at its first
/// whitespace byte and a single space is inserted in place of the
/// whitespace that follows. Empty input yields an empty result.
pub fn phrase(
input: &[u8],
multiline: /* preserve newlines */ bool,
) -> IResult<&[u8], Vec<u8>> {
if input.is_empty() {
return Ok((&[], Vec::with_capacity(0)));
}
let mut input = input.ltrim();
let mut acc: Vec<u8> = Vec::new();
// `ptr` is an index into the *current* `input`; `input` is re-sliced (and
// `ptr` reset to 0) every time an encoded word is consumed.
let mut ptr = 0;
while ptr < input.len() {
let mut flag = false;
// Check if word is encoded.
while let Ok((rest, v)) = encoded_word(&input[ptr..]) {
flag = true;
input = rest;
ptr = 0;
acc.extend(v);
// consume whitespace
while ptr < input.len() && (is_whitespace!(input[ptr])) {
ptr += 1;
}
// `encoded_word` succeeds on empty input, so stop explicitly at the end.
if ptr >= input.len() {
break;
}
}
// Whitespace between adjacent encoded words is dropped; a space is only
// emitted when plain text follows the last encoded word after whitespace.
if flag && ptr < input.len() && ptr != 0 {
acc.push(b' ');
}
// `end` is the absolute index of the next "=?" (or the end of input).
let end = input[ptr..].find(b"=?");
let end = end.unwrap_or_else(|| input.len() - ptr) + ptr;
let ascii_s = ptr;
let mut ascii_e = 0;
while ptr < end && !(is_whitespace!(input[ptr])) {
ptr += 1;
}
if !multiline {
ascii_e = ptr;
}
while ptr < input.len() && (is_whitespace!(input[ptr])) {
ptr += 1;
}
if multiline {
ascii_e = ptr;
}
if ptr >= input.len() {
acc.extend(ascii_token(&input[ascii_s..ascii_e])?.1);
break;
}
if ascii_s >= ascii_e {
/* We have the start of an encoded word but not the end, so parse it as ascii */
// NOTE(review): `find` returns an index relative to `ascii_s`, while
// the fallback value is absolute; the `Some` case looks like it is
// missing `+ ascii_s`. Adding it alone would make the next iteration
// start on the space and fail below, so confirm the intended
// behaviour before changing this.
ascii_e = input[ascii_s..]
.find(b" ")
.unwrap_or_else(|| ascii_s + input[ascii_s..].len());
ptr = ascii_e;
}
if ascii_s >= ascii_e {
return Err(nom::Err::Error(
(input, "phrase(): start of an encoded word but no end").into(),
));
}
acc.extend(ascii_token(&input[ascii_s..ascii_e])?.1);
// Whitespace was skipped after the token: represent it by one space.
if ptr != ascii_e {
acc.push(b' ');
}
}
Ok((&input[ptr..], acc))
}
}
pub mod address {
//! Parsing of address values and address-related headers.
//!
//! Implemented RFCs:
//!
//! - [RFC5322 "Internet Message Format"](https://tools.ietf.org/html/rfc5322)
//! - [RFC6532 "Internationalized Email Headers"](https://tools.ietf.org/html/rfc6532)
//! - [RFC2047 "MIME Part Three: Message Header Extensions for Non-ASCII Text"](https://tools.ietf.org/html/rfc2047)
use super::*;
use crate::email::address::*;
use crate::email::parser::generic::{
atom, cfws, dot_atom, dot_atom_text, dtext, phrase2, quoted_string,
};
/// Parses a mailbox of the form `display name <local@domain>` into an
/// `Address::Mailbox`.
///
/// The text up to and including the closing `>` is decoded with
/// `encodings::phrase` and stored as `raw`; `display_name` and
/// `address_spec` are offset/length pairs describing the name and the
/// address. Input shorter than three bytes, input starting with whitespace
/// and input without both an `@` and a `>` after the `<` is rejected.
pub fn display_addr(input: &[u8]) -> IResult<&[u8], Address> {
if input.is_empty() || input.len() < 3 {
Err(nom::Err::Error((input, "display_addr(): EOF").into()))
} else if !is_whitespace!(input[0]) {
let mut display_name = StrBuilder {
offset: 0,
length: 0,
};
// First pass: locate the '<'. The provisional name length is the index
// of '<' minus one (it is recomputed from the decoded text further down).
let mut flag = false;
for (i, b) in input[0..].iter().enumerate() {
if *b == b'<' {
display_name.length = i.saturating_sub(1); // if i != 0 { i - 1 } else { 0 };
flag = true;
break;
}
}
if !flag {
// No literal '<': it may be hidden inside an encoded word, so decode
// the input and, if a '<' shows up, parse the decoded bytes instead.
let (rest, output) = match super::encodings::phrase(input, false) {
Ok(v) => v,
_ => {
return Err(nom::Err::Error(
(input, "display_addr(): no '<' found").into(),
))
}
};
if output.contains(&b'<') {
let (_, address) = match display_addr(&output) {
Ok(v) => v,
_ => {
return Err(nom::Err::Error(
(input, "display_addr(): invalid input").into(),
))
}
};
return Ok((rest, address));
}
return Err(nom::Err::Error(
(input, "display_addr(): invalid input").into(),
));
}
// Second pass: starting after the provisional name, look for '@' and
// for the closing '>'. `end` is relative to `display_name.length + 2`.
let mut end = input.len();
let mut at_flag = false;
let mut flag = false;
for (i, b) in input[display_name.length + 2..].iter().enumerate() {
match *b {
b'@' => at_flag = true,
b'>' => {
end = i;
flag = true;
break;
}
_ => {}
}
}
if at_flag && flag {
// Decode everything up to and including the '>'.
let (_, raw) =
super::encodings::phrase(&input[0..end + display_name.length + 3], false)?;
// NOTE(review): this `unwrap` relies on the decoded text still
// containing a '<' — confirm that `phrase` cannot drop it.
let display_name_end = raw.find(b"<").unwrap();
display_name.length = raw[0..display_name_end].trim().len();
let address_spec = if display_name_end == 0 {
StrBuilder {
offset: 1,
length: end + 1,
}
} else {
StrBuilder {
offset: display_name_end + 1,
length: end,
}
};
// Exclude surrounding quotes from the display name.
if display_name.display(&raw).as_bytes().is_quoted() {
display_name.offset += 1;
display_name.length -= 2;
}
let rest_start = if input.len() > end + display_name.length + 2 {
end + display_name.length + 3
} else {
end + display_name.length + 2
};
Ok((
input.get(rest_start..).unwrap_or_default(),
Address::Mailbox(MailboxAddress {
raw,
display_name,
address_spec,
}),
))
} else {
Err(nom::Err::Error(
(input, "display_addr(): did not find both '@' and '>'").into(),
))
}
} else {
Err(nom::Err::Error(
(input, "display_addr(): unexpected whitespace").into(),
))
}
}
///`angle-addr = [CFWS] "<" addr-spec ">" [CFWS] / obs-angle-addr`
pub fn angle_addr(input: &[u8]) -> IResult<&[u8], Address> {
let (input, _) = opt(cfws)(input)?;
let (input, _) = tag("<")(input)?;
let (input, addr_spec) = addr_spec(input)?;
let (input, _) = tag(">")(input)?;
let (input, _) = opt(cfws)(input)?;
Ok((input, addr_spec))
}
///`obs-domain = atom *("." atom)`
///
/// Returns the atoms joined by dots. A dot that is not followed by an atom
/// is an error.
pub fn obs_domain(input: &[u8]) -> IResult<&[u8], Cow<'_, [u8]>> {
    let (mut input, first_atom) = context("obs_domain", atom)(input)?;
    let mut ret: Vec<u8> = first_atom.into();
    while input.starts_with(b".") {
        ret.push(b'.');
        input = &input[1..];
        match context("obs_domain", atom)(input) {
            Ok((after_atom, next_atom)) => {
                ret.extend_from_slice(&next_atom);
                input = after_atom;
            }
            Err(_) => {
                return Err(nom::Err::Error(
                    (input, "obs_domain(): expected <atom> after DOT").into(),
                ));
            }
        }
    }
    Ok((input, ret.into()))
}
///`local-part = dot-atom / quoted-string / obs-local-part`
///
/// `obs-local-part` is not implemented.
pub fn local_part(input: &[u8]) -> IResult<&[u8], Cow<'_, [u8]>> {
    let (rest, part) = alt((dot_atom, quoted_string))(input)?;
    Ok((rest, part))
}
///`domain = dot-atom / domain-literal / obs-domain`
///
/// The alternatives are tried in the order given by the grammar.
pub fn domain(input: &[u8]) -> IResult<&[u8], Cow<'_, [u8]>> {
    let (rest, parsed) = alt((dot_atom, domain_literal, obs_domain))(input)?;
    Ok((rest, parsed))
}
///`domain-literal = [CFWS] "[" *([FWS] dtext) [FWS] "]" [CFWS]`
pub fn domain_literal(input: &[u8]) -> IResult<&[u8], Cow<'_, [u8]>> {
use crate::email::parser::generic::fws;
let (input, first_opt_space) = context("domain_literal()", opt(cfws))(input)?;
let (input, _) = context("domain_literal()", tag("["))(input)?;
let (input, dtexts) = many0(pair(opt(fws), dtext))(input)?;
let (input, end_fws): (_, Option<_>) = context("domain_literal()", opt(fws))(input)?;
let (input, _) = context("domain_literal()", tag("]"))(input)?;
let (input, _) = context("domain_literal()", opt(cfws))(input)?;
let mut ret_s = vec![b'['];
if let Some(first_opt_space) = first_opt_space {
ret_s.extend_from_slice(&first_opt_space);
}
for (fws_opt, dtext) in dtexts {
if let Some(fws_opt) = fws_opt {
ret_s.extend_from_slice(&fws_opt);
}
ret_s.push(dtext);
}
if let Some(end_fws) = end_fws {
ret_s.extend_from_slice(&end_fws);
}
ret_s.push(b']');
Ok((input, ret_s.into()))
}
///`addr-spec = local-part "@" domain`
///
/// Returns an address without a display name whose e-mail is
/// `local-part@domain`. The parsed bytes come straight from the message and
/// are not guaranteed to be valid UTF-8, so they are converted lossily
/// instead of being reinterpreted unchecked.
pub fn addr_spec(input: &[u8]) -> IResult<&[u8], Address> {
    let (input, local_part) = context("addr_spec()", local_part)(input)?;
    let (input, _) = context("addr_spec()", tag("@"))(input)?;
    let (input, domain) = context("addr_spec()", domain)(input)?;
    Ok((
        input,
        Address::new(
            None,
            format!(
                "{}@{}",
                String::from_utf8_lossy(&local_part),
                String::from_utf8_lossy(&domain)
            ),
        ),
    ))
}
///Returns the raw `local_part` and `domain` parts.
///
///`addr-spec = local-part "@" domain`
pub fn addr_spec_raw(input: &[u8]) -> IResult<&[u8], (Cow<'_, [u8]>, Cow<'_, [u8]>)> {
    let (rest, local) = context("addr_spec()", local_part)(input)?;
    let (rest, _) = context("addr_spec()", tag("@"))(rest)?;
    let (rest, host) = context("addr_spec()", domain)(rest)?;
    Ok((rest, (local, host)))
}
///`display-name = phrase`
///
/// The phrase is additionally run through `encodings::phrase` to decode
/// encoded words; if that decoding fails, the undecoded phrase is returned.
pub fn display_name(input: &[u8]) -> IResult<&[u8], Vec<u8>> {
    let (rest, ret) = phrase2(input)?;
    let decoded = crate::email::parser::encodings::phrase(&ret, true)
        .ok()
        .map(|(_, bytes)| bytes);
    Ok((rest, decoded.unwrap_or(ret)))
}
///`name-addr = [display-name] angle-addr`
pub fn name_addr(input: &[u8]) -> IResult<&[u8], Address> {
let (input, (display_name, angle_addr)) = alt((
pair(map(display_name, |s| Some(s)), angle_addr),
map(angle_addr, |r| (None, r)),
))(input)?;
Ok((
input,
Address::new(
display_name.map(|v| to_str!(&v).to_string()),
angle_addr.get_email(),
),
))
}
///`mailbox = name-addr / addr-spec`
///
/// Note that the bare `addr-spec` form is attempted before `name-addr`.
pub fn mailbox(input: &[u8]) -> IResult<&[u8], Address> {
    let (rest, parsed) = alt((addr_spec, name_addr))(input)?;
    Ok((rest, parsed))
}
///`group-list = mailbox-list / CFWS / obs-group-list`
///
/// Yields the mailboxes of the list, or an empty list when only CFWS is
/// present. `obs-group-list` is not implemented.
pub fn group_list(input: &[u8]) -> IResult<&[u8], Vec<Address>> {
    ///`mailbox-list = (mailbox *("," mailbox)) / obs-mbox-list`
    fn mailbox_list(input: &[u8]) -> IResult<&[u8], Vec<Address>> {
        let (mut input, first) = mailbox(input)?;
        let mut members = vec![first];
        // Every comma must be followed by another mailbox.
        while input.starts_with(b",") {
            let (after, next) = mailbox(&input[1..])?;
            members.push(next);
            input = after;
        }
        Ok((input, members))
    }
    match mailbox_list(input) {
        Ok((rest, mailboxes)) => Ok((rest, mailboxes)),
        Err(_) => {
            let (rest, _) = cfws(input)?;
            Ok((rest, vec![]))
        }
    }
}
///`group = display-name ":" [group-list] ";" [CFWS]`
///
/// The display name is the output of encoded-word decoding and is therefore
/// not guaranteed to be valid UTF-8; it is converted lossily rather than
/// being reinterpreted unchecked. A missing group list yields a group
/// without members.
fn group(input: &[u8]) -> IResult<&[u8], Address> {
    let (input, display_name) = context("group()", display_name)(input)?;
    let (input, _) = context("group()", tag(":"))(input)?;
    let (input, group_list): (_, Option<Vec<Address>>) =
        context("group()", opt(group_list))(input)?;
    let (input, _) = context("group()", tag(";"))(input)?;
    let (input, _) = context("group()", opt(cfws))(input)?;
    Ok((
        input,
        Address::new_group(
            String::from_utf8_lossy(&display_name).into_owned(),
            group_list.unwrap_or_default(),
        ),
    ))
}
///```text
///address = mailbox / group
///```
///
/// A mailbox is tried first, then a group.
pub fn address(input: &[u8]) -> IResult<&[u8], Address> {
    let (rest, parsed) = alt((mailbox, group))(input)?;
    Ok((rest, parsed))
}
/// Parses a non-empty list of addresses separated by runs of commas and/or
/// spaces, after skipping leading whitespace.
pub fn rfc2822address_list(input: &[u8]) -> IResult<&[u8], SmallVec<[Address; 1]>> {
    let (rest, addresses) = separated_list_smallvec(is_a(", "), address)(input.ltrim())?;
    Ok((rest, addresses))
}
/// Decodes an address list header into a single display string.
///
/// The input is decoded either as a list of encoded words or as plain text
/// and then split on commas. From every element line breaks are removed,
/// tabs become spaces and surrounding whitespace is trimmed; the elements
/// are joined with single spaces.
pub fn address_list(input: &[u8]) -> IResult<&[u8], String> {
    let (input, decoded) = alt((
        super::encodings::encoded_word_list,
        super::encodings::ascii_token,
    ))(input)?;
    let items: Vec<&[u8]> = decoded.split(|c| *c == b',').collect();
    // Splitting always yields at least one element, so `len() - 1` is safe.
    let last = items.len() - 1;
    let string_len = items.iter().map(|item| item.trim().len()).sum::<usize>() + last;
    let mut joined = String::with_capacity(string_len);
    for (idx, item) in items.iter().enumerate() {
        joined.push_str(&String::from_utf8_lossy(
            item.replace(b"\n", b"")
                .replace(b"\r", b"")
                .replace(b"\t", b" ")
                .trim(),
        ));
        if idx != last {
            joined.push_str(" ");
        }
    }
    Ok((input, joined))
}
///`msg-id = [CFWS] "<" id-left "@" id-right ">" [CFWS]`
///
/// The resulting `MessageID` is built from two slices of the input: the ID
/// including its angle brackets, and the ID without them. Both slices are
/// derived from the lengths of `id-left` and `id-right`, so those parsers
/// must report the full number of bytes they matched.
pub fn msg_id(input: &[u8]) -> IResult<&[u8], MessageID> {
    ///`no-fold-literal = "[" *dtext "]"`
    pub fn no_fold_literal(input: &[u8]) -> IResult<&[u8], Cow<'_, [u8]>> {
        let orig_input = input;
        let (input, _) = tag("[")(input)?;
        let (input, ret) = many0(dtext)(input)?;
        let (input, _) = tag("]")(input)?;
        // The literal spans the opening bracket, every dtext byte and the
        // closing bracket.
        Ok((input, Cow::Borrowed(&orig_input[0..ret.len() + 2])))
    }
    ///`id-left = dot-atom-text / obs-id-left`
    pub fn id_left(input: &[u8]) -> IResult<&[u8], Cow<'_, [u8]>> {
        dot_atom_text(input)
    }
    ///`id-right = dot-atom-text / no-fold-literal / obs-id-right`
    pub fn id_right(input: &[u8]) -> IResult<&[u8], Cow<'_, [u8]>> {
        alt((dot_atom_text, no_fold_literal))(input)
    }
    let (input, _) = opt(cfws)(input)?;
    let orig_input = input;
    let (input, _) = tag("<")(input)?;
    let (input, id_left_) = id_left(input)?;
    let (input, _) = tag("@")(input)?;
    let (input, id_right_) = id_right(input)?;
    let (input, _) = tag(">")(input)?;
    let (input, _) = opt(cfws)(input)?;
    // "<" + id-left + "@" + id-right + ">"
    let id_len = id_left_.len() + id_right_.len();
    Ok((
        input,
        MessageID::new(&orig_input[..3 + id_len], &orig_input[1..2 + id_len]),
    ))
}
/// Parses zero or more consecutive message IDs.
pub fn msg_id_list(input: &[u8]) -> IResult<&[u8], Vec<MessageID>> {
    let (rest, ids) = many0(msg_id)(input)?;
    Ok((rest, ids))
}
use smallvec::SmallVec;
/// Builds a parser for a non-empty list of `f` elements separated by `sep`,
/// collecting the elements into a `SmallVec`.
///
/// The first element is mandatory. Parsing stops (successfully) at the first
/// recoverable error of either `sep` or `f`; in that case a separator that
/// is not followed by an element is left unconsumed. A parser that succeeds
/// without consuming input is reported as an `ErrorKind::SeparatedList`
/// error to prevent an endless loop.
pub fn separated_list_smallvec<I, O, Sep, E, F, G>(
    sep: G,
    f: F,
) -> impl FnMut(I) -> IResult<I, SmallVec<[O; 1]>, E>
where
    I: Clone + PartialEq,
    F: Fn(I) -> IResult<I, O, E>,
    G: Fn(I) -> IResult<I, Sep, E>,
    E: nom::error::ParseError<I>,
{
    move |start: I| {
        let mut res = SmallVec::new();
        // Parse the first element
        let mut i = match f(start.clone()) {
            Err(e) => return Err(e),
            Ok((after_first, first)) => {
                if after_first == start {
                    return Err(nom::Err::Error(E::from_error_kind(
                        after_first,
                        ErrorKind::SeparatedList,
                    )));
                }
                res.push(first);
                after_first
            }
        };
        loop {
            let after_sep = match sep(i.clone()) {
                Err(nom::Err::Error(_)) => return Ok((i, res)),
                Err(e) => return Err(e),
                Ok((after_sep, _)) => after_sep,
            };
            if after_sep == i {
                return Err(nom::Err::Error(E::from_error_kind(
                    after_sep,
                    ErrorKind::SeparatedList,
                )));
            }
            let (after_elem, elem) = match f(after_sep.clone()) {
                Err(nom::Err::Error(_)) => return Ok((i, res)),
                Err(e) => return Err(e),
                Ok(parsed) => parsed,
            };
            if after_elem == i {
                return Err(nom::Err::Error(E::from_error_kind(
                    after_elem,
                    ErrorKind::SeparatedList,
                )));
            }
            res.push(elem);
            i = after_elem;
        }
    }
}
}
#[cfg(test)]
mod tests {
use super::{address::*, encodings::*, *};
use crate::email::address::*;
use crate::make_address;
/// Checks that `phrase` decodes RFC 2047 encoded-words (both "B" and "Q"
/// encodings, several charsets, single- and multi-line inputs) and passes
/// plain text through unchanged.
#[test]
fn test_phrase() {
    // Three consecutive ISO-8859-7 "B" encoded-words.
    let words = b"=?iso-8859-7?B?W215Y291cnNlcy5udHVhLmdyIC0gyvXs4fTp6t4g6uHpIMri4e306ere?=
=?iso-8859-7?B?INb18+nq3l0gzd3hIMHt4erv3+358+c6IMzF0c/TIMHQz9TFy8XTzMHU?=
=?iso-8859-7?B?2c0gwiDUzC4gysHNLiDFzsXUwdPH0yAyMDE3LTE4OiDTx8zFydnTxw==?=";
    assert_eq!(
        "[mycourses.ntua.gr - Κυματική και Κβαντική Φυσική] Νέα Ανακοίνωση: ΜΕΡΟΣ ΑΠΟΤΕΛΕΣΜΑΤΩΝ Β ΤΜ. ΚΑΝ. ΕΞΕΤΑΣΗΣ 2017-18: ΣΗΜΕΙΩΣΗ",
        std::str::from_utf8(&phrase(words.trim(), false).unwrap().1).unwrap()
    );

    // UTF-8 "Q" encoded-words separated by single spaces.
    let words = b"=?UTF-8?Q?=CE=A0=CF=81=CF=8C=CF=83=CE=B8=CE=B5?= =?UTF-8?Q?=CF=84=CE=B7_=CE=B5=CE=BE=CE=B5=CF=84?= =?UTF-8?Q?=CE=B1=CF=83=CF=84=CE=B9=CE=BA=CE=AE?=";
    assert_eq!(
        "Πρόσθετη εξεταστική",
        std::str::from_utf8(&phrase(words.trim(), false).unwrap().1).unwrap()
    );

    // Plain prefix followed by encoded-words folded with "\n\t".
    let words = b"[Advcomparch] =?utf-8?b?zqPPhc68z4DOtc+BzrnPhs6/z4HOrCDPg861IGZs?=\n\t=?utf-8?b?dXNoIM67z4zOs8+JIG1pc3ByZWRpY3Rpb24gzrrOsc+Ezqwgz4TOt869?=\n\t=?utf-8?b?IM61zrrPhM6tzrvOtc+Dzrcgc3RvcmU=?=";
    assert_eq!(
        "[Advcomparch] Συμπεριφορά σε flush λόγω misprediction κατά την εκτέλεση store",
        std::str::from_utf8(&phrase(words.trim(), false).unwrap().1).unwrap()
    );

    let words = b"Re: [Advcomparch] =?utf-8?b?zqPPhc68z4DOtc+BzrnPhs6/z4HOrCDPg861IGZs?=
=?utf-8?b?dXNoIM67z4zOs8+JIG1pc3ByZWRpY3Rpb24gzrrOsc+Ezqwgz4TOt869?=
=?utf-8?b?IM61zrrPhM6tzrvOtc+Dzrcgc3RvcmU=?=";
    assert_eq!(
        "Re: [Advcomparch] Συμπεριφορά σε flush λόγω misprediction κατά την εκτέλεση store",
        std::str::from_utf8(&phrase(words.trim(), false).unwrap().1).unwrap()
    );

    // Input without any encoded-word.
    let words = b"sdf";
    assert_eq!(
        "sdf",
        std::str::from_utf8(&phrase(words, false).unwrap().1).unwrap()
    );

    // "B" and "Q" encodings mixed in one phrase.
    let words = b"=?iso-8859-7?b?U2VnIGZhdWx0IPP05+0g5er03evl8+cg9O/1?= =?iso-8859-7?q?_example_ru_n_=5Fsniper?=";
    assert_eq!(
        "Seg fault στην εκτέλεση του example ru n _sniper",
        std::str::from_utf8(&phrase(words, false).unwrap().1).unwrap()
    );

    let words = b"Re: [Advcomparch]
=?iso-8859-7?b?U2VnIGZhdWx0IPP05+0g5er03evl8+cg9O/1?=
=?iso-8859-7?q?_example_ru_n_=5Fsniper?=";
    assert_eq!(
        "Re: [Advcomparch] Seg fault στην εκτέλεση του example ru n _sniper",
        std::str::from_utf8(&phrase(words, false).unwrap().1).unwrap()
    );

    let words = r#"[internal] =?UTF-8?B?zp3Orc6/z4Igzp/OtM63zrPPjM+CIM6jz4XOs86zz4E=?=
=?UTF-8?B?zrHPhs6uz4I=?="#;
    assert_eq!(
        "[internal] Νέος Οδηγός Συγγραφής",
        std::str::from_utf8(&phrase(words.as_bytes(), false).unwrap().1).unwrap()
    );

    // `=e2=80=93` is U+2013 EN DASH and `=c2=a0` is U+00A0 NO-BREAK SPACE;
    // both are spelled as escapes so they cannot be lost or confused with
    // look-alike characters.
    let words = r#"=?UTF-8?Q?Re=3a_Climate_crisis_reality_check_=e2=80=93=c2=a0EcoHust?=
=?UTF-8?Q?ler?="#;
    assert_eq!(
        "Re: Climate crisis reality check \u{2013}\u{a0}EcoHustler",
        std::str::from_utf8(&phrase(words.as_bytes(), false).unwrap().1).unwrap()
    );

    // Same text in windows-1250: the payload starts with bytes 0x96 0xA0,
    // i.e. EN DASH followed by NO-BREAK SPACE.
    let words = r#"Re: Climate crisis reality check =?windows-1250?B?lqBFY29IdXN0?=
=?windows-1250?B?bGVy?="#;
    assert_eq!(
        "Re: Climate crisis reality check \u{2013}\u{a0}EcoHustler",
        std::str::from_utf8(&phrase(words.as_bytes(), false).unwrap().1).unwrap()
    );

    let words = r#"=?gb18030?B?zNrRtsbz0rXTys/k19S2r9eqt6LR6dak08q8/g==?="#;
    assert_eq!(
        "腾讯企业邮箱自动转发验证邮件",
        std::str::from_utf8(&phrase(words.as_bytes(), false).unwrap().1).unwrap()
    );
}
/// Parses a folded, comma separated address list and checks that the whole
/// input is consumed and every mailbox is returned in order.
#[test]
fn test_address_list() {
    let raw = b"Obit Oppidum <user@domain>,
list <list@domain.tld>, list2 <list2@domain.tld>,
Bobit Boppidum <user@otherdomain.com>, Cobit Coppidum <user2@otherdomain.com>, <user@domain.tld>";

    // Nothing may be left over, hence the empty slice as expected remainder.
    let expected = (
        &raw[0..0],
        smallvec::smallvec![
            make_address!("Obit Oppidum", "user@domain"),
            make_address!("list", "list@domain.tld"),
            make_address!("list2", "list2@domain.tld"),
            make_address!("Bobit Boppidum", "user@otherdomain.com"),
            make_address!("Cobit Coppidum", "user2@otherdomain.com"),
            make_address!("", "user@domain.tld")
        ],
    );
    let parsed = rfc2822address_list(raw).unwrap();

    assert_eq!(expected, parsed);
}
/// Placeholder for a multipart attachment parsing test.
///
/// The fixture file the test needs has not been added, so the intended body
/// is kept below as a comment. The test is marked `#[ignore]` so that the
/// harness reports it as ignored instead of counting a vacuous pass.
#[test]
#[ignore = "FIXME: add the fixture file this test needs"]
fn test_attachments() {
    //FIXME: add file
    /*
    use std::io::Read;
    let mut buffer: Vec<u8> = Vec::new();
    let _ = std::fs::File::open("").unwrap().read_to_end(&mut buffer);
    let boundary = b"b1_4382d284f0c601a737bb32aaeda53160";
    let (_, body) = match mail(&buffer) {
    Ok(v) => v,
    Err(_) => panic!(),
    };
    let attachments = parts(body, boundary).unwrap().1;
    assert_eq!(attachments.len(), 4);
    let v: Vec<&str> = attachments
    .iter()
    .map(|v| std::str::from_utf8(v).unwrap())
    .collect();
    //println!("attachments {:?}", v);
    */
}
/// Exercises `address` on a collection of mailbox and group inputs: plain
/// and angle-bracketed addr-specs, quoted and folded display names,
/// comments, RFC 2047 encoded-words and raw UTF-8 display names.
#[test]
fn test_addresses() {
    /// Parses `$raw` with `address` and asserts that the result is a mailbox
    /// whose display name is `$name` and whose address spec is `$addr`.
    macro_rules! assert_parse {
        ($name:literal, $addr:literal, $raw:literal) => {{
            let s = $raw.as_bytes();
            let r = address(s).unwrap().1;
            match r {
                Address::Mailbox(ref m) => {
                    assert_eq!(to_str!(m.display_name.display_bytes(&m.raw)), $name);
                    assert_eq!(to_str!(m.address_spec.display_bytes(&m.raw)), $addr);
                }
                // Report what was parsed instead of a bare `assert!(false)`.
                ref other => panic!(
                    "expected {:?} to parse as a mailbox, got {:?}",
                    $raw, other
                ),
            }
        }};
    }
    assert_parse!(
        "Σταύρος Μαλτέζος",
        "maltezos@central.ntua.gr",
        "=?iso-8859-7?B?0/Th/fHv8iDM4ev03ebv8g==?= <maltezos@central.ntua.gr>"
    );
    assert_parse!("", "user@domain", "user@domain");
    assert_parse!("", "user@domain", "<user@domain>");
    assert_parse!("", "user@domain", " <user@domain>");
    assert_parse!("Name", "user@domain", "Name <user@domain>");
    assert_parse!(
        "",
        "julia@ficdep.minitrue",
        "julia(outer party)@ficdep.minitrue"
    );
    assert_parse!(
        "Winston Smith",
        "winston.smith@recdep.minitrue",
        "\"Winston Smith\" <winston.smith@recdep.minitrue> (Records Department)"
    );
    assert_parse!(
        "John Q. Public",
        "JQB@bar.com",
        "\"John Q. Public\" <JQB@bar.com>"
    );
    assert_parse!(
        "John Q. Public",
        "JQB@bar.com",
        "John \"Q.\" Public <JQB@bar.com>"
    );
    assert_parse!(
        "John Q. Public",
        "JQB@bar.com",
        "\"John Q.\" Public <JQB@bar.com>"
    );
    assert_parse!(
        "John Q. Public",
        "JQB@bar.com",
        "John \"Q. Public\" <JQB@bar.com>"
    );
    assert_parse!(
        "Jeffrey Stedfast",
        "fejj@helixcode.com",
        "Jeffrey Stedfast <fejj@helixcode.com>"
    );
    assert_parse!(
        "this is\ta folded name",
        "folded@name.com",
        "this is\n\ta folded name <folded@name.com>"
    );
    assert_parse!(
        "Jeffrey fejj Stedfast",
        "fejj@helixcode.com",
        "Jeffrey fejj Stedfast <fejj@helixcode.com>"
    );
    assert_parse!(
        "Jeffrey fejj Stedfast",
        "fejj@helixcode.com",
        "Jeffrey \"fejj\" Stedfast <fejj@helixcode.com>"
    );
    assert_parse!(
        "Jeffrey \"fejj\" Stedfast",
        "fejj@helixcode.com",
        "\"Jeffrey \\\"fejj\\\" Stedfast\" <fejj@helixcode.com>"
    );
    assert_parse!(
        "Stedfast, Jeffrey",
        "fejj@helixcode.com",
        "\"Stedfast, Jeffrey\" <fejj@helixcode.com>"
    );
    assert_parse!(
        "",
        "fejj@helixcode.com",
        "fejj@helixcode.com (Jeffrey Stedfast)"
    );
    assert_parse!(
        "Jeffrey Stedfast",
        "fejj@helixcode.com",
        "Jeffrey Stedfast <fejj(nonrecursive block)@helixcode.(and a comment here)com>"
    );
    assert_parse!(
        "Jeffrey Stedfast",
        "fejj@helixcode.com",
        "Jeffrey Stedfast <fejj(recursive (comment) block)@helixcode.(and a comment here)com>"
    );
    assert_parse!(
        "Joe Q. Public",
        "john.q.public@example.com",
        "\"Joe Q. Public\" <john.q.public@example.com>"
    );
    assert_parse!("Mary Smith", "mary@x.test", "Mary Smith <mary@x.test>");
    assert_parse!("Mary Smith", "mary@x.test", "Mary Smith <mary@x.test>");
    assert_parse!("", "jdoe@example.org", "jdoe@example.org");
    assert_parse!("Who?", "one@y.test", "Who? <one@y.test>");
    assert_parse!("", "boss@nil.test", "<boss@nil.test>");
    assert_parse!(
        "Giant; \"Big\" Box",
        "sysservices@example.net",
        r#""Giant; \"Big\" Box" <sysservices@example.net>"#
    );
    //assert_eq!(
    // make_address!("Jeffrey Stedfast", "fejj@helixcode.com"),
    // address(b"Jeffrey Stedfast <fejj@helixcode.com.>")
    // .unwrap()
    // .1
    //);
    assert_parse!(
        "John <middle> Doe",
        "jdoe@machine.example",
        "\"John <middle> Doe\" <jdoe@machine.example>"
    );
    // RFC 2047 "Q"-encoded ISO-8859-1 address.
    assert_parse!(
        "Jörg Doe",
        "joerg@example.com",
        "=?iso-8859-1?q?J=F6rg_Doe?= <joerg@example.com>"
    );
    // RFC 2047 "Q"-encoded US-ASCII address. Dumb but legal.
    assert_parse!(
        "Jorg Doe",
        "joerg@example.com",
        "=?us-ascii?q?J=6Frg_Doe?= <joerg@example.com>"
    );
    // RFC 2047 "Q"-encoded UTF-8 address.
    assert_parse!(
        "Jörg Doe",
        "joerg@example.com",
        "=?utf-8?q?J=C3=B6rg_Doe?= <joerg@example.com>"
    );
    // RFC 2047 "Q"-encoded UTF-8 address with multiple encoded-words.
    assert_parse!(
        "JörgDoe",
        "joerg@example.com",
        "=?utf-8?q?J=C3=B6rg?= =?utf-8?q?Doe?= <joerg@example.com>"
    );
    assert_parse!(
        "André Pirard",
        "PIRARD@vm1.ulg.ac.be",
        "=?ISO-8859-1?Q?Andr=E9?= Pirard <PIRARD@vm1.ulg.ac.be>"
    );
    // Custom example of RFC 2047 "B"-encoded ISO-8859-1 address.
    assert_parse!(
        "Jörg",
        "joerg@example.com",
        "=?ISO-8859-1?B?SvZyZw==?= <joerg@example.com>"
    );
    // Custom example of RFC 2047 "B"-encoded UTF-8 address.
    assert_parse!(
        "Jörg",
        "joerg@example.com",
        "=?UTF-8?B?SsO2cmc=?= <joerg@example.com>"
    );
    // Custom example with "." in name. For issue 4938
    //assert_parse!(
    // "Asem H.",
    // "noreply@example.com",
    // "Asem H. <noreply@example.com>"
    //);
    assert_parse!(
        // RFC 6532 3.2.3, qtext /= UTF8-non-ascii
        "Gø Pher",
        "gopher@example.com",
        "\"Gø Pher\" <gopher@example.com>"
    );
    // RFC 6532 3.2, atext /= UTF8-non-ascii
    assert_parse!("µ", "micro@example.com", "µ <micro@example.com>");
    // RFC 6532 3.2.2, local address parts allow UTF-8
    //assert_parse!("Micro", "µ@example.com", "Micro <µ@example.com>");
    // RFC 6532 3.2.4, domains parts allow UTF-8
    //assert_parse!(
    // "Micro",
    // "micro@µ.example.com",
    // "Micro <micro@µ.example.com>"
    //);
    // Issue 14866
    assert_parse!(
        "",
        "emptystring@example.com",
        "\"\" <emptystring@example.com>"
    );
    // CFWS
    assert_parse!(
        "",
        "cfws@example.com",
        "<cfws@example.com> (CFWS (cfws)) (another comment)"
    );
    //"<cfws@example.com> () (another comment), <cfws2@example.com> (another)"
    assert_parse!(
        "Kristoffer Brånemyr",
        "ztion@swipenet.se",
        "=?iso-8859-1?q?Kristoffer_Br=E5nemyr?= <ztion@swipenet.se>"
    );
    assert_parse!(
        "François Pons",
        "fpons@mandrakesoft.com",
        "=?iso-8859-1?q?Fran=E7ois?= Pons <fpons@mandrakesoft.com>"
    );
    assert_parse!(
        "هل تتكلم اللغة الإنجليزية /العربية؟", "do.you.speak@arabic.com",
        "=?utf-8?b?2YfZhCDYqtiq2YPZhNmFINin2YTZhNi62Kkg2KfZhNil2YbYrNmE2YrYstmK2Kk=?=\n =?utf-8?b?IC/Yp9mE2LnYsdio2YrYqdif?= <do.you.speak@arabic.com>"
    );
    assert_parse!(
        "狂ったこの世で狂うなら気は確かだ。", "famous@quotes.ja",
        "=?utf-8?b?54uC44Gj44Gf44GT44Gu5LiW44Gn54uC44GG44Gq44KJ5rCX44Gv56K644GL44Gg?=\n =?utf-8?b?44CC?= <famous@quotes.ja>"
    );
    // Group syntax: named group with members, then an empty group.
    assert_eq!(
        Address::new_group(
            "A Group".to_string(),
            vec![
                make_address!("Ed Jones", "c@a.test"),
                make_address!("", "joe@where.test"),
                make_address!("John", "jdoe@one.test")
            ]
        ),
        address(b"A Group:Ed Jones <c@a.test>,joe@where.test,John <jdoe@one.test>;")
            .unwrap()
            .1
    );
    assert_eq!(
        Address::new_group("Undisclosed recipients".to_string(), vec![]),
        address(b"Undisclosed recipients:;").unwrap().1
    );
    assert_parse!(
        "狂ったこの世で狂うなら気は確かだ。",
        "famous@quotes.ja",
        "狂ったこの世で狂うなら気は確かだ。 <famous@quotes.ja>"
    );
}
/// Decodes a quoted-printable HTML fragment containing `=XX` escapes and
/// soft line breaks (`=` at end of line) and compares it with the expected
/// plain text.
#[test]
fn test_quoted_printable() {
    // The line following the first soft break starts with a space: a soft
    // break only removes the `=` and the newline, so that space is what
    // produces "background-color: #f3f5fa" in the expected output.
    let input = r#"<=21-- SEPARATOR -->
<tr>
<td style=3D=22padding-left: 10px;padding-right: 10px;background-color:=
 =23f3f5fa;=22>
<table width=3D=22100%=22 cellspacing=3D=220=22 cellpadding=3D=220=22 =
border=3D=220=22>
<tr>
<td style=3D=22height:5px;background-color: =23f3f5fa;=22>&nbsp;</td>
</tr>
</table>
</td>
</tr>"#;
    assert_eq!(
        quoted_printable_bytes(input.as_bytes())
            .as_ref()
            // Checked conversion: a decoder bug that yields invalid UTF-8
            // must fail the test rather than invoke undefined behaviour.
            .map(|(_, b)| std::str::from_utf8(b)
                .expect("decoded quoted-printable output should be valid UTF-8")),
        Ok(r#"<!-- SEPARATOR -->
<tr>
<td style="padding-left: 10px;padding-right: 10px;background-color: #f3f5fa;">
<table width="100%" cellspacing="0" cellpadding="0" border="0">
<tr>
<td style="height:5px;background-color: #f3f5fa;">&nbsp;</td>
</tr>
</table>
</td>
</tr>"#)
    );
}
/// Checks `msg_id` on `Message-ID` header values and `msg_id_list` on
/// `References` header values holding one and two ids.
#[test]
fn test_msg_id() {
    /// Parses `raw` as one header line, asserts that no input is left over
    /// and returns the header's value.
    fn header_value(raw: &str) -> &[u8] {
        let (rest, (_header_name, value)) = headers::header(raw.as_bytes()).unwrap();
        assert!(rest.is_empty());
        value
    }

    let first = msg_id(header_value("Message-ID: <1234@local.machine.example>\r\n"))
        .unwrap()
        .1;
    assert_eq!(first.val(), b"<1234@local.machine.example>");

    let second = msg_id(header_value("Message-ID: <testabcd.1234@silly.test>\r\n"))
        .unwrap()
        .1;
    assert_eq!(second.val(), b"<testabcd.1234@silly.test>");

    // A `References` header with a single id yields a one-element list.
    let single_reference = header_value("References: <1234@local.machine.example>\r\n");
    assert_eq!(&msg_id_list(single_reference).unwrap().1, &[first.clone()]);

    // Two ids separated by whitespace are returned in order.
    let two_references =
        header_value("References: <1234@local.machine.example> <3456@example.net>\r\n");
    let third = msg_id(b"<3456@example.net>").unwrap().1;
    assert_eq!(&msg_id_list(two_references).unwrap().1, &[first, third]);
}
}