Browse Source

melib/email/parser: impl RFC5322 parser for dates

tags/alpha-0.6.2
Manos Pitsidianakis 1 month ago
parent
commit
9b0180fdbc
WARNING! Although there is a key with this ID in the database it does not verify this commit! This commit is SUSPICIOUS. GPG Key ID: 73627C2F690DF710
5 changed files with 229 additions and 43 deletions
  1. +3
    -1
      melib/src/backends/imap/protocol_parser.rs
  2. +2
    -2
      melib/src/backends/jmap/objects/email.rs
  3. +3
    -1
      melib/src/backends/nntp/protocol_parser.rs
  4. +1
    -1
      melib/src/email.rs
  5. +220
    -38
      melib/src/email/parser.rs

+ 3
- 1
melib/src/backends/imap/protocol_parser.rs View File

@ -1231,7 +1231,9 @@ pub fn envelope(input: &[u8]) -> IResult<&[u8], Envelope> {
let mut env = Envelope::new(0);
if let Some(date) = date {
env.set_date(&date);
if let Ok(d) = crate::email::parser::generic::date(env.date_as_str().as_bytes()) {
if let Ok(d) =
crate::email::parser::dates::rfc5322_date(env.date_as_str().as_bytes())
{
env.set_datetime(d);
}
}

+ 2
- 2
melib/src/backends/jmap/objects/email.rs View File

@ -232,7 +232,7 @@ impl std::fmt::Display for EmailAddress {
impl std::convert::From<EmailObject> for crate::Envelope {
fn from(mut t: EmailObject) -> crate::Envelope {
let mut env = crate::Envelope::new(0);
if let Ok(d) = crate::email::parser::generic::date(env.date_as_str().as_bytes()) {
if let Ok(d) = crate::email::parser::dates::rfc5322_date(env.date_as_str().as_bytes()) {
env.set_datetime(d);
}
if let Some(ref mut sent_at) = t.sent_at {
@ -260,7 +260,7 @@ impl std::convert::From for crate::Envelope {
}
if let Some(v) = t.headers.get("Date") {
env.set_date(v.as_bytes());
if let Ok(d) = crate::email::parser::generic::date(v.as_bytes()) {
if let Ok(d) = crate::email::parser::dates::rfc5322_date(v.as_bytes()) {
env.set_datetime(d);
}
}

+ 3
- 1
melib/src/backends/nntp/protocol_parser.rs View File

@ -124,7 +124,9 @@ pub fn over_article(input: &str) -> IResult<&str, (UID, Envelope)> {
let mut env = Envelope::new(env_hash);
if let Some(date) = date {
env.set_date(date.as_bytes());
if let Ok(d) = crate::email::parser::generic::date(env.date_as_str().as_bytes()) {
if let Ok(d) =
crate::email::parser::dates::rfc5322_date(env.date_as_str().as_bytes())
{
env.set_datetime(d);
}
}

+ 1
- 1
melib/src/email.rs View File

@ -313,7 +313,7 @@ impl Envelope {
if let Some(x) = self.in_reply_to.clone() {
self.push_references(x);
}
if let Ok(d) = parser::generic::date(&self.date.as_bytes()) {
if let Ok(d) = parser::dates::rfc5322_date(&self.date.as_bytes()) {
self.set_datetime(d);
}
if self.message_id.raw().is_empty() {

+ 220
- 38
melib/src/email/parser.rs View File

@ -22,8 +22,8 @@
use crate::error::{MeliError, Result, ResultIntoMeliError};
use nom::{
branch::alt,
bytes::complete::{is_a, is_not, tag, take_until, take_while},
character::is_hex_digit,
bytes::complete::{is_a, is_not, tag, take, take_until, take_while, take_while1},
character::{is_alphabetic, is_digit, is_hex_digit},
combinator::peek,
combinator::{map, opt},
error::{context, ErrorKind},
@ -267,17 +267,232 @@ pub fn mail(input: &[u8]) -> Result<(Vec<(&[u8], &[u8])>, &[u8])> {
Ok(result)
}
pub mod generic {
pub mod dates {
use super::generic::*;
use super::*;
pub fn date(input: &[u8]) -> Result<crate::datetime::UnixTimestamp> {
let (_, mut parsed_result) = encodings::phrase(&eat_comments(input), false)?;
use crate::datetime::UnixTimestamp;
fn take_n_digits(n: usize) -> impl Fn(&[u8]) -> IResult<&[u8], &[u8]> {
move |input: &[u8]| {
let (input, ret) = take(n)(input)?;
if !ret.iter().all(|c| is_digit(*c)) {
return Err(nom::Err::Error(
(input, "take_n_digits(): not digits").into(),
));
}
Ok((input, ret))
}
}
///In the obsolete time zone, "UT" and "GMT" are indications of
///"Universal Time" and "Greenwich Mean Time", respectively, and are
///both semantically identical to "+0000".
///The remaining three character zones are the US time zones. The first
///letter, "E", "C", "M", or "P" stands for "Eastern", "Central",
///"Mountain", and "Pacific". The second letter is either "S" for
///"Standard" time, or "D" for "Daylight Savings" (or summer) time.
///Their interpretations are as follows:
/// EDT is semantically equivalent to -0400
/// EST is semantically equivalent to -0500
/// CDT is semantically equivalent to -0500
/// CST is semantically equivalent to -0600
/// MDT is semantically equivalent to -0600
/// MST is semantically equivalent to -0700
/// PDT is semantically equivalent to -0700
/// PST is semantically equivalent to -0800
///The 1 character military time zones were defined in a non-standard
///way in [RFC0822] and are therefore unpredictable in their meaning.
///The original definitions of the military zones "A" through "I" are
///equivalent to "+0100" through "+0900", respectively; "K", "L", and
///"M" are equivalent to "+1000", "+1100", and "+1200", respectively;
///"N" through "Y" are equivalent to "-0100" through "-1200".
///respectively; and "Z" is equivalent to "+0000". However, because of
///the error in [RFC0822], they SHOULD all be considered equivalent to
///"-0000" unless there is out-of-band information confirming their
///meaning.
///Other multi-character (usually between 3 and 5) alphabetic time zones
///have been used in Internet messages. Any such time zone whose
///meaning is not known SHOULD be considered equivalent to "-0000"
///unless there is out-of-band information confirming their meaning.
fn obs_zone(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> {
alt((
map(tag("UT"), |_| (&b"+"[..], &b"0000"[..])),
map(tag("GMT"), |_| (&b"+"[..], &b"0000"[..])),
map(tag("EDT"), |_| (&b"-"[..], &b"0400"[..])),
map(tag("EST"), |_| (&b"-"[..], &b"0500"[..])),
map(tag("CDT"), |_| (&b"-"[..], &b"0500"[..])),
map(tag("CST"), |_| (&b"-"[..], &b"0600"[..])),
map(tag("MDT"), |_| (&b"-"[..], &b"0600"[..])),
map(tag("MST"), |_| (&b"-"[..], &b"0700"[..])),
map(tag("PDT"), |_| (&b"-"[..], &b"0700"[..])),
map(tag("PST"), |_| (&b"-"[..], &b"0800"[..])),
map(take_while1(is_alphabetic), |_| (&b"-"[..], &b"0000"[..])),
))(input)
}
///zone = (FWS ( "+" / "-" ) 4DIGIT) / obs-zone
fn zone(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> {
alt((
|input| {
let (input, sign) = alt((tag("+"), tag("-")))(input)?;
let (input, zone) = take_n_digits(4)(input)?;
Ok((input, (sign, zone)))
},
obs_zone,
))(input)
}
///date-time = [ day-of-week "," ] date time [CFWS]
///date = day month year
///time = time-of-day zone
///time-of-day = hour ":" minute [ ":" second ]
///hour = 2DIGIT / obs-hour
///minute = 2DIGIT / obs-minute
///second = 2DIGIT / obs-second
fn date_time(input: &[u8]) -> IResult<&[u8], UnixTimestamp> {
let orig_input = input;
let mut accum: SmallVec<[u8; 32]> = SmallVec::new();
let (input, day_of_week) = opt(terminated(day_of_week, tag(",")))(input)?;
let (input, day) = day(input)?;
let (input, month) = month(input)?;
let (input, year) = year(input)?;
let (input, hour) = take_n_digits(2)(input)?;
let (input, _) = tag(":")(input)?;
let (input, minute) = take_n_digits(2)(input)?;
let (input, second) = opt(preceded(tag(":"), take_n_digits(2)))(input)?;
let (input, _) = fws(input)?;
let (input, (sign, zone)) = zone(input)?;
let (input, _) = opt(cfws)(input)?;
if let Some(day_of_week) = day_of_week {
accum.extend_from_slice(&day_of_week);
accum.extend_from_slice(b", ");
}
accum.extend_from_slice(&day);
accum.extend_from_slice(b" ");
accum.extend_from_slice(&month);
accum.extend_from_slice(b" ");
accum.extend_from_slice(&year);
accum.extend_from_slice(b" ");
accum.extend_from_slice(&hour);
accum.extend_from_slice(b":");
accum.extend_from_slice(&minute);
if let Some(second) = second {
accum.extend_from_slice(b":");
accum.extend_from_slice(&second);
}
accum.extend_from_slice(b" ");
accum.extend_from_slice(&sign);
accum.extend_from_slice(&zone);
match crate::datetime::rfc822_to_timestamp(accum.to_vec()) {
Ok(t) => Ok((input, t)),
Err(_err) => Err(nom::Err::Error(
(
orig_input,
"date_time(): could not convert date from rfc822",
)
.into(),
)),
}
}
///`day-of-week = ([FWS] day-name) / obs-day-of-week`
///day-name = "Mon" / "Tue" / "Wed" / "Thu" /
/// "Fri" / "Sat" / "Sun"
fn day_of_week(input: &[u8]) -> IResult<&[u8], Cow<'_, [u8]>> {
let (input, day_name) = alt((
tag("Mon"),
tag("Tue"),
tag("Wed"),
tag("Thu"),
tag("Fri"),
tag("Sat"),
tag("Sun"),
))(input)?;
Ok((input, day_name.into()))
}
///day = ([FWS] 1*2DIGIT FWS) / obs-day
fn day(input: &[u8]) -> IResult<&[u8], &[u8]> {
let (input, _) = opt(fws)(input)?;
let (input, ret) = alt((take_n_digits(2), take_n_digits(1)))(input)?;
let (input, _) = fws(input)?;
Ok((input, ret))
}
///month = "Jan" / "Feb" / "Mar" / "Apr" /
/// "May" / "Jun" / "Jul" / "Aug" /
/// "Sep" / "Oct" / "Nov" / "Dec"
fn month(input: &[u8]) -> IResult<&[u8], &[u8]> {
alt((
tag("Jan"),
tag("Feb"),
tag("Mar"),
tag("Apr"),
tag("May"),
tag("Jun"),
tag("Jul"),
tag("Aug"),
tag("Sep"),
tag("Oct"),
tag("Nov"),
tag("Dec"),
))(input)
}
///year = (FWS 4*DIGIT FWS) / obs-year
fn year(input: &[u8]) -> IResult<&[u8], &[u8]> {
let (input, _) = fws(input)?;
let (input, ret) = take_n_digits(4)(input)?;
let (input, _) = fws(input)?;
Ok((input, ret))
}
pub fn rfc5322_date(input: &[u8]) -> Result<crate::datetime::UnixTimestamp> {
date_time(input)
.or_else(|_| {
//let (_, mut parsed_result) = encodings::phrase(&eat_comments(input), false)?;
let (rest, parsed_result) = encodings::phrase(input, false)?;
let (_, ret) = match date_time(&parsed_result) {
Ok(v) => v,
Err(_) => {
return Err(nom::Err::Error(
(rest, "rfc5322_date(): invalid input").into(),
));
}
};
Ok((rest, ret))
})
.map(|(_, r)| r)
.map_err(|err: nom::Err<ParsingError<_>>| err.into())
/*
}
if let Some(pos) = parsed_result.find(b"-0000") {
parsed_result[pos] = b'+';
}
crate::datetime::rfc822_to_timestamp(parsed_result.trim())
*/
}
#[test]
fn test_date_new() {
let s = b"Thu, 31 Aug 2017 13:43:37 +0000 (UTC)";
let _s = b"Thu, 31 Aug 2017 13:43:37 +0000";
let __s = b"=?utf-8?q?Thu=2C_31_Aug_2017_13=3A43=3A37_-0000?=";
assert_eq!(rfc5322_date(s).unwrap(), rfc5322_date(_s).unwrap());
assert_eq!(rfc5322_date(_s).unwrap(), rfc5322_date(__s).unwrap());
let val = b"Fri, 23 Dec 0001 21:20:36 -0800 (PST)";
assert_eq!(rfc5322_date(val).unwrap(), 0);
}
}
pub mod generic {
use super::*;
///`%x21-7E`
fn vchar(input: &[u8]) -> IResult<&[u8], u8> {
if input.is_empty() {
@ -442,26 +657,6 @@ pub mod generic {
))(input)
}
fn eat_comments(input: &[u8]) -> Vec<u8> {
let mut in_comment = false;
input
.iter()
.fold(Vec::with_capacity(input.len()), |mut acc, x| {
if *x == b'(' && !in_comment {
in_comment = true;
acc
} else if *x == b')' && in_comment {
in_comment = false;
acc
} else if in_comment {
acc
} else {
acc.push(*x);
acc
}
})
}
///`unstructured = (*([FWS] VCHAR) *WSP) / obs-unstruct`
pub fn unstructured(input: &[u8]) -> Result<String> {
let (input, r): (_, Vec<(Option<Cow<'_, [u8]>>, u8)>) =
@ -2245,19 +2440,6 @@ mod tests {
}
#[test]
fn test_date() {
let s = b"Thu, 31 Aug 2017 13:43:37 +0000 (UTC)";
let _s = b"Thu, 31 Aug 2017 13:43:37 +0000";
let __s = b"=?utf-8?q?Thu=2C_31_Aug_2017_13=3A43=3A37_-0000?=";
debug!("{:?}, {:?}", date(s), date(_s));
debug!("{:?}", date(__s));
assert_eq!(date(s).unwrap(), date(_s).unwrap());
assert_eq!(date(_s).unwrap(), date(__s).unwrap());
let val = b"Fri, 23 Dec 0001 21:20:36 -0800 (PST)";
assert_eq!(date(val).unwrap(), 0);
}
#[test]
fn test_attachments() {
//FIXME: add file
return;

Loading…
Cancel
Save