melib/email/parser: impl RFC6532

RFC6532 Internationalized Email Headers
memfd
Manos Pitsidianakis 2020-09-16 13:12:17 +03:00
parent 3618bdcffb
commit 096c2970b3
Signed by: Manos Pitsidianakis
GPG Key ID: 73627C2F690DF710
1 changed file with 119 additions and 33 deletions

View File

@ -506,22 +506,111 @@ pub mod dates {
pub mod generic {
use super::*;
fn byte_in_range<'a>(a: u8, b: u8) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], u8> {
move |input| {
if input.is_empty() {
return Err(nom::Err::Error((input, "empty input").into()));
}
if input[0] >= a && input[0] <= b {
Ok((&input[1..], input[0]))
} else {
Err(nom::Err::Error((input, "out of range").into()))
}
}
}
/// Recognizes one multi-byte UTF-8 sequence, using the octet-level ABNF of RFC3629
/// (written in the notation of RFC5234):
///
/// `UTF8-non-ascii = UTF8-2 / UTF8-3 / UTF8-4`
///
/// On success the matched two, three or four bytes are returned as a borrowed slice of
/// `input`, together with the remaining input.
fn utf8_non_ascii(input: &[u8]) -> IResult<&[u8], Cow<'_, [u8]>> {
    /// `UTF8-tail = %x80-BF`, as an inclusive `(low, high)` pair.
    const TAIL: (u8, u8) = (0x80, 0xbf);

    /// Common shape of every production: a lead byte within `lead`, a second byte within
    /// `second`, then `tails` additional `UTF8-tail` bytes. Yields the complete
    /// `2 + tails` byte sequence taken from the start of `input`.
    fn sequence<'a>(
        input: &'a [u8],
        lead: (u8, u8),
        second: (u8, u8),
        tails: usize,
    ) -> IResult<&'a [u8], &'a [u8]> {
        let (after_lead, _) = byte_in_range(lead.0, lead.1)(input)?;
        let (mut rest, _) = byte_in_range(second.0, second.1)(after_lead)?;
        for _ in 0..tails {
            rest = byte_in_range(TAIL.0, TAIL.1)(rest)?.0;
        }
        Ok((rest, &input[..2 + tails]))
    }

    /// `UTF8-2 = %xC2-DF UTF8-tail`
    fn utf8_2(input: &[u8]) -> IResult<&[u8], &[u8]> {
        sequence(input, (0xc2, 0xdf), TAIL, 0)
    }

    /// `UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
    /// %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )`
    fn utf8_3<'a>(input: &'a [u8]) -> IResult<&'a [u8], &'a [u8]> {
        alt((
            |i: &'a [u8]| sequence(i, (0xe0, 0xe0), (0xa0, 0xbf), 1),
            |i: &'a [u8]| sequence(i, (0xe1, 0xec), TAIL, 1),
            |i: &'a [u8]| sequence(i, (0xed, 0xed), (0x80, 0x9f), 1),
            |i: &'a [u8]| sequence(i, (0xee, 0xef), TAIL, 1),
        ))(input)
    }

    /// `UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
    /// %xF4 %x80-8F 2( UTF8-tail )`
    fn utf8_4<'a>(input: &'a [u8]) -> IResult<&'a [u8], &'a [u8]> {
        alt((
            |i: &'a [u8]| sequence(i, (0xf0, 0xf0), (0x90, 0xbf), 2),
            |i: &'a [u8]| sequence(i, (0xf1, 0xf3), TAIL, 2),
            |i: &'a [u8]| sequence(i, (0xf4, 0xf4), (0x80, 0x8f), 2),
        ))(input)
    }

    alt((utf8_2, utf8_3, utf8_4))(input).map(|(rest, bytes)| (rest, Cow::Borrowed(bytes)))
}
///`%x21-7E`
/// RFC6532 adds: `VCHAR =/ UTF8-non-ascii`
fn vchar(input: &[u8]) -> IResult<&[u8], u8> {
if input.is_empty() {
return Err(nom::Err::Error((input, "vchar(): empty input").into()));
}
if input[0] >= 0x21 && input[0] <= 0x7e {
Ok((&input[1..], input[0]))
} else {
Err(nom::Err::Error((input, "vchar(): out of range").into()))
}
byte_in_range(0x21, 0x7e)(input)
}
///`quoted-pair = ("\" (VCHAR / WSP)) / obs-qp`
///
/// Consumes a backslash followed by one escaped character and returns that character
/// without the backslash. With RFC6532 (`VCHAR =/ UTF8-non-ascii`) the escaped character
/// may be a multi-byte UTF-8 sequence, which is why the output is a byte slice rather
/// than a single byte: UTF-8 sequences are borrowed from `input`, while single ASCII /
/// whitespace bytes are returned as an owned one-element vector.
fn quoted_pair(input: &[u8]) -> IResult<&[u8], Cow<'_, [u8]>> {
    preceded(
        tag("\\"),
        alt((
            utf8_non_ascii,
            map(vchar, |byte| vec![byte].into()),
            map(wsp, |byte| vec![byte].into()),
        )),
    )(input)
}
///```text
@ -531,17 +620,17 @@ pub mod generic {
/// obs-ctext
///```
fn ctext(input: &[u8]) -> IResult<&[u8], ()> {
if input.is_empty() {
return Err(nom::Err::Error((input, "ctext(): empty input").into()));
}
if (input[0] >= 33 && input[0] <= 39)
|| (input[0] >= 42 && input[0] <= 91)
|| (input[0] >= 93 && input[0] <= 126)
{
Ok((&input[1..], ()))
} else {
Err(nom::Err::Error((input, "ctext(): out of range").into()))
}
alt((
map(
alt((
byte_in_range(33, 39),
byte_in_range(42, 91),
byte_in_range(93, 126),
)),
|_| (),
),
map(utf8_non_ascii, |_| ()),
))(input)
}
///```text
@ -967,7 +1056,7 @@ pub mod generic {
let mut at_least_one = false;
while let Ok((_input, atext_r)) = atext(input) {
at_least_one = true;
ret.push(atext_r);
ret.extend_from_slice(&atext_r);
input = _input;
}
if !at_least_one {
@ -985,7 +1074,7 @@ pub mod generic {
let mut at_least_one = false;
while let Ok((_input, atext_r)) = atext(input) {
at_least_one = true;
ret.push(atext_r);
ret.extend_from_slice(&atext_r);
input = _input;
}
if !at_least_one {
@ -998,7 +1087,7 @@ pub mod generic {
}
///`atext = ALPHA / DIGIT / ; Printable US-ASCII "!" / "#" / ; characters not including "$" / "%" / ; specials. Used for atoms. "&" / "'" / "*" / "+" / "-" / "/" / "=" / "?" / "^" / "_" / "`" / "{" / "|" / "}" / "~"`
pub fn atext(input: &[u8]) -> IResult<&[u8], u8> {
pub fn atext_ascii(input: &[u8]) -> IResult<&[u8], Cow<'_, [u8]>> {
if input.is_empty() {
return Err(nom::Err::Error((input, "atext(): empty input").into()));
}
@ -1009,12 +1098,16 @@ pub mod generic {
]
.contains(&input[0])
{
Ok((&input[1..], input[0]))
Ok((&input[1..], input[0..1].into()))
} else {
return Err(nom::Err::Error((input, "atext(): invalid byte").into()));
}
}
/// `atext` extended for internationalized headers (RFC6532: `atext =/ UTF8-non-ascii`).
///
/// Tries the ASCII production `atext_ascii` first and falls back to `utf8_non_ascii`.
/// On success returns the remaining input and the matched bytes: a single byte for the
/// ASCII case, or the whole multi-byte sequence for the UTF-8 case.
pub fn atext(input: &[u8]) -> IResult<&[u8], Cow<'_, [u8]>> {
alt((atext_ascii, utf8_non_ascii))(input)
}
///dot-atom = [CFWS] dot-atom-text [CFWS]
pub fn dot_atom(input: &[u8]) -> IResult<&[u8], Cow<'_, [u8]>> {
let (input, _) = opt(cfws)(input)?;
@ -1029,14 +1122,7 @@ pub mod generic {
/// obs-dtext ; "[", "]", or "\"
///```
pub fn dtext(input: &[u8]) -> IResult<&[u8], u8> {
if input.is_empty() {
return Err(nom::Err::Error((input, "dtext(): empty input").into()));
}
if (input[0] >= 33 && input[0] <= 90) || (input[0] > 94 && input[0] < 126) {
Ok((&input[1..], input[0]))
} else {
Err(nom::Err::Error((input, "dtext(): out of range").into()))
}
alt((byte_in_range(33, 90), byte_in_range(94, 125)))(input)
}
}