Rewrite multipart attachment parser

embed
Manos Pitsidianakis 2018-08-17 23:07:29 +03:00
parent ae209e2545
commit 41d8793412
Signed by: Manos Pitsidianakis
GPG Key ID: 73627C2F690DF710
2 changed files with 74 additions and 46 deletions

View File

@ -20,6 +20,7 @@
*/ */
use data_encoding::BASE64_MIME; use data_encoding::BASE64_MIME;
use mailbox::email::parser; use mailbox::email::parser;
use mailbox::email::parser::BytesExt;
use std::fmt; use std::fmt;
use std::str; use std::str;
@ -51,10 +52,11 @@ pub struct Attachment {
impl fmt::Debug for Attachment { impl fmt::Debug for Attachment {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "Attachment {{\n content_type: {:?},\n content_transfer_encoding: {:?},\n raw: Vec of {} bytes\n }}", write!(f, "Attachment {{\n content_type: {:?},\n content_transfer_encoding: {:?},\n raw: Vec of {} bytes\n, body:\n{}\n }}",
self.content_type, self.content_type,
self.content_transfer_encoding, self.content_transfer_encoding,
self.raw.len()) self.raw.len(),
str::from_utf8(&self.raw).unwrap())
} }
} }
@ -81,7 +83,7 @@ impl AttachmentBuilder {
let offset = _boundary.as_ptr() as usize - value.as_ptr() as usize; let offset = _boundary.as_ptr() as usize - value.as_ptr() as usize;
let boundary = SliceBuild::new(offset, _boundary.len()); let boundary = SliceBuild::new(offset, _boundary.len());
let subattachments = Self::subattachments(&self.raw, boundary.get(&value)); let subattachments = Self::subattachments(&self.raw, boundary.get(&value));
eprintln!("boundary is {} and suba is {:?}", str::from_utf8(_boundary).unwrap(), subattachments); assert!(subattachments.len() > 0);
self.content_type = ContentType::Multipart { self.content_type = ContentType::Multipart {
boundary, boundary,
kind: if cst.eq_ignore_ascii_case(b"mixed") { kind: if cst.eq_ignore_ascii_case(b"mixed") {
@ -188,39 +190,37 @@ impl AttachmentBuilder {
} }
pub fn subattachments(raw: &[u8], boundary: &[u8]) -> Vec<Attachment> { pub fn subattachments(raw: &[u8], boundary: &[u8]) -> Vec<Attachment> {
eprintln!("subattachments boundary {}", str::from_utf8(boundary).unwrap());
match parser::attachments(raw, boundary).to_full_result() match parser::attachments(raw, boundary).to_full_result()
{ {
Ok(attachments) => { Ok(attachments) => {
let mut vec = Vec::with_capacity(attachments.len()); let mut vec = Vec::with_capacity(attachments.len());
for a in attachments { for a in attachments {
let mut builder = AttachmentBuilder::default(); let mut builder = AttachmentBuilder::default();
let body_slice = { let (headers, body) = match parser::attachment(&a).to_full_result() {
let (headers, body) = match parser::attachment(&a).to_full_result() { Ok(v) => v,
Ok(v) => v, Err(_) => {
Err(_) => { eprintln!("error in parsing attachment");
eprintln!("error in parsing attachment"); eprintln!("\n-------------------------------");
eprintln!("\n-------------------------------"); eprintln!("{}\n", ::std::string::String::from_utf8_lossy(a));
eprintln!("{}\n", ::std::string::String::from_utf8_lossy(a)); eprintln!("-------------------------------\n");
eprintln!("-------------------------------\n");
continue; continue;
}
};
for (name, value) in headers {
if name.eq_ignore_ascii_case(b"content-type") {
builder.content_type(value);
} else if name.eq_ignore_ascii_case(b"content-transfer-encoding") {
builder.content_transfer_encoding(value);
}
} }
};
let body_slice = {
let offset = body.as_ptr() as usize - a.as_ptr() as usize; let offset = body.as_ptr() as usize - a.as_ptr() as usize;
SliceBuild::new(offset, body.len()) SliceBuild::new(offset, body.len())
}; };
builder.raw = body_slice.get(a).into(); builder.raw = body_slice.get(a).into();
for (name, value) in headers {
if name.eq_ignore_ascii_case(b"content-type") {
builder.content_type(value);
} else if name.eq_ignore_ascii_case(b"content-transfer-encoding") {
builder.content_transfer_encoding(value);
}
}
vec.push(builder.build()); vec.push(builder.build());
} }
eprintln!("subattachments {:?}", vec);
vec vec
} }
a => { a => {
@ -303,7 +303,7 @@ impl Attachment {
pub fn text(&self) -> String { pub fn text(&self) -> String {
let mut text = Vec::with_capacity(self.raw.len()); let mut text = Vec::with_capacity(self.raw.len());
self.get_text_recursive(&mut text); self.get_text_recursive(&mut text);
String::from_utf8_lossy(&text).into() String::from_utf8_lossy(text.as_slice().trim()).into()
} }
pub fn description(&self) -> Vec<String> { pub fn description(&self) -> Vec<String> {
self.attachments().iter().map(|a| a.text()).collect() self.attachments().iter().map(|a| a.text()).collect()

View File

@ -22,8 +22,6 @@ use super::*;
use chrono; use chrono;
use data_encoding::BASE64_MIME; use data_encoding::BASE64_MIME;
use encoding::{DecoderTrap, Encoding}; use encoding::{DecoderTrap, Encoding};
use nom::FindSubstring;
use nom::Slice;
use nom::{is_hex_digit, le_u8}; use nom::{is_hex_digit, le_u8};
use nom::{ErrorKind, IResult, Needed}; use nom::{ErrorKind, IResult, Needed};
@ -519,20 +517,53 @@ fn message_id_peek(input: &[u8]) -> IResult<&[u8], &[u8]> {
named!(pub references<Vec<&[u8]>>, separated_list!(complete!(is_a!(" \n\t\r")), message_id_peek)); named!(pub references<Vec<&[u8]>>, separated_list!(complete!(is_a!(" \n\t\r")), message_id_peek));
fn attachments_f<'a>(input: &'a [u8], boundary: &[u8]) -> IResult<&'a [u8], Vec<&'a [u8]>> {
let mut ret: Vec<&[u8]> = Vec::new();
let mut input = input.ltrim();
loop {
let b_start = if let Some(v) = input.find(boundary) {
v
} else {
return IResult::Error(error_code!(ErrorKind::Custom(39)));
};
if b_start < 2 {
return IResult::Error(error_code!(ErrorKind::Custom(40)));
}
input = &input[b_start - 2..];
if &input[0..2] == b"--" {
input = &input[2 + boundary.len()..];
if &input[0..1] != b"\n" {
continue;
}
input = &input[1..];
break;
}
}
loop {
if input.len() < boundary.len() + 4 {
return IResult::Error(error_code!(ErrorKind::Custom(41)));
}
if let Some(end) = input.find(boundary) {
if &input[end - 2..end] != b"--" {
return IResult::Error(error_code!(ErrorKind::Custom(42)));
}
ret.push(&input[0..end - 2]);
input = &input[end + boundary.len()..];
if input.len() < 2 || input[0] != b'\n' || &input[0..2] == b"--" {
break;
}
input = &input[1..];
continue;
} else {
return IResult::Error(error_code!(ErrorKind::Custom(43)));
}
}
return IResult::Done(input, ret);
}
named_args!(pub attachments<'a>(boundary: &'a [u8]) < Vec<&'this_is_probably_unique_i_hope_please [u8]> >, named_args!(pub attachments<'a>(boundary: &'a [u8]) < Vec<&'this_is_probably_unique_i_hope_please [u8]> >,
alt_complete!(do_parse!( alt_complete!(call!(attachments_f, boundary) | do_parse!(
take_until_and_consume!(boundary) >>
vecs: many0!(complete!(do_parse!(
tag!("\n") >>
body: take_until_and_consume1!(boundary) >>
( { body } )))) >>
tag!(b"--") >>
tag!("\n") >>
take_while!(call!(|_| { true })) >>
( {
vecs
} )
) | do_parse!(
take_until_and_consume!(&b"--"[..]) >> take_until_and_consume!(&b"--"[..]) >>
take_until_and_consume!(boundary) >> take_until_and_consume!(boundary) >>
( { Vec::<&[u8]>::new() } )) ( { Vec::<&[u8]>::new() } ))
@ -725,20 +756,17 @@ mod tests {
fn test_attachments() { fn test_attachments() {
use std::io::Read; use std::io::Read;
let mut buffer: Vec<u8> = Vec::new(); let mut buffer: Vec<u8> = Vec::new();
let _ = std::fs::File::open( let _ = std::fs::File::open("./attachment_test")
"./Trash/cur/1490727777_3.37623.post,U=51565,FMD5=7e33429f656f1e6e9d79b29c3f82c57e:2,S", .unwrap()
) .read_to_end(&mut buffer);
.unwrap() let boundary = b"b1_4382d284f0c601a737bb32aaeda53160";
.read_to_end(&mut buffer);
let boundary = b"bb11dc565bd54a03b36cc119a6266ebd";
//let boundary = b"b1_4382d284f0c601a737bb32aaeda53160";
let boundary_len = boundary.len(); let boundary_len = boundary.len();
let (_, body) = match mail(&buffer).to_full_result() { let (_, body) = match mail(&buffer).to_full_result() {
Ok(v) => v, Ok(v) => v,
Err(_) => panic!(), Err(_) => panic!(),
}; };
let attachments = attachments(body, boundary).to_full_result().unwrap(); let attachments = attachments(body, boundary).to_full_result().unwrap();
assert_eq!(attachments.len(), 2); assert_eq!(attachments.len(), 4);
let v: Vec<&str> = attachments let v: Vec<&str> = attachments
.iter() .iter()
.map(|v| std::str::from_utf8(v).unwrap()) .map(|v| std::str::from_utf8(v).unwrap())