You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

parser.rs 32KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969
  1. /*
  2. * meli - parser module
  3. *
  4. * Copyright 2017 Manos Pitsidianakis
  5. *
  6. * This file is part of meli.
  7. *
  8. * meli is free software: you can redistribute it and/or modify
  9. * it under the terms of the GNU General Public License as published by
  10. * the Free Software Foundation, either version 3 of the License, or
  11. * (at your option) any later version.
  12. *
  13. * meli is distributed in the hope that it will be useful,
  14. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  16. * GNU General Public License for more details.
  17. *
  18. * You should have received a copy of the GNU General Public License
  19. * along with meli. If not, see <http://www.gnu.org/licenses/>.
  20. */
  21. use super::*;
  22. use chrono;
  23. use data_encoding::BASE64_MIME;
  24. use encoding::{DecoderTrap, Encoding};
  25. use nom::{is_hex_digit, le_u8};
  26. pub(super) use nom::{ErrorKind, IResult, Needed};
  27. use encoding::all::*;
  28. use std;
  29. macro_rules! is_whitespace {
  30. ($var:ident) => {
  31. $var == b' ' || $var == b'\t' || $var == b'\n' || $var == b'\r'
  32. };
  33. ($var:expr) => {
  34. $var == b' ' || $var == b'\t' || $var == b'\n' || $var == b'\r'
  35. };
  36. }
  37. pub trait BytesExt {
  38. fn rtrim(&self) -> &Self;
  39. fn ltrim(&self) -> &Self;
  40. fn trim(&self) -> &Self;
  41. fn find(&self, needle: &[u8]) -> Option<usize>;
  42. fn replace(&self, from: &[u8], to: &[u8]) -> Vec<u8>;
  43. }
  44. impl BytesExt for [u8] {
  45. fn rtrim(&self) -> &Self {
  46. if let Some(last) = self.iter().rposition(|b| !is_whitespace!(*b)) {
  47. &self[..=last]
  48. } else {
  49. &[]
  50. }
  51. }
  52. fn ltrim(&self) -> &Self {
  53. if let Some(first) = self.iter().position(|b| !is_whitespace!(*b)) {
  54. &self[first..]
  55. } else {
  56. &[]
  57. }
  58. }
  59. fn trim(&self) -> &[u8] {
  60. self.rtrim().ltrim()
  61. }
  62. // https://stackoverflow.com/a/35907071
  63. fn find(&self, needle: &[u8]) -> Option<usize> {
  64. self.windows(needle.len())
  65. .position(|window| window == needle)
  66. }
  67. fn replace(&self, from: &[u8], to: &[u8]) -> Vec<u8> {
  68. let mut ret = self.to_vec();
  69. if let Some(idx) = self.find(from) {
  70. ret.splice(idx..(idx + from.len()), to.iter().cloned());
  71. }
  72. ret
  73. }
  74. }
  75. fn quoted_printable_byte(input: &[u8]) -> IResult<&[u8], u8> {
  76. if input.len() < 3 {
  77. IResult::Incomplete(Needed::Size(1))
  78. } else if input[0] == b'=' && is_hex_digit(input[1]) && is_hex_digit(input[2]) {
  79. let a = if input[1] < b':' {
  80. input[1] - 48
  81. } else if input[1] < b'[' {
  82. input[1] - 55
  83. } else {
  84. input[1] - 87
  85. };
  86. let b = if input[2] < b':' {
  87. input[2] - 48
  88. } else if input[2] < b'[' {
  89. input[2] - 55
  90. } else {
  91. input[2] - 87
  92. };
  93. IResult::Done(&input[3..], a * 16 + b)
  94. } else {
  95. IResult::Error(error_code!(ErrorKind::Custom(43)))
  96. }
  97. }
  98. // Parser definition
  99. /* A header can span multiple lines, eg:
  100. *
  101. * Received: from -------------------- (-------------------------)
  102. * by --------------------- (--------------------- [------------------]) (-----------------------)
  103. * with ESMTP id ------------ for <------------------->;
  104. * Tue, 5 Jan 2016 21:30:44 +0100 (CET)
  105. */
  106. fn header_value(input: &[u8]) -> IResult<&[u8], &[u8]> {
  107. let input_len = input.len();
  108. for (i, x) in input.iter().enumerate() {
  109. if *x == b'\n'
  110. && (((i + 1) < input_len && input[i + 1] != b' ' && input[i + 1] != b'\t')
  111. || i + 1 == input_len)
  112. {
  113. return IResult::Done(&input[(i + 1)..], &input[0..i]);
  114. }
  115. }
  116. IResult::Incomplete(Needed::Unknown)
  117. }
  118. /* Parse a single header as a tuple */
  119. fn header_with_val(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> {
  120. if input.is_empty() {
  121. return IResult::Incomplete(Needed::Unknown);
  122. } else if input.starts_with(b"\n") {
  123. return IResult::Error(error_code!(ErrorKind::Custom(43)));
  124. }
  125. let mut ptr = 0;
  126. let mut name: &[u8] = &input[0..0];
  127. for (i, x) in input.iter().enumerate() {
  128. if *x == b':' {
  129. name = &input[0..i];
  130. ptr = i + 1;
  131. break;
  132. }
  133. }
  134. if name.is_empty() {
  135. return IResult::Error(error_code!(ErrorKind::Custom(43)));
  136. }
  137. if ptr > input.len() {
  138. return IResult::Error(error_code!(ErrorKind::Custom(43)));
  139. }
  140. if input[ptr] == b'\n' {
  141. ptr += 1;
  142. if ptr > input.len() {
  143. return IResult::Error(error_code!(ErrorKind::Custom(43)));
  144. }
  145. }
  146. while input[ptr] == b' ' || input[ptr] == b'\t' {
  147. ptr += 1;
  148. if ptr > input.len() {
  149. return IResult::Error(error_code!(ErrorKind::Custom(43)));
  150. }
  151. }
  152. match header_value(&input[ptr..]) {
  153. IResult::Done(rest, value) => IResult::Done(rest, (name, value)),
  154. IResult::Incomplete(needed) => IResult::Incomplete(needed),
  155. IResult::Error(code) => IResult::Error(code),
  156. }
  157. }
  158. fn header_without_val(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> {
  159. if input.is_empty() {
  160. return IResult::Incomplete(Needed::Unknown);
  161. } else if input.starts_with(b"\n") {
  162. return IResult::Error(error_code!(ErrorKind::Custom(43)));
  163. }
  164. let mut ptr = 0;
  165. let mut name: &[u8] = &input[0..0];
  166. for (i, x) in input.iter().enumerate() {
  167. if *x == b':' || *x == b'\n' {
  168. name = &input[0..i];
  169. ptr = i;
  170. break;
  171. }
  172. }
  173. if name.is_empty() {
  174. return IResult::Error(error_code!(ErrorKind::Custom(43)));
  175. }
  176. if input[ptr] == b':' {
  177. ptr += 1;
  178. if ptr > input.len() {
  179. return IResult::Incomplete(Needed::Unknown);
  180. }
  181. }
  182. while input[ptr] == b' ' {
  183. ptr += 1;
  184. if ptr > input.len() {
  185. return IResult::Incomplete(Needed::Unknown);
  186. }
  187. }
  188. if input[ptr..].starts_with(b"\n") {
  189. ptr += 1;
  190. if ptr > input.len() {
  191. return IResult::Incomplete(Needed::Unknown);
  192. }
  193. if input[ptr] != b' ' && input[ptr] != b'\t' {
  194. IResult::Done(&input[ptr..], (name, b""))
  195. } else {
  196. IResult::Error(error_code!(ErrorKind::Custom(43)))
  197. }
  198. } else {
  199. IResult::Error(error_code!(ErrorKind::Custom(43)))
  200. }
  201. }
// Parses a single header into a `(name, value)` pair: `header_without_val` is tried first,
// `header_with_val` is the fallback.
named!(
    header<(&[u8], &[u8])>,
    alt_complete!(call!(header_without_val) | call!(header_with_val))
);
// Parses one or more consecutive headers into a vector of `(name, value)` byte-slice pairs.
named!(pub headers<std::vec::Vec<(&[u8], &[u8])>>,
       many1!(complete!(header)));
  209. pub fn headers_raw(input: &[u8]) -> IResult<&[u8], &[u8]> {
  210. if input.is_empty() {
  211. return IResult::Incomplete(Needed::Unknown);
  212. }
  213. for (i, x) in input.iter().enumerate() {
  214. if *x == b'\n' && i + 1 < input.len() && input[i + 1] == b'\n' {
  215. return IResult::Done(&input[(i + 1)..], &input[0..=i]);
  216. }
  217. }
  218. IResult::Error(error_code!(ErrorKind::Custom(43)))
  219. }
// Skips the input up to the first blank line ("\n\n") and returns everything after the
// skipped part.
// NOTE(review): `take_until1!` presumably stops in front of the "\n\n" without consuming it,
// in which case the returned body starts with those two newlines — confirm against the nom
// version in use.
named!(pub body_raw<&[u8]>,
       do_parse!(
           take_until1!("\n\n") >>
           body: take_while!(call!(|_| true)) >>
           ( { body } )));
// Parses a whole message into `(headers, body)`: the header list, a single "\n" separator,
// then every remaining byte as the body.
named!(pub mail<(std::vec::Vec<(&[u8], &[u8])>, &[u8])>,
       separated_pair!(headers, tag!(b"\n"), take_while!(call!(|_| true))));
// Parses one attachment/MIME part into `(headers, body)`: optional leading whitespace is
// skipped, then zero or more headers are read and every remaining byte becomes the body.
named!(pub attachment<(std::vec::Vec<(&[u8], &[u8])>, &[u8])>,
       do_parse!(
            opt!(is_a!(" \n\t\r")) >>
            pair: pair!(many0!(complete!(header)), take_while!(call!(|_| true))) >>
            ( { pair } )));
  232. /* Header parsers */
  233. /* Encoded words
  234. *"=?charset?encoding?encoded text?=".
  235. */
  236. fn encoded_word(input: &[u8]) -> IResult<&[u8], Vec<u8>> {
  237. if input.is_empty() {
  238. return IResult::Done(&[], Vec::with_capacity(0));
  239. }
  240. if input.len() < 5 {
  241. return IResult::Incomplete(Needed::Unknown);
  242. } else if input[0] != b'=' || input[1] != b'?' {
  243. return IResult::Error(error_code!(ErrorKind::Custom(43)));
  244. }
  245. /* find end of Charset tag:
  246. * =?charset?encoding?encoded text?=
  247. * ---------^
  248. */
  249. let mut tag_end_idx = None;
  250. for (idx, b) in input[2..].iter().enumerate() {
  251. if *b == b'?' {
  252. tag_end_idx = Some(idx + 2);
  253. break;
  254. }
  255. }
  256. if tag_end_idx.is_none() {
  257. return IResult::Error(error_code!(ErrorKind::Custom(42)));
  258. }
  259. let tag_end_idx = tag_end_idx.unwrap();
  260. if input[2 + tag_end_idx] != b'?' {
  261. return IResult::Error(error_code!(ErrorKind::Custom(43)));
  262. }
  263. /* See if input ends with "?=" and get ending index
  264. * =?charset?encoding?encoded text?=
  265. * -------------------------------^
  266. */
  267. let mut encoded_end_idx = None;
  268. for i in (3 + tag_end_idx)..input.len() {
  269. if input[i] == b'?' && i < input.len() && input[i + 1] == b'=' {
  270. encoded_end_idx = Some(i);
  271. break;
  272. }
  273. }
  274. if encoded_end_idx.is_none() {
  275. return IResult::Error(error_code!(ErrorKind::Custom(44)));
  276. }
  277. let encoded_end_idx = encoded_end_idx.unwrap();
  278. let encoded_text = &input[3 + tag_end_idx..encoded_end_idx];
  279. let s: Vec<u8> = match input[tag_end_idx + 1] {
  280. b'b' | b'B' => match BASE64_MIME.decode(encoded_text) {
  281. Ok(v) => v,
  282. Err(_) => encoded_text.to_vec(),
  283. },
  284. b'q' | b'Q' => match quoted_printable_bytes_header(encoded_text) {
  285. IResult::Done(b"", s) => s,
  286. _ => return IResult::Error(error_code!(ErrorKind::Custom(45))),
  287. },
  288. _ => return IResult::Error(error_code!(ErrorKind::Custom(46))),
  289. };
  290. let charset = Charset::from(&input[2..tag_end_idx]);
  291. if let Charset::UTF8 = charset {
  292. IResult::Done(&input[encoded_end_idx + 2..], s)
  293. } else {
  294. match decode_charset(&s, charset) {
  295. Ok(v) => IResult::Done(&input[encoded_end_idx + 2..], v.into_bytes()),
  296. _ => IResult::Error(error_code!(ErrorKind::Custom(43))),
  297. }
  298. }
  299. }
  300. pub fn decode_charset(s: &[u8], charset: Charset) -> Result<String> {
  301. match charset {
  302. Charset::UTF8 | Charset::Ascii => Ok(String::from_utf8_lossy(s).to_string()),
  303. Charset::ISO8859_1 => Ok(ISO_8859_1.decode(s, DecoderTrap::Strict)?),
  304. Charset::ISO8859_2 => Ok(ISO_8859_2.decode(s, DecoderTrap::Strict)?),
  305. Charset::ISO8859_7 => Ok(ISO_8859_7.decode(s, DecoderTrap::Strict)?),
  306. Charset::ISO8859_15 => Ok(ISO_8859_15.decode(s, DecoderTrap::Strict)?),
  307. Charset::GBK => Ok(GBK.decode(s, DecoderTrap::Strict)?),
  308. Charset::Windows1251 => Ok(WINDOWS_1251.decode(s, DecoderTrap::Strict)?),
  309. Charset::Windows1252 => Ok(WINDOWS_1252.decode(s, DecoderTrap::Strict)?),
  310. Charset::Windows1253 => Ok(WINDOWS_1253.decode(s, DecoderTrap::Strict)?),
  311. // Unimplemented:
  312. Charset::GB2312 => Ok(String::from_utf8_lossy(s).to_string()),
  313. Charset::UTF16 => Ok(String::from_utf8_lossy(s).to_string()),
  314. Charset::BIG5 => Ok(String::from_utf8_lossy(s).to_string()),
  315. Charset::ISO2022JP => Ok(String::from_utf8_lossy(s).to_string()),
  316. }
  317. }
  318. fn quoted_printable_soft_break(input: &[u8]) -> IResult<&[u8], &[u8]> {
  319. if input.len() < 2 {
  320. IResult::Incomplete(Needed::Size(1))
  321. } else if input[0] == b'=' && input[1] == b'\n' {
  322. IResult::Done(&input[2..], &input[0..2]) // `=\n` is an escaped space character.
  323. } else {
  324. IResult::Error(error_code!(ErrorKind::Custom(43)))
  325. }
  326. }
// Matches a single `_` and yields a space (0x20) in its place.
named!(
    qp_underscore_header<u8>,
    do_parse!(tag!(b"_") >> ({ 0x20 }))
);
// With MIME, headers in quoted-printable format can contain underscores that represent spaces.
// In non-header context, an underscore is just a plain underscore.
//
// This is the header flavour: for each position it tries, in order, an `=XY` escape, an
// underscore (decoded to a space) and finally any single byte taken verbatim.
named!(
    pub quoted_printable_bytes_header<Vec<u8>>,
    many0!(alt_complete!(
        quoted_printable_byte | qp_underscore_header | le_u8
    ))
);
// Quoted-printable decoder for non-header context: underscores are NOT translated here.
// For each position it tries, in order: a soft break (`=\n`) followed by an `=XY` escape,
// a soft break followed by any byte, a plain `=XY` escape, and finally any single byte taken
// verbatim. The soft break itself is dropped by `preceded!`.
// NOTE(review): the original comment read "For atoms in Header values", which does not
// match the underscore remark on `quoted_printable_bytes_header` — confirm intended usage.
named!(
    pub quoted_printable_bytes<Vec<u8>>,
    many0!(alt_complete!(
        preceded!(quoted_printable_soft_break, quoted_printable_byte) |
        preceded!(quoted_printable_soft_break, le_u8) | quoted_printable_byte | le_u8
    ))
);
  347. fn display_addr(input: &[u8]) -> IResult<&[u8], Address> {
  348. if input.is_empty() || input.len() < 3 {
  349. IResult::Incomplete(Needed::Size(1))
  350. } else if !is_whitespace!(input[0]) {
  351. let mut display_name = StrBuilder {
  352. offset: 0,
  353. length: 0,
  354. };
  355. let mut flag = false;
  356. for (i, b) in input[0..].iter().enumerate() {
  357. if *b == b'<' {
  358. display_name.length = i.saturating_sub(1); // if i != 0 { i - 1 } else { 0 };
  359. flag = true;
  360. break;
  361. }
  362. }
  363. if !flag {
  364. let (rest, output) = match phrase(input) {
  365. IResult::Done(rest, raw) => (rest, raw),
  366. _ => return IResult::Error(error_code!(ErrorKind::Custom(43))),
  367. };
  368. if output.contains(&b'<') {
  369. match display_addr(&output) {
  370. IResult::Done(_, address) => return IResult::Done(rest, address),
  371. _ => return IResult::Error(error_code!(ErrorKind::Custom(43))),
  372. }
  373. }
  374. return IResult::Error(error_code!(ErrorKind::Custom(43)));
  375. }
  376. let mut end = input.len();
  377. let mut flag = false;
  378. for (i, b) in input[display_name.length + 2..].iter().enumerate() {
  379. match *b {
  380. b'@' => flag = true,
  381. b'>' => {
  382. end = i;
  383. break;
  384. }
  385. _ => {}
  386. }
  387. }
  388. if flag {
  389. match phrase(&input[0..end + display_name.length + 3]) {
  390. IResult::Error(e) => IResult::Error(e),
  391. IResult::Incomplete(i) => IResult::Incomplete(i),
  392. IResult::Done(rest, raw) => {
  393. let display_name_end = raw.find(b"<").unwrap();
  394. display_name.length = { raw[0..display_name_end].trim().len() };
  395. let address_spec = if display_name_end == 0 {
  396. StrBuilder {
  397. offset: 1,
  398. length: end + 1,
  399. }
  400. } else {
  401. StrBuilder {
  402. offset: display_name_end + 1,
  403. length: end,
  404. }
  405. };
  406. IResult::Done(
  407. rest,
  408. Address::Mailbox(MailboxAddress {
  409. raw,
  410. display_name,
  411. address_spec,
  412. }),
  413. )
  414. }
  415. }
  416. } else {
  417. IResult::Error(error_code!(ErrorKind::Custom(43)))
  418. }
  419. } else {
  420. IResult::Error(error_code!(ErrorKind::Custom(43)))
  421. }
  422. }
  423. fn addr_spec(input: &[u8]) -> IResult<&[u8], Address> {
  424. if input.is_empty() || input.len() < 3 {
  425. IResult::Incomplete(Needed::Size(1))
  426. } else if !is_whitespace!(input[0]) {
  427. let mut end = input[1..].len();
  428. let mut flag = false;
  429. for (i, b) in input[1..].iter().enumerate() {
  430. if *b == b'@' {
  431. flag = true;
  432. }
  433. if is_whitespace!(*b) {
  434. end = i;
  435. break;
  436. }
  437. }
  438. if flag {
  439. IResult::Done(
  440. &input[end..],
  441. Address::Mailbox(MailboxAddress {
  442. raw: input[0..=end].into(),
  443. display_name: StrBuilder {
  444. offset: 0,
  445. length: 0,
  446. },
  447. address_spec: StrBuilder {
  448. offset: 0,
  449. length: input[0..=end].len(),
  450. },
  451. }),
  452. )
  453. } else {
  454. IResult::Error(error_code!(ErrorKind::Custom(43)))
  455. }
  456. } else {
  457. IResult::Error(error_code!(ErrorKind::Custom(42)))
  458. }
  459. }
// Parses a single mailbox, preferring the `display name <address>` form (`display_addr`)
// and falling back to a bare address (`addr_spec`). The parser is wrapped in `ws!`.
named!(
    pub mailbox<Address>,
    ws!(alt_complete!(display_addr | addr_spec))
);
// Zero or more mailboxes in a row, as used for the members of a group.
named!(mailbox_list<Vec<Address>>, many0!(mailbox));
  465. /*
  466. * group of recipients eg. undisclosed-recipients;
  467. */
  468. fn group(input: &[u8]) -> IResult<&[u8], Address> {
  469. let mut flag = false;
  470. let mut dlength = 0;
  471. for (i, b) in input.iter().enumerate() {
  472. if *b == b':' {
  473. flag = true;
  474. dlength = i;
  475. break;
  476. }
  477. }
  478. if !flag {
  479. return IResult::Error(error_code!(ErrorKind::Custom(43)));
  480. }
  481. match mailbox_list(&input[dlength..]) {
  482. IResult::Error(e) => IResult::Error(e),
  483. IResult::Done(rest, vec) => {
  484. let size: usize =
  485. (rest.as_ptr() as usize).wrapping_sub((&input[0..] as &[u8]).as_ptr() as usize);
  486. IResult::Done(
  487. rest,
  488. Address::Group(GroupAddress {
  489. raw: input[0..size].into(),
  490. display_name: StrBuilder {
  491. offset: 0,
  492. length: dlength,
  493. },
  494. mailbox_list: vec,
  495. }),
  496. )
  497. }
  498. IResult::Incomplete(i) => IResult::Incomplete(i),
  499. }
  500. }
// A single address: either a mailbox or, failing that, a group. Wrapped in `ws!`.
named!(address<Address>, ws!(alt_complete!(mailbox | group)));
// A list of addresses separated by one or more commas, parsed into `Address` values.
named!(pub rfc2822address_list<Vec<Address>>, ws!( separated_list!(is_a!(","), address)));
// Decodes an address-list header value into a single display string: the value is decoded
// (encoded words or plain ASCII), split on commas, and the trimmed pieces are joined with
// single spaces. No `Address` structures are built here.
named!(pub address_list<String>, ws!(do_parse!(
    list: alt_complete!( encoded_word_list | ascii_token) >>
    ( {
        let list: Vec<&[u8]> = list.split(|c| *c == b',').collect();
        // Capacity hint: total trimmed length plus one separator between each pair.
        // `split` always yields at least one piece, so `list.len() - 1` cannot underflow.
        let string_len = list.iter().fold(0, |mut acc, x| { acc+=x.trim().len(); acc }) + list.len() - 1;
        let list_len = list.len();
        // Index of the piece currently being appended.
        let mut i = 0;
        list.iter().fold(String::with_capacity(string_len),
        |acc, x| {
            // `replace` only touches the first occurrence of its pattern in each piece.
            let mut acc = acc + &String::from_utf8_lossy(x.replace(b"\n", b"").replace(b"\t", b" ").trim());
            // Separate entries with a space, except after the last one.
            if i != list_len - 1 {
                acc.push_str(" ");
                i+=1;
            }
            acc
        })
    } )
)));
  521. fn eat_comments(input: &[u8]) -> Vec<u8> {
  522. let mut in_comment = false;
  523. input
  524. .iter()
  525. .fold(Vec::with_capacity(input.len()), |mut acc, x| {
  526. if *x == b'(' && !in_comment {
  527. in_comment = true;
  528. acc
  529. } else if *x == b')' && in_comment {
  530. in_comment = false;
  531. acc
  532. } else if in_comment {
  533. acc
  534. } else {
  535. acc.push(*x);
  536. acc
  537. }
  538. })
  539. }
  540. /*
  541. * Date should tokenize input and convert the tokens,
  542. * right now we expect input will have no extra spaces in between tokens
  543. *
  544. * We should use a custom parser here*/
  545. pub fn date(input: &[u8]) -> Option<chrono::DateTime<chrono::FixedOffset>> {
  546. let parsed_result = phrase(&eat_comments(input))
  547. .to_full_result()
  548. .unwrap()
  549. .replace(b"-", b"+");
  550. chrono::DateTime::parse_from_rfc2822(String::from_utf8_lossy(parsed_result.trim()).as_ref())
  551. .ok()
  552. }
// Parses a message id enclosed in angle brackets and returns the text between `<` and `>`
// (at least one byte), without the brackets.
named!(pub message_id<&[u8]>,
       complete!(delimited!(tag!("<"), take_until1!(">"), tag!(">")))
);
  556. fn message_id_peek(input: &[u8]) -> IResult<&[u8], &[u8]> {
  557. let input_length = input.len();
  558. if input.is_empty() {
  559. IResult::Incomplete(Needed::Size(1))
  560. } else if input_length == 2 || input[0] != b'<' {
  561. IResult::Error(error_code!(ErrorKind::Custom(43)))
  562. } else {
  563. for (i, &x) in input.iter().take(input_length).enumerate().skip(1) {
  564. if x == b'>' {
  565. return IResult::Done(&input[i + 1..], &input[0..=i]);
  566. }
  567. }
  568. IResult::Incomplete(Needed::Unknown)
  569. }
  570. }
// Parses a whitespace-separated list of message ids; each returned id keeps its angle
// brackets (see `message_id_peek`).
named!(pub references<Vec<&[u8]>>, separated_list!(complete!(is_a!(" \n\t\r")), message_id_peek));
  572. fn attachments_f<'a>(input: &'a [u8], boundary: &[u8]) -> IResult<&'a [u8], Vec<&'a [u8]>> {
  573. let mut ret: Vec<&[u8]> = Vec::new();
  574. let mut input = input.ltrim();
  575. loop {
  576. let b_start = if let Some(v) = input.find(boundary) {
  577. v
  578. } else {
  579. return IResult::Error(error_code!(ErrorKind::Custom(39)));
  580. };
  581. if b_start < 2 {
  582. return IResult::Error(error_code!(ErrorKind::Custom(40)));
  583. }
  584. input = &input[b_start - 2..];
  585. if &input[0..2] == b"--" {
  586. input = &input[2 + boundary.len()..];
  587. if &input[0..1] != b"\n" {
  588. continue;
  589. }
  590. input = &input[1..];
  591. break;
  592. }
  593. }
  594. loop {
  595. if input.len() < boundary.len() + 4 {
  596. return IResult::Error(error_code!(ErrorKind::Custom(41)));
  597. }
  598. if let Some(end) = input.find(boundary) {
  599. if &input[end - 2..end] != b"--" {
  600. return IResult::Error(error_code!(ErrorKind::Custom(42)));
  601. }
  602. ret.push(&input[0..end - 2]);
  603. input = &input[end + boundary.len()..];
  604. if input.len() < 2 || input[0] != b'\n' || &input[0..2] == b"--" {
  605. break;
  606. }
  607. input = &input[1..];
  608. continue;
  609. } else {
  610. return IResult::Error(error_code!(ErrorKind::Custom(43)));
  611. }
  612. }
  613. IResult::Done(input, ret)
  614. }
// Splits a multipart body into parts via `attachments_f`. If that fails, the fallback
// consumes the input through the first "--" and then through the boundary, and yields an
// empty list.
// NOTE(review): the unusual lifetime name in the return type presumably has to match the
// lifetime that this nom version's `named_args!` expansion gives its input — confirm before
// renaming it.
named_args!(pub attachments<'a>(boundary: &'a [u8]) < Vec<&'this_is_probably_unique_i_hope_please [u8]> >,
    alt_complete!(call!(attachments_f, boundary) | do_parse!(
        take_until_and_consume!(&b"--"[..]) >>
        take_until_and_consume!(boundary) >>
        ( { Vec::<&[u8]>::new() } ))
));
// Parses one `; name=value` parameter of a Content-Type header into `(name, value)`.
// The value is either a double-quoted string (returned without the quotes) or everything up
// to the next `;`.
named!(
    content_type_parameter<(&[u8], &[u8])>,
    do_parse!(
        tag!(";")
            >> name: terminated!(ws!(take_until!("=")), tag!("="))
            >> value:
                ws!(alt_complete!(
                    delimited!(tag!("\""), take_until!("\""), tag!("\"")) | is_not!(";")
                ))
            >> ({ (name, value) })
    )
);
// Parses a Content-Type header value into `(type, subtype, parameters)`: the type is the
// text before `/`, the subtype runs up to the first `;`, and the parameters are zero or more
// `content_type_parameter`s.
named!(pub content_type< (&[u8], &[u8], Vec<(&[u8], &[u8])>) >,
       do_parse!(
           _type: take_until!("/") >>
           tag!("/") >>
           _subtype: is_not!(";") >>
           parameters: many0!(complete!(content_type_parameter)) >>
           ( {
               (_type, _subtype, parameters)
           } )
           ));
// Consumes a run of separator bytes: space, tab, carriage return and line feed.
named!(pub space, eat_separator!(&b" \t\r\n"[..]));
// Parses one or more encoded words separated by whitespace and concatenates their decoded
// bytes into a single buffer (the separating whitespace is not part of the output).
named!(
    encoded_word_list<Vec<u8>>,
    ws!(do_parse!(
        list: separated_nonempty_list!(call!(space), encoded_word)
            >> ({
                // Total decoded length, used to allocate the output once.
                let list_len = list.iter().fold(0, |mut acc, x| {
                    acc += x.len();
                    acc
                });
                list.iter()
                    .fold(Vec::with_capacity(list_len), |mut acc, x| {
                        acc.append(&mut x.clone());
                        acc
                    })
            })
    ))
);
// Returns a run of plain (non-encoded) bytes as an owned vector. If the input contains
// " =?" and what follows the space parses as an encoded word, the token stops in front of
// that space (the `peek!` does not consume it); otherwise the whole input is taken.
named!(
    ascii_token<Vec<u8>>,
    do_parse!(
        word: alt_complete!(
            terminated!(
                take_until1!(" =?"),
                peek!(preceded!(tag!(b" "), call!(encoded_word)))
            ) | take_while!(call!(|_| true))
        ) >> ({ word.into() })
    )
);
/// Decodes a header phrase that may mix plain text and encoded words
/// (`=?charset?encoding?text?=`) into a single byte buffer.
///
/// Leading whitespace is dropped, runs of whitespace between plain words collapse to a
/// single space, and consecutive encoded words are joined without the whitespace that
/// separates them. Empty input yields an empty vector. The function always returns `Done`.
pub fn phrase(input: &[u8]) -> IResult<&[u8], Vec<u8>> {
    if input.is_empty() {
        return IResult::Done(&[], Vec::with_capacity(0));
    }
    let mut input = input.ltrim();
    let mut acc: Vec<u8> = Vec::new();
    // Cursor into `input`; reset to 0 whenever `input` is re-sliced after an encoded word.
    let mut ptr = 0;
    while ptr < input.len() {
        // Set once at least one encoded word was decoded in this iteration.
        let mut flag = false;
        // Check if word is encoded.
        while let IResult::Done(rest, v) = encoded_word(&input[ptr..]) {
            flag = true;
            input = rest;
            ptr = 0;
            acc.extend(v);
            // consume whitespace
            while ptr < input.len() && (is_whitespace!(input[ptr])) {
                ptr += 1;
            }
            if ptr >= input.len() {
                break;
            }
        }
        // Whitespace followed the encoded word(s) and more input remains: emit one separator.
        if flag && ptr < input.len() && ptr != 0 {
            acc.push(b' ');
        }
        // Absolute index of the next "=?" (a potential encoded word), or the end of input.
        let end = input[ptr..].find(b"=?");
        let end = end.unwrap_or_else(|| input.len() - ptr) + ptr;
        // The plain word spans `ascii_s..ascii_e`: up to the next whitespace or `end`.
        let ascii_s = ptr;
        let mut ascii_e;
        while ptr < end && !(is_whitespace!(input[ptr])) {
            ptr += 1;
        }
        ascii_e = ptr;
        // Skip the whitespace that follows the word.
        while ptr < input.len() && (is_whitespace!(input[ptr])) {
            ptr += 1;
        }
        if ptr >= input.len() {
            // Last word: append it and stop.
            acc.extend(
                ascii_token(&input[ascii_s..ascii_e])
                    .to_full_result()
                    .unwrap(),
            );
            break;
        }
        if ascii_s == ascii_e {
            /* We have the start of an encoded word but not the end, so parse it as ascii */
            // NOTE(review): `find` returns an offset relative to `ascii_s`, but it is stored
            // and used as an absolute index below — looks off whenever `ascii_s != 0`;
            // confirm.
            ascii_e = input[ascii_s..]
                .find(b" ")
                .unwrap_or_else(|| input[ascii_s..].len());
            ptr = ascii_e;
        }
        acc.extend(
            ascii_token(&input[ascii_s..ascii_e])
                .to_full_result()
                .unwrap(),
        );
        // Whitespace was skipped after the word: keep a single space as separator.
        if ptr != ascii_e {
            acc.push(b' ');
        }
    }
    IResult::Done(&input[ptr..], acc)
}
  735. #[cfg(test)]
  736. mod tests {
  737. use super::*;
// Checks `phrase` on subjects made of encoded words (base64 and quoted-printable, ISO-8859-7
// and UTF-8), alone or mixed with plain ASCII, including words folded over several lines.
#[test]
fn test_subject() {
    // Three base64 ISO-8859-7 words folded over three lines.
    let words = b"=?iso-8859-7?B?W215Y291cnNlcy5udHVhLmdyIC0gyvXs4fTp6t4g6uHpIMri4e306ere?=
=?iso-8859-7?B?INb18+nq3l0gzd3hIMHt4erv3+358+c6IMzF0c/TIMHQz9TFy8XTzMHU?=
=?iso-8859-7?B?2c0gwiDUzC4gysHNLiDFzsXUwdPH0yAyMDE3LTE4OiDTx8zFydnTxw==?=";
    assert!("[mycourses.ntua.gr - Κυματική και Κβαντική Φυσική] Νέα Ανακοίνωση: ΜΕΡΟΣ ΑΠΟΤΕΛΕΣΜΑΤΩΝ Β ΤΜ. ΚΑΝ. ΕΞΕΤΑΣΗΣ 2017-18: ΣΗΜΕΙΩΣΗ" == std::str::from_utf8(&phrase(words.trim()).to_full_result().unwrap()).unwrap());
    // Quoted-printable UTF-8 words separated by single spaces.
    let words = b"=?UTF-8?Q?=CE=A0=CF=81=CF=8C=CF=83=CE=B8=CE=B5?= =?UTF-8?Q?=CF=84=CE=B7_=CE=B5=CE=BE=CE=B5=CF=84?= =?UTF-8?Q?=CE=B1=CF=83=CF=84=CE=B9=CE=BA=CE=AE?=";
    assert!(
        "Πρόσθετη εξεταστική"
            == std::str::from_utf8(&phrase(words.trim()).to_full_result().unwrap()).unwrap()
    );
    // Plain ASCII prefix followed by encoded words folded with "\n\t".
    let words = b"[Advcomparch] =?utf-8?b?zqPPhc68z4DOtc+BzrnPhs6/z4HOrCDPg861IGZs?=\n\t=?utf-8?b?dXNoIM67z4zOs8+JIG1pc3ByZWRpY3Rpb24gzrrOsc+Ezqwgz4TOt869?=\n\t=?utf-8?b?IM61zrrPhM6tzrvOtc+Dzrcgc3RvcmU=?=";
    assert!(
        "[Advcomparch] Συμπεριφορά σε flush λόγω misprediction κατά την εκτέλεση store"
            == std::str::from_utf8(&phrase(words.trim()).to_full_result().unwrap()).unwrap()
    );
    // Two plain ASCII words in front of the encoded words.
    let words = b"Re: [Advcomparch] =?utf-8?b?zqPPhc68z4DOtc+BzrnPhs6/z4HOrCDPg861IGZs?=
=?utf-8?b?dXNoIM67z4zOs8+JIG1pc3ByZWRpY3Rpb24gzrrOsc+Ezqwgz4TOt869?=
=?utf-8?b?IM61zrrPhM6tzrvOtc+Dzrcgc3RvcmU=?=";
    assert!(
        "Re: [Advcomparch] Συμπεριφορά σε flush λόγω misprediction κατά την εκτέλεση store"
            == std::str::from_utf8(&phrase(words.trim()).to_full_result().unwrap()).unwrap()
    );
    // Plain ASCII only.
    let words = b"sdf";
    assert!("sdf" == std::str::from_utf8(&phrase(words).to_full_result().unwrap()).unwrap());
    // Base64 word followed by a quoted-printable word with underscores and an escaped `_`.
    let words = b"=?iso-8859-7?b?U2VnIGZhdWx0IPP05+0g5er03evl8+cg9O/1?= =?iso-8859-7?q?_example_ru_n_=5Fsniper?=";
    assert!(
        "Seg fault στην εκτέλεση του example ru n _sniper"
            == std::str::from_utf8(&phrase(words).to_full_result().unwrap()).unwrap()
    );
    // Same as above with an ASCII prefix and line folding.
    let words = b"Re: [Advcomparch]
=?iso-8859-7?b?U2VnIGZhdWx0IPP05+0g5er03evl8+cg9O/1?=
=?iso-8859-7?q?_example_ru_n_=5Fsniper?=";
    assert!(
        "Re: [Advcomparch] Seg fault στην εκτέλεση του example ru n _sniper"
            == std::str::from_utf8(&phrase(words).to_full_result().unwrap()).unwrap()
    );
}
// Builds the `Address::Mailbox` expected for display name `$d` and address `$a`.
// With an empty display name `raw` is `<$a>` and the address starts at offset 1; otherwise
// `raw` is `$d <$a>` and the address starts right after the `<` (offset `$d.len() + 2`).
macro_rules! make_address {
    ($d:literal, $a:literal) => {
        Address::Mailbox(if $d.is_empty() {
            MailboxAddress {
                raw: format!("<{}>", $a).into_bytes(),
                display_name: StrBuilder {
                    offset: 0,
                    length: 0,
                },
                address_spec: StrBuilder {
                    offset: 1,
                    length: $a.len(),
                },
            }
        } else {
            MailboxAddress {
                raw: format!("{} <{}>", $d, $a).into_bytes(),
                display_name: StrBuilder {
                    offset: 0,
                    length: $d.len(),
                },
                address_spec: StrBuilder {
                    offset: $d.len() + 2,
                    length: $a.len(),
                },
            }
        })
    };
}
// Parses a comma-separated address list that spans several lines and expects all six
// mailboxes to be returned with no input left over.
#[test]
fn test_address_list() {
    let s = b"Obit Oppidum <user@domain>,
list <list@domain.tld>, list2 <list2@domain.tld>,
Bobit Boppidum <user@otherdomain.com>, Cobit Coppidum <user2@otherdomain.com>, <user@domain.tld>";
    assert_eq!(
        (
            // Expected remainder: empty.
            &s[0..0],
            vec![
                make_address!("Obit Oppidum", "user@domain"),
                make_address!("list", "list@domain.tld"),
                make_address!("list2", "list2@domain.tld"),
                make_address!("Bobit Boppidum", "user@otherdomain.com"),
                make_address!("Cobit Coppidum", "user2@otherdomain.com"),
                make_address!("", "user@domain.tld")
            ]
        ),
        rfc2822address_list(s).unwrap()
    );
}
// The same instant written three ways must parse to equal values: with a trailing comment,
// without it, and as a quoted-printable encoded word carrying a `-0000` offset.
#[test]
fn test_date() {
    let s = b"Thu, 31 Aug 2017 13:43:37 +0000 (UTC)";
    let _s = b"Thu, 31 Aug 2017 13:43:37 +0000";
    let __s = b"=?utf-8?q?Thu=2C_31_Aug_2017_13=3A43=3A37_-0000?=";
    debug!("{:?}, {:?}", date(s), date(_s));
    debug!("{:?}", date(__s));
    assert_eq!(date(s).unwrap(), date(_s).unwrap());
    assert_eq!(date(_s).unwrap(), date(__s).unwrap());
}
// Placeholder: returns immediately, so nothing is exercised. The intended test body is kept
// in the block comment below until a fixture file is available.
#[test]
fn test_attachments() {
    //FIXME: add file
    return;
    /*
    use std::io::Read;
    let mut buffer: Vec<u8> = Vec::new();
    let _ = std::fs::File::open("").unwrap().read_to_end(&mut buffer);
    let boundary = b"b1_4382d284f0c601a737bb32aaeda53160";
    let (_, body) = match mail(&buffer).to_full_result() {
    Ok(v) => v,
    Err(_) => panic!(),
    };
    let attachments = attachments(body, boundary).to_full_result().unwrap();
    assert_eq!(attachments.len(), 4);
    let v: Vec<&str> = attachments
    .iter()
    .map(|v| std::str::from_utf8(v).unwrap())
    .collect();
    println!("attachments {:?}", v);
    */
}
  857. #[test]
  858. fn test_addresses() {
  859. {
  860. let s = b"=?iso-8859-7?B?0/Th/fHv8iDM4ev03ebv8g==?= <maltezos@central.ntua.gr>";
  861. let r = mailbox(s).unwrap().1;
  862. match r {
  863. Address::Mailbox(ref m) => assert!(
  864. "Σταύρος Μαλτέζος"
  865. == std::str::from_utf8(&m.display_name.display_bytes(&m.raw)).unwrap()
  866. && std::str::from_utf8(&m.address_spec.display_bytes(&m.raw)).unwrap()
  867. == "maltezos@central.ntua.gr"
  868. ),
  869. _ => assert!(false),
  870. }
  871. }
  872. {
  873. let s = b"user@domain";
  874. let r = mailbox(s).unwrap().1;
  875. match r {
  876. Address::Mailbox(ref m) => assert!(
  877. m.display_name.display_bytes(&m.raw) == b""
  878. && m.address_spec.display_bytes(&m.raw) == b"user@domain"
  879. ),
  880. _ => assert!(false),
  881. }
  882. }
  883. {
  884. let s = b"Name <user@domain>";
  885. let r = display_addr(s).unwrap().1;
  886. match r {
  887. Address::Mailbox(ref m) => assert!(
  888. b"Name" == m.display_name.display_bytes(&m.raw)
  889. && b"user@domain" == m.address_spec.display_bytes(&m.raw)
  890. ),
  891. _ => {}
  892. }
  893. }
  894. {
  895. let s = b"user@domain";
  896. let r = mailbox(s).unwrap().1;
  897. match r {
  898. Address::Mailbox(ref m) => assert!(
  899. b"" == m.display_name.display_bytes(&m.raw)
  900. && b"user@domain" == m.address_spec.display_bytes(&m.raw)
  901. ),
  902. _ => {}
  903. }
  904. }
  905. }
  906. }