You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

parser.rs 34KB


  1. /*
  2. * meli - parser module
  3. *
  4. * Copyright 2017 Manos Pitsidianakis
  5. *
  6. * This file is part of meli.
  7. *
  8. * meli is free software: you can redistribute it and/or modify
  9. * it under the terms of the GNU General Public License as published by
  10. * the Free Software Foundation, either version 3 of the License, or
  11. * (at your option) any later version.
  12. *
  13. * meli is distributed in the hope that it will be useful,
  14. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  16. * GNU General Public License for more details.
  17. *
  18. * You should have received a copy of the GNU General Public License
  19. * along with meli. If not, see <http://www.gnu.org/licenses/>.
  20. */
  21. use super::*;
  22. use chrono;
  23. use data_encoding::BASE64_MIME;
  24. use encoding::{DecoderTrap, Encoding};
  25. use nom::{is_hex_digit, le_u8};
  26. pub(super) use nom::{ErrorKind, IResult, Needed};
  27. use encoding::all::*;
  28. use std;
  29. macro_rules! is_whitespace {
  30. ($var:ident) => {
  31. $var == b' ' || $var == b'\t' || $var == b'\n' || $var == b'\r'
  32. };
  33. ($var:expr) => {
  34. $var == b' ' || $var == b'\t' || $var == b'\n' || $var == b'\r'
  35. };
  36. }
  37. pub trait BytesExt {
  38. fn rtrim(&self) -> &Self;
  39. fn ltrim(&self) -> &Self;
  40. fn trim(&self) -> &Self;
  41. fn find(&self, needle: &[u8]) -> Option<usize>;
  42. fn replace(&self, from: &[u8], to: &[u8]) -> Vec<u8>;
  43. }
  44. impl BytesExt for [u8] {
  45. fn rtrim(&self) -> &Self {
  46. if let Some(last) = self.iter().rposition(|b| !is_whitespace!(*b)) {
  47. &self[..=last]
  48. } else {
  49. &[]
  50. }
  51. }
  52. fn ltrim(&self) -> &Self {
  53. if let Some(first) = self.iter().position(|b| !is_whitespace!(*b)) {
  54. &self[first..]
  55. } else {
  56. &[]
  57. }
  58. }
  59. fn trim(&self) -> &[u8] {
  60. self.rtrim().ltrim()
  61. }
  62. // https://stackoverflow.com/a/35907071
  63. fn find(&self, needle: &[u8]) -> Option<usize> {
  64. self.windows(needle.len())
  65. .position(|window| window == needle)
  66. }
  67. fn replace(&self, from: &[u8], to: &[u8]) -> Vec<u8> {
  68. let mut ret = self.to_vec();
  69. if let Some(idx) = self.find(from) {
  70. ret.splice(idx..(idx + from.len()), to.iter().cloned());
  71. }
  72. ret
  73. }
  74. }
  75. fn quoted_printable_byte(input: &[u8]) -> IResult<&[u8], u8> {
  76. if input.len() < 3 {
  77. IResult::Incomplete(Needed::Size(1))
  78. } else if input[0] == b'=' && is_hex_digit(input[1]) && is_hex_digit(input[2]) {
  79. let a = if input[1] < b':' {
  80. input[1] - 48
  81. } else if input[1] < b'[' {
  82. input[1] - 55
  83. } else {
  84. input[1] - 87
  85. };
  86. let b = if input[2] < b':' {
  87. input[2] - 48
  88. } else if input[2] < b'[' {
  89. input[2] - 55
  90. } else {
  91. input[2] - 87
  92. };
  93. IResult::Done(&input[3..], a * 16 + b)
  94. } else {
  95. IResult::Error(error_code!(ErrorKind::Custom(43)))
  96. }
  97. }
  98. // Parser definition
  99. /* A header can span multiple lines, eg:
  100. *
  101. * Received: from -------------------- (-------------------------)
  102. * by --------------------- (--------------------- [------------------]) (-----------------------)
  103. * with ESMTP id ------------ for <------------------->;
  104. * Tue, 5 Jan 2016 21:30:44 +0100 (CET)
  105. */
  106. fn header_value(input: &[u8]) -> IResult<&[u8], &[u8]> {
  107. let input_len = input.len();
  108. for (i, x) in input.iter().enumerate() {
  109. if *x == b'\n'
  110. && (((i + 1) < input_len && input[i + 1] != b' ' && input[i + 1] != b'\t')
  111. || i + 1 == input_len)
  112. {
  113. return IResult::Done(&input[(i + 1)..], &input[0..i]);
  114. }
  115. }
  116. IResult::Incomplete(Needed::Unknown)
  117. }
  118. /* Parse a single header as a tuple */
  119. fn header_with_val(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> {
  120. if input.is_empty() {
  121. return IResult::Incomplete(Needed::Unknown);
  122. } else if input.starts_with(b"\n") {
  123. return IResult::Error(error_code!(ErrorKind::Custom(43)));
  124. }
  125. let mut ptr = 0;
  126. let mut name: &[u8] = &input[0..0];
  127. for (i, x) in input.iter().enumerate() {
  128. if *x == b':' {
  129. name = &input[0..i];
  130. ptr = i + 1;
  131. break;
  132. }
  133. }
  134. if name.is_empty() {
  135. return IResult::Error(error_code!(ErrorKind::Custom(43)));
  136. }
  137. if ptr >= input.len() {
  138. return IResult::Error(error_code!(ErrorKind::Custom(43)));
  139. }
  140. if input[ptr] == b'\n' {
  141. ptr += 1;
  142. if ptr >= input.len() {
  143. return IResult::Error(error_code!(ErrorKind::Custom(43)));
  144. }
  145. }
  146. while input[ptr] == b' ' || input[ptr] == b'\t' {
  147. ptr += 1;
  148. if ptr >= input.len() {
  149. return IResult::Error(error_code!(ErrorKind::Custom(43)));
  150. }
  151. }
  152. match header_value(&input[ptr..]) {
  153. IResult::Done(rest, value) => IResult::Done(rest, (name, value)),
  154. IResult::Incomplete(needed) => IResult::Incomplete(needed),
  155. IResult::Error(code) => IResult::Error(code),
  156. }
  157. }
  158. fn header_without_val(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> {
  159. if input.is_empty() {
  160. return IResult::Incomplete(Needed::Unknown);
  161. } else if input.starts_with(b"\n") {
  162. return IResult::Error(error_code!(ErrorKind::Custom(43)));
  163. }
  164. let mut ptr = 0;
  165. let mut name: &[u8] = &input[0..0];
  166. for (i, x) in input.iter().enumerate() {
  167. if *x == b':' || *x == b'\n' {
  168. name = &input[0..i];
  169. ptr = i;
  170. break;
  171. }
  172. }
  173. if name.is_empty() {
  174. return IResult::Error(error_code!(ErrorKind::Custom(43)));
  175. }
  176. if input[ptr] == b':' {
  177. ptr += 1;
  178. if ptr >= input.len() {
  179. return IResult::Incomplete(Needed::Unknown);
  180. }
  181. }
  182. while input[ptr] == b' ' {
  183. ptr += 1;
  184. if ptr >= input.len() {
  185. return IResult::Incomplete(Needed::Unknown);
  186. }
  187. }
  188. if input[ptr..].starts_with(b"\n") {
  189. ptr += 1;
  190. if ptr >= input.len() {
  191. return IResult::Incomplete(Needed::Unknown);
  192. }
  193. if input[ptr] != b' ' && input[ptr] != b'\t' {
  194. IResult::Done(&input[ptr..], (name, b""))
  195. } else {
  196. IResult::Error(error_code!(ErrorKind::Custom(43)))
  197. }
  198. } else {
  199. IResult::Error(error_code!(ErrorKind::Custom(43)))
  200. }
  201. }
// Parse one header into a `(name, value)` pair. The empty-value form is tried
// first; if it does not match, the regular `Name: value` form is tried.
named!(
    header<(&[u8], &[u8])>,
    alt_complete!(call!(header_without_val) | call!(header_with_val))
);
/* Parse all headers -> Vec<(&[u8], &[u8])>, one `(name, value)` pair per
 * header. `many1!` requires at least one header to be present. */
named!(pub headers<std::vec::Vec<(&[u8], &[u8])>>,
       many1!(complete!(header)));
  209. pub fn headers_raw(input: &[u8]) -> IResult<&[u8], &[u8]> {
  210. if input.is_empty() {
  211. return IResult::Incomplete(Needed::Unknown);
  212. }
  213. for (i, x) in input.iter().enumerate() {
  214. if *x == b'\n' && i + 1 < input.len() && input[i + 1] == b'\n' {
  215. return IResult::Done(&input[(i + 1)..], &input[0..=i]);
  216. }
  217. }
  218. IResult::Error(error_code!(ErrorKind::Custom(43)))
  219. }
  220. named!(pub body_raw<&[u8]>,
  221. do_parse!(
  222. take_until1!("\n\n") >>
  223. body: take_while!(call!(|_| true)) >>
  224. ( { body } )));
// Parse a whole message into `(headers, body)`: the header list, then a single
// "\n" separator, then every remaining byte as the body.
named!(pub mail<(std::vec::Vec<(&[u8], &[u8])>, &[u8])>,
       separated_pair!(headers, tag!(b"\n"), take_while!(call!(|_| true))));
// Parse one attachment part into `(headers, body)`. Leading whitespace is
// skipped; unlike `mail`, the header list may be empty (`many0!`) and no
// separator is consumed between the headers and the body.
named!(pub attachment<(std::vec::Vec<(&[u8], &[u8])>, &[u8])>,
       do_parse!(
            opt!(is_a!(" \n\t\r")) >>
       pair: pair!(many0!(complete!(header)), take_while!(call!(|_| true))) >>
       ( { pair } )));
  232. /* Header parsers */
  233. /* Encoded words
  234. *"=?charset?encoding?encoded text?=".
  235. */
  236. fn encoded_word(input: &[u8]) -> IResult<&[u8], Vec<u8>> {
  237. if input.is_empty() {
  238. return IResult::Done(&[], Vec::with_capacity(0));
  239. }
  240. if input.len() < 5 {
  241. return IResult::Incomplete(Needed::Unknown);
  242. } else if input[0] != b'=' || input[1] != b'?' {
  243. return IResult::Error(error_code!(ErrorKind::Custom(43)));
  244. }
  245. /* find end of Charset tag:
  246. * =?charset?encoding?encoded text?=
  247. * ---------^
  248. */
  249. let mut tag_end_idx = None;
  250. for (idx, b) in input[2..].iter().enumerate() {
  251. if *b == b'?' {
  252. tag_end_idx = Some(idx + 2);
  253. break;
  254. }
  255. }
  256. if tag_end_idx.is_none() {
  257. return IResult::Error(error_code!(ErrorKind::Custom(42)));
  258. }
  259. let tag_end_idx = tag_end_idx.unwrap();
  260. if input[2 + tag_end_idx] != b'?' {
  261. return IResult::Error(error_code!(ErrorKind::Custom(43)));
  262. }
  263. /* See if input ends with "?=" and get ending index
  264. * =?charset?encoding?encoded text?=
  265. * -------------------------------^
  266. */
  267. let mut encoded_end_idx = None;
  268. for i in (3 + tag_end_idx)..input.len() {
  269. if input[i] == b'?' && i < input.len() && input[i + 1] == b'=' {
  270. encoded_end_idx = Some(i);
  271. break;
  272. }
  273. }
  274. if encoded_end_idx.is_none() {
  275. return IResult::Error(error_code!(ErrorKind::Custom(44)));
  276. }
  277. let encoded_end_idx = encoded_end_idx.unwrap();
  278. let encoded_text = &input[3 + tag_end_idx..encoded_end_idx];
  279. let s: Vec<u8> = match input[tag_end_idx + 1] {
  280. b'b' | b'B' => match BASE64_MIME.decode(encoded_text) {
  281. Ok(v) => v,
  282. Err(_) => encoded_text.to_vec(),
  283. },
  284. b'q' | b'Q' => match quoted_printable_bytes_header(encoded_text) {
  285. IResult::Done(b"", s) => s,
  286. _ => return IResult::Error(error_code!(ErrorKind::Custom(45))),
  287. },
  288. _ => return IResult::Error(error_code!(ErrorKind::Custom(46))),
  289. };
  290. let charset = Charset::from(&input[2..tag_end_idx]);
  291. if let Charset::UTF8 = charset {
  292. IResult::Done(&input[encoded_end_idx + 2..], s)
  293. } else {
  294. match decode_charset(&s, charset) {
  295. Ok(v) => IResult::Done(&input[encoded_end_idx + 2..], v.into_bytes()),
  296. _ => IResult::Error(error_code!(ErrorKind::Custom(43))),
  297. }
  298. }
  299. }
  300. pub fn decode_charset(s: &[u8], charset: Charset) -> Result<String> {
  301. match charset {
  302. Charset::UTF8 | Charset::Ascii => Ok(String::from_utf8_lossy(s).to_string()),
  303. Charset::ISO8859_1 => Ok(ISO_8859_1.decode(s, DecoderTrap::Strict)?),
  304. Charset::ISO8859_2 => Ok(ISO_8859_2.decode(s, DecoderTrap::Strict)?),
  305. Charset::ISO8859_7 => Ok(ISO_8859_7.decode(s, DecoderTrap::Strict)?),
  306. Charset::ISO8859_15 => Ok(ISO_8859_15.decode(s, DecoderTrap::Strict)?),
  307. Charset::GBK => Ok(GBK.decode(s, DecoderTrap::Strict)?),
  308. Charset::Windows1251 => Ok(WINDOWS_1251.decode(s, DecoderTrap::Strict)?),
  309. Charset::Windows1252 => Ok(WINDOWS_1252.decode(s, DecoderTrap::Strict)?),
  310. Charset::Windows1253 => Ok(WINDOWS_1253.decode(s, DecoderTrap::Strict)?),
  311. // Unimplemented:
  312. Charset::GB2312 => Ok(String::from_utf8_lossy(s).to_string()),
  313. Charset::UTF16 => Ok(String::from_utf8_lossy(s).to_string()),
  314. Charset::BIG5 => Ok(String::from_utf8_lossy(s).to_string()),
  315. Charset::ISO2022JP => Ok(String::from_utf8_lossy(s).to_string()),
  316. }
  317. }
  318. fn quoted_printable_soft_break(input: &[u8]) -> IResult<&[u8], &[u8]> {
  319. if input.len() < 2 {
  320. IResult::Incomplete(Needed::Size(1))
  321. } else if input[0] == b'=' && input[1] == b'\n' {
  322. IResult::Done(&input[2..], &input[0..2]) // `=\n` is an escaped space character.
  323. } else {
  324. IResult::Error(error_code!(ErrorKind::Custom(43)))
  325. }
  326. }
// Match a single underscore and yield a space (0x20) in its place.
named!(
    qp_underscore_header<u8>,
    do_parse!(tag!(b"_") >> ({ 0x20 }))
);
// With MIME, headers in quoted printable format can contain underscores that represent spaces.
// In non-header context, an underscore is just a plain underscore.
//
// Decodes the whole input byte by byte: `=XX` escapes become the byte they
// encode, `_` becomes a space, and any other byte is copied through unchanged.
named!(
    pub quoted_printable_bytes_header<Vec<u8>>,
    many0!(alt_complete!(
        quoted_printable_byte | qp_underscore_header | le_u8
    ))
);
// For atoms in Header values.
//
// Decodes quoted-printable data without the header-only underscore rule. The
// alternatives are tried in order: a soft line break (`=\n`) followed by an
// `=XX` escape, a soft line break followed by any byte, a plain `=XX` escape,
// and finally any single byte. In the first two cases only the byte after the
// soft break is kept, so the break itself is dropped from the output.
named!(
    pub quoted_printable_bytes<Vec<u8>>,
    many0!(alt_complete!(
        preceded!(quoted_printable_soft_break, quoted_printable_byte) |
        preceded!(quoted_printable_soft_break, le_u8) | quoted_printable_byte | le_u8
    ))
);
  347. fn display_addr(input: &[u8]) -> IResult<&[u8], Address> {
  348. if input.is_empty() || input.len() < 3 {
  349. IResult::Incomplete(Needed::Size(1))
  350. } else if !is_whitespace!(input[0]) {
  351. let mut display_name = StrBuilder {
  352. offset: 0,
  353. length: 0,
  354. };
  355. let mut flag = false;
  356. for (i, b) in input[0..].iter().enumerate() {
  357. if *b == b'<' {
  358. display_name.length = i.saturating_sub(1); // if i != 0 { i - 1 } else { 0 };
  359. flag = true;
  360. break;
  361. }
  362. }
  363. if !flag {
  364. let (rest, output) = match phrase(input) {
  365. IResult::Done(rest, raw) => (rest, raw),
  366. _ => return IResult::Error(error_code!(ErrorKind::Custom(43))),
  367. };
  368. if output.contains(&b'<') {
  369. match display_addr(&output) {
  370. IResult::Done(_, address) => return IResult::Done(rest, address),
  371. _ => return IResult::Error(error_code!(ErrorKind::Custom(43))),
  372. }
  373. }
  374. return IResult::Error(error_code!(ErrorKind::Custom(43)));
  375. }
  376. let mut end = input.len();
  377. let mut flag = false;
  378. for (i, b) in input[display_name.length + 2..].iter().enumerate() {
  379. match *b {
  380. b'@' => flag = true,
  381. b'>' => {
  382. end = i;
  383. break;
  384. }
  385. _ => {}
  386. }
  387. }
  388. if flag {
  389. match phrase(&input[0..end + display_name.length + 3]) {
  390. IResult::Error(e) => IResult::Error(e),
  391. IResult::Incomplete(i) => IResult::Incomplete(i),
  392. IResult::Done(rest, raw) => {
  393. let display_name_end = raw.find(b"<").unwrap();
  394. display_name.length = { raw[0..display_name_end].trim().len() };
  395. let address_spec = if display_name_end == 0 {
  396. StrBuilder {
  397. offset: 1,
  398. length: end + 1,
  399. }
  400. } else {
  401. StrBuilder {
  402. offset: display_name_end + 1,
  403. length: end,
  404. }
  405. };
  406. IResult::Done(
  407. rest,
  408. Address::Mailbox(MailboxAddress {
  409. raw,
  410. display_name,
  411. address_spec,
  412. }),
  413. )
  414. }
  415. }
  416. } else {
  417. IResult::Error(error_code!(ErrorKind::Custom(43)))
  418. }
  419. } else {
  420. IResult::Error(error_code!(ErrorKind::Custom(43)))
  421. }
  422. }
  423. fn addr_spec(input: &[u8]) -> IResult<&[u8], Address> {
  424. if input.is_empty() || input.len() < 3 {
  425. IResult::Incomplete(Needed::Size(1))
  426. } else if !is_whitespace!(input[0]) {
  427. let mut end = input[1..].len();
  428. let mut flag = false;
  429. for (i, b) in input[1..].iter().enumerate() {
  430. if *b == b'@' {
  431. flag = true;
  432. }
  433. if is_whitespace!(*b) {
  434. end = i;
  435. break;
  436. }
  437. }
  438. if flag {
  439. IResult::Done(
  440. &input[end..],
  441. Address::Mailbox(MailboxAddress {
  442. raw: input[0..=end].into(),
  443. display_name: StrBuilder {
  444. offset: 0,
  445. length: 0,
  446. },
  447. address_spec: StrBuilder {
  448. offset: 0,
  449. length: input[0..=end].len(),
  450. },
  451. }),
  452. )
  453. } else {
  454. IResult::Error(error_code!(ErrorKind::Custom(43)))
  455. }
  456. } else {
  457. IResult::Error(error_code!(ErrorKind::Custom(42)))
  458. }
  459. }
// Parse a single mailbox, ignoring surrounding whitespace: the bracketed form
// (`display_addr`) is tried first, then the bare form (`addr_spec`).
named!(
    pub mailbox<Address>,
    ws!(alt_complete!(display_addr | addr_spec))
);
// Zero or more mailboxes in a row (no separator is parsed between them).
named!(mailbox_list<Vec<Address>>, many0!(mailbox));
  465. /*
  466. * group of recipients eg. undisclosed-recipients;
  467. */
  468. fn group(input: &[u8]) -> IResult<&[u8], Address> {
  469. let mut flag = false;
  470. let mut dlength = 0;
  471. for (i, b) in input.iter().enumerate() {
  472. if *b == b':' {
  473. flag = true;
  474. dlength = i;
  475. break;
  476. }
  477. }
  478. if !flag {
  479. return IResult::Error(error_code!(ErrorKind::Custom(43)));
  480. }
  481. match mailbox_list(&input[dlength..]) {
  482. IResult::Error(e) => IResult::Error(e),
  483. IResult::Done(rest, vec) => {
  484. let size: usize =
  485. (rest.as_ptr() as usize).wrapping_sub((&input[0..] as &[u8]).as_ptr() as usize);
  486. IResult::Done(
  487. rest,
  488. Address::Group(GroupAddress {
  489. raw: input[0..size].into(),
  490. display_name: StrBuilder {
  491. offset: 0,
  492. length: dlength,
  493. },
  494. mailbox_list: vec,
  495. }),
  496. )
  497. }
  498. IResult::Incomplete(i) => IResult::Incomplete(i),
  499. }
  500. }
// Parse one address, ignoring surrounding whitespace: a mailbox if possible,
// otherwise a group.
named!(pub address<Address>, ws!(alt_complete!(mailbox | group)));
// Parse a list of addresses separated by one or more commas.
named!(pub rfc2822address_list<Vec<Address>>, ws!( separated_list!(is_a!(","), address)));
  503. named!(pub address_list<String>, ws!(do_parse!(
  504. list: alt_complete!( encoded_word_list | ascii_token) >>
  505. ( {
  506. let list: Vec<&[u8]> = list.split(|c| *c == b',').collect();
  507. let string_len = list.iter().fold(0, |mut acc, x| { acc+=x.trim().len(); acc }) + list.len() - 1;
  508. let list_len = list.len();
  509. let mut i = 0;
  510. list.iter().fold(String::with_capacity(string_len),
  511. |acc, x| {
  512. let mut acc = acc + &String::from_utf8_lossy(x.replace(b"\n", b"").replace(b"\t", b" ").trim());
  513. if i != list_len - 1 {
  514. acc.push_str(" ");
  515. i+=1;
  516. }
  517. acc
  518. })
  519. } )
  520. )));
  521. fn eat_comments(input: &[u8]) -> Vec<u8> {
  522. let mut in_comment = false;
  523. input
  524. .iter()
  525. .fold(Vec::with_capacity(input.len()), |mut acc, x| {
  526. if *x == b'(' && !in_comment {
  527. in_comment = true;
  528. acc
  529. } else if *x == b')' && in_comment {
  530. in_comment = false;
  531. acc
  532. } else if in_comment {
  533. acc
  534. } else {
  535. acc.push(*x);
  536. acc
  537. }
  538. })
  539. }
  540. /*
  541. * Date should tokenize input and convert the tokens,
  542. * right now we expect input will have no extra spaces in between tokens
  543. *
  544. * We should use a custom parser here*/
  545. pub fn date(input: &[u8]) -> Option<chrono::DateTime<chrono::FixedOffset>> {
  546. let parsed_result = phrase(&eat_comments(input))
  547. .to_full_result()
  548. .unwrap()
  549. .replace(b"-", b"+");
  550. chrono::DateTime::parse_from_rfc2822(String::from_utf8_lossy(parsed_result.trim()).as_ref())
  551. .ok()
  552. }
// Parse a message id enclosed in angle brackets and return the non-empty text
// between '<' and '>' (brackets excluded).
named!(pub message_id<&[u8]>,
        complete!(delimited!(tag!("<"),  take_until1!(">"), tag!(">")))
 );
  556. fn message_id_peek(input: &[u8]) -> IResult<&[u8], &[u8]> {
  557. let input_length = input.len();
  558. if input.is_empty() {
  559. IResult::Incomplete(Needed::Size(1))
  560. } else if input_length == 2 || input[0] != b'<' {
  561. IResult::Error(error_code!(ErrorKind::Custom(43)))
  562. } else {
  563. for (i, &x) in input.iter().take(input_length).enumerate().skip(1) {
  564. if x == b'>' {
  565. return IResult::Done(&input[i + 1..], &input[0..=i]);
  566. }
  567. }
  568. IResult::Incomplete(Needed::Unknown)
  569. }
  570. }
// Parse a whitespace-separated list of message ids; each returned id keeps its
// angle brackets (see `message_id_peek`).
named!(pub references<Vec<&[u8]>>, separated_list!(complete!(is_a!(" \n\t\r")), message_id_peek));
  572. fn attachments_f<'a>(input: &'a [u8], boundary: &[u8]) -> IResult<&'a [u8], Vec<&'a [u8]>> {
  573. let mut ret: Vec<&[u8]> = Vec::new();
  574. let mut input = input.ltrim();
  575. loop {
  576. let b_start = if let Some(v) = input.find(boundary) {
  577. v
  578. } else {
  579. return IResult::Error(error_code!(ErrorKind::Custom(39)));
  580. };
  581. if b_start < 2 {
  582. return IResult::Error(error_code!(ErrorKind::Custom(40)));
  583. }
  584. input = &input[b_start - 2..];
  585. if &input[0..2] == b"--" {
  586. input = &input[2 + boundary.len()..];
  587. if &input[0..1] != b"\n" {
  588. continue;
  589. }
  590. input = &input[1..];
  591. break;
  592. }
  593. }
  594. loop {
  595. if input.len() < boundary.len() + 4 {
  596. return IResult::Error(error_code!(ErrorKind::Custom(41)));
  597. }
  598. if let Some(end) = input.find(boundary) {
  599. if &input[end - 2..end] != b"--" {
  600. return IResult::Error(error_code!(ErrorKind::Custom(42)));
  601. }
  602. ret.push(&input[0..end - 2]);
  603. input = &input[end + boundary.len()..];
  604. if input.len() < 2 || input[0] != b'\n' || &input[0..2] == b"--" {
  605. break;
  606. }
  607. input = &input[1..];
  608. continue;
  609. } else {
  610. return IResult::Error(error_code!(ErrorKind::Custom(43)));
  611. }
  612. }
  613. IResult::Done(input, ret)
  614. }
// Split a multipart body into its parts using `attachments_f`. If that fails,
// fall back to consuming the input through the first "--" and the following
// boundary text and report an empty list of parts.
// NOTE(review): the odd lifetime name in the return type looks like it refers
// to a lifetime introduced by `named_args!` itself; confirm against the nom
// version in use before renaming it.
named_args!(pub attachments<'a>(boundary: &'a [u8]) < Vec<&'this_is_probably_unique_i_hope_please [u8]> >,
            alt_complete!(call!(attachments_f, boundary) | do_parse!(
                        take_until_and_consume!(&b"--"[..]) >>
                        take_until_and_consume!(boundary) >>
                        ( { Vec::<&[u8]>::new() } ))
            ));
// Parse one `; name=value` parameter of a Content-Type header into
// `(name, value)`. The value is either the text between double quotes (quotes
// excluded) or, if it is not quoted, everything up to the next ';'.
named!(
    content_type_parameter<(&[u8], &[u8])>,
    do_parse!(
        tag!(";")
            >> name: terminated!(ws!(take_until!("=")), tag!("="))
            >> value:
                ws!(alt_complete!(
                    delimited!(tag!("\""), take_until!("\""), tag!("\"")) | is_not!(";")
                ))
            >> ({ (name, value) })
    )
);
// Parse a Content-Type header value into `(type, subtype, parameters)`:
// the text before '/', the text between '/' and the first ';', and zero or
// more `; name=value` parameters.
named!(pub content_type< (&[u8], &[u8], Vec<(&[u8], &[u8])>) >,
       do_parse!(
           _type: take_until!("/") >>
           tag!("/") >>
           _subtype: is_not!(";") >>
           parameters: many0!(complete!(content_type_parameter)) >>
           ( {
               (_type, _subtype, parameters)
           } )
           ));
// Consume a run of separator bytes: space, tab, carriage return, line feed.
named!(pub space, eat_separator!(&b" \t\r\n"[..]));
  644. named!(
  645. encoded_word_list<Vec<u8>>,
  646. ws!(do_parse!(
  647. list: separated_nonempty_list!(call!(space), encoded_word)
  648. >> ({
  649. let list_len = list.iter().fold(0, |mut acc, x| {
  650. acc += x.len();
  651. acc
  652. });
  653. list.iter()
  654. .fold(Vec::with_capacity(list_len), |mut acc, x| {
  655. acc.append(&mut x.clone());
  656. acc
  657. })
  658. })
  659. ))
  660. );
// Return a run of plain (not encoded) text as owned bytes. If the text is
// followed by " =?" that starts a valid encoded word, only the text in front
// of that space is taken and the encoded word is left unconsumed (`peek!`);
// otherwise the whole input is taken.
named!(
    ascii_token<Vec<u8>>,
    do_parse!(
        word: alt_complete!(
            terminated!(
                take_until1!(" =?"),
                peek!(preceded!(tag!(b" "), call!(encoded_word)))
            ) | take_while!(call!(|_| true))
        ) >> ({ word.into() })
    )
);
  672. pub fn phrase(input: &[u8]) -> IResult<&[u8], Vec<u8>> {
  673. if input.is_empty() {
  674. return IResult::Done(&[], Vec::with_capacity(0));
  675. }
  676. let mut input = input.ltrim();
  677. let mut acc: Vec<u8> = Vec::new();
  678. let mut ptr = 0;
  679. while ptr < input.len() {
  680. let mut flag = false;
  681. // Check if word is encoded.
  682. while let IResult::Done(rest, v) = encoded_word(&input[ptr..]) {
  683. flag = true;
  684. input = rest;
  685. ptr = 0;
  686. acc.extend(v);
  687. // consume whitespace
  688. while ptr < input.len() && (is_whitespace!(input[ptr])) {
  689. ptr += 1;
  690. }
  691. if ptr >= input.len() {
  692. break;
  693. }
  694. }
  695. if flag && ptr < input.len() && ptr != 0 {
  696. acc.push(b' ');
  697. }
  698. let end = input[ptr..].find(b"=?");
  699. let end = end.unwrap_or_else(|| input.len() - ptr) + ptr;
  700. let ascii_s = ptr;
  701. let mut ascii_e;
  702. while ptr < end && !(is_whitespace!(input[ptr])) {
  703. ptr += 1;
  704. }
  705. ascii_e = ptr;
  706. while ptr < input.len() && (is_whitespace!(input[ptr])) {
  707. ptr += 1;
  708. }
  709. if ptr >= input.len() {
  710. acc.extend(
  711. ascii_token(&input[ascii_s..ascii_e])
  712. .to_full_result()
  713. .unwrap(),
  714. );
  715. break;
  716. }
  717. if ascii_s == ascii_e {
  718. /* We have the start of an encoded word but not the end, so parse it as ascii */
  719. ascii_e = input[ascii_s..]
  720. .find(b" ")
  721. .unwrap_or_else(|| input[ascii_s..].len());
  722. ptr = ascii_e;
  723. }
  724. acc.extend(
  725. ascii_token(&input[ascii_s..ascii_e])
  726. .to_full_result()
  727. .unwrap(),
  728. );
  729. if ptr != ascii_e {
  730. acc.push(b' ');
  731. }
  732. }
  733. IResult::Done(&input[ptr..], acc)
  734. }
// Parse a non-empty, comma-separated list of `<...>` items and return the text
// inside each pair of angle brackets.
named!(pub angle_bracket_delimeted_list<Vec<&[u8]>>, separated_nonempty_list!(complete!(is_a!(",")), ws!(complete!(message_id))));
  736. pub fn mailto(mut input: &[u8]) -> IResult<&[u8], Mailto> {
  737. if !input.starts_with(b"mailto:") {
  738. return IResult::Error(error_code!(ErrorKind::Custom(43)));
  739. }
  740. input = &input[b"mailto:".len()..];
  741. let end = input.iter().position(|e| *e == b'?').unwrap_or(input.len());
  742. let address: Address;
  743. if let IResult::Done(_, addr) = crate::email::parser::address(&input[..end]) {
  744. address = addr;
  745. input = if input[end..].is_empty() {
  746. &input[end..]
  747. } else {
  748. &input[end + 1..]
  749. };
  750. } else {
  751. return IResult::Error(error_code!(ErrorKind::Custom(43)));
  752. }
  753. let mut subject = None;
  754. let mut cc = None;
  755. let mut bcc = None;
  756. let mut body = None;
  757. while !input.is_empty() {
  758. let tag = if let Some(tag_pos) = input.iter().position(|e| *e == b'=') {
  759. let ret = &input[0..tag_pos];
  760. input = &input[tag_pos + 1..];
  761. ret
  762. } else {
  763. return IResult::Error(error_code!(ErrorKind::Custom(43)));
  764. };
  765. let value_end = input.iter().position(|e| *e == b'&').unwrap_or(input.len());
  766. let value = String::from_utf8_lossy(&input[..value_end]).to_string();
  767. match tag {
  768. b"subject" if subject.is_none() => {
  769. subject = Some(value);
  770. }
  771. b"cc" if cc.is_none() => {
  772. cc = Some(value);
  773. }
  774. b"bcc" if bcc.is_none() => {
  775. bcc = Some(value);
  776. }
  777. b"body" if body.is_none() => {
  778. /* FIXME:
  779. * Parse escaped characters properly.
  780. */
  781. body = Some(value.replace("%20", " ").replace("%0A", "\n"));
  782. }
  783. _ => {
  784. return IResult::Error(error_code!(ErrorKind::Custom(43)));
  785. }
  786. }
  787. if input[value_end..].is_empty() {
  788. break;
  789. }
  790. input = &input[value_end + 1..];
  791. }
  792. IResult::Done(
  793. input,
  794. Mailto {
  795. address,
  796. subject,
  797. cc,
  798. bcc,
  799. body,
  800. },
  801. )
  802. }
// Unit tests for the header parsers in this module.
#[cfg(test)]
mod tests {
    use super::*;

    // `phrase` must decode encoded words in several charsets and encodings,
    // join folded encoded words, and pass plain ASCII through.
    #[test]
    fn test_subject() {
        let words = b"=?iso-8859-7?B?W215Y291cnNlcy5udHVhLmdyIC0gyvXs4fTp6t4g6uHpIMri4e306ere?=
=?iso-8859-7?B?INb18+nq3l0gzd3hIMHt4erv3+358+c6IMzF0c/TIMHQz9TFy8XTzMHU?=
=?iso-8859-7?B?2c0gwiDUzC4gysHNLiDFzsXUwdPH0yAyMDE3LTE4OiDTx8zFydnTxw==?=";
        assert!("[mycourses.ntua.gr - Κυματική και Κβαντική Φυσική] Νέα Ανακοίνωση: ΜΕΡΟΣ ΑΠΟΤΕΛΕΣΜΑΤΩΝ Β ΤΜ. ΚΑΝ. ΕΞΕΤΑΣΗΣ 2017-18: ΣΗΜΕΙΩΣΗ" == std::str::from_utf8(&phrase(words.trim()).to_full_result().unwrap()).unwrap());
        let words = b"=?UTF-8?Q?=CE=A0=CF=81=CF=8C=CF=83=CE=B8=CE=B5?= =?UTF-8?Q?=CF=84=CE=B7_=CE=B5=CE=BE=CE=B5=CF=84?= =?UTF-8?Q?=CE=B1=CF=83=CF=84=CE=B9=CE=BA=CE=AE?=";
        assert!(
            "Πρόσθετη εξεταστική"
                == std::str::from_utf8(&phrase(words.trim()).to_full_result().unwrap()).unwrap()
        );
        let words = b"[Advcomparch] =?utf-8?b?zqPPhc68z4DOtc+BzrnPhs6/z4HOrCDPg861IGZs?=\n\t=?utf-8?b?dXNoIM67z4zOs8+JIG1pc3ByZWRpY3Rpb24gzrrOsc+Ezqwgz4TOt869?=\n\t=?utf-8?b?IM61zrrPhM6tzrvOtc+Dzrcgc3RvcmU=?=";
        assert!(
            "[Advcomparch] Συμπεριφορά σε flush λόγω misprediction κατά την εκτέλεση store"
                == std::str::from_utf8(&phrase(words.trim()).to_full_result().unwrap()).unwrap()
        );
        let words = b"Re: [Advcomparch] =?utf-8?b?zqPPhc68z4DOtc+BzrnPhs6/z4HOrCDPg861IGZs?=
=?utf-8?b?dXNoIM67z4zOs8+JIG1pc3ByZWRpY3Rpb24gzrrOsc+Ezqwgz4TOt869?=
=?utf-8?b?IM61zrrPhM6tzrvOtc+Dzrcgc3RvcmU=?=";
        assert!(
            "Re: [Advcomparch] Συμπεριφορά σε flush λόγω misprediction κατά την εκτέλεση store"
                == std::str::from_utf8(&phrase(words.trim()).to_full_result().unwrap()).unwrap()
        );
        let words = b"sdf";
        assert!("sdf" == std::str::from_utf8(&phrase(words).to_full_result().unwrap()).unwrap());
        let words = b"=?iso-8859-7?b?U2VnIGZhdWx0IPP05+0g5er03evl8+cg9O/1?= =?iso-8859-7?q?_example_ru_n_=5Fsniper?=";
        assert!(
            "Seg fault στην εκτέλεση του example ru n _sniper"
                == std::str::from_utf8(&phrase(words).to_full_result().unwrap()).unwrap()
        );
        let words = b"Re: [Advcomparch]
=?iso-8859-7?b?U2VnIGZhdWx0IPP05+0g5er03evl8+cg9O/1?=
=?iso-8859-7?q?_example_ru_n_=5Fsniper?=";
        assert!(
            "Re: [Advcomparch] Seg fault στην εκτέλεση του example ru n _sniper"
                == std::str::from_utf8(&phrase(words).to_full_result().unwrap()).unwrap()
        );
    }

    // Builds the `Address::Mailbox` expected for `$d <$a>` (or `<$a>` when the
    // display name `$d` is empty), with offsets into the reconstructed raw text.
    macro_rules! make_address {
        ($d:literal, $a:literal) => {
            Address::Mailbox(if $d.is_empty() {
                MailboxAddress {
                    raw: format!("<{}>", $a).into_bytes(),
                    display_name: StrBuilder {
                        offset: 0,
                        length: 0,
                    },
                    address_spec: StrBuilder {
                        offset: 1,
                        length: $a.len(),
                    },
                }
            } else {
                MailboxAddress {
                    raw: format!("{} <{}>", $d, $a).into_bytes(),
                    display_name: StrBuilder {
                        offset: 0,
                        length: $d.len(),
                    },
                    address_spec: StrBuilder {
                        offset: $d.len() + 2,
                        length: $a.len(),
                    },
                }
            })
        };
    }

    // A comma-separated list spanning several lines must be parsed completely
    // (empty remainder) into the expected mailboxes.
    #[test]
    fn test_address_list() {
        let s = b"Obit Oppidum <user@domain>,
list <list@domain.tld>, list2 <list2@domain.tld>,
Bobit Boppidum <user@otherdomain.com>, Cobit Coppidum <user2@otherdomain.com>, <user@domain.tld>";
        assert_eq!(
            (
                &s[0..0],
                vec![
                    make_address!("Obit Oppidum", "user@domain"),
                    make_address!("list", "list@domain.tld"),
                    make_address!("list2", "list2@domain.tld"),
                    make_address!("Bobit Boppidum", "user@otherdomain.com"),
                    make_address!("Cobit Coppidum", "user2@otherdomain.com"),
                    make_address!("", "user@domain.tld")
                ]
            ),
            rfc2822address_list(s).unwrap()
        );
    }

    // The same instant written with a trailing comment, without one, and as an
    // encoded word with a `-0000` zone must all parse to the same value.
    #[test]
    fn test_date() {
        let s = b"Thu, 31 Aug 2017 13:43:37 +0000 (UTC)";
        let _s = b"Thu, 31 Aug 2017 13:43:37 +0000";
        let __s = b"=?utf-8?q?Thu=2C_31_Aug_2017_13=3A43=3A37_-0000?=";
        debug!("{:?}, {:?}", date(s), date(_s));
        debug!("{:?}", date(__s));
        assert_eq!(date(s).unwrap(), date(_s).unwrap());
        assert_eq!(date(_s).unwrap(), date(__s).unwrap());
    }

    // Currently disabled: returns immediately because no fixture file exists.
    #[test]
    fn test_attachments() {
        //FIXME: add file
        return;

        /*
        use std::io::Read;
        let mut buffer: Vec<u8> = Vec::new();
        let _ = std::fs::File::open("").unwrap().read_to_end(&mut buffer);
        let boundary = b"b1_4382d284f0c601a737bb32aaeda53160";
        let (_, body) = match mail(&buffer).to_full_result() {
            Ok(v) => v,
            Err(_) => panic!(),
        };
        let attachments = attachments(body, boundary).to_full_result().unwrap();
        assert_eq!(attachments.len(), 4);
        let v: Vec<&str> = attachments
            .iter()
            .map(|v| std::str::from_utf8(v).unwrap())
            .collect();
        println!("attachments {:?}", v);
        */
    }

    // Single mailboxes: encoded display name, bare address, and plain
    // `Name <addr>`.
    #[test]
    fn test_addresses() {
        {
            let s = b"=?iso-8859-7?B?0/Th/fHv8iDM4ev03ebv8g==?= <maltezos@central.ntua.gr>";
            let r = mailbox(s).unwrap().1;
            match r {
                Address::Mailbox(ref m) => assert!(
                    "Σταύρος Μαλτέζος"
                        == std::str::from_utf8(&m.display_name.display_bytes(&m.raw)).unwrap()
                        && std::str::from_utf8(&m.address_spec.display_bytes(&m.raw)).unwrap()
                            == "maltezos@central.ntua.gr"
                ),
                _ => assert!(false),
            }
        }
        {
            let s = b"user@domain";
            let r = mailbox(s).unwrap().1;
            match r {
                Address::Mailbox(ref m) => assert!(
                    m.display_name.display_bytes(&m.raw) == b""
                        && m.address_spec.display_bytes(&m.raw) == b"user@domain"
                ),
                _ => assert!(false),
            }
        }
        {
            let s = b"Name <user@domain>";
            let r = display_addr(s).unwrap().1;
            match r {
                Address::Mailbox(ref m) => assert!(
                    b"Name" == m.display_name.display_bytes(&m.raw)
                        && b"user@domain" == m.address_spec.display_bytes(&m.raw)
                ),
                _ => {}
            }
        }
        {
            let s = b"user@domain";
            let r = mailbox(s).unwrap().1;
            match r {
                Address::Mailbox(ref m) => assert!(
                    b"" == m.display_name.display_bytes(&m.raw)
                        && b"user@domain" == m.address_spec.display_bytes(&m.raw)
                ),
                _ => {}
            }
        }
    }
}