From aebff3d3d9864b8854aba5e7f43a61d515e8057f Mon Sep 17 00:00:00 2001 From: Manos Pitsidianakis Date: Tue, 30 May 2023 19:16:50 +0300 Subject: [PATCH] melib: implement mailto RFC properly This allows mailto links with `In-Reply-To` parameters to work properly. PS Mailto links can be used with the `mailto MAILTO_URI` command --- melib/src/email/address.rs | 5 +- melib/src/email/headers.rs | 8 + melib/src/email/headers/names.rs | 9 + melib/src/email/mailto.rs | 455 ++++++++++++++++++++----- melib/src/email/parser.rs | 174 +++++++--- melib/src/lib.rs | 300 +--------------- melib/src/{ => utils}/connections.rs | 0 melib/src/{ => utils}/datetime.rs | 0 melib/src/{ => utils}/logging.rs | 0 melib/src/utils/mod.rs | 155 +++++++++ melib/src/{ => utils}/parsec.rs | 0 melib/src/utils/percent_encoding.rs | 488 +++++++++++++++++++++++++++ melib/src/utils/shellexpand.rs | 240 +++++++++++++ melib/src/{ => utils}/sqlite3.rs | 0 src/components/mail/compose.rs | 23 +- src/state.rs | 5 +- src/terminal/keys.rs | 19 +- 17 files changed, 1437 insertions(+), 444 deletions(-) rename melib/src/{ => utils}/connections.rs (100%) rename melib/src/{ => utils}/datetime.rs (100%) rename melib/src/{ => utils}/logging.rs (100%) create mode 100644 melib/src/utils/mod.rs rename melib/src/{ => utils}/parsec.rs (100%) create mode 100644 melib/src/utils/percent_encoding.rs create mode 100644 melib/src/utils/shellexpand.rs rename melib/src/{ => utils}/sqlite3.rs (100%) diff --git a/melib/src/email/address.rs b/melib/src/email/address.rs index 37d8bb97..9e2d35fe 100644 --- a/melib/src/email/address.rs +++ b/melib/src/email/address.rs @@ -231,8 +231,8 @@ impl Address { .collect::<_>() } - pub fn list_try_from(val: &str) -> Result> { - Ok(parser::address::rfc2822address_list(val.as_bytes())? + pub fn list_try_from>(val: T) -> Result> { + Ok(parser::address::rfc2822address_list(val.as_ref())? 
.1 .to_vec()) } @@ -380,6 +380,7 @@ impl core::fmt::Debug for Address { impl TryFrom<&str> for Address { type Error = Error; + fn try_from(val: &str) -> Result
{ Ok(parser::address::address(val.as_bytes())?.1) } diff --git a/melib/src/email/headers.rs b/melib/src/email/headers.rs index cbf26e02..3cc7316a 100644 --- a/melib/src/email/headers.rs +++ b/melib/src/email/headers.rs @@ -122,6 +122,10 @@ impl std::ops::Index for HeaderMap { } impl HeaderMap { + pub fn empty() -> Self { + Self::default() + } + pub fn get_mut + std::fmt::Debug>( &mut self, key: T, @@ -156,6 +160,10 @@ impl HeaderMap { let k = key.try_into().expect("Invalid bytes in header name."); (self.0).remove(&k) } + + pub fn into_inner(self) -> indexmap::IndexMap { + self.0 + } } impl Deref for HeaderMap { diff --git a/melib/src/email/headers/names.rs b/melib/src/email/headers/names.rs index 7895df05..d07284fa 100644 --- a/melib/src/email/headers/names.rs +++ b/melib/src/email/headers/names.rs @@ -567,6 +567,15 @@ impl HeaderName { }) } } + + pub const fn is_standard(&self) -> bool { + matches!( + self, + Self { + inner: Repr::Standard(_) + } + ) + } } impl FromStr for HeaderName { diff --git a/melib/src/email/mailto.rs b/melib/src/email/mailto.rs index bf62a2fb..7c393982 100644 --- a/melib/src/email/mailto.rs +++ b/melib/src/email/mailto.rs @@ -19,34 +19,110 @@ * along with meli. If not, see . */ -/*! Parsing of `mailto` addresses */ +//! Parsing of `mailto` addresses. +//! +//! Conforming to [RFC6068](https://www.rfc-editor.org/rfc/rfc6068) which obsoletes +//! [RFC2368](https://www.rfc-editor.org/rfc/rfc2368). + use std::convert::TryFrom; use super::*; +use crate::{ + email::headers::HeaderMap, + percent_encoding::{AsciiSet, CONTROLS}, +}; -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Eq, PartialEq)] pub struct Mailto { - pub address: Address, - pub subject: Option, - pub cc: Option, - pub bcc: Option, + pub address: Vec
, pub body: Option, + pub headers: HeaderMap, +} + +impl Mailto { + pub const IGNORE_HEADERS: &[HeaderName] = &[ + HeaderName::FROM, + HeaderName::DATE, + HeaderName::MESSAGE_ID, + HeaderName::APPARENTLY_TO, + HeaderName::ARC_AUTHENTICATION_RESULTS, + HeaderName::ARC_MESSAGE_SIGNATURE, + HeaderName::ARC_SEAL, + HeaderName::AUTHENTICATION_RESULTS, + HeaderName::AUTOFORWARDED, + HeaderName::AUTO_SUBMITTED, + HeaderName::AUTOSUBMITTED, + HeaderName::BASE, + HeaderName::CONTENT_ALTERNATIVE, + HeaderName::CONTENT_BASE, + HeaderName::CONTENT_DESCRIPTION, + HeaderName::CONTENT_DISPOSITION, + HeaderName::CONTENT_DURATION, + HeaderName::CONTENT_FEATURES, + HeaderName::CONTENT_ID, + HeaderName::CONTENT_IDENTIFIER, + HeaderName::CONTENT_LANGUAGE, + HeaderName::CONTENT_LENGTH, + HeaderName::CONTENT_LOCATION, + HeaderName::CONTENT_MD5, + HeaderName::CONTENT_RETURN, + HeaderName::CONTENT_TRANSFER_ENCODING, + HeaderName::CONTENT_TRANSLATION_TYPE, + HeaderName::CONTENT_TYPE, + HeaderName::DELIVERED_TO, + HeaderName::DKIM_SIGNATURE, + HeaderName::ENCRYPTED, + HeaderName::FORWARDED, + HeaderName::MAIL_FOLLOWUP_TO, + HeaderName::MAIL_REPLY_TO, + HeaderName::MIME_VERSION, + HeaderName::ORIGINAL_ENCODED_INFORMATION_TYPES, + HeaderName::ORIGINAL_FROM, + HeaderName::ORIGINAL_MESSAGE_ID, + HeaderName::ORIGINAL_RECIPIENT, + HeaderName::ORIGINAL_SUBJECT, + HeaderName::ORIGINATOR_RETURN_ADDRESS, + HeaderName::RECEIVED, + HeaderName::RECEIVED_SPF, + HeaderName::RESENT_BCC, + HeaderName::RESENT_CC, + HeaderName::RESENT_DATE, + HeaderName::RESENT_FROM, + HeaderName::RESENT_MESSAGE_ID, + HeaderName::RESENT_REPLY_TO, + HeaderName::RESENT_SENDER, + HeaderName::RESENT_TO, + HeaderName::RETURN_PATH, + HeaderName::SENDER, + HeaderName::USER_AGENT, + ]; + + pub const MAILTO_CHARSET: &AsciiSet = &CONTROLS + .add(b' ') + .add(b'"') + .add(b'"') + .add(b'#') + .add(b'%') + .add(b'/') + .add(b'<') + .add(b'>') + .add(b'?') + .add(b'`') + .add(b'{') + .add(b'}'); } impl From for Draft { fn from(val: 
Mailto) -> Self { let mut ret = Draft::default(); let Mailto { - address, - subject, - cc, - bcc, + address: _, body, + headers, } = val; - ret.set_header(HeaderName::SUBJECT, subject.unwrap_or_default()); - ret.set_header(HeaderName::CC, cc.unwrap_or_default()); - ret.set_header(HeaderName::BCC, bcc.unwrap_or_default()); - ret.set_header(HeaderName::TO, address.to_string()); + for (hdr, val) in headers.into_inner() { + ret.set_header(hdr, val); + } ret.set_body(body.unwrap_or_default()); ret } @@ -76,81 +152,304 @@ impl TryFrom<&[u8]> for Mailto { } } +impl TryFrom<&str> for Mailto { + type Error = String; + + fn try_from(value: &str) -> std::result::Result { + let parse_res = super::parser::generic::mailto(value.as_bytes()).map(|(_, v)| v); + if let Ok(res) = parse_res { + Ok(res) + } else { + debug!( + "parser::mailto returned error while parsing {}:\n{:?}", + value, + parse_res.as_ref().err().unwrap() + ); + Err(format!("{:?}", parse_res.err().unwrap())) + } + } +} + #[cfg(test)] mod tests { + use HeaderName as HDR; + use super::*; #[test] fn test_mailto() { - let test_address = super::parser::address::address(b"info@example.com") - .map(|(_, v)| v) - .unwrap(); - let mailto = Mailto::try_from(&b"mailto:info@example.com?subject=email%20subject"[0..]) - .expect("Could not parse mailto link."); - let Mailto { - ref address, - ref subject, - ref cc, - ref bcc, - ref body, - } = mailto; + macro_rules! addr { + ($lit:literal) => { + Address::try_from($lit).unwrap() + }; + } + + macro_rules! mlt { + ($lit:literal) => { + Mailto::try_from($lit).expect("Could not parse mailto link.") + }; + } + + macro_rules! hdr { + ($lit:literal) => { + HeaderName::try_from($lit).expect("Could not parse header name.") + }; + } + + macro_rules! hdrmap { + ($(($field:literal, $val:literal)),+) => {{ + let mut m = HeaderMap::empty(); + $( + m.insert(hdr!($field), $val.into()); + )+ + + m + }}; + } + + macro_rules! 
test_case { + ($mailto:literal, addresses => $($addr:literal),*; body => $body:expr; $(($field:literal, $val:literal)),+) => {{ + let addresses = &[ + $( + addr!($addr) + ),* + ]; + let Mailto { + address, + body, + headers, + } = mlt!($mailto); + assert_eq!( + (address.as_slice(), body.as_ref().map(|b| b.as_str()), headers), + (addresses.as_slice(), $body, hdrmap!($(($field, $val)),*)) + ); + }} + } + + test_case!("mailto:info@example.com?subject=email%20subject", + addresses=> "info@example.com"; + body => None; + ("To", "info@example.com"), ("Subject", "email subject") + ); + test_case!("mailto:info@example.com?cc=8cc9@example.com", + addresses=> "info@example.com"; + body => None; + ("To", "info@example.com"), ("Cc", "8cc9@example.com") + ); + test_case!("mailto:info@example.com?bcc=7bcc8@example.com&body=line%20first%0Abut%20not%0Alast", + addresses=> "info@example.com"; + body => Some("line first\nbut not\nlast"); + ("To", "info@example.com"), ("Bcc", "7bcc8@example.com") + ); + + test_case!("mailto:info@example.com?In-Reply-To=%3C20230526204845.673031-1-manos.pitsidianakis@linaro.org%3E&Cc=kraxel%40redhat.com%2Cqemu-devel%40nongnu.org&Subject=Re%3A%20%5BPATCH%5D%20Add%20virtio-sound%20and%20virtio-sound-pci%20devices", + addresses=> "info@example.com"; + body => None; + ("To", "info@example.com"), ("Subject", "Re: [PATCH] Add virtio-sound and virtio-sound-pci devices"), ("Cc", "kraxel@redhat.com,qemu-devel@nongnu.org"), ("In-Reply-To", "<20230526204845.673031-1-manos.pitsidianakis@linaro.org>") + ); assert_eq!( - ( - address, - subject.as_ref().map(String::as_str), - cc.as_ref().map(String::as_str), - bcc.as_ref().map(String::as_str), - body.as_ref().map(String::as_str), - ), - (&test_address, Some("email subject"), None, None, None) + mlt!("mailto:chris@example.com%2C%20tony@example.com"), + mlt!("mailto:?to=chris@example.com%2C%20tony@example.com") ); - let mailto = Mailto::try_from(&b"mailto:info@example.com?cc=8cc9@example.com"[0..]) - .expect("Could not 
parse mailto link."); - let Mailto { - ref address, - ref subject, - ref cc, - ref bcc, - ref body, - } = mailto; - assert_eq!( - ( - address, - subject.as_ref().map(String::as_str), - cc.as_ref().map(String::as_str), - bcc.as_ref().map(String::as_str), - body.as_ref().map(String::as_str), - ), - (&test_address, None, Some("8cc9@example.com"), None, None) + + /* address plus to= should be ignored */ + assert!( + Mailto::try_from("mailto:?to=chris@example.com%2C%20tony@example.com") + != Mailto::try_from("mailto:chris@example.com?to=tony@example.com"), + "{:?} == {:?}", + Mailto::try_from("mailto:?to=chris@example.com%2C%20tony@example.com"), + Mailto::try_from("mailto:chris@example.com?to=tony@example.com") ); - let mailto = Mailto::try_from( - &b"mailto:info@example.com?bcc=7bcc8@example.com&body=line%20first%0Abut%20not%0Alast" - [0..], - ) - .expect("Could not parse mailto link."); - let Mailto { - ref address, - ref subject, - ref cc, - ref bcc, - ref body, - } = mailto; + + // URLs for an ordinary individual mailing address: + test_case!("mailto:chris@example.com", + addresses=> "chris@example.com"; + body => None; + ("To", "chris@example.com") + ); + + // A URL for a mail response system that requires the name of the file in the + // subject: + + test_case!("mailto:infobot@example.com?subject=current-issue", + addresses => "infobot@example.com"; + body => None; + ("To", "infobot@example.com"), ("Subject", "current-issue") + ); + + // A mail response system that requires a "send" request in the body: + + test_case!("mailto:infobot@example.com?body=send%20current-issue", + addresses => "infobot@example.com"; + body => Some("send current-issue"); + ("To", "infobot@example.com") + ); + + //A similar URL could have two lines with different "send" requests (in this + // case, "send current-issue" and, on the next line, "send index".) 
+ + test_case!("mailto:infobot@example.com?body=send%20current-issue%0D%0Asend%20index", + addresses => "infobot@example.com"; + body => Some("send current-issue\r\nsend index"); + ("To", "infobot@example.com") + ); + // An interesting use of your mailto URL is when browsing archives of messages. + // Each browsed message might contain a mailto URL like: + + test_case!("mailto:foobar@example.com?In-Reply-To=%3c3469A91.D10AF4C@example.com%3e", + addresses => "foobar@example.com"; + body => None; + ("To", "foobar@example.com"), ("In-Reply-To", "<3469A91.D10AF4C@example.com>") + ); + + // A request to subscribe to a mailing list: + + test_case!("mailto:majordomo@example.com?body=subscribe%20bamboo-l", + addresses => "majordomo@example.com"; + body => Some("subscribe bamboo-l"); + ("To", "majordomo@example.com") + ); + + // A URL for a single user which includes a CC of another user: + + test_case!("mailto:joe@example.com?cc=bob@example.com&body=hello", + addresses => "joe@example.com"; + body => Some("hello"); + ("To", "joe@example.com"), ("Cc", "bob@example.com") + ); + + // Another way of expressing the same thing: + + test_case!("mailto:?to=joe@example.com&cc=bob@example.com&body=hello", + addresses => "joe@example.com"; + body => Some("hello"); + ("To", "joe@example.com"), ("Cc", "bob@example.com") + ); + + // Note the use of the "&" reserved character, above. The following example, + // by using "?" twice, is incorrect: ; WRONG! 
+ + assert!(Mailto::try_from("mailto:joe@example.com?cc=bob@example.com?body=hello").is_err()); + + // assert + // these are equal + + test_case!("mailto:?to=joe@example.com&cc=bob@example.com&body=hello", + addresses => "joe@example.com"; + body => Some("hello"); + ("To", "joe@example.com"), ("Cc", "bob@example.com") + ); + + // To indicate the address "gorby%kremvax@example.com" one would do: + // + + test_case!("mailto:gorby%25kremvax@example.com", + addresses => "gorby%kremvax@example.com"; + body => None; + ("To", "gorby%kremvax@example.com") + ); + + // Custom header is ignored + // + + test_case!("mailto:address@example.com?blat=foop", + addresses => "address@example.com"; + body => None; + ("To", "address@example.com") + ); + + // 6.2. Examples of Complicated Email Addresses + assert_eq!( - ( - address, - subject.as_ref().map(String::as_str), - cc.as_ref().map(String::as_str), - bcc.as_ref().map(String::as_str), - body.as_ref().map(String::as_str), - ), - ( - &test_address, - None, - None, - Some("7bcc8@example.com"), - Some("line first\nbut not\nlast") - ) + mlt!("mailto:%22not%40me%22@example.org").address, + vec![addr!(r#""not@me"@example.org"#)] + ); + + // Email address: "oh\\no"@example.org; corresponding 'mailto' URI: + + // . + + // Email address: "\\\"it's\ ugly\\\""@example.org; corresponding + // 'mailto' URI: + + // . + // [tag:FIXME] + //assert_eq!( + // mlt!("mailto:%22%5C%5C%5C%22it's%5C%20ugly%5C%5C%5C%22%22@example.org"). + // address, vec![addr!(r#"\"it's ugly\"@example.org"#)] + //); + + // When an email address itself includes an "&" (ampersand) character, that + // character has to be percent-encoded. For example, the 'mailto' URI + // to send mail to "Mike&family@example.org" is + // . 
+ assert_eq!( + mlt!("mailto:Mike%26family@example.org").address, + vec![addr!("Mike&family@example.org")] + ); + + // Sending a mail with the subject "coffee" in French, i.e., "cafe" where the + // final e is an e-acute, using UTF-8 and percent-encoding: + // + assert_eq!( + &mlt!("mailto:user@example.org?subject=caf%C3%A9").headers[HDR::SUBJECT], + "café" + ); + + // The same subject, this time using an encoded-word (escaping the "=" + // and "?" characters used in the encoded-word syntax, because they are + // reserved): + // [tag:FIXME] + // + assert_eq!( + &mlt!("mailto:user@example.org?subject=%3D%3Futf-8%3FQ%3Fcaf%3DC3%3DA9%3F%3D").headers + [HDR::SUBJECT], + "=?utf-8?Q?caf=C3=A9?=" + ); + + // The same subject, this time encoded as iso-8859-1: + + // + // [tag:FIXME] + assert_eq!( + &mlt!("mailto:user@example.org?subject=%3D%3Fiso-8859-1%3FQ%3Fcaf%3DE9%3F%3D").headers + [HDR::SUBJECT], + "=?iso-8859-1?Q?caf=E9?=" + ); + + // Going back to straight UTF-8 and adding a body with the same value: + // + // + test_case!("mailto:user@example.org?subject=caf%C3%A9&body=caf%C3%A9", + addresses => "user@example.org"; + body => Some("café"); + ("To", "user@example.org"), + ("Subject", "café") + ); + + // The following example uses the Japanese word "natto" (Unicode + // characters U+7D0D U+8C46) as a domain name label, sending a mail to a + // user at "natto".example.org: + + // + + // When constructing the email, the domain name label is converted to + // punycode. 
The resulting message may look as follows: + + // From: sender@example.net + // To: user@xn--99zt52a.example.org + // Subject: Test + // Content-Type: text/plain + // Content-Transfer-Encoding: 7bit + // + // NATTO + test_case!("mailto:user@%E7%B4%8D%E8%B1%86.example.org?subject=Test&body=NATTO", + addresses => "user@納豆.example.org"; + body => Some("NATTO"); + ("To", "user@納豆.example.org"), + ("Subject", "Test") ); } } diff --git a/melib/src/email/parser.rs b/melib/src/email/parser.rs index c1bd25b1..905ad1d5 100644 --- a/melib/src/email/parser.rs +++ b/melib/src/email/parser.rs @@ -19,8 +19,9 @@ * along with meli. If not, see . */ -/*! Parsers for email. See submodules */ -use std::borrow::Cow; +//! Parsers for email. See submodules. + +use std::{borrow::Cow, convert::TryFrom, fmt::Write}; use nom::{ branch::alt, @@ -34,7 +35,16 @@ use nom::{ }; use smallvec::SmallVec; -use crate::error::{Error, Result, ResultIntoError}; +use crate::{ + email::{ + address::Address, + headers::{HeaderMap, HeaderName}, + mailto::Mailto, + }, + error::{Error, Result, ResultIntoError}, + html_escape::HtmlEntity, + percent_encoding::percent_decode, +}; macro_rules! to_str { ($l:expr) => {{ @@ -913,85 +923,157 @@ pub mod generic { } } - use crate::email::{address::Address, mailto::Mailto}; pub fn mailto(mut input: &[u8]) -> IResult<&[u8], Mailto> { + let orig_input = input; if !input.starts_with(b"mailto:") { return Err(nom::Err::Error( (input, "mailto(): input doesn't start with `mailto:`").into(), )); } + let mut body = None; + let mut headers = HeaderMap::empty(); + let mut address: Vec
; + + if String::from_utf8_lossy(input).matches('?').count() > 1 { + return Err(nom::Err::Error( + (input, "mailto(): Using '?' twice is invalid.").into(), + )); + } input = &input[b"mailto:".len()..]; + let mut decoded_owned = percent_decode(input).decode_utf8().unwrap().to_string(); - let end = input.iter().position(|e| *e == b'?').unwrap_or(input.len()); - let address: Address; + let mut substitutions = vec![]; + for (i, _) in decoded_owned.match_indices('&') { + if let Some(j) = HtmlEntity::ALL + .iter() + .position(|e| decoded_owned[i..].starts_with(e)) + { + substitutions.push((i, HtmlEntity::ALL[j].len(), HtmlEntity::GLYPHS[j])); + } + } - if let Ok((_, addr)) = crate::email::parser::address::address(&input[..end]) { + for (i, len, g) in substitutions.into_iter().rev() { + decoded_owned.replace_range(i..(i + len), g); + } + + let mut decoded = decoded_owned.as_str(); + + let end = decoded.as_bytes().iter().position(|e| *e == b'?'); + let end_or_len = end.unwrap_or(decoded.len()); + + if let Ok(addr) = Address::list_try_from(&decoded[..end_or_len]) { address = addr; - input = if input[end..].is_empty() { - &input[end..] + decoded = if decoded[end_or_len..].is_empty() { + &decoded[end_or_len..] } else { - &input[end + 1..] + &decoded[end_or_len + 1..] 
}; + } else if end.is_some() { + decoded = &decoded[1..]; + address = vec![]; } else { return Err(nom::Err::Error( - (input, "mailto(): address not found in input").into(), + ( + input, + format!("input {:?}", String::from_utf8_lossy(orig_input)), + ) + .into(), )); } - let mut subject = None; - let mut cc = None; - let mut bcc = None; - let mut body = None; - while !input.is_empty() { - let tag = if let Some(tag_pos) = input.iter().position(|e| *e == b'=') { - let ret = &input[0..tag_pos]; - input = &input[tag_pos + 1..]; + if !address.is_empty() { + let mut full_address = String::new(); + for address in &address { + write!(&mut full_address, "{}, ", address) + .expect("Could not write into a String, are you out of memory?"); + } + if full_address.ends_with(", ") { + let len = full_address.len(); + full_address.truncate(len - ", ".len()); + } + headers.insert(HeaderName::TO, full_address); + } + + while !decoded.is_empty() { + if decoded.starts_with("&") { + decoded = &decoded["&".len()..]; + continue; + } + + let tag = if let Some(tag_pos) = decoded.as_bytes().iter().position(|e| *e == b'=') { + let ret = &decoded[0..tag_pos]; + decoded = &decoded[tag_pos + 1..]; ret } else { return Err(nom::Err::Error( - (input, "mailto(): extra characters found in input").into(), + ( + input, + format!("mailto(): extra characters found in input: {}", decoded), + ) + .into(), )); }; - let value_end = input.iter().position(|e| *e == b'&').unwrap_or(input.len()); + let value_end = decoded + .as_bytes() + .iter() + .position(|e| *e == b'&') + .unwrap_or(decoded.len()); - let value = String::from_utf8_lossy(&input[..value_end]).to_string(); + let value = decoded[..value_end].to_string(); match tag { - b"subject" if subject.is_none() => { - subject = Some(value.replace("%20", " ")); - } - b"cc" if cc.is_none() => { - cc = Some(value); - } - b"bcc" if bcc.is_none() => { - bcc = Some(value); - } - b"body" if body.is_none() => { - /* FIXME: - * Parse escaped characters properly. 
- */ - body = Some(value.replace("%20", " ").replace("%0A", "\n")); - } - _ => { - return Err(nom::Err::Error( - (input, "mailto(): unknown tag in input").into(), - )); + "body" if body.is_none() => { + body = Some(value); } + other => match HeaderName::try_from(other) { + Ok(hdr) if hdr == HeaderName::TO => { + if !headers.contains_key(&hdr) { + if let Ok(address_val) = Address::list_try_from(value.as_str()) { + address.extend(address_val.into_iter()); + } + headers.insert(HeaderName::TO, value); + } + } + Ok(hdr) if hdr.is_standard() => { + if Mailto::IGNORE_HEADERS.contains(&hdr) { + log::warn!( + "parsing mailto(): header {} is not allowed in mailto URIs for \ + safety and will be ignored. Value was {:?}", + hdr, + value + ); + } + if !headers.contains_key(&hdr) { + headers.insert(hdr, value); + } + } + Ok(hdr) => { + log::warn!( + "parsing mailto(): header {} is not a known header and it will be \ + ignored.Value was {:?}", + hdr, + value + ); + } + _ => { + return Err(nom::Err::Error( + (input, "mailto(): unknown tag in input").into(), + )); + } + }, } - if input[value_end..].is_empty() { + if decoded[value_end..].is_empty() { break; } - input = &input[value_end + 1..]; + decoded = &decoded[value_end + 1..]; } Ok(( input, Mailto { address, - subject, - cc, - bcc, body, + headers, }, )) } diff --git a/melib/src/lib.rs b/melib/src/lib.rs index 853ef0e6..e561a04c 100644 --- a/melib/src/lib.rs +++ b/melib/src/lib.rs @@ -39,8 +39,6 @@ //! - Basic mail account configuration to use with //! [`backends`](./backends/index.html) (see module //! [`conf`](./conf/index.html)) -//! - Parser combinators (see module [`parsec`](./parsec/index.html)) -//! - A `ShellExpandTrait` to expand paths like a shell. //! - A `debug` macro that works like `std::dbg` but for multiple threads. (see //! 
[`debug` macro](./macro.debug.html)) @@ -84,11 +82,8 @@ pub mod dbg { #[cfg(feature = "unicode_algorithms")] pub mod text_processing; -pub mod datetime; -pub use datetime::UnixTimestamp; +pub use utils::{datetime::UnixTimestamp, *}; -#[macro_use] -mod logging; pub use self::logging::{LogLevel, StderrLogger}; pub mod addressbook; @@ -106,16 +101,15 @@ pub mod error; pub use crate::error::*; pub mod thread; pub use thread::*; -pub mod connections; -pub mod parsec; pub mod search; +#[macro_use] +mod utils; + #[cfg(feature = "gpgme")] pub mod gpgme; #[cfg(feature = "smtp")] pub mod smtp; -#[cfg(feature = "sqlite3")] -pub mod sqlite3; #[macro_use] extern crate serde_derive; @@ -165,289 +159,3 @@ impl core::fmt::Display for Bytes { } pub use shellexpand::ShellExpandTrait; -pub mod shellexpand { - - #[cfg(not(any(target_os = "netbsd", target_os = "macos")))] - use std::os::unix::io::AsRawFd; - use std::{ - ffi::OsStr, - os::unix::ffi::OsStrExt, - path::{Path, PathBuf}, - }; - - use smallvec::SmallVec; - - pub trait ShellExpandTrait { - fn expand(&self) -> PathBuf; - fn complete(&self, force: bool) -> SmallVec<[String; 128]>; - } - - impl ShellExpandTrait for Path { - fn expand(&self) -> PathBuf { - let mut ret = PathBuf::new(); - for c in self.components() { - let c_to_str = c.as_os_str().to_str(); - match c_to_str { - Some("~") => { - if let Ok(home_dir) = std::env::var("HOME") { - ret.push(home_dir) - } else { - return PathBuf::new(); - } - } - Some(var) if var.starts_with('$') => { - let env_name = var.split_at(1).1; - if env_name.chars().all(char::is_uppercase) { - ret.push(std::env::var(env_name).unwrap_or_default()); - } else { - ret.push(c); - } - } - Some(_) => { - ret.push(c); - } - None => { - /* path is invalid */ - return PathBuf::new(); - } - } - } - ret - } - - #[cfg(target_os = "linux")] - fn complete(&self, force: bool) -> SmallVec<[String; 128]> { - use libc::dirent64; - use nix::fcntl::OFlag; - const BUF_SIZE: ::libc::size_t = 8 << 10; - - let (prefix, 
_match) = if self.as_os_str().as_bytes().ends_with(b"/.") { - (self.components().as_path(), OsStr::from_bytes(b".")) - } else if self.exists() && (!force || self.as_os_str().as_bytes().ends_with(b"/")) { - return SmallVec::new(); - } else { - let last_component = self - .components() - .last() - .map(|c| c.as_os_str()) - .unwrap_or_else(|| OsStr::from_bytes(b"")); - let prefix = if let Some(p) = self.parent() { - p - } else { - return SmallVec::new(); - }; - (prefix, last_component) - }; - - let dir = match ::nix::dir::Dir::openat( - ::libc::AT_FDCWD, - prefix, - OFlag::O_DIRECTORY | OFlag::O_NOATIME | OFlag::O_RDONLY | OFlag::O_CLOEXEC, - ::nix::sys::stat::Mode::S_IRUSR | ::nix::sys::stat::Mode::S_IXUSR, - ) - .or_else(|_| { - ::nix::dir::Dir::openat( - ::libc::AT_FDCWD, - prefix, - OFlag::O_DIRECTORY | OFlag::O_RDONLY | OFlag::O_CLOEXEC, - ::nix::sys::stat::Mode::S_IRUSR | ::nix::sys::stat::Mode::S_IXUSR, - ) - }) { - Ok(dir) => dir, - Err(err) => { - debug!(prefix); - debug!(err); - return SmallVec::new(); - } - }; - - let mut buf: Vec = Vec::with_capacity(BUF_SIZE); - let mut entries = SmallVec::new(); - loop { - let n: i64 = unsafe { - ::libc::syscall( - ::libc::SYS_getdents64, - dir.as_raw_fd(), - buf.as_ptr(), - BUF_SIZE - 256, - ) - }; - if n < 0 { - return SmallVec::new(); - } else if n == 0 { - break; - } - - let n = n as usize; - unsafe { - buf.set_len(n); - } - let mut pos = 0; - while pos < n { - let dir = unsafe { std::mem::transmute::<&[u8], &[dirent64]>(&buf[pos..]) }; - let entry = unsafe { std::ffi::CStr::from_ptr(dir[0].d_name.as_ptr()) }; - if entry.to_bytes() != b"." && entry.to_bytes() != b".." 
{ - if entry.to_bytes().starts_with(_match.as_bytes()) { - if dir[0].d_type == ::libc::DT_DIR && !entry.to_bytes().ends_with(b"/") - { - let mut s = unsafe { - String::from_utf8_unchecked( - entry.to_bytes()[_match.as_bytes().len()..].to_vec(), - ) - }; - s.push('/'); - entries.push(s); - } else { - entries.push(unsafe { - String::from_utf8_unchecked( - entry.to_bytes()[_match.as_bytes().len()..].to_vec(), - ) - }); - } - } - } - pos += dir[0].d_reclen as usize; - } - // https://github.com/romkatv/gitstatus/blob/caf44f7aaf33d0f46e6749e50595323c277e0908/src/dir.cc - // "It's tempting to bail here if n + sizeof(linux_dirent64) + - // 512 <= n. After all, there was enough space - // for another entry but SYS_getdents64 didn't write it, so this - // must be the end of the directory listing, - // right? Unfortunately, no. SYS_getdents64 is finicky. - // It sometimes writes a partial list of entries even if the - // full list would fit." - } - entries - } - - #[cfg(not(target_os = "linux"))] - fn complete(&self, force: bool) -> SmallVec<[String; 128]> { - let mut entries = SmallVec::new(); - let (prefix, _match) = { - if self.exists() && (!force || self.as_os_str().as_bytes().ends_with(b"/")) { - // println!("{} {:?}", self.display(), self.components().last()); - return entries; - } else { - let last_component = self - .components() - .last() - .map(|c| c.as_os_str()) - .unwrap_or_else(|| OsStr::from_bytes(b"")); - let prefix = if let Some(p) = self.parent() { - p - } else { - return entries; - }; - (prefix, last_component) - } - }; - if force && self.is_dir() && !self.as_os_str().as_bytes().ends_with(b"/") { - entries.push("/".to_string()); - } - - if let Ok(iter) = std::fs::read_dir(&prefix) { - for entry in iter.flatten() { - if entry.path().as_os_str().as_bytes() != b"." - && entry.path().as_os_str().as_bytes() != b".." 
- && entry - .path() - .as_os_str() - .as_bytes() - .starts_with(_match.as_bytes()) - { - if entry.path().is_dir() - && !entry.path().as_os_str().as_bytes().ends_with(b"/") - { - let mut s = unsafe { - String::from_utf8_unchecked( - entry.path().as_os_str().as_bytes()[_match.as_bytes().len()..] - .to_vec(), - ) - }; - s.push('/'); - entries.push(s); - } else { - entries.push(unsafe { - String::from_utf8_unchecked( - entry.path().as_os_str().as_bytes()[_match.as_bytes().len()..] - .to_vec(), - ) - }); - } - } - } - } - entries - } - } - - #[test] - fn test_shellexpandtrait() { - assert!(Path::new("~").expand().complete(false).is_empty()); - assert!(!Path::new("~").expand().complete(true).is_empty()); - } -} - -#[macro_export] -macro_rules! declare_u64_hash { - ($type_name:ident) => { - #[derive( - Hash, - Eq, - PartialEq, - Debug, - Ord, - PartialOrd, - Default, - Serialize, - Deserialize, - Copy, - Clone, - )] - #[repr(transparent)] - pub struct $type_name(pub u64); - - impl $type_name { - #[inline(always)] - pub fn from_bytes(bytes: &[u8]) -> Self { - use std::{collections::hash_map::DefaultHasher, hash::Hasher}; - let mut h = DefaultHasher::new(); - h.write(bytes); - Self(h.finish()) - } - - #[inline(always)] - pub const fn to_be_bytes(self) -> [u8; 8] { - self.0.to_be_bytes() - } - - #[inline(always)] - pub const fn is_null(self) -> bool { - self.0 == 0 - } - } - - impl core::fmt::Display for $type_name { - fn fmt(&self, fmt: &mut core::fmt::Formatter) -> core::fmt::Result { - write!(fmt, "{}", self.0) - } - } - #[cfg(feature = "sqlite3")] - impl rusqlite::types::ToSql for $type_name { - fn to_sql(&self) -> rusqlite::Result { - Ok(rusqlite::types::ToSqlOutput::from(self.0 as i64)) - } - } - - #[cfg(feature = "sqlite3")] - impl rusqlite::types::FromSql for $type_name { - fn column_result( - value: rusqlite::types::ValueRef, - ) -> rusqlite::types::FromSqlResult { - let b: i64 = rusqlite::types::FromSql::column_result(value)?; - - Ok($type_name(b as u64)) - } - } 
- }; -} diff --git a/melib/src/connections.rs b/melib/src/utils/connections.rs similarity index 100% rename from melib/src/connections.rs rename to melib/src/utils/connections.rs diff --git a/melib/src/datetime.rs b/melib/src/utils/datetime.rs similarity index 100% rename from melib/src/datetime.rs rename to melib/src/utils/datetime.rs diff --git a/melib/src/logging.rs b/melib/src/utils/logging.rs similarity index 100% rename from melib/src/logging.rs rename to melib/src/utils/logging.rs diff --git a/melib/src/utils/mod.rs b/melib/src/utils/mod.rs new file mode 100644 index 00000000..288e74d8 --- /dev/null +++ b/melib/src/utils/mod.rs @@ -0,0 +1,155 @@ +/* + * meli - lib.rs + * + * Copyright 2017 Manos Pitsidianakis + * + * This file is part of meli. + * + * meli is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * meli is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with meli. If not, see . + */ + +//! Utility modules for general use. + +pub mod connections; +pub mod datetime; +#[macro_use] +pub mod logging; +pub mod parsec; +pub mod percent_encoding; +pub mod shellexpand; +#[cfg(feature = "sqlite3")] +pub mod sqlite3; + +pub mod html_escape { + //! 
HTML Coded Character Set + + /// Numeric and Special Graphic Entity Set + /// + /// ```text + /// GLYPH NAME SYNTAX DESCRIPTION + /// < lt < Less than sign + /// > gt > Greater than sign + /// & amp & Ampersand + /// " quot " Double quote sign + /// ``` + /// + /// Source: + #[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)] + pub enum HtmlEntity { + /// Less than sign + Lt, + /// Greater than sign + Gt, + /// Ampersand + Amp, + /// Double quote sign + Quot, + } + + impl HtmlEntity { + pub const ALL: [&str; 4] = ["<", ">", "&", """]; + pub const GLYPHS: [&str; 4] = ["<", ">", "&", "\""]; + + pub const fn glyph(self) -> char { + match self { + Self::Lt => '<', + Self::Gt => '>', + Self::Amp => '&', + Self::Quot => '"', + } + } + + pub const fn name(self) -> &'static str { + match self { + Self::Lt => "lt", + Self::Gt => "gt", + Self::Amp => "amp", + Self::Quot => "quot", + } + } + + pub const fn syntax(self) -> &'static str { + match self { + Self::Lt => "<", + Self::Gt => ">", + Self::Amp => "&", + Self::Quot => """, + } + } + } +} + +#[macro_export] +macro_rules! 
declare_u64_hash { + ($type_name:ident) => { + #[derive( + Hash, + Eq, + PartialEq, + Debug, + Ord, + PartialOrd, + Default, + Serialize, + Deserialize, + Copy, + Clone, + )] + #[repr(transparent)] + pub struct $type_name(pub u64); + + impl $type_name { + #[inline(always)] + pub fn from_bytes(bytes: &[u8]) -> Self { + use std::{collections::hash_map::DefaultHasher, hash::Hasher}; + let mut h = DefaultHasher::new(); + h.write(bytes); + Self(h.finish()) + } + + #[inline(always)] + pub const fn to_be_bytes(self) -> [u8; 8] { + self.0.to_be_bytes() + } + + #[inline(always)] + pub const fn is_null(self) -> bool { + self.0 == 0 + } + } + + impl core::fmt::Display for $type_name { + fn fmt(&self, fmt: &mut core::fmt::Formatter) -> core::fmt::Result { + write!(fmt, "{}", self.0) + } + } + #[cfg(feature = "sqlite3")] + impl rusqlite::types::ToSql for $type_name { + fn to_sql(&self) -> rusqlite::Result { + Ok(rusqlite::types::ToSqlOutput::from(self.0 as i64)) + } + } + + #[cfg(feature = "sqlite3")] + impl rusqlite::types::FromSql for $type_name { + fn column_result( + value: rusqlite::types::ValueRef, + ) -> rusqlite::types::FromSqlResult { + let b: i64 = rusqlite::types::FromSql::column_result(value)?; + + Ok($type_name(b as u64)) + } + } + }; +} diff --git a/melib/src/parsec.rs b/melib/src/utils/parsec.rs similarity index 100% rename from melib/src/parsec.rs rename to melib/src/utils/parsec.rs diff --git a/melib/src/utils/percent_encoding.rs b/melib/src/utils/percent_encoding.rs new file mode 100644 index 00000000..da93f7f1 --- /dev/null +++ b/melib/src/utils/percent_encoding.rs @@ -0,0 +1,488 @@ +// Copyright 2013-2016 The rust-url developers. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! URLs use special characters to indicate the parts of the request. +//! 
For example, a `?` question mark marks the end of a path and the start of a +//! query string. In order for that character to exist inside a path, it needs +//! to be encoded differently. +//! +//! Percent encoding replaces reserved characters with the `%` escape character +//! followed by a byte value as two hexadecimal digits. +//! For example, an ASCII space is replaced with `%20`. +//! +//! When encoding, the set of characters that can (and should, for readability) +//! be left alone depends on the context. +//! The `?` question mark mentioned above is not a separator when used literally +//! inside of a query string, and therefore does not need to be encoded. +//! The [`AsciiSet`] parameter of [`percent_encode`] and [`utf8_percent_encode`] +//! lets callers configure this. +//! +//! This crate deliberately does not provide many different sets. +//! Users should consider in what context the encoded string will be used, +//! read relevant specifications, and define their own set. +//! This is done by using the `add` method of an existing set. +//! +//! # Examples +//! +//! ```rust +//! use melib::percent_encoding::{percent_decode_str, utf8_percent_encode, AsciiSet, CONTROLS}; +//! +//! /// https://url.spec.whatwg.org/#fragment-percent-encode-set +//! const FRAGMENT: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'<').add(b'>').add(b'`'); +//! +//! assert_eq!( +//! utf8_percent_encode("foo <bar>", FRAGMENT).to_string(), +//! "foo%20%3Cbar%3E" +//! ); +//! +//! assert_eq!( +//! percent_decode_str("foo%20%3Cbar%3E").decode_utf8().unwrap(), +//! "foo <bar>" +//! ); +//! ``` + +use std::{borrow::Cow, fmt, mem, slice, str}; + +/// Represents a set of characters or bytes in the ASCII range. +/// +/// This is used in [`percent_encode`] and [`utf8_percent_encode`]. +/// This is similar to [percent-encode sets](https://url.spec.whatwg.org/#percent-encoded-bytes). +/// +/// Use the `add` method of an existing set to define a new set. 
For example: +/// +/// ``` +/// use melib::percent_encoding::{AsciiSet, CONTROLS}; +/// +/// /// https://url.spec.whatwg.org/#fragment-percent-encode-set +/// const FRAGMENT: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'<').add(b'>').add(b'`'); +/// ``` +pub struct AsciiSet { + mask: [Chunk; ASCII_RANGE_LEN / BITS_PER_CHUNK], +} + +type Chunk = u32; + +const ASCII_RANGE_LEN: usize = 0x80; + +const BITS_PER_CHUNK: usize = 8 * mem::size_of::(); + +impl AsciiSet { + /// Called with UTF-8 bytes rather than code points. + /// Not used for non-ASCII bytes. + const fn contains(&self, byte: u8) -> bool { + let chunk = self.mask[byte as usize / BITS_PER_CHUNK]; + let mask = 1 << (byte as usize % BITS_PER_CHUNK); + (chunk & mask) != 0 + } + + fn should_percent_encode(&self, byte: u8) -> bool { + !byte.is_ascii() || self.contains(byte) + } + + pub const fn add(&self, byte: u8) -> Self { + let mut mask = self.mask; + mask[byte as usize / BITS_PER_CHUNK] |= 1 << (byte as usize % BITS_PER_CHUNK); + AsciiSet { mask } + } + + pub const fn remove(&self, byte: u8) -> Self { + let mut mask = self.mask; + mask[byte as usize / BITS_PER_CHUNK] &= !(1 << (byte as usize % BITS_PER_CHUNK)); + AsciiSet { mask } + } +} + +/// The set of 0x00 to 0x1F (C0 controls), and 0x7F (DEL). +/// +/// Note that this includes the newline and tab characters, but not the space +/// 0x20. +/// +/// +pub const CONTROLS: &AsciiSet = &AsciiSet { + mask: [ + !0_u32, // C0: 0x00 to 0x1F (32 bits set) + 0, + 0, + 1 << (0x7F_u32 % 32), // DEL: 0x7F (one bit set) + ], +}; + +macro_rules! static_assert { + ($( $bool: expr, )+) => { + fn _static_assert() { + $( + let _ = mem::transmute::<[u8; $bool as usize], u8>; + )+ + } + } +} + +static_assert! { + CONTROLS.contains(0x00), + CONTROLS.contains(0x1F), + !CONTROLS.contains(0x20), + !CONTROLS.contains(0x7E), + CONTROLS.contains(0x7F), +} + +/// Everything that is not an ASCII letter or digit. +/// +/// This is probably more eager than necessary in any context. 
+pub const NON_ALPHANUMERIC: &AsciiSet = &CONTROLS + .add(b' ') + .add(b'!') + .add(b'"') + .add(b'#') + .add(b'$') + .add(b'%') + .add(b'&') + .add(b'\'') + .add(b'(') + .add(b')') + .add(b'*') + .add(b'+') + .add(b',') + .add(b'-') + .add(b'.') + .add(b'/') + .add(b':') + .add(b';') + .add(b'<') + .add(b'=') + .add(b'>') + .add(b'?') + .add(b'@') + .add(b'[') + .add(b'\\') + .add(b']') + .add(b'^') + .add(b'_') + .add(b'`') + .add(b'{') + .add(b'|') + .add(b'}') + .add(b'~'); + +/// Return the percent-encoding of the given byte. +/// +/// This is unconditional, unlike `percent_encode()` which has an `AsciiSet` +/// parameter. +/// +/// # Examples +/// +/// ``` +/// use melib::percent_encoding::percent_encode_byte; +/// +/// assert_eq!( +/// "foo bar" +/// .bytes() +/// .map(percent_encode_byte) +/// .collect::<String>(), +/// "%66%6F%6F%20%62%61%72" +/// ); +/// ``` +#[inline] +pub fn percent_encode_byte(byte: u8) -> &'static str { + static ENC_TABLE: &[u8; 768] = b"\ + %00%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F\ + %10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F\ + %20%21%22%23%24%25%26%27%28%29%2A%2B%2C%2D%2E%2F\ + %30%31%32%33%34%35%36%37%38%39%3A%3B%3C%3D%3E%3F\ + %40%41%42%43%44%45%46%47%48%49%4A%4B%4C%4D%4E%4F\ + %50%51%52%53%54%55%56%57%58%59%5A%5B%5C%5D%5E%5F\ + %60%61%62%63%64%65%66%67%68%69%6A%6B%6C%6D%6E%6F\ + %70%71%72%73%74%75%76%77%78%79%7A%7B%7C%7D%7E%7F\ + %80%81%82%83%84%85%86%87%88%89%8A%8B%8C%8D%8E%8F\ + %90%91%92%93%94%95%96%97%98%99%9A%9B%9C%9D%9E%9F\ + %A0%A1%A2%A3%A4%A5%A6%A7%A8%A9%AA%AB%AC%AD%AE%AF\ + %B0%B1%B2%B3%B4%B5%B6%B7%B8%B9%BA%BB%BC%BD%BE%BF\ + %C0%C1%C2%C3%C4%C5%C6%C7%C8%C9%CA%CB%CC%CD%CE%CF\ + %D0%D1%D2%D3%D4%D5%D6%D7%D8%D9%DA%DB%DC%DD%DE%DF\ + %E0%E1%E2%E3%E4%E5%E6%E7%E8%E9%EA%EB%EC%ED%EE%EF\ + %F0%F1%F2%F3%F4%F5%F6%F7%F8%F9%FA%FB%FC%FD%FE%FF\ + "; + + let index = usize::from(byte) * 3; + // SAFETY: ENC_TABLE is ascii-only, so any subset of it should be + // ascii-only too, which is valid utf8. 
+ unsafe { str::from_utf8_unchecked(&ENC_TABLE[index..index + 3]) } +} + +/// Percent-encode the given bytes with the given set. +/// +/// Non-ASCII bytes and bytes in `ascii_set` are encoded. +/// +/// The return type: +/// +/// * Implements `Iterator` and therefore has a +/// `.collect::()` method, +/// * Implements `Display` and therefore has a `.to_string()` method, +/// * Implements `Into>` borrowing `input` when none of its bytes are +/// encoded. +/// +/// # Examples +/// +/// ``` +/// use melib::percent_encoding::{percent_encode, NON_ALPHANUMERIC}; +/// +/// assert_eq!( +/// percent_encode(b"foo bar?", NON_ALPHANUMERIC).to_string(), +/// "foo%20bar%3F" +/// ); +/// ``` +#[inline] +pub fn percent_encode<'a>(input: &'a [u8], ascii_set: &'static AsciiSet) -> PercentEncode<'a> { + PercentEncode { + bytes: input, + ascii_set, + } +} + +/// Percent-encode the UTF-8 encoding of the given string. +/// +/// See [`percent_encode`] regarding the return type. +/// +/// # Examples +/// +/// ``` +/// use melib::percent_encoding::{utf8_percent_encode, NON_ALPHANUMERIC}; +/// +/// assert_eq!( +/// utf8_percent_encode("foo bar?", NON_ALPHANUMERIC).to_string(), +/// "foo%20bar%3F" +/// ); +/// ``` +#[inline] +pub fn utf8_percent_encode<'a>(input: &'a str, ascii_set: &'static AsciiSet) -> PercentEncode<'a> { + percent_encode(input.as_bytes(), ascii_set) +} + +/// The return type of [`percent_encode`] and [`utf8_percent_encode`]. +#[derive(Clone)] +pub struct PercentEncode<'a> { + bytes: &'a [u8], + ascii_set: &'static AsciiSet, +} + +impl<'a> Iterator for PercentEncode<'a> { + type Item = &'a str; + + fn next(&mut self) -> Option<&'a str> { + if let Some((&first_byte, remaining)) = self.bytes.split_first() { + if self.ascii_set.should_percent_encode(first_byte) { + self.bytes = remaining; + Some(percent_encode_byte(first_byte)) + } else { + // The unsafe blocks here are appropriate because the bytes are + // confirmed as a subset of UTF-8 in should_percent_encode. 
+ for (i, &byte) in remaining.iter().enumerate() { + if self.ascii_set.should_percent_encode(byte) { + // 1 for first_byte + i for previous iterations of this loop + let (unchanged_slice, remaining) = self.bytes.split_at(1 + i); + self.bytes = remaining; + return Some(unsafe { str::from_utf8_unchecked(unchanged_slice) }); + } + } + let unchanged_slice = self.bytes; + self.bytes = &[][..]; + Some(unsafe { str::from_utf8_unchecked(unchanged_slice) }) + } + } else { + None + } + } + + fn size_hint(&self) -> (usize, Option) { + if self.bytes.is_empty() { + (0, Some(0)) + } else { + (1, Some(self.bytes.len())) + } + } +} + +impl<'a> fmt::Display for PercentEncode<'a> { + fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { + for c in (*self).clone() { + formatter.write_str(c)? + } + Ok(()) + } +} + +impl<'a> From> for Cow<'a, str> { + fn from(mut iter: PercentEncode<'a>) -> Self { + match iter.next() { + None => "".into(), + Some(first) => match iter.next() { + None => first.into(), + Some(second) => { + let mut string = first.to_owned(); + string.push_str(second); + string.extend(iter); + string.into() + } + }, + } + } +} + +/// Percent-decode the given string. +/// +/// +/// +/// See [`percent_decode`] regarding the return type. +#[inline] +pub fn percent_decode_str(input: &str) -> PercentDecode<'_> { + percent_decode(input.as_bytes()) +} + +/// Percent-decode the given bytes. +/// +/// +/// +/// Any sequence of `%` followed by two hexadecimal digits is decoded. +/// The return type: +/// +/// * Implements `Into>` borrowing `input` when it contains no +/// percent-encoded sequence, +/// * Implements `Iterator` and therefore has a +/// `.collect::>()` method, +/// * Has `decode_utf8()` and `decode_utf8_lossy()` methods. +/// +/// # Examples +/// +/// ``` +/// use melib::percent_encoding::percent_decode; +/// +/// assert_eq!( +/// percent_decode(b"foo%20bar%3f").decode_utf8().unwrap(), +/// "foo bar?" 
+/// ); +/// ``` +#[inline] +pub fn percent_decode(input: &[u8]) -> PercentDecode<'_> { + PercentDecode { + bytes: input.iter(), + } +} + +/// The return type of [`percent_decode`]. +#[derive(Clone, Debug)] +pub struct PercentDecode<'a> { + bytes: slice::Iter<'a, u8>, +} + +fn after_percent_sign(iter: &mut slice::Iter<'_, u8>) -> Option { + let mut cloned_iter = iter.clone(); + let h = char::from(*cloned_iter.next()?).to_digit(16)?; + let l = char::from(*cloned_iter.next()?).to_digit(16)?; + *iter = cloned_iter; + Some(h as u8 * 0x10 + l as u8) +} + +impl<'a> Iterator for PercentDecode<'a> { + type Item = u8; + + fn next(&mut self) -> Option { + self.bytes.next().map(|&byte| { + if byte == b'%' { + after_percent_sign(&mut self.bytes).unwrap_or(byte) + } else { + byte + } + }) + } + + fn size_hint(&self) -> (usize, Option) { + let bytes = self.bytes.len(); + ((bytes + 2) / 3, Some(bytes)) + } +} + +impl<'a> From> for Cow<'a, [u8]> { + fn from(iter: PercentDecode<'a>) -> Self { + match iter.if_any() { + Some(vec) => Cow::Owned(vec), + None => Cow::Borrowed(iter.bytes.as_slice()), + } + } +} + +impl<'a> PercentDecode<'a> { + /// If the percent-decoding is different from the input, return it as a new + /// bytes vector. + fn if_any(&self) -> Option> { + let mut bytes_iter = self.bytes.clone(); + while bytes_iter.any(|&b| b == b'%') { + if let Some(decoded_byte) = after_percent_sign(&mut bytes_iter) { + let initial_bytes = self.bytes.as_slice(); + let unchanged_bytes_len = initial_bytes.len() - bytes_iter.len() - 3; + let mut decoded = initial_bytes[..unchanged_bytes_len].to_owned(); + decoded.push(decoded_byte); + decoded.extend(PercentDecode { bytes: bytes_iter }); + return Some(decoded); + } + } + // Nothing to decode + None + } + + /// Decode the result of percent-decoding as UTF-8. + /// + /// This returns `Err` when the percent-decoded bytes are not well-formed + /// in UTF-8. 
+ pub fn decode_utf8(self) -> Result, str::Utf8Error> { + match self.clone().into() { + Cow::Borrowed(bytes) => match str::from_utf8(bytes) { + Ok(s) => Ok(s.into()), + Err(e) => Err(e), + }, + Cow::Owned(bytes) => match String::from_utf8(bytes) { + Ok(s) => Ok(s.into()), + Err(e) => Err(e.utf8_error()), + }, + } + } + + /// Decode the result of percent-decoding as UTF-8, lossily. + /// + /// Invalid UTF-8 percent-encoded byte sequences will be replaced with � U+FFFD, + /// the replacement character. + pub fn decode_utf8_lossy(self) -> Cow<'a, str> { + decode_utf8_lossy(self.clone().into()) + } +} + +fn decode_utf8_lossy(input: Cow<'_, [u8]>) -> Cow<'_, str> { + // Note: This function is duplicated in `form_urlencoded/src/query_encoding.rs`. + match input { + Cow::Borrowed(bytes) => String::from_utf8_lossy(bytes), + Cow::Owned(bytes) => { + match String::from_utf8_lossy(&bytes) { + Cow::Borrowed(utf8) => { + // If from_utf8_lossy returns a Cow::Borrowed, then we can + // be sure our original bytes were valid UTF-8. This is because + // if the bytes were invalid UTF-8 from_utf8_lossy would have + // to allocate a new owned string to back the Cow so it could + // replace invalid bytes with a placeholder. + + // First we do a debug_assert to confirm our description above. + let raw_utf8: *const [u8] = utf8.as_bytes(); + debug_assert!(raw_utf8 == &*bytes as *const [u8]); + + // Given we know the original input bytes are valid UTF-8, + // and we have ownership of those bytes, we re-use them and + // return a Cow::Owned here. + Cow::Owned(unsafe { String::from_utf8_unchecked(bytes) }) + } + Cow::Owned(s) => Cow::Owned(s), + } + } + } +} diff --git a/melib/src/utils/shellexpand.rs b/melib/src/utils/shellexpand.rs new file mode 100644 index 00000000..4be4bfd9 --- /dev/null +++ b/melib/src/utils/shellexpand.rs @@ -0,0 +1,240 @@ +/* + * meli - lib.rs + * + * Copyright 2017 Manos Pitsidianakis + * + * This file is part of meli. 
+ * + * meli is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * meli is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with meli. If not, see . + */ + +//! A `ShellExpandTrait` to expand paths like a shell. + +#[cfg(not(any(target_os = "netbsd", target_os = "macos")))] +use std::os::unix::io::AsRawFd; +use std::{ + ffi::OsStr, + os::unix::ffi::OsStrExt, + path::{Path, PathBuf}, +}; + +use smallvec::SmallVec; + +pub trait ShellExpandTrait { + fn expand(&self) -> PathBuf; + fn complete(&self, force: bool) -> SmallVec<[String; 128]>; +} + +impl ShellExpandTrait for Path { + fn expand(&self) -> PathBuf { + let mut ret = PathBuf::new(); + for c in self.components() { + let c_to_str = c.as_os_str().to_str(); + match c_to_str { + Some("~") => { + if let Ok(home_dir) = std::env::var("HOME") { + ret.push(home_dir) + } else { + return PathBuf::new(); + } + } + Some(var) if var.starts_with('$') => { + let env_name = var.split_at(1).1; + if env_name.chars().all(char::is_uppercase) { + ret.push(std::env::var(env_name).unwrap_or_default()); + } else { + ret.push(c); + } + } + Some(_) => { + ret.push(c); + } + None => { + /* path is invalid */ + return PathBuf::new(); + } + } + } + ret + } + + #[cfg(target_os = "linux")] + fn complete(&self, force: bool) -> SmallVec<[String; 128]> { + use libc::dirent64; + use nix::fcntl::OFlag; + const BUF_SIZE: ::libc::size_t = 8 << 10; + + let (prefix, _match) = if self.as_os_str().as_bytes().ends_with(b"/.") { + (self.components().as_path(), OsStr::from_bytes(b".")) + } else if 
self.exists() && (!force || self.as_os_str().as_bytes().ends_with(b"/")) { + return SmallVec::new(); + } else { + let last_component = self + .components() + .last() + .map(|c| c.as_os_str()) + .unwrap_or_else(|| OsStr::from_bytes(b"")); + let prefix = if let Some(p) = self.parent() { + p + } else { + return SmallVec::new(); + }; + (prefix, last_component) + }; + + let dir = match ::nix::dir::Dir::openat( + ::libc::AT_FDCWD, + prefix, + OFlag::O_DIRECTORY | OFlag::O_NOATIME | OFlag::O_RDONLY | OFlag::O_CLOEXEC, + ::nix::sys::stat::Mode::S_IRUSR | ::nix::sys::stat::Mode::S_IXUSR, + ) + .or_else(|_| { + ::nix::dir::Dir::openat( + ::libc::AT_FDCWD, + prefix, + OFlag::O_DIRECTORY | OFlag::O_RDONLY | OFlag::O_CLOEXEC, + ::nix::sys::stat::Mode::S_IRUSR | ::nix::sys::stat::Mode::S_IXUSR, + ) + }) { + Ok(dir) => dir, + Err(err) => { + debug!(prefix); + debug!(err); + return SmallVec::new(); + } + }; + + let mut buf: Vec = Vec::with_capacity(BUF_SIZE); + let mut entries = SmallVec::new(); + loop { + let n: i64 = unsafe { + ::libc::syscall( + ::libc::SYS_getdents64, + dir.as_raw_fd(), + buf.as_ptr(), + BUF_SIZE - 256, + ) + }; + if n < 0 { + return SmallVec::new(); + } else if n == 0 { + break; + } + + let n = n as usize; + unsafe { + buf.set_len(n); + } + let mut pos = 0; + while pos < n { + let dir = unsafe { std::mem::transmute::<&[u8], &[dirent64]>(&buf[pos..]) }; + let entry = unsafe { std::ffi::CStr::from_ptr(dir[0].d_name.as_ptr()) }; + if entry.to_bytes() != b"." && entry.to_bytes() != b".." 
{ + if entry.to_bytes().starts_with(_match.as_bytes()) { + if dir[0].d_type == ::libc::DT_DIR && !entry.to_bytes().ends_with(b"/") { + let mut s = unsafe { + String::from_utf8_unchecked( + entry.to_bytes()[_match.as_bytes().len()..].to_vec(), + ) + }; + s.push('/'); + entries.push(s); + } else { + entries.push(unsafe { + String::from_utf8_unchecked( + entry.to_bytes()[_match.as_bytes().len()..].to_vec(), + ) + }); + } + } + } + pos += dir[0].d_reclen as usize; + } + // https://github.com/romkatv/gitstatus/blob/caf44f7aaf33d0f46e6749e50595323c277e0908/src/dir.cc + // "It's tempting to bail here if n + sizeof(linux_dirent64) + + // 512 <= n. After all, there was enough space + // for another entry but SYS_getdents64 didn't write it, so this + // must be the end of the directory listing, + // right? Unfortunately, no. SYS_getdents64 is finicky. + // It sometimes writes a partial list of entries even if the + // full list would fit." + } + entries + } + + #[cfg(not(target_os = "linux"))] + fn complete(&self, force: bool) -> SmallVec<[String; 128]> { + let mut entries = SmallVec::new(); + let (prefix, _match) = { + if self.exists() && (!force || self.as_os_str().as_bytes().ends_with(b"/")) { + // println!("{} {:?}", self.display(), self.components().last()); + return entries; + } else { + let last_component = self + .components() + .last() + .map(|c| c.as_os_str()) + .unwrap_or_else(|| OsStr::from_bytes(b"")); + let prefix = if let Some(p) = self.parent() { + p + } else { + return entries; + }; + (prefix, last_component) + } + }; + if force && self.is_dir() && !self.as_os_str().as_bytes().ends_with(b"/") { + entries.push("/".to_string()); + } + + if let Ok(iter) = std::fs::read_dir(&prefix) { + for entry in iter.flatten() { + if entry.path().as_os_str().as_bytes() != b"." + && entry.path().as_os_str().as_bytes() != b".." 
+ && entry + .path() + .as_os_str() + .as_bytes() + .starts_with(_match.as_bytes()) + { + if entry.path().is_dir() && !entry.path().as_os_str().as_bytes().ends_with(b"/") + { + let mut s = unsafe { + String::from_utf8_unchecked( + entry.path().as_os_str().as_bytes()[_match.as_bytes().len()..] + .to_vec(), + ) + }; + s.push('/'); + entries.push(s); + } else { + entries.push(unsafe { + String::from_utf8_unchecked( + entry.path().as_os_str().as_bytes()[_match.as_bytes().len()..] + .to_vec(), + ) + }); + } + } + } + } + entries + } +} + +#[test] +fn test_shellexpandtrait() { + assert!(Path::new("~").expand().complete(false).is_empty()); + assert!(!Path::new("~").expand().complete(true).is_empty()); +} diff --git a/melib/src/sqlite3.rs b/melib/src/utils/sqlite3.rs similarity index 100% rename from melib/src/sqlite3.rs rename to melib/src/utils/sqlite3.rs diff --git a/src/components/mail/compose.rs b/src/components/mail/compose.rs index 42b5d4ff..3fd14e97 100644 --- a/src/components/mail/compose.rs +++ b/src/components/mail/compose.rs @@ -343,7 +343,7 @@ impl Composer { melib::email::parser::generic::mailto(list_post_addr) .map(|(_, m)| m.address) { - to.insert(list_address); + to.extend(list_address.into_iter()); } } } @@ -439,19 +439,18 @@ impl Composer { if let Some(actions) = list_management::ListActions::detect(&parent_message) { if let Some(post) = actions.post { if let list_management::ListAction::Email(list_post_addr) = post[0] { - if let Ok(list_address) = melib::email::parser::generic::mailto(list_post_addr) - .map(|(_, m)| m.address) - { - let list_address_string = list_address.to_string(); + if let Ok((_, mailto)) = melib::email::parser::generic::mailto(list_post_addr) { + let mut addresses = vec![( + parent_message.from()[0].clone(), + parent_message.field_from_to_string(), + )]; + for add in mailto.address { + let add_s = add.to_string(); + addresses.push((add, add_s)); + } ret.mode = ViewMode::SelectRecipients(UIDialog::new( "select recipients", - vec![ - 
( - parent_message.from()[0].clone(), - parent_message.field_from_to_string(), - ), - (list_address, list_address_string), - ], + addresses, false, Some(Box::new(move |id: ComponentId, results: &[Address]| { Some(UIEvent::FinishedUIDialog( diff --git a/src/state.rs b/src/state.rs index bf16c94b..1cc2188c 100644 --- a/src/state.rs +++ b/src/state.rs @@ -37,7 +37,10 @@ use melib::backends::{AccountHash, BackendEventConsumer}; use smallvec::SmallVec; use super::*; -use crate::{jobs::JobExecutor, terminal::screen::Screen}; +use crate::{ + jobs::JobExecutor, + terminal::{get_events, screen::Screen}, +}; struct InputHandler { pipe: (RawFd, RawFd), diff --git a/src/terminal/keys.rs b/src/terminal/keys.rs index 6d558f82..99061e73 100644 --- a/src/terminal/keys.rs +++ b/src/terminal/keys.rs @@ -157,17 +157,18 @@ use std::os::unix::io::{AsRawFd, RawFd}; use nix::poll::{poll, PollFd, PollFlags}; use termion::input::TermReadEventsAndRaw; -/* - * If we fork (for example start $EDITOR) we want the input-thread to stop - * reading from stdin. The best way I came up with right now is to send a - * signal to the thread that is read in the first input in stdin after the - * fork, and then the thread kills itself. The parent process spawns a new - * input-thread when the child returns. - * - * The main loop uses try_wait_on_child() to check if child has exited. - */ + /// The thread function that listens for user input and forwards it to the main /// event loop. +/// +/// If we fork (for example start `$EDITOR`) we want the `input-thread` to stop +/// reading from stdin. The best way I came up with right now is to send a +/// signal to the thread that is read in the first input in stdin after the +/// fork, and then the thread kills itself. The parent process spawns a new +/// input-thread when the child returns. +/// +/// The main loop uses [`State::try_wait_on_child`] to check if child has +/// exited. pub fn get_events( mut closure: impl FnMut((Key, Vec)), rx: &Receiver,