From 6906142278c69a250811d2691dbe96d5fa39732e Mon Sep 17 00:00:00 2001
From: Manos Pitsidianakis
Date: Fri, 5 Jul 2019 18:58:46 +0300
Subject: [PATCH] melib: don't exclude whitespaces in mime encoded words

---
Amended in review (these notes are not part of the commit message):
 * The ASCII grapheme that terminates an encoded window was never appended
   to the output, so one character of the header value was lost; it is now
   pushed right after the encoded-word.
 * The separating space after that encoded-word is now unconditional,
   because plain text always follows it.

 melib/src/email/compose/mime.rs | 72 ++++++++++++++++++++++++++++-----
 1 file changed, 62 insertions(+), 10 deletions(-)

diff --git a/melib/src/email/compose/mime.rs b/melib/src/email/compose/mime.rs
index fa6b8517..7eddb8c0 100644
--- a/melib/src/email/compose/mime.rs
+++ b/melib/src/email/compose/mime.rs
@@ -1,18 +1,70 @@
 use super::*;
+use crate::grapheme_clusters::Graphemes;
 
 pub fn encode_header(value: &str) -> String {
-    eprintln!("encoding \"{}\"", value);
     let mut ret = String::with_capacity(value.len());
-    for word in value.split_whitespace() {
-        if word.is_ascii() {
-            ret.push_str(word);
-        } else {
-            ret.push_str(
-                format!("=?UTF-8?B?{}?=", BASE64_MIME.encode(word.trim().as_bytes())).trim(),
-            );
+    let graphemes = value.graphemes_indices();
+    let mut is_current_window_ascii = true;
+    let mut current_window_start = 0;
+    for (idx, g) in graphemes {
+        match (g.is_ascii(), is_current_window_ascii) {
+            (true, true) => {
+                ret.push_str(g);
+            }
+            (false, true) => {
+                current_window_start = idx;
+                is_current_window_ascii = false;
+            }
+            (true, false) => {
+                /* If !g.is_whitespace()
+                 *
+                 * Whitespaces inside encoded tokens must be greedily taken,
+                 * instead of splitting each non-ascii word into separate encoded tokens. */
+                if !g.split_whitespace().collect::<Vec<&str>>().is_empty() {
+                    ret.push_str(&format!(
+                        "=?UTF-8?B?{}?=",
+                        BASE64_MIME
+                            .encode(value[current_window_start..idx].as_bytes())
+                            .trim()
+                    ));
+                    /* RFC2047 requires linear whitespace between an encoded-word and the
+                     * text adjacent to it, and `g` always follows here. */
+                    ret.push(' ');
+                    /* `g` closed the encoded window; it is plain text and must be kept. */
+                    ret.push_str(g);
+                    is_current_window_ascii = true;
+                }
+            }
+            /* RFC2047 recommends:
+             * 'While there is no limit to the length of a multiple-line header field, each line of
+             * a header field that contains one or more 'encoded-word's is limited to 76
+             * characters.'
+             * This is a rough compliance.
+             */
+            (false, false) if (((4 * (idx - current_window_start) / 3) + 3) & !3) > 33 => {
+                ret.push_str(&format!(
+                    "=?UTF-8?B?{}?=",
+                    BASE64_MIME
+                        .encode(value[current_window_start..idx].as_bytes())
+                        .trim()
+                ));
+                if idx != value.len() - 1 {
+                    ret.push(' ');
+                }
+                current_window_start = idx;
+            }
+            (false, false) => {}
         }
-        ret.push(' ');
     }
-    ret.pop();
+    /* If the last part of the header value is encoded, it won't be pushed inside the previous for
+     * block */
+    if !is_current_window_ascii {
+        ret.push_str(&format!(
+            "=?UTF-8?B?{}?=",
+            BASE64_MIME
+                .encode(value[current_window_start..].as_bytes())
+                .trim()
+        ));
+    }
     ret
 }