meli/melib/src/email/compose/mime.rs

/*
 * meli - melib crate.
 *
 * Copyright 2017-2020 Manos Pitsidianakis
 *
 * This file is part of meli.
 *
 * meli is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * meli is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with meli. If not, see <http://www.gnu.org/licenses/>.
 */

use super::*;

#[cfg(feature = "unicode_algorithms")]
use crate::text_processing::grapheme_clusters::TextProcessing;

pub fn encode_header(value: &str) -> String {
    let mut ret = String::with_capacity(value.len());
    let mut is_current_window_ascii = true;
    let mut current_window_start = 0;
    #[cfg(feature = "unicode_algorithms")]
    {
        let graphemes = value.graphemes_indices();
        for (idx, g) in graphemes {
            match (g.is_ascii(), is_current_window_ascii) {
                (true, true) => {
                    ret.push_str(g);
                }
                (true, false) => {
                    /* If !g.is_whitespace()
                     *
                     * Whitespaces inside encoded tokens must be greedily taken,
                     * instead of splitting each non-ascii word into separate encoded tokens. */
                    if g.split_whitespace().next().is_some() {
                        ret.push_str(&format!(
                            "=?UTF-8?B?{}?=",
                            BASE64_MIME
                                .encode(value[current_window_start..idx].as_bytes())
                                .trim()
                        ));
                        if idx != value.len() - 1 && (idx == 0 || !value[..idx].ends_with(' ')) {
                            ret.push(' ');
                        }
                        is_current_window_ascii = true;
                        current_window_start = idx;
                        ret.push_str(g);
                    }
                }
                (false, true) => {
                    current_window_start = idx;
                    is_current_window_ascii = false;
                }
                /* RFC2047 recommends:
                 * 'While there is no limit to the length of a multiple-line header field, each line of
                 * a header field that contains one or more 'encoded-word's is limited to 76
                 * characters.'
                 * This is a rough compliance.
                 */
                (false, false) if (((4 * (idx - current_window_start) / 3) + 3) & !3) > 33 => {
                    ret.push_str(&format!(
                        "=?UTF-8?B?{}?=",
                        BASE64_MIME
                            .encode(value[current_window_start..idx].as_bytes())
                            .trim()
                    ));
                    if idx != value.len() - 1 {
                        ret.push(' ');
                    }
                    current_window_start = idx;
                }
                (false, false) => {}
            }
        }
    }
    #[cfg(not(feature = "unicode_algorithms"))]
    {
        /* TODO: test this. If it works as fine as the one above, there's no need to keep the above
         * implementation.*/
        for (i, g) in value.char_indices() {
            match (g.is_ascii(), is_current_window_ascii) {
                (true, true) => {
                    ret.push(g);
                }
                (true, false) => {
                    /* If !g.is_whitespace()
                     *
                     * Whitespaces inside encoded tokens must be greedily taken,
                     * instead of splitting each non-ascii word into separate encoded tokens. */
                    if !g.is_whitespace() && value.is_char_boundary(i) {
                        ret.push_str(&format!(
                            "=?UTF-8?B?{}?=",
                            BASE64_MIME
                                .encode(value[current_window_start..i].as_bytes())
                                .trim()
                        ));
                        if i != value.len() - 1 {
                            ret.push(' ');
                        }
                        is_current_window_ascii = true;
                        current_window_start = i;
                        ret.push(g);
                    }
                }
                (false, true) => {
                    current_window_start = i;
                    is_current_window_ascii = false;
                }
                /* RFC2047 recommends:
                 * 'While there is no limit to the length of a multiple-line header field, each line of
                 * a header field that contains one or more 'encoded-word's is limited to 76
                 * characters.'
                 * This is a rough compliance.
                 */
                (false, false)
                    if value.is_char_boundary(i) && value[current_window_start..i].len() > 76 =>
                {
                    ret.push_str(&format!(
                        "=?UTF-8?B?{}?=",
                        BASE64_MIME
                            .encode(value[current_window_start..i].as_bytes())
                            .trim()
                    ));
                    if i != value.len() - 1 {
                        ret.push(' ');
                    }
                    current_window_start = i;
                }
                (false, false) => {}
            }
        }
    }
    /* If the last part of the header value is encoded, it won't be pushed inside the previous for
     * block */
    if !is_current_window_ascii {
        ret.push_str(&format!(
            "=?UTF-8?B?{}?=",
            BASE64_MIME
                .encode(value[current_window_start..].as_bytes())
                .trim()
        ));
    }
    ret
}
#[test]
fn test_encode_header() {
    // Encodes `input` with `encode_header`, then decodes the result again with the crate's
    // `phrase` parser and returns the decoded text.
    fn roundtrip(input: &str) -> String {
        let encoded = encode_header(input);
        let (_, decoded) =
            crate::email::parser::encodings::phrase(encoded.as_bytes(), false).unwrap();
        std::str::from_utf8(&decoded).unwrap().to_string()
    }

    // A short non-ASCII run in the middle of ASCII text: the trailing space is part of the
    // encoded-word, so no extra space is expected before "Rust".
    let plain = "compilers/2020a σε Rust";
    assert_eq!(
        encode_header(plain),
        "compilers/2020a =?UTF-8?B?z4POtSA=?=Rust",
    );
    assert_eq!(roundtrip(plain), plain);

    // A non-ASCII run long enough to be split into two encoded-words.
    let plain = r#"[internal] Νέος Οδηγός Συγγραφής"#;
    let expected_encoding = "[internal] =?UTF-8?B?zp3Orc6/z4Igzp/OtM63zrPPjM+CIM6jz4U=?= =?UTF-8?B?zrPOs8+BzrHPhs6uz4I=?=";
    assert_eq!(encode_header(plain), expected_encoding);
    assert_eq!(roundtrip(plain), r#"[internal] Νέος Οδηγός Συγγραφής"#);

    // Alternating ASCII and non-ASCII words; only the round trip is asserted. The folded
    // encoding below is kept for reference and is not checked:
    //let words = "[Advcomparch] =?utf-8?b?zqPPhc68z4DOtc+BzrnPhs6/z4HOrCDPg861IGZs?=\n\t=?utf-8?b?dXNoIM67z4zOs8+JIG1pc3ByZWRpY3Rpb24gzrrOsc+Ezqwgz4TOt869?=\n\t=?utf-8?b?IM61zrrPhM6tzrvOtc+Dzrcgc3RvcmU=?=";
    let plain = "[Advcomparch] Συμπεριφορά σε flush λόγω misprediction κατά την εκτέλεση store";
    assert_eq!(
        roundtrip(plain),
        "[Advcomparch] Συμπεριφορά σε flush λόγω misprediction κατά την εκτέλεση store",
    );
}
Add missing copyright preambles 2020-01-30 00:25:51 +02:00			`/*`
			`* meli - melib crate.`
			`*`
			`* Copyright 2017-2020 Manos Pitsidianakis`
			`*`
			`* This file is part of meli.`
			`*`
			`* meli is free software: you can redistribute it and/or modify`
			`* it under the terms of the GNU General Public License as published by`
			`* the Free Software Foundation, either version 3 of the License, or`
			`* (at your option) any later version.`
			`*`
			`* meli is distributed in the hope that it will be useful,`
			`* but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
			`* GNU General Public License for more details.`
			`*`
			`* You should have received a copy of the GNU General Public License`
			`* along with meli. If not, see <http://www.gnu.org/licenses/>.`
			`*/`

melib: encode header values closes #102 2019-04-14 23:05:29 +03:00			`use super::*;`
melib: turn unicode algos and backends into features 2019-09-21 21:23:06 +03:00
			`#[cfg(feature = "unicode_algorithms")]`
Remove text_processing Unwrap text_processing into melib In preparation for uploading meli as a separate crate on crates.io. 2020-02-04 17:26:25 +02:00			`use crate::text_processing::grapheme_clusters::TextProcessing;`
melib: encode header values closes #102 2019-04-14 23:05:29 +03:00
			`pub fn encode_header(value: &str) -> String {`
Fix warnings, lints, and 2018 errors 2019-06-18 21:13:58 +03:00			`let mut ret = String::with_capacity(value.len());`
melib: don't exclude whitespaces in mime encoded words 2019-07-05 18:58:46 +03:00			`let mut is_current_window_ascii = true;`
			`let mut current_window_start = 0;`
melib: turn unicode algos and backends into features 2019-09-21 21:23:06 +03:00			`#[cfg(feature = "unicode_algorithms")]`
			`{`
			`let graphemes = value.graphemes_indices();`
			`for (idx, g) in graphemes {`
			`match (g.is_ascii(), is_current_window_ascii) {`
			`(true, true) => {`
			`ret.push_str(g);`
			`}`
			`(true, false) => {`
			`/* If !g.is_whitespace()`
			`*`
			`* Whitespaces inside encoded tokens must be greedily taken,`
			`* instead of splitting each non-ascii word into separate encoded tokens. */`
Fix some clippy lints 2020-08-25 16:39:12 +03:00			`if g.split_whitespace().next().is_some() {`
melib: turn unicode algos and backends into features 2019-09-21 21:23:06 +03:00			`ret.push_str(&format!(`
			`"=?UTF-8?B?{}?=",`
			`BASE64_MIME`
			`.encode(value[current_window_start..idx].as_bytes())`
			`.trim()`
			`));`
Fix clippy lints 2020-07-05 15:28:55 +03:00			`if idx != value.len() - 1 && (idx == 0 \|\| !value[..idx].ends_with(' ')) {`
melib: turn unicode algos and backends into features 2019-09-21 21:23:06 +03:00			`ret.push(' ');`
			`}`
			`is_current_window_ascii = true;`
			`current_window_start = idx;`
			`ret.push_str(g);`
			`}`
			`}`
			`(false, true) => {`
			`current_window_start = idx;`
			`is_current_window_ascii = false;`
			`}`
			`/* RFC2047 recommends:`
			`* 'While there is no limit to the length of a multiple-line header field, each line of`
			`* a header field that contains one or more 'encoded-word's is limited to 76`
			`* characters.'`
			`* This is a rough compliance.`
			`*/`
			`(false, false) if (((4 * (idx - current_window_start) / 3) + 3) & !3) > 33 => {`
melib: don't exclude whitespaces in mime encoded words 2019-07-05 18:58:46 +03:00			`ret.push_str(&format!(`
			`"=?UTF-8?B?{}?=",`
			`BASE64_MIME`
			`.encode(value[current_window_start..idx].as_bytes())`
			`.trim()`
			`));`
			`if idx != value.len() - 1 {`
			`ret.push(' ');`
			`}`
fix missing grapheme in mime encoded string 2019-07-11 17:52:51 +03:00			`current_window_start = idx;`
melib: don't exclude whitespaces in mime encoded words 2019-07-05 18:58:46 +03:00			`}`
melib: turn unicode algos and backends into features 2019-09-21 21:23:06 +03:00			`(false, false) => {}`
melib: don't exclude whitespaces in mime encoded words 2019-07-05 18:58:46 +03:00			`}`
melib: turn unicode algos and backends into features 2019-09-21 21:23:06 +03:00			`}`
			`}`
			`#[cfg(not(feature = "unicode_algorithms"))]`
			`{`
			`/* TODO: test this. If it works as fine as the one above, there's no need to keep the above`
			`* implementation.*/`
melib: fix non-unicode encode_header() char boundary issue 2020-06-04 17:54:38 +03:00			`for (i, g) in value.char_indices() {`
melib: turn unicode algos and backends into features 2019-09-21 21:23:06 +03:00			`match (g.is_ascii(), is_current_window_ascii) {`
			`(true, true) => {`
			`ret.push(g);`
			`}`
			`(true, false) => {`
			`/* If !g.is_whitespace()`
			`*`
			`* Whitespaces inside encoded tokens must be greedily taken,`
			`* instead of splitting each non-ascii word into separate encoded tokens. */`
melib: fix non-unicode encode_header() char boundary issue 2020-06-04 17:54:38 +03:00			`if !g.is_whitespace() && value.is_char_boundary(i) {`
melib: turn unicode algos and backends into features 2019-09-21 21:23:06 +03:00			`ret.push_str(&format!(`
			`"=?UTF-8?B?{}?=",`
			`BASE64_MIME`
melib: fix non-unicode encode_header() char boundary issue 2020-06-04 17:54:38 +03:00			`.encode(value[current_window_start..i].as_bytes())`
melib: turn unicode algos and backends into features 2019-09-21 21:23:06 +03:00			`.trim()`
			`));`
melib: fix non-unicode encode_header() char boundary issue 2020-06-04 17:54:38 +03:00			`if i != value.len() - 1 {`
melib: turn unicode algos and backends into features 2019-09-21 21:23:06 +03:00			`ret.push(' ');`
			`}`
			`is_current_window_ascii = true;`
melib: fix non-unicode encode_header() char boundary issue 2020-06-04 17:54:38 +03:00			`current_window_start = i;`
melib: turn unicode algos and backends into features 2019-09-21 21:23:06 +03:00			`ret.push(g);`
			`}`
			`}`
			`(false, true) => {`
melib: fix non-unicode encode_header() char boundary issue 2020-06-04 17:54:38 +03:00			`current_window_start = i;`
melib: turn unicode algos and backends into features 2019-09-21 21:23:06 +03:00			`is_current_window_ascii = false;`
			`}`
			`/* RFC2047 recommends:`
			`* 'While there is no limit to the length of a multiple-line header field, each line of`
			`* a header field that contains one or more 'encoded-word's is limited to 76`
			`* characters.'`
			`* This is a rough compliance.`
			`*/`
melib: fix non-unicode encode_header() char boundary issue 2020-06-04 17:54:38 +03:00			`(false, false)`
			`if value.is_char_boundary(i) && value[current_window_start..i].len() > 76 =>`
			`{`
melib: turn unicode algos and backends into features 2019-09-21 21:23:06 +03:00			`ret.push_str(&format!(`
			`"=?UTF-8?B?{}?=",`
			`BASE64_MIME`
melib: fix non-unicode encode_header() char boundary issue 2020-06-04 17:54:38 +03:00			`.encode(value[current_window_start..i].as_bytes())`
melib: turn unicode algos and backends into features 2019-09-21 21:23:06 +03:00			`.trim()`
			`));`
melib: fix non-unicode encode_header() char boundary issue 2020-06-04 17:54:38 +03:00			`if i != value.len() - 1 {`
melib: turn unicode algos and backends into features 2019-09-21 21:23:06 +03:00			`ret.push(' ');`
			`}`
melib: fix non-unicode encode_header() char boundary issue 2020-06-04 17:54:38 +03:00			`current_window_start = i;`
melib: don't exclude whitespaces in mime encoded words 2019-07-05 18:58:46 +03:00			`}`
melib: turn unicode algos and backends into features 2019-09-21 21:23:06 +03:00			`(false, false) => {}`
melib: don't exclude whitespaces in mime encoded words 2019-07-05 18:58:46 +03:00			`}`
melib: encode header values closes #102 2019-04-14 23:05:29 +03:00			`}`
			`}`
melib: don't exclude whitespaces in mime encoded words 2019-07-05 18:58:46 +03:00			`/* If the last part of the header value is encoded, it won't be pushed inside the previous for`
			`* block */`
			`if !is_current_window_ascii {`
			`ret.push_str(&format!(`
			`"=?UTF-8?B?{}?=",`
			`BASE64_MIME`
			`.encode(value[current_window_start..].as_bytes())`
			`.trim()`
			`));`
			`}`
melib: encode header values closes #102 2019-04-14 23:05:29 +03:00			`ret`
			`}`
melib/email: fix whitespace duplication in mime encoding 2020-03-28 11:44:30 +02:00			`#[test]`
			`fn test_encode_header() {`
			`let words = "compilers/2020a σε Rust";`
			`assert_eq!(`
			`"compilers/2020a =?UTF-8?B?z4POtSA=?=Rust",`
			`&encode_header(&words),`
			`);`
			`assert_eq!(`
			`&std::str::from_utf8(`
melib: update nom dependency from 3.2.0 to 5.1.1 That was hecking exhausting 2020-06-06 19:38:20 +03:00			`&crate::email::parser::encodings::phrase(encode_header(&words).as_bytes(), false)`
melib/email: fix whitespace duplication in mime encoding 2020-03-28 11:44:30 +02:00			`.unwrap()`
melib: update nom dependency from 3.2.0 to 5.1.1 That was hecking exhausting 2020-06-06 19:38:20 +03:00			`.1`
melib/email: fix whitespace duplication in mime encoding 2020-03-28 11:44:30 +02:00			`)`
			`.unwrap(),`
			`&words,`
			`);`
			`let words = "[internal] =?UTF-8?B?zp3Orc6/z4Igzp/OtM63zrPPjM+CIM6jz4U=?= =?UTF-8?B?zrPOs8+BzrHPhs6uz4I=?=";`
			`let words_enc = r#"[internal] Νέος Οδηγός Συγγραφής"#;`
			`assert_eq!(words, &encode_header(&words_enc),);`
			`assert_eq!(`
			`r#"[internal] Νέος Οδηγός Συγγραφής"#,`
			`std::str::from_utf8(`
melib: update nom dependency from 3.2.0 to 5.1.1 That was hecking exhausting 2020-06-06 19:38:20 +03:00			`&crate::email::parser::encodings::phrase(encode_header(&words_enc).as_bytes(), false)`
melib/email: fix whitespace duplication in mime encoding 2020-03-28 11:44:30 +02:00			`.unwrap()`
melib: update nom dependency from 3.2.0 to 5.1.1 That was hecking exhausting 2020-06-06 19:38:20 +03:00			`.1`
melib/email: fix whitespace duplication in mime encoding 2020-03-28 11:44:30 +02:00			`)`
			`.unwrap(),`
			`);`
Fix test errors and warnings 2020-11-16 01:04:04 +02:00			`//let words = "[Advcomparch] =?utf-8?b?zqPPhc68z4DOtc+BzrnPhs6/z4HOrCDPg861IGZs?=\n\t=?utf-8?b?dXNoIM67z4zOs8+JIG1pc3ByZWRpY3Rpb24gzrrOsc+Ezqwgz4TOt869?=\n\t=?utf-8?b?IM61zrrPhM6tzrvOtc+Dzrcgc3RvcmU=?=";`
melib/email: fix whitespace duplication in mime encoding 2020-03-28 11:44:30 +02:00			`let words_enc = "[Advcomparch] Συμπεριφορά σε flush λόγω misprediction κατά την εκτέλεση store";`
			`assert_eq!(`
			`"[Advcomparch] Συμπεριφορά σε flush λόγω misprediction κατά την εκτέλεση store",`
			`std::str::from_utf8(`
melib: update nom dependency from 3.2.0 to 5.1.1 That was hecking exhausting 2020-06-06 19:38:20 +03:00			`&crate::email::parser::encodings::phrase(encode_header(&words_enc).as_bytes(), false)`
melib/email: fix whitespace duplication in mime encoding 2020-03-28 11:44:30 +02:00			`.unwrap()`
melib: update nom dependency from 3.2.0 to 5.1.1 That was hecking exhausting 2020-06-06 19:38:20 +03:00			`.1`
melib/email: fix whitespace duplication in mime encoding 2020-03-28 11:44:30 +02:00			`)`
			`.unwrap(),`
			`);`
			`}`