2020-01-30 00:25:51 +02:00
/*
 * meli - melib crate.
 *
 * Copyright 2017-2020 Manos Pitsidianakis
 *
 * This file is part of meli.
 *
 * meli is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * meli is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with meli. If not, see <http://www.gnu.org/licenses/>.
 */
2019-04-14 23:05:29 +03:00
use super ::* ;
2019-09-21 21:23:06 +03:00
#[ cfg(feature = " unicode_algorithms " ) ]
2020-02-04 17:26:25 +02:00
use crate ::text_processing ::grapheme_clusters ::TextProcessing ;
2019-04-14 23:05:29 +03:00
pub fn encode_header ( value : & str ) -> String {
2019-06-18 21:13:58 +03:00
let mut ret = String ::with_capacity ( value . len ( ) ) ;
2019-07-05 18:58:46 +03:00
let mut is_current_window_ascii = true ;
let mut current_window_start = 0 ;
2019-09-21 21:23:06 +03:00
#[ cfg(feature = " unicode_algorithms " ) ]
{
let graphemes = value . graphemes_indices ( ) ;
for ( idx , g ) in graphemes {
match ( g . is_ascii ( ) , is_current_window_ascii ) {
( true , true ) = > {
ret . push_str ( g ) ;
}
( true , false ) = > {
/* If !g.is_whitespace()
*
* Whitespaces inside encoded tokens must be greedily taken ,
* instead of splitting each non - ascii word into separate encoded tokens . * /
2020-08-25 16:39:12 +03:00
if g . split_whitespace ( ) . next ( ) . is_some ( ) {
2019-09-21 21:23:06 +03:00
ret . push_str ( & format! (
" =?UTF-8?B?{}?= " ,
BASE64_MIME
. encode ( value [ current_window_start .. idx ] . as_bytes ( ) )
. trim ( )
) ) ;
2020-07-05 15:28:55 +03:00
if idx ! = value . len ( ) - 1 & & ( idx = = 0 | | ! value [ .. idx ] . ends_with ( ' ' ) ) {
2019-09-21 21:23:06 +03:00
ret . push ( ' ' ) ;
}
is_current_window_ascii = true ;
current_window_start = idx ;
ret . push_str ( g ) ;
}
}
( false , true ) = > {
current_window_start = idx ;
is_current_window_ascii = false ;
}
/* RFC2047 recommends:
* ' While there is no limit to the length of a multiple - line header field , each line of
* a header field that contains one or more ' encoded - word ' s is limited to 76
* characters . '
* This is a rough compliance .
* /
( false , false ) if ( ( ( 4 * ( idx - current_window_start ) / 3 ) + 3 ) & ! 3 ) > 33 = > {
2019-07-05 18:58:46 +03:00
ret . push_str ( & format! (
" =?UTF-8?B?{}?= " ,
BASE64_MIME
. encode ( value [ current_window_start .. idx ] . as_bytes ( ) )
. trim ( )
) ) ;
if idx ! = value . len ( ) - 1 {
ret . push ( ' ' ) ;
}
2019-07-11 17:52:51 +03:00
current_window_start = idx ;
2019-07-05 18:58:46 +03:00
}
2019-09-21 21:23:06 +03:00
( false , false ) = > { }
2019-07-05 18:58:46 +03:00
}
2019-09-21 21:23:06 +03:00
}
}
#[ cfg(not(feature = " unicode_algorithms " )) ]
{
/* TODO: test this. If it works as fine as the one above, there's no need to keep the above
* implementation . * /
2020-06-04 17:54:38 +03:00
for ( i , g ) in value . char_indices ( ) {
2019-09-21 21:23:06 +03:00
match ( g . is_ascii ( ) , is_current_window_ascii ) {
( true , true ) = > {
ret . push ( g ) ;
}
( true , false ) = > {
/* If !g.is_whitespace()
*
* Whitespaces inside encoded tokens must be greedily taken ,
* instead of splitting each non - ascii word into separate encoded tokens . * /
2020-06-04 17:54:38 +03:00
if ! g . is_whitespace ( ) & & value . is_char_boundary ( i ) {
2019-09-21 21:23:06 +03:00
ret . push_str ( & format! (
" =?UTF-8?B?{}?= " ,
BASE64_MIME
2020-06-04 17:54:38 +03:00
. encode ( value [ current_window_start .. i ] . as_bytes ( ) )
2019-09-21 21:23:06 +03:00
. trim ( )
) ) ;
2020-06-04 17:54:38 +03:00
if i ! = value . len ( ) - 1 {
2019-09-21 21:23:06 +03:00
ret . push ( ' ' ) ;
}
is_current_window_ascii = true ;
2020-06-04 17:54:38 +03:00
current_window_start = i ;
2019-09-21 21:23:06 +03:00
ret . push ( g ) ;
}
}
( false , true ) = > {
2020-06-04 17:54:38 +03:00
current_window_start = i ;
2019-09-21 21:23:06 +03:00
is_current_window_ascii = false ;
}
/* RFC2047 recommends:
* ' While there is no limit to the length of a multiple - line header field , each line of
* a header field that contains one or more ' encoded - word ' s is limited to 76
* characters . '
* This is a rough compliance .
* /
2020-06-04 17:54:38 +03:00
( false , false )
if value . is_char_boundary ( i ) & & value [ current_window_start .. i ] . len ( ) > 76 = >
{
2019-09-21 21:23:06 +03:00
ret . push_str ( & format! (
" =?UTF-8?B?{}?= " ,
BASE64_MIME
2020-06-04 17:54:38 +03:00
. encode ( value [ current_window_start .. i ] . as_bytes ( ) )
2019-09-21 21:23:06 +03:00
. trim ( )
) ) ;
2020-06-04 17:54:38 +03:00
if i ! = value . len ( ) - 1 {
2019-09-21 21:23:06 +03:00
ret . push ( ' ' ) ;
}
2020-06-04 17:54:38 +03:00
current_window_start = i ;
2019-07-05 18:58:46 +03:00
}
2019-09-21 21:23:06 +03:00
( false , false ) = > { }
2019-07-05 18:58:46 +03:00
}
2019-04-14 23:05:29 +03:00
}
}
2019-07-05 18:58:46 +03:00
/* If the last part of the header value is encoded, it won't be pushed inside the previous for
* block * /
if ! is_current_window_ascii {
ret . push_str ( & format! (
" =?UTF-8?B?{}?= " ,
BASE64_MIME
. encode ( value [ current_window_start .. ] . as_bytes ( ) )
. trim ( )
) ) ;
}
2019-04-14 23:05:29 +03:00
ret
}
2020-03-28 11:44:30 +02:00
/// Checks `encode_header` output against known encodings and verifies that
/// feeding the output to the `phrase` parser gives back the original text.
#[test]
fn test_encode_header() {
    // Encode `text`, parse the result with `phrase` and return the decoded text.
    let roundtrip = |text: &str| -> String {
        let encoded = encode_header(text);
        let decoded = crate::email::parser::encodings::phrase(encoded.as_bytes(), false)
            .unwrap()
            .1;
        std::str::from_utf8(&decoded).unwrap().to_string()
    };

    // Mixed ASCII / non-ASCII value: only the non-ASCII run (with its trailing space) is encoded.
    let plain = "compilers/2020a σε Rust";
    assert_eq!(
        "compilers/2020a =?UTF-8?B?z4POtSA=?=Rust",
        encode_header(plain),
    );
    assert_eq!(roundtrip(plain), plain);

    // Long non-ASCII run: expected to be split into two encoded words.
    let expected = "[internal] =?UTF-8?B?zp3Orc6/z4Igzp/OtM63zrPPjM+CIM6jz4U=?= =?UTF-8?B?zrPOs8+BzrHPhs6uz4I=?=";
    let plain = r#"[internal] Νέος Οδηγός Συγγραφής"#;
    assert_eq!(expected, encode_header(plain));
    assert_eq!(r#"[internal] Νέος Οδηγός Συγγραφής"#, roundtrip(plain));

    // Only the round trip is asserted here; the commented-out value below is an alternative
    // encoding of the same subject kept for reference (origin not shown in this file).
    //let words = "[Advcomparch] =?utf-8?b?zqPPhc68z4DOtc+BzrnPhs6/z4HOrCDPg861IGZs?=\n\t=?utf-8?b?dXNoIM67z4zOs8+JIG1pc3ByZWRpY3Rpb24gzrrOsc+Ezqwgz4TOt869?=\n\t=?utf-8?b?IM61zrrPhM6tzrvOtc+Dzrcgc3RvcmU=?=";
    let plain = "[Advcomparch] Συμπεριφορά σε flush λόγω misprediction κατά την εκτέλεση store";
    assert_eq!(
        "[Advcomparch] Συμπεριφορά σε flush λόγω misprediction κατά την εκτέλεση store",
        roundtrip(plain),
    );
}