Add support for utf-7 encoding

Closes #175
pull/182/head
Manos Pitsidianakis 2023-04-09 00:03:20 +03:00
parent 2447a2cbfe
commit 3adf72aed0
Signed by: Manos Pitsidianakis
GPG Key ID: 7729C7707F7E09D0
14 changed files with 333 additions and 51 deletions

2
Cargo.lock generated
View File

@ -1171,6 +1171,7 @@ dependencies = [
"bitflags",
"data-encoding",
"encoding",
"encoding_rs",
"flate2",
"futures",
"indexmap",
@ -1182,6 +1183,7 @@ dependencies = [
"nix",
"nom",
"notify",
"regex",
"rusqlite",
"serde",
"serde_derive",

View File

@ -26,10 +26,6 @@ path = "src/lib.rs"
name = "managesieve-client"
path = "src/managesieve.rs"
#[[bin]]
#name = "async"
#path = "src/async.rs"
[dependencies]
async-task = "^4.2.0"
bincode = { version = "^1.3.0", default-features = false }

View File

@ -505,6 +505,12 @@ Example:
"INBOX/Drafts" = { sort_order = 1 }
"INBOX/Lists" = { sort_order = 2 }
.Ed
.It Ic encoding Ar String
.Pq Em optional
Override the default utf-8 charset for the mailbox name.
Useful only for mUTF-7 mailboxes.
.\" default value
.Pq Em "utf7", "utf-7", "utf8", "utf-8"
.El
.Sh COMPOSING
Composing specific options

View File

@ -26,6 +26,7 @@ bincode = { version = "^1.3.0", default-features = false }
bitflags = "1.0"
data-encoding = { version = "2.1.1" }
encoding = { version = "0.2.33", default-features = false }
encoding_rs = { version = "^0.8" }
flate2 = { version = "1.0.16", optional = true }
futures = "0.3.5"
@ -38,6 +39,7 @@ native-tls = { version = "0.2.3", default-features = false, optional = true }
nix = "^0.24"
nom = { version = "7" }
notify = { version = "4.0.15", optional = true }
regex = { version = "1" }
rusqlite = { version = "^0.28", default-features = false, optional = true }
serde = { version = "1.0.71", features = ["rc", ] }
serde_derive = "1.0.71"

View File

@ -19,6 +19,7 @@
* along with meli. If not, see <http://www.gnu.org/licenses/>.
*/
pub mod utf7;
use smallvec::SmallVec;
#[cfg(feature = "imap_backend")]
@ -555,10 +556,10 @@ impl SpecialUsageMailbox {
pub trait BackendMailbox: Debug {
fn hash(&self) -> MailboxHash;
/// Final component of `path`.
fn name(&self) -> &str;
/// Path of mailbox within the mailbox hierarchy, with `/` as separator.
fn path(&self) -> &str;
fn change_name(&mut self, new_name: &str);
fn clone(&self) -> Mailbox;
fn children(&self) -> &[MailboxHash];
fn parent(&self) -> Option<MailboxHash>;

View File

@ -83,10 +83,6 @@ impl BackendMailbox for ImapMailbox {
&self.path
}
fn change_name(&mut self, s: &str) {
self.name = s.to_string();
}
fn children(&self) -> &[MailboxHash] {
&self.children
}

View File

@ -58,8 +58,6 @@ impl BackendMailbox for JmapMailbox {
&self.path
}
fn change_name(&mut self, _s: &str) {}
fn clone(&self) -> Mailbox {
Box::new(std::clone::Clone::clone(self))
}

View File

@ -220,10 +220,6 @@ impl BackendMailbox for MaildirMailbox {
self.path.to_str().unwrap_or_else(|| self.name())
}
fn change_name(&mut self, s: &str) {
self.name = s.to_string();
}
fn children(&self) -> &[MailboxHash] {
&self.children
}

View File

@ -214,10 +214,6 @@ impl BackendMailbox for MboxMailbox {
self.path.to_str().unwrap()
}
fn change_name(&mut self, s: &str) {
self.name = s.to_string();
}
fn clone(&self) -> Mailbox {
Box::new(MboxMailbox {
hash: self.hash,

View File

@ -18,6 +18,7 @@
* You should have received a copy of the GNU General Public License
* along with meli. If not, see <http://www.gnu.org/licenses/>.
*/
use crate::backends::{
BackendMailbox, LazyCountSet, Mailbox, MailboxHash, MailboxPermissions, SpecialUsageMailbox,
};
@ -58,10 +59,6 @@ impl BackendMailbox for NntpMailbox {
&self.nntp_path
}
fn change_name(&mut self, s: &str) {
self.nntp_path = s.to_string();
}
fn children(&self) -> &[MailboxHash] {
&[]
}

View File

@ -253,8 +253,6 @@ impl BackendMailbox for NotmuchMailbox {
self.path.as_str()
}
fn change_name(&mut self, _s: &str) {}
fn clone(&self) -> Mailbox {
Box::new(std::clone::Clone::clone(self))
}

View File

@ -0,0 +1,196 @@
/*
* MIT License
*
* Copyright (c) 2021 Ilya Medvedev
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
/* Code from <https://github.com/iam-medvedev/rust-utf7-imap> */
//! A Rust library for encoding and decoding [UTF-7](https://datatracker.ietf.org/doc/html/rfc2152) string as defined by the [IMAP](https://datatracker.ietf.org/doc/html/rfc3501) standard in [RFC 3501 (#5.1.3)](https://datatracker.ietf.org/doc/html/rfc3501#section-5.1.3).
//!
//! Idea is based on Python [mutf7](https://github.com/cheshire-mouse/mutf7) library.
use encoding_rs::UTF_16BE;
use regex::{Captures, Regex};
/// Encode a mailbox name into the modified UTF-7 form used by IMAP.
///
/// Every literal `&` is first escaped as `&-`. The escaped text is then
/// consumed as alternating runs: a run of bytes accepted by
/// `is_ascii_custom` is copied through verbatim, and the run that follows it
/// is handed to `encode_modified_utf7`.
///
/// <https://datatracker.ietf.org/doc/html/rfc3501#section-5.1.3>
pub fn encode_utf7_imap(text: String) -> String {
    let escaped = text.replace('&', "&-");
    let mut rest = escaped.as_str();
    let mut encoded = String::new();
    while !rest.is_empty() {
        // Direct run: copied unchanged (may be empty if `rest` starts with
        // a character that needs encoding).
        encoded.push_str(get_ascii(rest));
        rest = remove_ascii(rest);
        if rest.is_empty() {
            break;
        }
        // Shifted run: everything up to the next directly representable byte.
        encoded.push_str(&encode_modified_utf7(get_nonascii(rest)));
        rest = remove_nonascii(rest);
    }
    encoded
}
/// Returns `true` when byte `c` may appear unencoded in a modified UTF-7
/// mailbox name.
///
/// RFC 3501 section 5.1.3 only lets printable US-ASCII (0x20 through 0x7e)
/// represent itself. DEL (0x7f), the other control characters and every
/// non-ASCII byte must go through the base64 shift sequence instead.
fn is_ascii_custom(c: u8) -> bool {
    (0x20..=0x7e).contains(&c)
}
/// Returns the longest prefix of `s` made up only of bytes accepted by
/// `is_ascii_custom`; the whole string when no byte is rejected.
fn get_ascii(s: &str) -> &str {
    match s.bytes().position(|byte| !is_ascii_custom(byte)) {
        Some(end) => &s[..end],
        None => s,
    }
}
fn get_nonascii(s: &str) -> &str {
let bytes = s.as_bytes();
for (i, &item) in bytes.iter().enumerate() {
if is_ascii_custom(item) {
return &s[0..i];
}
}
s
}
/// Drops the leading run of bytes accepted by `is_ascii_custom` and returns
/// what is left; the empty string when every byte is accepted.
fn remove_ascii(s: &str) -> &str {
    s.bytes()
        .position(|byte| !is_ascii_custom(byte))
        .map_or("", |start| &s[start..])
}
fn remove_nonascii(s: &str) -> &str {
let bytes = s.as_bytes();
for (i, &item) in bytes.iter().enumerate() {
if is_ascii_custom(item) {
return &s[i..];
}
}
""
}
/// Wraps `text` in a modified UTF-7 shift sequence.
///
/// The text is serialised as big-endian UTF-16, base64-encoded, stripped of
/// its `=` padding, has `/` swapped for `,`, and is finally framed as
/// `&...-`.
fn encode_modified_utf7(text: &str) -> String {
    let mut utf16_be: Vec<u8> = Vec::with_capacity(2 * text.len());
    for unit in text.encode_utf16() {
        let [high, low] = unit.to_be_bytes();
        utf16_be.push(high);
        utf16_be.push(low);
    }
    let padded = base64::encode(utf16_be);
    let payload = padded.trim_end_matches('=').replace('/', ",");
    format!("&{}-", payload)
}
/// Decode a mailbox name from the modified UTF-7 form used by IMAP.
///
/// Every `&...-` sequence in `text` is replaced by the result of `expand`;
/// text outside such sequences is returned unchanged.
///
/// <https://datatracker.ietf.org/doc/html/rfc3501#section-5.1.3>
pub fn decode_utf7_imap(text: &str) -> String {
    // `&`, then anything but `-` (captured as group 1), then the closing `-`.
    let shift_sequence = Regex::new(r"&([^-]*)-").unwrap();
    let decoded = shift_sequence.replace_all(text, expand);
    decoded.into_owned()
}
/// Replacement callback for a single `&...-` match.
///
/// An empty payload (the match is exactly `&-`) stands for a literal `&`;
/// any other match is forwarded in full to `decode_utf7_part`.
fn expand(cap: &Captures) -> String {
    let payload = &cap[1];
    if payload.is_empty() {
        return String::from("&");
    }
    decode_utf7_part(&cap[0])
}
/// Decodes one complete `&...-` shift sequence into the text it represents.
///
/// `&-` yields a literal `&`. Otherwise the payload between the delimiters is
/// turned back into standard base64 (`,` becomes `/`, `=` padding is
/// restored), decoded, and interpreted as big-endian UTF-16.
///
/// Mailbox names come from the server, so malformed input must not abort the
/// program: if `text` is not framed by `&` and `-`, or its payload is not
/// valid base64, `text` is returned unchanged.
fn decode_utf7_part(text: &str) -> String {
    if text == "&-" {
        return String::from("&");
    }
    let text_mb64 = match text.strip_prefix('&').and_then(|t| t.strip_suffix('-')) {
        Some(payload) => payload,
        None => return text.to_string(),
    };
    let mut text_b64 = text_mb64.replace(',', "/");
    while (text_b64.len() % 4) != 0 {
        text_b64.push('=');
    }
    let text_u16 = match base64::decode(text_b64) {
        Ok(bytes) => bytes,
        // Not a valid shift sequence (e.g. `&A-`): keep the raw text rather
        // than panicking.
        Err(_) => return text.to_string(),
    };
    // The payload is always UTF-16BE. BOM sniffing must stay off, otherwise
    // a leading U+FEFF would be dropped and byte patterns resembling another
    // BOM would switch the decoder to a different encoding.
    let (cow, _had_errors) = UTF_16BE.decode_without_bom_handling(&text_u16);
    cow.into_owned()
}
#[cfg(test)]
mod tests {
    use super::*;

    // `encode_utf7_imap` takes an owned `String` and `decode_utf7_imap` takes
    // a `&str`; the calls below convert explicitly so the module compiles.

    #[test]
    fn encode_test() {
        assert_eq!(
            encode_utf7_imap("Отправленные".to_string()),
            "&BB4EQgQ,BEAEMAQyBDsENQQ9BD0ESwQ1-"
        );
    }

    #[test]
    fn encode_test_split() {
        assert_eq!(
            encode_utf7_imap("Šiukšliadėžė".to_string()),
            "&AWA-iuk&AWE-liad&ARcBfgEX-"
        )
    }

    #[test]
    fn encode_consecutive_accents() {
        assert_eq!(encode_utf7_imap("théâtre".to_string()), "th&AOkA4g-tre")
    }

    #[test]
    fn decode_test() {
        assert_eq!(
            decode_utf7_imap("&BB4EQgQ,BEAEMAQyBDsENQQ9BD0ESwQ1-"),
            "Отправленные"
        );
    }

    #[test]
    fn decode_test_split() {
        // input string with utf7 encoded bits being separated by ascii
        assert_eq!(
            decode_utf7_imap("&AWA-iuk&AWE-liad&ARcBfgEX-"),
            "Šiukšliadėžė"
        )
    }

    #[test]
    fn decode_consecutive_accents() {
        assert_eq!(decode_utf7_imap("th&AOkA4g-tre"), "théâtre")
    }

    use proptest::prelude::*;

    proptest! {
        #![proptest_config(ProptestConfig::with_cases(10000))]
        // Round trip: decoding an encoded name must give back the input.
        #[test]
        fn fuzzy_dec_enc_check(s in "\\PC*") {
            assert_eq!(decode_utf7_imap(&encode_utf7_imap(s.clone())), s)
        }
    }
}

View File

@ -127,6 +127,8 @@ pub struct MailboxConf {
pub usage: Option<SpecialUsageMailbox>,
#[serde(default = "none")]
pub sort_order: Option<usize>,
#[serde(default = "none")]
pub encoding: Option<String>,
#[serde(flatten)]
pub extra: HashMap<String, String>,
}
@ -140,6 +142,7 @@ impl Default for MailboxConf {
ignore: ToggleFlag::Unset,
usage: None,
sort_order: None,
encoding: None,
extra: HashMap::default(),
}
}
@ -166,15 +169,6 @@ pub fn none<T>() -> Option<T> {
macro_rules! named_unit_variant {
($variant:ident) => {
pub mod $variant {
/*
pub fn serialize<S>(serializer: S) -> Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
serializer.serialize_str(stringify!($variant))
}
*/
pub fn deserialize<'de, D>(deserializer: D) -> Result<(), D::Error>
where
D: serde::Deserializer<'de>,

View File

@ -101,11 +101,45 @@ impl MailboxStatus {
/// A mailbox together with its status, display strings and configuration.
pub struct MailboxEntry {
    /// Current status of the mailbox.
    pub status: MailboxStatus,
    /// Mailbox name; `MailboxEntry::new` decodes it from modified UTF-7 when
    /// the mailbox is configured with a UTF-7 encoding.
    pub name: String,
    /// Mailbox path, initialised from `ref_mailbox.path()` in
    /// `MailboxEntry::new` and decoded under the same condition as `name`.
    pub path: String,
    /// The backend mailbox this entry refers to.
    pub ref_mailbox: Mailbox,
    /// Configuration for this mailbox.
    pub conf: FileMailboxConf,
}
impl MailboxEntry {
/// Builds an entry for `ref_mailbox`, decoding `name` and the mailbox path
/// when the mailbox is configured as modified UTF-7.
///
/// `path` is taken from `ref_mailbox.path()`. The encoding is read from the
/// dedicated `encoding` setting of the mailbox configuration and, when that
/// is unset, from an `"encoding"` key in its `extra` map. Recognised values
/// (compared ASCII case-insensitively) are `utf-8`/`utf8`, which leave the
/// strings untouched, and `utf-7`/`utf7`, which decode them. Any other value
/// is reported through `melib::log` at `WARN` level and otherwise ignored.
pub fn new(
    status: MailboxStatus,
    name: String,
    ref_mailbox: Mailbox,
    conf: FileMailboxConf,
) -> Self {
    let mut ret = Self {
        status,
        name,
        path: ref_mailbox.path().into(),
        ref_mailbox,
        conf,
    };
    // `MailboxConf` declares `encoding` as a named field next to the
    // flattened `extra` map, so a configured `encoding` key ends up in the
    // field, not in `extra`. Check the field first and keep the `extra`
    // lookup for configurations that were built by hand.
    let mailbox_conf = &ret.conf.mailbox_conf;
    let encoding = match mailbox_conf.encoding.as_deref() {
        Some(value) => Some(value),
        None => mailbox_conf.extra.get("encoding").map(String::as_str),
    };
    match encoding {
        None => {}
        Some(v) if ["utf-8", "utf8"].iter().any(|e| v.eq_ignore_ascii_case(e)) => {}
        Some(v) if ["utf-7", "utf7"].iter().any(|e| v.eq_ignore_ascii_case(e)) => {
            ret.name = melib::backends::utf7::decode_utf7_imap(&ret.name);
            ret.path = melib::backends::utf7::decode_utf7_imap(&ret.path);
        }
        Some(other) => {
            melib::log(
                format!(
                    "mailbox `{}`: unrecognized mailbox name charset: {}",
                    &ret.name, other
                ),
                melib::WARN,
            );
        }
    }
    ret
}
pub fn status(&self) -> String {
match self.status {
MailboxStatus::Available => format!(
@ -564,12 +598,12 @@ impl Account {
}
mailbox_entries.insert(
f.hash(),
MailboxEntry {
ref_mailbox: f.clone(),
name: f.path().to_string(),
status: MailboxStatus::None,
conf: conf.clone(),
},
MailboxEntry::new(
MailboxStatus::None,
f.path().to_string(),
f.clone(),
conf.clone(),
),
);
} else {
let mut new = FileMailboxConf::default();
@ -588,12 +622,7 @@ impl Account {
mailbox_entries.insert(
f.hash(),
MailboxEntry {
ref_mailbox: f.clone(),
name: f.path().to_string(),
status: MailboxStatus::None,
conf: new,
},
MailboxEntry::new(MailboxStatus::None, f.path().to_string(), f.clone(), new),
);
}
}
@ -1951,12 +1980,12 @@ impl Account {
self.mailbox_entries.insert(
mailbox_hash,
MailboxEntry {
name: mailboxes[&mailbox_hash].path().to_string(),
MailboxEntry::new(
status,
conf: new,
ref_mailbox: mailboxes.remove(&mailbox_hash).unwrap(),
},
mailboxes[&mailbox_hash].path().to_string(),
mailboxes.remove(&mailbox_hash).unwrap(),
new,
),
);
self.collection
.threads
@ -2370,3 +2399,78 @@ fn build_mailboxes_order(
rec(node, mailbox_entries, 0, false);
}
}
/// Checks that `MailboxEntry::new` decodes a modified UTF-7 mailbox path when
/// the mailbox configuration carries `encoding = "utf7"`.
#[test]
fn test_mailbox_utf7() {
    // Stub mailbox: only `name` and `path` are usable, both returning the
    // wrapped string. Every other trait method is left unimplemented.
    #[derive(Debug)]
    struct TestMailbox(String);

    impl melib::BackendMailbox for TestMailbox {
        fn name(&self) -> &str {
            &self.0
        }

        fn path(&self) -> &str {
            &self.0
        }

        fn hash(&self) -> MailboxHash {
            unimplemented!()
        }

        fn children(&self) -> &[MailboxHash] {
            unimplemented!()
        }

        fn parent(&self) -> Option<MailboxHash> {
            unimplemented!()
        }

        fn clone(&self) -> Mailbox {
            unimplemented!()
        }

        fn special_usage(&self) -> SpecialUsageMailbox {
            unimplemented!()
        }

        fn set_special_usage(&mut self, _: SpecialUsageMailbox) -> Result<()> {
            unimplemented!()
        }

        fn permissions(&self) -> MailboxPermissions {
            unimplemented!()
        }

        fn is_subscribed(&self) -> bool {
            unimplemented!()
        }

        fn set_is_subscribed(&mut self, _: bool) -> Result<()> {
            unimplemented!()
        }

        fn count(&self) -> Result<(usize, usize)> {
            unimplemented!()
        }
    }

    // (encoded path as reported by the backend, expected decoded path)
    let cases = [
        ("~peter/mail/&U,BTFw-/&ZeVnLIqe-", "~peter/mail/台北/日本語"),
        ("&BB4EQgQ,BEAEMAQyBDsENQQ9BD0ESwQ1-", "Отправленные"),
    ];
    for (encoded, decoded) in cases {
        let mut mailbox_conf = melib::MailboxConf::default();
        mailbox_conf
            .extra
            .insert(String::from("encoding"), String::from("utf7"));
        let file_conf = FileMailboxConf {
            mailbox_conf,
            ..Default::default()
        };
        let entry = MailboxEntry::new(
            MailboxStatus::None,
            String::from(encoded),
            Box::new(TestMailbox(String::from(encoded))),
            file_conf,
        );
        assert_eq!(&entry.path, decoded);
    }
}