Browse Source

Remove text_processing

Unwrap text_processing into melib

In preparation for uploading meli as a separate crate on crates.io.
tags/alpha-0.5.0
Manos Pitsidianakis 2 weeks ago
parent
commit
05b91f1c02
WARNING! Although there is a key with this ID in the database it does not verify this commit! This commit is SUSPICIOUS. GPG Key ID: 73627C2F690DF710
27 changed files with 129 additions and 156 deletions
  1. +4
    -13
      Cargo.lock
  2. +2
    -3
      Cargo.toml
  3. +16
    -4
      melib/Cargo.toml
  4. +77
    -1
      melib/build.rs
  5. +1
    -1
      melib/src/email/compose/mime.rs
  6. +1
    -1
      melib/src/lib.rs
  7. +4
    -4
      melib/src/text_processing/grapheme_clusters.rs
  8. +8
    -8
      melib/src/text_processing/line_break.rs
  9. +1
    -1
      melib/src/text_processing/mod.rs
  10. +2
    -2
      melib/src/text_processing/tables.rs
  11. +0
    -0
      melib/src/text_processing/types.rs
  12. +0
    -0
      melib/src/text_processing/wcwidth.rs
  13. +1
    -1
      melib/src/thread.rs
  14. +0
    -2
      src/bin.rs
  15. +1
    -0
      src/components.rs
  16. +1
    -0
      src/components/contacts/contact_list.rs
  17. +1
    -1
      src/conf.rs
  18. +1
    -1
      src/conf/accounts.rs
  19. +1
    -1
      src/mailcap.rs
  20. +1
    -1
      src/terminal/cells.rs
  21. +1
    -1
      src/terminal/embed/grid.rs
  22. +1
    -1
      src/terminal/text_editing.rs
  23. +1
    -1
      src/workers.rs
  24. +1
    -2
      testing/Cargo.toml
  25. +2
    -2
      testing/src/email_parse.rs
  26. +0
    -10
      text_processing/Cargo.toml
  27. +0
    -94
      text_processing/build.rs

+ 4
- 13
Cargo.lock View File

@@ -739,7 +739,7 @@ dependencies = [
"fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.64 (registry+https://github.com/rust-lang/crates.io-index)",
"linkify 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
"melib 0.4.1",
"melib 0.5.0",
"nix 0.16.1 (registry+https://github.com/rust-lang/crates.io-index)",
"nom 3.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"notify 4.0.12 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -755,7 +755,6 @@ dependencies = [
"signal-hook-registry 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
"smallvec 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"termion 1.5.2 (registry+https://github.com/rust-lang/crates.io-index)",
"text_processing 0.4.1",
"toml 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)",
"unicode-segmentation 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
"uuid 0.7.4 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -765,7 +764,7 @@ dependencies = [

[[package]]
name = "melib"
version = "0.4.1"
version = "0.5.0"
dependencies = [
"bincode 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
"bitflags 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -786,7 +785,7 @@ dependencies = [
"serde_json 1.0.39 (registry+https://github.com/rust-lang/crates.io-index)",
"smallvec 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"termion 1.5.2 (registry+https://github.com/rust-lang/crates.io-index)",
"text_processing 0.4.1",
"unicode-segmentation 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
"uuid 0.7.4 (registry+https://github.com/rust-lang/crates.io-index)",
"xdg 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
@@ -1645,15 +1644,7 @@ dependencies = [
name = "testing"
version = "0.4.1"
dependencies = [
"melib 0.4.1",
"text_processing 0.4.1",
]

[[package]]
name = "text_processing"
version = "0.4.1"
dependencies = [
"unicode-segmentation 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
"melib 0.5.0",
]

[[package]]


+ 2
- 3
Cargo.toml View File

@@ -14,7 +14,7 @@ crossbeam = "0.7.2"
signal-hook = "0.1.12"
signal-hook-registry = "1.2.0"
nix = "0.16.1"
melib = { path = "melib", version = "0.4.1" }
melib = { path = "melib", version = "0.5.0" }

serde = "1.0.71"
serde_derive = "1.0.71"
@@ -30,7 +30,6 @@ termion = "1.5.1"
bincode = "1.2.0"
uuid = { version = "0.7.4", features = ["serde", "v4"] }
unicode-segmentation = "1.2.1" # >:c
text_processing = { path = "text_processing", version = "0.4.1" }
libc = {version = "0.2.59", features = ["extra_traits",]}
rusqlite = {version = "0.20.0", optional =true }
rmp = "^0.8"
@@ -44,7 +43,7 @@ lto = true
debug = false

[workspace]
members = ["melib", "testing", "text_processing"]
members = ["melib", "testing", ]

[features]
default = ["sqlite3"]


+ 16
- 4
melib/Cargo.toml View File

@@ -1,11 +1,22 @@
[package]
name = "melib"
version = "0.4.1"
authors = ["Manos Pitsidianakis <el13635@mail.ntua.gr>"]
version = "0.5.0"
authors = ["Manos Pitsidianakis <epilys@nessuent.xyz>"]
workspace = ".."
edition = "2018"
build = "build.rs"

homepage = "https://meli.delivery"
repository = "https://git.meli.delivery/meli/meli.git"
description = "backend mail client library"
keywords = ["mail", "mua", "maildir", "imap"]
categories = [ "email"]
license = "GPL-3.0-or-later"

[lib]
name = "melib"
path = "src/lib.rs"

[dependencies]
bitflags = "1.0"
crossbeam = "0.7.2"
@@ -23,7 +34,8 @@ serde = { version = "1.0.71", features = ["rc", ] }
serde_derive = "1.0.71"
bincode = "1.2.0"
uuid = { version = "0.7.4", features = ["serde", "v4"] }
text_processing = { path = "../text_processing", version = "*", optional= true }

unicode-segmentation = { version = "1.2.1", optional = true }
libc = {version = "0.2.59", features = ["extra_traits",]}
reqwest = { version ="0.10.0-alpha.2", optional=true, features = ["json", "blocking" ]}
serde_json = { version = "1.0", optional = true, features = ["raw_value",] }
@@ -34,7 +46,7 @@ nix = "0.16.1"
default = ["unicode_algorithms", "imap_backend", "maildir_backend", "mbox_backend", "vcard"]

debug-tracing = []
unicode_algorithms = ["text_processing"]
unicode_algorithms = ["unicode-segmentation"]
imap_backend = ["native-tls"]
maildir_backend = ["notify", "notify-rust", "memmap"]
mbox_backend = ["notify", "notify-rust", "memmap"]


+ 77
- 1
melib/build.rs View File

@@ -19,9 +19,85 @@
* along with meli. If not, see <http://www.gnu.org/licenses/>.
*/

fn main() {
#[cfg(feature = "unicode_algorithms")]
include!("src/text_processing/types.rs");

/// Build script entry point for melib.
///
/// With the `notmuch_backend` feature enabled, tells cargo to link against
/// `notmuch`.
///
/// With the `unicode_algorithms` feature enabled, generates
/// `src/text_processing/tables.rs` from the Unicode `LineBreak.txt` data file,
/// unless that file already exists. The data file is downloaded with `curl`
/// into a directory created by `mktemp -d`; both are removed afterwards.
///
/// # Errors
///
/// Returns an `std::io::Error` if an external command cannot be spawned or
/// exits unsuccessfully, if the downloaded file cannot be read or contains a
/// malformed entry, or if the table module cannot be written.
fn main() -> Result<(), std::io::Error> {
    #[cfg(feature = "notmuch_backend")]
    {
        println!("cargo:rustc-link-lib=notmuch");
    }
    #[cfg(feature = "unicode_algorithms")]
    {
        use std::fs::File;
        use std::io::prelude::*;
        use std::io::{BufReader, BufWriter, Error, ErrorKind};
        use std::path::{Path, PathBuf};
        use std::process::Command;
        const LINE_BREAK_TABLE_URL: &str =
            "http://www.unicode.org/Public/UCD/latest/ucd/LineBreak.txt";

        let mod_path = Path::new("src/text_processing/tables.rs");
        if mod_path.exists() {
            eprintln!(
                "{} already exists, delete it if you want to replace it.",
                mod_path.display()
            );
            return Ok(());
        }

        /* Create a scratch directory to download the data file into. */
        let mktemp = Command::new("mktemp").arg("-d").output()?;
        if !mktemp.status.success() {
            return Err(Error::new(ErrorKind::Other, "mktemp -d failed"));
        }
        let tmpdir = std::str::from_utf8(&mktemp.stdout)
            .map_err(|err| Error::new(ErrorKind::InvalidData, err))?
            .trim();
        let mut tmpdir_path = PathBuf::from(tmpdir);
        tmpdir_path.push("LineBreak.txt");

        /* `output()` only errors when curl cannot be spawned, so the exit status has to be
         * inspected as well; `--fail` makes HTTP errors show up in that status. */
        let curl = Command::new("curl")
            .arg("--fail")
            .arg("-o")
            .arg(&tmpdir_path)
            .arg(LINE_BREAK_TABLE_URL)
            .output()?;
        if !curl.status.success() {
            return Err(Error::new(
                ErrorKind::Other,
                format!(
                    "curl could not download {}: {}",
                    LINE_BREAK_TABLE_URL,
                    String::from_utf8_lossy(&curl.stderr).trim()
                ),
            ));
        }

        let buf_reader = BufReader::new(File::open(&tmpdir_path)?);
        let parse_codepoint = |hex: &str| {
            u32::from_str_radix(hex, 16).map_err(|err| Error::new(ErrorKind::InvalidData, err))
        };

        let mut line_break_table: Vec<(u32, u32, LineBreakClass)> = Vec::with_capacity(3800);
        for line in buf_reader.lines() {
            let line = line?;
            if line.starts_with('#') || line.starts_with(' ') || line.is_empty() {
                continue;
            }
            /* Only the first whitespace-delimited token ("XXXX;CL" or "XXXX..YYYY;CL") is used. */
            let tokens: &str = match line.split_whitespace().next() {
                Some(tokens) => tokens,
                None => continue,
            };
            let malformed = || {
                Error::new(
                    ErrorKind::InvalidData,
                    format!("malformed LineBreak.txt entry: {:?}", line),
                )
            };

            /* `find` yields a byte offset, which is what the slicing below needs. */
            let semicolon_idx: usize = tokens.find(';').ok_or_else(&malformed)?;
            let chars_str: &str = &tokens[..semicolon_idx];

            let mut codepoint_iter = chars_str.split("..");

            let first_codepoint: u32 =
                parse_codepoint(codepoint_iter.next().ok_or_else(&malformed)?)?;

            /* A single codepoint is stored as a range that starts and ends on itself. */
            let sec_codepoint: u32 = match codepoint_iter.next() {
                Some(hex) => parse_codepoint(hex)?,
                None => first_codepoint,
            };

            /* NOTE(review): only the first two bytes of the class name are passed on, so a
             * three-letter class name would be truncated -- confirm LineBreakClass::from
             * expects that. */
            let class_start = semicolon_idx + 1;
            let class = tokens
                .get(class_start..class_start + 2)
                .ok_or_else(&malformed)?;
            line_break_table.push((first_codepoint, sec_codepoint, LineBreakClass::from(class)));
        }

        /* The generated module sits next to types.rs inside text_processing, and the rest of
         * the module imports the table as LINE_BREAK_RULES. */
        let mut tables = BufWriter::new(File::create(mod_path)?);
        tables.write_all(b"use super::types::LineBreakClass;\n")?;
        tables.write_all(b"use super::types::LineBreakClass::*;\n\n")?;
        tables.write_all(
            b"pub const LINE_BREAK_RULES: &'static [(u32, u32, LineBreakClass)] = &[\n",
        )?;
        for (first, last, class) in &line_break_table {
            writeln!(tables, "    (0x{:X}, 0x{:X}, {:?}),", first, last, class)?;
        }
        tables.write_all(b"];\n")?;
        tables.flush()?;

        /* Remove the downloaded file, then the scratch directory that held it. */
        std::fs::remove_file(&tmpdir_path)?;
        tmpdir_path.pop();
        std::fs::remove_dir(&tmpdir_path)?;
    }
    Ok(())
}

+ 1
- 1
melib/src/email/compose/mime.rs View File

@@ -22,7 +22,7 @@
use super::*;

#[cfg(feature = "unicode_algorithms")]
use text_processing::grapheme_clusters::TextProcessing;
use crate::text_processing::grapheme_clusters::TextProcessing;

pub fn encode_header(value: &str) -> String {
let mut ret = String::with_capacity(value.len());


+ 1
- 1
melib/src/lib.rs View File

@@ -102,7 +102,7 @@ pub mod dbg {
}

#[cfg(feature = "unicode_algorithms")]
extern crate text_processing;
pub mod text_processing;

pub mod datetime;
pub use datetime::UnixTimestamp;


text_processing/src/grapheme_clusters.rs → melib/src/text_processing/grapheme_clusters.rs View File

@@ -29,8 +29,8 @@

*/

use crate::types::Reflow;
use crate::wcwidth::{wcwidth, CodePointsIter};
use super::types::Reflow;
use super::wcwidth::{wcwidth, CodePointsIter};
extern crate unicode_segmentation;
use self::unicode_segmentation::UnicodeSegmentation;

@@ -71,11 +71,11 @@ pub trait TextProcessing: UnicodeSegmentation + CodePointsIter {

impl TextProcessing for str {
fn split_lines(&self, width: usize) -> Vec<String> {
crate::line_break::linear(self, width)
super::line_break::linear(self, width)
}

fn split_lines_reflow(&self, reflow: Reflow, width: Option<usize>) -> Vec<String> {
crate::line_break::split_lines_reflow(self, reflow, width)
super::line_break::split_lines_reflow(self, reflow, width)
}
}


text_processing/src/line_break.rs → melib/src/text_processing/line_break.rs View File

@@ -21,10 +21,10 @@

extern crate unicode_segmentation;
use self::unicode_segmentation::UnicodeSegmentation;
use crate::grapheme_clusters::TextProcessing;
use crate::tables::LINE_BREAK_RULES;
use crate::types::LineBreakClass;
use crate::types::Reflow;
use super::grapheme_clusters::TextProcessing;
use super::tables::LINE_BREAK_RULES;
use super::types::LineBreakClass;
use super::types::Reflow;
use core::cmp::Ordering;
use core::iter::Peekable;
use core::str::FromStr;
@@ -118,8 +118,8 @@ macro_rules! next_grapheme_class {
/// Returns positions where breaks can happen
/// Examples:
/// ```
/// use text_processing::{self, LineBreakCandidate::{self, *}};
/// use text_processing::line_break::LineBreakCandidateIter;
/// use melib::text_processing::{self, LineBreakCandidate::{self, *}};
/// use melib::text_processing::line_break::LineBreakCandidateIter;
///
/// assert!(LineBreakCandidateIter::new("").collect::<Vec<(usize, LineBreakCandidate)>>().is_empty());
/// assert_eq!(&[(7, BreakAllowed), (12, MandatoryBreak)],
@@ -749,8 +749,8 @@ mod tests {
pub use alg::linear;

mod alg {
use super::*;
use crate::grapheme_clusters::TextProcessing;
use super::super::grapheme_clusters::TextProcessing;
use super::super::*;
fn cost(i: usize, j: usize, width: usize, minima: &Vec<usize>, offsets: &Vec<usize>) -> usize {
let w = offsets[j] + j - offsets[i] - i - 1;
if w > width {

text_processing/src/lib.rs → melib/src/text_processing/mod.rs View File

@@ -1,5 +1,5 @@
/*
* meli - text_processing crate.
* meli - text_processing mod.
*
* Copyright 2017-2020 Manos Pitsidianakis
*

text_processing/src/tables.rs → melib/src/text_processing/tables.rs View File

@@ -19,8 +19,8 @@
* along with meli. If not, see <http://www.gnu.org/licenses/>.
*/

use crate::types::LineBreakClass;
use crate::types::LineBreakClass::*;
use super::types::LineBreakClass;
use super::types::LineBreakClass::*;

pub const LINE_BREAK_RULES: &'static [(u32, u32, LineBreakClass)] = &[
(0x0, 0x8, CM),

text_processing/src/types.rs → melib/src/text_processing/types.rs View File


text_processing/src/wcwidth.rs → melib/src/text_processing/wcwidth.rs View File


+ 1
- 1
melib/src/thread.rs View File

@@ -40,7 +40,7 @@ mod iterators;
pub use iterators::*;

#[cfg(feature = "unicode_algorithms")]
use text_processing::grapheme_clusters::*;
use crate::text_processing::grapheme_clusters::*;
use uuid::Uuid;

use fnv::{FnvHashMap, FnvHashSet};


+ 0
- 2
src/bin.rs View File

@@ -31,8 +31,6 @@ use std::alloc::System;
use std::collections::VecDeque;
use std::path::{Path, PathBuf};
extern crate notify_rust;
extern crate text_processing;
use text_processing::*;
extern crate xdg_utils;
#[macro_use]
extern crate serde_derive;


+ 1
- 0
src/components.rs View File

@@ -26,6 +26,7 @@
*/

use super::*;
use crate::melib::text_processing::{TextProcessing, Truncate};

pub mod mail;
pub use crate::mail::*;


+ 1
- 0
src/components/contacts/contact_list.rs View File

@@ -19,6 +19,7 @@
* along with meli. If not, see <http://www.gnu.org/licenses/>.
*/
use super::*;
use crate::melib::text_processing::TextProcessing;

use melib::CardId;
use std::cmp;


+ 1
- 1
src/conf.rs View File

@@ -167,7 +167,7 @@ impl From<FileAccount> for AccountConf {
let mut folder_confs = x.folders.clone();
for s in &x.subscribed_folders {
if !folder_confs.contains_key(s) {
use text_processing::GlobMatch;
use melib::text_processing::GlobMatch;
if s.is_glob() {
continue;
}


+ 1
- 1
src/conf/accounts.rs View File

@@ -32,10 +32,10 @@ use melib::backends::{
};
use melib::error::{MeliError, Result};
use melib::mailbox::*;
use melib::text_processing::GlobMatch;
use melib::thread::{SortField, SortOrder, ThreadNode, ThreadNodeHash, Threads};
use melib::AddressBook;
use smallvec::SmallVec;
use text_processing::GlobMatch;

use crate::types::UIEvent::{self, EnvelopeRemove, EnvelopeRename, EnvelopeUpdate, Notification};
use crate::{StatusEvent, ThreadEvent};


+ 1
- 1
src/mailcap.rs View File

@@ -26,12 +26,12 @@ use crate::state::Context;
use crate::types::{create_temp_file, ForkType, UIEvent};
use fnv::FnvHashMap;
use melib::attachments::decode;
use melib::text_processing::GlobMatch;
use melib::{email::Attachment, MeliError, Result};
use std::io::Read;
use std::io::Write;
use std::path::PathBuf;
use std::process::{Command, Stdio};
use text_processing::GlobMatch;

pub struct MailcapEntry {
command: String,


+ 1
- 1
src/terminal/cells.rs View File

@@ -26,7 +26,7 @@

use super::position::*;
use crate::state::Context;
use text_processing::wcwidth;
use melib::text_processing::wcwidth;

use serde::{de, Deserialize, Deserializer, Serialize, Serializer};
use std::convert::From;


+ 1
- 1
src/terminal/embed/grid.rs View File

@@ -22,9 +22,9 @@
use super::*;
use crate::terminal::cells::*;
use melib::error::{MeliError, Result};
use melib::text_processing::wcwidth;
use nix::sys::wait::WaitStatus;
use nix::sys::wait::{waitpid, WaitPidFlag};
use text_processing::wcwidth;
/**
* `EmbedGrid` manages the terminal grid state of the embed process.
*


+ 1
- 1
src/terminal/text_editing.rs View File

@@ -19,7 +19,7 @@
* along with meli. If not, see <http://www.gnu.org/licenses/>.
*/

use text_processing::TextProcessing;
use melib::text_processing::TextProcessing;

#[derive(Debug, Clone, Default, PartialEq)]
pub struct UText {


+ 1
- 1
src/workers.rs View File

@@ -28,10 +28,10 @@ use crossbeam::{
};
use fnv::FnvHashMap;
use melib::async_workers::{Work, WorkContext};
use melib::text_processing::Truncate;
use std::sync::Arc;
use std::sync::Mutex;
use std::thread;
use text_processing::Truncate;

const MAX_WORKER: usize = 4;



+ 1
- 2
testing/Cargo.toml View File

@@ -14,8 +14,7 @@ name = "imapconn"
path = "src/imap_conn.rs"

[dependencies]
melib = { path = "../melib", version = "*", features = ["debug-tracing"] }
text_processing = { path = "../text_processing", version = "*" }
melib = { path = "../melib", version = "*", features = ["debug-tracing", "unicode_algorithms"] }

[features]
default = []


+ 2
- 2
testing/src/email_parse.rs View File

@@ -10,14 +10,14 @@ use melib::*;
/// ```

fn main() -> Result<()> {
if args.len() == 1 {
if std::env::args().len() == 1 {
eprintln!("Usage: ./emailparse /path/to/email [/path/to/email2 /path/to/email3 ..]");
std::process::exit(1);
}

for i in std::env::args().skip(1) {
println!("Path is {}", i);
let filename = std::path::PathBuf::from(i);
let filename = std::path::PathBuf::from(&i);

if filename.exists() && filename.is_file() {
let buffer = std::fs::read_to_string(&filename)


+ 0
- 10
text_processing/Cargo.toml View File

@@ -1,10 +0,0 @@
[package]
name = "text_processing"
version = "0.4.1"
authors = ["Manos Pitsidianakis <el13635@mail.ntua.gr>"]
workspace = ".."
edition = "2018"
build = "build.rs"

[dependencies]
unicode-segmentation = "1.2.1"

+ 0
- 94
text_processing/build.rs View File

@@ -1,94 +0,0 @@
/*
* meli - text_processing crate.
*
* Copyright 2017-2020 Manos Pitsidianakis
*
* This file is part of meli.
*
* meli is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* meli is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with meli. If not, see <http://www.gnu.org/licenses/>.
*/

const LINE_BREAK_TABLE_URL: &str = "http://www.unicode.org/Public/UCD/latest/ucd/LineBreak.txt";
use std::fs::File;
use std::io::prelude::*;
use std::io::BufReader;
use std::path::{Path, PathBuf};
use std::process::Command;

include!("src/types.rs");

/// Build script entry point for the text_processing crate.
///
/// Generates `src/tables.rs` from the Unicode `LineBreak.txt` data file unless
/// that file already exists. The data file is downloaded with `curl` into a
/// directory created by `mktemp -d`; both are removed afterwards.
///
/// # Errors
///
/// Returns an `std::io::Error` if an external command cannot be spawned or
/// exits unsuccessfully, if the downloaded file cannot be read or contains a
/// malformed entry, or if the table module cannot be written.
fn main() -> Result<(), std::io::Error> {
    let mod_path = Path::new("src/tables.rs");
    if mod_path.exists() {
        eprintln!(
            "{} already exists, delete it if you want to replace it.",
            mod_path.display()
        );
        return Ok(());
    }

    /* Create a scratch directory to download the data file into. */
    let mktemp = Command::new("mktemp").arg("-d").output()?;
    if !mktemp.status.success() {
        return Err(std::io::Error::new(
            std::io::ErrorKind::Other,
            "mktemp -d failed",
        ));
    }
    let tmpdir = std::str::from_utf8(&mktemp.stdout)
        .map_err(|err| std::io::Error::new(std::io::ErrorKind::InvalidData, err))?
        .trim();
    let mut tmpdir_path = PathBuf::from(tmpdir);
    tmpdir_path.push("LineBreak.txt");

    /* `output()` only errors when curl cannot be spawned, so the exit status has to be
     * inspected as well; `--fail` makes HTTP errors show up in that status. */
    let curl = Command::new("curl")
        .arg("--fail")
        .arg("-o")
        .arg(&tmpdir_path)
        .arg(LINE_BREAK_TABLE_URL)
        .output()?;
    if !curl.status.success() {
        return Err(std::io::Error::new(
            std::io::ErrorKind::Other,
            format!(
                "curl could not download {}: {}",
                LINE_BREAK_TABLE_URL,
                String::from_utf8_lossy(&curl.stderr).trim()
            ),
        ));
    }

    let buf_reader = BufReader::new(File::open(&tmpdir_path)?);
    let parse_codepoint = |hex: &str| {
        u32::from_str_radix(hex, 16)
            .map_err(|err| std::io::Error::new(std::io::ErrorKind::InvalidData, err))
    };

    let mut line_break_table: Vec<(u32, u32, LineBreakClass)> = Vec::with_capacity(3800);
    for line in buf_reader.lines() {
        let line = line?;
        if line.starts_with('#') || line.starts_with(' ') || line.is_empty() {
            continue;
        }
        /* Only the first whitespace-delimited token ("XXXX;CL" or "XXXX..YYYY;CL") is used. */
        let tokens: &str = match line.split_whitespace().next() {
            Some(tokens) => tokens,
            None => continue,
        };
        let malformed = || {
            std::io::Error::new(
                std::io::ErrorKind::InvalidData,
                format!("malformed LineBreak.txt entry: {:?}", line),
            )
        };

        /* `find` yields a byte offset, which is what the slicing below needs. */
        let semicolon_idx: usize = tokens.find(';').ok_or_else(&malformed)?;
        let chars_str: &str = &tokens[..semicolon_idx];

        let mut codepoint_iter = chars_str.split("..");

        let first_codepoint: u32 = parse_codepoint(codepoint_iter.next().ok_or_else(&malformed)?)?;

        /* A single codepoint is stored as a range that starts and ends on itself. */
        let sec_codepoint: u32 = match codepoint_iter.next() {
            Some(hex) => parse_codepoint(hex)?,
            None => first_codepoint,
        };

        /* NOTE(review): only the first two bytes of the class name are passed on, so a
         * three-letter class name would be truncated -- confirm LineBreakClass::from
         * expects that. */
        let class_start = semicolon_idx + 1;
        let class = tokens
            .get(class_start..class_start + 2)
            .ok_or_else(&malformed)?;
        line_break_table.push((first_codepoint, sec_codepoint, LineBreakClass::from(class)));
    }

    /* The rest of the crate imports the table as a public LINE_BREAK_RULES constant, so the
     * generated module has to declare it under that name. */
    let mut tables = std::io::BufWriter::new(File::create(mod_path)?);
    tables.write_all(b"use crate::types::LineBreakClass::*;\n")?;
    tables.write_all(b"use crate::types::LineBreakClass;\n\n")?;
    tables.write_all(b"pub const LINE_BREAK_RULES: &'static [(u32, u32, LineBreakClass)] = &[\n")?;
    for (first, last, class) in &line_break_table {
        writeln!(tables, "    (0x{:X}, 0x{:X}, {:?}),", first, last, class)?;
    }
    tables.write_all(b"];\n")?;
    tables.flush()?;

    /* Remove the downloaded file, then the scratch directory that held it. */
    std::fs::remove_file(&tmpdir_path)?;
    tmpdir_path.pop();
    std::fs::remove_dir(&tmpdir_path)?;
    Ok(())
}

Loading…
Cancel
Save