From 05b91f1c02a273c653a73569493f8e7154a62a11 Mon Sep 17 00:00:00 2001 From: Manos Pitsidianakis Date: Tue, 4 Feb 2020 17:26:25 +0200 Subject: [PATCH] Remove text_processing Unwrap text_processing into melib In preparation for uploading meli as a separate crate on crates.io. --- Cargo.lock | 17 +--- Cargo.toml | 5 +- melib/Cargo.toml | 20 +++- melib/build.rs | 78 ++++++++++++++- melib/src/email/compose/mime.rs | 2 +- melib/src/lib.rs | 2 +- .../src/text_processing}/grapheme_clusters.rs | 8 +- .../src/text_processing}/line_break.rs | 16 ++-- .../src/text_processing/mod.rs | 2 +- .../src/text_processing}/tables.rs | 4 +- .../src/text_processing}/types.rs | 0 .../src/text_processing}/wcwidth.rs | 0 melib/src/thread.rs | 2 +- src/bin.rs | 2 - src/components.rs | 1 + src/components/contacts/contact_list.rs | 1 + src/conf.rs | 2 +- src/conf/accounts.rs | 2 +- src/mailcap.rs | 2 +- src/terminal/cells.rs | 2 +- src/terminal/embed/grid.rs | 2 +- src/terminal/text_editing.rs | 2 +- src/workers.rs | 2 +- testing/Cargo.toml | 3 +- testing/src/email_parse.rs | 4 +- text_processing/Cargo.toml | 10 -- text_processing/build.rs | 94 ------------------- 27 files changed, 129 insertions(+), 156 deletions(-) rename {text_processing/src => melib/src/text_processing}/grapheme_clusters.rs (99%) rename {text_processing/src => melib/src/text_processing}/line_break.rs (99%) rename text_processing/src/lib.rs => melib/src/text_processing/mod.rs (98%) rename {text_processing/src => melib/src/text_processing}/tables.rs (99%) rename {text_processing/src => melib/src/text_processing}/types.rs (100%) rename {text_processing/src => melib/src/text_processing}/wcwidth.rs (100%) delete mode 100644 text_processing/Cargo.toml delete mode 100644 text_processing/build.rs diff --git a/Cargo.lock b/Cargo.lock index ca5bae4b..0cd49243 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -739,7 +739,7 @@ dependencies = [ "fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)", "libc 0.2.64 (registry+https://github.com/rust-lang/crates.io-index)", "linkify 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", - "melib 0.4.1", + "melib 0.5.0", "nix 0.16.1 (registry+https://github.com/rust-lang/crates.io-index)", "nom 3.2.1 (registry+https://github.com/rust-lang/crates.io-index)", "notify 4.0.12 (registry+https://github.com/rust-lang/crates.io-index)", @@ -755,7 +755,6 @@ dependencies = [ "signal-hook-registry 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", "smallvec 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "termion 1.5.2 (registry+https://github.com/rust-lang/crates.io-index)", - "text_processing 0.4.1", "toml 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)", "unicode-segmentation 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "uuid 0.7.4 (registry+https://github.com/rust-lang/crates.io-index)", @@ -765,7 +764,7 @@ dependencies = [ [[package]] name = "melib" -version = "0.4.1" +version = "0.5.0" dependencies = [ "bincode 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", "bitflags 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", @@ -786,7 +785,7 @@ dependencies = [ "serde_json 1.0.39 (registry+https://github.com/rust-lang/crates.io-index)", "smallvec 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "termion 1.5.2 (registry+https://github.com/rust-lang/crates.io-index)", - "text_processing 0.4.1", + "unicode-segmentation 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "uuid 0.7.4 (registry+https://github.com/rust-lang/crates.io-index)", "xdg 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -1645,15 +1644,7 @@ dependencies = [ name = "testing" version = "0.4.1" dependencies = [ - "melib 0.4.1", - "text_processing 0.4.1", -] - -[[package]] -name = "text_processing" -version = "0.4.1" -dependencies = [ - "unicode-segmentation 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "melib 0.5.0", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index a92df14b..53dc3e3e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,7 +14,7 @@ crossbeam = "0.7.2" signal-hook = "0.1.12" signal-hook-registry = "1.2.0" nix = "0.16.1" -melib = { path = "melib", version = "0.4.1" } +melib = { path = "melib", version = "0.5.0" } serde = "1.0.71" serde_derive = "1.0.71" @@ -30,7 +30,6 @@ termion = "1.5.1" bincode = "1.2.0" uuid = { version = "0.7.4", features = ["serde", "v4"] } unicode-segmentation = "1.2.1" # >:c -text_processing = { path = "text_processing", version = "0.4.1" } libc = {version = "0.2.59", features = ["extra_traits",]} rusqlite = {version = "0.20.0", optional =true } rmp = "^0.8" @@ -44,7 +43,7 @@ lto = true debug = false [workspace] -members = ["melib", "testing", "text_processing"] +members = ["melib", "testing", ] [features] default = ["sqlite3"] diff --git a/melib/Cargo.toml b/melib/Cargo.toml index b2e0aefd..d91dc133 100644 --- a/melib/Cargo.toml +++ b/melib/Cargo.toml @@ -1,11 +1,22 @@ [package] name = "melib" -version = "0.4.1" -authors = ["Manos Pitsidianakis "] +version = "0.5.0" +authors = ["Manos Pitsidianakis "] workspace = ".." edition = "2018" build = "build.rs" +homepage = "https://meli.delivery" +repository = "https://git.meli.delivery/meli/meli.git" +description = "backend mail client library" +keywords = ["mail", "mua", "maildir", "imap"] +categories = [ "email"] +license = "GPL-3.0-or-later" + +[lib] +name = "melib" +path = "src/lib.rs" + [dependencies] bitflags = "1.0" crossbeam = "0.7.2" @@ -23,7 +34,8 @@ serde = { version = "1.0.71", features = ["rc", ] } serde_derive = "1.0.71" bincode = "1.2.0" uuid = { version = "0.7.4", features = ["serde", "v4"] } -text_processing = { path = "../text_processing", version = "*", optional= true } + +unicode-segmentation = { version = "1.2.1", optional = true } libc = {version = "0.2.59", features = ["extra_traits",]} reqwest = { version ="0.10.0-alpha.2", optional=true, features = ["json", "blocking" ]} serde_json = { version = "1.0", optional = true, features = ["raw_value",] } @@ -34,7 +46,7 @@ nix = "0.16.1" default = ["unicode_algorithms", "imap_backend", "maildir_backend", "mbox_backend", "vcard"] debug-tracing = [] -unicode_algorithms = ["text_processing"] +unicode_algorithms = ["unicode-segmentation"] imap_backend = ["native-tls"] maildir_backend = ["notify", "notify-rust", "memmap"] mbox_backend = ["notify", "notify-rust", "memmap"] diff --git a/melib/build.rs b/melib/build.rs index 2efeb64e..dea4ad21 100644 --- a/melib/build.rs +++ b/melib/build.rs @@ -19,9 +19,85 @@ * along with meli. If not, see . */ -fn main() { +#[cfg(feature = "unicode_algorithms")] +include!("src/text_processing/types.rs"); + +fn main() -> Result<(), std::io::Error> { #[cfg(feature = "notmuch_backend")] { println!("cargo:rustc-link-lib=notmuch"); } + #[cfg(feature = "unicode_algorithms")] + { + use std::fs::File; + use std::io::prelude::*; + use std::io::BufReader; + use std::path::{Path, PathBuf}; + use std::process::Command; + const LINE_BREAK_TABLE_URL: &str = + "http://www.unicode.org/Public/UCD/latest/ucd/LineBreak.txt"; + + let mod_path = Path::new("src/text_processing/tables.rs"); + if mod_path.exists() { + eprintln!( + "{} already exists, delete it if you want to replace it.", + mod_path.display() + ); + std::process::exit(0); + } + let mut tmpdir_path = PathBuf::from( + std::str::from_utf8(&Command::new("mktemp").arg("-d").output()?.stdout) + .unwrap() + .trim(), + ); + tmpdir_path.push("LineBreak.txt"); + Command::new("curl") + .args(&["-o", tmpdir_path.to_str().unwrap(), LINE_BREAK_TABLE_URL]) + .output()?; + + let file = File::open(&tmpdir_path)?; + let buf_reader = BufReader::new(file); + + let mut line_break_table: Vec<(u32, u32, LineBreakClass)> = Vec::with_capacity(3800); + for line in buf_reader.lines() { + let line = line.unwrap(); + if line.starts_with('#') || line.starts_with(' ') || line.is_empty() { + continue; + } + let tokens: &str = line.split_whitespace().next().unwrap(); + + let semicolon_idx: usize = tokens.chars().position(|c| c == ';').unwrap(); + /* LineBreak.txt list is ascii encoded so we can assume each char takes one byte: */ + let chars_str: &str = &tokens[..semicolon_idx]; + + let mut codepoint_iter = chars_str.split(".."); + + let first_codepoint: u32 = + u32::from_str_radix(std::dbg!(codepoint_iter.next().unwrap()), 16).unwrap(); + + let sec_codepoint: u32 = codepoint_iter + .next() + .map(|v| u32::from_str_radix(std::dbg!(v), 16).unwrap()) + .unwrap_or(first_codepoint); + let class = &tokens[semicolon_idx + 1..semicolon_idx + 1 + 2]; + line_break_table.push((first_codepoint, sec_codepoint, LineBreakClass::from(class))); + } + + let mut file = File::create(&mod_path)?; + file.write_all(b"use crate::types::LineBreakClass::*;\n") + .unwrap(); + file.write_all(b"use crate::types::LineBreakClass;\n\n") + .unwrap(); + file.write_all(b"const line_break_rules: &'static [(u32, u32, LineBreakClass)] = &[\n") + .unwrap(); + for l in &line_break_table { + file.write_all(format!(" (0x{:X}, 0x{:X}, {:?}),\n", l.0, l.1, l.2).as_bytes()) + .unwrap(); + } + file.write_all(b"];").unwrap(); + std::fs::remove_file(&tmpdir_path).unwrap(); + tmpdir_path.pop(); + std::fs::remove_dir(&tmpdir_path).unwrap(); + } + Ok(()) } diff --git a/melib/src/email/compose/mime.rs b/melib/src/email/compose/mime.rs index 4a15049c..8282817c 100644 --- a/melib/src/email/compose/mime.rs +++ b/melib/src/email/compose/mime.rs @@ -22,7 +22,7 @@ use super::*; #[cfg(feature = "unicode_algorithms")] -use text_processing::grapheme_clusters::TextProcessing; +use crate::text_processing::grapheme_clusters::TextProcessing; pub fn encode_header(value: &str) -> String { let mut ret = String::with_capacity(value.len()); diff --git a/melib/src/lib.rs b/melib/src/lib.rs index cb5a8557..c9dc9307 100644 --- a/melib/src/lib.rs +++ b/melib/src/lib.rs @@ -102,7 +102,7 @@ pub mod dbg { } #[cfg(feature = "unicode_algorithms")] -extern crate text_processing; +pub mod text_processing; pub mod datetime; pub use datetime::UnixTimestamp; diff --git a/text_processing/src/grapheme_clusters.rs b/melib/src/text_processing/grapheme_clusters.rs similarity index 99% rename from text_processing/src/grapheme_clusters.rs rename to melib/src/text_processing/grapheme_clusters.rs index 3e54304b..174b4f5d 100644 --- a/text_processing/src/grapheme_clusters.rs +++ b/melib/src/text_processing/grapheme_clusters.rs @@ -29,8 +29,8 @@ */ -use crate::types::Reflow; -use crate::wcwidth::{wcwidth, CodePointsIter}; +use super::types::Reflow; +use super::wcwidth::{wcwidth, CodePointsIter}; extern crate unicode_segmentation; use self::unicode_segmentation::UnicodeSegmentation; @@ -71,11 +71,11 @@ pub trait TextProcessing: UnicodeSegmentation + CodePointsIter { impl TextProcessing for str { fn split_lines(&self, width: usize) -> Vec { - crate::line_break::linear(self, width) + super::line_break::linear(self, width) } fn split_lines_reflow(&self, reflow: Reflow, width: Option) -> Vec { - crate::line_break::split_lines_reflow(self, reflow, width) + super::line_break::split_lines_reflow(self, reflow, width) } } diff --git a/text_processing/src/line_break.rs b/melib/src/text_processing/line_break.rs similarity index 99% rename from text_processing/src/line_break.rs rename to melib/src/text_processing/line_break.rs index ddc7d4f9..6c50c5e5 100644 --- a/text_processing/src/line_break.rs +++ b/melib/src/text_processing/line_break.rs @@ -21,10 +21,10 @@ extern crate unicode_segmentation; use self::unicode_segmentation::UnicodeSegmentation; -use crate::grapheme_clusters::TextProcessing; -use crate::tables::LINE_BREAK_RULES; -use crate::types::LineBreakClass; -use crate::types::Reflow; +use super::grapheme_clusters::TextProcessing; +use super::tables::LINE_BREAK_RULES; +use super::types::LineBreakClass; +use super::types::Reflow; use core::cmp::Ordering; use core::iter::Peekable; use core::str::FromStr; @@ -118,8 +118,8 @@ macro_rules! next_grapheme_class { /// Returns positions where breaks can happen /// Examples: /// ``` -/// use text_processing::{self, LineBreakCandidate::{self, *}}; -/// use text_processing::line_break::LineBreakCandidateIter; +/// use melib::text_processing::{self, LineBreakCandidate::{self, *}}; +/// use melib::text_processing::line_break::LineBreakCandidateIter; /// /// assert!(LineBreakCandidateIter::new("").collect::>().is_empty()); /// assert_eq!(&[(7, BreakAllowed), (12, MandatoryBreak)], @@ -749,8 +749,8 @@ mod tests { pub use alg::linear; mod alg { - use super::*; - use crate::grapheme_clusters::TextProcessing; + use super::super::grapheme_clusters::TextProcessing; + use super::super::*; fn cost(i: usize, j: usize, width: usize, minima: &Vec, offsets: &Vec) -> usize { let w = offsets[j] + j - offsets[i] - i - 1; if w > width { diff --git a/text_processing/src/lib.rs b/melib/src/text_processing/mod.rs similarity index 98% rename from text_processing/src/lib.rs rename to melib/src/text_processing/mod.rs index 6b949419..e5d7a8bd 100644 --- a/text_processing/src/lib.rs +++ b/melib/src/text_processing/mod.rs @@ -1,5 +1,5 @@ /* - * meli - text_processing crate. + * meli - text_processing mod. * * Copyright 2017-2020 Manos Pitsidianakis * diff --git a/text_processing/src/tables.rs b/melib/src/text_processing/tables.rs similarity index 99% rename from text_processing/src/tables.rs rename to melib/src/text_processing/tables.rs index 2ce6cfb4..cecfe5cb 100644 --- a/text_processing/src/tables.rs +++ b/melib/src/text_processing/tables.rs @@ -19,8 +19,8 @@ * along with meli. If not, see . */ -use crate::types::LineBreakClass; -use crate::types::LineBreakClass::*; +use super::types::LineBreakClass; +use super::types::LineBreakClass::*; pub const LINE_BREAK_RULES: &'static [(u32, u32, LineBreakClass)] = &[ (0x0, 0x8, CM), diff --git a/text_processing/src/types.rs b/melib/src/text_processing/types.rs similarity index 100% rename from text_processing/src/types.rs rename to melib/src/text_processing/types.rs diff --git a/text_processing/src/wcwidth.rs b/melib/src/text_processing/wcwidth.rs similarity index 100% rename from text_processing/src/wcwidth.rs rename to melib/src/text_processing/wcwidth.rs diff --git a/melib/src/thread.rs b/melib/src/thread.rs index 5d9509ed..1a7eb0ac 100644 --- a/melib/src/thread.rs +++ b/melib/src/thread.rs @@ -40,7 +40,7 @@ mod iterators; pub use iterators::*; #[cfg(feature = "unicode_algorithms")] -use text_processing::grapheme_clusters::*; +use crate::text_processing::grapheme_clusters::*; use uuid::Uuid; use fnv::{FnvHashMap, FnvHashSet}; diff --git a/src/bin.rs b/src/bin.rs index f1feea40..331f9120 100644 --- a/src/bin.rs +++ b/src/bin.rs @@ -31,8 +31,6 @@ use std::alloc::System; use std::collections::VecDeque; use std::path::{Path, PathBuf}; extern crate notify_rust; -extern crate text_processing; -use text_processing::*; extern crate xdg_utils; #[macro_use] extern crate serde_derive; diff --git a/src/components.rs b/src/components.rs index e0fc897d..d5e2f3b2 100644 --- a/src/components.rs +++ b/src/components.rs @@ -26,6 +26,7 @@ */ use super::*; +use crate::melib::text_processing::{TextProcessing, Truncate}; pub mod mail; pub use crate::mail::*; diff --git a/src/components/contacts/contact_list.rs b/src/components/contacts/contact_list.rs index a79d4cfd..d3156285 100644 --- a/src/components/contacts/contact_list.rs +++ b/src/components/contacts/contact_list.rs @@ -19,6 +19,7 @@ * along with meli. If not, see . */ use super::*; +use crate::melib::text_processing::TextProcessing; use melib::CardId; use std::cmp; diff --git a/src/conf.rs b/src/conf.rs index 2964c042..a39a10ba 100644 --- a/src/conf.rs +++ b/src/conf.rs @@ -167,7 +167,7 @@ impl From for AccountConf { let mut folder_confs = x.folders.clone(); for s in &x.subscribed_folders { if !folder_confs.contains_key(s) { - use text_processing::GlobMatch; + use melib::text_processing::GlobMatch; if s.is_glob() { continue; } diff --git a/src/conf/accounts.rs b/src/conf/accounts.rs index 5cda13cd..8ed24e9d 100644 --- a/src/conf/accounts.rs +++ b/src/conf/accounts.rs @@ -32,10 +32,10 @@ use melib::backends::{ }; use melib::error::{MeliError, Result}; use melib::mailbox::*; +use melib::text_processing::GlobMatch; use melib::thread::{SortField, SortOrder, ThreadNode, ThreadNodeHash, Threads}; use melib::AddressBook; use smallvec::SmallVec; -use text_processing::GlobMatch; use crate::types::UIEvent::{self, EnvelopeRemove, EnvelopeRename, EnvelopeUpdate, Notification}; use crate::{StatusEvent, ThreadEvent}; diff --git a/src/mailcap.rs b/src/mailcap.rs index 0973bcfa..b88c660c 100644 --- a/src/mailcap.rs +++ b/src/mailcap.rs @@ -26,12 +26,12 @@ use crate::state::Context; use crate::types::{create_temp_file, ForkType, UIEvent}; use fnv::FnvHashMap; use melib::attachments::decode; +use melib::text_processing::GlobMatch; use melib::{email::Attachment, MeliError, Result}; use std::io::Read; use std::io::Write; use std::path::PathBuf; use std::process::{Command, Stdio}; -use text_processing::GlobMatch; pub struct MailcapEntry { command: String, diff --git a/src/terminal/cells.rs b/src/terminal/cells.rs index df76faf6..1ccfe914 100644 --- a/src/terminal/cells.rs +++ b/src/terminal/cells.rs @@ -26,7 +26,7 @@ use super::position::*; use crate::state::Context; -use text_processing::wcwidth; +use melib::text_processing::wcwidth; use serde::{de, Deserialize, Deserializer, Serialize, Serializer}; use std::convert::From; diff --git a/src/terminal/embed/grid.rs b/src/terminal/embed/grid.rs index 630597bf..32477ea1 100644 --- a/src/terminal/embed/grid.rs +++ b/src/terminal/embed/grid.rs @@ -22,9 +22,9 @@ use super::*; use crate::terminal::cells::*; use melib::error::{MeliError, Result}; +use melib::text_processing::wcwidth; use nix::sys::wait::WaitStatus; use nix::sys::wait::{waitpid, WaitPidFlag}; -use text_processing::wcwidth; /** * `EmbedGrid` manages the terminal grid state of the embed process. * diff --git a/src/terminal/text_editing.rs b/src/terminal/text_editing.rs index e2fb53e3..9a0fb9ae 100644 --- a/src/terminal/text_editing.rs +++ b/src/terminal/text_editing.rs @@ -19,7 +19,7 @@ * along with meli. If not, see . */ -use text_processing::TextProcessing; +use melib::text_processing::TextProcessing; #[derive(Debug, Clone, Default, PartialEq)] pub struct UText { diff --git a/src/workers.rs b/src/workers.rs index bed12a33..79ba0834 100644 --- a/src/workers.rs +++ b/src/workers.rs @@ -28,10 +28,10 @@ use crossbeam::{ }; use fnv::FnvHashMap; use melib::async_workers::{Work, WorkContext}; +use melib::text_processing::Truncate; use std::sync::Arc; use std::sync::Mutex; use std::thread; -use text_processing::Truncate; const MAX_WORKER: usize = 4; diff --git a/testing/Cargo.toml b/testing/Cargo.toml index 3f56d205..a89f0a00 100644 --- a/testing/Cargo.toml +++ b/testing/Cargo.toml @@ -14,8 +14,7 @@ name = "imapconn" path = "src/imap_conn.rs" [dependencies] -melib = { path = "../melib", version = "*", features = ["debug-tracing"] } -text_processing = { path = "../text_processing", version = "*" } +melib = { path = "../melib", version = "*", features = ["debug-tracing", "unicode_algorithms"] } [features] default = [] diff --git a/testing/src/email_parse.rs b/testing/src/email_parse.rs index 2d7cb088..aa240a0b 100644 --- a/testing/src/email_parse.rs +++ b/testing/src/email_parse.rs @@ -10,14 +10,14 @@ use melib::*; /// ``` fn main() -> Result<()> { - if args.len() == 1 { + if std::env::args().len() == 1 { eprintln!("Usage: ./emailparse /path/to/email [/path/to/email2 /path/to/email3 ..]"); std::process::exit(1); } for i in std::env::args().skip(1) { println!("Path is {}", i); - let filename = std::path::PathBuf::from(i); + let filename = std::path::PathBuf::from(&i); if filename.exists() && filename.is_file() { let buffer = std::fs::read_to_string(&filename) diff --git a/text_processing/Cargo.toml b/text_processing/Cargo.toml deleted file mode 100644 index 963952f0..00000000 --- a/text_processing/Cargo.toml +++ /dev/null @@ -1,10 +0,0 @@ -[package] -name = "text_processing" -version = "0.4.1" -authors = ["Manos Pitsidianakis "] -workspace = ".." -edition = "2018" -build = "build.rs" - -[dependencies] -unicode-segmentation = "1.2.1" diff --git a/text_processing/build.rs b/text_processing/build.rs deleted file mode 100644 index 952ff0a6..00000000 --- a/text_processing/build.rs +++ /dev/null @@ -1,94 +0,0 @@ -/* - * meli - text_processing crate. - * - * Copyright 2017-2020 Manos Pitsidianakis - * - * This file is part of meli. - * - * meli is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * meli is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with meli. If not, see . - */ - -const LINE_BREAK_TABLE_URL: &str = "http://www.unicode.org/Public/UCD/latest/ucd/LineBreak.txt"; -use std::fs::File; -use std::io::prelude::*; -use std::io::BufReader; -use std::path::{Path, PathBuf}; -use std::process::Command; - -include!("src/types.rs"); - -fn main() -> Result<(), std::io::Error> { - let mod_path = Path::new("src/tables.rs"); - if mod_path.exists() { - eprintln!( - "{} already exists, delete it if you want to replace it.", - mod_path.display() - ); - std::process::exit(0); - } - let mut tmpdir_path = PathBuf::from( - std::str::from_utf8(&Command::new("mktemp").arg("-d").output()?.stdout) - .unwrap() - .trim(), - ); - tmpdir_path.push("LineBreak.txt"); - Command::new("curl") - .args(&["-o", tmpdir_path.to_str().unwrap(), LINE_BREAK_TABLE_URL]) - .output()?; - - let file = File::open(&tmpdir_path)?; - let buf_reader = BufReader::new(file); - - let mut line_break_table: Vec<(u32, u32, LineBreakClass)> = Vec::with_capacity(3800); - for line in buf_reader.lines() { - let line = line.unwrap(); - if line.starts_with('#') || line.starts_with(' ') || line.is_empty() { - continue; - } - let tokens: &str = line.split_whitespace().next().unwrap(); - - let semicolon_idx: usize = tokens.chars().position(|c| c == ';').unwrap(); - /* LineBreak.txt list is ascii encoded so we can assume each char takes one byte: */ - let chars_str: &str = &tokens[..semicolon_idx]; - - let mut codepoint_iter = chars_str.split(".."); - - let first_codepoint: u32 = - u32::from_str_radix(std::dbg!(codepoint_iter.next().unwrap()), 16).unwrap(); - - let sec_codepoint: u32 = codepoint_iter - .next() - .map(|v| u32::from_str_radix(std::dbg!(v), 16).unwrap()) - .unwrap_or(first_codepoint); - let class = &tokens[semicolon_idx + 1..semicolon_idx + 1 + 2]; - line_break_table.push((first_codepoint, sec_codepoint, LineBreakClass::from(class))); - } - - let mut file = File::create(&mod_path)?; - file.write_all(b"use crate::types::LineBreakClass::*;\n") - .unwrap(); - file.write_all(b"use crate::types::LineBreakClass;\n\n") - .unwrap(); - file.write_all(b"const line_break_rules: &'static [(u32, u32, LineBreakClass)] = &[\n") - .unwrap(); - for l in &line_break_table { - file.write_all(format!(" (0x{:X}, 0x{:X}, {:?}),\n", l.0, l.1, l.2).as_bytes()) - .unwrap(); - } - file.write_all(b"];").unwrap(); - std::fs::remove_file(&tmpdir_path).unwrap(); - tmpdir_path.pop(); - std::fs::remove_dir(&tmpdir_path).unwrap(); - Ok(()) -}