diff --git a/melib/build.rs b/melib/build.rs index ffa9b9584..17570c0b2 100644 --- a/melib/build.rs +++ b/melib/build.rs @@ -25,11 +25,12 @@ include!("src/text_processing/types.rs"); fn main() -> Result<(), std::io::Error> { #[cfg(feature = "unicode_algorithms")] { + /* Line break tables */ use std::fs::File; use std::io::prelude::*; use std::io::BufReader; - use std::path::{Path, PathBuf}; - use std::process::Command; + use std::path::Path; + use std::process::{Command, Stdio}; const LINE_BREAK_TABLE_URL: &str = "http://www.unicode.org/Public/UCD/latest/ucd/LineBreak.txt"; @@ -41,18 +42,14 @@ fn main() -> Result<(), std::io::Error> { ); std::process::exit(0); } - let mut tmpdir_path = PathBuf::from( - std::str::from_utf8(&Command::new("mktemp").arg("-d").output()?.stdout) - .unwrap() - .trim(), - ); - tmpdir_path.push("LineBreak.txt"); - Command::new("curl") - .args(&["-o", tmpdir_path.to_str().unwrap(), LINE_BREAK_TABLE_URL]) - .output()?; + let mut child = Command::new("curl") + .args(&["-o", "-", LINE_BREAK_TABLE_URL]) + .stdout(Stdio::piped()) + .stdin(Stdio::null()) + .stderr(Stdio::inherit()) + .spawn()?; - let file = File::open(&tmpdir_path)?; - let buf_reader = BufReader::new(file); + let buf_reader = BufReader::new(child.stdout.take().unwrap()); let mut line_break_table: Vec<(u32, u32, LineBreakClass)> = Vec::with_capacity(3800); for line in buf_reader.lines() { @@ -78,22 +75,42 @@ fn main() -> Result<(), std::io::Error> { let class = &tokens[semicolon_idx + 1..semicolon_idx + 1 + 2]; line_break_table.push((first_codepoint, sec_codepoint, LineBreakClass::from(class))); } + child.wait()?; let mut file = File::create(&mod_path)?; - file.write_all(b"use crate::types::LineBreakClass::*;\n") - .unwrap(); - file.write_all(b"use crate::types::LineBreakClass;\n\n") - .unwrap(); - file.write_all(b"const LINE_BREAK_RULES: &[(u32, u32, LineBreakClass)] = &[\n") - .unwrap(); + file.write_all( + br#"/* + * meli - text_processing crate. 
+ * + * Copyright 2017-2020 Manos Pitsidianakis + * + * This file is part of meli. + * + * meli is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * meli is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with meli. If not, see <http://www.gnu.org/licenses/>. + */ + +use super::types::LineBreakClass::{self, *}; + +pub const LINE_BREAK_RULES: &[(u32, u32, LineBreakClass)] = &[ +"#, + ) + .unwrap(); for l in &line_break_table { file.write_all(format!(" (0x{:X}, 0x{:X}, {:?}),\n", l.0, l.1, l.2).as_bytes()) .unwrap(); } file.write_all(b"];").unwrap(); - std::fs::remove_file(&tmpdir_path).unwrap(); - tmpdir_path.pop(); - std::fs::remove_dir(&tmpdir_path).unwrap(); } Ok(()) } diff --git a/melib/src/text_processing/tables.rs b/melib/src/text_processing/tables.rs index 2c5081fe1..ea5120c6f 100644 --- a/melib/src/text_processing/tables.rs +++ b/melib/src/text_processing/tables.rs @@ -19,8 +19,7 @@ * along with meli. If not, see <http://www.gnu.org/licenses/>.
*/ -use super::types::LineBreakClass; -use super::types::LineBreakClass::*; +use super::types::LineBreakClass::{self, *}; pub const LINE_BREAK_RULES: &[(u32, u32, LineBreakClass)] = &[ (0x0, 0x8, CM), @@ -249,7 +248,7 @@ pub const LINE_BREAK_RULES: &[(u32, u32, LineBreakClass)] = &[ (0x85E, 0x85E, AL), (0x860, 0x86A, AL), (0x8A0, 0x8B4, AL), - (0x8B6, 0x8BD, AL), + (0x8B6, 0x8C7, AL), (0x8D3, 0x8E1, CM), (0x8E2, 0x8E2, AL), (0x8E3, 0x8FF, CM), @@ -368,7 +367,7 @@ pub const LINE_BREAK_RULES: &[(u32, u32, LineBreakClass)] = &[ (0xB47, 0xB48, CM), (0xB4B, 0xB4C, CM), (0xB4D, 0xB4D, CM), - (0xB56, 0xB56, CM), + (0xB55, 0xB56, CM), (0xB57, 0xB57, CM), (0xB5C, 0xB5D, AL), (0xB5F, 0xB61, AL), @@ -447,7 +446,7 @@ pub const LINE_BREAK_RULES: &[(u32, u32, LineBreakClass)] = &[ (0xCF1, 0xCF2, AL), (0xD00, 0xD01, CM), (0xD02, 0xD03, CM), - (0xD05, 0xD0C, AL), + (0xD04, 0xD0C, AL), (0xD0E, 0xD10, AL), (0xD12, 0xD3A, AL), (0xD3B, 0xD3C, CM), @@ -468,6 +467,7 @@ pub const LINE_BREAK_RULES: &[(u32, u32, LineBreakClass)] = &[ (0xD70, 0xD78, AL), (0xD79, 0xD79, PO), (0xD7A, 0xD7F, AL), + (0xD81, 0xD81, CM), (0xD82, 0xD83, CM), (0xD85, 0xD96, AL), (0xD9A, 0xDB1, AL), @@ -736,6 +736,7 @@ pub const LINE_BREAK_RULES: &[(u32, u32, LineBreakClass)] = &[ (0x1AA8, 0x1AAD, SA), (0x1AB0, 0x1ABD, CM), (0x1ABE, 0x1ABE, CM), + (0x1ABF, 0x1AC0, CM), (0x1B00, 0x1B03, CM), (0x1B04, 0x1B04, CM), (0x1B05, 0x1B33, AL), @@ -1275,7 +1276,7 @@ pub const LINE_BREAK_RULES: &[(u32, u32, LineBreakClass)] = &[ (0x2B55, 0x2B59, AI), (0x2B5A, 0x2B73, AL), (0x2B76, 0x2B95, AL), - (0x2B98, 0x2BFF, AL), + (0x2B97, 0x2BFF, AL), (0x2C00, 0x2C2E, AL), (0x2C30, 0x2C5E, AL), (0x2C60, 0x2C7B, AL), @@ -1357,6 +1358,8 @@ pub const LINE_BREAK_RULES: &[(u32, u32, LineBreakClass)] = &[ (0x2E4C, 0x2E4C, BA), (0x2E4D, 0x2E4D, AL), (0x2E4E, 0x2E4F, BA), + (0x2E50, 0x2E51, AL), + (0x2E52, 0x2E52, AL), (0x2E80, 0x2E99, ID), (0x2E9B, 0x2EF3, ID), (0x2F00, 0x2FD5, ID), @@ -1460,7 +1463,7 @@ pub const LINE_BREAK_RULES: &[(u32, u32, 
LineBreakClass)] = &[ (0x3190, 0x3191, ID), (0x3192, 0x3195, ID), (0x3196, 0x319F, ID), - (0x31A0, 0x31BA, ID), + (0x31A0, 0x31BF, ID), (0x31C0, 0x31E3, ID), (0x31F0, 0x31FF, CJ), (0x3200, 0x321E, ID), @@ -1475,11 +1478,10 @@ pub const LINE_BREAK_RULES: &[(u32, u32, LineBreakClass)] = &[ (0x32B1, 0x32BF, ID), (0x32C0, 0x32FF, ID), (0x3300, 0x33FF, ID), - (0x3400, 0x4DB5, ID), - (0x4DB6, 0x4DBF, ID), + (0x3400, 0x4DBF, ID), (0x4DC0, 0x4DFF, AL), - (0x4E00, 0x9FEF, ID), - (0x9FF0, 0x9FFF, ID), + (0x4E00, 0x9FFC, ID), + (0x9FFD, 0x9FFF, ID), (0xA000, 0xA014, ID), (0xA015, 0xA015, NS), (0xA016, 0xA48C, ID), @@ -1522,7 +1524,8 @@ pub const LINE_BREAK_RULES: &[(u32, u32, LineBreakClass)] = &[ (0xA78B, 0xA78E, AL), (0xA78F, 0xA78F, AL), (0xA790, 0xA7BF, AL), - (0xA7C2, 0xA7C6, AL), + (0xA7C2, 0xA7CA, AL), + (0xA7F5, 0xA7F6, AL), (0xA7F7, 0xA7F7, AL), (0xA7F8, 0xA7F9, AL), (0xA7FA, 0xA7FA, AL), @@ -1538,6 +1541,7 @@ pub const LINE_BREAK_RULES: &[(u32, u32, LineBreakClass)] = &[ (0xA825, 0xA826, CM), (0xA827, 0xA827, CM), (0xA828, 0xA82B, AL), + (0xA82C, 0xA82C, CM), (0xA830, 0xA835, AL), (0xA836, 0xA837, AL), (0xA838, 0xA838, PO), @@ -1642,7 +1646,9 @@ pub const LINE_BREAK_RULES: &[(u32, u32, LineBreakClass)] = &[ (0xAB30, 0xAB5A, AL), (0xAB5B, 0xAB5B, AL), (0xAB5C, 0xAB5F, AL), - (0xAB60, 0xAB67, AL), + (0xAB60, 0xAB68, AL), + (0xAB69, 0xAB69, AL), + (0xAB6A, 0xAB6B, AL), (0xAB70, 0xABBF, AL), (0xABC0, 0xABE2, AL), (0xABE3, 0xABE4, CM), @@ -2614,7 +2620,7 @@ pub const LINE_BREAK_RULES: &[(u32, u32, LineBreakClass)] = &[ (0x10179, 0x10189, AL), (0x1018A, 0x1018B, AL), (0x1018C, 0x1018E, AL), - (0x10190, 0x1019B, AL), + (0x10190, 0x1019C, AL), (0x101A0, 0x101A0, AL), (0x101D0, 0x101FC, AL), (0x101FD, 0x101FD, CM), @@ -2718,6 +2724,10 @@ pub const LINE_BREAK_RULES: &[(u32, u32, LineBreakClass)] = &[ (0x10D24, 0x10D27, CM), (0x10D30, 0x10D39, NU), (0x10E60, 0x10E7E, AL), + (0x10E80, 0x10EA9, AL), + (0x10EAB, 0x10EAC, CM), + (0x10EAD, 0x10EAD, BA), + (0x10EB0, 0x10EB1, AL), 
(0x10F00, 0x10F1C, AL), (0x10F1D, 0x10F26, AL), (0x10F27, 0x10F27, AL), @@ -2725,6 +2735,8 @@ pub const LINE_BREAK_RULES: &[(u32, u32, LineBreakClass)] = &[ (0x10F46, 0x10F50, CM), (0x10F51, 0x10F54, AL), (0x10F55, 0x10F59, AL), + (0x10FB0, 0x10FC4, AL), + (0x10FC5, 0x10FCB, AL), (0x10FE0, 0x10FF6, AL), (0x11000, 0x11000, CM), (0x11001, 0x11001, CM), @@ -2758,6 +2770,7 @@ pub const LINE_BREAK_RULES: &[(u32, u32, LineBreakClass)] = &[ (0x11140, 0x11143, BA), (0x11144, 0x11144, AL), (0x11145, 0x11146, CM), + (0x11147, 0x11147, AL), (0x11150, 0x11172, AL), (0x11173, 0x11173, CM), (0x11174, 0x11174, AL), @@ -2775,6 +2788,8 @@ pub const LINE_BREAK_RULES: &[(u32, u32, LineBreakClass)] = &[ (0x111C8, 0x111C8, BA), (0x111C9, 0x111CC, CM), (0x111CD, 0x111CD, AL), + (0x111CE, 0x111CE, CM), + (0x111CF, 0x111CF, CM), (0x111D0, 0x111D9, NU), (0x111DA, 0x111DA, AL), (0x111DB, 0x111DB, BB), @@ -2837,10 +2852,10 @@ pub const LINE_BREAK_RULES: &[(u32, u32, LineBreakClass)] = &[ (0x1144B, 0x1144E, BA), (0x1144F, 0x1144F, AL), (0x11450, 0x11459, NU), - (0x1145B, 0x1145B, BA), + (0x1145A, 0x1145B, BA), (0x1145D, 0x1145D, AL), (0x1145E, 0x1145E, CM), - (0x1145F, 0x1145F, AL), + (0x1145F, 0x11461, AL), (0x11480, 0x114AF, AL), (0x114B0, 0x114B2, CM), (0x114B3, 0x114B8, CM), @@ -2910,6 +2925,23 @@ pub const LINE_BREAK_RULES: &[(u32, u32, LineBreakClass)] = &[ (0x118E0, 0x118E9, NU), (0x118EA, 0x118F2, AL), (0x118FF, 0x118FF, AL), + (0x11900, 0x11906, AL), + (0x11909, 0x11909, AL), + (0x1190C, 0x11913, AL), + (0x11915, 0x11916, AL), + (0x11918, 0x1192F, AL), + (0x11930, 0x11935, CM), + (0x11937, 0x11938, CM), + (0x1193B, 0x1193C, CM), + (0x1193D, 0x1193D, CM), + (0x1193E, 0x1193E, CM), + (0x1193F, 0x1193F, AL), + (0x11940, 0x11940, CM), + (0x11941, 0x11941, AL), + (0x11942, 0x11942, CM), + (0x11943, 0x11943, CM), + (0x11944, 0x11946, BA), + (0x11950, 0x11959, NU), (0x119A0, 0x119A7, AL), (0x119AA, 0x119D0, AL), (0x119D1, 0x119D3, CM), @@ -2993,6 +3025,7 @@ pub const LINE_BREAK_RULES: 
&[(u32, u32, LineBreakClass)] = &[ (0x11EF3, 0x11EF4, CM), (0x11EF5, 0x11EF6, CM), (0x11EF7, 0x11EF8, AL), + (0x11FB0, 0x11FB0, AL), (0x11FC0, 0x11FD4, AL), (0x11FD5, 0x11FDC, AL), (0x11FDD, 0x11FE0, PO), @@ -3055,8 +3088,12 @@ pub const LINE_BREAK_RULES: &[(u32, u32, LineBreakClass)] = &[ (0x16FE0, 0x16FE1, NS), (0x16FE2, 0x16FE2, NS), (0x16FE3, 0x16FE3, NS), + (0x16FE4, 0x16FE4, GL), + (0x16FF0, 0x16FF1, CM), (0x17000, 0x187F7, ID), - (0x18800, 0x18AF2, ID), + (0x18800, 0x18AFF, ID), + (0x18B00, 0x18CD5, AL), + (0x18D00, 0x18D08, ID), (0x1B000, 0x1B0FF, ID), (0x1B100, 0x1B11E, ID), (0x1B150, 0x1B152, CJ), @@ -3229,7 +3266,8 @@ pub const LINE_BREAK_RULES: &[(u32, u32, LineBreakClass)] = &[ (0x1F16A, 0x1F16C, AL), (0x1F16D, 0x1F16F, ID), (0x1F170, 0x1F1AC, AI), - (0x1F1AD, 0x1F1E5, ID), + (0x1F1AD, 0x1F1AD, ID), + (0x1F1AE, 0x1F1E5, ID), (0x1F1E6, 0x1F1FF, RI), (0x1F200, 0x1F202, ID), (0x1F203, 0x1F20F, ID), @@ -3320,12 +3358,12 @@ pub const LINE_BREAK_RULES: &[(u32, u32, LineBreakClass)] = &[ (0x1F6C0, 0x1F6C0, EB), (0x1F6C1, 0x1F6CB, ID), (0x1F6CC, 0x1F6CC, EB), - (0x1F6CD, 0x1F6D5, ID), - (0x1F6D6, 0x1F6DF, ID), + (0x1F6CD, 0x1F6D7, ID), + (0x1F6D8, 0x1F6DF, ID), (0x1F6E0, 0x1F6EC, ID), (0x1F6ED, 0x1F6EF, ID), - (0x1F6F0, 0x1F6FA, ID), - (0x1F6FB, 0x1F6FF, ID), + (0x1F6F0, 0x1F6FC, ID), + (0x1F6FD, 0x1F6FF, ID), (0x1F700, 0x1F773, AL), (0x1F774, 0x1F77F, ID), (0x1F780, 0x1F7D4, AL), @@ -3342,9 +3380,11 @@ pub const LINE_BREAK_RULES: &[(u32, u32, LineBreakClass)] = &[ (0x1F860, 0x1F887, AL), (0x1F888, 0x1F88F, ID), (0x1F890, 0x1F8AD, AL), - (0x1F8AE, 0x1F8FF, ID), + (0x1F8AE, 0x1F8AF, ID), + (0x1F8B0, 0x1F8B1, ID), + (0x1F8B2, 0x1F8FF, ID), (0x1F900, 0x1F90B, AL), - (0x1F90C, 0x1F90C, ID), + (0x1F90C, 0x1F90C, EB), (0x1F90D, 0x1F90E, ID), (0x1F90F, 0x1F90F, EB), (0x1F910, 0x1F917, ID), @@ -3355,22 +3395,18 @@ pub const LINE_BREAK_RULES: &[(u32, u32, LineBreakClass)] = &[ (0x1F930, 0x1F939, EB), (0x1F93A, 0x1F93B, ID), (0x1F93C, 0x1F93E, EB), - (0x1F93F, 0x1F971, 
ID), - (0x1F972, 0x1F972, ID), - (0x1F973, 0x1F976, ID), - (0x1F977, 0x1F979, ID), - (0x1F97A, 0x1F9A2, ID), - (0x1F9A3, 0x1F9A4, ID), - (0x1F9A5, 0x1F9AA, ID), - (0x1F9AB, 0x1F9AD, ID), - (0x1F9AE, 0x1F9B4, ID), + (0x1F93F, 0x1F976, ID), + (0x1F977, 0x1F977, EB), + (0x1F978, 0x1F978, ID), + (0x1F979, 0x1F979, ID), + (0x1F97A, 0x1F9B4, ID), (0x1F9B5, 0x1F9B6, EB), (0x1F9B7, 0x1F9B7, ID), (0x1F9B8, 0x1F9B9, EB), (0x1F9BA, 0x1F9BA, ID), (0x1F9BB, 0x1F9BB, EB), - (0x1F9BC, 0x1F9CA, ID), - (0x1F9CB, 0x1F9CC, ID), + (0x1F9BC, 0x1F9CB, ID), + (0x1F9CC, 0x1F9CC, ID), (0x1F9CD, 0x1F9CF, EB), (0x1F9D0, 0x1F9D0, ID), (0x1F9D1, 0x1F9DD, EB), @@ -3379,17 +3415,26 @@ pub const LINE_BREAK_RULES: &[(u32, u32, LineBreakClass)] = &[ (0x1FA54, 0x1FA5F, ID), (0x1FA60, 0x1FA6D, ID), (0x1FA6E, 0x1FA6F, ID), - (0x1FA70, 0x1FA73, ID), - (0x1FA74, 0x1FA77, ID), + (0x1FA70, 0x1FA74, ID), + (0x1FA75, 0x1FA77, ID), (0x1FA78, 0x1FA7A, ID), (0x1FA7B, 0x1FA7F, ID), - (0x1FA80, 0x1FA82, ID), - (0x1FA83, 0x1FA8F, ID), - (0x1FA90, 0x1FA95, ID), - (0x1FA96, 0x1FAFF, ID), - (0x1FB00, 0x1FFFD, ID), - (0x20000, 0x2A6D6, ID), - (0x2A6D7, 0x2A6FF, ID), + (0x1FA80, 0x1FA86, ID), + (0x1FA87, 0x1FA8F, ID), + (0x1FA90, 0x1FAA8, ID), + (0x1FAA9, 0x1FAAF, ID), + (0x1FAB0, 0x1FAB6, ID), + (0x1FAB7, 0x1FABF, ID), + (0x1FAC0, 0x1FAC2, ID), + (0x1FAC3, 0x1FACF, ID), + (0x1FAD0, 0x1FAD6, ID), + (0x1FAD7, 0x1FAFF, ID), + (0x1FB00, 0x1FB92, AL), + (0x1FB94, 0x1FBCA, AL), + (0x1FBF0, 0x1FBF9, NU), + (0x1FC00, 0x1FFFD, ID), + (0x20000, 0x2A6DD, ID), + (0x2A6DE, 0x2A6FF, ID), (0x2A700, 0x2B734, ID), (0x2B735, 0x2B73F, ID), (0x2B740, 0x2B81D, ID), @@ -3401,10 +3446,11 @@ pub const LINE_BREAK_RULES: &[(u32, u32, LineBreakClass)] = &[ (0x2F800, 0x2FA1D, ID), (0x2FA1E, 0x2FA1F, ID), (0x2FA20, 0x2FFFD, ID), - (0x30000, 0x3FFFD, ID), + (0x30000, 0x3134A, ID), + (0x3134B, 0x3FFFD, ID), (0xE0001, 0xE0001, CM), (0xE0020, 0xE007F, CM), (0xE0100, 0xE01EF, CM), (0xF0000, 0xFFFFD, XX), (0x100000, 0x10FFFD, XX), -]; +]; \ No newline 
at end of file