/// Cost of putting words `i..j` on a single line, on top of the best known cost of
/// everything that precedes word `i`.
///
/// The width of the candidate line is `offsets[j] - offsets[i]` plus one column for every
/// gap between adjacent words (`j - i - 1`).
///
/// * If that width exceeds `width`, the result is a penalty of 65536 per excess column and
///   `minima` is not consulted.
/// * Otherwise the result is `minima[i]` plus the square of the unused columns.
///
/// `i` must be smaller than `j`; otherwise the unsigned subtraction underflows.
///
/// All additions and multiplications of costs saturate at `usize::MAX`, so a `minima` entry
/// that still holds a "not computed yet" sentinel close to `usize::MAX` yields a huge cost
/// instead of overflowing.
fn cost(i: usize, j: usize, width: usize, minima: &[usize], offsets: &[usize]) -> usize {
    let line_width = offsets[j] - offsets[i] + j - i - 1;
    if line_width > width {
        // Over-long lines are always worse than any line that fits.
        return (line_width - width).saturating_mul(65536);
    }
    let slack = width - line_width;
    minima[i].saturating_add(slack.saturating_mul(slack))
}
offsets); + if c < minima[columns[j]] { + minima[columns[j]] = c; + breaks[columns[j]] = rows[i]; + } + if rows[i] < end { + i += 1; + } else { + j += 2; + } + } +} + +fn linear(text: &str, width: usize) -> Vec { + let mut words = Vec::new(); + let breaks = LineBreakCandidateIter::new(text).collect::>(); + { + let mut prev = 0; + for b in breaks { + if &text[prev..b.0] != "\n" { + words.push(text[prev..b.0].trim_end_matches("\n")); + if text[prev..b.0].ends_with("\n") { + words.push(" "); + } + } + prev = b.0; + } + if &text[prev..] != "\n" { + words.push(text[prev..].trim_end_matches("\n")); + } + } + let count = words.len(); + let mut minima = vec![std::usize::MAX - 1; count + 1]; + minima[0] = 0; + let mut offsets = Vec::with_capacity(words.len()); + offsets.push(0); + for w in words.iter() { + offsets.push(offsets.iter().last().unwrap() + w.len()); + } + + let mut breaks = vec![0; count + 1]; + + let mut n = count + 1; + let mut i = 1; + let mut offset = 0; + loop { + let r = std::cmp::min(n, 2 * i); + let edge = i + offset; + smawk( + &mut (offset..edge).collect(), + &mut (edge..(r + offset)).collect(), + &mut minima, + &mut breaks, + width, + &offsets, + ); + let x = minima[r - 1 + offset]; + let mut for_was_broken = false; + for j in i..(r - 1) { + let y = cost(j + offset, r - 1 + offset, width, &minima, &offsets); + if y <= x { + n -= j; + i = 1; + offset += j; + for_was_broken = true; + break; + } + } + + if !for_was_broken || i >= (r - 1) { + if r == n { + break; + } + i *= 2; + } + } + let mut lines = Vec::new(); + let mut j = count; + while j > 0 { + let mut line = String::new(); + for i in breaks[j]..j { + line.push_str(words[i]); + } + lines.push(line); + j = breaks[j]; + } + lines.reverse(); + lines +} + +fn main() -> Result<()> { + let text = std::fs::read_to_string(std::env::args().nth(1).unwrap())?; + let paragraphs = text.split("\n\n").collect::>(); + for (i, p) in paragraphs.iter().enumerate() { + for l in linear(&p, 72) { + println!("{}", 
l.trim()); + } + if i + 1 < paragraphs.len() { + println!(""); + } + } + Ok(()) +} diff --git a/text_processing/Cargo.toml b/text_processing/Cargo.toml index 4b6b1c02..2074a00c 100644 --- a/text_processing/Cargo.toml +++ b/text_processing/Cargo.toml @@ -4,6 +4,7 @@ version = "0.0.1" #:version authors = ["Manos Pitsidianakis "] workspace = ".." edition = "2018" +build = "build.rs" [dependencies] unicode-segmentation = "1.2.1" diff --git a/text_processing/build.rs b/text_processing/build.rs new file mode 100644 index 00000000..d740676b --- /dev/null +++ b/text_processing/build.rs @@ -0,0 +1,73 @@ +const LINE_BREAK_TABLE_URL: &str = "http://www.unicode.org/Public/UCD/latest/ucd/LineBreak.txt"; +use std::fs::File; +use std::io::prelude::*; +use std::io::BufReader; +use std::path::PathBuf; +use std::process::Command; + +include!("src/types.rs"); + +fn main() -> Result<(), std::io::Error> { + let mod_path = PathBuf::from("src/tables.rs"); + if mod_path.exists() { + eprintln!( + "{} already exists, delete it if you want to replace it.", + mod_path.display() + ); + std::process::exit(0); + } + let mut tmpdir_path = PathBuf::from( + std::str::from_utf8(&Command::new("mktemp").arg("-d").output()?.stdout) + .unwrap() + .trim(), + ); + tmpdir_path.push("LineBreak.txt"); + Command::new("curl") + .args(&["-o", tmpdir_path.to_str().unwrap(), LINE_BREAK_TABLE_URL]) + .output()?; + + let file = File::open(&tmpdir_path)?; + let buf_reader = BufReader::new(file); + + let mut line_break_table: Vec<(u32, u32, LineBreakClass)> = Vec::with_capacity(3800); + for line in buf_reader.lines() { + let line = line.unwrap(); + if line.starts_with('#') || line.starts_with(' ') || line.is_empty() { + continue; + } + let tokens: &str = line.split_whitespace().next().unwrap(); + + let semicolon_idx: usize = tokens.chars().position(|c| c == ';').unwrap(); + /* LineBreak.txt list is ascii encoded so we can assume each char takes one byte: */ + let chars_str: &str = &tokens[..semicolon_idx]; + + let 
mut codepoint_iter = chars_str.split(".."); + + let first_codepoint: u32 = + u32::from_str_radix(std::dbg!(codepoint_iter.next().unwrap()), 16).unwrap(); + + let sec_codepoint: u32 = codepoint_iter + .next() + .map(|v| u32::from_str_radix(std::dbg!(v), 16).unwrap()) + .unwrap_or(first_codepoint); + let class = &tokens[semicolon_idx + 1..semicolon_idx + 1 + 2]; + line_break_table.push((first_codepoint, sec_codepoint, LineBreakClass::from(class))); + } + + let mut file = File::create(&mod_path)?; + file.write_all(b"use crate::types::LineBreakClass::*;\n") + .unwrap(); + file.write_all(b"use crate::types::LineBreakClass;\n\n") + .unwrap(); + file.write_all(b"const line_break_rules: &'static [(u32, u32, LineBreakClass)] = &[\n") + .unwrap(); + for l in &line_break_table { + file.write_all(format!(" (0x{:X}, 0x{:X}, {:?}),\n", l.0, l.1, l.2).as_bytes()) + .unwrap(); + } + file.write_all(b"];").unwrap(); + std::fs::remove_file(&tmpdir_path).unwrap(); + tmpdir_path.pop(); + std::fs::remove_dir(&tmpdir_path).unwrap(); + Ok(()) +} diff --git a/text_processing/src/lib.rs b/text_processing/src/lib.rs index 3b7b33ae..59d03123 100644 --- a/text_processing/src/lib.rs +++ b/text_processing/src/lib.rs @@ -1,4 +1,8 @@ pub mod grapheme_clusters; +pub mod line_break; +mod tables; +mod types; pub mod wcwidth; pub use grapheme_clusters::*; +pub use line_break::*; pub use wcwidth::*; diff --git a/text_processing/src/line_break.rs b/text_processing/src/line_break.rs new file mode 100644 index 00000000..5f42e7b9 --- /dev/null +++ b/text_processing/src/line_break.rs @@ -0,0 +1,703 @@ +extern crate unicode_segmentation; +use self::unicode_segmentation::UnicodeSegmentation; +use crate::tables::LINE_BREAK_RULES; +use crate::types::LineBreakClass; +use core::cmp::Ordering; +use core::iter::Peekable; +use core::str::FromStr; +use LineBreakClass::*; + +#[derive(Debug, PartialEq)] +pub enum LineBreakCandidate { + MandatoryBreak, + BreakAllowed, + // NoBreak, Not used. 
/// Evaluates to the first `char` of the string slice `$grapheme`, wrapped in a `Result`:
/// `Ok(c)` when the slice is non-empty, `Err(())` when it is empty.
///
/// An empty slice is a legitimate input (for example the result of `trim_start()` on text
/// that consists only of whitespace), so it is reported as an error value rather than
/// causing a panic.
///
/// Side effects: none
macro_rules! get_base_character {
    ($grapheme:expr) => {{
        $grapheme.chars().next().ok_or(())
    }};
}

/// Evaluates to the line break class of the first character of `$grapheme`, looked up in
/// `LINE_BREAK_RULES` with `search_table`. Evaluates to `XX` when `$grapheme` is empty.
///
/// Side effects: none
macro_rules! get_class {
    ($grapheme:expr) => {{
        get_base_character!($grapheme)
            .map(|c| search_table(c as u32, LINE_BREAK_RULES))
            .unwrap_or(XX)
    }};
}
next_grapheme_class { + ($graph_iter:ident, $grapheme:ident) => ({ + if let Some((_, g)) = $graph_iter.next() { + $grapheme = g; + Some(get_class!(g)) + } else { None } + }); + (($next_char:ident is $class:expr)) => ({ + $next_char.is_some() && get_class!(($next_char.unwrap().1)) == $class + }); + (($next_char:ident is $($class:ident),+)) => ({ + $next_char.is_some() && ($(get_class!(($next_char.unwrap().1)) == $class)||+) + }); +} + +/// Returns positions where breaks can happen +/// Examples: +/// ``` +/// use text_processing::{self, LineBreakCandidate::{self, *}}; +/// use text_processing::line_break::LineBreakCandidateIter; +/// +/// assert!(LineBreakCandidateIter::new("").collect::>().is_empty()); +/// assert_eq!(&[(7, BreakAllowed), (12, MandatoryBreak)], +/// LineBreakCandidateIter::new("Sample Text.").collect::>().as_slice()); +/// assert_eq!(&[(3, MandatoryBreak), (7, MandatoryBreak), (10, BreakAllowed), (17, MandatoryBreak)], +/// LineBreakCandidateIter::new("Sa\nmp\r\nle T(e)xt.").collect::>().as_slice()); +/// ``` +impl<'a> Iterator for LineBreakCandidateIter<'a> { + type Item = (usize, LineBreakCandidate); + fn next(&mut self) -> Option { + // After end of text, there are no breaks. + if self.pos >= self.text.len() { + return None; + } + // LB3 Always break at the end of text + if self.pos + 1 == self.text.len() { + self.pos += 1; + return Some((self.pos, MandatoryBreak)); + } + + let (idx, mut grapheme) = self.iter.next().unwrap(); + let LineBreakCandidateIter { + ref mut iter, + ref text, + ref mut reg_ind_streak, + ref mut pos, + } = self; + let iter = iter.by_ref(); + + debug_assert_eq!(idx, *pos); + + // LB2 Never break at the start of text + if idx == 0 { + *pos += grapheme.len(); + return self.next(); + } + + let class = get_class!(grapheme); + + if class != RI { + *reg_ind_streak = 0; + } + + /* LB1 Assign a line breaking class to each code point of the input. 
Resolve AI, CB, CJ, + * SA, SG, and XX into other line breaking classes depending on criteria outside the scope + * of this algorithm. + * + * In the absence of such criteria all characters with a specific combination of original + * class and General_Category property value are resolved as follows: + * Resolved Original General_Category + * AL AI, SG, XX Any + * CM SA Only Mn or Mc + * AL SA Any except Mn and Mc + * NS SJ Any + */ + + // TODO: LB1 + + /* Check if next character class allows breaks before it */ + let next_char: Option<&(usize, &str)> = iter.peek(); + + match class { + BK => { + // LB4 Always Break after hard line breaks. + *pos += grapheme.len(); + return Some((*pos, MandatoryBreak)); + } + // LB5 Treat CR followed by LF, as well as CR, LF, and NL as hard line breaks + CR if next_grapheme_class!((next_char is LF)) => { + *pos += grapheme.len(); + assert!(Some(LF) == next_grapheme_class!(iter, grapheme)); + *pos += grapheme.len(); + return Some((*pos, MandatoryBreak)); + } + CR | LF | NL => { + *pos += grapheme.len(); + return Some((*pos, MandatoryBreak)); + } + _ => {} + } + if let Some((_, next_grapheme)) = next_char { + let next_class = get_class!(next_grapheme); + match next_class { + /* LB6 Do not break before hard line breaks. × ( BK | CR | LF | NL ) */ + BK | CR | LF | NL => { + *pos += grapheme.len(); + return self.next(); + } + /* LB7 Do not break before spaces or zero width + * space. × SP × ZW */ + SP | ZW => { + *pos += grapheme.len(); + return self.next(); + } + _ => {} + } + } + match class { + ZW => { + // LB8 Break before any character following a zero-width space, even if one or more + // spaces intervene + // ZW SP* ÷ + *pos += grapheme.len(); + while Some(SP) == next_grapheme_class!(iter, grapheme) { + *pos += grapheme.len(); + } + return Some((*pos, MandatoryBreak)); + } + ZWJ => { + // LB8a Do not break after a zero width joiner. 
+ *pos += grapheme.len(); + return self.next(); + } + + CM => { + // LB9 Do not break a combining character sequence; treat it as if it has the line + // breaking class of the base character in all of the following rules. Treat ZWJ as + // if it were CM. + // Treat X (CM | ZWJ)* as if it were X. + // where X is any line break class except BK, CR, LF, NL, SP, or ZW. + + /* Unreachable since we break lines based on graphemes, not characters */ + unreachable!(); + } + WJ => { + /*: LB11 Do not break before or after Word joiner and related characters.*/ + *pos += grapheme.len(); + /* Get next grapheme */ + if next_grapheme_class!(iter, grapheme).is_some() { + *pos += grapheme.len(); + } + return self.next(); + } + GL => { + /*LB12 Non-breaking characters: LB12 Do not break after NBSP and related characters.*/ + *pos += grapheme.len(); + return self.next(); + } + _ => {} + } + if let Some((next_idx, next_grapheme)) = next_char { + let next_class = get_class!(next_grapheme); + match next_class { + GL if ![SP, BA, HY].contains(&class) => { + /* LB12a Do not break before NBSP and related characters, except after spaces and + * hyphens. [^SP BA HY] × GL + * Also LB12 Do not break after NBSP and related characters */ + *pos += grapheme.len(); + return self.next(); + } + /* LB13 Do not break before ‘]’ or ‘!’ or ‘;’ or ‘/’, even after spaces. */ + CL | CP | EX | IS | SY => { + *pos = *next_idx; + return self.next(); + } + _ => {} + } + } + + match class { + /* LB13 Do not break before ‘]’ or ‘!’ or ‘;’ or ‘/’, even after spaces. */ + SP if [CL, CP, EX, IS, SY].contains(&get_class!(text[idx..].trim_start())) => { + *pos += grapheme.len(); + while ![CL, CP, EX, IS, SY].contains(&next_grapheme_class!(iter, grapheme).unwrap()) + { + *pos += grapheme.len(); + } + *pos += grapheme.len(); + return self.next(); + } + OP => { + /* LB14 Do not break after ‘[’, even after spaces. 
+ * OP SP* × + */ + while let Some((idx, grapheme)) = self.iter.next() { + *pos = idx + grapheme.len(); + if !(get_class!(grapheme) == SP) { + break; + } + } + return self.next(); + } + QU if get_class!(text[idx..].trim_start()) == OP => { + /* LB15 Do not break within ‘”[’, even with intervening spaces. + * QU SP* × OP */ + *pos += grapheme.len(); + while Some(SP) == next_grapheme_class!(iter, grapheme) { + *pos += grapheme.len(); + } + *pos = idx; + return self.next(); + } + QU => { + /* LB19 Do not break before or after quotation marks, such as ‘ ” ’. */ + *pos += grapheme.len(); + if let Some((_, g)) = self.iter.next() { + *pos += g.len(); + } + return self.next(); + } + LineBreakClass::CL | LineBreakClass::CP + if get_class!(text[idx..].trim_start()) == NS => + { + /* LB16 Do not break between closing punctuation and a nonstarter (lb=NS), even with + * intervening spaces. + * (CL | CP) SP* × NS */ + *pos += grapheme.len(); + while Some(SP) == next_grapheme_class!(iter, grapheme) { + *pos += grapheme.len(); + } + return self.next(); + } + B2 if get_class!(text[idx..].trim_start()) == B2 => { + *pos += grapheme.len(); + while Some(SP) == next_grapheme_class!(iter, grapheme) { + *pos += grapheme.len(); + } + return self.next(); + } + SP => { + /* LB18 Break after spaces. SP ÷ */ + // Space 0x20 is 1 byte long. + *pos += 1; + return Some((*pos, BreakAllowed)); + } + _ => {} + } + if let Some((next_idx, next_grapheme)) = next_char { + let next_class = get_class!(next_grapheme); + match next_class { + QU if class != SP => { + /* LB19 Do not break before or after quotation marks, such as ‘ ” ’. */ + *pos = *next_idx + next_grapheme.len(); + self.iter.next(); + return self.next(); + } + _ => {} + } + } + match class { + CB => { + /* LB20 Break before and after unresolved CB. 
*/ + *pos += grapheme.len(); + return Some((*pos - 1, BreakAllowed)); + } + /* LB21 Do not break before hyphen-minus, other hyphens, fixed-width spaces, small + * kana, and other non-starters, or after acute accents. × BA, × HY, × NS, BB × */ + BB => { + *pos += grapheme.len(); + return self.next(); + } + _ => {} + } + + if let Some((_, next_grapheme)) = next_char { + let next_class = get_class!(next_grapheme); + match next_class { + BA | HY | NS => { + /* LB21 Do not break before hyphen-minus, other hyphens, fixed-width spaces, small + * kana, and other non-starters, or after acute accents. × BA, × HY, × NS, BB × */ + *pos += grapheme.len(); + return self.next(); + } + _ => {} + } + } + match class { + HL if next_grapheme_class!((next_char is HY, BA)) => { + /* LB21a Don’t break after Hebrew + Hyphen. HL (HY | BA) × */ + let (idx, next_grapheme) = next_char.unwrap(); + *pos = idx + next_grapheme.len(); + self.iter.next(); + return self.next(); + } + /* LB21b Don’t break between ,Solidus and Hebrew letters. SY × HL */ + SY if next_grapheme_class!((next_char is HL)) => { + let (idx, next_grapheme) = next_char.unwrap(); + *pos = idx + next_grapheme.len(); + /* bypass next_char */ + self.iter.next().unwrap(); + if let Some((idx, next_grapheme)) = self.iter.next() { + *pos = idx + next_grapheme.len(); + } + return self.next(); + } + /* LB22 Do not break between two ellipses, or between letters, numbers or excla- + * mations and ellipsis. 
+ * Examples: ‘9...’, ‘a...’, ‘H...’ + * (AL | HL) × IN */ + AL | HL if next_grapheme_class!((next_char is IN)) => { + let (idx, next_grapheme) = next_char.unwrap(); + *pos = idx + next_grapheme.len(); + self.iter.next(); + return self.next(); + } + /* EX × IN */ + EX if next_grapheme_class!((next_char is IN)) => { + let (idx, next_grapheme) = next_char.unwrap(); + *pos = idx + next_grapheme.len(); + self.iter.next(); + return self.next(); + } + EX => { + // LB13 + *pos += grapheme.len(); + return self.next(); + } + /* (ID | EB | EM) × IN */ + ID | EB | EM if next_grapheme_class!((next_char is IN)) => { + let (idx, next_grapheme) = next_char.unwrap(); + *pos = idx + next_grapheme.len(); + self.iter.next(); + return self.next(); + } + /* IN × IN */ + IN if next_grapheme_class!((next_char is IN)) => { + let (idx, next_grapheme) = next_char.unwrap(); + *pos = idx + next_grapheme.len(); + self.iter.next(); + return self.next(); + } + /* NU × IN */ + NU if next_grapheme_class!((next_char is IN)) => { + let (idx, next_grapheme) = next_char.unwrap(); + *pos = idx + next_grapheme.len(); + self.iter.next(); + return self.next(); + } + /* LB23 Do not break between digits and letters. + * (AL | HL) × NU */ + AL | HL if next_grapheme_class!((next_char is NU)) => { + let (idx, next_grapheme) = next_char.unwrap(); + *pos = idx + next_grapheme.len(); + self.iter.next(); + return self.next(); + } + /* NU × (AL | HL) */ + NU if next_grapheme_class!((next_char is AL, HL)) => { + let (idx, next_grapheme) = next_char.unwrap(); + *pos = idx + next_grapheme.len(); + self.iter.next(); + return self.next(); + } + /* LB23a Do not break between numeric prefixes and ideographs, or between ideographs + * and numeric postfixes. 
+ * PR × (ID | EB | EM) */ + PR if next_grapheme_class!((next_char is ID, EB, EM)) => { + let (idx, next_grapheme) = next_char.unwrap(); + *pos = idx + next_grapheme.len(); + self.iter.next(); + return self.next(); + } + /* (ID | EB | EM) × PO */ + ID | EB | EM if next_grapheme_class!((next_char is PO)) => { + let (idx, next_grapheme) = next_char.unwrap(); + *pos = idx + next_grapheme.len(); + self.iter.next(); + return self.next(); + } + /* B24 Do not break between numeric prefix/postfix and letters, or between + letters and prefix/postfix. + (PR | PO) × (AL | HL)*/ + PR | PO if next_grapheme_class!((next_char is AL, HL)) => { + let (idx, next_grapheme) = next_char.unwrap(); + *pos = idx + next_grapheme.len(); + self.iter.next(); + return self.next(); + } + /*(AL | HL) × (PR | PO) */ + AL | HL if next_grapheme_class!((next_char is PR, PO)) => { + let (idx, next_grapheme) = next_char.unwrap(); + *pos = idx + next_grapheme.len(); + self.iter.next(); + return self.next(); + } + /* LB25 Do not break between the following pairs of classes relevant to numbers: + * CL × PO */ + CL if next_grapheme_class!((next_char is PO)) => { + let (idx, next_grapheme) = next_char.unwrap(); + *pos = idx + next_grapheme.len(); + self.iter.next(); + return self.next(); + } + /* CP × PO */ + CP if next_grapheme_class!((next_char is PO)) => { + let (idx, next_grapheme) = next_char.unwrap(); + *pos = idx + next_grapheme.len(); + self.iter.next(); + return self.next(); + } + /* CL × PR */ + CL if next_grapheme_class!((next_char is PR)) => { + let (idx, next_grapheme) = next_char.unwrap(); + *pos = idx + next_grapheme.len(); + self.iter.next(); + return self.next(); + } + /* CP × PR */ + CP if next_grapheme_class!((next_char is PR)) => { + let (idx, next_grapheme) = next_char.unwrap(); + *pos = idx + next_grapheme.len(); + self.iter.next(); + return self.next(); + } + /* NU × PO */ + NU if next_grapheme_class!((next_char is PO)) => { + let (idx, next_grapheme) = next_char.unwrap(); + *pos = 
idx + next_grapheme.len(); + self.iter.next(); + return self.next(); + } + /* NU × PR */ + NU if next_grapheme_class!((next_char is PR)) => { + let (idx, next_grapheme) = next_char.unwrap(); + *pos = idx + next_grapheme.len(); + self.iter.next(); + return self.next(); + } + /* PO × OP */ + PO if next_grapheme_class!((next_char is OP)) => { + let (idx, next_grapheme) = next_char.unwrap(); + *pos = idx + next_grapheme.len(); + self.iter.next(); + return self.next(); + } + /* PO × NU */ + PO if next_grapheme_class!((next_char is NU)) => { + let (idx, next_grapheme) = next_char.unwrap(); + *pos = idx + next_grapheme.len(); + self.iter.next(); + return self.next(); + } + /* PR × OP */ + PR if next_grapheme_class!((next_char is OP)) => { + let (idx, next_grapheme) = next_char.unwrap(); + *pos = idx + next_grapheme.len(); + self.iter.next(); + return self.next(); + } + /* PR × NU */ + PR if next_grapheme_class!((next_char is NU)) => { + let (idx, next_grapheme) = next_char.unwrap(); + *pos = idx + next_grapheme.len(); + self.iter.next(); + return self.next(); + } + /* HY × NU */ + HY if next_grapheme_class!((next_char is NU)) => { + let (idx, next_grapheme) = next_char.unwrap(); + *pos = idx + next_grapheme.len(); + self.iter.next(); + return self.next(); + } + /* IS × NU */ + IS if next_grapheme_class!((next_char is NU)) => { + let (idx, next_grapheme) = next_char.unwrap(); + *pos = idx + next_grapheme.len(); + self.iter.next(); + return self.next(); + } + /* NU × NU */ + NU if next_grapheme_class!((next_char is NU)) => { + let (idx, next_grapheme) = next_char.unwrap(); + *pos = idx + next_grapheme.len(); + self.iter.next(); + return self.next(); + } + /* SY × NU */ + SY if next_grapheme_class!((next_char is NU)) => { + let (idx, next_grapheme) = next_char.unwrap(); + *pos = idx + next_grapheme.len(); + self.iter.next(); + return self.next(); + } + /* LB26 Do not break a Korean syllable. 
+ * JL × (JL | JV | H2 | H3) */ + JL if next_grapheme_class!((next_char is JL, JV, H2, H3)) => { + let (idx, next_grapheme) = next_char.unwrap(); + *pos = idx + next_grapheme.len(); + self.iter.next(); + return self.next(); + } + /* (JV | H2) × (JV | JT) */ + JV | H2 if next_grapheme_class!((next_char is JV, JT)) => { + let (idx, next_grapheme) = next_char.unwrap(); + *pos = idx + next_grapheme.len(); + self.iter.next(); + return self.next(); + } + /* (JT | H3) × JT */ + JT | H3 if next_grapheme_class!((next_char is JT)) => { + let (idx, next_grapheme) = next_char.unwrap(); + *pos = idx + next_grapheme.len(); + self.iter.next(); + return self.next(); + } + /* LB27 Treat a Korean Syllable Block the same as ID. + * (JL | JV | JT | H2 | H3) × IN */ + JL | JV | JT | H2 | H3 if next_grapheme_class!((next_char is IN)) => { + let (idx, next_grapheme) = next_char.unwrap(); + *pos = idx + next_grapheme.len(); + self.iter.next(); + return self.next(); + } + /* (JL | JV | JT | H2 | H3) × PO */ + JL | JV | JT | H2 | H3 if next_grapheme_class!((next_char is PO)) => { + let (idx, next_grapheme) = next_char.unwrap(); + *pos = idx + next_grapheme.len(); + self.iter.next(); + return self.next(); + } + /* PR × (JL | JV | JT | H2 | H3) */ + PR if next_grapheme_class!((next_char is JL, JV, JT, H2, H3)) => { + let (idx, next_grapheme) = next_char.unwrap(); + *pos = idx + next_grapheme.len(); + self.iter.next(); + return self.next(); + } + /* LB28 Do not break between alphabetics (“at”). + (AL | HL) × (AL | HL) */ + AL | HL if next_grapheme_class!((next_char is AL, HL)) => { + let (idx, next_grapheme) = next_char.unwrap(); + *pos = idx + next_grapheme.len(); + self.iter.next(); + return self.next(); + } + /* LB29 Do not break between numeric punctuation and alphabetics (“e.g.”). 
+ IS × (AL | HL) */ + IS if next_grapheme_class!((next_char is AL, HL)) => { + let (idx, next_grapheme) = next_char.unwrap(); + *pos = idx + next_grapheme.len(); + self.iter.next(); + return self.next(); + } + /* LB30 Do not break between letters, numbers, or ordinary symbols and opening + or closing parentheses. + (AL | HL | NU) × OP */ + AL | HL | NU if next_grapheme_class!((next_char is OP)) => { + let (idx, next_grapheme) = next_char.unwrap(); + *pos = idx + next_grapheme.len(); + self.iter.next(); + return self.next(); + } + /* CP × (AL | HL | NU) */ + CP if next_grapheme_class!((next_char is AL, HL , NU)) => { + let (idx, next_grapheme) = next_char.unwrap(); + *pos = idx + next_grapheme.len(); + self.iter.next(); + return self.next(); + } + /*LB30b Do not break between an emoji base and an emoji modifier. + * EB × EM */ + EB if next_grapheme_class!((next_char is EM)) => { + let (idx, next_grapheme) = next_char.unwrap(); + *pos = idx + next_grapheme.len(); + self.iter.next(); + return self.next(); + } + RI => { + /* LB30a Break between two regional indicator symbols if and only if there are an + * even number of regional indicators preceding the position of the break. 
+ * sot (RI RI)* RI × RI + * [^RI] (RI RI)* RI × RI */ + *reg_ind_streak += 1; + *pos += grapheme.len(); + if *reg_ind_streak % 2 == 1 { + return Some((*pos - grapheme.len(), BreakAllowed)); + } + self.iter.next(); + return self.next(); + } + _ => { + *pos += grapheme.len(); + return Some((*pos - grapheme.len(), BreakAllowed)); + } + } + } +} + +fn search_table(c: u32, t: &'static [(u32, u32, LineBreakClass)]) -> LineBreakClass { + match t.binary_search_by(|&(lo, hi, _)| { + if lo <= c && c <= hi { + Ordering::Equal + } else if hi < c { + Ordering::Less + } else { + Ordering::Greater + } + }) { + Ok(idx) => t[idx].2, + Err(_) => XX, + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_line_breaks() { + let s = "Fell past it.\n\n‘Well!’ thought Alice to herself."; + let breaks = LineBreakCandidateIter::new(s).collect::>(); + let mut prev = 0; + for b in breaks { + println!("{:?}", &s[prev..b.0]); + prev = b.0; + } + println!("{:?}", &s[prev..]); + } +} diff --git a/text_processing/src/tables.rs b/text_processing/src/tables.rs new file mode 100644 index 00000000..4a6f885f --- /dev/null +++ b/text_processing/src/tables.rs @@ -0,0 +1,3389 @@ +use crate::types::LineBreakClass; +use crate::types::LineBreakClass::*; + +pub const LINE_BREAK_RULES: &'static [(u32, u32, LineBreakClass)] = &[ + (0x0, 0x8, CM), + (0x9, 0x9, BA), + (0xA, 0xA, LF), + (0xB, 0xC, BK), + (0xD, 0xD, CR), + (0xE, 0x1F, CM), + (0x20, 0x20, SP), + (0x21, 0x21, EX), + (0x22, 0x22, QU), + (0x23, 0x23, AL), + (0x24, 0x24, PR), + (0x25, 0x25, PO), + (0x26, 0x26, AL), + (0x27, 0x27, QU), + (0x28, 0x28, OP), + (0x29, 0x29, CP), + (0x2A, 0x2A, AL), + (0x2B, 0x2B, PR), + (0x2C, 0x2C, IS), + (0x2D, 0x2D, HY), + (0x2E, 0x2E, IS), + (0x2F, 0x2F, SY), + (0x30, 0x39, NU), + (0x3A, 0x3B, IS), + (0x3C, 0x3E, AL), + (0x3F, 0x3F, EX), + (0x40, 0x40, AL), + (0x41, 0x5A, AL), + (0x5B, 0x5B, OP), + (0x5C, 0x5C, PR), + (0x5D, 0x5D, CP), + (0x5E, 0x5E, AL), + (0x5F, 0x5F, AL), + (0x60, 0x60, AL), + 
(0x61, 0x7A, AL), + (0x7B, 0x7B, OP), + (0x7C, 0x7C, BA), + (0x7D, 0x7D, CL), + (0x7E, 0x7E, AL), + (0x7F, 0x7F, CM), + (0x80, 0x84, CM), + (0x85, 0x85, NL), + (0x86, 0x9F, CM), + (0xA0, 0xA0, GL), + (0xA1, 0xA1, OP), + (0xA2, 0xA2, PO), + (0xA3, 0xA5, PR), + (0xA6, 0xA6, AL), + (0xA7, 0xA7, AI), + (0xA8, 0xA8, AI), + (0xA9, 0xA9, AL), + (0xAA, 0xAA, AI), + (0xAB, 0xAB, QU), + (0xAC, 0xAC, AL), + (0xAD, 0xAD, BA), + (0xAE, 0xAE, AL), + (0xAF, 0xAF, AL), + (0xB0, 0xB0, PO), + (0xB1, 0xB1, PR), + (0xB2, 0xB3, AI), + (0xB4, 0xB4, BB), + (0xB5, 0xB5, AL), + (0xB6, 0xB7, AI), + (0xB8, 0xB8, AI), + (0xB9, 0xB9, AI), + (0xBA, 0xBA, AI), + (0xBB, 0xBB, QU), + (0xBC, 0xBE, AI), + (0xBF, 0xBF, OP), + (0xC0, 0xD6, AL), + (0xD7, 0xD7, AI), + (0xD8, 0xF6, AL), + (0xF7, 0xF7, AI), + (0xF8, 0xFF, AL), + (0x100, 0x17F, AL), + (0x180, 0x1BA, AL), + (0x1BB, 0x1BB, AL), + (0x1BC, 0x1BF, AL), + (0x1C0, 0x1C3, AL), + (0x1C4, 0x24F, AL), + (0x250, 0x293, AL), + (0x294, 0x294, AL), + (0x295, 0x2AF, AL), + (0x2B0, 0x2C1, AL), + (0x2C2, 0x2C5, AL), + (0x2C6, 0x2C6, AL), + (0x2C7, 0x2C7, AI), + (0x2C8, 0x2C8, BB), + (0x2C9, 0x2CB, AI), + (0x2CC, 0x2CC, BB), + (0x2CD, 0x2CD, AI), + (0x2CE, 0x2CF, AL), + (0x2D0, 0x2D0, AI), + (0x2D1, 0x2D1, AL), + (0x2D2, 0x2D7, AL), + (0x2D8, 0x2DB, AI), + (0x2DC, 0x2DC, AL), + (0x2DD, 0x2DD, AI), + (0x2DE, 0x2DE, AL), + (0x2DF, 0x2DF, BB), + (0x2E0, 0x2E4, AL), + (0x2E5, 0x2EB, AL), + (0x2EC, 0x2EC, AL), + (0x2ED, 0x2ED, AL), + (0x2EE, 0x2EE, AL), + (0x2EF, 0x2FF, AL), + (0x300, 0x34E, CM), + (0x34F, 0x34F, GL), + (0x350, 0x35B, CM), + (0x35C, 0x362, GL), + (0x363, 0x36F, CM), + (0x370, 0x373, AL), + (0x374, 0x374, AL), + (0x375, 0x375, AL), + (0x376, 0x377, AL), + (0x37A, 0x37A, AL), + (0x37B, 0x37D, AL), + (0x37E, 0x37E, IS), + (0x37F, 0x37F, AL), + (0x384, 0x385, AL), + (0x386, 0x386, AL), + (0x387, 0x387, AL), + (0x388, 0x38A, AL), + (0x38C, 0x38C, AL), + (0x38E, 0x3A1, AL), + (0x3A3, 0x3F5, AL), + (0x3F6, 0x3F6, AL), + (0x3F7, 0x3FF, AL), + (0x400, 
0x481, AL), + (0x482, 0x482, AL), + (0x483, 0x487, CM), + (0x488, 0x489, CM), + (0x48A, 0x4FF, AL), + (0x500, 0x52F, AL), + (0x531, 0x556, AL), + (0x559, 0x559, AL), + (0x55A, 0x55F, AL), + (0x560, 0x588, AL), + (0x589, 0x589, IS), + (0x58A, 0x58A, BA), + (0x58D, 0x58E, AL), + (0x58F, 0x58F, PR), + (0x591, 0x5BD, CM), + (0x5BE, 0x5BE, BA), + (0x5BF, 0x5BF, CM), + (0x5C0, 0x5C0, AL), + (0x5C1, 0x5C2, CM), + (0x5C3, 0x5C3, AL), + (0x5C4, 0x5C5, CM), + (0x5C6, 0x5C6, EX), + (0x5C7, 0x5C7, CM), + (0x5D0, 0x5EA, HL), + (0x5EF, 0x5F2, HL), + (0x5F3, 0x5F4, AL), + (0x600, 0x605, AL), + (0x606, 0x608, AL), + (0x609, 0x60A, PO), + (0x60B, 0x60B, PO), + (0x60C, 0x60D, IS), + (0x60E, 0x60F, AL), + (0x610, 0x61A, CM), + (0x61B, 0x61B, EX), + (0x61C, 0x61C, CM), + (0x61E, 0x61F, EX), + (0x620, 0x63F, AL), + (0x640, 0x640, AL), + (0x641, 0x64A, AL), + (0x64B, 0x65F, CM), + (0x660, 0x669, NU), + (0x66A, 0x66A, PO), + (0x66B, 0x66C, NU), + (0x66D, 0x66D, AL), + (0x66E, 0x66F, AL), + (0x670, 0x670, CM), + (0x671, 0x6D3, AL), + (0x6D4, 0x6D4, EX), + (0x6D5, 0x6D5, AL), + (0x6D6, 0x6DC, CM), + (0x6DD, 0x6DD, AL), + (0x6DE, 0x6DE, AL), + (0x6DF, 0x6E4, CM), + (0x6E5, 0x6E6, AL), + (0x6E7, 0x6E8, CM), + (0x6E9, 0x6E9, AL), + (0x6EA, 0x6ED, CM), + (0x6EE, 0x6EF, AL), + (0x6F0, 0x6F9, NU), + (0x6FA, 0x6FC, AL), + (0x6FD, 0x6FE, AL), + (0x6FF, 0x6FF, AL), + (0x700, 0x70D, AL), + (0x70F, 0x70F, AL), + (0x710, 0x710, AL), + (0x711, 0x711, CM), + (0x712, 0x72F, AL), + (0x730, 0x74A, CM), + (0x74D, 0x74F, AL), + (0x750, 0x77F, AL), + (0x780, 0x7A5, AL), + (0x7A6, 0x7B0, CM), + (0x7B1, 0x7B1, AL), + (0x7C0, 0x7C9, NU), + (0x7CA, 0x7EA, AL), + (0x7EB, 0x7F3, CM), + (0x7F4, 0x7F5, AL), + (0x7F6, 0x7F6, AL), + (0x7F7, 0x7F7, AL), + (0x7F8, 0x7F8, IS), + (0x7F9, 0x7F9, EX), + (0x7FA, 0x7FA, AL), + (0x7FD, 0x7FD, CM), + (0x7FE, 0x7FF, PR), + (0x800, 0x815, AL), + (0x816, 0x819, CM), + (0x81A, 0x81A, AL), + (0x81B, 0x823, CM), + (0x824, 0x824, AL), + (0x825, 0x827, CM), + (0x828, 0x828, AL), + 
(0x829, 0x82D, CM), + (0x830, 0x83E, AL), + (0x840, 0x858, AL), + (0x859, 0x85B, CM), + (0x85E, 0x85E, AL), + (0x860, 0x86A, AL), + (0x8A0, 0x8B4, AL), + (0x8B6, 0x8BD, AL), + (0x8D3, 0x8E1, CM), + (0x8E2, 0x8E2, AL), + (0x8E3, 0x8FF, CM), + (0x900, 0x902, CM), + (0x903, 0x903, CM), + (0x904, 0x939, AL), + (0x93A, 0x93A, CM), + (0x93B, 0x93B, CM), + (0x93C, 0x93C, CM), + (0x93D, 0x93D, AL), + (0x93E, 0x940, CM), + (0x941, 0x948, CM), + (0x949, 0x94C, CM), + (0x94D, 0x94D, CM), + (0x94E, 0x94F, CM), + (0x950, 0x950, AL), + (0x951, 0x957, CM), + (0x958, 0x961, AL), + (0x962, 0x963, CM), + (0x964, 0x965, BA), + (0x966, 0x96F, NU), + (0x970, 0x970, AL), + (0x971, 0x971, AL), + (0x972, 0x97F, AL), + (0x980, 0x980, AL), + (0x981, 0x981, CM), + (0x982, 0x983, CM), + (0x985, 0x98C, AL), + (0x98F, 0x990, AL), + (0x993, 0x9A8, AL), + (0x9AA, 0x9B0, AL), + (0x9B2, 0x9B2, AL), + (0x9B6, 0x9B9, AL), + (0x9BC, 0x9BC, CM), + (0x9BD, 0x9BD, AL), + (0x9BE, 0x9C0, CM), + (0x9C1, 0x9C4, CM), + (0x9C7, 0x9C8, CM), + (0x9CB, 0x9CC, CM), + (0x9CD, 0x9CD, CM), + (0x9CE, 0x9CE, AL), + (0x9D7, 0x9D7, CM), + (0x9DC, 0x9DD, AL), + (0x9DF, 0x9E1, AL), + (0x9E2, 0x9E3, CM), + (0x9E6, 0x9EF, NU), + (0x9F0, 0x9F1, AL), + (0x9F2, 0x9F3, PO), + (0x9F4, 0x9F8, AL), + (0x9F9, 0x9F9, PO), + (0x9FA, 0x9FA, AL), + (0x9FB, 0x9FB, PR), + (0x9FC, 0x9FC, AL), + (0x9FD, 0x9FD, AL), + (0x9FE, 0x9FE, CM), + (0xA01, 0xA02, CM), + (0xA03, 0xA03, CM), + (0xA05, 0xA0A, AL), + (0xA0F, 0xA10, AL), + (0xA13, 0xA28, AL), + (0xA2A, 0xA30, AL), + (0xA32, 0xA33, AL), + (0xA35, 0xA36, AL), + (0xA38, 0xA39, AL), + (0xA3C, 0xA3C, CM), + (0xA3E, 0xA40, CM), + (0xA41, 0xA42, CM), + (0xA47, 0xA48, CM), + (0xA4B, 0xA4D, CM), + (0xA51, 0xA51, CM), + (0xA59, 0xA5C, AL), + (0xA5E, 0xA5E, AL), + (0xA66, 0xA6F, NU), + (0xA70, 0xA71, CM), + (0xA72, 0xA74, AL), + (0xA75, 0xA75, CM), + (0xA76, 0xA76, AL), + (0xA81, 0xA82, CM), + (0xA83, 0xA83, CM), + (0xA85, 0xA8D, AL), + (0xA8F, 0xA91, AL), + (0xA93, 0xAA8, AL), + (0xAAA, 0xAB0, AL), 
+ (0xAB2, 0xAB3, AL), + (0xAB5, 0xAB9, AL), + (0xABC, 0xABC, CM), + (0xABD, 0xABD, AL), + (0xABE, 0xAC0, CM), + (0xAC1, 0xAC5, CM), + (0xAC7, 0xAC8, CM), + (0xAC9, 0xAC9, CM), + (0xACB, 0xACC, CM), + (0xACD, 0xACD, CM), + (0xAD0, 0xAD0, AL), + (0xAE0, 0xAE1, AL), + (0xAE2, 0xAE3, CM), + (0xAE6, 0xAEF, NU), + (0xAF0, 0xAF0, AL), + (0xAF1, 0xAF1, PR), + (0xAF9, 0xAF9, AL), + (0xAFA, 0xAFF, CM), + (0xB01, 0xB01, CM), + (0xB02, 0xB03, CM), + (0xB05, 0xB0C, AL), + (0xB0F, 0xB10, AL), + (0xB13, 0xB28, AL), + (0xB2A, 0xB30, AL), + (0xB32, 0xB33, AL), + (0xB35, 0xB39, AL), + (0xB3C, 0xB3C, CM), + (0xB3D, 0xB3D, AL), + (0xB3E, 0xB3E, CM), + (0xB3F, 0xB3F, CM), + (0xB40, 0xB40, CM), + (0xB41, 0xB44, CM), + (0xB47, 0xB48, CM), + (0xB4B, 0xB4C, CM), + (0xB4D, 0xB4D, CM), + (0xB56, 0xB56, CM), + (0xB57, 0xB57, CM), + (0xB5C, 0xB5D, AL), + (0xB5F, 0xB61, AL), + (0xB62, 0xB63, CM), + (0xB66, 0xB6F, NU), + (0xB70, 0xB70, AL), + (0xB71, 0xB71, AL), + (0xB72, 0xB77, AL), + (0xB82, 0xB82, CM), + (0xB83, 0xB83, AL), + (0xB85, 0xB8A, AL), + (0xB8E, 0xB90, AL), + (0xB92, 0xB95, AL), + (0xB99, 0xB9A, AL), + (0xB9C, 0xB9C, AL), + (0xB9E, 0xB9F, AL), + (0xBA3, 0xBA4, AL), + (0xBA8, 0xBAA, AL), + (0xBAE, 0xBB9, AL), + (0xBBE, 0xBBF, CM), + (0xBC0, 0xBC0, CM), + (0xBC1, 0xBC2, CM), + (0xBC6, 0xBC8, CM), + (0xBCA, 0xBCC, CM), + (0xBCD, 0xBCD, CM), + (0xBD0, 0xBD0, AL), + (0xBD7, 0xBD7, CM), + (0xBE6, 0xBEF, NU), + (0xBF0, 0xBF2, AL), + (0xBF3, 0xBF8, AL), + (0xBF9, 0xBF9, PR), + (0xBFA, 0xBFA, AL), + (0xC00, 0xC00, CM), + (0xC01, 0xC03, CM), + (0xC04, 0xC04, CM), + (0xC05, 0xC0C, AL), + (0xC0E, 0xC10, AL), + (0xC12, 0xC28, AL), + (0xC2A, 0xC39, AL), + (0xC3D, 0xC3D, AL), + (0xC3E, 0xC40, CM), + (0xC41, 0xC44, CM), + (0xC46, 0xC48, CM), + (0xC4A, 0xC4D, CM), + (0xC55, 0xC56, CM), + (0xC58, 0xC5A, AL), + (0xC60, 0xC61, AL), + (0xC62, 0xC63, CM), + (0xC66, 0xC6F, NU), + (0xC77, 0xC77, BB), + (0xC78, 0xC7E, AL), + (0xC7F, 0xC7F, AL), + (0xC80, 0xC80, AL), + (0xC81, 0xC81, CM), + (0xC82, 0xC83, 
CM), + (0xC84, 0xC84, BB), + (0xC85, 0xC8C, AL), + (0xC8E, 0xC90, AL), + (0xC92, 0xCA8, AL), + (0xCAA, 0xCB3, AL), + (0xCB5, 0xCB9, AL), + (0xCBC, 0xCBC, CM), + (0xCBD, 0xCBD, AL), + (0xCBE, 0xCBE, CM), + (0xCBF, 0xCBF, CM), + (0xCC0, 0xCC4, CM), + (0xCC6, 0xCC6, CM), + (0xCC7, 0xCC8, CM), + (0xCCA, 0xCCB, CM), + (0xCCC, 0xCCD, CM), + (0xCD5, 0xCD6, CM), + (0xCDE, 0xCDE, AL), + (0xCE0, 0xCE1, AL), + (0xCE2, 0xCE3, CM), + (0xCE6, 0xCEF, NU), + (0xCF1, 0xCF2, AL), + (0xD00, 0xD01, CM), + (0xD02, 0xD03, CM), + (0xD05, 0xD0C, AL), + (0xD0E, 0xD10, AL), + (0xD12, 0xD3A, AL), + (0xD3B, 0xD3C, CM), + (0xD3D, 0xD3D, AL), + (0xD3E, 0xD40, CM), + (0xD41, 0xD44, CM), + (0xD46, 0xD48, CM), + (0xD4A, 0xD4C, CM), + (0xD4D, 0xD4D, CM), + (0xD4E, 0xD4E, AL), + (0xD4F, 0xD4F, AL), + (0xD54, 0xD56, AL), + (0xD57, 0xD57, CM), + (0xD58, 0xD5E, AL), + (0xD5F, 0xD61, AL), + (0xD62, 0xD63, CM), + (0xD66, 0xD6F, NU), + (0xD70, 0xD78, AL), + (0xD79, 0xD79, PO), + (0xD7A, 0xD7F, AL), + (0xD82, 0xD83, CM), + (0xD85, 0xD96, AL), + (0xD9A, 0xDB1, AL), + (0xDB3, 0xDBB, AL), + (0xDBD, 0xDBD, AL), + (0xDC0, 0xDC6, AL), + (0xDCA, 0xDCA, CM), + (0xDCF, 0xDD1, CM), + (0xDD2, 0xDD4, CM), + (0xDD6, 0xDD6, CM), + (0xDD8, 0xDDF, CM), + (0xDE6, 0xDEF, NU), + (0xDF2, 0xDF3, CM), + (0xDF4, 0xDF4, AL), + (0xE01, 0xE30, SA), + (0xE31, 0xE31, SA), + (0xE32, 0xE33, SA), + (0xE34, 0xE3A, SA), + (0xE3F, 0xE3F, PR), + (0xE40, 0xE45, SA), + (0xE46, 0xE46, SA), + (0xE47, 0xE4E, SA), + (0xE4F, 0xE4F, AL), + (0xE50, 0xE59, NU), + (0xE5A, 0xE5B, BA), + (0xE81, 0xE82, SA), + (0xE84, 0xE84, SA), + (0xE86, 0xE8A, SA), + (0xE8C, 0xEA3, SA), + (0xEA5, 0xEA5, SA), + (0xEA7, 0xEB0, SA), + (0xEB1, 0xEB1, SA), + (0xEB2, 0xEB3, SA), + (0xEB4, 0xEBC, SA), + (0xEBD, 0xEBD, SA), + (0xEC0, 0xEC4, SA), + (0xEC6, 0xEC6, SA), + (0xEC8, 0xECD, SA), + (0xED0, 0xED9, NU), + (0xEDC, 0xEDF, SA), + (0xF00, 0xF00, AL), + (0xF01, 0xF03, BB), + (0xF04, 0xF04, BB), + (0xF05, 0xF05, AL), + (0xF06, 0xF07, BB), + (0xF08, 0xF08, GL), + (0xF09, 
0xF0A, BB), + (0xF0B, 0xF0B, BA), + (0xF0C, 0xF0C, GL), + (0xF0D, 0xF11, EX), + (0xF12, 0xF12, GL), + (0xF13, 0xF13, AL), + (0xF14, 0xF14, EX), + (0xF15, 0xF17, AL), + (0xF18, 0xF19, CM), + (0xF1A, 0xF1F, AL), + (0xF20, 0xF29, NU), + (0xF2A, 0xF33, AL), + (0xF34, 0xF34, BA), + (0xF35, 0xF35, CM), + (0xF36, 0xF36, AL), + (0xF37, 0xF37, CM), + (0xF38, 0xF38, AL), + (0xF39, 0xF39, CM), + (0xF3A, 0xF3A, OP), + (0xF3B, 0xF3B, CL), + (0xF3C, 0xF3C, OP), + (0xF3D, 0xF3D, CL), + (0xF3E, 0xF3F, CM), + (0xF40, 0xF47, AL), + (0xF49, 0xF6C, AL), + (0xF71, 0xF7E, CM), + (0xF7F, 0xF7F, BA), + (0xF80, 0xF84, CM), + (0xF85, 0xF85, BA), + (0xF86, 0xF87, CM), + (0xF88, 0xF8C, AL), + (0xF8D, 0xF97, CM), + (0xF99, 0xFBC, CM), + (0xFBE, 0xFBF, BA), + (0xFC0, 0xFC5, AL), + (0xFC6, 0xFC6, CM), + (0xFC7, 0xFCC, AL), + (0xFCE, 0xFCF, AL), + (0xFD0, 0xFD1, BB), + (0xFD2, 0xFD2, BA), + (0xFD3, 0xFD3, BB), + (0xFD4, 0xFD4, AL), + (0xFD5, 0xFD8, AL), + (0xFD9, 0xFDA, GL), + (0x1000, 0x102A, SA), + (0x102B, 0x102C, SA), + (0x102D, 0x1030, SA), + (0x1031, 0x1031, SA), + (0x1032, 0x1037, SA), + (0x1038, 0x1038, SA), + (0x1039, 0x103A, SA), + (0x103B, 0x103C, SA), + (0x103D, 0x103E, SA), + (0x103F, 0x103F, SA), + (0x1040, 0x1049, NU), + (0x104A, 0x104B, BA), + (0x104C, 0x104F, AL), + (0x1050, 0x1055, SA), + (0x1056, 0x1057, SA), + (0x1058, 0x1059, SA), + (0x105A, 0x105D, SA), + (0x105E, 0x1060, SA), + (0x1061, 0x1061, SA), + (0x1062, 0x1064, SA), + (0x1065, 0x1066, SA), + (0x1067, 0x106D, SA), + (0x106E, 0x1070, SA), + (0x1071, 0x1074, SA), + (0x1075, 0x1081, SA), + (0x1082, 0x1082, SA), + (0x1083, 0x1084, SA), + (0x1085, 0x1086, SA), + (0x1087, 0x108C, SA), + (0x108D, 0x108D, SA), + (0x108E, 0x108E, SA), + (0x108F, 0x108F, SA), + (0x1090, 0x1099, NU), + (0x109A, 0x109C, SA), + (0x109D, 0x109D, SA), + (0x109E, 0x109F, SA), + (0x10A0, 0x10C5, AL), + (0x10C7, 0x10C7, AL), + (0x10CD, 0x10CD, AL), + (0x10D0, 0x10FA, AL), + (0x10FB, 0x10FB, AL), + (0x10FC, 0x10FC, AL), + (0x10FD, 0x10FF, AL), + 
(0x1100, 0x115F, JL), + (0x1160, 0x11A7, JV), + (0x11A8, 0x11FF, JT), + (0x1200, 0x1248, AL), + (0x124A, 0x124D, AL), + (0x1250, 0x1256, AL), + (0x1258, 0x1258, AL), + (0x125A, 0x125D, AL), + (0x1260, 0x1288, AL), + (0x128A, 0x128D, AL), + (0x1290, 0x12B0, AL), + (0x12B2, 0x12B5, AL), + (0x12B8, 0x12BE, AL), + (0x12C0, 0x12C0, AL), + (0x12C2, 0x12C5, AL), + (0x12C8, 0x12D6, AL), + (0x12D8, 0x1310, AL), + (0x1312, 0x1315, AL), + (0x1318, 0x135A, AL), + (0x135D, 0x135F, CM), + (0x1360, 0x1360, AL), + (0x1361, 0x1361, BA), + (0x1362, 0x1368, AL), + (0x1369, 0x137C, AL), + (0x1380, 0x138F, AL), + (0x1390, 0x1399, AL), + (0x13A0, 0x13F5, AL), + (0x13F8, 0x13FD, AL), + (0x1400, 0x1400, BA), + (0x1401, 0x166C, AL), + (0x166D, 0x166D, AL), + (0x166E, 0x166E, AL), + (0x166F, 0x167F, AL), + (0x1680, 0x1680, BA), + (0x1681, 0x169A, AL), + (0x169B, 0x169B, OP), + (0x169C, 0x169C, CL), + (0x16A0, 0x16EA, AL), + (0x16EB, 0x16ED, BA), + (0x16EE, 0x16F0, AL), + (0x16F1, 0x16F8, AL), + (0x1700, 0x170C, AL), + (0x170E, 0x1711, AL), + (0x1712, 0x1714, CM), + (0x1720, 0x1731, AL), + (0x1732, 0x1734, CM), + (0x1735, 0x1736, BA), + (0x1740, 0x1751, AL), + (0x1752, 0x1753, CM), + (0x1760, 0x176C, AL), + (0x176E, 0x1770, AL), + (0x1772, 0x1773, CM), + (0x1780, 0x17B3, SA), + (0x17B4, 0x17B5, SA), + (0x17B6, 0x17B6, SA), + (0x17B7, 0x17BD, SA), + (0x17BE, 0x17C5, SA), + (0x17C6, 0x17C6, SA), + (0x17C7, 0x17C8, SA), + (0x17C9, 0x17D3, SA), + (0x17D4, 0x17D5, BA), + (0x17D6, 0x17D6, NS), + (0x17D7, 0x17D7, SA), + (0x17D8, 0x17D8, BA), + (0x17D9, 0x17D9, AL), + (0x17DA, 0x17DA, BA), + (0x17DB, 0x17DB, PR), + (0x17DC, 0x17DC, SA), + (0x17DD, 0x17DD, SA), + (0x17E0, 0x17E9, NU), + (0x17F0, 0x17F9, AL), + (0x1800, 0x1801, AL), + (0x1802, 0x1803, EX), + (0x1804, 0x1805, BA), + (0x1806, 0x1806, BB), + (0x1807, 0x1807, AL), + (0x1808, 0x1809, EX), + (0x180A, 0x180A, AL), + (0x180B, 0x180D, CM), + (0x180E, 0x180E, GL), + (0x1810, 0x1819, NU), + (0x1820, 0x1842, AL), + (0x1843, 0x1843, AL), + 
(0x1844, 0x1878, AL), + (0x1880, 0x1884, AL), + (0x1885, 0x1886, CM), + (0x1887, 0x18A8, AL), + (0x18A9, 0x18A9, CM), + (0x18AA, 0x18AA, AL), + (0x18B0, 0x18F5, AL), + (0x1900, 0x191E, AL), + (0x1920, 0x1922, CM), + (0x1923, 0x1926, CM), + (0x1927, 0x1928, CM), + (0x1929, 0x192B, CM), + (0x1930, 0x1931, CM), + (0x1932, 0x1932, CM), + (0x1933, 0x1938, CM), + (0x1939, 0x193B, CM), + (0x1940, 0x1940, AL), + (0x1944, 0x1945, EX), + (0x1946, 0x194F, NU), + (0x1950, 0x196D, SA), + (0x1970, 0x1974, SA), + (0x1980, 0x19AB, SA), + (0x19B0, 0x19C9, SA), + (0x19D0, 0x19D9, NU), + (0x19DA, 0x19DA, SA), + (0x19DE, 0x19DF, SA), + (0x19E0, 0x19FF, AL), + (0x1A00, 0x1A16, AL), + (0x1A17, 0x1A18, CM), + (0x1A19, 0x1A1A, CM), + (0x1A1B, 0x1A1B, CM), + (0x1A1E, 0x1A1F, AL), + (0x1A20, 0x1A54, SA), + (0x1A55, 0x1A55, SA), + (0x1A56, 0x1A56, SA), + (0x1A57, 0x1A57, SA), + (0x1A58, 0x1A5E, SA), + (0x1A60, 0x1A60, SA), + (0x1A61, 0x1A61, SA), + (0x1A62, 0x1A62, SA), + (0x1A63, 0x1A64, SA), + (0x1A65, 0x1A6C, SA), + (0x1A6D, 0x1A72, SA), + (0x1A73, 0x1A7C, SA), + (0x1A7F, 0x1A7F, CM), + (0x1A80, 0x1A89, NU), + (0x1A90, 0x1A99, NU), + (0x1AA0, 0x1AA6, SA), + (0x1AA7, 0x1AA7, SA), + (0x1AA8, 0x1AAD, SA), + (0x1AB0, 0x1ABD, CM), + (0x1ABE, 0x1ABE, CM), + (0x1B00, 0x1B03, CM), + (0x1B04, 0x1B04, CM), + (0x1B05, 0x1B33, AL), + (0x1B34, 0x1B34, CM), + (0x1B35, 0x1B35, CM), + (0x1B36, 0x1B3A, CM), + (0x1B3B, 0x1B3B, CM), + (0x1B3C, 0x1B3C, CM), + (0x1B3D, 0x1B41, CM), + (0x1B42, 0x1B42, CM), + (0x1B43, 0x1B44, CM), + (0x1B45, 0x1B4B, AL), + (0x1B50, 0x1B59, NU), + (0x1B5A, 0x1B5B, BA), + (0x1B5C, 0x1B5C, AL), + (0x1B5D, 0x1B60, BA), + (0x1B61, 0x1B6A, AL), + (0x1B6B, 0x1B73, CM), + (0x1B74, 0x1B7C, AL), + (0x1B80, 0x1B81, CM), + (0x1B82, 0x1B82, CM), + (0x1B83, 0x1BA0, AL), + (0x1BA1, 0x1BA1, CM), + (0x1BA2, 0x1BA5, CM), + (0x1BA6, 0x1BA7, CM), + (0x1BA8, 0x1BA9, CM), + (0x1BAA, 0x1BAA, CM), + (0x1BAB, 0x1BAD, CM), + (0x1BAE, 0x1BAF, AL), + (0x1BB0, 0x1BB9, NU), + (0x1BBA, 0x1BBF, AL), + 
(0x1BC0, 0x1BE5, AL), + (0x1BE6, 0x1BE6, CM), + (0x1BE7, 0x1BE7, CM), + (0x1BE8, 0x1BE9, CM), + (0x1BEA, 0x1BEC, CM), + (0x1BED, 0x1BED, CM), + (0x1BEE, 0x1BEE, CM), + (0x1BEF, 0x1BF1, CM), + (0x1BF2, 0x1BF3, CM), + (0x1BFC, 0x1BFF, AL), + (0x1C00, 0x1C23, AL), + (0x1C24, 0x1C2B, CM), + (0x1C2C, 0x1C33, CM), + (0x1C34, 0x1C35, CM), + (0x1C36, 0x1C37, CM), + (0x1C3B, 0x1C3F, BA), + (0x1C40, 0x1C49, NU), + (0x1C4D, 0x1C4F, AL), + (0x1C50, 0x1C59, NU), + (0x1C5A, 0x1C77, AL), + (0x1C78, 0x1C7D, AL), + (0x1C7E, 0x1C7F, BA), + (0x1C80, 0x1C88, AL), + (0x1C90, 0x1CBA, AL), + (0x1CBD, 0x1CBF, AL), + (0x1CC0, 0x1CC7, AL), + (0x1CD0, 0x1CD2, CM), + (0x1CD3, 0x1CD3, AL), + (0x1CD4, 0x1CE0, CM), + (0x1CE1, 0x1CE1, CM), + (0x1CE2, 0x1CE8, CM), + (0x1CE9, 0x1CEC, AL), + (0x1CED, 0x1CED, CM), + (0x1CEE, 0x1CF3, AL), + (0x1CF4, 0x1CF4, CM), + (0x1CF5, 0x1CF6, AL), + (0x1CF7, 0x1CF7, CM), + (0x1CF8, 0x1CF9, CM), + (0x1CFA, 0x1CFA, AL), + (0x1D00, 0x1D2B, AL), + (0x1D2C, 0x1D6A, AL), + (0x1D6B, 0x1D77, AL), + (0x1D78, 0x1D78, AL), + (0x1D79, 0x1D7F, AL), + (0x1D80, 0x1D9A, AL), + (0x1D9B, 0x1DBF, AL), + (0x1DC0, 0x1DF9, CM), + (0x1DFB, 0x1DFF, CM), + (0x1E00, 0x1EFF, AL), + (0x1F00, 0x1F15, AL), + (0x1F18, 0x1F1D, AL), + (0x1F20, 0x1F45, AL), + (0x1F48, 0x1F4D, AL), + (0x1F50, 0x1F57, AL), + (0x1F59, 0x1F59, AL), + (0x1F5B, 0x1F5B, AL), + (0x1F5D, 0x1F5D, AL), + (0x1F5F, 0x1F7D, AL), + (0x1F80, 0x1FB4, AL), + (0x1FB6, 0x1FBC, AL), + (0x1FBD, 0x1FBD, AL), + (0x1FBE, 0x1FBE, AL), + (0x1FBF, 0x1FC1, AL), + (0x1FC2, 0x1FC4, AL), + (0x1FC6, 0x1FCC, AL), + (0x1FCD, 0x1FCF, AL), + (0x1FD0, 0x1FD3, AL), + (0x1FD6, 0x1FDB, AL), + (0x1FDD, 0x1FDF, AL), + (0x1FE0, 0x1FEC, AL), + (0x1FED, 0x1FEF, AL), + (0x1FF2, 0x1FF4, AL), + (0x1FF6, 0x1FFC, AL), + (0x1FFD, 0x1FFD, BB), + (0x1FFE, 0x1FFE, AL), + (0x2000, 0x2006, BA), + (0x2007, 0x2007, GL), + (0x2008, 0x200A, BA), + (0x200B, 0x200B, ZW), + (0x200C, 0x200C, CM), + (0x200D, 0x200D, ZW), + (0x200E, 0x200F, CM), + (0x2010, 0x2010, BA), + 
(0x2011, 0x2011, GL), + (0x2012, 0x2013, BA), + (0x2014, 0x2014, B2), + (0x2015, 0x2015, AI), + (0x2016, 0x2016, AI), + (0x2017, 0x2017, AL), + (0x2018, 0x2018, QU), + (0x2019, 0x2019, QU), + (0x201A, 0x201A, OP), + (0x201B, 0x201C, QU), + (0x201D, 0x201D, QU), + (0x201E, 0x201E, OP), + (0x201F, 0x201F, QU), + (0x2020, 0x2021, AI), + (0x2022, 0x2023, AL), + (0x2024, 0x2026, IN), + (0x2027, 0x2027, BA), + (0x2028, 0x2028, BK), + (0x2029, 0x2029, BK), + (0x202A, 0x202E, CM), + (0x202F, 0x202F, GL), + (0x2030, 0x2037, PO), + (0x2038, 0x2038, AL), + (0x2039, 0x2039, QU), + (0x203A, 0x203A, QU), + (0x203B, 0x203B, AI), + (0x203C, 0x203D, NS), + (0x203E, 0x203E, AL), + (0x203F, 0x2040, AL), + (0x2041, 0x2043, AL), + (0x2044, 0x2044, IS), + (0x2045, 0x2045, OP), + (0x2046, 0x2046, CL), + (0x2047, 0x2049, NS), + (0x204A, 0x2051, AL), + (0x2052, 0x2052, AL), + (0x2053, 0x2053, AL), + (0x2054, 0x2054, AL), + (0x2055, 0x2055, AL), + (0x2056, 0x2056, BA), + (0x2057, 0x2057, AL), + (0x2058, 0x205B, BA), + (0x205C, 0x205C, AL), + (0x205D, 0x205E, BA), + (0x205F, 0x205F, BA), + (0x2060, 0x2060, WJ), + (0x2061, 0x2064, AL), + (0x2066, 0x206F, CM), + (0x2070, 0x2070, AL), + (0x2071, 0x2071, AL), + (0x2074, 0x2074, AI), + (0x2075, 0x2079, AL), + (0x207A, 0x207C, AL), + (0x207D, 0x207D, OP), + (0x207E, 0x207E, CL), + (0x207F, 0x207F, AI), + (0x2080, 0x2080, AL), + (0x2081, 0x2084, AI), + (0x2085, 0x2089, AL), + (0x208A, 0x208C, AL), + (0x208D, 0x208D, OP), + (0x208E, 0x208E, CL), + (0x2090, 0x209C, AL), + (0x20A0, 0x20A6, PR), + (0x20A7, 0x20A7, PO), + (0x20A8, 0x20B5, PR), + (0x20B6, 0x20B6, PO), + (0x20B7, 0x20BA, PR), + (0x20BB, 0x20BB, PO), + (0x20BC, 0x20BD, PR), + (0x20BE, 0x20BE, PO), + (0x20BF, 0x20BF, PR), + (0x20C0, 0x20CF, PR), + (0x20D0, 0x20DC, CM), + (0x20DD, 0x20E0, CM), + (0x20E1, 0x20E1, CM), + (0x20E2, 0x20E4, CM), + (0x20E5, 0x20F0, CM), + (0x2100, 0x2101, AL), + (0x2102, 0x2102, AL), + (0x2103, 0x2103, PO), + (0x2104, 0x2104, AL), + (0x2105, 0x2105, AI), + 
(0x2106, 0x2106, AL), + (0x2107, 0x2107, AL), + (0x2108, 0x2108, AL), + (0x2109, 0x2109, PO), + (0x210A, 0x2112, AL), + (0x2113, 0x2113, AI), + (0x2114, 0x2114, AL), + (0x2115, 0x2115, AL), + (0x2116, 0x2116, PR), + (0x2117, 0x2117, AL), + (0x2118, 0x2118, AL), + (0x2119, 0x211D, AL), + (0x211E, 0x2120, AL), + (0x2121, 0x2122, AI), + (0x2123, 0x2123, AL), + (0x2124, 0x2124, AL), + (0x2125, 0x2125, AL), + (0x2126, 0x2126, AL), + (0x2127, 0x2127, AL), + (0x2128, 0x2128, AL), + (0x2129, 0x2129, AL), + (0x212A, 0x212A, AL), + (0x212B, 0x212B, AI), + (0x212C, 0x212D, AL), + (0x212E, 0x212E, AL), + (0x212F, 0x2134, AL), + (0x2135, 0x2138, AL), + (0x2139, 0x2139, AL), + (0x213A, 0x213B, AL), + (0x213C, 0x213F, AL), + (0x2140, 0x2144, AL), + (0x2145, 0x2149, AL), + (0x214A, 0x214A, AL), + (0x214B, 0x214B, AL), + (0x214C, 0x214D, AL), + (0x214E, 0x214E, AL), + (0x214F, 0x214F, AL), + (0x2150, 0x2153, AL), + (0x2154, 0x2155, AI), + (0x2156, 0x215A, AL), + (0x215B, 0x215B, AI), + (0x215C, 0x215D, AL), + (0x215E, 0x215E, AI), + (0x215F, 0x215F, AL), + (0x2160, 0x216B, AI), + (0x216C, 0x216F, AL), + (0x2170, 0x2179, AI), + (0x217A, 0x2182, AL), + (0x2183, 0x2184, AL), + (0x2185, 0x2188, AL), + (0x2189, 0x2189, AI), + (0x218A, 0x218B, AL), + (0x2190, 0x2194, AI), + (0x2195, 0x2199, AI), + (0x219A, 0x219B, AL), + (0x219C, 0x219F, AL), + (0x21A0, 0x21A0, AL), + (0x21A1, 0x21A2, AL), + (0x21A3, 0x21A3, AL), + (0x21A4, 0x21A5, AL), + (0x21A6, 0x21A6, AL), + (0x21A7, 0x21AD, AL), + (0x21AE, 0x21AE, AL), + (0x21AF, 0x21CD, AL), + (0x21CE, 0x21CF, AL), + (0x21D0, 0x21D1, AL), + (0x21D2, 0x21D2, AI), + (0x21D3, 0x21D3, AL), + (0x21D4, 0x21D4, AI), + (0x21D5, 0x21F3, AL), + (0x21F4, 0x21FF, AL), + (0x2200, 0x2200, AI), + (0x2201, 0x2201, AL), + (0x2202, 0x2203, AI), + (0x2204, 0x2206, AL), + (0x2207, 0x2208, AI), + (0x2209, 0x220A, AL), + (0x220B, 0x220B, AI), + (0x220C, 0x220E, AL), + (0x220F, 0x220F, AI), + (0x2210, 0x2210, AL), + (0x2211, 0x2211, AI), + (0x2212, 0x2213, PR), + 
(0x2214, 0x2214, AL), + (0x2215, 0x2215, AI), + (0x2216, 0x2219, AL), + (0x221A, 0x221A, AI), + (0x221B, 0x221C, AL), + (0x221D, 0x2220, AI), + (0x2221, 0x2222, AL), + (0x2223, 0x2223, AI), + (0x2224, 0x2224, AL), + (0x2225, 0x2225, AI), + (0x2226, 0x2226, AL), + (0x2227, 0x222C, AI), + (0x222D, 0x222D, AL), + (0x222E, 0x222E, AI), + (0x222F, 0x2233, AL), + (0x2234, 0x2237, AI), + (0x2238, 0x223B, AL), + (0x223C, 0x223D, AI), + (0x223E, 0x2247, AL), + (0x2248, 0x2248, AI), + (0x2249, 0x224B, AL), + (0x224C, 0x224C, AI), + (0x224D, 0x2251, AL), + (0x2252, 0x2252, AI), + (0x2253, 0x225F, AL), + (0x2260, 0x2261, AI), + (0x2262, 0x2263, AL), + (0x2264, 0x2267, AI), + (0x2268, 0x2269, AL), + (0x226A, 0x226B, AI), + (0x226C, 0x226D, AL), + (0x226E, 0x226F, AI), + (0x2270, 0x2281, AL), + (0x2282, 0x2283, AI), + (0x2284, 0x2285, AL), + (0x2286, 0x2287, AI), + (0x2288, 0x2294, AL), + (0x2295, 0x2295, AI), + (0x2296, 0x2298, AL), + (0x2299, 0x2299, AI), + (0x229A, 0x22A4, AL), + (0x22A5, 0x22A5, AI), + (0x22A6, 0x22BE, AL), + (0x22BF, 0x22BF, AI), + (0x22C0, 0x22EE, AL), + (0x22EF, 0x22EF, IN), + (0x22F0, 0x22FF, AL), + (0x2300, 0x2307, AL), + (0x2308, 0x2308, OP), + (0x2309, 0x2309, CL), + (0x230A, 0x230A, OP), + (0x230B, 0x230B, CL), + (0x230C, 0x2311, AL), + (0x2312, 0x2312, AI), + (0x2313, 0x2319, AL), + (0x231A, 0x231B, ID), + (0x231C, 0x231F, AL), + (0x2320, 0x2321, AL), + (0x2322, 0x2328, AL), + (0x2329, 0x2329, OP), + (0x232A, 0x232A, CL), + (0x232B, 0x237B, AL), + (0x237C, 0x237C, AL), + (0x237D, 0x239A, AL), + (0x239B, 0x23B3, AL), + (0x23B4, 0x23DB, AL), + (0x23DC, 0x23E1, AL), + (0x23E2, 0x23EF, AL), + (0x23F0, 0x23F3, ID), + (0x23F4, 0x23FF, AL), + (0x2400, 0x2426, AL), + (0x2440, 0x244A, AL), + (0x2460, 0x249B, AI), + (0x249C, 0x24E9, AI), + (0x24EA, 0x24FE, AI), + (0x24FF, 0x24FF, AL), + (0x2500, 0x254B, AI), + (0x254C, 0x254F, AL), + (0x2550, 0x2574, AI), + (0x2575, 0x257F, AL), + (0x2580, 0x258F, AI), + (0x2590, 0x2591, AL), + (0x2592, 0x2595, AI), + 
(0x2596, 0x259F, AL), + (0x25A0, 0x25A1, AI), + (0x25A2, 0x25A2, AL), + (0x25A3, 0x25A9, AI), + (0x25AA, 0x25B1, AL), + (0x25B2, 0x25B3, AI), + (0x25B4, 0x25B5, AL), + (0x25B6, 0x25B6, AI), + (0x25B7, 0x25B7, AI), + (0x25B8, 0x25BB, AL), + (0x25BC, 0x25BD, AI), + (0x25BE, 0x25BF, AL), + (0x25C0, 0x25C0, AI), + (0x25C1, 0x25C1, AI), + (0x25C2, 0x25C5, AL), + (0x25C6, 0x25C8, AI), + (0x25C9, 0x25CA, AL), + (0x25CB, 0x25CB, AI), + (0x25CC, 0x25CD, AL), + (0x25CE, 0x25D1, AI), + (0x25D2, 0x25E1, AL), + (0x25E2, 0x25E5, AI), + (0x25E6, 0x25EE, AL), + (0x25EF, 0x25EF, AI), + (0x25F0, 0x25F7, AL), + (0x25F8, 0x25FF, AL), + (0x2600, 0x2603, ID), + (0x2604, 0x2604, AL), + (0x2605, 0x2606, AI), + (0x2607, 0x2608, AL), + (0x2609, 0x2609, AI), + (0x260A, 0x260D, AL), + (0x260E, 0x260F, AI), + (0x2610, 0x2613, AL), + (0x2614, 0x2615, ID), + (0x2616, 0x2617, AI), + (0x2618, 0x2618, ID), + (0x2619, 0x2619, AL), + (0x261A, 0x261C, ID), + (0x261D, 0x261D, EB), + (0x261E, 0x261F, ID), + (0x2620, 0x2638, AL), + (0x2639, 0x263B, ID), + (0x263C, 0x263F, AL), + (0x2640, 0x2640, AI), + (0x2641, 0x2641, AL), + (0x2642, 0x2642, AI), + (0x2643, 0x265F, AL), + (0x2660, 0x2661, AI), + (0x2662, 0x2662, AL), + (0x2663, 0x2665, AI), + (0x2666, 0x2666, AL), + (0x2667, 0x2667, AI), + (0x2668, 0x2668, ID), + (0x2669, 0x266A, AI), + (0x266B, 0x266B, AL), + (0x266C, 0x266D, AI), + (0x266E, 0x266E, AL), + (0x266F, 0x266F, AI), + (0x2670, 0x267E, AL), + (0x267F, 0x267F, ID), + (0x2680, 0x269D, AL), + (0x269E, 0x269F, AI), + (0x26A0, 0x26BC, AL), + (0x26BD, 0x26C8, ID), + (0x26C9, 0x26CC, AI), + (0x26CD, 0x26CD, ID), + (0x26CE, 0x26CE, AL), + (0x26CF, 0x26D1, ID), + (0x26D2, 0x26D2, AI), + (0x26D3, 0x26D4, ID), + (0x26D5, 0x26D7, AI), + (0x26D8, 0x26D9, ID), + (0x26DA, 0x26DB, AI), + (0x26DC, 0x26DC, ID), + (0x26DD, 0x26DE, AI), + (0x26DF, 0x26E1, ID), + (0x26E2, 0x26E2, AL), + (0x26E3, 0x26E3, AI), + (0x26E4, 0x26E7, AL), + (0x26E8, 0x26E9, AI), + (0x26EA, 0x26EA, ID), + (0x26EB, 0x26F0, AI), + 
(0x26F1, 0x26F5, ID), + (0x26F6, 0x26F6, AI), + (0x26F7, 0x26F8, ID), + (0x26F9, 0x26F9, EB), + (0x26FA, 0x26FA, ID), + (0x26FB, 0x26FC, AI), + (0x26FD, 0x26FF, ID), + (0x2700, 0x2704, ID), + (0x2705, 0x2707, AL), + (0x2708, 0x2709, ID), + (0x270A, 0x270D, EB), + (0x270E, 0x2756, AL), + (0x2757, 0x2757, AI), + (0x2758, 0x275A, AL), + (0x275B, 0x2760, QU), + (0x2761, 0x2761, AL), + (0x2762, 0x2763, EX), + (0x2764, 0x2764, ID), + (0x2765, 0x2767, AL), + (0x2768, 0x2768, OP), + (0x2769, 0x2769, CL), + (0x276A, 0x276A, OP), + (0x276B, 0x276B, CL), + (0x276C, 0x276C, OP), + (0x276D, 0x276D, CL), + (0x276E, 0x276E, OP), + (0x276F, 0x276F, CL), + (0x2770, 0x2770, OP), + (0x2771, 0x2771, CL), + (0x2772, 0x2772, OP), + (0x2773, 0x2773, CL), + (0x2774, 0x2774, OP), + (0x2775, 0x2775, CL), + (0x2776, 0x2793, AI), + (0x2794, 0x27BF, AL), + (0x27C0, 0x27C4, AL), + (0x27C5, 0x27C5, OP), + (0x27C6, 0x27C6, CL), + (0x27C7, 0x27E5, AL), + (0x27E6, 0x27E6, OP), + (0x27E7, 0x27E7, CL), + (0x27E8, 0x27E8, OP), + (0x27E9, 0x27E9, CL), + (0x27EA, 0x27EA, OP), + (0x27EB, 0x27EB, CL), + (0x27EC, 0x27EC, OP), + (0x27ED, 0x27ED, CL), + (0x27EE, 0x27EE, OP), + (0x27EF, 0x27EF, CL), + (0x27F0, 0x27FF, AL), + (0x2800, 0x28FF, AL), + (0x2900, 0x297F, AL), + (0x2980, 0x2982, AL), + (0x2983, 0x2983, OP), + (0x2984, 0x2984, CL), + (0x2985, 0x2985, OP), + (0x2986, 0x2986, CL), + (0x2987, 0x2987, OP), + (0x2988, 0x2988, CL), + (0x2989, 0x2989, OP), + (0x298A, 0x298A, CL), + (0x298B, 0x298B, OP), + (0x298C, 0x298C, CL), + (0x298D, 0x298D, OP), + (0x298E, 0x298E, CL), + (0x298F, 0x298F, OP), + (0x2990, 0x2990, CL), + (0x2991, 0x2991, OP), + (0x2992, 0x2992, CL), + (0x2993, 0x2993, OP), + (0x2994, 0x2994, CL), + (0x2995, 0x2995, OP), + (0x2996, 0x2996, CL), + (0x2997, 0x2997, OP), + (0x2998, 0x2998, CL), + (0x2999, 0x29D7, AL), + (0x29D8, 0x29D8, OP), + (0x29D9, 0x29D9, CL), + (0x29DA, 0x29DA, OP), + (0x29DB, 0x29DB, CL), + (0x29DC, 0x29FB, AL), + (0x29FC, 0x29FC, OP), + (0x29FD, 0x29FD, CL), + 
(0x29FE, 0x29FF, AL), + (0x2A00, 0x2AFF, AL), + (0x2B00, 0x2B2F, AL), + (0x2B30, 0x2B44, AL), + (0x2B45, 0x2B46, AL), + (0x2B47, 0x2B4C, AL), + (0x2B4D, 0x2B54, AL), + (0x2B55, 0x2B59, AI), + (0x2B5A, 0x2B73, AL), + (0x2B76, 0x2B95, AL), + (0x2B98, 0x2BFF, AL), + (0x2C00, 0x2C2E, AL), + (0x2C30, 0x2C5E, AL), + (0x2C60, 0x2C7B, AL), + (0x2C7C, 0x2C7D, AL), + (0x2C7E, 0x2C7F, AL), + (0x2C80, 0x2CE4, AL), + (0x2CE5, 0x2CEA, AL), + (0x2CEB, 0x2CEE, AL), + (0x2CEF, 0x2CF1, CM), + (0x2CF2, 0x2CF3, AL), + (0x2CF9, 0x2CF9, EX), + (0x2CFA, 0x2CFC, BA), + (0x2CFD, 0x2CFD, AL), + (0x2CFE, 0x2CFE, EX), + (0x2CFF, 0x2CFF, BA), + (0x2D00, 0x2D25, AL), + (0x2D27, 0x2D27, AL), + (0x2D2D, 0x2D2D, AL), + (0x2D30, 0x2D67, AL), + (0x2D6F, 0x2D6F, AL), + (0x2D70, 0x2D70, BA), + (0x2D7F, 0x2D7F, CM), + (0x2D80, 0x2D96, AL), + (0x2DA0, 0x2DA6, AL), + (0x2DA8, 0x2DAE, AL), + (0x2DB0, 0x2DB6, AL), + (0x2DB8, 0x2DBE, AL), + (0x2DC0, 0x2DC6, AL), + (0x2DC8, 0x2DCE, AL), + (0x2DD0, 0x2DD6, AL), + (0x2DD8, 0x2DDE, AL), + (0x2DE0, 0x2DFF, CM), + (0x2E00, 0x2E01, QU), + (0x2E02, 0x2E02, QU), + (0x2E03, 0x2E03, QU), + (0x2E04, 0x2E04, QU), + (0x2E05, 0x2E05, QU), + (0x2E06, 0x2E08, QU), + (0x2E09, 0x2E09, QU), + (0x2E0A, 0x2E0A, QU), + (0x2E0B, 0x2E0B, QU), + (0x2E0C, 0x2E0C, QU), + (0x2E0D, 0x2E0D, QU), + (0x2E0E, 0x2E15, BA), + (0x2E16, 0x2E16, AL), + (0x2E17, 0x2E17, BA), + (0x2E18, 0x2E18, OP), + (0x2E19, 0x2E19, BA), + (0x2E1A, 0x2E1A, AL), + (0x2E1B, 0x2E1B, AL), + (0x2E1C, 0x2E1C, QU), + (0x2E1D, 0x2E1D, QU), + (0x2E1E, 0x2E1F, AL), + (0x2E20, 0x2E20, QU), + (0x2E21, 0x2E21, QU), + (0x2E22, 0x2E22, OP), + (0x2E23, 0x2E23, CL), + (0x2E24, 0x2E24, OP), + (0x2E25, 0x2E25, CL), + (0x2E26, 0x2E26, OP), + (0x2E27, 0x2E27, CL), + (0x2E28, 0x2E28, OP), + (0x2E29, 0x2E29, CL), + (0x2E2A, 0x2E2D, BA), + (0x2E2E, 0x2E2E, EX), + (0x2E2F, 0x2E2F, AL), + (0x2E30, 0x2E31, BA), + (0x2E32, 0x2E32, AL), + (0x2E33, 0x2E34, BA), + (0x2E35, 0x2E39, AL), + (0x2E3A, 0x2E3B, B2), + (0x2E3C, 0x2E3E, BA), + 
(0x2E3F, 0x2E3F, AL), + (0x2E40, 0x2E40, BA), + (0x2E41, 0x2E41, BA), + (0x2E42, 0x2E42, OP), + (0x2E43, 0x2E4A, BA), + (0x2E4B, 0x2E4B, AL), + (0x2E4C, 0x2E4C, BA), + (0x2E4D, 0x2E4D, AL), + (0x2E4E, 0x2E4F, BA), + (0x2E80, 0x2E99, ID), + (0x2E9B, 0x2EF3, ID), + (0x2F00, 0x2FD5, ID), + (0x2FF0, 0x2FFB, ID), + (0x3000, 0x3000, BA), + (0x3001, 0x3002, CL), + (0x3003, 0x3003, ID), + (0x3004, 0x3004, ID), + (0x3005, 0x3005, NS), + (0x3006, 0x3006, ID), + (0x3007, 0x3007, ID), + (0x3008, 0x3008, OP), + (0x3009, 0x3009, CL), + (0x300A, 0x300A, OP), + (0x300B, 0x300B, CL), + (0x300C, 0x300C, OP), + (0x300D, 0x300D, CL), + (0x300E, 0x300E, OP), + (0x300F, 0x300F, CL), + (0x3010, 0x3010, OP), + (0x3011, 0x3011, CL), + (0x3012, 0x3013, ID), + (0x3014, 0x3014, OP), + (0x3015, 0x3015, CL), + (0x3016, 0x3016, OP), + (0x3017, 0x3017, CL), + (0x3018, 0x3018, OP), + (0x3019, 0x3019, CL), + (0x301A, 0x301A, OP), + (0x301B, 0x301B, CL), + (0x301C, 0x301C, NS), + (0x301D, 0x301D, OP), + (0x301E, 0x301F, CL), + (0x3020, 0x3020, ID), + (0x3021, 0x3029, ID), + (0x302A, 0x302D, CM), + (0x302E, 0x302F, CM), + (0x3030, 0x3030, ID), + (0x3031, 0x3034, ID), + (0x3035, 0x3035, CM), + (0x3036, 0x3037, ID), + (0x3038, 0x303A, ID), + (0x303B, 0x303B, NS), + (0x303C, 0x303C, NS), + (0x303D, 0x303D, ID), + (0x303E, 0x303F, ID), + (0x3041, 0x3041, CJ), + (0x3042, 0x3042, ID), + (0x3043, 0x3043, CJ), + (0x3044, 0x3044, ID), + (0x3045, 0x3045, CJ), + (0x3046, 0x3046, ID), + (0x3047, 0x3047, CJ), + (0x3048, 0x3048, ID), + (0x3049, 0x3049, CJ), + (0x304A, 0x3062, ID), + (0x3063, 0x3063, CJ), + (0x3064, 0x3082, ID), + (0x3083, 0x3083, CJ), + (0x3084, 0x3084, ID), + (0x3085, 0x3085, CJ), + (0x3086, 0x3086, ID), + (0x3087, 0x3087, CJ), + (0x3088, 0x308D, ID), + (0x308E, 0x308E, CJ), + (0x308F, 0x3094, ID), + (0x3095, 0x3096, CJ), + (0x3099, 0x309A, CM), + (0x309B, 0x309C, NS), + (0x309D, 0x309E, NS), + (0x309F, 0x309F, ID), + (0x30A0, 0x30A0, NS), + (0x30A1, 0x30A1, CJ), + (0x30A2, 0x30A2, ID), + 
(0x30A3, 0x30A3, CJ), + (0x30A4, 0x30A4, ID), + (0x30A5, 0x30A5, CJ), + (0x30A6, 0x30A6, ID), + (0x30A7, 0x30A7, CJ), + (0x30A8, 0x30A8, ID), + (0x30A9, 0x30A9, CJ), + (0x30AA, 0x30C2, ID), + (0x30C3, 0x30C3, CJ), + (0x30C4, 0x30E2, ID), + (0x30E3, 0x30E3, CJ), + (0x30E4, 0x30E4, ID), + (0x30E5, 0x30E5, CJ), + (0x30E6, 0x30E6, ID), + (0x30E7, 0x30E7, CJ), + (0x30E8, 0x30ED, ID), + (0x30EE, 0x30EE, CJ), + (0x30EF, 0x30F4, ID), + (0x30F5, 0x30F6, CJ), + (0x30F7, 0x30FA, ID), + (0x30FB, 0x30FB, NS), + (0x30FC, 0x30FC, CJ), + (0x30FD, 0x30FE, NS), + (0x30FF, 0x30FF, ID), + (0x3105, 0x312F, ID), + (0x3131, 0x318E, ID), + (0x3190, 0x3191, ID), + (0x3192, 0x3195, ID), + (0x3196, 0x319F, ID), + (0x31A0, 0x31BA, ID), + (0x31C0, 0x31E3, ID), + (0x31F0, 0x31FF, CJ), + (0x3200, 0x321E, ID), + (0x3220, 0x3229, ID), + (0x322A, 0x3247, ID), + (0x3248, 0x324F, AI), + (0x3250, 0x3250, ID), + (0x3251, 0x325F, ID), + (0x3260, 0x327F, ID), + (0x3280, 0x3289, ID), + (0x328A, 0x32B0, ID), + (0x32B1, 0x32BF, ID), + (0x32C0, 0x32FF, ID), + (0x3300, 0x33FF, ID), + (0x3400, 0x4DB5, ID), + (0x4DB6, 0x4DBF, ID), + (0x4DC0, 0x4DFF, AL), + (0x4E00, 0x9FEF, ID), + (0x9FF0, 0x9FFF, ID), + (0xA000, 0xA014, ID), + (0xA015, 0xA015, NS), + (0xA016, 0xA48C, ID), + (0xA490, 0xA4C6, ID), + (0xA4D0, 0xA4F7, AL), + (0xA4F8, 0xA4FD, AL), + (0xA4FE, 0xA4FF, BA), + (0xA500, 0xA60B, AL), + (0xA60C, 0xA60C, AL), + (0xA60D, 0xA60D, BA), + (0xA60E, 0xA60E, EX), + (0xA60F, 0xA60F, BA), + (0xA610, 0xA61F, AL), + (0xA620, 0xA629, NU), + (0xA62A, 0xA62B, AL), + (0xA640, 0xA66D, AL), + (0xA66E, 0xA66E, AL), + (0xA66F, 0xA66F, CM), + (0xA670, 0xA672, CM), + (0xA673, 0xA673, AL), + (0xA674, 0xA67D, CM), + (0xA67E, 0xA67E, AL), + (0xA67F, 0xA67F, AL), + (0xA680, 0xA69B, AL), + (0xA69C, 0xA69D, AL), + (0xA69E, 0xA69F, CM), + (0xA6A0, 0xA6E5, AL), + (0xA6E6, 0xA6EF, AL), + (0xA6F0, 0xA6F1, CM), + (0xA6F2, 0xA6F2, AL), + (0xA6F3, 0xA6F7, BA), + (0xA700, 0xA716, AL), + (0xA717, 0xA71F, AL), + (0xA720, 0xA721, AL), + 
(0xA722, 0xA76F, AL), + (0xA770, 0xA770, AL), + (0xA771, 0xA787, AL), + (0xA788, 0xA788, AL), + (0xA789, 0xA78A, AL), + (0xA78B, 0xA78E, AL), + (0xA78F, 0xA78F, AL), + (0xA790, 0xA7BF, AL), + (0xA7C2, 0xA7C6, AL), + (0xA7F7, 0xA7F7, AL), + (0xA7F8, 0xA7F9, AL), + (0xA7FA, 0xA7FA, AL), + (0xA7FB, 0xA7FF, AL), + (0xA800, 0xA801, AL), + (0xA802, 0xA802, CM), + (0xA803, 0xA805, AL), + (0xA806, 0xA806, CM), + (0xA807, 0xA80A, AL), + (0xA80B, 0xA80B, CM), + (0xA80C, 0xA822, AL), + (0xA823, 0xA824, CM), + (0xA825, 0xA826, CM), + (0xA827, 0xA827, CM), + (0xA828, 0xA82B, AL), + (0xA830, 0xA835, AL), + (0xA836, 0xA837, AL), + (0xA838, 0xA838, PO), + (0xA839, 0xA839, AL), + (0xA840, 0xA873, AL), + (0xA874, 0xA875, BB), + (0xA876, 0xA877, EX), + (0xA880, 0xA881, CM), + (0xA882, 0xA8B3, AL), + (0xA8B4, 0xA8C3, CM), + (0xA8C4, 0xA8C5, CM), + (0xA8CE, 0xA8CF, BA), + (0xA8D0, 0xA8D9, NU), + (0xA8E0, 0xA8F1, CM), + (0xA8F2, 0xA8F7, AL), + (0xA8F8, 0xA8FA, AL), + (0xA8FB, 0xA8FB, AL), + (0xA8FC, 0xA8FC, BB), + (0xA8FD, 0xA8FE, AL), + (0xA8FF, 0xA8FF, CM), + (0xA900, 0xA909, NU), + (0xA90A, 0xA925, AL), + (0xA926, 0xA92D, CM), + (0xA92E, 0xA92F, BA), + (0xA930, 0xA946, AL), + (0xA947, 0xA951, CM), + (0xA952, 0xA953, CM), + (0xA95F, 0xA95F, AL), + (0xA960, 0xA97C, JL), + (0xA980, 0xA982, CM), + (0xA983, 0xA983, CM), + (0xA984, 0xA9B2, AL), + (0xA9B3, 0xA9B3, CM), + (0xA9B4, 0xA9B5, CM), + (0xA9B6, 0xA9B9, CM), + (0xA9BA, 0xA9BB, CM), + (0xA9BC, 0xA9BD, CM), + (0xA9BE, 0xA9C0, CM), + (0xA9C1, 0xA9C6, AL), + (0xA9C7, 0xA9C9, BA), + (0xA9CA, 0xA9CD, AL), + (0xA9CF, 0xA9CF, AL), + (0xA9D0, 0xA9D9, NU), + (0xA9DE, 0xA9DF, AL), + (0xA9E0, 0xA9E4, SA), + (0xA9E5, 0xA9E5, SA), + (0xA9E6, 0xA9E6, SA), + (0xA9E7, 0xA9EF, SA), + (0xA9F0, 0xA9F9, NU), + (0xA9FA, 0xA9FE, SA), + (0xAA00, 0xAA28, AL), + (0xAA29, 0xAA2E, CM), + (0xAA2F, 0xAA30, CM), + (0xAA31, 0xAA32, CM), + (0xAA33, 0xAA34, CM), + (0xAA35, 0xAA36, CM), + (0xAA40, 0xAA42, AL), + (0xAA43, 0xAA43, CM), + (0xAA44, 0xAA4B, AL), + 
(0xAA4C, 0xAA4C, CM), + (0xAA4D, 0xAA4D, CM), + (0xAA50, 0xAA59, NU), + (0xAA5C, 0xAA5C, AL), + (0xAA5D, 0xAA5F, BA), + (0xAA60, 0xAA6F, SA), + (0xAA70, 0xAA70, SA), + (0xAA71, 0xAA76, SA), + (0xAA77, 0xAA79, SA), + (0xAA7A, 0xAA7A, SA), + (0xAA7B, 0xAA7B, SA), + (0xAA7C, 0xAA7C, SA), + (0xAA7D, 0xAA7D, SA), + (0xAA7E, 0xAA7F, SA), + (0xAA80, 0xAAAF, SA), + (0xAAB0, 0xAAB0, SA), + (0xAAB1, 0xAAB1, SA), + (0xAAB2, 0xAAB4, SA), + (0xAAB5, 0xAAB6, SA), + (0xAAB7, 0xAAB8, SA), + (0xAAB9, 0xAABD, SA), + (0xAABE, 0xAABF, SA), + (0xAAC0, 0xAAC0, SA), + (0xAAC1, 0xAAC1, SA), + (0xAAC2, 0xAAC2, SA), + (0xAADB, 0xAADC, SA), + (0xAADD, 0xAADD, SA), + (0xAADE, 0xAADF, SA), + (0xAAE0, 0xAAEA, AL), + (0xAAEB, 0xAAEB, CM), + (0xAAEC, 0xAAED, CM), + (0xAAEE, 0xAAEF, CM), + (0xAAF0, 0xAAF1, BA), + (0xAAF2, 0xAAF2, AL), + (0xAAF3, 0xAAF4, AL), + (0xAAF5, 0xAAF5, CM), + (0xAAF6, 0xAAF6, CM), + (0xAB01, 0xAB06, AL), + (0xAB09, 0xAB0E, AL), + (0xAB11, 0xAB16, AL), + (0xAB20, 0xAB26, AL), + (0xAB28, 0xAB2E, AL), + (0xAB30, 0xAB5A, AL), + (0xAB5B, 0xAB5B, AL), + (0xAB5C, 0xAB5F, AL), + (0xAB60, 0xAB67, AL), + (0xAB70, 0xABBF, AL), + (0xABC0, 0xABE2, AL), + (0xABE3, 0xABE4, CM), + (0xABE5, 0xABE5, CM), + (0xABE6, 0xABE7, CM), + (0xABE8, 0xABE8, CM), + (0xABE9, 0xABEA, CM), + (0xABEB, 0xABEB, BA), + (0xABEC, 0xABEC, CM), + (0xABED, 0xABED, CM), + (0xABF0, 0xABF9, NU), + (0xAC00, 0xAC00, H2), + (0xAC01, 0xAC1B, H3), + (0xAC1C, 0xAC1C, H2), + (0xAC1D, 0xAC37, H3), + (0xAC38, 0xAC38, H2), + (0xAC39, 0xAC53, H3), + (0xAC54, 0xAC54, H2), + (0xAC55, 0xAC6F, H3), + (0xAC70, 0xAC70, H2), + (0xAC71, 0xAC8B, H3), + (0xAC8C, 0xAC8C, H2), + (0xAC8D, 0xACA7, H3), + (0xACA8, 0xACA8, H2), + (0xACA9, 0xACC3, H3), + (0xACC4, 0xACC4, H2), + (0xACC5, 0xACDF, H3), + (0xACE0, 0xACE0, H2), + (0xACE1, 0xACFB, H3), + (0xACFC, 0xACFC, H2), + (0xACFD, 0xAD17, H3), + (0xAD18, 0xAD18, H2), + (0xAD19, 0xAD33, H3), + (0xAD34, 0xAD34, H2), + (0xAD35, 0xAD4F, H3), + (0xAD50, 0xAD50, H2), + (0xAD51, 0xAD6B, H3), + 
(0xAD6C, 0xAD6C, H2), + (0xAD6D, 0xAD87, H3), + (0xAD88, 0xAD88, H2), + (0xAD89, 0xADA3, H3), + (0xADA4, 0xADA4, H2), + (0xADA5, 0xADBF, H3), + (0xADC0, 0xADC0, H2), + (0xADC1, 0xADDB, H3), + (0xADDC, 0xADDC, H2), + (0xADDD, 0xADF7, H3), + (0xADF8, 0xADF8, H2), + (0xADF9, 0xAE13, H3), + (0xAE14, 0xAE14, H2), + (0xAE15, 0xAE2F, H3), + (0xAE30, 0xAE30, H2), + (0xAE31, 0xAE4B, H3), + (0xAE4C, 0xAE4C, H2), + (0xAE4D, 0xAE67, H3), + (0xAE68, 0xAE68, H2), + (0xAE69, 0xAE83, H3), + (0xAE84, 0xAE84, H2), + (0xAE85, 0xAE9F, H3), + (0xAEA0, 0xAEA0, H2), + (0xAEA1, 0xAEBB, H3), + (0xAEBC, 0xAEBC, H2), + (0xAEBD, 0xAED7, H3), + (0xAED8, 0xAED8, H2), + (0xAED9, 0xAEF3, H3), + (0xAEF4, 0xAEF4, H2), + (0xAEF5, 0xAF0F, H3), + (0xAF10, 0xAF10, H2), + (0xAF11, 0xAF2B, H3), + (0xAF2C, 0xAF2C, H2), + (0xAF2D, 0xAF47, H3), + (0xAF48, 0xAF48, H2), + (0xAF49, 0xAF63, H3), + (0xAF64, 0xAF64, H2), + (0xAF65, 0xAF7F, H3), + (0xAF80, 0xAF80, H2), + (0xAF81, 0xAF9B, H3), + (0xAF9C, 0xAF9C, H2), + (0xAF9D, 0xAFB7, H3), + (0xAFB8, 0xAFB8, H2), + (0xAFB9, 0xAFD3, H3), + (0xAFD4, 0xAFD4, H2), + (0xAFD5, 0xAFEF, H3), + (0xAFF0, 0xAFF0, H2), + (0xAFF1, 0xB00B, H3), + (0xB00C, 0xB00C, H2), + (0xB00D, 0xB027, H3), + (0xB028, 0xB028, H2), + (0xB029, 0xB043, H3), + (0xB044, 0xB044, H2), + (0xB045, 0xB05F, H3), + (0xB060, 0xB060, H2), + (0xB061, 0xB07B, H3), + (0xB07C, 0xB07C, H2), + (0xB07D, 0xB097, H3), + (0xB098, 0xB098, H2), + (0xB099, 0xB0B3, H3), + (0xB0B4, 0xB0B4, H2), + (0xB0B5, 0xB0CF, H3), + (0xB0D0, 0xB0D0, H2), + (0xB0D1, 0xB0EB, H3), + (0xB0EC, 0xB0EC, H2), + (0xB0ED, 0xB107, H3), + (0xB108, 0xB108, H2), + (0xB109, 0xB123, H3), + (0xB124, 0xB124, H2), + (0xB125, 0xB13F, H3), + (0xB140, 0xB140, H2), + (0xB141, 0xB15B, H3), + (0xB15C, 0xB15C, H2), + (0xB15D, 0xB177, H3), + (0xB178, 0xB178, H2), + (0xB179, 0xB193, H3), + (0xB194, 0xB194, H2), + (0xB195, 0xB1AF, H3), + (0xB1B0, 0xB1B0, H2), + (0xB1B1, 0xB1CB, H3), + (0xB1CC, 0xB1CC, H2), + (0xB1CD, 0xB1E7, H3), + (0xB1E8, 0xB1E8, H2), + 
(0xB1E9, 0xB203, H3), + (0xB204, 0xB204, H2), + (0xB205, 0xB21F, H3), + (0xB220, 0xB220, H2), + (0xB221, 0xB23B, H3), + (0xB23C, 0xB23C, H2), + (0xB23D, 0xB257, H3), + (0xB258, 0xB258, H2), + (0xB259, 0xB273, H3), + (0xB274, 0xB274, H2), + (0xB275, 0xB28F, H3), + (0xB290, 0xB290, H2), + (0xB291, 0xB2AB, H3), + (0xB2AC, 0xB2AC, H2), + (0xB2AD, 0xB2C7, H3), + (0xB2C8, 0xB2C8, H2), + (0xB2C9, 0xB2E3, H3), + (0xB2E4, 0xB2E4, H2), + (0xB2E5, 0xB2FF, H3), + (0xB300, 0xB300, H2), + (0xB301, 0xB31B, H3), + (0xB31C, 0xB31C, H2), + (0xB31D, 0xB337, H3), + (0xB338, 0xB338, H2), + (0xB339, 0xB353, H3), + (0xB354, 0xB354, H2), + (0xB355, 0xB36F, H3), + (0xB370, 0xB370, H2), + (0xB371, 0xB38B, H3), + (0xB38C, 0xB38C, H2), + (0xB38D, 0xB3A7, H3), + (0xB3A8, 0xB3A8, H2), + (0xB3A9, 0xB3C3, H3), + (0xB3C4, 0xB3C4, H2), + (0xB3C5, 0xB3DF, H3), + (0xB3E0, 0xB3E0, H2), + (0xB3E1, 0xB3FB, H3), + (0xB3FC, 0xB3FC, H2), + (0xB3FD, 0xB417, H3), + (0xB418, 0xB418, H2), + (0xB419, 0xB433, H3), + (0xB434, 0xB434, H2), + (0xB435, 0xB44F, H3), + (0xB450, 0xB450, H2), + (0xB451, 0xB46B, H3), + (0xB46C, 0xB46C, H2), + (0xB46D, 0xB487, H3), + (0xB488, 0xB488, H2), + (0xB489, 0xB4A3, H3), + (0xB4A4, 0xB4A4, H2), + (0xB4A5, 0xB4BF, H3), + (0xB4C0, 0xB4C0, H2), + (0xB4C1, 0xB4DB, H3), + (0xB4DC, 0xB4DC, H2), + (0xB4DD, 0xB4F7, H3), + (0xB4F8, 0xB4F8, H2), + (0xB4F9, 0xB513, H3), + (0xB514, 0xB514, H2), + (0xB515, 0xB52F, H3), + (0xB530, 0xB530, H2), + (0xB531, 0xB54B, H3), + (0xB54C, 0xB54C, H2), + (0xB54D, 0xB567, H3), + (0xB568, 0xB568, H2), + (0xB569, 0xB583, H3), + (0xB584, 0xB584, H2), + (0xB585, 0xB59F, H3), + (0xB5A0, 0xB5A0, H2), + (0xB5A1, 0xB5BB, H3), + (0xB5BC, 0xB5BC, H2), + (0xB5BD, 0xB5D7, H3), + (0xB5D8, 0xB5D8, H2), + (0xB5D9, 0xB5F3, H3), + (0xB5F4, 0xB5F4, H2), + (0xB5F5, 0xB60F, H3), + (0xB610, 0xB610, H2), + (0xB611, 0xB62B, H3), + (0xB62C, 0xB62C, H2), + (0xB62D, 0xB647, H3), + (0xB648, 0xB648, H2), + (0xB649, 0xB663, H3), + (0xB664, 0xB664, H2), + (0xB665, 0xB67F, H3), + 
(0xB680, 0xB680, H2), + (0xB681, 0xB69B, H3), + (0xB69C, 0xB69C, H2), + (0xB69D, 0xB6B7, H3), + (0xB6B8, 0xB6B8, H2), + (0xB6B9, 0xB6D3, H3), + (0xB6D4, 0xB6D4, H2), + (0xB6D5, 0xB6EF, H3), + (0xB6F0, 0xB6F0, H2), + (0xB6F1, 0xB70B, H3), + (0xB70C, 0xB70C, H2), + (0xB70D, 0xB727, H3), + (0xB728, 0xB728, H2), + (0xB729, 0xB743, H3), + (0xB744, 0xB744, H2), + (0xB745, 0xB75F, H3), + (0xB760, 0xB760, H2), + (0xB761, 0xB77B, H3), + (0xB77C, 0xB77C, H2), + (0xB77D, 0xB797, H3), + (0xB798, 0xB798, H2), + (0xB799, 0xB7B3, H3), + (0xB7B4, 0xB7B4, H2), + (0xB7B5, 0xB7CF, H3), + (0xB7D0, 0xB7D0, H2), + (0xB7D1, 0xB7EB, H3), + (0xB7EC, 0xB7EC, H2), + (0xB7ED, 0xB807, H3), + (0xB808, 0xB808, H2), + (0xB809, 0xB823, H3), + (0xB824, 0xB824, H2), + (0xB825, 0xB83F, H3), + (0xB840, 0xB840, H2), + (0xB841, 0xB85B, H3), + (0xB85C, 0xB85C, H2), + (0xB85D, 0xB877, H3), + (0xB878, 0xB878, H2), + (0xB879, 0xB893, H3), + (0xB894, 0xB894, H2), + (0xB895, 0xB8AF, H3), + (0xB8B0, 0xB8B0, H2), + (0xB8B1, 0xB8CB, H3), + (0xB8CC, 0xB8CC, H2), + (0xB8CD, 0xB8E7, H3), + (0xB8E8, 0xB8E8, H2), + (0xB8E9, 0xB903, H3), + (0xB904, 0xB904, H2), + (0xB905, 0xB91F, H3), + (0xB920, 0xB920, H2), + (0xB921, 0xB93B, H3), + (0xB93C, 0xB93C, H2), + (0xB93D, 0xB957, H3), + (0xB958, 0xB958, H2), + (0xB959, 0xB973, H3), + (0xB974, 0xB974, H2), + (0xB975, 0xB98F, H3), + (0xB990, 0xB990, H2), + (0xB991, 0xB9AB, H3), + (0xB9AC, 0xB9AC, H2), + (0xB9AD, 0xB9C7, H3), + (0xB9C8, 0xB9C8, H2), + (0xB9C9, 0xB9E3, H3), + (0xB9E4, 0xB9E4, H2), + (0xB9E5, 0xB9FF, H3), + (0xBA00, 0xBA00, H2), + (0xBA01, 0xBA1B, H3), + (0xBA1C, 0xBA1C, H2), + (0xBA1D, 0xBA37, H3), + (0xBA38, 0xBA38, H2), + (0xBA39, 0xBA53, H3), + (0xBA54, 0xBA54, H2), + (0xBA55, 0xBA6F, H3), + (0xBA70, 0xBA70, H2), + (0xBA71, 0xBA8B, H3), + (0xBA8C, 0xBA8C, H2), + (0xBA8D, 0xBAA7, H3), + (0xBAA8, 0xBAA8, H2), + (0xBAA9, 0xBAC3, H3), + (0xBAC4, 0xBAC4, H2), + (0xBAC5, 0xBADF, H3), + (0xBAE0, 0xBAE0, H2), + (0xBAE1, 0xBAFB, H3), + (0xBAFC, 0xBAFC, H2), + 
(0xBAFD, 0xBB17, H3), + (0xBB18, 0xBB18, H2), + (0xBB19, 0xBB33, H3), + (0xBB34, 0xBB34, H2), + (0xBB35, 0xBB4F, H3), + (0xBB50, 0xBB50, H2), + (0xBB51, 0xBB6B, H3), + (0xBB6C, 0xBB6C, H2), + (0xBB6D, 0xBB87, H3), + (0xBB88, 0xBB88, H2), + (0xBB89, 0xBBA3, H3), + (0xBBA4, 0xBBA4, H2), + (0xBBA5, 0xBBBF, H3), + (0xBBC0, 0xBBC0, H2), + (0xBBC1, 0xBBDB, H3), + (0xBBDC, 0xBBDC, H2), + (0xBBDD, 0xBBF7, H3), + (0xBBF8, 0xBBF8, H2), + (0xBBF9, 0xBC13, H3), + (0xBC14, 0xBC14, H2), + (0xBC15, 0xBC2F, H3), + (0xBC30, 0xBC30, H2), + (0xBC31, 0xBC4B, H3), + (0xBC4C, 0xBC4C, H2), + (0xBC4D, 0xBC67, H3), + (0xBC68, 0xBC68, H2), + (0xBC69, 0xBC83, H3), + (0xBC84, 0xBC84, H2), + (0xBC85, 0xBC9F, H3), + (0xBCA0, 0xBCA0, H2), + (0xBCA1, 0xBCBB, H3), + (0xBCBC, 0xBCBC, H2), + (0xBCBD, 0xBCD7, H3), + (0xBCD8, 0xBCD8, H2), + (0xBCD9, 0xBCF3, H3), + (0xBCF4, 0xBCF4, H2), + (0xBCF5, 0xBD0F, H3), + (0xBD10, 0xBD10, H2), + (0xBD11, 0xBD2B, H3), + (0xBD2C, 0xBD2C, H2), + (0xBD2D, 0xBD47, H3), + (0xBD48, 0xBD48, H2), + (0xBD49, 0xBD63, H3), + (0xBD64, 0xBD64, H2), + (0xBD65, 0xBD7F, H3), + (0xBD80, 0xBD80, H2), + (0xBD81, 0xBD9B, H3), + (0xBD9C, 0xBD9C, H2), + (0xBD9D, 0xBDB7, H3), + (0xBDB8, 0xBDB8, H2), + (0xBDB9, 0xBDD3, H3), + (0xBDD4, 0xBDD4, H2), + (0xBDD5, 0xBDEF, H3), + (0xBDF0, 0xBDF0, H2), + (0xBDF1, 0xBE0B, H3), + (0xBE0C, 0xBE0C, H2), + (0xBE0D, 0xBE27, H3), + (0xBE28, 0xBE28, H2), + (0xBE29, 0xBE43, H3), + (0xBE44, 0xBE44, H2), + (0xBE45, 0xBE5F, H3), + (0xBE60, 0xBE60, H2), + (0xBE61, 0xBE7B, H3), + (0xBE7C, 0xBE7C, H2), + (0xBE7D, 0xBE97, H3), + (0xBE98, 0xBE98, H2), + (0xBE99, 0xBEB3, H3), + (0xBEB4, 0xBEB4, H2), + (0xBEB5, 0xBECF, H3), + (0xBED0, 0xBED0, H2), + (0xBED1, 0xBEEB, H3), + (0xBEEC, 0xBEEC, H2), + (0xBEED, 0xBF07, H3), + (0xBF08, 0xBF08, H2), + (0xBF09, 0xBF23, H3), + (0xBF24, 0xBF24, H2), + (0xBF25, 0xBF3F, H3), + (0xBF40, 0xBF40, H2), + (0xBF41, 0xBF5B, H3), + (0xBF5C, 0xBF5C, H2), + (0xBF5D, 0xBF77, H3), + (0xBF78, 0xBF78, H2), + (0xBF79, 0xBF93, H3), + 
(0xBF94, 0xBF94, H2), + (0xBF95, 0xBFAF, H3), + (0xBFB0, 0xBFB0, H2), + (0xBFB1, 0xBFCB, H3), + (0xBFCC, 0xBFCC, H2), + (0xBFCD, 0xBFE7, H3), + (0xBFE8, 0xBFE8, H2), + (0xBFE9, 0xC003, H3), + (0xC004, 0xC004, H2), + (0xC005, 0xC01F, H3), + (0xC020, 0xC020, H2), + (0xC021, 0xC03B, H3), + (0xC03C, 0xC03C, H2), + (0xC03D, 0xC057, H3), + (0xC058, 0xC058, H2), + (0xC059, 0xC073, H3), + (0xC074, 0xC074, H2), + (0xC075, 0xC08F, H3), + (0xC090, 0xC090, H2), + (0xC091, 0xC0AB, H3), + (0xC0AC, 0xC0AC, H2), + (0xC0AD, 0xC0C7, H3), + (0xC0C8, 0xC0C8, H2), + (0xC0C9, 0xC0E3, H3), + (0xC0E4, 0xC0E4, H2), + (0xC0E5, 0xC0FF, H3), + (0xC100, 0xC100, H2), + (0xC101, 0xC11B, H3), + (0xC11C, 0xC11C, H2), + (0xC11D, 0xC137, H3), + (0xC138, 0xC138, H2), + (0xC139, 0xC153, H3), + (0xC154, 0xC154, H2), + (0xC155, 0xC16F, H3), + (0xC170, 0xC170, H2), + (0xC171, 0xC18B, H3), + (0xC18C, 0xC18C, H2), + (0xC18D, 0xC1A7, H3), + (0xC1A8, 0xC1A8, H2), + (0xC1A9, 0xC1C3, H3), + (0xC1C4, 0xC1C4, H2), + (0xC1C5, 0xC1DF, H3), + (0xC1E0, 0xC1E0, H2), + (0xC1E1, 0xC1FB, H3), + (0xC1FC, 0xC1FC, H2), + (0xC1FD, 0xC217, H3), + (0xC218, 0xC218, H2), + (0xC219, 0xC233, H3), + (0xC234, 0xC234, H2), + (0xC235, 0xC24F, H3), + (0xC250, 0xC250, H2), + (0xC251, 0xC26B, H3), + (0xC26C, 0xC26C, H2), + (0xC26D, 0xC287, H3), + (0xC288, 0xC288, H2), + (0xC289, 0xC2A3, H3), + (0xC2A4, 0xC2A4, H2), + (0xC2A5, 0xC2BF, H3), + (0xC2C0, 0xC2C0, H2), + (0xC2C1, 0xC2DB, H3), + (0xC2DC, 0xC2DC, H2), + (0xC2DD, 0xC2F7, H3), + (0xC2F8, 0xC2F8, H2), + (0xC2F9, 0xC313, H3), + (0xC314, 0xC314, H2), + (0xC315, 0xC32F, H3), + (0xC330, 0xC330, H2), + (0xC331, 0xC34B, H3), + (0xC34C, 0xC34C, H2), + (0xC34D, 0xC367, H3), + (0xC368, 0xC368, H2), + (0xC369, 0xC383, H3), + (0xC384, 0xC384, H2), + (0xC385, 0xC39F, H3), + (0xC3A0, 0xC3A0, H2), + (0xC3A1, 0xC3BB, H3), + (0xC3BC, 0xC3BC, H2), + (0xC3BD, 0xC3D7, H3), + (0xC3D8, 0xC3D8, H2), + (0xC3D9, 0xC3F3, H3), + (0xC3F4, 0xC3F4, H2), + (0xC3F5, 0xC40F, H3), + (0xC410, 0xC410, H2), + 
(0xC411, 0xC42B, H3), + (0xC42C, 0xC42C, H2), + (0xC42D, 0xC447, H3), + (0xC448, 0xC448, H2), + (0xC449, 0xC463, H3), + (0xC464, 0xC464, H2), + (0xC465, 0xC47F, H3), + (0xC480, 0xC480, H2), + (0xC481, 0xC49B, H3), + (0xC49C, 0xC49C, H2), + (0xC49D, 0xC4B7, H3), + (0xC4B8, 0xC4B8, H2), + (0xC4B9, 0xC4D3, H3), + (0xC4D4, 0xC4D4, H2), + (0xC4D5, 0xC4EF, H3), + (0xC4F0, 0xC4F0, H2), + (0xC4F1, 0xC50B, H3), + (0xC50C, 0xC50C, H2), + (0xC50D, 0xC527, H3), + (0xC528, 0xC528, H2), + (0xC529, 0xC543, H3), + (0xC544, 0xC544, H2), + (0xC545, 0xC55F, H3), + (0xC560, 0xC560, H2), + (0xC561, 0xC57B, H3), + (0xC57C, 0xC57C, H2), + (0xC57D, 0xC597, H3), + (0xC598, 0xC598, H2), + (0xC599, 0xC5B3, H3), + (0xC5B4, 0xC5B4, H2), + (0xC5B5, 0xC5CF, H3), + (0xC5D0, 0xC5D0, H2), + (0xC5D1, 0xC5EB, H3), + (0xC5EC, 0xC5EC, H2), + (0xC5ED, 0xC607, H3), + (0xC608, 0xC608, H2), + (0xC609, 0xC623, H3), + (0xC624, 0xC624, H2), + (0xC625, 0xC63F, H3), + (0xC640, 0xC640, H2), + (0xC641, 0xC65B, H3), + (0xC65C, 0xC65C, H2), + (0xC65D, 0xC677, H3), + (0xC678, 0xC678, H2), + (0xC679, 0xC693, H3), + (0xC694, 0xC694, H2), + (0xC695, 0xC6AF, H3), + (0xC6B0, 0xC6B0, H2), + (0xC6B1, 0xC6CB, H3), + (0xC6CC, 0xC6CC, H2), + (0xC6CD, 0xC6E7, H3), + (0xC6E8, 0xC6E8, H2), + (0xC6E9, 0xC703, H3), + (0xC704, 0xC704, H2), + (0xC705, 0xC71F, H3), + (0xC720, 0xC720, H2), + (0xC721, 0xC73B, H3), + (0xC73C, 0xC73C, H2), + (0xC73D, 0xC757, H3), + (0xC758, 0xC758, H2), + (0xC759, 0xC773, H3), + (0xC774, 0xC774, H2), + (0xC775, 0xC78F, H3), + (0xC790, 0xC790, H2), + (0xC791, 0xC7AB, H3), + (0xC7AC, 0xC7AC, H2), + (0xC7AD, 0xC7C7, H3), + (0xC7C8, 0xC7C8, H2), + (0xC7C9, 0xC7E3, H3), + (0xC7E4, 0xC7E4, H2), + (0xC7E5, 0xC7FF, H3), + (0xC800, 0xC800, H2), + (0xC801, 0xC81B, H3), + (0xC81C, 0xC81C, H2), + (0xC81D, 0xC837, H3), + (0xC838, 0xC838, H2), + (0xC839, 0xC853, H3), + (0xC854, 0xC854, H2), + (0xC855, 0xC86F, H3), + (0xC870, 0xC870, H2), + (0xC871, 0xC88B, H3), + (0xC88C, 0xC88C, H2), + (0xC88D, 0xC8A7, H3), + 
(0xC8A8, 0xC8A8, H2), + (0xC8A9, 0xC8C3, H3), + (0xC8C4, 0xC8C4, H2), + (0xC8C5, 0xC8DF, H3), + (0xC8E0, 0xC8E0, H2), + (0xC8E1, 0xC8FB, H3), + (0xC8FC, 0xC8FC, H2), + (0xC8FD, 0xC917, H3), + (0xC918, 0xC918, H2), + (0xC919, 0xC933, H3), + (0xC934, 0xC934, H2), + (0xC935, 0xC94F, H3), + (0xC950, 0xC950, H2), + (0xC951, 0xC96B, H3), + (0xC96C, 0xC96C, H2), + (0xC96D, 0xC987, H3), + (0xC988, 0xC988, H2), + (0xC989, 0xC9A3, H3), + (0xC9A4, 0xC9A4, H2), + (0xC9A5, 0xC9BF, H3), + (0xC9C0, 0xC9C0, H2), + (0xC9C1, 0xC9DB, H3), + (0xC9DC, 0xC9DC, H2), + (0xC9DD, 0xC9F7, H3), + (0xC9F8, 0xC9F8, H2), + (0xC9F9, 0xCA13, H3), + (0xCA14, 0xCA14, H2), + (0xCA15, 0xCA2F, H3), + (0xCA30, 0xCA30, H2), + (0xCA31, 0xCA4B, H3), + (0xCA4C, 0xCA4C, H2), + (0xCA4D, 0xCA67, H3), + (0xCA68, 0xCA68, H2), + (0xCA69, 0xCA83, H3), + (0xCA84, 0xCA84, H2), + (0xCA85, 0xCA9F, H3), + (0xCAA0, 0xCAA0, H2), + (0xCAA1, 0xCABB, H3), + (0xCABC, 0xCABC, H2), + (0xCABD, 0xCAD7, H3), + (0xCAD8, 0xCAD8, H2), + (0xCAD9, 0xCAF3, H3), + (0xCAF4, 0xCAF4, H2), + (0xCAF5, 0xCB0F, H3), + (0xCB10, 0xCB10, H2), + (0xCB11, 0xCB2B, H3), + (0xCB2C, 0xCB2C, H2), + (0xCB2D, 0xCB47, H3), + (0xCB48, 0xCB48, H2), + (0xCB49, 0xCB63, H3), + (0xCB64, 0xCB64, H2), + (0xCB65, 0xCB7F, H3), + (0xCB80, 0xCB80, H2), + (0xCB81, 0xCB9B, H3), + (0xCB9C, 0xCB9C, H2), + (0xCB9D, 0xCBB7, H3), + (0xCBB8, 0xCBB8, H2), + (0xCBB9, 0xCBD3, H3), + (0xCBD4, 0xCBD4, H2), + (0xCBD5, 0xCBEF, H3), + (0xCBF0, 0xCBF0, H2), + (0xCBF1, 0xCC0B, H3), + (0xCC0C, 0xCC0C, H2), + (0xCC0D, 0xCC27, H3), + (0xCC28, 0xCC28, H2), + (0xCC29, 0xCC43, H3), + (0xCC44, 0xCC44, H2), + (0xCC45, 0xCC5F, H3), + (0xCC60, 0xCC60, H2), + (0xCC61, 0xCC7B, H3), + (0xCC7C, 0xCC7C, H2), + (0xCC7D, 0xCC97, H3), + (0xCC98, 0xCC98, H2), + (0xCC99, 0xCCB3, H3), + (0xCCB4, 0xCCB4, H2), + (0xCCB5, 0xCCCF, H3), + (0xCCD0, 0xCCD0, H2), + (0xCCD1, 0xCCEB, H3), + (0xCCEC, 0xCCEC, H2), + (0xCCED, 0xCD07, H3), + (0xCD08, 0xCD08, H2), + (0xCD09, 0xCD23, H3), + (0xCD24, 0xCD24, H2), + 
(0xCD25, 0xCD3F, H3), + (0xCD40, 0xCD40, H2), + (0xCD41, 0xCD5B, H3), + (0xCD5C, 0xCD5C, H2), + (0xCD5D, 0xCD77, H3), + (0xCD78, 0xCD78, H2), + (0xCD79, 0xCD93, H3), + (0xCD94, 0xCD94, H2), + (0xCD95, 0xCDAF, H3), + (0xCDB0, 0xCDB0, H2), + (0xCDB1, 0xCDCB, H3), + (0xCDCC, 0xCDCC, H2), + (0xCDCD, 0xCDE7, H3), + (0xCDE8, 0xCDE8, H2), + (0xCDE9, 0xCE03, H3), + (0xCE04, 0xCE04, H2), + (0xCE05, 0xCE1F, H3), + (0xCE20, 0xCE20, H2), + (0xCE21, 0xCE3B, H3), + (0xCE3C, 0xCE3C, H2), + (0xCE3D, 0xCE57, H3), + (0xCE58, 0xCE58, H2), + (0xCE59, 0xCE73, H3), + (0xCE74, 0xCE74, H2), + (0xCE75, 0xCE8F, H3), + (0xCE90, 0xCE90, H2), + (0xCE91, 0xCEAB, H3), + (0xCEAC, 0xCEAC, H2), + (0xCEAD, 0xCEC7, H3), + (0xCEC8, 0xCEC8, H2), + (0xCEC9, 0xCEE3, H3), + (0xCEE4, 0xCEE4, H2), + (0xCEE5, 0xCEFF, H3), + (0xCF00, 0xCF00, H2), + (0xCF01, 0xCF1B, H3), + (0xCF1C, 0xCF1C, H2), + (0xCF1D, 0xCF37, H3), + (0xCF38, 0xCF38, H2), + (0xCF39, 0xCF53, H3), + (0xCF54, 0xCF54, H2), + (0xCF55, 0xCF6F, H3), + (0xCF70, 0xCF70, H2), + (0xCF71, 0xCF8B, H3), + (0xCF8C, 0xCF8C, H2), + (0xCF8D, 0xCFA7, H3), + (0xCFA8, 0xCFA8, H2), + (0xCFA9, 0xCFC3, H3), + (0xCFC4, 0xCFC4, H2), + (0xCFC5, 0xCFDF, H3), + (0xCFE0, 0xCFE0, H2), + (0xCFE1, 0xCFFB, H3), + (0xCFFC, 0xCFFC, H2), + (0xCFFD, 0xD017, H3), + (0xD018, 0xD018, H2), + (0xD019, 0xD033, H3), + (0xD034, 0xD034, H2), + (0xD035, 0xD04F, H3), + (0xD050, 0xD050, H2), + (0xD051, 0xD06B, H3), + (0xD06C, 0xD06C, H2), + (0xD06D, 0xD087, H3), + (0xD088, 0xD088, H2), + (0xD089, 0xD0A3, H3), + (0xD0A4, 0xD0A4, H2), + (0xD0A5, 0xD0BF, H3), + (0xD0C0, 0xD0C0, H2), + (0xD0C1, 0xD0DB, H3), + (0xD0DC, 0xD0DC, H2), + (0xD0DD, 0xD0F7, H3), + (0xD0F8, 0xD0F8, H2), + (0xD0F9, 0xD113, H3), + (0xD114, 0xD114, H2), + (0xD115, 0xD12F, H3), + (0xD130, 0xD130, H2), + (0xD131, 0xD14B, H3), + (0xD14C, 0xD14C, H2), + (0xD14D, 0xD167, H3), + (0xD168, 0xD168, H2), + (0xD169, 0xD183, H3), + (0xD184, 0xD184, H2), + (0xD185, 0xD19F, H3), + (0xD1A0, 0xD1A0, H2), + (0xD1A1, 0xD1BB, H3), + 
(0xD1BC, 0xD1BC, H2), + (0xD1BD, 0xD1D7, H3), + (0xD1D8, 0xD1D8, H2), + (0xD1D9, 0xD1F3, H3), + (0xD1F4, 0xD1F4, H2), + (0xD1F5, 0xD20F, H3), + (0xD210, 0xD210, H2), + (0xD211, 0xD22B, H3), + (0xD22C, 0xD22C, H2), + (0xD22D, 0xD247, H3), + (0xD248, 0xD248, H2), + (0xD249, 0xD263, H3), + (0xD264, 0xD264, H2), + (0xD265, 0xD27F, H3), + (0xD280, 0xD280, H2), + (0xD281, 0xD29B, H3), + (0xD29C, 0xD29C, H2), + (0xD29D, 0xD2B7, H3), + (0xD2B8, 0xD2B8, H2), + (0xD2B9, 0xD2D3, H3), + (0xD2D4, 0xD2D4, H2), + (0xD2D5, 0xD2EF, H3), + (0xD2F0, 0xD2F0, H2), + (0xD2F1, 0xD30B, H3), + (0xD30C, 0xD30C, H2), + (0xD30D, 0xD327, H3), + (0xD328, 0xD328, H2), + (0xD329, 0xD343, H3), + (0xD344, 0xD344, H2), + (0xD345, 0xD35F, H3), + (0xD360, 0xD360, H2), + (0xD361, 0xD37B, H3), + (0xD37C, 0xD37C, H2), + (0xD37D, 0xD397, H3), + (0xD398, 0xD398, H2), + (0xD399, 0xD3B3, H3), + (0xD3B4, 0xD3B4, H2), + (0xD3B5, 0xD3CF, H3), + (0xD3D0, 0xD3D0, H2), + (0xD3D1, 0xD3EB, H3), + (0xD3EC, 0xD3EC, H2), + (0xD3ED, 0xD407, H3), + (0xD408, 0xD408, H2), + (0xD409, 0xD423, H3), + (0xD424, 0xD424, H2), + (0xD425, 0xD43F, H3), + (0xD440, 0xD440, H2), + (0xD441, 0xD45B, H3), + (0xD45C, 0xD45C, H2), + (0xD45D, 0xD477, H3), + (0xD478, 0xD478, H2), + (0xD479, 0xD493, H3), + (0xD494, 0xD494, H2), + (0xD495, 0xD4AF, H3), + (0xD4B0, 0xD4B0, H2), + (0xD4B1, 0xD4CB, H3), + (0xD4CC, 0xD4CC, H2), + (0xD4CD, 0xD4E7, H3), + (0xD4E8, 0xD4E8, H2), + (0xD4E9, 0xD503, H3), + (0xD504, 0xD504, H2), + (0xD505, 0xD51F, H3), + (0xD520, 0xD520, H2), + (0xD521, 0xD53B, H3), + (0xD53C, 0xD53C, H2), + (0xD53D, 0xD557, H3), + (0xD558, 0xD558, H2), + (0xD559, 0xD573, H3), + (0xD574, 0xD574, H2), + (0xD575, 0xD58F, H3), + (0xD590, 0xD590, H2), + (0xD591, 0xD5AB, H3), + (0xD5AC, 0xD5AC, H2), + (0xD5AD, 0xD5C7, H3), + (0xD5C8, 0xD5C8, H2), + (0xD5C9, 0xD5E3, H3), + (0xD5E4, 0xD5E4, H2), + (0xD5E5, 0xD5FF, H3), + (0xD600, 0xD600, H2), + (0xD601, 0xD61B, H3), + (0xD61C, 0xD61C, H2), + (0xD61D, 0xD637, H3), + (0xD638, 0xD638, H2), + 
(0xD639, 0xD653, H3), + (0xD654, 0xD654, H2), + (0xD655, 0xD66F, H3), + (0xD670, 0xD670, H2), + (0xD671, 0xD68B, H3), + (0xD68C, 0xD68C, H2), + (0xD68D, 0xD6A7, H3), + (0xD6A8, 0xD6A8, H2), + (0xD6A9, 0xD6C3, H3), + (0xD6C4, 0xD6C4, H2), + (0xD6C5, 0xD6DF, H3), + (0xD6E0, 0xD6E0, H2), + (0xD6E1, 0xD6FB, H3), + (0xD6FC, 0xD6FC, H2), + (0xD6FD, 0xD717, H3), + (0xD718, 0xD718, H2), + (0xD719, 0xD733, H3), + (0xD734, 0xD734, H2), + (0xD735, 0xD74F, H3), + (0xD750, 0xD750, H2), + (0xD751, 0xD76B, H3), + (0xD76C, 0xD76C, H2), + (0xD76D, 0xD787, H3), + (0xD788, 0xD788, H2), + (0xD789, 0xD7A3, H3), + (0xD7B0, 0xD7C6, JV), + (0xD7CB, 0xD7FB, JT), + (0xD800, 0xDB7F, SG), + (0xDB80, 0xDBFF, SG), + (0xDC00, 0xDFFF, SG), + (0xE000, 0xF8FF, XX), + (0xF900, 0xFA6D, ID), + (0xFA6E, 0xFA6F, ID), + (0xFA70, 0xFAD9, ID), + (0xFADA, 0xFAFF, ID), + (0xFB00, 0xFB06, AL), + (0xFB13, 0xFB17, AL), + (0xFB1D, 0xFB1D, HL), + (0xFB1E, 0xFB1E, CM), + (0xFB1F, 0xFB28, HL), + (0xFB29, 0xFB29, AL), + (0xFB2A, 0xFB36, HL), + (0xFB38, 0xFB3C, HL), + (0xFB3E, 0xFB3E, HL), + (0xFB40, 0xFB41, HL), + (0xFB43, 0xFB44, HL), + (0xFB46, 0xFB4F, HL), + (0xFB50, 0xFBB1, AL), + (0xFBB2, 0xFBC1, AL), + (0xFBD3, 0xFD3D, AL), + (0xFD3E, 0xFD3E, CL), + (0xFD3F, 0xFD3F, OP), + (0xFD50, 0xFD8F, AL), + (0xFD92, 0xFDC7, AL), + (0xFDF0, 0xFDFB, AL), + (0xFDFC, 0xFDFC, PO), + (0xFDFD, 0xFDFD, AL), + (0xFE00, 0xFE0F, CM), + (0xFE10, 0xFE10, IS), + (0xFE11, 0xFE12, CL), + (0xFE13, 0xFE14, IS), + (0xFE15, 0xFE16, EX), + (0xFE17, 0xFE17, OP), + (0xFE18, 0xFE18, CL), + (0xFE19, 0xFE19, IN), + (0xFE20, 0xFE2F, CM), + (0xFE30, 0xFE30, ID), + (0xFE31, 0xFE32, ID), + (0xFE33, 0xFE34, ID), + (0xFE35, 0xFE35, OP), + (0xFE36, 0xFE36, CL), + (0xFE37, 0xFE37, OP), + (0xFE38, 0xFE38, CL), + (0xFE39, 0xFE39, OP), + (0xFE3A, 0xFE3A, CL), + (0xFE3B, 0xFE3B, OP), + (0xFE3C, 0xFE3C, CL), + (0xFE3D, 0xFE3D, OP), + (0xFE3E, 0xFE3E, CL), + (0xFE3F, 0xFE3F, OP), + (0xFE40, 0xFE40, CL), + (0xFE41, 0xFE41, OP), + (0xFE42, 0xFE42, CL), + 
(0xFE43, 0xFE43, OP), + (0xFE44, 0xFE44, CL), + (0xFE45, 0xFE46, ID), + (0xFE47, 0xFE47, OP), + (0xFE48, 0xFE48, CL), + (0xFE49, 0xFE4C, ID), + (0xFE4D, 0xFE4F, ID), + (0xFE50, 0xFE50, CL), + (0xFE51, 0xFE51, ID), + (0xFE52, 0xFE52, CL), + (0xFE54, 0xFE55, NS), + (0xFE56, 0xFE57, EX), + (0xFE58, 0xFE58, ID), + (0xFE59, 0xFE59, OP), + (0xFE5A, 0xFE5A, CL), + (0xFE5B, 0xFE5B, OP), + (0xFE5C, 0xFE5C, CL), + (0xFE5D, 0xFE5D, OP), + (0xFE5E, 0xFE5E, CL), + (0xFE5F, 0xFE61, ID), + (0xFE62, 0xFE62, ID), + (0xFE63, 0xFE63, ID), + (0xFE64, 0xFE66, ID), + (0xFE68, 0xFE68, ID), + (0xFE69, 0xFE69, PR), + (0xFE6A, 0xFE6A, PO), + (0xFE6B, 0xFE6B, ID), + (0xFE70, 0xFE74, AL), + (0xFE76, 0xFEFC, AL), + (0xFEFF, 0xFEFF, WJ), + (0xFF01, 0xFF01, EX), + (0xFF02, 0xFF03, ID), + (0xFF04, 0xFF04, PR), + (0xFF05, 0xFF05, PO), + (0xFF06, 0xFF07, ID), + (0xFF08, 0xFF08, OP), + (0xFF09, 0xFF09, CL), + (0xFF0A, 0xFF0A, ID), + (0xFF0B, 0xFF0B, ID), + (0xFF0C, 0xFF0C, CL), + (0xFF0D, 0xFF0D, ID), + (0xFF0E, 0xFF0E, CL), + (0xFF0F, 0xFF0F, ID), + (0xFF10, 0xFF19, ID), + (0xFF1A, 0xFF1B, NS), + (0xFF1C, 0xFF1E, ID), + (0xFF1F, 0xFF1F, EX), + (0xFF20, 0xFF20, ID), + (0xFF21, 0xFF3A, ID), + (0xFF3B, 0xFF3B, OP), + (0xFF3C, 0xFF3C, ID), + (0xFF3D, 0xFF3D, CL), + (0xFF3E, 0xFF3E, ID), + (0xFF3F, 0xFF3F, ID), + (0xFF40, 0xFF40, ID), + (0xFF41, 0xFF5A, ID), + (0xFF5B, 0xFF5B, OP), + (0xFF5C, 0xFF5C, ID), + (0xFF5D, 0xFF5D, CL), + (0xFF5E, 0xFF5E, ID), + (0xFF5F, 0xFF5F, OP), + (0xFF60, 0xFF60, CL), + (0xFF61, 0xFF61, CL), + (0xFF62, 0xFF62, OP), + (0xFF63, 0xFF63, CL), + (0xFF64, 0xFF64, CL), + (0xFF65, 0xFF65, NS), + (0xFF66, 0xFF66, ID), + (0xFF67, 0xFF6F, CJ), + (0xFF70, 0xFF70, CJ), + (0xFF71, 0xFF9D, ID), + (0xFF9E, 0xFF9F, NS), + (0xFFA0, 0xFFBE, ID), + (0xFFC2, 0xFFC7, ID), + (0xFFCA, 0xFFCF, ID), + (0xFFD2, 0xFFD7, ID), + (0xFFDA, 0xFFDC, ID), + (0xFFE0, 0xFFE0, PO), + (0xFFE1, 0xFFE1, PR), + (0xFFE2, 0xFFE2, ID), + (0xFFE3, 0xFFE3, ID), + (0xFFE4, 0xFFE4, ID), + (0xFFE5, 0xFFE6, PR), + 
(0xFFE8, 0xFFE8, AL), + (0xFFE9, 0xFFEC, AL), + (0xFFED, 0xFFEE, AL), + (0xFFF9, 0xFFFB, CM), + (0xFFFC, 0xFFFC, CB), + (0xFFFD, 0xFFFD, AI), + (0x10000, 0x1000B, AL), + (0x1000D, 0x10026, AL), + (0x10028, 0x1003A, AL), + (0x1003C, 0x1003D, AL), + (0x1003F, 0x1004D, AL), + (0x10050, 0x1005D, AL), + (0x10080, 0x100FA, AL), + (0x10100, 0x10102, BA), + (0x10107, 0x10133, AL), + (0x10137, 0x1013F, AL), + (0x10140, 0x10174, AL), + (0x10175, 0x10178, AL), + (0x10179, 0x10189, AL), + (0x1018A, 0x1018B, AL), + (0x1018C, 0x1018E, AL), + (0x10190, 0x1019B, AL), + (0x101A0, 0x101A0, AL), + (0x101D0, 0x101FC, AL), + (0x101FD, 0x101FD, CM), + (0x10280, 0x1029C, AL), + (0x102A0, 0x102D0, AL), + (0x102E0, 0x102E0, CM), + (0x102E1, 0x102FB, AL), + (0x10300, 0x1031F, AL), + (0x10320, 0x10323, AL), + (0x1032D, 0x1032F, AL), + (0x10330, 0x10340, AL), + (0x10341, 0x10341, AL), + (0x10342, 0x10349, AL), + (0x1034A, 0x1034A, AL), + (0x10350, 0x10375, AL), + (0x10376, 0x1037A, CM), + (0x10380, 0x1039D, AL), + (0x1039F, 0x1039F, BA), + (0x103A0, 0x103C3, AL), + (0x103C8, 0x103CF, AL), + (0x103D0, 0x103D0, BA), + (0x103D1, 0x103D5, AL), + (0x10400, 0x1044F, AL), + (0x10450, 0x1047F, AL), + (0x10480, 0x1049D, AL), + (0x104A0, 0x104A9, NU), + (0x104B0, 0x104D3, AL), + (0x104D8, 0x104FB, AL), + (0x10500, 0x10527, AL), + (0x10530, 0x10563, AL), + (0x1056F, 0x1056F, AL), + (0x10600, 0x10736, AL), + (0x10740, 0x10755, AL), + (0x10760, 0x10767, AL), + (0x10800, 0x10805, AL), + (0x10808, 0x10808, AL), + (0x1080A, 0x10835, AL), + (0x10837, 0x10838, AL), + (0x1083C, 0x1083C, AL), + (0x1083F, 0x1083F, AL), + (0x10840, 0x10855, AL), + (0x10857, 0x10857, BA), + (0x10858, 0x1085F, AL), + (0x10860, 0x10876, AL), + (0x10877, 0x10878, AL), + (0x10879, 0x1087F, AL), + (0x10880, 0x1089E, AL), + (0x108A7, 0x108AF, AL), + (0x108E0, 0x108F2, AL), + (0x108F4, 0x108F5, AL), + (0x108FB, 0x108FF, AL), + (0x10900, 0x10915, AL), + (0x10916, 0x1091B, AL), + (0x1091F, 0x1091F, BA), + (0x10920, 0x10939, AL), + (0x1093F, 
0x1093F, AL), + (0x10980, 0x1099F, AL), + (0x109A0, 0x109B7, AL), + (0x109BC, 0x109BD, AL), + (0x109BE, 0x109BF, AL), + (0x109C0, 0x109CF, AL), + (0x109D2, 0x109FF, AL), + (0x10A00, 0x10A00, AL), + (0x10A01, 0x10A03, CM), + (0x10A05, 0x10A06, CM), + (0x10A0C, 0x10A0F, CM), + (0x10A10, 0x10A13, AL), + (0x10A15, 0x10A17, AL), + (0x10A19, 0x10A35, AL), + (0x10A38, 0x10A3A, CM), + (0x10A3F, 0x10A3F, CM), + (0x10A40, 0x10A48, AL), + (0x10A50, 0x10A57, BA), + (0x10A58, 0x10A58, AL), + (0x10A60, 0x10A7C, AL), + (0x10A7D, 0x10A7E, AL), + (0x10A7F, 0x10A7F, AL), + (0x10A80, 0x10A9C, AL), + (0x10A9D, 0x10A9F, AL), + (0x10AC0, 0x10AC7, AL), + (0x10AC8, 0x10AC8, AL), + (0x10AC9, 0x10AE4, AL), + (0x10AE5, 0x10AE6, CM), + (0x10AEB, 0x10AEF, AL), + (0x10AF0, 0x10AF5, BA), + (0x10AF6, 0x10AF6, IN), + (0x10B00, 0x10B35, AL), + (0x10B39, 0x10B3F, BA), + (0x10B40, 0x10B55, AL), + (0x10B58, 0x10B5F, AL), + (0x10B60, 0x10B72, AL), + (0x10B78, 0x10B7F, AL), + (0x10B80, 0x10B91, AL), + (0x10B99, 0x10B9C, AL), + (0x10BA9, 0x10BAF, AL), + (0x10C00, 0x10C48, AL), + (0x10C80, 0x10CB2, AL), + (0x10CC0, 0x10CF2, AL), + (0x10CFA, 0x10CFF, AL), + (0x10D00, 0x10D23, AL), + (0x10D24, 0x10D27, CM), + (0x10D30, 0x10D39, NU), + (0x10E60, 0x10E7E, AL), + (0x10F00, 0x10F1C, AL), + (0x10F1D, 0x10F26, AL), + (0x10F27, 0x10F27, AL), + (0x10F30, 0x10F45, AL), + (0x10F46, 0x10F50, CM), + (0x10F51, 0x10F54, AL), + (0x10F55, 0x10F59, AL), + (0x10FE0, 0x10FF6, AL), + (0x11000, 0x11000, CM), + (0x11001, 0x11001, CM), + (0x11002, 0x11002, CM), + (0x11003, 0x11037, AL), + (0x11038, 0x11046, CM), + (0x11047, 0x11048, BA), + (0x11049, 0x1104D, AL), + (0x11052, 0x11065, AL), + (0x11066, 0x1106F, NU), + (0x1107F, 0x1107F, CM), + (0x11080, 0x11081, CM), + (0x11082, 0x11082, CM), + (0x11083, 0x110AF, AL), + (0x110B0, 0x110B2, CM), + (0x110B3, 0x110B6, CM), + (0x110B7, 0x110B8, CM), + (0x110B9, 0x110BA, CM), + (0x110BB, 0x110BC, AL), + (0x110BD, 0x110BD, AL), + (0x110BE, 0x110C1, BA), + (0x110CD, 0x110CD, AL), + 
(0x110D0, 0x110E8, AL), + (0x110F0, 0x110F9, NU), + (0x11100, 0x11102, CM), + (0x11103, 0x11126, AL), + (0x11127, 0x1112B, CM), + (0x1112C, 0x1112C, CM), + (0x1112D, 0x11134, CM), + (0x11136, 0x1113F, NU), + (0x11140, 0x11143, BA), + (0x11144, 0x11144, AL), + (0x11145, 0x11146, CM), + (0x11150, 0x11172, AL), + (0x11173, 0x11173, CM), + (0x11174, 0x11174, AL), + (0x11175, 0x11175, BB), + (0x11176, 0x11176, AL), + (0x11180, 0x11181, CM), + (0x11182, 0x11182, CM), + (0x11183, 0x111B2, AL), + (0x111B3, 0x111B5, CM), + (0x111B6, 0x111BE, CM), + (0x111BF, 0x111C0, CM), + (0x111C1, 0x111C4, AL), + (0x111C5, 0x111C6, BA), + (0x111C7, 0x111C7, AL), + (0x111C8, 0x111C8, BA), + (0x111C9, 0x111CC, CM), + (0x111CD, 0x111CD, AL), + (0x111D0, 0x111D9, NU), + (0x111DA, 0x111DA, AL), + (0x111DB, 0x111DB, BB), + (0x111DC, 0x111DC, AL), + (0x111DD, 0x111DF, BA), + (0x111E1, 0x111F4, AL), + (0x11200, 0x11211, AL), + (0x11213, 0x1122B, AL), + (0x1122C, 0x1122E, CM), + (0x1122F, 0x11231, CM), + (0x11232, 0x11233, CM), + (0x11234, 0x11234, CM), + (0x11235, 0x11235, CM), + (0x11236, 0x11237, CM), + (0x11238, 0x11239, BA), + (0x1123A, 0x1123A, AL), + (0x1123B, 0x1123C, BA), + (0x1123D, 0x1123D, AL), + (0x1123E, 0x1123E, CM), + (0x11280, 0x11286, AL), + (0x11288, 0x11288, AL), + (0x1128A, 0x1128D, AL), + (0x1128F, 0x1129D, AL), + (0x1129F, 0x112A8, AL), + (0x112A9, 0x112A9, BA), + (0x112B0, 0x112DE, AL), + (0x112DF, 0x112DF, CM), + (0x112E0, 0x112E2, CM), + (0x112E3, 0x112EA, CM), + (0x112F0, 0x112F9, NU), + (0x11300, 0x11301, CM), + (0x11302, 0x11303, CM), + (0x11305, 0x1130C, AL), + (0x1130F, 0x11310, AL), + (0x11313, 0x11328, AL), + (0x1132A, 0x11330, AL), + (0x11332, 0x11333, AL), + (0x11335, 0x11339, AL), + (0x1133B, 0x1133C, CM), + (0x1133D, 0x1133D, AL), + (0x1133E, 0x1133F, CM), + (0x11340, 0x11340, CM), + (0x11341, 0x11344, CM), + (0x11347, 0x11348, CM), + (0x1134B, 0x1134D, CM), + (0x11350, 0x11350, AL), + (0x11357, 0x11357, CM), + (0x1135D, 0x11361, AL), + (0x11362, 0x11363, CM), 
+ (0x11366, 0x1136C, CM), + (0x11370, 0x11374, CM), + (0x11400, 0x11434, AL), + (0x11435, 0x11437, CM), + (0x11438, 0x1143F, CM), + (0x11440, 0x11441, CM), + (0x11442, 0x11444, CM), + (0x11445, 0x11445, CM), + (0x11446, 0x11446, CM), + (0x11447, 0x1144A, AL), + (0x1144B, 0x1144E, BA), + (0x1144F, 0x1144F, AL), + (0x11450, 0x11459, NU), + (0x1145B, 0x1145B, BA), + (0x1145D, 0x1145D, AL), + (0x1145E, 0x1145E, CM), + (0x1145F, 0x1145F, AL), + (0x11480, 0x114AF, AL), + (0x114B0, 0x114B2, CM), + (0x114B3, 0x114B8, CM), + (0x114B9, 0x114B9, CM), + (0x114BA, 0x114BA, CM), + (0x114BB, 0x114BE, CM), + (0x114BF, 0x114C0, CM), + (0x114C1, 0x114C1, CM), + (0x114C2, 0x114C3, CM), + (0x114C4, 0x114C5, AL), + (0x114C6, 0x114C6, AL), + (0x114C7, 0x114C7, AL), + (0x114D0, 0x114D9, NU), + (0x11580, 0x115AE, AL), + (0x115AF, 0x115B1, CM), + (0x115B2, 0x115B5, CM), + (0x115B8, 0x115BB, CM), + (0x115BC, 0x115BD, CM), + (0x115BE, 0x115BE, CM), + (0x115BF, 0x115C0, CM), + (0x115C1, 0x115C1, BB), + (0x115C2, 0x115C3, BA), + (0x115C4, 0x115C5, EX), + (0x115C6, 0x115C8, AL), + (0x115C9, 0x115D7, BA), + (0x115D8, 0x115DB, AL), + (0x115DC, 0x115DD, CM), + (0x11600, 0x1162F, AL), + (0x11630, 0x11632, CM), + (0x11633, 0x1163A, CM), + (0x1163B, 0x1163C, CM), + (0x1163D, 0x1163D, CM), + (0x1163E, 0x1163E, CM), + (0x1163F, 0x11640, CM), + (0x11641, 0x11642, BA), + (0x11643, 0x11643, AL), + (0x11644, 0x11644, AL), + (0x11650, 0x11659, NU), + (0x11660, 0x1166C, BB), + (0x11680, 0x116AA, AL), + (0x116AB, 0x116AB, CM), + (0x116AC, 0x116AC, CM), + (0x116AD, 0x116AD, CM), + (0x116AE, 0x116AF, CM), + (0x116B0, 0x116B5, CM), + (0x116B6, 0x116B6, CM), + (0x116B7, 0x116B7, CM), + (0x116B8, 0x116B8, AL), + (0x116C0, 0x116C9, NU), + (0x11700, 0x1171A, SA), + (0x1171D, 0x1171F, SA), + (0x11720, 0x11721, SA), + (0x11722, 0x11725, SA), + (0x11726, 0x11726, SA), + (0x11727, 0x1172B, SA), + (0x11730, 0x11739, NU), + (0x1173A, 0x1173B, SA), + (0x1173C, 0x1173E, BA), + (0x1173F, 0x1173F, SA), + (0x11800, 0x1182B, 
AL), + (0x1182C, 0x1182E, CM), + (0x1182F, 0x11837, CM), + (0x11838, 0x11838, CM), + (0x11839, 0x1183A, CM), + (0x1183B, 0x1183B, AL), + (0x118A0, 0x118DF, AL), + (0x118E0, 0x118E9, NU), + (0x118EA, 0x118F2, AL), + (0x118FF, 0x118FF, AL), + (0x119A0, 0x119A7, AL), + (0x119AA, 0x119D0, AL), + (0x119D1, 0x119D3, CM), + (0x119D4, 0x119D7, CM), + (0x119DA, 0x119DB, CM), + (0x119DC, 0x119DF, CM), + (0x119E0, 0x119E0, CM), + (0x119E1, 0x119E1, AL), + (0x119E2, 0x119E2, BB), + (0x119E3, 0x119E3, AL), + (0x119E4, 0x119E4, CM), + (0x11A00, 0x11A00, AL), + (0x11A01, 0x11A0A, CM), + (0x11A0B, 0x11A32, AL), + (0x11A33, 0x11A38, CM), + (0x11A39, 0x11A39, CM), + (0x11A3A, 0x11A3A, AL), + (0x11A3B, 0x11A3E, CM), + (0x11A3F, 0x11A3F, BB), + (0x11A40, 0x11A40, AL), + (0x11A41, 0x11A44, BA), + (0x11A45, 0x11A45, BB), + (0x11A46, 0x11A46, AL), + (0x11A47, 0x11A47, CM), + (0x11A50, 0x11A50, AL), + (0x11A51, 0x11A56, CM), + (0x11A57, 0x11A58, CM), + (0x11A59, 0x11A5B, CM), + (0x11A5C, 0x11A89, AL), + (0x11A8A, 0x11A96, CM), + (0x11A97, 0x11A97, CM), + (0x11A98, 0x11A99, CM), + (0x11A9A, 0x11A9C, BA), + (0x11A9D, 0x11A9D, AL), + (0x11A9E, 0x11AA0, BB), + (0x11AA1, 0x11AA2, BA), + (0x11AC0, 0x11AF8, AL), + (0x11C00, 0x11C08, AL), + (0x11C0A, 0x11C2E, AL), + (0x11C2F, 0x11C2F, CM), + (0x11C30, 0x11C36, CM), + (0x11C38, 0x11C3D, CM), + (0x11C3E, 0x11C3E, CM), + (0x11C3F, 0x11C3F, CM), + (0x11C40, 0x11C40, AL), + (0x11C41, 0x11C45, BA), + (0x11C50, 0x11C59, NU), + (0x11C5A, 0x11C6C, AL), + (0x11C70, 0x11C70, BB), + (0x11C71, 0x11C71, EX), + (0x11C72, 0x11C8F, AL), + (0x11C92, 0x11CA7, CM), + (0x11CA9, 0x11CA9, CM), + (0x11CAA, 0x11CB0, CM), + (0x11CB1, 0x11CB1, CM), + (0x11CB2, 0x11CB3, CM), + (0x11CB4, 0x11CB4, CM), + (0x11CB5, 0x11CB6, CM), + (0x11D00, 0x11D06, AL), + (0x11D08, 0x11D09, AL), + (0x11D0B, 0x11D30, AL), + (0x11D31, 0x11D36, CM), + (0x11D3A, 0x11D3A, CM), + (0x11D3C, 0x11D3D, CM), + (0x11D3F, 0x11D45, CM), + (0x11D46, 0x11D46, AL), + (0x11D47, 0x11D47, CM), + (0x11D50, 
0x11D59, NU), + (0x11D60, 0x11D65, AL), + (0x11D67, 0x11D68, AL), + (0x11D6A, 0x11D89, AL), + (0x11D8A, 0x11D8E, CM), + (0x11D90, 0x11D91, CM), + (0x11D93, 0x11D94, CM), + (0x11D95, 0x11D95, CM), + (0x11D96, 0x11D96, CM), + (0x11D97, 0x11D97, CM), + (0x11D98, 0x11D98, AL), + (0x11DA0, 0x11DA9, NU), + (0x11EE0, 0x11EF2, AL), + (0x11EF3, 0x11EF4, CM), + (0x11EF5, 0x11EF6, CM), + (0x11EF7, 0x11EF8, AL), + (0x11FC0, 0x11FD4, AL), + (0x11FD5, 0x11FDC, AL), + (0x11FDD, 0x11FE0, PO), + (0x11FE1, 0x11FF1, AL), + (0x11FFF, 0x11FFF, BA), + (0x12000, 0x12399, AL), + (0x12400, 0x1246E, AL), + (0x12470, 0x12474, BA), + (0x12480, 0x12543, AL), + (0x13000, 0x13257, AL), + (0x13258, 0x1325A, OP), + (0x1325B, 0x1325D, CL), + (0x1325E, 0x13281, AL), + (0x13282, 0x13282, CL), + (0x13283, 0x13285, AL), + (0x13286, 0x13286, OP), + (0x13287, 0x13287, CL), + (0x13288, 0x13288, OP), + (0x13289, 0x13289, CL), + (0x1328A, 0x13378, AL), + (0x13379, 0x13379, OP), + (0x1337A, 0x1337B, CL), + (0x1337C, 0x1342E, AL), + (0x13430, 0x13436, GL), + (0x13437, 0x13437, OP), + (0x13438, 0x13438, CL), + (0x14400, 0x145CD, AL), + (0x145CE, 0x145CE, OP), + (0x145CF, 0x145CF, CL), + (0x145D0, 0x14646, AL), + (0x16800, 0x16A38, AL), + (0x16A40, 0x16A5E, AL), + (0x16A60, 0x16A69, NU), + (0x16A6E, 0x16A6F, BA), + (0x16AD0, 0x16AED, AL), + (0x16AF0, 0x16AF4, CM), + (0x16AF5, 0x16AF5, BA), + (0x16B00, 0x16B2F, AL), + (0x16B30, 0x16B36, CM), + (0x16B37, 0x16B39, BA), + (0x16B3A, 0x16B3B, AL), + (0x16B3C, 0x16B3F, AL), + (0x16B40, 0x16B43, AL), + (0x16B44, 0x16B44, BA), + (0x16B45, 0x16B45, AL), + (0x16B50, 0x16B59, NU), + (0x16B5B, 0x16B61, AL), + (0x16B63, 0x16B77, AL), + (0x16B7D, 0x16B8F, AL), + (0x16E40, 0x16E7F, AL), + (0x16E80, 0x16E96, AL), + (0x16E97, 0x16E98, BA), + (0x16E99, 0x16E9A, AL), + (0x16F00, 0x16F4A, AL), + (0x16F4F, 0x16F4F, CM), + (0x16F50, 0x16F50, AL), + (0x16F51, 0x16F87, CM), + (0x16F8F, 0x16F92, CM), + (0x16F93, 0x16F9F, AL), + (0x16FE0, 0x16FE1, NS), + (0x16FE2, 0x16FE2, NS), + 
(0x16FE3, 0x16FE3, NS), + (0x17000, 0x187F7, ID), + (0x18800, 0x18AF2, ID), + (0x1B000, 0x1B0FF, ID), + (0x1B100, 0x1B11E, ID), + (0x1B150, 0x1B152, CJ), + (0x1B164, 0x1B167, CJ), + (0x1B170, 0x1B2FB, ID), + (0x1BC00, 0x1BC6A, AL), + (0x1BC70, 0x1BC7C, AL), + (0x1BC80, 0x1BC88, AL), + (0x1BC90, 0x1BC99, AL), + (0x1BC9C, 0x1BC9C, AL), + (0x1BC9D, 0x1BC9E, CM), + (0x1BC9F, 0x1BC9F, BA), + (0x1BCA0, 0x1BCA3, CM), + (0x1D000, 0x1D0F5, AL), + (0x1D100, 0x1D126, AL), + (0x1D129, 0x1D164, AL), + (0x1D165, 0x1D166, CM), + (0x1D167, 0x1D169, CM), + (0x1D16A, 0x1D16C, AL), + (0x1D16D, 0x1D172, CM), + (0x1D173, 0x1D17A, CM), + (0x1D17B, 0x1D182, CM), + (0x1D183, 0x1D184, AL), + (0x1D185, 0x1D18B, CM), + (0x1D18C, 0x1D1A9, AL), + (0x1D1AA, 0x1D1AD, CM), + (0x1D1AE, 0x1D1E8, AL), + (0x1D200, 0x1D241, AL), + (0x1D242, 0x1D244, CM), + (0x1D245, 0x1D245, AL), + (0x1D2E0, 0x1D2F3, AL), + (0x1D300, 0x1D356, AL), + (0x1D360, 0x1D378, AL), + (0x1D400, 0x1D454, AL), + (0x1D456, 0x1D49C, AL), + (0x1D49E, 0x1D49F, AL), + (0x1D4A2, 0x1D4A2, AL), + (0x1D4A5, 0x1D4A6, AL), + (0x1D4A9, 0x1D4AC, AL), + (0x1D4AE, 0x1D4B9, AL), + (0x1D4BB, 0x1D4BB, AL), + (0x1D4BD, 0x1D4C3, AL), + (0x1D4C5, 0x1D505, AL), + (0x1D507, 0x1D50A, AL), + (0x1D50D, 0x1D514, AL), + (0x1D516, 0x1D51C, AL), + (0x1D51E, 0x1D539, AL), + (0x1D53B, 0x1D53E, AL), + (0x1D540, 0x1D544, AL), + (0x1D546, 0x1D546, AL), + (0x1D54A, 0x1D550, AL), + (0x1D552, 0x1D6A5, AL), + (0x1D6A8, 0x1D6C0, AL), + (0x1D6C1, 0x1D6C1, AL), + (0x1D6C2, 0x1D6DA, AL), + (0x1D6DB, 0x1D6DB, AL), + (0x1D6DC, 0x1D6FA, AL), + (0x1D6FB, 0x1D6FB, AL), + (0x1D6FC, 0x1D714, AL), + (0x1D715, 0x1D715, AL), + (0x1D716, 0x1D734, AL), + (0x1D735, 0x1D735, AL), + (0x1D736, 0x1D74E, AL), + (0x1D74F, 0x1D74F, AL), + (0x1D750, 0x1D76E, AL), + (0x1D76F, 0x1D76F, AL), + (0x1D770, 0x1D788, AL), + (0x1D789, 0x1D789, AL), + (0x1D78A, 0x1D7A8, AL), + (0x1D7A9, 0x1D7A9, AL), + (0x1D7AA, 0x1D7C2, AL), + (0x1D7C3, 0x1D7C3, AL), + (0x1D7C4, 0x1D7CB, AL), + (0x1D7CE, 0x1D7FF, NU), 
+ (0x1D800, 0x1D9FF, AL), + (0x1DA00, 0x1DA36, CM), + (0x1DA37, 0x1DA3A, AL), + (0x1DA3B, 0x1DA6C, CM), + (0x1DA6D, 0x1DA74, AL), + (0x1DA75, 0x1DA75, CM), + (0x1DA76, 0x1DA83, AL), + (0x1DA84, 0x1DA84, CM), + (0x1DA85, 0x1DA86, AL), + (0x1DA87, 0x1DA8A, BA), + (0x1DA8B, 0x1DA8B, AL), + (0x1DA9B, 0x1DA9F, CM), + (0x1DAA1, 0x1DAAF, CM), + (0x1E000, 0x1E006, CM), + (0x1E008, 0x1E018, CM), + (0x1E01B, 0x1E021, CM), + (0x1E023, 0x1E024, CM), + (0x1E026, 0x1E02A, CM), + (0x1E100, 0x1E12C, AL), + (0x1E130, 0x1E136, CM), + (0x1E137, 0x1E13D, AL), + (0x1E140, 0x1E149, NU), + (0x1E14E, 0x1E14E, AL), + (0x1E14F, 0x1E14F, AL), + (0x1E2C0, 0x1E2EB, AL), + (0x1E2EC, 0x1E2EF, CM), + (0x1E2F0, 0x1E2F9, NU), + (0x1E2FF, 0x1E2FF, PR), + (0x1E800, 0x1E8C4, AL), + (0x1E8C7, 0x1E8CF, AL), + (0x1E8D0, 0x1E8D6, CM), + (0x1E900, 0x1E943, AL), + (0x1E944, 0x1E94A, CM), + (0x1E94B, 0x1E94B, AL), + (0x1E950, 0x1E959, NU), + (0x1E95E, 0x1E95F, OP), + (0x1EC71, 0x1ECAB, AL), + (0x1ECAC, 0x1ECAC, PO), + (0x1ECAD, 0x1ECAF, AL), + (0x1ECB0, 0x1ECB0, PO), + (0x1ECB1, 0x1ECB4, AL), + (0x1ED01, 0x1ED2D, AL), + (0x1ED2E, 0x1ED2E, AL), + (0x1ED2F, 0x1ED3D, AL), + (0x1EE00, 0x1EE03, AL), + (0x1EE05, 0x1EE1F, AL), + (0x1EE21, 0x1EE22, AL), + (0x1EE24, 0x1EE24, AL), + (0x1EE27, 0x1EE27, AL), + (0x1EE29, 0x1EE32, AL), + (0x1EE34, 0x1EE37, AL), + (0x1EE39, 0x1EE39, AL), + (0x1EE3B, 0x1EE3B, AL), + (0x1EE42, 0x1EE42, AL), + (0x1EE47, 0x1EE47, AL), + (0x1EE49, 0x1EE49, AL), + (0x1EE4B, 0x1EE4B, AL), + (0x1EE4D, 0x1EE4F, AL), + (0x1EE51, 0x1EE52, AL), + (0x1EE54, 0x1EE54, AL), + (0x1EE57, 0x1EE57, AL), + (0x1EE59, 0x1EE59, AL), + (0x1EE5B, 0x1EE5B, AL), + (0x1EE5D, 0x1EE5D, AL), + (0x1EE5F, 0x1EE5F, AL), + (0x1EE61, 0x1EE62, AL), + (0x1EE64, 0x1EE64, AL), + (0x1EE67, 0x1EE6A, AL), + (0x1EE6C, 0x1EE72, AL), + (0x1EE74, 0x1EE77, AL), + (0x1EE79, 0x1EE7C, AL), + (0x1EE7E, 0x1EE7E, AL), + (0x1EE80, 0x1EE89, AL), + (0x1EE8B, 0x1EE9B, AL), + (0x1EEA1, 0x1EEA3, AL), + (0x1EEA5, 0x1EEA9, AL), + (0x1EEAB, 0x1EEBB, 
AL), + (0x1EEF0, 0x1EEF1, AL), + (0x1F000, 0x1F02B, ID), + (0x1F02C, 0x1F02F, ID), + (0x1F030, 0x1F093, ID), + (0x1F094, 0x1F09F, ID), + (0x1F0A0, 0x1F0AE, ID), + (0x1F0AF, 0x1F0B0, ID), + (0x1F0B1, 0x1F0BF, ID), + (0x1F0C0, 0x1F0C0, ID), + (0x1F0C1, 0x1F0CF, ID), + (0x1F0D0, 0x1F0D0, ID), + (0x1F0D1, 0x1F0F5, ID), + (0x1F0F6, 0x1F0FF, ID), + (0x1F100, 0x1F10C, AI), + (0x1F10D, 0x1F10F, ID), + (0x1F110, 0x1F12D, AI), + (0x1F12E, 0x1F12F, AL), + (0x1F130, 0x1F169, AI), + (0x1F16A, 0x1F16C, AL), + (0x1F16D, 0x1F16F, ID), + (0x1F170, 0x1F1AC, AI), + (0x1F1AD, 0x1F1E5, ID), + (0x1F1E6, 0x1F1FF, RI), + (0x1F200, 0x1F202, ID), + (0x1F203, 0x1F20F, ID), + (0x1F210, 0x1F23B, ID), + (0x1F23C, 0x1F23F, ID), + (0x1F240, 0x1F248, ID), + (0x1F249, 0x1F24F, ID), + (0x1F250, 0x1F251, ID), + (0x1F252, 0x1F25F, ID), + (0x1F260, 0x1F265, ID), + (0x1F266, 0x1F2FF, ID), + (0x1F300, 0x1F384, ID), + (0x1F385, 0x1F385, EB), + (0x1F386, 0x1F39B, ID), + (0x1F39C, 0x1F39D, AL), + (0x1F39E, 0x1F3B4, ID), + (0x1F3B5, 0x1F3B6, AL), + (0x1F3B7, 0x1F3BB, ID), + (0x1F3BC, 0x1F3BC, AL), + (0x1F3BD, 0x1F3C1, ID), + (0x1F3C2, 0x1F3C4, EB), + (0x1F3C5, 0x1F3C6, ID), + (0x1F3C7, 0x1F3C7, EB), + (0x1F3C8, 0x1F3C9, ID), + (0x1F3CA, 0x1F3CC, EB), + (0x1F3CD, 0x1F3FA, ID), + (0x1F3FB, 0x1F3FF, EM), + (0x1F400, 0x1F441, ID), + (0x1F442, 0x1F443, EB), + (0x1F444, 0x1F445, ID), + (0x1F446, 0x1F450, EB), + (0x1F451, 0x1F465, ID), + (0x1F466, 0x1F478, EB), + (0x1F479, 0x1F47B, ID), + (0x1F47C, 0x1F47C, EB), + (0x1F47D, 0x1F480, ID), + (0x1F481, 0x1F483, EB), + (0x1F484, 0x1F484, ID), + (0x1F485, 0x1F487, EB), + (0x1F488, 0x1F48E, ID), + (0x1F48F, 0x1F48F, EB), + (0x1F490, 0x1F490, ID), + (0x1F491, 0x1F491, EB), + (0x1F492, 0x1F49F, ID), + (0x1F4A0, 0x1F4A0, AL), + (0x1F4A1, 0x1F4A1, ID), + (0x1F4A2, 0x1F4A2, AL), + (0x1F4A3, 0x1F4A3, ID), + (0x1F4A4, 0x1F4A4, AL), + (0x1F4A5, 0x1F4A9, ID), + (0x1F4AA, 0x1F4AA, EB), + (0x1F4AB, 0x1F4AE, ID), + (0x1F4AF, 0x1F4AF, AL), + (0x1F4B0, 0x1F4B0, ID), + (0x1F4B1, 
0x1F4B2, AL), + (0x1F4B3, 0x1F4FF, ID), + (0x1F500, 0x1F506, AL), + (0x1F507, 0x1F516, ID), + (0x1F517, 0x1F524, AL), + (0x1F525, 0x1F531, ID), + (0x1F532, 0x1F549, AL), + (0x1F54A, 0x1F573, ID), + (0x1F574, 0x1F575, EB), + (0x1F576, 0x1F579, ID), + (0x1F57A, 0x1F57A, EB), + (0x1F57B, 0x1F58F, ID), + (0x1F590, 0x1F590, EB), + (0x1F591, 0x1F594, ID), + (0x1F595, 0x1F596, EB), + (0x1F597, 0x1F5D3, ID), + (0x1F5D4, 0x1F5DB, AL), + (0x1F5DC, 0x1F5F3, ID), + (0x1F5F4, 0x1F5F9, AL), + (0x1F5FA, 0x1F5FF, ID), + (0x1F600, 0x1F644, ID), + (0x1F645, 0x1F647, EB), + (0x1F648, 0x1F64A, ID), + (0x1F64B, 0x1F64F, EB), + (0x1F650, 0x1F675, AL), + (0x1F676, 0x1F678, QU), + (0x1F679, 0x1F67B, NS), + (0x1F67C, 0x1F67F, AL), + (0x1F680, 0x1F6A2, ID), + (0x1F6A3, 0x1F6A3, EB), + (0x1F6A4, 0x1F6B3, ID), + (0x1F6B4, 0x1F6B6, EB), + (0x1F6B7, 0x1F6BF, ID), + (0x1F6C0, 0x1F6C0, EB), + (0x1F6C1, 0x1F6CB, ID), + (0x1F6CC, 0x1F6CC, EB), + (0x1F6CD, 0x1F6D5, ID), + (0x1F6D6, 0x1F6DF, ID), + (0x1F6E0, 0x1F6EC, ID), + (0x1F6ED, 0x1F6EF, ID), + (0x1F6F0, 0x1F6FA, ID), + (0x1F6FB, 0x1F6FF, ID), + (0x1F700, 0x1F773, AL), + (0x1F774, 0x1F77F, ID), + (0x1F780, 0x1F7D4, AL), + (0x1F7D5, 0x1F7D8, ID), + (0x1F7D9, 0x1F7DF, ID), + (0x1F7E0, 0x1F7EB, ID), + (0x1F7EC, 0x1F7FF, ID), + (0x1F800, 0x1F80B, AL), + (0x1F80C, 0x1F80F, ID), + (0x1F810, 0x1F847, AL), + (0x1F848, 0x1F84F, ID), + (0x1F850, 0x1F859, AL), + (0x1F85A, 0x1F85F, ID), + (0x1F860, 0x1F887, AL), + (0x1F888, 0x1F88F, ID), + (0x1F890, 0x1F8AD, AL), + (0x1F8AE, 0x1F8FF, ID), + (0x1F900, 0x1F90B, AL), + (0x1F90C, 0x1F90C, ID), + (0x1F90D, 0x1F90E, ID), + (0x1F90F, 0x1F90F, EB), + (0x1F910, 0x1F917, ID), + (0x1F918, 0x1F91F, EB), + (0x1F920, 0x1F925, ID), + (0x1F926, 0x1F926, EB), + (0x1F927, 0x1F92F, ID), + (0x1F930, 0x1F939, EB), + (0x1F93A, 0x1F93B, ID), + (0x1F93C, 0x1F93E, EB), + (0x1F93F, 0x1F971, ID), + (0x1F972, 0x1F972, ID), + (0x1F973, 0x1F976, ID), + (0x1F977, 0x1F979, ID), + (0x1F97A, 0x1F9A2, ID), + (0x1F9A3, 0x1F9A4, ID), + 
/// Line-break class assigned to a code point.
///
/// Each variant is a bare tag with no payload. Values of this type are what
/// the third column of the `(start, end, class)` range-table entries refers to.
///
/// NOTE(review): the variant names look like the class abbreviations of the
/// Unicode Line Breaking Algorithm (UAX #14) -- confirm against the spec
/// version the range table was generated from.
///
/// The declaration order is kept stable on purpose: it determines the
/// discriminant values, which other code may depend on.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum LineBreakClass {
    BK,
    CM,
    CR,
    GL,
    LF,
    NL,
    SP,
    WJ,
    ZW,
    ZWJ,
    AI,
    AL,
    B2,
    BA,
    BB,
    CB,
    CJ,
    CL,
    CP,
    EB,
    EM,
    EX,
    H2,
    H3,
    HL,
    HY,
    ID,
    IN,
    IS,
    JL,
    JT,
    JV,
    NS,
    NU,
    OP,
    PO,
    PR,
    QU,
    RI,
    SA,
    SG,
    SY,
    XX,
}

use LineBreakClass::*;

impl From<&str> for LineBreakClass {
    /// Converts a class name (e.g. `"AL"`, `"ZWJ"`) into its enum variant.
    ///
    /// The match is exact and case-sensitive: the accepted strings are
    /// precisely the variant identifiers of [`LineBreakClass`].
    ///
    /// # Panics
    ///
    /// Panics if `val` is not the name of a variant. The panic message
    /// includes the offending string so that a malformed table entry or a
    /// caller bug can be located quickly.
    fn from(val: &str) -> Self {
        match val {
            "BK" => BK,
            "CM" => CM,
            "CR" => CR,
            "GL" => GL,
            "LF" => LF,
            "NL" => NL,
            "SP" => SP,
            "WJ" => WJ,
            "ZW" => ZW,
            "ZWJ" => ZWJ,
            "AI" => AI,
            "AL" => AL,
            "B2" => B2,
            "BA" => BA,
            "BB" => BB,
            "CB" => CB,
            "CJ" => CJ,
            "CL" => CL,
            "CP" => CP,
            "EB" => EB,

            "EM" => EM,
            "EX" => EX,
            "H2" => H2,
            "H3" => H3,
            "HL" => HL,
            "HY" => HY,
            "ID" => ID,
            "IN" => IN,
            "IS" => IS,
            "JL" => JL,

            "JT" => JT,
            "JV" => JV,
            "NS" => NS,
            "NU" => NU,
            "OP" => OP,
            "PO" => PO,
            "PR" => PR,
            "QU" => QU,
            "RI" => RI,
            "SA" => SA,

            "SG" => SG,
            "SY" => SY,
            "XX" => XX,
            // This arm IS reachable with arbitrary input, so report the bad
            // name instead of claiming the code is unreachable.
            other => panic!("unknown line break class name: {:?}", other),
        }
    }
}