You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

build.rs 2.6KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273
  /// Location of the Unicode Character Database `LineBreak.txt` property table
  /// that this build script downloads and turns into `src/tables.rs`.
  ///
  /// Fetched over HTTPS: the file's contents end up in generated source code,
  /// so the transfer should be integrity-protected, and `curl` is invoked
  /// without `-L`, so a plain-HTTP URL that answers with a redirect would not
  /// yield the table at all.
  const LINE_BREAK_TABLE_URL: &str = "https://www.unicode.org/Public/UCD/latest/ucd/LineBreak.txt";
  2. use std::fs::File;
  3. use std::io::prelude::*;
  4. use std::io::BufReader;
  5. use std::path::PathBuf;
  6. use std::process::Command;
  7. include!("src/types.rs");
  8. fn main() -> Result<(), std::io::Error> {
  9. let mod_path = PathBuf::from("src/tables.rs");
  10. if mod_path.exists() {
  11. eprintln!(
  12. "{} already exists, delete it if you want to replace it.",
  13. mod_path.display()
  14. );
  15. std::process::exit(0);
  16. }
  17. let mut tmpdir_path = PathBuf::from(
  18. std::str::from_utf8(&Command::new("mktemp").arg("-d").output()?.stdout)
  19. .unwrap()
  20. .trim(),
  21. );
  22. tmpdir_path.push("LineBreak.txt");
  23. Command::new("curl")
  24. .args(&["-o", tmpdir_path.to_str().unwrap(), LINE_BREAK_TABLE_URL])
  25. .output()?;
  26. let file = File::open(&tmpdir_path)?;
  27. let buf_reader = BufReader::new(file);
  28. let mut line_break_table: Vec<(u32, u32, LineBreakClass)> = Vec::with_capacity(3800);
  29. for line in buf_reader.lines() {
  30. let line = line.unwrap();
  31. if line.starts_with('#') || line.starts_with(' ') || line.is_empty() {
  32. continue;
  33. }
  34. let tokens: &str = line.split_whitespace().next().unwrap();
  35. let semicolon_idx: usize = tokens.chars().position(|c| c == ';').unwrap();
  36. /* LineBreak.txt list is ascii encoded so we can assume each char takes one byte: */
  37. let chars_str: &str = &tokens[..semicolon_idx];
  38. let mut codepoint_iter = chars_str.split("..");
  39. let first_codepoint: u32 =
  40. u32::from_str_radix(std::dbg!(codepoint_iter.next().unwrap()), 16).unwrap();
  41. let sec_codepoint: u32 = codepoint_iter
  42. .next()
  43. .map(|v| u32::from_str_radix(std::dbg!(v), 16).unwrap())
  44. .unwrap_or(first_codepoint);
  45. let class = &tokens[semicolon_idx + 1..semicolon_idx + 1 + 2];
  46. line_break_table.push((first_codepoint, sec_codepoint, LineBreakClass::from(class)));
  47. }
  48. let mut file = File::create(&mod_path)?;
  49. file.write_all(b"use crate::types::LineBreakClass::*;\n")
  50. .unwrap();
  51. file.write_all(b"use crate::types::LineBreakClass;\n\n")
  52. .unwrap();
  53. file.write_all(b"const line_break_rules: &'static [(u32, u32, LineBreakClass)] = &[\n")
  54. .unwrap();
  55. for l in &line_break_table {
  56. file.write_all(format!(" (0x{:X}, 0x{:X}, {:?}),\n", l.0, l.1, l.2).as_bytes())
  57. .unwrap();
  58. }
  59. file.write_all(b"];").unwrap();
  60. std::fs::remove_file(&tmpdir_path).unwrap();
  61. tmpdir_path.pop();
  62. std::fs::remove_dir(&tmpdir_path).unwrap();
  63. Ok(())
  64. }