You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

194 lines
7.3 KiB

/*
* meli - melib crate.
*
* Copyright 2017-2020 Manos Pitsidianakis
*
* This file is part of meli.
*
* meli is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* meli is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with meli. If not, see <http://www.gnu.org/licenses/>.
*/
#[cfg(feature = "unicode_algorithms")]
include!("src/text_processing/types.rs");
use flate2::Compression;
use flate2::GzBuilder;
use std::fs::{self, File};
use std::io::prelude::*;
use std::io::BufReader;
use std::path::{Path, PathBuf};
use std::process::Command;
/// Appends one mailbox entry to the generated demo corpus source in `file`.
///
/// The entry starts with the quoted `mailbox_path` and `mailbox_name`,
/// followed by an opening `&[` list, and is terminated with `]), `.
///
/// When `fs_mailbox_path` is given, every non-directory entry of that
/// directory whose extension is not `gz` is read, a gzip-compressed copy is
/// written beside it as `<file name>.gz`, and one list item of the form
/// `(<uncompressed length>,"<gz path>",include_bytes!("../../../<gz path>")),`
/// is appended. Entries are visited in sorted path order so that the generated
/// file does not depend on the platform's directory iteration order.
///
/// # Errors
///
/// Returns any I/O error raised while listing the directory, reading a
/// message, writing its compressed copy, or writing to `file`.
fn build_mailbox(
    mailbox_path: &[u8],
    mailbox_name: &[u8],
    fs_mailbox_path: Option<&Path>,
    file: &mut File,
) -> Result<(), std::io::Error> {
    file.write_all(b" (\n\"")?;
    file.write_all(mailbox_path)?;
    file.write_all(b"\",\n\"")?;
    file.write_all(mailbox_name)?;
    file.write_all(b"\",\n&[\n")?;

    if let Some(dir) = fs_mailbox_path {
        // `read_dir` yields entries in a platform and filesystem dependent
        // order; collect and sort them to keep the output reproducible.
        let mut paths = fs::read_dir(dir)?
            .map(|entry| entry.map(|e| e.path()))
            .collect::<Result<Vec<_>, _>>()?;
        paths.sort();

        for path in paths {
            // Skip sub-directories and the compressed copies produced by a
            // previous run of this script.
            let is_gz = path.extension().map(|ext| ext == "gz").unwrap_or(false);
            if path.is_dir() || is_gz {
                continue;
            }

            // Build "<file name>.gz" on the OS string so that file names which
            // are not valid UTF-8 do not abort the build.
            let mut gz_name = match path.file_name() {
                Some(name) => name.to_os_string(),
                None => continue,
            };
            gz_name.push(".gz");
            let gz_path = path.with_file_name(gz_name);

            println!("reading: {:?}", &path);
            let bytes = fs::read(&path)?;

            let mut gz = GzBuilder::new().write(File::create(&gz_path)?, Compression::default());
            gz.write_all(&bytes)?;
            gz.finish()?;
            println!("wrote: {:?}", &gz_path);

            // The recorded length is that of the uncompressed message, while
            // the embedded bytes are the compressed copy.
            // NOTE(review): the path is interpolated into a string literal
            // unescaped, so a `"` or `\` in it would break the generated code.
            writeln!(
                file,
                "({0},\"{1}\",include_bytes!(\"../../../{1}\")),",
                bytes.len(),
                gz_path.display()
            )?;
        }
    }

    file.write_all(b"]), \n")
}
/// Build script entry point.
///
/// Tells Cargo when to re-run the script, then runs the code generation
/// steps: the embedded demo mail corpus and, when the `unicode_algorithms`
/// feature is enabled, the Unicode line break table. A step whose output file
/// already exists skips itself without preventing the other step from running.
fn main() -> Result<(), std::io::Error> {
    println!("cargo:rerun-if-changed=./../demo/");
    println!("cargo:rerun-if-changed=build.rs");

    generate_demo_corpus()?;

    #[cfg(feature = "unicode_algorithms")]
    {
        generate_line_break_table()?;
    }

    Ok(())
}

/// Writes `src/backends/demo/demo_corpus.rs`, a `DEMO_MAILBOXES` static that
/// embeds the demo mailboxes found under `./../demo/ermis-f/`.
///
/// Does nothing if the output file already exists.
fn generate_demo_corpus() -> Result<(), std::io::Error> {
    let mod_path: &Path = Path::new("src/backends/demo/demo_corpus.rs");
    if mod_path.exists() {
        println!(
            "{} already exists, delete it if you want to replace it.",
            mod_path.display()
        );
        // Only this step is skipped; terminating the process here would also
        // skip every generation step that follows.
        return Ok(());
    }
    println!("cwd: {:?}", std::env::current_dir());
    println!("mod_path: {:?}", &mod_path);

    let mut file = File::create(&mod_path)?;
    file.write_all(b"pub static DEMO_MAILBOXES: &'static [(&'static str, &'static str, &'static [(usize, &'static str, &'static [u8])])] = &[")?;

    let demo_path = Path::new("./../demo/ermis-f/imap-protocol/cur");
    build_mailbox(
        b"imap-protocol",
        b"imap-protocol",
        Some(demo_path),
        &mut file,
    )?;

    let demo_path = Path::new("./../demo/ermis-f/cur");
    println!("demo_path: {:?}", &demo_path);
    assert!(demo_path.is_dir());
    build_mailbox(b"qenron", b"qenron", Some(demo_path), &mut file)?;

    let demo_path = Path::new("./../demo/ermis-f/python_m/cur");
    build_mailbox(b"python", b"python", Some(demo_path), &mut file)?;

    // Mailboxes that are currently disabled:
    //build_mailbox(b"INBOX/All", b"All", None, &mut file)?;
    //build_mailbox(b"INBOX/Discussion Threads", b"Discussion Threads", None, &mut file)?;
    //let demo_path = Path::new("./../demo/ermis-f/Sent/cur");
    //build_mailbox(b"Sent", b"Sent", Some(demo_path), &mut file)?;
    //build_mailbox(b"INBOX/Notes", b"Notes", None, &mut file)?;
    //let demo_path = Path::new("./../demo/ermis-f/Trash/cur");
    //build_mailbox(b"Trash", b"Trash", Some(demo_path), &mut file)?;

    file.write_all(b"\n];\n")?;
    Ok(())
}

/// Downloads `LineBreak.txt` from unicode.org with `curl` and writes
/// `src/text_processing/tables.rs`, a `LINE_BREAK_RULES` table with one
/// `(first codepoint, last codepoint, class)` row per data line.
///
/// Does nothing if the output file already exists.
///
/// # Errors
///
/// Returns an error if `mktemp` or `curl` cannot be run or exit
/// unsuccessfully, if a data line cannot be parsed, or on any file I/O error.
#[cfg(feature = "unicode_algorithms")]
fn generate_line_break_table() -> Result<(), std::io::Error> {
    use std::io::{Error, ErrorKind};

    const LINE_BREAK_TABLE_URL: &str =
        "http://www.unicode.org/Public/UCD/latest/ucd/LineBreak.txt";

    /// Builds the error reported for a data line that cannot be parsed.
    fn malformed_line(line: &str) -> Error {
        Error::new(
            ErrorKind::InvalidData,
            format!("malformed LineBreak.txt line: {:?}", line),
        )
    }

    let mod_path = Path::new("src/text_processing/tables.rs");
    if mod_path.exists() {
        eprintln!(
            "{} already exists, delete it if you want to replace it.",
            mod_path.display()
        );
        return Ok(());
    }

    // Create a scratch directory to download into.
    let mktemp = Command::new("mktemp").arg("-d").output()?;
    if !mktemp.status.success() {
        return Err(Error::new(
            ErrorKind::Other,
            format!("`mktemp -d` failed with {}", mktemp.status),
        ));
    }
    let tmpdir = std::str::from_utf8(&mktemp.stdout)
        .map_err(|err| Error::new(ErrorKind::InvalidData, err))?
        .trim();
    let mut tmpdir_path = PathBuf::from(tmpdir);
    tmpdir_path.push("LineBreak.txt");

    let curl = Command::new("curl")
        .arg("-o")
        .arg(&tmpdir_path)
        .arg(LINE_BREAK_TABLE_URL)
        .output()?;
    if !curl.status.success() {
        return Err(Error::new(
            ErrorKind::Other,
            format!(
                "curl failed with {}: {}",
                curl.status,
                String::from_utf8_lossy(&curl.stderr)
            ),
        ));
    }

    let mut line_break_table: Vec<(u32, u32, LineBreakClass)> = Vec::with_capacity(3800);
    for line in BufReader::new(File::open(&tmpdir_path)?).lines() {
        let line = line?;
        if line.starts_with('#') || line.starts_with(' ') || line.is_empty() {
            continue;
        }
        // Only the first whitespace-delimited token, `<codepoints>;<class>`,
        // is of interest; the rest of the line is ignored.
        let tokens: &str = match line.split_whitespace().next() {
            Some(tokens) => tokens,
            None => continue,
        };
        // `find` returns a byte offset, which is what the slicing below needs.
        let semicolon_idx: usize = tokens.find(';').ok_or_else(|| malformed_line(&line))?;

        let parse_hex = |s: &str| u32::from_str_radix(s, 16).map_err(|_| malformed_line(&line));
        // Either a single codepoint or an inclusive `first..last` range.
        let mut codepoint_iter = tokens[..semicolon_idx].split("..");
        let first_codepoint: u32 = match codepoint_iter.next() {
            Some(s) => parse_hex(s)?,
            None => return Err(malformed_line(&line)),
        };
        let sec_codepoint: u32 = match codepoint_iter.next() {
            Some(s) => parse_hex(s)?,
            None => first_codepoint,
        };

        // NOTE(review): exactly two characters are taken for the class, so a
        // longer class name (e.g. "ZWJ") would be truncated - confirm against
        // what `LineBreakClass::from` accepts.
        let class = tokens
            .get(semicolon_idx + 1..semicolon_idx + 1 + 2)
            .ok_or_else(|| malformed_line(&line))?;
        line_break_table.push((first_codepoint, sec_codepoint, LineBreakClass::from(class)));
    }

    let mut file = File::create(&mod_path)?;
    file.write_all(b"use crate::types::LineBreakClass::*;\n")?;
    file.write_all(b"use crate::types::LineBreakClass;\n\n")?;
    file.write_all(b"const LINE_BREAK_RULES: &[(u32, u32, LineBreakClass)] = &[\n")?;
    for (first, last, class) in &line_break_table {
        writeln!(file, " (0x{:X}, 0x{:X}, {:?}),", first, last, class)?;
    }
    file.write_all(b"];")?;

    // Remove the downloaded file and then the scratch directory itself.
    std::fs::remove_file(&tmpdir_path)?;
    tmpdir_path.pop();
    std::fs::remove_dir(&tmpdir_path)?;
    Ok(())
}