2020-01-30 00:25:51 +02:00
|
|
|
|
/*
|
2020-02-04 17:26:25 +02:00
|
|
|
|
* meli - text_processing mod.
|
2020-01-30 00:25:51 +02:00
|
|
|
|
*
|
|
|
|
|
* Copyright 2017-2020 Manos Pitsidianakis
|
|
|
|
|
*
|
|
|
|
|
* This file is part of meli.
|
|
|
|
|
*
|
|
|
|
|
* meli is free software: you can redistribute it and/or modify
|
|
|
|
|
* it under the terms of the GNU General Public License as published by
|
|
|
|
|
* the Free Software Foundation, either version 3 of the License, or
|
|
|
|
|
* (at your option) any later version.
|
|
|
|
|
*
|
|
|
|
|
* meli is distributed in the hope that it will be useful,
|
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
|
*
|
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
|
* along with meli. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
|
*/
|
|
|
|
|
|
2019-07-22 15:14:39 +03:00
|
|
|
|
pub mod grapheme_clusters;
|
2019-07-27 01:56:07 +03:00
|
|
|
|
pub mod line_break;
|
2020-02-25 21:56:34 +02:00
|
|
|
|
pub mod search;
|
2019-07-27 01:56:07 +03:00
|
|
|
|
mod tables;
|
|
|
|
|
mod types;
|
2019-11-16 20:19:02 +02:00
|
|
|
|
pub use types::Reflow;
|
2019-07-22 15:14:39 +03:00
|
|
|
|
pub mod wcwidth;
|
|
|
|
|
pub use grapheme_clusters::*;
|
2019-07-27 01:56:07 +03:00
|
|
|
|
pub use line_break::*;
|
2019-07-22 15:14:39 +03:00
|
|
|
|
pub use wcwidth::*;
|
2019-11-22 14:17:09 +02:00
|
|
|
|
|
|
|
|
|
/// Shortening of string-like values on grapheme cluster boundaries.
///
/// All lengths taken by these methods are counted in grapheme clusters, not
/// in bytes, so implementations never cut through the middle of a cluster.
/// The `truncate_*` methods modify `self` in place, while the `trim_*`
/// methods leave `self` untouched and return a sub-slice of it.
pub trait Truncate {
    /// Shortens `self` in place from the end; `new_len` is the grapheme
    /// cluster count to cut at.
    fn truncate_at_boundary(&mut self, new_len: usize);

    /// Returns a prefix of `self` cut at a grapheme cluster boundary;
    /// `new_len` is the grapheme cluster count to cut at.
    fn trim_at_boundary(&self, new_len: usize) -> &str;

    /// Returns a suffix of `self` obtained by dropping leading grapheme
    /// clusters; `skip_len` is the number of clusters to drop.
    fn trim_left_at_boundary(&self, skip_len: usize) -> &str;

    /// Drops leading grapheme clusters from `self` in place; `skip_len` is
    /// the number of clusters to drop.
    fn truncate_left_at_boundary(&mut self, skip_len: usize);
}
|
|
|
|
|
|
2020-08-02 00:48:44 +03:00
|
|
|
|
impl Truncate for &str {
|
|
|
|
|
fn truncate_at_boundary(&mut self, new_len: usize) {
|
|
|
|
|
if new_len >= self.len() {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
extern crate unicode_segmentation;
|
|
|
|
|
use unicode_segmentation::UnicodeSegmentation;
|
|
|
|
|
if let Some((last, _)) = UnicodeSegmentation::grapheme_indices(*self, true)
|
|
|
|
|
.take(new_len)
|
|
|
|
|
.last()
|
|
|
|
|
{
|
|
|
|
|
*self = &self[..last];
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn trim_at_boundary(&self, new_len: usize) -> &str {
|
|
|
|
|
if new_len >= self.len() {
|
|
|
|
|
return self;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
extern crate unicode_segmentation;
|
|
|
|
|
use unicode_segmentation::UnicodeSegmentation;
|
|
|
|
|
if let Some((last, _)) = UnicodeSegmentation::grapheme_indices(*self, true)
|
|
|
|
|
.take(new_len)
|
|
|
|
|
.last()
|
|
|
|
|
{
|
|
|
|
|
&self[..last]
|
|
|
|
|
} else {
|
|
|
|
|
self
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-11-24 09:31:38 +02:00
|
|
|
|
|
|
|
|
|
fn trim_left_at_boundary(&self, skip_len: usize) -> &str {
|
|
|
|
|
if skip_len >= self.len() {
|
|
|
|
|
return "";
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
extern crate unicode_segmentation;
|
|
|
|
|
use unicode_segmentation::UnicodeSegmentation;
|
|
|
|
|
if let Some((first, _)) = UnicodeSegmentation::grapheme_indices(*self, true)
|
|
|
|
|
.skip(skip_len)
|
|
|
|
|
.next()
|
|
|
|
|
{
|
|
|
|
|
&self[first..]
|
|
|
|
|
} else {
|
|
|
|
|
self
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn truncate_left_at_boundary(&mut self, skip_len: usize) {
|
|
|
|
|
if skip_len >= self.len() {
|
|
|
|
|
*self = "";
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
extern crate unicode_segmentation;
|
|
|
|
|
use unicode_segmentation::UnicodeSegmentation;
|
|
|
|
|
if let Some((first, _)) = UnicodeSegmentation::grapheme_indices(*self, true)
|
|
|
|
|
.skip(skip_len)
|
|
|
|
|
.next()
|
|
|
|
|
{
|
|
|
|
|
*self = &self[first..];
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-08-02 00:48:44 +03:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
impl Truncate for String {
|
|
|
|
|
fn truncate_at_boundary(&mut self, new_len: usize) {
|
2019-12-12 11:01:13 +02:00
|
|
|
|
if new_len >= self.len() {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
extern crate unicode_segmentation;
|
|
|
|
|
use unicode_segmentation::UnicodeSegmentation;
|
|
|
|
|
if let Some((last, _)) = UnicodeSegmentation::grapheme_indices(self.as_str(), true)
|
|
|
|
|
.take(new_len)
|
|
|
|
|
.last()
|
|
|
|
|
{
|
|
|
|
|
String::truncate(self, last);
|
2019-11-22 14:17:09 +02:00
|
|
|
|
}
|
|
|
|
|
}
|
2020-08-02 00:48:44 +03:00
|
|
|
|
|
|
|
|
|
fn trim_at_boundary(&self, new_len: usize) -> &str {
|
|
|
|
|
if new_len >= self.len() {
|
|
|
|
|
return self;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
extern crate unicode_segmentation;
|
|
|
|
|
use unicode_segmentation::UnicodeSegmentation;
|
|
|
|
|
if let Some((last, _)) = UnicodeSegmentation::grapheme_indices(self.as_str(), true)
|
|
|
|
|
.take(new_len)
|
|
|
|
|
.last()
|
|
|
|
|
{
|
|
|
|
|
&self[..last]
|
|
|
|
|
} else {
|
|
|
|
|
self.as_str()
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-11-24 09:31:38 +02:00
|
|
|
|
|
|
|
|
|
fn trim_left_at_boundary(&self, skip_len: usize) -> &str {
|
|
|
|
|
if skip_len >= self.len() {
|
|
|
|
|
return "";
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
extern crate unicode_segmentation;
|
|
|
|
|
use unicode_segmentation::UnicodeSegmentation;
|
|
|
|
|
if let Some((first, _)) = UnicodeSegmentation::grapheme_indices(self.as_str(), true)
|
|
|
|
|
.skip(skip_len)
|
|
|
|
|
.next()
|
|
|
|
|
{
|
|
|
|
|
&self[first..]
|
|
|
|
|
} else {
|
|
|
|
|
self.as_str()
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn truncate_left_at_boundary(&mut self, skip_len: usize) {
|
|
|
|
|
if skip_len >= self.len() {
|
|
|
|
|
self.clear();
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
extern crate unicode_segmentation;
|
|
|
|
|
use unicode_segmentation::UnicodeSegmentation;
|
|
|
|
|
if let Some((first, _)) = UnicodeSegmentation::grapheme_indices(self.as_str(), true)
|
|
|
|
|
.skip(skip_len)
|
|
|
|
|
.next()
|
|
|
|
|
{
|
|
|
|
|
*self = self[first..].to_string();
|
|
|
|
|
}
|
|
|
|
|
}
|
2019-11-22 14:17:09 +02:00
|
|
|
|
}
|
2019-11-23 17:54:45 +02:00
|
|
|
|
|
|
|
|
|
/// Shell-style glob matching for string-like values.
pub trait GlobMatch {
    /// Returns `true` if `self` matches the glob `pattern`.
    fn matches_glob(&self, pattern: &str) -> bool;

    /// Returns `true` if `self` should be treated as a glob pattern rather
    /// than as a literal string.
    fn is_glob(&self) -> bool;
}
|
|
|
|
|
|
|
|
|
|
impl GlobMatch for str {
|
2020-02-08 23:08:56 +02:00
|
|
|
|
fn matches_glob(&self, _pattern: &str) -> bool {
|
|
|
|
|
macro_rules! strip_slash {
|
|
|
|
|
($v:expr) => {
|
|
|
|
|
if $v.ends_with("/") {
|
|
|
|
|
&$v[..$v.len() - 1]
|
|
|
|
|
} else {
|
|
|
|
|
$v
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
}
|
|
|
|
|
let pattern: Vec<&str> = strip_slash!(_pattern).split_graphemes();
|
|
|
|
|
let s: Vec<&str> = strip_slash!(self).split_graphemes();
|
2019-11-23 17:54:45 +02:00
|
|
|
|
|
2020-02-08 23:08:56 +02:00
|
|
|
|
// Taken from https://research.swtch.com/glob
|
2019-11-23 17:54:45 +02:00
|
|
|
|
|
2020-02-08 23:08:56 +02:00
|
|
|
|
let mut px = 0;
|
|
|
|
|
let mut sx = 0;
|
|
|
|
|
let mut next_px = 0;
|
|
|
|
|
let mut next_sx = 0;
|
|
|
|
|
while px < pattern.len() || sx < s.len() {
|
|
|
|
|
if px < pattern.len() {
|
|
|
|
|
match pattern[px] {
|
|
|
|
|
"?" => {
|
|
|
|
|
if sx < s.len() {
|
|
|
|
|
px += 1;
|
|
|
|
|
sx += 1;
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
"*" => {
|
|
|
|
|
// Try to match at sx.
|
|
|
|
|
// If that doesn't work out,
|
|
|
|
|
// restart at sx+1 next.
|
|
|
|
|
next_px = px;
|
|
|
|
|
next_sx = sx + 1;
|
|
|
|
|
px += 1;
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
p => {
|
|
|
|
|
if sx < s.len() && s[sx] == p {
|
|
|
|
|
px += 1;
|
|
|
|
|
sx += 1;
|
|
|
|
|
continue;
|
|
|
|
|
}
|
2019-11-23 17:54:45 +02:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-02-08 23:08:56 +02:00
|
|
|
|
// Mismatch. Maybe restart.
|
|
|
|
|
if 0 < next_sx && next_sx <= s.len() {
|
|
|
|
|
px = next_px;
|
|
|
|
|
sx = next_sx;
|
|
|
|
|
continue;
|
2019-11-23 17:54:45 +02:00
|
|
|
|
}
|
2020-02-08 23:08:56 +02:00
|
|
|
|
return false;
|
2019-11-23 17:54:45 +02:00
|
|
|
|
}
|
|
|
|
|
true
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn is_glob(&self) -> bool {
|
|
|
|
|
self.contains('*')
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Checks `GlobMatch::matches_glob` against pairs that must match and pairs
/// that must not.
#[test]
fn test_globmatch() {
    // (subject, pattern) pairs that are expected to match.
    let matching = [
        ("INBOX", "INBOX"),
        ("INBOX/", "INBOX"),
        ("INBOX", "INBO?"),
        ("INBOX/Sent", "INBOX/*"),
        ("INBOX/Sent", "*/Sent"),
        ("INBOX/Archives/2047", "*"),
        ("INBOX/Archives/2047", "INBOX/*/2047"),
        ("INBOX/Archives/2047", "INBOX/Archives/2*047"),
        ("INBOX/Archives/2047", "INBOX/Archives/204?"),
    ];
    // (subject, pattern) pairs that are expected NOT to match.
    let rejected = [
        ("INBOX/Sent", "INBOX"),
        ("INBOX/Sent", "*/Drafts"),
        // The subject's trailing slash is stripped, so nothing is left for
        // the pattern's "/" to match.
        ("INBOX/Lists/", "INBOX/Lists/*"),
    ];

    for (subject, pattern) in matching.iter() {
        assert!(
            subject.matches_glob(pattern),
            "{:?} should match {:?}",
            subject,
            pattern
        );
    }
    for (subject, pattern) in rejected.iter() {
        assert!(
            !subject.matches_glob(pattern),
            "{:?} should not match {:?}",
            subject,
            pattern
        );
    }
}
|
2020-02-25 21:56:34 +02:00
|
|
|
|
|
2020-07-05 15:28:55 +03:00
|
|
|
|
/// Opening paragraphs of chapter one of "Alice's Adventures in Wonderland".
///
/// Several lines of the third paragraph carry a `>>` prefix while others in
/// the same paragraph do not.
/// NOTE(review): presumably fixture text for the sibling modules' tests
/// (quoted-text handling) — confirm against the users of this constant.
pub const _ALICE_CHAPTER_1: &str = r#"CHAPTER I. Down the Rabbit-Hole

Alice was beginning to get very tired of sitting by her sister on the
bank, and of having nothing to do: once or twice she had peeped into the
book her sister was reading, but it had no pictures or conversations in
it, “and what is the use of a book,” thought Alice “without pictures or
conversations?”

So she was considering in her own mind (as well as she could, for the
hot day made her feel very sleepy and stupid), whether the pleasure
of making a daisy-chain would be worth the trouble of getting up and
picking the daisies, when suddenly a White Rabbit with pink eyes ran
close by her.

>>There was nothing so VERY remarkable in that; nor did Alice think it so
>>VERY much out of the way to hear the Rabbit say to itself, “Oh dear!
>> Oh dear! I shall be late!” (when she thought it over afterwards, it
>>occurred to her that she ought to have wondered at this, but at the time
>>it all seemed quite natural); but when the Rabbit actually TOOK A WATCH
OUT OF ITS WAISTCOAT-POCKET, and looked at it, and then hurried on,
>>Alice started to her feet, for it flashed across her mind that she had
>>never before seen a rabbit with either a waistcoat-pocket, or a watch
>>to take out of it, and burning with curiosity, she ran across the field
after it, and fortunately was just in time to see it pop down a large
rabbit-hole under the hedge.

In another moment down went Alice after it, never once considering how
in the world she was to get out again.

The rabbit-hole went straight on like a tunnel for some way, and then
dipped suddenly down, so suddenly that Alice had not a moment to think
about stopping herself before she found herself falling down a very deep
well.

Either the well was very deep, or she fell very slowly, for she had
plenty of time as she went down to look about her and to wonder what was
going to happen next. First, she tried to look down and make out what
she was coming to, but it was too dark to see anything; then she
looked at the sides of the well, and noticed that they were filled with
cupboards and book-shelves; here and there she saw maps and pictures
hung upon pegs. She took down a jar from one of the shelves as
she passed; it was labelled “ORANGE MARMALADE”, but to her great
disappointment it was empty: she did not like to drop the jar for fear
of killing somebody, so managed to put it into one of the cupboards as
she fell past it.

“Well!” thought Alice to herself, “after such a fall as this, I shall
think nothing of tumbling down stairs! How brave they’ll all think me at
home! Why, I wouldn’t say anything about it, even if I fell off the top
of the house!” (Which was very likely true.)"#;
|