text_processing: add reflow method() and enum to TextProcessing trait
Add split_lines_reflow(&self, reflow: Reflow, width: Option<usize>) -> Vec<String> method that, according to reflow (No reflow, FormatFlowed or All) reflows the text. FormatFlowed follows the rfc3676 - The Text/Plain Format and DelSp Parameters https://tools.ietf.org/html/rfc3676jmap
parent
e1dec05881
commit
b01b9ffbcb
|
@ -8,6 +8,7 @@
|
||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
use crate::types::Reflow;
|
||||||
use crate::wcwidth::{wcwidth, CodePointsIter};
|
use crate::wcwidth::{wcwidth, CodePointsIter};
|
||||||
extern crate unicode_segmentation;
|
extern crate unicode_segmentation;
|
||||||
use self::unicode_segmentation::UnicodeSegmentation;
|
use self::unicode_segmentation::UnicodeSegmentation;
|
||||||
|
@ -43,12 +44,18 @@ pub trait TextProcessing: UnicodeSegmentation + CodePointsIter {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn split_lines(&self, width: usize) -> Vec<String>;
|
fn split_lines(&self, width: usize) -> Vec<String>;
|
||||||
|
|
||||||
|
fn split_lines_reflow(&self, reflow: Reflow, width: Option<usize>) -> Vec<String>;
|
||||||
}
|
}
|
||||||
|
|
||||||
impl TextProcessing for str {
|
impl TextProcessing for str {
|
||||||
fn split_lines(&self, width: usize) -> Vec<String> {
|
fn split_lines(&self, width: usize) -> Vec<String> {
|
||||||
crate::line_break::linear(self, width)
|
crate::line_break::linear(self, width)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn split_lines_reflow(&self, reflow: Reflow, width: Option<usize>) -> Vec<String> {
|
||||||
|
crate::line_break::split_lines_reflow(self, reflow, width)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct WordBreakIter<'s> {
|
pub struct WordBreakIter<'s> {
|
||||||
|
|
|
@ -2,6 +2,7 @@ pub mod grapheme_clusters;
|
||||||
pub mod line_break;
|
pub mod line_break;
|
||||||
mod tables;
|
mod tables;
|
||||||
mod types;
|
mod types;
|
||||||
|
pub use types::Reflow;
|
||||||
pub mod wcwidth;
|
pub mod wcwidth;
|
||||||
pub use grapheme_clusters::*;
|
pub use grapheme_clusters::*;
|
||||||
pub use line_break::*;
|
pub use line_break::*;
|
||||||
|
|
|
@ -2,6 +2,7 @@ extern crate unicode_segmentation;
|
||||||
use self::unicode_segmentation::UnicodeSegmentation;
|
use self::unicode_segmentation::UnicodeSegmentation;
|
||||||
use crate::tables::LINE_BREAK_RULES;
|
use crate::tables::LINE_BREAK_RULES;
|
||||||
use crate::types::LineBreakClass;
|
use crate::types::LineBreakClass;
|
||||||
|
use crate::types::Reflow;
|
||||||
use core::cmp::Ordering;
|
use core::cmp::Ordering;
|
||||||
use core::iter::Peekable;
|
use core::iter::Peekable;
|
||||||
use core::str::FromStr;
|
use core::str::FromStr;
|
||||||
|
@ -875,3 +876,217 @@ mod alg {
|
||||||
lines
|
lines
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn split_lines_reflow(text: &str, reflow: Reflow, width: Option<usize>) -> Vec<String> {
|
||||||
|
match reflow {
|
||||||
|
Reflow::FormatFlowed => {
|
||||||
|
/* rfc3676 - The Text/Plain Format and DelSp Parameters
|
||||||
|
* https://tools.ietf.org/html/rfc3676 */
|
||||||
|
|
||||||
|
let mut ret = Vec::new();
|
||||||
|
/*
|
||||||
|
* - Split lines with indices using str::match_indices()
|
||||||
|
* - Iterate and reflow flow regions, and pass fixed regions through
|
||||||
|
*/
|
||||||
|
let lines_indices: Vec<usize> = text.match_indices("\n").map(|(i, _)| i).collect();
|
||||||
|
let mut prev_index = 0;
|
||||||
|
let mut in_paragraph = false;
|
||||||
|
let mut paragraph_start = 0;
|
||||||
|
|
||||||
|
let mut prev_quote_depth = 0;
|
||||||
|
for i in &lines_indices {
|
||||||
|
let line = &text[prev_index..*i];
|
||||||
|
let mut trimmed = line.trim_start().lines().next().unwrap_or("");
|
||||||
|
let mut quote_depth = 0;
|
||||||
|
let p_str: usize = trimmed
|
||||||
|
.as_bytes()
|
||||||
|
.iter()
|
||||||
|
.position(|&b| {
|
||||||
|
if b != b'>' {
|
||||||
|
/* position() is short-circuiting */
|
||||||
|
true
|
||||||
|
} else {
|
||||||
|
quote_depth += 1;
|
||||||
|
false
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.unwrap_or(0);
|
||||||
|
trimmed = &trimmed[p_str..];
|
||||||
|
if trimmed.starts_with(" ") {
|
||||||
|
/* Remove space stuffing before checking for ending space character.
|
||||||
|
* [rfc3676#section-4.4] */
|
||||||
|
trimmed = &trimmed[1..];
|
||||||
|
}
|
||||||
|
|
||||||
|
if trimmed.ends_with(' ') {
|
||||||
|
if !in_paragraph {
|
||||||
|
in_paragraph = true;
|
||||||
|
paragraph_start = prev_index;
|
||||||
|
} else if prev_quote_depth == quote_depth {
|
||||||
|
/* This becomes part of the paragraph we're in */
|
||||||
|
} else {
|
||||||
|
/*Malformed line, different quote depths can't be in the same paragraph. */
|
||||||
|
let paragraph = &text[paragraph_start..prev_index];
|
||||||
|
reflow_helper(&mut ret, paragraph, prev_quote_depth, in_paragraph, width);
|
||||||
|
|
||||||
|
paragraph_start = prev_index;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if prev_quote_depth == quote_depth || !in_paragraph {
|
||||||
|
let paragraph = &text[paragraph_start..*i];
|
||||||
|
reflow_helper(&mut ret, paragraph, quote_depth, in_paragraph, width);
|
||||||
|
} else {
|
||||||
|
/*Malformed line, different quote depths can't be in the same paragraph. */
|
||||||
|
let paragraph = &text[paragraph_start..prev_index];
|
||||||
|
reflow_helper(&mut ret, paragraph, prev_quote_depth, in_paragraph, width);
|
||||||
|
let paragraph = &text[prev_index..*i];
|
||||||
|
reflow_helper(&mut ret, paragraph, quote_depth, false, width);
|
||||||
|
}
|
||||||
|
paragraph_start = *i;
|
||||||
|
in_paragraph = false;
|
||||||
|
}
|
||||||
|
prev_quote_depth = quote_depth;
|
||||||
|
prev_index = *i;
|
||||||
|
}
|
||||||
|
let paragraph = &text[paragraph_start..text.len()];
|
||||||
|
reflow_helper(&mut ret, paragraph, prev_quote_depth, in_paragraph, width);
|
||||||
|
ret
|
||||||
|
}
|
||||||
|
Reflow::All => {
|
||||||
|
if let Some(width) = width {
|
||||||
|
linear(text, width)
|
||||||
|
} else {
|
||||||
|
text.trim().split('\n').map(str::to_string).collect()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Reflow::No => text.trim().split('\n').map(str::to_string).collect(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn reflow_helper(
|
||||||
|
ret: &mut Vec<String>,
|
||||||
|
paragraph: &str,
|
||||||
|
quote_depth: usize,
|
||||||
|
in_paragraph: bool,
|
||||||
|
width: Option<usize>,
|
||||||
|
) {
|
||||||
|
if quote_depth > 0 {
|
||||||
|
let quotes: String = ">".repeat(quote_depth);
|
||||||
|
let paragraph = paragraph
|
||||||
|
.trim_start_matches("es)
|
||||||
|
.replace(&format!("\n{}", "es), "")
|
||||||
|
.replace("\n", "")
|
||||||
|
.replace("\r", "");
|
||||||
|
if in_paragraph {
|
||||||
|
if let Some(width) = width {
|
||||||
|
ret.extend(
|
||||||
|
linear(¶graph, width.saturating_sub(quote_depth))
|
||||||
|
.into_iter()
|
||||||
|
.map(|l| format!("{}{}", "es, l)),
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
ret.push(format!("{}{}", "es, ¶graph));
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
ret.push(format!("{}{}", "es, ¶graph));
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
let paragraph = paragraph.replace("\n", "").replace("\r", "");
|
||||||
|
|
||||||
|
if in_paragraph {
|
||||||
|
if let Some(width) = width {
|
||||||
|
let ex = linear(¶graph, width);
|
||||||
|
ret.extend(ex.into_iter());
|
||||||
|
} else {
|
||||||
|
ret.push(paragraph);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
ret.push(paragraph);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_reflow() {
|
||||||
|
let text = r#"`Take some more tea,' the March Hare said to Alice, very
|
||||||
|
earnestly.
|
||||||
|
|
||||||
|
`I've had nothing yet,' Alice replied in an offended tone, `so
|
||||||
|
I can't take more.'
|
||||||
|
|
||||||
|
`You mean you can't take LESS,' said the Hatter: `it's very
|
||||||
|
easy to take MORE than nothing.'"#;
|
||||||
|
for l in split_lines_reflow(text, Reflow::FormatFlowed, Some(30)) {
|
||||||
|
println!("{}", l);
|
||||||
|
}
|
||||||
|
println!("");
|
||||||
|
for l in split_lines_reflow(text, Reflow::No, Some(30)) {
|
||||||
|
println!("{}", l);
|
||||||
|
}
|
||||||
|
println!("");
|
||||||
|
let text = r#">>>Take some more tea.
|
||||||
|
>>I've had nothing yet, so I can't take more.
|
||||||
|
>You mean you can't take LESS, it's very easy to take
|
||||||
|
>MORE than nothing."#;
|
||||||
|
for l in split_lines_reflow(text, Reflow::FormatFlowed, Some(20)) {
|
||||||
|
println!("{}", l);
|
||||||
|
}
|
||||||
|
println!("");
|
||||||
|
for l in split_lines_reflow(text, Reflow::No, Some(20)) {
|
||||||
|
println!("{}", l);
|
||||||
|
}
|
||||||
|
println!("");
|
||||||
|
let text = r#"CHAPTER I. Down the Rabbit-Hole
|
||||||
|
|
||||||
|
Alice was beginning to get very tired of sitting by her sister on the
|
||||||
|
bank, and of having nothing to do: once or twice she had peeped into the
|
||||||
|
book her sister was reading, but it had no pictures or conversations in
|
||||||
|
it, ‘and what is the use of a book,’ thought Alice ‘without pictures or
|
||||||
|
conversations?’
|
||||||
|
|
||||||
|
So she was considering in her own mind (as well as she could, for the
|
||||||
|
hot day made her feel very sleepy and stupid), whether the pleasure
|
||||||
|
of making a daisy-chain would be worth the trouble of getting up and
|
||||||
|
picking the daisies, when suddenly a White Rabbit with pink eyes ran
|
||||||
|
close by her.
|
||||||
|
|
||||||
|
>>There was nothing so VERY remarkable in that; nor did Alice think it so
|
||||||
|
>>VERY much out of the way to hear the Rabbit say to itself, ‘Oh dear!
|
||||||
|
>> Oh dear! I shall be late!’ (when she thought it over afterwards, it
|
||||||
|
>>occurred to her that she ought to have wondered at this, but at the time
|
||||||
|
>>it all seemed quite natural); but when the Rabbit actually TOOK A WATCH
|
||||||
|
OUT OF ITS WAISTCOAT-POCKET, and looked at it, and then hurried on,
|
||||||
|
>>Alice started to her feet, for it flashed across her mind that she had
|
||||||
|
>>never before seen a rabbit with either a waistcoat-pocket, or a watch
|
||||||
|
>>to take out of it, and burning with curiosity, she ran across the field
|
||||||
|
after it, and fortunately was just in time to see it pop down a large
|
||||||
|
rabbit-hole under the hedge.
|
||||||
|
|
||||||
|
In another moment down went Alice after it, never once considering how
|
||||||
|
in the world she was to get out again.
|
||||||
|
|
||||||
|
The rabbit-hole went straight on like a tunnel for some way, and then
|
||||||
|
dipped suddenly down, so suddenly that Alice had not a moment to think
|
||||||
|
about stopping herself before she found herself falling down a very deep
|
||||||
|
well.
|
||||||
|
|
||||||
|
Either the well was very deep, or she fell very slowly, for she had
|
||||||
|
plenty of time as she went down to look about her and to wonder what was
|
||||||
|
going to happen next. First, she tried to look down and make out what
|
||||||
|
she was coming to, but it was too dark to see anything; then she
|
||||||
|
looked at the sides of the well, and noticed that they were filled with
|
||||||
|
cupboards and book-shelves; here and there she saw maps and pictures
|
||||||
|
hung upon pegs. She took down a jar from one of the shelves as
|
||||||
|
she passed; it was labelled ‘ORANGE MARMALADE’, but to her great
|
||||||
|
disappointment it was empty: she did not like to drop the jar for fear
|
||||||
|
of killing somebody, so managed to put it into one of the cupboards as
|
||||||
|
she fell past it.
|
||||||
|
|
||||||
|
‘Well!’ thought Alice to herself, ‘after such a fall as this, I shall
|
||||||
|
think nothing of tumbling down stairs! How brave they’ll all think me at
|
||||||
|
home! Why, I wouldn’t say anything about it, even if I fell off the top
|
||||||
|
of the house!’ (Which was very likely true.)"#;
|
||||||
|
for l in split_lines_reflow(text, Reflow::FormatFlowed, Some(72)) {
|
||||||
|
println!("{}", l);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -100,3 +100,16 @@ impl From<&str> for LineBreakClass {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(PartialEq, Debug, Copy, Clone)]
|
||||||
|
pub enum Reflow {
|
||||||
|
No,
|
||||||
|
All,
|
||||||
|
FormatFlowed,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for Reflow {
|
||||||
|
fn default() -> Self {
|
||||||
|
Reflow::No
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue