LineBreakCandidateIter: make iter non-recursive
A line with lots of graphemes without any breaks can overflow the stack, so turn the recursion into a loop.
parent
6ceed3cae9
commit
b3b9563db0
|
@ -130,6 +130,7 @@ macro_rules! next_grapheme_class {
|
|||
impl<'a> Iterator for LineBreakCandidateIter<'a> {
|
||||
type Item = (usize, LineBreakCandidate);
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
loop {
|
||||
// After end of text, there are no breaks.
|
||||
if self.pos >= self.text.len() {
|
||||
return None;
|
||||
|
@ -140,13 +141,13 @@ impl<'a> Iterator for LineBreakCandidateIter<'a> {
|
|||
return Some((self.pos, MandatoryBreak));
|
||||
}
|
||||
|
||||
let (idx, mut grapheme) = self.iter.next().unwrap();
|
||||
let LineBreakCandidateIter {
|
||||
ref mut iter,
|
||||
ref text,
|
||||
ref mut reg_ind_streak,
|
||||
ref mut pos,
|
||||
} = self;
|
||||
let (idx, mut grapheme) = iter.next().unwrap();
|
||||
let iter = iter.by_ref();
|
||||
|
||||
debug_assert_eq!(idx, *pos);
|
||||
|
@ -154,7 +155,7 @@ impl<'a> Iterator for LineBreakCandidateIter<'a> {
|
|||
// LB2 Never break at the start of text
|
||||
if idx == 0 {
|
||||
*pos += grapheme.len();
|
||||
return self.next();
|
||||
continue;
|
||||
}
|
||||
|
||||
let class = get_class!(grapheme);
|
||||
|
@ -206,13 +207,13 @@ impl<'a> Iterator for LineBreakCandidateIter<'a> {
|
|||
/* LB6 Do not break before hard line breaks. × ( BK | CR | LF | NL ) */
|
||||
BK | CR | LF | NL => {
|
||||
*pos += grapheme.len();
|
||||
return self.next();
|
||||
continue;
|
||||
}
|
||||
/* LB7 Do not break before spaces or zero width
|
||||
* space. × SP × ZW */
|
||||
SP | ZW => {
|
||||
*pos += grapheme.len();
|
||||
return self.next();
|
||||
continue;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
@ -231,7 +232,7 @@ impl<'a> Iterator for LineBreakCandidateIter<'a> {
|
|||
ZWJ => {
|
||||
// LB8a Do not break after a zero width joiner.
|
||||
*pos += grapheme.len();
|
||||
return self.next();
|
||||
continue;
|
||||
}
|
||||
|
||||
CM => {
|
||||
|
@ -242,7 +243,7 @@ impl<'a> Iterator for LineBreakCandidateIter<'a> {
|
|||
// where X is any line break class except BK, CR, LF, NL, SP, or ZW.
|
||||
|
||||
*pos += grapheme.len();
|
||||
return self.next();
|
||||
continue;
|
||||
}
|
||||
WJ => {
|
||||
/*: LB11 Do not break before or after Word joiner and related characters.*/
|
||||
|
@ -251,12 +252,12 @@ impl<'a> Iterator for LineBreakCandidateIter<'a> {
|
|||
if next_grapheme_class!(iter, grapheme).is_some() {
|
||||
*pos += grapheme.len();
|
||||
}
|
||||
return self.next();
|
||||
continue;
|
||||
}
|
||||
GL => {
|
||||
/*LB12 Non-breaking characters: LB12 Do not break after NBSP and related characters.*/
|
||||
*pos += grapheme.len();
|
||||
return self.next();
|
||||
continue;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
@ -268,12 +269,12 @@ impl<'a> Iterator for LineBreakCandidateIter<'a> {
|
|||
* hyphens. [^SP BA HY] × GL
|
||||
* Also LB12 Do not break after NBSP and related characters */
|
||||
*pos += grapheme.len();
|
||||
return self.next();
|
||||
continue;
|
||||
}
|
||||
/* LB13 Do not break before ‘]’ or ‘!’ or ‘;’ or ‘/’, even after spaces. */
|
||||
CL | CP | EX | IS | SY => {
|
||||
*pos = *next_idx;
|
||||
return self.next();
|
||||
continue;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
@ -283,12 +284,13 @@ impl<'a> Iterator for LineBreakCandidateIter<'a> {
|
|||
/* LB13 Do not break before ‘]’ or ‘!’ or ‘;’ or ‘/’, even after spaces. */
|
||||
SP if [CL, CP, EX, IS, SY].contains(&get_class!(text[idx..].trim_start())) => {
|
||||
*pos += grapheme.len();
|
||||
while ![CL, CP, EX, IS, SY].contains(&next_grapheme_class!(iter, grapheme).unwrap())
|
||||
while ![CL, CP, EX, IS, SY]
|
||||
.contains(&next_grapheme_class!(iter, grapheme).unwrap())
|
||||
{
|
||||
*pos += grapheme.len();
|
||||
}
|
||||
*pos += grapheme.len();
|
||||
return self.next();
|
||||
continue;
|
||||
}
|
||||
OP => {
|
||||
/* LB14 Do not break after ‘[’, even after spaces.
|
||||
|
@ -300,7 +302,7 @@ impl<'a> Iterator for LineBreakCandidateIter<'a> {
|
|||
break;
|
||||
}
|
||||
}
|
||||
return self.next();
|
||||
continue;
|
||||
}
|
||||
QU if get_class!(text[idx..].trim_start()) == OP => {
|
||||
/* LB15 Do not break within ‘”[’, even with intervening spaces.
|
||||
|
@ -310,7 +312,7 @@ impl<'a> Iterator for LineBreakCandidateIter<'a> {
|
|||
*pos += grapheme.len();
|
||||
}
|
||||
*pos = idx;
|
||||
return self.next();
|
||||
continue;
|
||||
}
|
||||
QU => {
|
||||
/* LB19 Do not break before or after quotation marks, such as ‘ ” ’. */
|
||||
|
@ -318,7 +320,7 @@ impl<'a> Iterator for LineBreakCandidateIter<'a> {
|
|||
if let Some((_, g)) = self.iter.next() {
|
||||
*pos += g.len();
|
||||
}
|
||||
return self.next();
|
||||
continue;
|
||||
}
|
||||
LineBreakClass::CL | LineBreakClass::CP
|
||||
if get_class!(text[idx..].trim_start()) == NS =>
|
||||
|
@ -330,14 +332,14 @@ impl<'a> Iterator for LineBreakCandidateIter<'a> {
|
|||
while Some(SP) == next_grapheme_class!(iter, grapheme) {
|
||||
*pos += grapheme.len();
|
||||
}
|
||||
return self.next();
|
||||
continue;
|
||||
}
|
||||
B2 if get_class!(text[idx..].trim_start()) == B2 => {
|
||||
*pos += grapheme.len();
|
||||
while Some(SP) == next_grapheme_class!(iter, grapheme) {
|
||||
*pos += grapheme.len();
|
||||
}
|
||||
return self.next();
|
||||
continue;
|
||||
}
|
||||
SP => {
|
||||
/* LB18 Break after spaces. SP ÷ */
|
||||
|
@ -354,7 +356,7 @@ impl<'a> Iterator for LineBreakCandidateIter<'a> {
|
|||
/* LB19 Do not break before or after quotation marks, such as ‘ ” ’. */
|
||||
*pos = *next_idx + next_grapheme.len();
|
||||
self.iter.next();
|
||||
return self.next();
|
||||
continue;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
@ -369,7 +371,7 @@ impl<'a> Iterator for LineBreakCandidateIter<'a> {
|
|||
* kana, and other non-starters, or after acute accents. × BA, × HY, × NS, BB × */
|
||||
BB => {
|
||||
*pos += grapheme.len();
|
||||
return self.next();
|
||||
continue;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
@ -381,7 +383,7 @@ impl<'a> Iterator for LineBreakCandidateIter<'a> {
|
|||
/* LB21 Do not break before hyphen-minus, other hyphens, fixed-width spaces, small
|
||||
* kana, and other non-starters, or after acute accents. × BA, × HY, × NS, BB × */
|
||||
*pos += grapheme.len();
|
||||
return self.next();
|
||||
continue;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
@ -392,7 +394,7 @@ impl<'a> Iterator for LineBreakCandidateIter<'a> {
|
|||
let (idx, next_grapheme) = next_char.unwrap();
|
||||
*pos = idx + next_grapheme.len();
|
||||
self.iter.next();
|
||||
return self.next();
|
||||
continue;
|
||||
}
|
||||
/* LB21b Don’t break between ,Solidus and Hebrew letters. SY × HL */
|
||||
SY if next_grapheme_class!((next_char is HL)) => {
|
||||
|
@ -403,7 +405,7 @@ impl<'a> Iterator for LineBreakCandidateIter<'a> {
|
|||
if let Some((idx, next_grapheme)) = self.iter.next() {
|
||||
*pos = idx + next_grapheme.len();
|
||||
}
|
||||
return self.next();
|
||||
continue;
|
||||
}
|
||||
/* LB22 Do not break between two ellipses, or between letters, numbers or excla-
|
||||
* mations and ellipsis.
|
||||
|
@ -413,40 +415,40 @@ impl<'a> Iterator for LineBreakCandidateIter<'a> {
|
|||
let (idx, next_grapheme) = next_char.unwrap();
|
||||
*pos = idx + next_grapheme.len();
|
||||
self.iter.next();
|
||||
return self.next();
|
||||
continue;
|
||||
}
|
||||
/* EX × IN */
|
||||
EX if next_grapheme_class!((next_char is IN)) => {
|
||||
let (idx, next_grapheme) = next_char.unwrap();
|
||||
*pos = idx + next_grapheme.len();
|
||||
self.iter.next();
|
||||
return self.next();
|
||||
continue;
|
||||
}
|
||||
EX => {
|
||||
// LB13
|
||||
*pos += grapheme.len();
|
||||
return self.next();
|
||||
continue;
|
||||
}
|
||||
/* (ID | EB | EM) × IN */
|
||||
ID | EB | EM if next_grapheme_class!((next_char is IN)) => {
|
||||
let (idx, next_grapheme) = next_char.unwrap();
|
||||
*pos = idx + next_grapheme.len();
|
||||
self.iter.next();
|
||||
return self.next();
|
||||
continue;
|
||||
}
|
||||
/* IN × IN */
|
||||
IN if next_grapheme_class!((next_char is IN)) => {
|
||||
let (idx, next_grapheme) = next_char.unwrap();
|
||||
*pos = idx + next_grapheme.len();
|
||||
self.iter.next();
|
||||
return self.next();
|
||||
continue;
|
||||
}
|
||||
/* NU × IN */
|
||||
NU if next_grapheme_class!((next_char is IN)) => {
|
||||
let (idx, next_grapheme) = next_char.unwrap();
|
||||
*pos = idx + next_grapheme.len();
|
||||
self.iter.next();
|
||||
return self.next();
|
||||
continue;
|
||||
}
|
||||
/* LB23 Do not break between digits and letters.
|
||||
* (AL | HL) × NU */
|
||||
|
@ -454,14 +456,14 @@ impl<'a> Iterator for LineBreakCandidateIter<'a> {
|
|||
let (idx, next_grapheme) = next_char.unwrap();
|
||||
*pos = idx + next_grapheme.len();
|
||||
self.iter.next();
|
||||
return self.next();
|
||||
continue;
|
||||
}
|
||||
/* NU × (AL | HL) */
|
||||
NU if next_grapheme_class!((next_char is AL, HL)) => {
|
||||
let (idx, next_grapheme) = next_char.unwrap();
|
||||
*pos = idx + next_grapheme.len();
|
||||
self.iter.next();
|
||||
return self.next();
|
||||
continue;
|
||||
}
|
||||
/* LB23a Do not break between numeric prefixes and ideographs, or between ideographs
|
||||
* and numeric postfixes.
|
||||
|
@ -470,14 +472,14 @@ impl<'a> Iterator for LineBreakCandidateIter<'a> {
|
|||
let (idx, next_grapheme) = next_char.unwrap();
|
||||
*pos = idx + next_grapheme.len();
|
||||
self.iter.next();
|
||||
return self.next();
|
||||
continue;
|
||||
}
|
||||
/* (ID | EB | EM) × PO */
|
||||
ID | EB | EM if next_grapheme_class!((next_char is PO)) => {
|
||||
let (idx, next_grapheme) = next_char.unwrap();
|
||||
*pos = idx + next_grapheme.len();
|
||||
self.iter.next();
|
||||
return self.next();
|
||||
continue;
|
||||
}
|
||||
/* B24 Do not break between numeric prefix/postfix and letters, or between
|
||||
letters and prefix/postfix.
|
||||
|
@ -486,14 +488,14 @@ impl<'a> Iterator for LineBreakCandidateIter<'a> {
|
|||
let (idx, next_grapheme) = next_char.unwrap();
|
||||
*pos = idx + next_grapheme.len();
|
||||
self.iter.next();
|
||||
return self.next();
|
||||
continue;
|
||||
}
|
||||
/*(AL | HL) × (PR | PO) */
|
||||
AL | HL if next_grapheme_class!((next_char is PR, PO)) => {
|
||||
let (idx, next_grapheme) = next_char.unwrap();
|
||||
*pos = idx + next_grapheme.len();
|
||||
self.iter.next();
|
||||
return self.next();
|
||||
continue;
|
||||
}
|
||||
/* LB25 Do not break between the following pairs of classes relevant to numbers:
|
||||
* CL × PO */
|
||||
|
@ -501,98 +503,98 @@ impl<'a> Iterator for LineBreakCandidateIter<'a> {
|
|||
let (idx, next_grapheme) = next_char.unwrap();
|
||||
*pos = idx + next_grapheme.len();
|
||||
self.iter.next();
|
||||
return self.next();
|
||||
continue;
|
||||
}
|
||||
/* CP × PO */
|
||||
CP if next_grapheme_class!((next_char is PO)) => {
|
||||
let (idx, next_grapheme) = next_char.unwrap();
|
||||
*pos = idx + next_grapheme.len();
|
||||
self.iter.next();
|
||||
return self.next();
|
||||
continue;
|
||||
}
|
||||
/* CL × PR */
|
||||
CL if next_grapheme_class!((next_char is PR)) => {
|
||||
let (idx, next_grapheme) = next_char.unwrap();
|
||||
*pos = idx + next_grapheme.len();
|
||||
self.iter.next();
|
||||
return self.next();
|
||||
continue;
|
||||
}
|
||||
/* CP × PR */
|
||||
CP if next_grapheme_class!((next_char is PR)) => {
|
||||
let (idx, next_grapheme) = next_char.unwrap();
|
||||
*pos = idx + next_grapheme.len();
|
||||
self.iter.next();
|
||||
return self.next();
|
||||
continue;
|
||||
}
|
||||
/* NU × PO */
|
||||
NU if next_grapheme_class!((next_char is PO)) => {
|
||||
let (idx, next_grapheme) = next_char.unwrap();
|
||||
*pos = idx + next_grapheme.len();
|
||||
self.iter.next();
|
||||
return self.next();
|
||||
continue;
|
||||
}
|
||||
/* NU × PR */
|
||||
NU if next_grapheme_class!((next_char is PR)) => {
|
||||
let (idx, next_grapheme) = next_char.unwrap();
|
||||
*pos = idx + next_grapheme.len();
|
||||
self.iter.next();
|
||||
return self.next();
|
||||
continue;
|
||||
}
|
||||
/* PO × OP */
|
||||
PO if next_grapheme_class!((next_char is OP)) => {
|
||||
let (idx, next_grapheme) = next_char.unwrap();
|
||||
*pos = idx + next_grapheme.len();
|
||||
self.iter.next();
|
||||
return self.next();
|
||||
continue;
|
||||
}
|
||||
/* PO × NU */
|
||||
PO if next_grapheme_class!((next_char is NU)) => {
|
||||
let (idx, next_grapheme) = next_char.unwrap();
|
||||
*pos = idx + next_grapheme.len();
|
||||
self.iter.next();
|
||||
return self.next();
|
||||
continue;
|
||||
}
|
||||
/* PR × OP */
|
||||
PR if next_grapheme_class!((next_char is OP)) => {
|
||||
let (idx, next_grapheme) = next_char.unwrap();
|
||||
*pos = idx + next_grapheme.len();
|
||||
self.iter.next();
|
||||
return self.next();
|
||||
continue;
|
||||
}
|
||||
/* PR × NU */
|
||||
PR if next_grapheme_class!((next_char is NU)) => {
|
||||
let (idx, next_grapheme) = next_char.unwrap();
|
||||
*pos = idx + next_grapheme.len();
|
||||
self.iter.next();
|
||||
return self.next();
|
||||
continue;
|
||||
}
|
||||
/* HY × NU */
|
||||
HY if next_grapheme_class!((next_char is NU)) => {
|
||||
let (idx, next_grapheme) = next_char.unwrap();
|
||||
*pos = idx + next_grapheme.len();
|
||||
self.iter.next();
|
||||
return self.next();
|
||||
continue;
|
||||
}
|
||||
/* IS × NU */
|
||||
IS if next_grapheme_class!((next_char is NU)) => {
|
||||
let (idx, next_grapheme) = next_char.unwrap();
|
||||
*pos = idx + next_grapheme.len();
|
||||
self.iter.next();
|
||||
return self.next();
|
||||
continue;
|
||||
}
|
||||
/* NU × NU */
|
||||
NU if next_grapheme_class!((next_char is NU)) => {
|
||||
let (idx, next_grapheme) = next_char.unwrap();
|
||||
*pos = idx + next_grapheme.len();
|
||||
self.iter.next();
|
||||
return self.next();
|
||||
continue;
|
||||
}
|
||||
/* SY × NU */
|
||||
SY if next_grapheme_class!((next_char is NU)) => {
|
||||
let (idx, next_grapheme) = next_char.unwrap();
|
||||
*pos = idx + next_grapheme.len();
|
||||
self.iter.next();
|
||||
return self.next();
|
||||
continue;
|
||||
}
|
||||
/* LB26 Do not break a Korean syllable.
|
||||
* JL × (JL | JV | H2 | H3) */
|
||||
|
@ -600,21 +602,21 @@ impl<'a> Iterator for LineBreakCandidateIter<'a> {
|
|||
let (idx, next_grapheme) = next_char.unwrap();
|
||||
*pos = idx + next_grapheme.len();
|
||||
self.iter.next();
|
||||
return self.next();
|
||||
continue;
|
||||
}
|
||||
/* (JV | H2) × (JV | JT) */
|
||||
JV | H2 if next_grapheme_class!((next_char is JV, JT)) => {
|
||||
let (idx, next_grapheme) = next_char.unwrap();
|
||||
*pos = idx + next_grapheme.len();
|
||||
self.iter.next();
|
||||
return self.next();
|
||||
continue;
|
||||
}
|
||||
/* (JT | H3) × JT */
|
||||
JT | H3 if next_grapheme_class!((next_char is JT)) => {
|
||||
let (idx, next_grapheme) = next_char.unwrap();
|
||||
*pos = idx + next_grapheme.len();
|
||||
self.iter.next();
|
||||
return self.next();
|
||||
continue;
|
||||
}
|
||||
/* LB27 Treat a Korean Syllable Block the same as ID.
|
||||
* (JL | JV | JT | H2 | H3) × IN */
|
||||
|
@ -622,21 +624,21 @@ impl<'a> Iterator for LineBreakCandidateIter<'a> {
|
|||
let (idx, next_grapheme) = next_char.unwrap();
|
||||
*pos = idx + next_grapheme.len();
|
||||
self.iter.next();
|
||||
return self.next();
|
||||
continue;
|
||||
}
|
||||
/* (JL | JV | JT | H2 | H3) × PO */
|
||||
JL | JV | JT | H2 | H3 if next_grapheme_class!((next_char is PO)) => {
|
||||
let (idx, next_grapheme) = next_char.unwrap();
|
||||
*pos = idx + next_grapheme.len();
|
||||
self.iter.next();
|
||||
return self.next();
|
||||
continue;
|
||||
}
|
||||
/* PR × (JL | JV | JT | H2 | H3) */
|
||||
PR if next_grapheme_class!((next_char is JL, JV, JT, H2, H3)) => {
|
||||
let (idx, next_grapheme) = next_char.unwrap();
|
||||
*pos = idx + next_grapheme.len();
|
||||
self.iter.next();
|
||||
return self.next();
|
||||
continue;
|
||||
}
|
||||
/* LB28 Do not break between alphabetics (“at”).
|
||||
(AL | HL) × (AL | HL) */
|
||||
|
@ -644,7 +646,7 @@ impl<'a> Iterator for LineBreakCandidateIter<'a> {
|
|||
let (idx, next_grapheme) = next_char.unwrap();
|
||||
*pos = idx + next_grapheme.len();
|
||||
self.iter.next();
|
||||
return self.next();
|
||||
continue;
|
||||
}
|
||||
/* LB29 Do not break between numeric punctuation and alphabetics (“e.g.”).
|
||||
IS × (AL | HL) */
|
||||
|
@ -652,7 +654,7 @@ impl<'a> Iterator for LineBreakCandidateIter<'a> {
|
|||
let (idx, next_grapheme) = next_char.unwrap();
|
||||
*pos = idx + next_grapheme.len();
|
||||
self.iter.next();
|
||||
return self.next();
|
||||
continue;
|
||||
}
|
||||
/* LB30 Do not break between letters, numbers, or ordinary symbols and opening
|
||||
or closing parentheses.
|
||||
|
@ -661,14 +663,14 @@ impl<'a> Iterator for LineBreakCandidateIter<'a> {
|
|||
let (idx, next_grapheme) = next_char.unwrap();
|
||||
*pos = idx + next_grapheme.len();
|
||||
self.iter.next();
|
||||
return self.next();
|
||||
continue;
|
||||
}
|
||||
/* CP × (AL | HL | NU) */
|
||||
CP if next_grapheme_class!((next_char is AL, HL , NU)) => {
|
||||
let (idx, next_grapheme) = next_char.unwrap();
|
||||
*pos = idx + next_grapheme.len();
|
||||
self.iter.next();
|
||||
return self.next();
|
||||
continue;
|
||||
}
|
||||
/*LB30b Do not break between an emoji base and an emoji modifier.
|
||||
* EB × EM */
|
||||
|
@ -676,7 +678,7 @@ impl<'a> Iterator for LineBreakCandidateIter<'a> {
|
|||
let (idx, next_grapheme) = next_char.unwrap();
|
||||
*pos = idx + next_grapheme.len();
|
||||
self.iter.next();
|
||||
return self.next();
|
||||
continue;
|
||||
}
|
||||
RI => {
|
||||
/* LB30a Break between two regional indicator symbols if and only if there are an
|
||||
|
@ -689,7 +691,7 @@ impl<'a> Iterator for LineBreakCandidateIter<'a> {
|
|||
return Some((*pos - grapheme.len(), BreakAllowed));
|
||||
}
|
||||
self.iter.next();
|
||||
return self.next();
|
||||
continue;
|
||||
}
|
||||
_ if next_char.is_none() => {
|
||||
return None;
|
||||
|
@ -701,6 +703,7 @@ impl<'a> Iterator for LineBreakCandidateIter<'a> {
|
|||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn search_table(c: u32, t: &'static [(u32, u32, LineBreakClass)]) -> LineBreakClass {
|
||||
match t.binary_search_by(|&(lo, hi, _)| {
|
||||
|
|
Loading…
Reference in New Issue