From 4c26077f30571a69d93b05447e6f765e15bf46e2 Mon Sep 17 00:00:00 2001 From: Manos Pitsidianakis Date: Mon, 25 Mar 2019 13:22:45 +0200 Subject: [PATCH] ui: word break with grapheme length, not bytes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ​#69 East characters are not working. --- ui/src/terminal/cells.rs | 12 ++++--- ui/src/terminal/grapheme_clusters.rs | 50 ++++++++++++++++------------ ui/src/terminal/text_editing.rs | 14 ++++---- ui/src/terminal/wcwidth.rs | 1 - 4 files changed, 44 insertions(+), 33 deletions(-) diff --git a/ui/src/terminal/cells.rs b/ui/src/terminal/cells.rs index 845900a1f..efd93fb16 100644 --- a/ui/src/terminal/cells.rs +++ b/ui/src/terminal/cells.rs @@ -24,6 +24,7 @@ colors and attributes. */ use super::position::*; +use super::grapheme_clusters::*; use std::convert::From; use std::fmt; use std::ops::{Deref, DerefMut, Index, IndexMut}; @@ -733,7 +734,6 @@ pub fn write_string_to_grid( (x, y) } -// TODO UTF-8 incompatible pub fn word_break_string(mut s: &str, width: usize) -> Vec<&str> { let mut ret: Vec<&str> = Vec::with_capacity(16); loop { @@ -753,12 +753,16 @@ pub fn word_break_string(mut s: &str, width: usize) -> Vec<&str> { continue; } } - if s.len() > width { - if let Some(next_idx) = s.as_bytes()[..width] + let graphemes = s.graphemes_indices(); + if graphemes.len() > width { + // use grapheme indices and find position of " " graphemes + if let Some(next_idx) = graphemes[..width] .iter() - .rposition(u8::is_ascii_whitespace) + .rposition(|(_, g)| *g == " ") { + let next_idx = graphemes[next_idx].0; ret.push(&s[..next_idx]); + eprintln!("width = {} w = {} l = {:?}\n\n", width, ret.last().unwrap().grapheme_width(), ret.last().unwrap()); s = &s[next_idx + 1..]; } else { ret.push(&s[..width]); diff --git a/ui/src/terminal/grapheme_clusters.rs b/ui/src/terminal/grapheme_clusters.rs index 4e661c9bc..2135ce8f0 100644 --- a/ui/src/terminal/grapheme_clusters.rs +++ 
b/ui/src/terminal/grapheme_clusters.rs @@ -8,34 +8,42 @@ */ -use super::wcwidth::{wcwidth, CodePointsIter}; use super::*; -pub fn split_graphemes(s: &str) -> Vec<&str> { - UnicodeSegmentation::graphemes(s, true).collect::<Vec<&str>>() -} -pub fn next_grapheme(s: &str) -> Option<(usize, &str)> { - UnicodeSegmentation::grapheme_indices(s, true).next() -} - -pub fn last_grapheme(s: &str) -> Option<(usize, &str)> { - UnicodeSegmentation::grapheme_indices(s, true).next_back() -} - -pub fn grapheme_width(grapheme: &str) -> i32 { - let mut count = 0; - for c in grapheme.code_points() { - count += if let Some(c) = wcwidth(c) { - c as i32 - } else { - -1 - }; +pub trait Graphemes: UnicodeSegmentation + CodePointsIter { + fn split_graphemes<'a>(&'a self) -> Vec<&'a str> { + UnicodeSegmentation::graphemes(self, true).collect::<Vec<&str>>() } - count + fn graphemes_indices<'a>(&'a self) -> Vec<(usize, &'a str)> { + UnicodeSegmentation::grapheme_indices(self, true).collect::<Vec<(usize, &str)>>() + } + + fn next_grapheme<'a>(&'a self) -> Option<(usize, &'a str)> { + UnicodeSegmentation::grapheme_indices(self, true).next() + } + + fn last_grapheme<'a>(&'a self) -> Option<(usize, &'a str)> { + UnicodeSegmentation::grapheme_indices(self, true).next_back() + } + + fn grapheme_width(&self) -> i32 { + let mut count = 0; + for c in self.code_points() { + count += if let Some(c) = wcwidth(c) { + c as i32 + } else { + -1 + }; + } + + count + } } +impl Graphemes for str {} + //#[derive(PartialEq)] //enum Property { // CR, diff --git a/ui/src/terminal/text_editing.rs b/ui/src/terminal/text_editing.rs index 326ba1aa6..6ce24fe22 100644 --- a/ui/src/terminal/text_editing.rs +++ b/ui/src/terminal/text_editing.rs @@ -22,7 +22,7 @@ impl UText { } let (first, _) = self.content.split_at(cursor_pos); - self.grapheme_cursor_pos = split_graphemes(first).len(); + self.grapheme_cursor_pos = first.split_graphemes().len(); self.cursor_pos = cursor_pos; } @@ -34,7 +34,7 @@ impl UText { self.content } pub fn grapheme_len(&self) -> usize { - 
split_graphemes(&self.content).len() + self.content.split_graphemes().len() } pub fn cursor_inc(&mut self) { @@ -42,8 +42,8 @@ impl UText { return; } - let (_, right) = std::dbg!(self.content.split_at(self.cursor_pos)); - if let Some((_, graph)) = std::dbg!(next_grapheme(right)) { + let (_, right) = self.content.split_at(self.cursor_pos); + if let Some((_, graph)) = right.next_grapheme() { self.cursor_pos += graph.len(); self.grapheme_cursor_pos += 1; } @@ -52,8 +52,8 @@ impl UText { if self.cursor_pos == 0 { return; } - let (left, _) = std::dbg!(self.content.split_at(self.cursor_pos)); - if let Some((_, graph)) = std::dbg!(last_grapheme(left)) { + let (left, _) = self.content.split_at(self.cursor_pos); + if let Some((_, graph)) = left.last_grapheme() { self.cursor_pos -= graph.len(); self.grapheme_cursor_pos -= 1; } @@ -116,7 +116,7 @@ impl UText { * left = xxxxxx....xxgg; * right = xxx; */ - if let Some((offset, graph)) = std::dbg!(last_grapheme(left)) { + if let Some((offset, graph)) = left.last_grapheme() { (offset, graph.len()) } else { return; diff --git a/ui/src/terminal/wcwidth.rs b/ui/src/terminal/wcwidth.rs index f0493b7d1..b62021026 100644 --- a/ui/src/terminal/wcwidth.rs +++ b/ui/src/terminal/wcwidth.rs @@ -41,7 +41,6 @@ impl<'a> Iterator for CodePointsIterator<'a> { type Item = WChar; fn next(&mut self) -> Option<WChar> { - println!("rest = {:?}", self.rest); if self.rest.is_empty() { return None; }