ui: word break with grapheme length, not bytes

​#69 East characters are not working.
embed
Manos Pitsidianakis 2019-03-25 13:22:45 +02:00
parent 9522508a92
commit 4c26077f30
Signed by: Manos Pitsidianakis
GPG Key ID: 73627C2F690DF710
4 changed files with 44 additions and 33 deletions

View File

@ -24,6 +24,7 @@
colors and attributes.
*/
use super::position::*;
use super::grapheme_clusters::*;
use std::convert::From;
use std::fmt;
use std::ops::{Deref, DerefMut, Index, IndexMut};
@ -733,7 +734,6 @@ pub fn write_string_to_grid(
(x, y)
}
// TODO UTF-8 incompatible
pub fn word_break_string(mut s: &str, width: usize) -> Vec<&str> {
let mut ret: Vec<&str> = Vec::with_capacity(16);
loop {
@ -753,12 +753,16 @@ pub fn word_break_string(mut s: &str, width: usize) -> Vec<&str> {
continue;
}
}
if s.len() > width {
if let Some(next_idx) = s.as_bytes()[..width]
let graphemes = s.graphemes_indices();
if graphemes.len() > width {
// use grapheme indices and find position of " " graphemes
if let Some(next_idx) = graphemes[..width]
.iter()
.rposition(u8::is_ascii_whitespace)
.rposition(|(_, g)| *g == " ")
{
let next_idx = graphemes[next_idx].0;
ret.push(&s[..next_idx]);
eprintln!("width = {} w = {} l = {:?}\n\n", width, ret.last().unwrap().grapheme_width(), ret.last().unwrap());
s = &s[next_idx + 1..];
} else {
ret.push(&s[..width]);

View File

@ -8,34 +8,42 @@
*/
use super::wcwidth::{wcwidth, CodePointsIter};
use super::*;
/// Breaks `s` into its grapheme clusters and returns them in order.
///
/// The slices borrow from `s`; nothing is copied. The `true` argument is
/// the `is_extended` flag of `UnicodeSegmentation::graphemes`.
pub fn split_graphemes(s: &str) -> Vec<&str> {
    let mut clusters = Vec::new();
    clusters.extend(UnicodeSegmentation::graphemes(s, true));
    clusters
}
/// Returns the first grapheme cluster of `s` together with the offset
/// reported for it by `grapheme_indices`, or `None` when the iterator
/// yields nothing.
pub fn next_grapheme(s: &str) -> Option<(usize, &str)> {
    let mut clusters = UnicodeSegmentation::grapheme_indices(s, true);
    clusters.next()
}
/// Returns the final grapheme cluster of `s` together with the offset
/// reported for it by `grapheme_indices`, or `None` when the iterator
/// yields nothing.
pub fn last_grapheme(s: &str) -> Option<(usize, &str)> {
    let mut clusters = UnicodeSegmentation::grapheme_indices(s, true);
    clusters.next_back()
}
pub fn grapheme_width(grapheme: &str) -> i32 {
let mut count = 0;
for c in grapheme.code_points() {
count += if let Some(c) = wcwidth(c) {
c as i32
} else {
-1
};
pub trait Graphemes: UnicodeSegmentation + CodePointsIter {
fn split_graphemes<'a>(&'a self) -> Vec<&'a str> {
UnicodeSegmentation::graphemes(self, true).collect::<Vec<&str>>()
}
count
fn graphemes_indices<'a>(&'a self) -> Vec<(usize, &'a str)> {
UnicodeSegmentation::grapheme_indices(self, true).collect::<Vec<(usize, &str)>>()
}
fn next_grapheme<'a>(&'a self) -> Option<(usize, &'a str)> {
UnicodeSegmentation::grapheme_indices(self, true).next()
}
fn last_grapheme<'a>(&'a self) -> Option<(usize, &'a str)> {
UnicodeSegmentation::grapheme_indices(self, true).next_back()
}
fn grapheme_width(&self) -> i32 {
let mut count = 0;
for c in self.code_points() {
count += if let Some(c) = wcwidth(c) {
c as i32
} else {
-1
};
}
count
}
}
impl Graphemes for str {}
//#[derive(PartialEq)]
//enum Property {
// CR,

View File

@ -22,7 +22,7 @@ impl UText {
}
let (first, _) = self.content.split_at(cursor_pos);
self.grapheme_cursor_pos = split_graphemes(first).len();
self.grapheme_cursor_pos = first.split_graphemes().len();
self.cursor_pos = cursor_pos;
}
@ -34,7 +34,7 @@ impl UText {
self.content
}
pub fn grapheme_len(&self) -> usize {
split_graphemes(&self.content).len()
self.content.split_graphemes().len()
}
pub fn cursor_inc(&mut self) {
@ -42,8 +42,8 @@ impl UText {
return;
}
let (_, right) = std::dbg!(self.content.split_at(self.cursor_pos));
if let Some((_, graph)) = std::dbg!(next_grapheme(right)) {
let (_, right) = self.content.split_at(self.cursor_pos);
if let Some((_, graph)) = right.next_grapheme() {
self.cursor_pos += graph.len();
self.grapheme_cursor_pos += 1;
}
@ -52,8 +52,8 @@ impl UText {
if self.cursor_pos == 0 {
return;
}
let (left, _) = std::dbg!(self.content.split_at(self.cursor_pos));
if let Some((_, graph)) = std::dbg!(last_grapheme(left)) {
let (left, _) = self.content.split_at(self.cursor_pos);
if let Some((_, graph)) = left.last_grapheme() {
self.cursor_pos -= graph.len();
self.grapheme_cursor_pos -= 1;
}
@ -116,7 +116,7 @@ impl UText {
* left = xxxxxx....xxgg;
* right = xxx;
*/
if let Some((offset, graph)) = std::dbg!(last_grapheme(left)) {
if let Some((offset, graph)) = left.last_grapheme() {
(offset, graph.len())
} else {
return;

View File

@ -41,7 +41,6 @@ impl<'a> Iterator for CodePointsIterator<'a> {
type Item = WChar;
fn next(&mut self) -> Option<WChar> {
println!("rest = {:?}", self.rest);
if self.rest.is_empty() {
return None;
}