Add "regexp" feature, format text with regexps

The `regexp` feature uses the pcre2 library to let the user define
regular expressions that match text and apply text formatting to
the matches. Here is an example from the theme configuration I used
to test this:

  [terminal.themes.win95.text_format_regexps]
  "listing.subject" = { "\\[[^\\]]*\\]" = { attrs = "Bold" } }
  "listing.from" = { "\\<[^\\>]*\\>(?:(?:\\s*$)|(?=,))" = { attrs = "Italics" } }

  [terminal.themes.win95.text_format_regexps."pager.envelope.body"]
  "^>.*$" = { attrs = "Italics" }
  "\\d+\\s?(?:(?:[KkMmTtGg]?[Bb])|(?:[KkMmTtGg][Bb]?)(?=\\s))" = { attrs = "Bold | Underline" }
master
Manos Pitsidianakis 2020-06-04 21:33:27 +03:00
parent ef0f269fbf
commit 3e31c46a74
Signed by: Manos Pitsidianakis
GPG Key ID: 73627C2F690DF710
5 changed files with 463 additions and 13 deletions

33
Cargo.lock generated
View File

@ -753,6 +753,7 @@ dependencies = [
"nom",
"notify",
"notify-rust",
"pcre2",
"rmp",
"rmp-serde",
"rmpv",
@ -1076,6 +1077,29 @@ dependencies = [
"vcpkg",
]
[[package]]
name = "pcre2"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "85b30f2f69903b439dd9dc9e824119b82a55bf113b29af8d70948a03c1b11ab1"
dependencies = [
"libc",
"log",
"pcre2-sys",
"thread_local",
]
[[package]]
name = "pcre2-sys"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "876c72d05059d23a84bd9fcdc3b1d31c50ea7fe00fe1522b4e68cd3608db8d5b"
dependencies = [
"cc",
"libc",
"pkg-config",
]
[[package]]
name = "percent-encoding"
version = "2.1.0"
@ -1500,6 +1524,15 @@ dependencies = [
"melib",
]
[[package]]
name = "thread_local"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d40c6d1b69745a6ec6fb1ca717914848da4b44ae29d9b3080cbee91d72a69b14"
dependencies = [
"lazy_static",
]
[[package]]
name = "time"
version = "0.1.43"

View File

@ -49,7 +49,7 @@ rmpv = { version = "^0.4.2", features=["with-serde",] }
rmp-serde = "^0.14.0"
smallvec = { version = "1.1.0", features = ["serde", ] }
bitflags = "1.0"
pcre2 = { version = "0.2.3", optional = true }
[profile.release]
lto = true
@ -60,10 +60,11 @@ debug = false
members = ["melib", "testing", ]
[features]
default = ["sqlite3", "notmuch"]
default = ["sqlite3", "notmuch", "regexp"]
notmuch = ["melib/notmuch_backend", ]
jmap = ["melib/jmap_backend",]
sqlite3 = ["melib/sqlite3"]
regexp = ["pcre2"]
cli-docs = []
# Print tracing logs as meli runs in stderr

View File

@ -301,6 +301,7 @@ pub struct Pager {
colors: ThemeAttribute,
initialised: bool,
content: CellBuffer,
text_lines: Vec<String>,
movement: Option<PageMovement>,
id: ComponentId,
}
@ -342,8 +343,9 @@ impl Pager {
empty_cell.set_bg(self.colors.bg);
let mut content = CellBuffer::new(width, height, empty_cell);
content.set_ascii_drawing(ascii_drawing);
Pager::print_string(&mut content, lines, self.colors);
Pager::print_string(&mut content, &lines, self.colors);
self.text = text.to_string();
self.text_lines = lines;
self.content = content;
self.height = height;
self.width = width;
@ -414,8 +416,9 @@ impl Pager {
}) {
return Pager::from_buf(content, cursor_pos);
}
let lines: Vec<String>;
let content = {
let lines: Vec<String> = if let Some(width) = width {
lines = if let Some(width) = width {
text.split_lines_reflow(reflow, Some(width.saturating_sub(2)))
} else {
text.trim().split('\n').map(str::to_string).collect()
@ -431,11 +434,12 @@ impl Pager {
} else {
CellBuffer::new(width, height, empty_cell)
};
Pager::print_string(&mut content, lines, colors);
Pager::print_string(&mut content, &lines, colors);
content
};
Pager {
text,
text_lines: lines,
reflow,
cursor: (0, cursor_pos.unwrap_or(0)),
height: content.size().1,
@ -466,9 +470,10 @@ impl Pager {
empty_cell.set_bg(colors.bg);
let mut content = CellBuffer::new(width, height, empty_cell);
Pager::print_string(&mut content, lines, colors);
Pager::print_string(&mut content, &lines, colors);
Pager {
text: text.to_string(),
text_lines: lines,
cursor: (0, cursor_pos.unwrap_or(0)),
height,
width,
@ -494,7 +499,7 @@ impl Pager {
..Default::default()
}
}
pub fn print_string(content: &mut CellBuffer, lines: Vec<String>, colors: ThemeAttribute) {
pub fn print_string(content: &mut CellBuffer, lines: &[String], colors: ThemeAttribute) {
let width = content.size().0;
debug!(colors);
for (i, l) in lines.iter().enumerate() {
@ -558,7 +563,24 @@ impl Component for Pager {
);
}
}
Pager::print_string(&mut content, lines, self.colors);
Pager::print_string(&mut content, &lines, self.colors);
#[cfg(feature = "regexp")]
{
for text_formatter in crate::conf::text_format_regexps(
context,
"pager.envelope.body"
) {
let t = content.insert_tag(text_formatter.tag);
for (i, l) in lines.iter().enumerate() {
for _match in text_formatter.regexp.0.find_iter(l.as_bytes()) {
if let Ok(_match) = _match {
content.set_tag(t, (_match.start(), i), (_match.end(), i));
}
}
}
}
}
self.text_lines = lines;
if let Some(ref mut search) = self.search {
let results_attr = crate::conf::value(context, "pager.highlight_search");
let results_current_attr =

View File

@ -489,9 +489,193 @@ pub struct Themes {
/// A single theme: named color/attribute aliases, optional regexp-based text
/// formatting rules, and the per-key theme attributes themselves.
pub struct Theme {
/// Named color aliases that `ThemeValue::Alias` color entries refer to.
color_aliases: HashMap<Cow<'static, str>, ThemeValue<Color>>,
/// Named text-attribute aliases that `ThemeValue::Alias` attribute entries refer to.
attr_aliases: HashMap<Cow<'static, str>, ThemeValue<Attr>>,
/// Regexp text formatting rules, grouped by the key they apply to.
/// Only present when the `regexp` feature is enabled.
#[cfg(feature = "regexp")]
text_format_regexps: HashMap<Cow<'static, str>, SmallVec<[TextFormatterSetting; 32]>>,
/// Theme attributes indexed by theme key.
pub keys: HashMap<Cow<'static, str>, ThemeAttributeInner>,
}
#[cfg(feature = "regexp")]
pub use regexp::text_format_regexps;
#[cfg(feature = "regexp")]
use regexp::*;
#[cfg(feature = "regexp")]
mod regexp {
use super::*;
use crate::terminal::FormatTag;
/// Keys accepted in a theme's `text_format_regexps` table. Theme validation
/// reports any other key as invalid, and the default themes are pre-populated
/// with an empty rule list for each of these keys.
pub(super) const DEFAULT_TEXT_FORMATTER_KEYS: &'static [&'static str] =
&["pager.envelope.body", "listing.from", "listing.subject"];
/// Newtype around a compiled `pcre2` byte regex.
///
/// The wrapper exists so that `Debug`, `Hash`, `Eq` and `PartialEq` can be
/// implemented in terms of the regex's pattern string.
#[derive(Clone)]
pub struct RegexpWrapper(pub pcre2::bytes::Regex);
/// One configured text formatting rule: a compiled regexp together with the
/// (still unresolved) foreground, background and attribute values to apply to
/// its matches. Each of the three values is optional and may be a concrete
/// value, an alias, or a link to another theme key.
#[derive(Debug, Clone)]
pub(super) struct TextFormatterSetting {
pub(super) regexp: RegexpWrapper,
pub(super) fg: Option<ThemeValue<Color>>,
pub(super) bg: Option<ThemeValue<Color>>,
pub(super) attrs: Option<ThemeValue<Attr>>,
}
/// A text formatting rule ready for use: a borrowed regexp plus the
/// `FormatTag` to apply to whatever the regexp matches.
#[derive(Debug, Clone)]
pub struct TextFormatter<'r> {
/// Regexp whose matches should be formatted.
pub regexp: &'r RegexpWrapper,
/// Formatting applied to each match.
pub tag: FormatTag,
}
impl Default for RegexpWrapper {
    /// Returns a wrapper around the regex compiled from the empty pattern.
    fn default() -> Self {
        Self(
            pcre2::bytes::Regex::new("")
                .expect("the empty pattern is always a valid regular expression"),
        )
    }
}
impl std::fmt::Debug for RegexpWrapper {
    /// Formats the wrapper exactly like its pattern string would be
    /// debug-formatted.
    fn fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
        let pattern: &str = self.0.as_str();
        <str as std::fmt::Debug>::fmt(pattern, formatter)
    }
}
impl std::hash::Hash for RegexpWrapper {
    /// Hashes the wrapper by feeding its pattern string to `state`.
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        let pattern: &str = self.0.as_str();
        std::hash::Hash::hash(pattern, state)
    }
}
impl Eq for RegexpWrapper {}

impl PartialEq for RegexpWrapper {
    /// Two wrappers are equal when their pattern strings are equal.
    fn eq(&self, other: &RegexpWrapper) -> bool {
        let (lhs, rhs) = (self.0.as_str(), other.0.as_str());
        lhs == rhs
    }
}
impl RegexpWrapper {
    /// Compiles `pattern` into a [`RegexpWrapper`].
    ///
    /// Each boolean option is forwarded unchanged to the
    /// `pcre2::bytes::RegexBuilder` method of the same name. PCRE2's UTF
    /// validity check is always disabled on the resulting regex.
    ///
    /// # Errors
    ///
    /// Returns the `pcre2::Error` produced by the builder when `pattern`
    /// cannot be compiled.
    pub(super) fn new(
        pattern: &str,
        caseless: bool,
        dotall: bool,
        extended: bool,
        multi_line: bool,
        ucp: bool,
        jit_if_available: bool,
    ) -> std::result::Result<Self, pcre2::Error> {
        let mut builder = pcre2::bytes::RegexBuilder::new();
        builder
            .caseless(caseless)
            .dotall(dotall)
            .extended(extended)
            .multi_line(multi_line)
            .ucp(ucp)
            .jit_if_available(jit_if_available);
        // SAFETY: disabling the UTF check is only sound when every pattern and
        // subject handed to PCRE2 is valid UTF-8. `pattern` is a `&str`, and
        // these regexps are only meant to be matched against the bytes of Rust
        // strings, which are guaranteed to be UTF-8. Code that reaches the
        // inner regex through the public tuple field must uphold this too.
        unsafe {
            builder.disable_utf_check();
        }
        builder.build(pattern).map(Self)
    }
}
#[inline(always)]
pub fn text_format_regexps<'ctx>(
context: &'ctx Context,
key: &'static str,
) -> SmallVec<[TextFormatter<'ctx>; 64]> {
let theme = match context.settings.terminal.theme.as_str() {
"light" => &context.settings.terminal.themes.light,
"dark" => &context.settings.terminal.themes.dark,
t => context
.settings
.terminal
.themes
.other_themes
.get(t)
.unwrap_or(&context.settings.terminal.themes.dark),
};
theme.text_format_regexps[&Cow::from(key)]
.iter()
.map(|v| TextFormatter {
regexp: &v.regexp,
tag: FormatTag {
fg: v.fg.as_ref().map(|v| match v {
ThemeValue::Link(ref key, ref field) => unlink_fg(theme, field, key),
ThemeValue::Alias(ref alias_ident) => {
let mut alias_ident = alias_ident;
let ret;
'fg_alias_loop: loop {
match &theme.color_aliases[alias_ident.as_ref()] {
ThemeValue::Link(ref new_key, ref new_field) => {
ret = unlink_fg(theme, new_field, new_key);
break 'fg_alias_loop;
}
ThemeValue::Alias(ref new_alias_ident) => {
alias_ident = new_alias_ident
}
ThemeValue::Value(val) => {
ret = *val;
break 'fg_alias_loop;
}
}
}
ret
}
ThemeValue::Value(val) => *val,
}),
bg: v.bg.as_ref().map(|v| match v {
ThemeValue::Link(ref key, ref field) => unlink_bg(theme, field, key),
ThemeValue::Alias(ref alias_ident) => {
let mut alias_ident = alias_ident;
let ret;
'bg_alias_loop: loop {
match &theme.color_aliases[alias_ident.as_ref()] {
ThemeValue::Link(ref new_key, ref new_field) => {
ret = unlink_bg(theme, new_field, new_key);
break 'bg_alias_loop;
}
ThemeValue::Alias(ref new_alias_ident) => {
alias_ident = new_alias_ident
}
ThemeValue::Value(val) => {
ret = *val;
break 'bg_alias_loop;
}
}
}
ret
}
ThemeValue::Value(val) => *val,
}),
attrs: v.attrs.as_ref().map(|v| match v {
ThemeValue::Link(ref key, ()) => unlink_attrs(theme, key),
ThemeValue::Alias(ref alias_ident) => {
let mut alias_ident = alias_ident;
let ret;
'attrs_alias_loop: loop {
match &theme.attr_aliases[alias_ident.as_ref()] {
ThemeValue::Link(ref new_key, ()) => {
ret = unlink_attrs(theme, new_key);
break 'attrs_alias_loop;
}
ThemeValue::Alias(ref new_alias_ident) => {
alias_ident = new_alias_ident
}
ThemeValue::Value(val) => {
ret = *val;
break 'attrs_alias_loop;
}
}
}
ret
}
ThemeValue::Value(val) => *val,
}),
priority: 0,
},
})
.collect()
}
}
use std::ops::{Deref, DerefMut};
impl Deref for Theme {
type Target = HashMap<Cow<'static, str>, ThemeAttributeInner>;
@ -511,6 +695,14 @@ impl<'de> Deserialize<'de> for Themes {
where
D: Deserializer<'de>,
{
// Serde default provider: yields `false` for options that are off unless configured.
#[cfg(feature = "regexp")]
const fn false_val() -> bool {
false
}
// Serde default provider: yields `true` for options that are on unless configured.
#[cfg(feature = "regexp")]
const fn true_val() -> bool {
true
}
#[derive(Deserialize)]
struct ThemesOptions {
#[serde(default)]
@ -526,9 +718,30 @@ impl<'de> Deserialize<'de> for Themes {
color_aliases: HashMap<Cow<'static, str>, ThemeValue<Color>>,
#[serde(default)]
attr_aliases: HashMap<Cow<'static, str>, ThemeValue<Attr>>,
#[cfg(feature = "regexp")]
#[serde(default)]
text_format_regexps: HashMap<Cow<'static, str>, HashMap<String, RegexpOptions>>,
#[serde(flatten, default)]
keys: HashMap<Cow<'static, str>, ThemeAttributeInnerOptions>,
}
// Deserialized form of one `text_format_regexps` rule: the regex compile
// options plus the flattened fg/bg/attrs settings to apply to matches.
// Every option defaults to `false` when absent, except `ucp`, which defaults
// to `true`.
// NOTE(review): the derived `Default` yields `ucp == false`, unlike the serde
// default of `true` — confirm whether the derived `Default` is relied upon.
#[cfg(feature = "regexp")]
#[derive(Deserialize, Default)]
struct RegexpOptions {
#[serde(default = "false_val")]
caseless: bool,
#[serde(default = "false_val")]
dotall: bool,
#[serde(default = "false_val")]
extended: bool,
#[serde(default = "false_val")]
multi_line: bool,
#[serde(default = "true_val")]
ucp: bool,
#[serde(default = "false_val")]
jit_if_available: bool,
// Remaining keys of the rule (fg, bg, attrs) are collected here.
#[serde(flatten)]
rest: ThemeAttributeInnerOptions,
}
#[derive(Deserialize, Default)]
struct ThemeAttributeInnerOptions {
#[serde(default)]
@ -572,6 +785,34 @@ impl<'de> Deserialize<'de> for Themes {
}
ret.light.color_aliases = s.light.color_aliases;
ret.light.attr_aliases = s.light.attr_aliases;
#[cfg(feature = "regexp")]
for (k, v) in s.light.text_format_regexps {
let mut acc = SmallVec::new();
for (rs, v) in v {
match RegexpWrapper::new(
&rs,
v.caseless,
v.dotall,
v.extended,
v.multi_line,
v.ucp,
v.jit_if_available,
) {
Ok(regexp) => {
acc.push(TextFormatterSetting {
regexp,
fg: v.rest.fg,
bg: v.rest.bg,
attrs: v.rest.attrs,
});
}
Err(err) => {
return Err(de::Error::custom(err.to_string()));
}
}
}
ret.light.text_format_regexps.insert(k, acc);
}
for (k, v) in ret.dark.iter_mut() {
if let Some(mut att) = s.dark.keys.remove(k) {
if let Some(att) = att.fg.take() {
@ -599,6 +840,34 @@ impl<'de> Deserialize<'de> for Themes {
}
ret.dark.color_aliases = s.dark.color_aliases;
ret.dark.attr_aliases = s.dark.attr_aliases;
#[cfg(feature = "regexp")]
for (k, v) in s.dark.text_format_regexps {
let mut acc = SmallVec::new();
for (rs, v) in v {
match RegexpWrapper::new(
&rs,
v.caseless,
v.dotall,
v.extended,
v.multi_line,
v.ucp,
v.jit_if_available,
) {
Ok(regexp) => {
acc.push(TextFormatterSetting {
regexp,
fg: v.rest.fg,
bg: v.rest.bg,
attrs: v.rest.attrs,
});
}
Err(err) => {
return Err(de::Error::custom(err.to_string()));
}
}
}
ret.dark.text_format_regexps.insert(k, acc);
}
for (tk, t) in ret.other_themes.iter_mut() {
let mut theme = s.other_themes.remove(tk).unwrap();
for (k, v) in t.iter_mut() {
@ -629,6 +898,34 @@ impl<'de> Deserialize<'de> for Themes {
}
t.color_aliases = theme.color_aliases;
t.attr_aliases = theme.attr_aliases;
#[cfg(feature = "regexp")]
for (k, v) in theme.text_format_regexps {
let mut acc = SmallVec::new();
for (rs, v) in v {
match RegexpWrapper::new(
&rs,
v.caseless,
v.dotall,
v.extended,
v.multi_line,
v.ucp,
v.jit_if_available,
) {
Ok(regexp) => {
acc.push(TextFormatterSetting {
regexp,
fg: v.rest.fg,
bg: v.rest.bg,
attrs: v.rest.attrs,
});
}
Err(err) => {
return Err(de::Error::custom(err.to_string()));
}
}
}
t.text_format_regexps.insert(k, acc);
}
}
Ok(ret)
}
@ -636,7 +933,7 @@ impl<'de> Deserialize<'de> for Themes {
impl Themes {
fn validate_keys(name: &str, theme: &Theme, hash_set: &HashSet<&'static str>) -> Result<()> {
let keys = theme
let mut keys = theme
.keys()
.filter_map(|k| {
if !hash_set.contains(&k.as_ref()) {
@ -745,6 +1042,85 @@ impl Themes {
}
}))
.collect::<SmallVec<[(Option<_>, &'_ str, &'_ str, &'_ str); 128]>>();
#[cfg(feature = "regexp")]
{
for (key, v) in &theme.text_format_regexps {
if !regexp::DEFAULT_TEXT_FORMATTER_KEYS.contains(&key.as_ref()) {
keys.push((
None,
"key",
"invalid key in `text_format_regexps`",
key.as_ref(),
));
} else {
for tfs in v {
if let Some(fg) = &tfs.fg {
if let ThemeValue::Link(ref r, _) = fg {
if !hash_set.contains(&r.as_ref()) {
keys.push((
Some(key),
"fg link",
"invalid key in `text_format_regexps`",
r.as_ref(),
));
}
} else if let ThemeValue::Alias(ref ident) = fg {
if !theme.color_aliases.contains_key(ident.as_ref()) {
keys.push((
Some(key),
"fg alias",
"nonexistant color alias in `text_format_regexps`",
ident,
));
}
}
}
if let Some(bg) = &tfs.bg {
if let ThemeValue::Link(ref r, _) = bg {
if !hash_set.contains(&r.as_ref()) {
keys.push((
Some(key),
"bg link",
"invalid key in `text_format_regexps`",
r.as_ref(),
));
}
} else if let ThemeValue::Alias(ref ident) = bg {
if !theme.color_aliases.contains_key(ident.as_ref()) {
keys.push((
Some(key),
"bg alias",
"nonexistant color alias in `text_format_regexps`",
ident,
));
}
}
}
if let Some(attrs) = &tfs.attrs {
if let ThemeValue::Link(ref r, _) = attrs {
if !hash_set.contains(&r.as_ref()) {
keys.push((
Some(key),
"attrs link",
"invalid key in `text_format_regexps`",
r.as_ref(),
));
}
} else if let ThemeValue::Alias(ref ident) = attrs {
if !theme.attr_aliases.contains_key(ident.as_ref()) {
keys.push((
Some(key),
"attrs alias",
"nonexistant text attribute alias in `text_format_regexps`",
ident,
));
}
}
}
}
}
}
}
if !keys.is_empty() {
return Err(format!(
@ -1192,11 +1568,21 @@ impl Default for Themes {
keys: light,
attr_aliases: Default::default(),
color_aliases: Default::default(),
#[cfg(feature = "regexp")]
text_format_regexps: DEFAULT_TEXT_FORMATTER_KEYS
.iter()
.map(|&k| (k.into(), SmallVec::new()))
.collect(),
},
dark: Theme {
keys: dark,
attr_aliases: Default::default(),
color_aliases: Default::default(),
#[cfg(feature = "regexp")]
text_format_regexps: DEFAULT_TEXT_FORMATTER_KEYS
.iter()
.map(|&k| (k.into(), SmallVec::new()))
.collect(),
},
other_themes,
}

View File

@ -1718,7 +1718,15 @@ pub fn copy_area(grid_dest: &mut CellBuffer, grid_src: &CellBuffer, dest: Area,
}
grid_dest[(x, y)] = grid_src[(src_x, src_y)];
for t in &stack {
grid_dest[(x, y)].attrs |= grid_src.tag_table()[&t].attrs;
if let Some(fg) = grid_src.tag_table()[&t].fg {
grid_dest[(x, y)].set_fg(fg);
}
if let Some(bg) = grid_src.tag_table()[&t].bg {
grid_dest[(x, y)].set_bg(bg);
}
if let Some(attrs) = grid_src.tag_table()[&t].attrs {
grid_dest[(x, y)].attrs |= attrs;
}
}
if src_x >= get_x(bottom_right!(src)) {
break 'for_x;
@ -2792,8 +2800,8 @@ fn test_cellbuffer_search() {
#[derive(Debug, Default, Copy, Hash, Clone, PartialEq, Eq)]
pub struct FormatTag {
pub fg: Color,
pub bg: Color,
pub attrs: Attr,
pub fg: Option<Color>,
pub bg: Option<Color>,
pub attrs: Option<Attr>,
pub priority: u8,
}