From a37851b1082b88fc57611f79fe09064b46dbb17e Mon Sep 17 00:00:00 2001 From: Manos Pitsidianakis Date: Sat, 29 Apr 2023 17:14:31 +0300 Subject: [PATCH] cli: add repair command with some lints --- Cargo.lock | 1 + Cargo.toml | 6 ++ cli/src/lib.rs | 22 ++++ cli/src/lints.rs | 252 +++++++++++++++++++++++++++++++++++++++++++ cli/src/main.rs | 50 ++++++++- core/Cargo.toml | 2 +- core/src/db/posts.rs | 8 +- core/src/db/queue.rs | 8 +- core/src/lib.rs | 4 + core/src/models.rs | 2 +- docs/mpot.1 | 31 ++++++ web/src/lists.rs | 6 +- 12 files changed, 380 insertions(+), 12 deletions(-) create mode 100644 cli/src/lints.rs diff --git a/Cargo.lock b/Cargo.lock index 03fb72b..8a8d7b4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2530,6 +2530,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "01e213bc3ecb39ac32e81e51ebe31fd888a940515173e3a18a35f8c6e896422a" dependencies = [ "bitflags", + "chrono", "fallible-iterator", "fallible-streaming-iterator", "hashlink", diff --git a/Cargo.toml b/Cargo.toml index ffa3357..5632c20 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,3 +7,9 @@ members = [ "rest-http", "web", ] + +[profile.release] +lto = "fat" +opt-level = "z" +codegen-units = 1 +split-debuginfo = "unpacked" diff --git a/cli/src/lib.rs b/cli/src/lib.rs index 67c6de3..3d4dc9a 100644 --- a/cli/src/lib.rs +++ b/cli/src/lib.rs @@ -185,6 +185,28 @@ pub enum Command { /// Is account enabled. enabled: Option>, }, + /// Show and fix possible data mistakes or inconsistencies. + Repair { + /// Fix errors (default: false) + #[arg(long, default_value = "false")] + fix: bool, + /// Select all tests (default: false) + #[arg(long, default_value = "false")] + all: bool, + /// Post `datetime` column must have the Date: header value, in RFC2822 + /// format. + #[arg(long, default_value = "false")] + datetime_header_value: bool, + /// Remove accounts that have no matching subscriptions. 
+ #[arg(long, default_value = "false")] + remove_empty_accounts: bool, + /// Remove subscription requests that have been accepted. + #[arg(long, default_value = "false")] + remove_accepted_subscription_requests: bool, + /// Warn if a list has no owners. + #[arg(long, default_value = "false")] + warn_list_no_owner: bool, + }, } /// Postfix config values. diff --git a/cli/src/lints.rs b/cli/src/lints.rs new file mode 100644 index 0000000..68b118f --- /dev/null +++ b/cli/src/lints.rs @@ -0,0 +1,252 @@ +/* + * This file is part of mailpot + * + * Copyright 2020 - Manos Pitsidianakis + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +use super::*; + +pub fn datetime_header_value_lint(db: &mut Connection, dry_run: bool) -> Result<()> { + let mut col = vec![]; + { + let mut stmt = db.connection.prepare("SELECT * FROM post ORDER BY pk")?; + let iter = stmt.query_map([], |row| { + let pk: i64 = row.get("pk")?; + let date_s: String = row.get("datetime")?; + match melib::datetime::rfc822_to_timestamp(date_s.trim()) { + Err(_) | Ok(0) => { + let mut timestamp: i64 = row.get("timestamp")?; + let created: i64 = row.get("created")?; + if timestamp == 0 { + timestamp = created; + } + timestamp = std::cmp::min(timestamp, created); + let timestamp = if timestamp <= 0 { + None + } else { + // safe because we checked it's not negative or zero above. 
+ Some(timestamp as u64) + }; + let message: Vec = row.get("message")?; + Ok(Some((pk, date_s, message, timestamp))) + } + Ok(_) => Ok(None), + } + })?; + + for entry in iter { + if let Some(s) = entry? { + col.push(s); + } + } + } + let mut failures = 0; + let tx = if dry_run { + None + } else { + Some(db.connection.transaction()?) + }; + if col.is_empty() { + println!("datetime_header_value: ok"); + } else { + println!("datetime_header_value: found {} entries", col.len()); + println!("pk\tDate value\tshould be"); + for (pk, val, message, timestamp) in col { + let correct = if let Ok(v) = + chrono::DateTime::::parse_from_rfc3339(&val) + { + v.to_rfc2822() + } else if let Some(v) = timestamp.map(|t| { + melib::datetime::timestamp_to_string(t, Some(melib::datetime::RFC822_DATE), true) + }) { + v + } else if let Ok(v) = + Envelope::from_bytes(&message, None).map(|env| env.date_as_str().to_string()) + { + v + } else { + failures += 1; + println!("{pk}\t{val}\tCould not find any valid date value in the post metadata!"); + continue; + }; + println!("{pk}\t{val}\t{correct}"); + if let Some(tx) = tx.as_ref() { + tx.execute( + "UPDATE post SET datetime = ? 
WHERE pk = ?", + rusqlite::params![&correct, pk], + )?; + } + } + } + if let Some(tx) = tx { + tx.commit()?; + } + if failures > 0 { + println!( + "datetime_header_value: {failures} failure{}", + if failures == 1 { "" } else { "s" } + ); + } + Ok(()) +} + +pub fn remove_empty_accounts_lint(db: &mut Connection, dry_run: bool) -> Result<()> { + let mut col = vec![]; + { + let mut stmt = db.connection.prepare( + "SELECT * FROM account WHERE NOT EXISTS (SELECT 1 FROM subscription AS s WHERE \ + s.address = account.address) ORDER BY pk", + )?; /* `account.address` is qualified on purpose: a bare `address` inside the subquery binds to `s.address`, which makes the comparison always true and hides every empty account */ + let iter = stmt.query_map([], |row| { + let pk = row.get("pk")?; + Ok(DbVal( + Account { + pk, + name: row.get("name")?, + address: row.get("address")?, + public_key: row.get("public_key")?, + password: row.get("password")?, + enabled: row.get("enabled")?, + }, + pk, + )) + })?; + + for entry in iter { + let entry = entry?; + col.push(entry); + } + } + if col.is_empty() { + println!("remove_empty_accounts: ok"); + } else { + let tx = if dry_run { + None + } else { + Some(db.connection.transaction()?) + }; + println!("remove_empty_accounts: found {} entries", col.len()); + println!("pk\tAddress"); + for DbVal(Account { pk, address, .. 
}, _) in &col { + println!("{pk}\t{address}"); + } + if let Some(tx) = tx { + for DbVal(_, pk) in col { + tx.execute("DELETE FROM account WHERE pk = ?", [pk])?; + } + tx.commit()?; + } + } + Ok(()) +} + +pub fn remove_accepted_subscription_requests_lint( + db: &mut Connection, + dry_run: bool, +) -> Result<()> { + let mut col = vec![]; + { + let mut stmt = db.connection.prepare( + "SELECT * FROM candidate_subscription WHERE accepted IS NOT NULL ORDER BY pk", + )?; + let iter = stmt.query_map([], |row| { + let pk = row.get("pk")?; + Ok(DbVal( + ListSubscription { + pk, + list: row.get("list")?, + address: row.get("address")?, + account: row.get("account")?, + name: row.get("name")?, + digest: row.get("digest")?, + enabled: row.get("enabled")?, + verified: row.get("verified")?, + hide_address: row.get("hide_address")?, + receive_duplicates: row.get("receive_duplicates")?, + receive_own_posts: row.get("receive_own_posts")?, + receive_confirmation: row.get("receive_confirmation")?, + }, + pk, + )) + })?; + + for entry in iter { + let entry = entry?; + col.push(entry); + } + } + if col.is_empty() { + println!("remove_accepted_subscription_requests: ok"); + } else { + let tx = if dry_run { + None + } else { + Some(db.connection.transaction()?) + }; + println!( + "remove_accepted_subscription_requests: found {} entries", + col.len() + ); + println!("pk\tAddress"); + for DbVal(ListSubscription { pk, address, .. 
}, _) in &col { + println!("{pk}\t{address}"); + } + if let Some(tx) = tx { + for DbVal(_, pk) in col { + tx.execute("DELETE FROM candidate_subscription WHERE pk = ?", [pk])?; + } + tx.commit()?; + } + } + Ok(()) +} + +pub fn warn_list_no_owner_lint(db: &mut Connection, _: bool) -> Result<()> { + let mut stmt = db.connection.prepare( + "SELECT * FROM list WHERE NOT EXISTS (SELECT 1 FROM owner AS o WHERE o.list = list.pk) ORDER \ + BY pk", + )?; /* `list.pk` is qualified on purpose: a bare `pk` inside the subquery would bind to `owner.pk` (assuming `owner` has a `pk` column; schema not visible here) instead of the outer list row */ + let iter = stmt.query_map([], |row| { + let pk = row.get("pk")?; + Ok(DbVal( + MailingList { + pk, + name: row.get("name")?, + id: row.get("id")?, + address: row.get("address")?, + description: row.get("description")?, + archive_url: row.get("archive_url")?, + }, + pk, + )) + })?; + + let mut col = vec![]; + for entry in iter { + let entry = entry?; + col.push(entry); + } + if col.is_empty() { + println!("warn_list_no_owner: ok"); + } else { + println!("warn_list_no_owner: found {} entries", col.len()); + println!("pk\tName"); + for DbVal(MailingList { pk, name, .. 
}, _) in col { + println!("{pk}\t{name}"); + } + } + Ok(()) +} diff --git a/cli/src/main.rs b/cli/src/main.rs index b46049c..35fbca5 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -24,6 +24,8 @@ use std::{ process::Stdio, }; +mod lints; +use lints::*; use mailpot::{ melib::{backends::maildir::MaildirPathTrait, smol, Envelope, EnvelopeHash}, models::{changesets::*, *}, @@ -757,6 +759,52 @@ fn run_app(opt: Opt) -> Result<()> { }; db.update_account(changeset)?; } + Repair { + fix, + all, + mut datetime_header_value, + mut remove_empty_accounts, + mut remove_accepted_subscription_requests, + mut warn_list_no_owner, + } => { + type LintFn = + fn(&'_ mut mailpot::Connection, bool) -> std::result::Result<(), mailpot::Error>; + let dry_run = !fix; + if all { + datetime_header_value = true; + remove_empty_accounts = true; + remove_accepted_subscription_requests = true; + warn_list_no_owner = true; + } + + if !(datetime_header_value + | remove_empty_accounts + | remove_accepted_subscription_requests + | warn_list_no_owner) + { + return Err( + "No lints selected: specify them with flag arguments. 
See --help".into(), + ); + } + + if dry_run { + println!("running without making modifications (dry run)"); + } + + for (flag, lint_fn) in [ + (datetime_header_value, datetime_header_value_lint as LintFn), + (remove_empty_accounts, remove_empty_accounts_lint as _), + ( + remove_accepted_subscription_requests, + remove_accepted_subscription_requests_lint as _, + ), + (warn_list_no_owner, warn_list_no_owner_lint as _), + ] { + if flag { + lint_fn(&mut db, dry_run)?; + } + } + } } Ok(()) @@ -773,7 +821,7 @@ fn main() -> std::result::Result<(), i32> { .init() .unwrap(); if let Err(err) = run_app(opt) { - println!("{}", err.display_chain()); + print!("{}", err.display_chain()); std::process::exit(-1); } Ok(()) diff --git a/core/Cargo.toml b/core/Cargo.toml index 127759f..6eb0d07 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -17,7 +17,7 @@ error-chain = { version = "0.12.4", default-features = false } log = "0.4" melib = { version = "*", default-features = false, features = ["smtp", "unicode_algorithms", "maildir_backend"], git = "https://github.com/meli/meli", rev = "2447a2c" } minijinja = { version = "0.31.0", features = ["source", ] } -rusqlite = { version = "^0.28", features = ["bundled", "trace", "hooks", "serde_json", "array"] } +rusqlite = { version = "^0.28", features = ["bundled", "trace", "hooks", "serde_json", "array", "chrono"] } serde = { version = "^1", features = ["derive", ] } serde_json = "^1" toml = "^0.5" diff --git a/core/src/db/posts.rs b/core/src/db/posts.rs index 7b8cb59..ee733ed 100644 --- a/core/src/db/posts.rs +++ b/core/src/db/posts.rs @@ -31,15 +31,15 @@ impl Connection { } else { from_[0].get_email() }; - let datetime: std::borrow::Cow<'_, str> = if env.timestamp != 0 { + let datetime: std::borrow::Cow<'_, str> = if !env.date.as_str().is_empty() { + env.date.as_str().into() + } else { melib::datetime::timestamp_to_string( env.timestamp, - Some(melib::datetime::RFC3339_FMT_WITH_TIME), + Some(melib::datetime::RFC822_DATE), true, ) 
.into() - } else { - env.date.as_str().into() }; let message_id = env.message_id_display(); let mut stmt = self.connection.prepare( diff --git a/core/src/db/queue.rs b/core/src/db/queue.rs index 392bf45..97faafb 100644 --- a/core/src/db/queue.rs +++ b/core/src/db/queue.rs @@ -87,7 +87,7 @@ pub struct QueueEntry { /// Unix timestamp of date. pub timestamp: u64, /// Datetime as string. - pub datetime: String, + pub datetime: DateTime, } impl std::fmt::Display for QueueEntry { @@ -142,7 +142,7 @@ impl QueueEntry { message_id: env.message_id().to_string(), message: raw.to_vec(), timestamp: now.timestamp() as u64, - datetime: now.to_string(), + datetime: now, }) } } @@ -197,7 +197,7 @@ impl Connection { message_id: row.get::<_, String>("message_id")?, message: row.get::<_, Vec>("message")?, timestamp: row.get::<_, u64>("timestamp")?, - datetime: row.get::<_, String>("datetime")?, + datetime: row.get::<_, DateTime>("datetime")?, }, pk, )) @@ -227,7 +227,7 @@ impl Connection { message_id: row.get::<_, String>("message_id")?, message: row.get::<_, Vec>("message")?, timestamp: row.get::<_, u64>("timestamp")?, - datetime: row.get::<_, String>("datetime")?, + datetime: row.get::<_, DateTime>("datetime")?, }) }; let mut stmt = if index.is_empty() { diff --git a/core/src/lib.rs b/core/src/lib.rs index 0139c2a..d0caca0 100644 --- a/core/src/lib.rs +++ b/core/src/lib.rs @@ -153,8 +153,12 @@ #[macro_use] extern crate error_chain; pub extern crate anyhow; +pub extern crate chrono; pub extern crate rusqlite; +/// Alias for [`chrono::DateTime`]. +pub type DateTime = chrono::DateTime; + #[macro_use] pub extern crate serde; pub extern crate log; diff --git a/core/src/models.rs b/core/src/models.rs index 9cdcfc7..d743829 100644 --- a/core/src/models.rs +++ b/core/src/models.rs @@ -473,7 +473,7 @@ pub struct Post { pub message: Vec, /// Unix timestamp of date. pub timestamp: u64, - /// Datetime as string. + /// Date header as string. 
pub datetime: String, /// Month-year as a `YYYY-mm` formatted string, for use in archives. pub month_year: String, diff --git a/docs/mpot.1 b/docs/mpot.1 index 18f4b91..834545d 100644 --- a/docs/mpot.1 +++ b/docs/mpot.1 @@ -891,5 +891,36 @@ Is account enabled. [\fIpossible values: \fRtrue, false] .ie \n(.g .ds Aq \(aq .el .ds Aq ' +.\fB +.SS mpot repair +.\fR +.br + +.br + +mpot repair [\-\-fix \fIFIX\fR] [\-\-all \fIALL\fR] [\-\-datetime\-header\-value \fIDATETIME_HEADER_VALUE\fR] [\-\-remove\-empty\-accounts \fIREMOVE_EMPTY_ACCOUNTS\fR] [\-\-remove\-accepted\-subscription\-requests \fIREMOVE_ACCEPTED_SUBSCRIPTION_REQUESTS\fR] [\-\-warn\-list\-no\-owner \fIWARN_LIST_NO_OWNER\fR] +.br + +Show and fix possible data mistakes or inconsistencies. +.TP +\-\-fix +Fix errors (default: false). +.TP +\-\-all +Select all tests (default: false). +.TP +\-\-datetime\-header\-value +Post `datetime` column must have the Date: header value, in RFC2822 format. +.TP +\-\-remove\-empty\-accounts +Remove accounts that have no matching subscriptions. +.TP +\-\-remove\-accepted\-subscription\-requests +Remove subscription requests that have been accepted. +.TP +\-\-warn\-list\-no\-owner +Warn if a list has no owners. +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' .SH AUTHORS Manos Pitsidianakis diff --git a/web/src/lists.rs b/web/src/lists.rs index 9e38d16..f148ab4 100644 --- a/web/src/lists.rs +++ b/web/src/lists.rs @@ -73,7 +73,11 @@ pub async fn list( .map(|(thread, length, _timestamp)| { let post = &post_map[&thread.message_id.as_str()]; //2019-07-14T14:21:02 - if let Some(day) = post.datetime.get(8..10).and_then(|d| d.parse::().ok()) { + if let Some(day) = + chrono::DateTime::::parse_from_rfc2822(post.datetime.trim()) + .ok() + .map(|d| d.day()) + { hist.get_mut(&post.month_year).unwrap()[day.saturating_sub(1) as usize] += 1; } let envelope = melib::Envelope::from_bytes(post.message.as_slice(), None)