From dda4057e76b926598ebea4b45a5a1a5737921b96 Mon Sep 17 00:00:00 2001 From: Michael Adler Date: Tue, 1 Oct 2024 18:12:45 +0200 Subject: [PATCH] feat: add rule_match_mode option to determine rule application behavior The `rule_match_mode` config option can be one of `unique`, `first`, or `all`, and it dictates how rules are applied. This parameter is optional and defaults to `unique` in order to be backwards compatible. The available modes are as follows: - `first`: The first rule that matches will be applied. - `all`: All matching rules will be applied in order, meaning a single mail can be moved multiple times. - `unique`: Similar to `first`, but ensures that a mail is only moved once. --- example/config.yaml | 21 +++++-- src/lib/action.rs | 8 +++ src/lib/config.rs | 66 +++++++++++++++++++- src/lib/engine.rs | 142 ++++++++++++++++++++++++++------------------ 4 files changed, 171 insertions(+), 66 deletions(-) diff --git a/example/config.yaml b/example/config.yaml index d80e029..18bc632 100644 --- a/example/config.yaml +++ b/example/config.yaml @@ -3,15 +3,26 @@ maildir: ~/mail notmuch_config: ~/.config/notmuch/notmuchrc # only rename if you use mbsync rename: true + +# only apply rules to messages younger than 60 days (to speed things up) +max_age_days: 60 + +# rule_match_mode can be one of `unique`, `first`, `all` and determines how rules are applied; +# rule_match_mode is optional and defaults to `unique`; the following modes are available: +# - first: the first rule that matches will be applied +# - all: all rules that match will be applied (in order), i.e. 
a single mail can be moved multiple times +# - unique: like first, but ensure that a mail is only moved once +rule_match_mode: first rules: + # move mails older than 30 days from Trash to Nirvana; + # if Nirvana is not synced with the remote mailserver, then this is housekeeping for the server side + - folder: Nirvana + query: tag:trash and date:..30_days + # move mails tagged as `trash` to folder `Trash` - folder: Trash query: tag:trash - # move mails tagged as `sent` to folder `Sent` - - folder: Sent - query: tag:sent and not tag:trash - # move mails tagged as `archive` to folder `Archive` - folder: Archive - query: tag:archive and not tag:sent and not tag:trash + query: tag:archive diff --git a/src/lib/action.rs b/src/lib/action.rs index 6ecbb31..008ff68 100644 --- a/src/lib/action.rs +++ b/src/lib/action.rs @@ -32,6 +32,14 @@ pub fn apply_actions(cfg: &Config, dry_run: bool, actions: &HashMap, pub rules: Vec, + pub rule_match_mode: Option, +} + +#[derive(Debug, PartialEq, Eq, Clone, Copy)] +pub enum MatchMode { + Unique, + First, + All, +} + +impl Serialize for MatchMode { + fn serialize(&self, serializer: S) -> Result + where + S: serde::ser::Serializer, + { + use MatchMode::*; + match self { + Unique => serializer.serialize_str("unique"), + First => serializer.serialize_str("first"), + All => serializer.serialize_str("all"), + } + } +} + +impl<'de> Deserialize<'de> for MatchMode { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + struct MatchModeVisitor; + + impl<'de> serde::de::Visitor<'de> for MatchModeVisitor { + type Value = MatchMode; + + fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { + formatter.write_str("a string representing a match mode") + } + + fn visit_str(self, value: &str) -> Result + where + E: serde::de::Error, + { + match value { + "unique" => Ok(MatchMode::Unique), + "first" => Ok(MatchMode::First), + "all" => Ok(MatchMode::All), + _ => Err(E::custom(format!("unknown match 
mode: {}", value))), + } + } + } + + deserializer.deserialize_str(MatchModeVisitor) + } } impl Default for Config { @@ -24,16 +77,23 @@ impl Default for Config { rename: false, max_age_days: None, rules: Vec::new(), + rule_match_mode: None, } } } -#[derive(Debug, Serialize, Deserialize)] +#[derive(Debug, Serialize, Deserialize, Clone)] pub struct Rule { pub folder: String, pub query: String, } +impl std::fmt::Display for Rule { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "Rule {}: {}", self.folder, self.query) + } +} + pub fn load_config(fname: &Option) -> Result { let bd = BaseDirs::new().unwrap(); let basedir = bd.config_dir().join("notmuch-mailmover"); diff --git a/src/lib/engine.rs b/src/lib/engine.rs index 22a0f8c..b0dd80e 100644 --- a/src/lib/engine.rs +++ b/src/lib/engine.rs @@ -2,61 +2,47 @@ use std::fmt::Write as _; use std::{collections::HashMap, path::PathBuf}; use anyhow::{anyhow, Result}; -use log::{debug, error}; +use log::{debug, trace, warn}; -use crate::config::Config; +use crate::config::{Config, MatchMode}; use crate::repo::MailRepo; +/// Apply the given rules to the mails in the repository. +/// The result is a HashMap which assigns messages (files) to their new destination folders. +/// Note that no messages are actually moved at this stage. 
pub fn apply_rules<'a>(cfg: &'a Config, repo: &dyn MailRepo) -> Result> { debug!("applying rules"); let mut actions = HashMap::new(); - - let n = cfg.rules.len(); - if n > 0 { - let mut overlap_count: usize = 0; - - debug!("checking if any two rules overlap"); - let mut combined_query = String::with_capacity(2048); - for i in 0..n - 1 { - for j in i + 1..n { - let lhs = cfg.rules.get(i).unwrap(); - let rhs = cfg.rules.get(j).unwrap(); - - combined_query.clear(); - write!(combined_query, "({}) AND ({})", lhs.query, rhs.query)?; - if let Some(days) = cfg.max_age_days { - write!(combined_query, " AND date:\"{}_days\"..", days)?; - } - debug!("combined query: {}", combined_query); - let messages = repo.search_message(&combined_query)?; - if !messages.is_empty() { - let count = messages.len(); - overlap_count += count; - error!( - "Queries '{}' and '{}' overlap ({} messages)", - lhs.query, rhs.query, count - ); - } - } - } - - if overlap_count > 0 { - return Err(anyhow!("Rules overlap ({} messages)", overlap_count)); - } - } - for rule in &cfg.rules { - let mut query_str = format!("NOT folder:\"{}\" AND ({})", rule.folder, &rule.query); + let mut query_str = format!("({})", &rule.query); if let Some(days) = cfg.max_age_days { write!(query_str, " AND date:\"{}_days\"..", days)?; } - debug!("using query: {}", query_str); let messages = repo.search_message(&query_str)?; + debug!("query '{}' returned {} messages", query_str, messages.len()); for filename in messages { - debug!("processing {:?}", filename.to_str()); - if let Some(old) = actions.insert(filename, rule.folder.as_str()) { - return Err(anyhow!("Ambiguous result: {} and {}", old, rule.folder)); + trace!("processing {:?}", filename.to_str()); + // check if message was matched previously + if let Some(folder) = actions.get(&filename) { + match cfg.rule_match_mode { + Some(MatchMode::First) => { + debug!( + "Message was already assigned to folder {}, not moving into {}", + folder, rule.folder + ); + continue; + } + 
None | Some(MatchMode::Unique) => { + return Err(anyhow!("Ambiguous rule! Message already assigned to folder {}, cannot assign to folder {}", folder, rule.folder)); + } + Some(MatchMode::All) => { + warn!("Ambiguous rule! Message was previously assigned to folder {folder}"); + } + } } + + debug!("Assigning {:?} to {}", filename, rule.folder); + actions.insert(filename, rule.folder.as_str()); } } Ok(actions) @@ -104,10 +90,7 @@ mod tests { }); let mut repo: DummyRepo = Default::default(); - repo.add_mail( - "NOT folder:\"Trash\" AND (tag:trash)".to_string(), - "some.mail".to_string(), - ); + repo.add_mail("(tag:trash)".to_string(), "some.mail".to_string()); let actions = apply_rules(&cfg, &repo).unwrap(); assert_eq!( 1, @@ -123,7 +106,34 @@ mod tests { #[test] fn ambiguous_rule_test() { + let mut repo: DummyRepo = Default::default(); + repo.add_mail("(tag:trash)".to_string(), "some.mail".to_string()); + + let mut cfg1: Config = Default::default(); + cfg1.rules.push(Rule { + folder: "Trash".to_string(), + query: "tag:trash".to_string(), + }); + cfg1.rules.push(Rule { + folder: "Deleted".to_string(), + query: "tag:trash".to_string(), + }); + + let mut cfg2 = cfg1.clone(); + cfg2.rule_match_mode = Some(MatchMode::Unique); + + for cfg in &[cfg1, cfg2] { + let actions = apply_rules(&cfg, &repo); + assert!(actions.is_err()); + let err = actions.unwrap_err(); + assert_eq!("Ambiguous rule! 
Message already assigned to folder Trash, cannot assign to folder Deleted", err.to_string()); + } + } + + #[test] + fn rule_match_mode_first_test() { let mut cfg: Config = Default::default(); + cfg.rule_match_mode = Some(MatchMode::First); cfg.rules.push(Rule { folder: "Trash".to_string(), query: "tag:trash".to_string(), @@ -134,17 +144,33 @@ mod tests { }); let mut repo: DummyRepo = Default::default(); - repo.add_mail( - "NOT folder:\"Trash\" AND (tag:trash)".to_string(), - "some.mail".to_string(), - ); - repo.add_mail( - "NOT folder:\"Deleted\" AND (tag:trash)".to_string(), - "some.mail".to_string(), - ); - let actions = apply_rules(&cfg, &repo); - assert!(actions.is_err()); - let err = actions.unwrap_err(); - assert_eq!("Ambiguous result: Trash and Deleted", err.to_string()); + repo.add_mail("(tag:trash)".to_string(), "some.mail".to_string()); + let actions = apply_rules(&cfg, &repo).unwrap(); + assert_eq!(actions.len(), 1); + let pb = PathBuf::from_str("some.mail").unwrap(); + let folder = *actions.get(&pb).unwrap(); + assert_eq!("Trash", folder); + } + + #[test] + fn rule_match_mode_all() { + let mut cfg: Config = Default::default(); + cfg.rule_match_mode = Some(MatchMode::All); + cfg.rules.push(Rule { + folder: "Trash".to_string(), + query: "tag:trash".to_string(), + }); + cfg.rules.push(Rule { + folder: "Deleted".to_string(), + query: "tag:trash".to_string(), + }); + + let mut repo: DummyRepo = Default::default(); + repo.add_mail("(tag:trash)".to_string(), "some.mail".to_string()); + let actions = apply_rules(&cfg, &repo).unwrap(); + assert_eq!(actions.len(), 1); + let pb = PathBuf::from_str("some.mail").unwrap(); + let folder = *actions.get(&pb).unwrap(); + assert_eq!("Deleted", folder); } }