diff --git a/Cargo.lock b/Cargo.lock index a9fa29a..1b90c07 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -251,7 +251,7 @@ checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4" [[package]] name = "labeler" -version = "0.1.4" +version = "0.1.5" dependencies = [ "ansi_term 0.12.1", "anyhow", diff --git a/Cargo.toml b/Cargo.toml index 71a1b4a..e40b3c2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "labeler" -version = "0.1.4" +version = "0.1.5" authors = ["syncpark "] edition = "2018" diff --git a/src/cluster.rs b/src/cluster.rs index 2a314b6..b128d2f 100644 --- a/src/cluster.rs +++ b/src/cluster.rs @@ -6,12 +6,12 @@ use anyhow::Result; use log::info; use regex::Regex; use serde::Deserialize; -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use std::fmt; use std::str::FromStr; const SIGNATURE_DISPLAY_LENGTH: usize = 200; - +const CLUSTER_ID_FOR_OUTLIERS: ClusterId = 1_000_000; #[derive(Deserialize)] struct SavedClusters { detector_id: i32, @@ -26,6 +26,8 @@ struct SavedClusters { struct ClusterMember { cluster_id: usize, cluster_size: usize, + signature: Option, + score: Option, events: Vec, } @@ -46,6 +48,9 @@ impl SavedClusters { self.outlier_count, ) } + fn outliers(&self) -> &Vec { + &self.outliers + } } #[derive(Debug, Default, Clone)] @@ -57,7 +62,8 @@ pub struct Members { new_qualifier: Qualifier, signature: Option, event_ids: Vec, - // tokens: HashMap>, // TODO: calculate token occurrences to correct label-score + filtered_events: Vec>, // tokens: HashMap>, // TODO: calculate token occurrences to correct label-score + filter: Vec, } impl fmt::Display for Members { @@ -68,7 +74,8 @@ impl fmt::Display for Members { } else { write!(f, ", {}<-{}", self.new_qualifier, self.qualifier)?; } - write!(f, ", {} events", self.size) + write!(f, ", {} events", self.size)?; + write!(f, ", score = {}", self.score) } } @@ -104,7 +111,6 @@ pub struct Clusters { clusters: Vec, _outliers: Vec, clusters_map: HashMap, - // _message_cluster_map: HashMap, tokens_clusters_map: HashMap>, } @@ -112,7 +118,7 @@ impl Clusters { /// # Errors /// /// Will return `Err` if the query to get cluster records for the specified datasource failed. - pub fn new(path: &str, labels: &Labels) -> Result { + pub fn new(path: &str, labels: &Labels, delimiter: char) -> Result { let save_clusters = SavedClusters::from_path(path)?; { let (detector_id, events_count, clusters_count, outliers_count) = @@ -122,8 +128,8 @@ impl Clusters { path, detector_id, events_count, clusters_count, outliers_count ); } - let clusters = save_clusters.cluster_ids(); - let clusters_map: HashMap = save_clusters + let mut clusters = save_clusters.cluster_ids(); + let mut clusters_map: HashMap = save_clusters .clusters .iter() .map(|m| { @@ -137,31 +143,49 @@ impl Clusters { Members { id: m.cluster_id, size: m.cluster_size, - score: 0.0, + score: m.score.unwrap_or_default(), qualifier, new_qualifier: qualifier, - signature: None, + signature: m.signature.as_ref().cloned(), event_ids: m.events.clone(), + filtered_events: Vec::new(), + filter: Vec::new(), }, ) }) .collect(); - // let _message_cluster_map: HashMap = clusters_map - // .values() - // .flat_map(|c| { - // c.event_ids - // .iter() - // .map(|e| (e.to_string(), c.id)) - // .collect::>() - // }) - // .collect(); + if !save_clusters.outliers().is_empty() { + let message_id_index = 1; + let event_ids: Vec<_> = save_clusters + .outliers() + .iter() + .filter_map(|raw| { + let s: Vec<_> = raw.split(delimiter).collect(); + s.get(message_id_index).map(|msg_id| (*msg_id).to_string()) + }) + .collect(); + clusters_map.insert( + CLUSTER_ID_FOR_OUTLIERS, + Members { + id: CLUSTER_ID_FOR_OUTLIERS, + size: save_clusters.outliers().len(), + score: 0.0, + qualifier: Qualifier::default(), + new_qualifier: Qualifier::default(), + signature: None, + event_ids, + filtered_events: Vec::new(), + filter: Vec::new(), + }, + ); + clusters.push(CLUSTER_ID_FOR_OUTLIERS); + } Ok(Self { clusters, _outliers: save_clusters.outliers, clusters_map, - // _message_cluster_map, tokens_clusters_map: HashMap::new(), }) } @@ -213,6 +237,13 @@ impl Clusters { self.clusters.is_empty() } + pub fn clear_filter(&mut self, cluster_id: ClusterId) { + if let Some(c) = self.clusters_map.get_mut(&cluster_id) { + c.filtered_events.clear(); + c.filter.clear(); + } + } + pub fn print(&self, cid: ClusterId, events: &Events, cfg: &CliConf) { if let Some(c) = self.clusters_map.get(&cid) { println!("{}", c); @@ -221,11 +252,20 @@ impl Clusters { println!("signature = {}", sig); } } + if !c.filter.is_empty() { + println!("Event Filter: {:#?}", c.filter); + } if cfg.is_show_samples_on() { let display_count = cfg.samples_count(); + let event_ids = if let Some(last) = c.filtered_events.last() { + last + } else { + &c.event_ids + }; println!(); - for (idx, message_id) in c.event_ids.iter().enumerate() { + for (idx, message_id) in event_ids.iter().enumerate() { if idx > display_count { + println!("... {} more events", event_ids.len() - display_count); break; } if let Some(msg) = events.get_message(message_id) { @@ -308,10 +348,11 @@ impl Clusters { .iter() .filter_map(|cid| { if let Some(c) = self.clusters_map.get(cid) { - if events.regex_match(&re, &c.event_ids) { - Some(*cid) - } else { + let matched = events.regex_match(&re, &c.event_ids); + if matched.is_empty() { None + } else { + Some(*cid) } } else { None @@ -320,6 +361,48 @@ impl Clusters { .collect()) } + pub fn regex_match_in_this_cluster( + &self, + cluster_id: ClusterId, + pattern: &str, + events: &Events, + ) -> Result>> { + let mut negate: bool = false; + let pattern = if pattern.starts_with('!') { + if pattern.len() == 1 { + return Ok(None); + } + negate = true; + pattern.get(1..).unwrap_or(pattern) + } else { + pattern + }; + + let re = Regex::new(pattern)?; + Ok(self.clusters_map.get(&cluster_id).map(|c| { + let cluster_event_ids = if let Some(last) = c.filtered_events.last() { + last + } else { + &c.event_ids + }; + let matched = events.regex_match(&re, cluster_event_ids); + if negate { + let set_matched: HashSet<_> = matched.into_iter().collect(); + let set_cluster: HashSet<_> = cluster_event_ids.iter().cloned().collect(); + (&set_cluster - &set_matched).into_iter().collect() + } else { + matched + } + })) + } + + pub fn set_filtered(&mut self, cluster_id: ClusterId, matched: Vec, pattern: &str) { + if let Some(c) = self.clusters_map.get_mut(&cluster_id) { + c.filter.push(pattern.to_string()); + c.filtered_events.push(matched); + } + } + pub fn set_qualifier(&mut self, cid: ClusterId, qualifier: Qualifier) -> bool { if let Some(c) = self.clusters_map.get_mut(&cid) { return c.set_qualifier(qualifier); diff --git a/src/events.rs b/src/events.rs index 496d591..8dea565 100644 --- a/src/events.rs +++ b/src/events.rs @@ -107,15 +107,28 @@ impl Events { } #[must_use] - pub fn regex_match(&self, re: &Regex, event_ids: &[MessageId]) -> bool { - for msg_id in event_ids { - if let Some(evt) = self.events.get(msg_id) { - if re.is_match(&evt.content) { - return true; - } - } - } - false + pub fn regex_match(&self, re: &Regex, event_ids: &[MessageId]) -> Vec { + event_ids + .iter() + .filter_map(|msg_id| { + self.events.get(msg_id).map(|event| { + if re.is_match(&event.content) { + Some(msg_id.to_string()) + } else { + None + } + }) + }) + .flatten() + .collect() + // for msg_id in event_ids { + // if let Some(evt) = self.events.get(msg_id) { + // if re.is_match(&evt.content) { + // return true; + // } + // } + // } + // false } #[must_use] diff --git a/src/main.rs b/src/main.rs index 45c4f64..d056642 100644 --- a/src/main.rs +++ b/src/main.rs @@ -29,6 +29,7 @@ fn main() { #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum CliCmd { ClusterID, + Event(FilterType, FilterOp), Exit, Filter(FilterType, FilterOp), GoNext, @@ -48,6 +49,8 @@ struct CmdCompleter { commands: Vec<&'static str>, } const CMDLIST: &[&str] = &[ + "/event regex", + "/event clear", "/filter count", "/filter label", "/filter qualifier benign", @@ -152,6 +155,9 @@ fn run(cfg: &Config) -> Result<()> { } } } + CliCmd::Event(t, _) => { + do_event_filtering(&mut champion, t, opt.as_deref(), &ticks); + } CliCmd::Exit => { if !prompt.is_empty() { if champion.remove_filter().is_ok() { @@ -253,6 +259,20 @@ fn do_goto(cmd: CliCmd, ticks: Option, reverse: bool) -> usize { } } +fn do_event_filtering( + champion: &mut TitleMatch, + ft: FilterType, + pattern: Option<&str>, + ticks: &Option, +) { + match ft { + FilterType::NoFilter | FilterType::Regex => { + let _r = champion.filter_event(ft, pattern, ticks); + } + _ => {} + } +} + fn do_filtering( champion: &mut TitleMatch, ft: FilterType, @@ -338,6 +358,13 @@ fn get_user_input(rl: &mut rustyline::Editor, tag: &str) -> (CliCm ls.push(&pattern); } match &ls[..] { + ["/event", "clear"] => return (CliCmd::Event(FilterType::NoFilter, FilterOp::EQ), None), + ["/event", "regex", x] => { + return ( + CliCmd::Event(FilterType::Regex, FilterOp::EQ), + Some((*x).to_string()), + ) + } ["/filter", "count", x, y] => { if let Ok(op) = FilterOp::from_str(*x) { if y.parse::().is_ok() { @@ -398,17 +425,25 @@ fn get_user_input(rl: &mut rustyline::Editor, tag: &str) -> (CliCm ["/set", x, y] => { let mut all: bool = false; let mut op: bool = false; + let mut count: usize = 0; match *y { "on" => op = true, "off" => op = false, "all" => all = true, - _ => return (CliCmd::Undefined, None), + _ => { + if let Ok(c) = (*y).parse::() { + count = c; + } else { + return (CliCmd::Undefined, None); + } + } }; match *x { "benign" => return (CliCmd::SetQualifier(all), Some(String::from("benign"))), "mixed" => return (CliCmd::SetQualifier(all), Some(String::from("mixed"))), "reverse" => return (CliCmd::Set(ConfigType::Reverse(op)), None), "samples" => return (CliCmd::Set(ConfigType::Samples(op)), None), + "samplescount" => return (CliCmd::Set(ConfigType::SamplesCount(count)), None), "signature" => return (CliCmd::Set(ConfigType::Signature(op)), None), "suspicious" => { return (CliCmd::SetQualifier(all), Some(String::from("suspicious"))) @@ -435,6 +470,8 @@ fn show_help() { /x exit from label mode. # get into the label mode and show defail information of the label. +/event clear clear event filters. +/event regex [!] filter events in current cluster by regular expression. /filter label filter qualified clusters by all labels. /filter label filter qualified clusters by the specified label. /filter count|score >|>=|=|<=|< filter clusters by the number of event in cluster or it's score. @@ -445,6 +482,7 @@ fn show_help() { /set csvstyle on|off set message display style. /set reverse on|off navigate reverse direction. /set samples on|off show samples. +/set samplescount change sample display count. /set signature on|off show signature of cluster. /set tokens on|off show tokens and it's matching result in the cluster. /set benign|mixed|suspicious|unknown [all] set qualifier cluster or all clusters of current layer. diff --git a/src/matcher.rs b/src/matcher.rs index 236d75a..2242f57 100644 --- a/src/matcher.rs +++ b/src/matcher.rs @@ -50,7 +50,7 @@ impl TitleMatch { let labels = Labels::new(cfg.labels())?; info!("loading clusters"); - let mut clusters = Clusters::new(cfg.clusters(), &labels)?; + let mut clusters = Clusters::new(cfg.clusters(), &labels, cfg.delimiter())?; if clusters.is_empty() { return Err(anyhow!("clusters not found.")); } @@ -297,15 +297,55 @@ impl TitleMatch { } } + /// # Errors + /// * Will return error if unknown cluster is specified + /// * Will return error if regular expression has invalid syntax + pub fn filter_event( + &mut self, + ft: FilterType, + pattern: Option<&str>, + ticks: &Option, + ) -> Result { + let mut filtered_events_count = 0; + if let Some(index) = ticks { + let cluster_id = self + .rounds + .last() + .and_then(|last| last.clusters.get(*index)) + .ok_or_else(|| anyhow!("Cluster {} not found", index))?; + match ft { + FilterType::NoFilter => self.clusters.clear_filter(*cluster_id), + FilterType::Regex => { + if let Some(pattern) = pattern { + match self.clusters.regex_match_in_this_cluster( + *cluster_id, + pattern, + &self.events, + ) { + Err(e) => eprintln!("Error: {}", e), + Ok(matched) => { + if let Some(matched) = matched { + filtered_events_count = matched.len(); + self.clusters.set_filtered(*cluster_id, matched, pattern); + } + } + } + } + } + _ => {} + } + } + Ok(filtered_events_count) + } /// # Errors /// /// Will return `Err` if a try to remove on an empty filter pub fn remove_filter(&mut self) -> Result<()> { - if self.rounds.len() > 1 { + if self.rounds.is_empty() { + Err(anyhow!("Failed to remove the filtered clusters.")) + } else { let _r = self.rounds.pop(); Ok(()) - } else { - Err(anyhow!("Failed to remove the filtered clusters.")) } }