From 91a6d470f9b53955e38c54fdeeede8364a26bdc3 Mon Sep 17 00:00:00 2001
From: Krzysztof Piotrowski
Date: Thu, 11 Jul 2024 16:50:53 +0000
Subject: [PATCH] feat: skip filtering by date when static path is used

Signed-off-by: Krzysztof Piotrowski
---
 Cargo.lock                                 |   1 +
 crates/common/log_manager/Cargo.toml       |   1 +
 crates/common/log_manager/src/log_utils.rs | 209 ++++++++++++---------
 3 files changed, 125 insertions(+), 86 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 1e9c150cb99..be7f5632fda 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2011,6 +2011,7 @@ dependencies = [
  "glob",
  "log",
  "rand",
+ "regex",
  "serde",
  "tedge_test_utils",
  "tedge_utils",
diff --git a/crates/common/log_manager/Cargo.toml b/crates/common/log_manager/Cargo.toml
index 8f6071f6fd3..31f5f4c6cc9 100644
--- a/crates/common/log_manager/Cargo.toml
+++ b/crates/common/log_manager/Cargo.toml
@@ -13,6 +13,7 @@ easy_reader = { workspace = true }
 glob = { workspace = true }
 log = { workspace = true }
 rand = { workspace = true }
+regex = { workspace = true }
 serde = { workspace = true, features = ["derive"] }
 tedge_utils = { workspace = true }
 thiserror = { workspace = true }
diff --git a/crates/common/log_manager/src/log_utils.rs b/crates/common/log_manager/src/log_utils.rs
index 05593d16f15..811e04df51f 100644
--- a/crates/common/log_manager/src/log_utils.rs
+++ b/crates/common/log_manager/src/log_utils.rs
@@ -2,6 +2,7 @@ use super::config::FileEntry;
 use super::error::LogRetrievalError;
 use easy_reader::EasyReader;
 use glob::glob;
+use regex::Regex;
 use std::collections::VecDeque;
 use std::fs::File;
 use std::io::Write;
@@ -11,16 +12,15 @@ use time::OffsetDateTime;
 
 /// read any log file coming from `obj.log.log_type`
 pub fn new_read_logs(
-    files: &Vec<FileEntry>,
+    files: &[FileEntry],
     log_type: &str,
     date_from: OffsetDateTime,
     lines: usize,
     search_text: &Option<String>,
     tmp_dir: &Path,
 ) -> Result<PathBuf, LogRetrievalError> {
-    // first filter logs on type
-    let mut logfiles_to_read = filter_logs_on_type(files, log_type)?;
-    logfiles_to_read = filter_logs_path_on_metadata(log_type, date_from, logfiles_to_read)?;
+    // filter logs on type and date
+    let logfiles_to_read = filter_logs(files, log_type, date_from)?;
 
     let temp_path = tmp_dir.join(format!("{log_type}-{}", rand::random::()));
     let mut temp_file = File::create(&temp_path)?;
@@ -44,7 +44,7 @@ pub fn new_read_logs(
     Ok(temp_path)
 }
 
-pub fn read_log_content(
+fn read_log_content(
     logfile: &Path,
     mut line_counter: usize,
     max_lines: usize,
@@ -93,42 +93,73 @@ pub fn read_log_content(
     }
 }
 
-pub fn filter_logs_on_type(
-    files: &Vec<FileEntry>,
+fn filter_logs(
+    files: &[FileEntry],
     log_type: &str,
+    date_from: OffsetDateTime,
 ) -> Result<Vec<PathBuf>, LogRetrievalError> {
-    let mut files_to_send = Vec::new();
-    for file in files {
-        let maybe_file_path = file.path.as_str(); // because it can be a glob pattern
-        let file_type = file.config_type.as_str();
-
-        if !file_type.eq(log_type) {
-            continue;
-        } else {
-            for entry in glob(maybe_file_path)? {
-                let file_path = entry?;
-                files_to_send.push(file_path)
-            }
-        }
-    }
-    if files_to_send.is_empty() {
+    let file_list = filter_logs_by_type(files, log_type)?;
+
+    let file_list = filter_logs_by_date(file_list, date_from);
+
+    if file_list.is_empty() {
         Err(LogRetrievalError::NoLogsAvailableForType {
             log_type: log_type.to_string(),
         })
     } else {
-        Ok(files_to_send)
+        Ok(sort_logs_by_date(file_list))
     }
 }
 
-/// filter a vector of pathbufs according to `obj.log.date_from` and `obj.log.date_to`
-pub fn filter_logs_path_on_metadata(
+fn filter_logs_by_type(
+    files: &[FileEntry],
     log_type: &str,
+) -> Result<Vec<(PathBuf, OffsetDateTime, bool)>, LogRetrievalError> {
+    let mut log_list = Vec::new();
+    let wildcard_regex = Regex::new(r"^.*\*.*").unwrap();
+
+    let files: Vec<&str> = files
+        .iter()
+        .filter(|file| file.config_type.eq(log_type))
+        .map(|file| file.path.as_str())
+        .collect();
+
+    for file in files {
+        let paths = glob(file)?;
+        for path in paths {
+            let entry = path?;
+            log_list.push((
+                entry.to_owned(),
+                get_modification_date(entry.as_path())?,
+                wildcard_regex.is_match(file),
+            ))
+        }
+    }
+
+    Ok(log_list)
+}
+
+fn get_modification_date(path: impl AsRef<Path>) -> std::io::Result<OffsetDateTime> {
+    let metadata = std::fs::metadata(&path)?;
+    Ok(OffsetDateTime::from(metadata.modified()?))
+}
+
+fn filter_logs_by_date(
+    files: Vec<(PathBuf, OffsetDateTime, bool)>,
     date_from: OffsetDateTime,
-    mut logs_path_vec: Vec<PathBuf>,
-) -> Result<Vec<PathBuf>, LogRetrievalError> {
-    let mut out = vec![];
+) -> Vec<PathBuf> {
+    // include log files with a static path whether or not they are in the date range
+    files
+        .into_iter()
+        .filter(|(_, modification_time, wildcard_match)| {
+            !wildcard_match || *modification_time >= date_from
+        })
+        .map(|(path, _, _)| path)
+        .collect()
+}
 
-    logs_path_vec.sort_by_key(|pathbuf| {
+fn sort_logs_by_date(mut files: Vec<PathBuf>) -> Vec<PathBuf> {
+    files.sort_by_key(|pathbuf| {
         if let Ok(metadata) = std::fs::metadata(pathbuf) {
             if let Ok(file_modified_time) = metadata.modified() {
                 return OffsetDateTime::from(file_modified_time);
@@ -138,23 +169,10 @@ pub fn filter_logs_path_on_metadata(
         // to UNIX_EPOCH (Jan 1st 1970)
         OffsetDateTime::UNIX_EPOCH
     });
-    logs_path_vec.reverse(); // to get most recent
-    for file_pathbuf in logs_path_vec {
-        let metadata = std::fs::metadata(&file_pathbuf)?;
-        let datetime_modified = OffsetDateTime::from(metadata.modified()?);
-        if datetime_modified >= date_from {
-            out.push(file_pathbuf);
-        }
-    }
+    files.reverse(); // to get most recent
 
-    if out.is_empty() {
-        Err(LogRetrievalError::NoLogsAvailableForType {
-            log_type: log_type.to_string(),
-        })
-    } else {
-        Ok(out)
-    }
+    files
 }
 
 #[cfg(test)]
@@ -172,72 +190,71 @@ mod tests {
         let tempdir = TempTedgeDir::new();
         let tempdir_path = tempdir.path().to_str().unwrap();
 
-        tempdir.file("file_a");
-        tempdir.file("file_b");
-        tempdir.file("file_c");
-        tempdir.file("file_d");
+        tempdir.file("file_a_one");
+        tempdir.file("file_b_one");
+        tempdir.file("file_c_two");
+        tempdir.file("file_d_one");
 
         set_file_mtime(
-            format!("{tempdir_path}/file_a"),
+            format!("{tempdir_path}/file_a_one"),
             FileTime::from_unix_time(2, 0),
         )
         .unwrap();
         set_file_mtime(
-            format!("{tempdir_path}/file_b"),
+            format!("{tempdir_path}/file_b_one"),
             FileTime::from_unix_time(3, 0),
         )
         .unwrap();
         set_file_mtime(
-            format!("{tempdir_path}/file_c"),
+            format!("{tempdir_path}/file_c_two"),
             FileTime::from_unix_time(11, 0),
         )
         .unwrap();
 
         let files: Vec<FileEntry> = vec![
             FileEntry {
-                path: format!("{tempdir_path}/file_a"),
-                config_type: "type_one".to_string(),
-            },
-            FileEntry {
-                path: format!("{tempdir_path}/file_b"),
+                path: format!("{tempdir_path}/*_one"),
                 config_type: "type_one".to_string(),
             },
             FileEntry {
-                path: format!("{tempdir_path}/file_c"),
+                path: format!("{tempdir_path}/*_two"),
                 config_type: "type_two".to_string(),
             },
-            FileEntry {
-                path: format!("{tempdir_path}/file_d"),
-                config_type: "type_one".to_string(),
-            },
         ];
 
         (tempdir, files)
     }
 
     #[test]
-    /// Filter on type = "type_one".
-    /// There are four logs created in tempdir { file_a, file_b, file_c, file_d }
-    /// Of which, { file_a, file_b, file_d } are "type_one"
-    fn test_filter_logs_on_type() {
+    /// Out of logs filtered on type = "type_one", that is: { file_a, file_b, file_d }.
+    /// Only logs filtered on metadata remain, that is { file_b, file_d }.
+    ///
+    /// This is because:
+    ///
+    /// file_a has timestamp: 1970/01/01 00:00:02
+    /// file_b has timestamp: 1970/01/01 00:00:03
+    /// file_d has timestamp: (current, not modified)
+    ///
+    /// The order of the output is { file_d, file_b }, because files are sorted from
+    /// most recent to oldest
+    fn test_filter_logs_path_containing_wildcard_on_type_and_metadata() {
         let (tempdir, files) = prepare();
         let tempdir_path = tempdir.path().to_str().unwrap();
 
-        let logs = filter_logs_on_type(&files, "type_one").unwrap();
+        let logs = filter_logs(&files, "type_one", datetime!(1970-01-01 00:00:03 +00:00)).unwrap();
 
         assert_eq!(
             logs,
             vec![
-                PathBuf::from(format!("{tempdir_path}/file_a")),
-                PathBuf::from(format!("{tempdir_path}/file_b")),
-                PathBuf::from(format!("{tempdir_path}/file_d"))
+                PathBuf::from(format!("{tempdir_path}/file_d_one")),
+                PathBuf::from(format!("{tempdir_path}/file_b_one")),
             ]
         )
     }
 
     #[test]
     /// Out of logs filtered on type = "type_one", that is: { file_a, file_b, file_d }.
-    /// Only logs filtered on metadata remain, that is { file_b, file_d }.
+    /// Logs filtered on metadata are the same, that is { file_a, file_b, file_d }.
     ///
     /// This is because:
     ///
@@ -245,22 +262,42 @@ mod tests {
     /// file_b has timestamp: 1970/01/01 00:00:03
     /// file_d has timestamp: (current, not modified)
     ///
-    /// The order of the output is { file_d, file_b }, because files are sorted from
+    /// However, the files are provided with static paths, so we don't take the date range into consideration
+    ///
+    /// The order of the output is { file_d, file_b, file_a }, because files are sorted from
     /// most recent to oldest
-    fn test_filter_logs_path_on_metadata() {
-        let (tempdir, files) = prepare();
+    fn test_filter_logs_with_static_path_on_type_and_metadata() {
+        let (tempdir, _) = prepare();
         let tempdir_path = tempdir.path().to_str().unwrap();
 
-        let logs = filter_logs_on_type(&files, "type_one").unwrap();
-        let logs =
-            filter_logs_path_on_metadata("type_one", datetime!(1970-01-01 00:00:03 +00:00), logs)
-                .unwrap();
+        // provide a vector of files with static paths
+        let files: Vec<FileEntry> = vec![
+            FileEntry {
+                path: format!("{tempdir_path}/file_a_one"),
+                config_type: "type_one".to_string(),
+            },
+            FileEntry {
+                path: format!("{tempdir_path}/file_b_one"),
+                config_type: "type_one".to_string(),
+            },
+            FileEntry {
+                path: format!("{tempdir_path}/file_c_two"),
+                config_type: "type_two".to_string(),
+            },
+            FileEntry {
+                path: format!("{tempdir_path}/file_d_one"),
+                config_type: "type_one".to_string(),
+            },
+        ];
+
+        let logs = filter_logs(&files, "type_one", datetime!(1970-01-01 00:00:03 +00:00)).unwrap();
 
         assert_eq!(
             logs,
             vec![
-                PathBuf::from(format!("{tempdir_path}/file_d")),
-                PathBuf::from(format!("{tempdir_path}/file_b")),
+                PathBuf::from(format!("{tempdir_path}/file_d_one")),
+                PathBuf::from(format!("{tempdir_path}/file_b_one")),
+                PathBuf::from(format!("{tempdir_path}/file_a_one")),
             ]
         )
     }
@@ -287,7 +324,7 @@
     fn test_read_log_content() {
         let (tempdir, _) = prepare();
         let tempdir_path = tempdir.path().to_str().unwrap();
-        let file_path = &format!("{tempdir_path}/file_a");
+        let file_path = &format!("{tempdir_path}/file_a_one");
         let mut log_file = std::fs::OpenOptions::new()
             .append(true)
             .create(false)
@@ -306,7 +343,7 @@
             read_log_content(Path::new(file_path), line_counter, max_lines, &filter_text).unwrap();
 
         assert_eq!(line_counter, max_lines);
-        assert_eq!(result, "filename: file_a\nthis is the second line.\nthis is the third line.\nthis is the forth line.\nthis is the fifth line.\n");
+        assert_eq!(result, "filename: file_a_one\nthis is the second line.\nthis is the third line.\nthis is the forth line.\nthis is the fifth line.\n");
     }
 
     #[test]
@@ -333,10 +370,10 @@
         let tempdir_path = tempdir.path().to_str().unwrap();
 
         for (file_name, m_time) in [
-            ("file_a", 2),
-            ("file_b", 3),
-            ("file_c", 11),
-            ("file_d", 100),
+            ("file_a_one", 2),
+            ("file_b_one", 3),
+            ("file_c_two", 11),
+            ("file_d_one", 100),
         ] {
             let file_path = &format!("{tempdir_path}/{file_name}");
 
@@ -366,6 +403,6 @@
         assert_eq!(temp_path.parent().unwrap(), tempdir.path());
 
         let result = std::fs::read_to_string(temp_path).unwrap();
-        assert_eq!(result, String::from("filename: file_d\nthis is the first line of file_d.\nthis is the second line of file_d.\nthis is the third line of file_d.\nthis is the forth line of file_d.\nthis is the fifth line of file_d.\nfilename: file_b\nthis is the forth line of file_b.\nthis is the fifth line of file_b.\n"))
+        assert_eq!(result, String::from("filename: file_d_one\nthis is the first line of file_d_one.\nthis is the second line of file_d_one.\nthis is the third line of file_d_one.\nthis is the forth line of file_d_one.\nthis is the fifth line of file_d_one.\nfilename: file_b_one\nthis is the forth line of file_b_one.\nthis is the fifth line of file_b_one.\n"))
     }
 }
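
The behavioural core of this patch is the predicate added in filter_logs_by_date: a file that was discovered through a wildcard (glob) entry is kept only when its modification time is at or after date_from, while a file configured with a static path is always kept. The sketch below is illustrative only and is not part of the patch: it stands in plain u64 epoch seconds for time::OffsetDateTime, replaces the patch's Regex::new(r"^.*\*.*") wildcard test with a simple contains('*') check, and uses invented paths.

// Illustrative sketch only (not part of the patch). Epoch seconds and a
// contains('*') check stand in for time::OffsetDateTime and the patch's
// Regex::new(r"^.*\*.*") wildcard test; all paths are invented.

/// One discovered log file: its path, its mtime, and whether it came from a
/// wildcard (glob) entry rather than a statically configured path.
struct Candidate {
    path: &'static str,
    mtime: u64,          // seconds since the Unix epoch
    from_wildcard: bool, // true if the configured path contained '*'
}

/// Stand-in for the patch's wildcard detection on the configured path.
fn is_wildcard(configured_path: &str) -> bool {
    configured_path.contains('*')
}

/// Same rule as the new filter_logs_by_date: wildcard matches must be at or
/// after date_from; statically configured files are always kept.
fn filter_by_date(candidates: Vec<Candidate>, date_from: u64) -> Vec<&'static str> {
    candidates
        .into_iter()
        .filter(|c| !c.from_wildcard || c.mtime >= date_from)
        .map(|c| c.path)
        .collect()
}

fn main() {
    let glob_entry = "/var/log/app/*.log"; // configured with a wildcard
    let static_entry = "/var/log/static.log"; // configured with a static path

    let candidates = vec![
        // files discovered by expanding the glob entry
        Candidate { path: "/var/log/app/old.log", mtime: 2, from_wildcard: is_wildcard(glob_entry) },
        Candidate { path: "/var/log/app/new.log", mtime: 11, from_wildcard: is_wildcard(glob_entry) },
        // file named directly by the static entry
        Candidate { path: "/var/log/static.log", mtime: 2, from_wildcard: is_wildcard(static_entry) },
    ];

    // With date_from = 3 the stale wildcard match is dropped, but the equally
    // old static file is kept, mirroring the static-path test above.
    let kept = filter_by_date(candidates, 3);
    assert_eq!(kept, vec!["/var/log/app/new.log", "/var/log/static.log"]);
    println!("kept: {kept:?}");
}

The assertion encodes the same expectation as test_filter_logs_with_static_path_on_type_and_metadata: the stale wildcard match is filtered out by the date, while the equally old statically configured file survives.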