Commit
qarmin committed Oct 5, 2023
1 parent 9f0b3e0 commit e9765e1
Showing 9 changed files with 139 additions and 212 deletions.
2 changes: 1 addition & 1 deletion czkawka_core/src/big_file.rs
@@ -17,7 +17,7 @@ use crate::common_dir_traversal::{common_get_entry_data_metadata, common_read_di
use crate::common_tool::{CommonData, CommonToolData};
use crate::common_traits::{DebugPrint, PrintResults, SaveResults};

#[derive(Clone)]
#[derive(Clone, Debug)]
pub struct FileEntry {
pub path: PathBuf,
pub size: u64,
53 changes: 12 additions & 41 deletions czkawka_core/src/broken_files.rs
@@ -18,11 +18,10 @@ use rayon::prelude::*;
use serde::{Deserialize, Serialize};

use crate::common::{
check_folder_children, create_crash_message, load_cache_from_file_generalized, open_cache_folder, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads,
AUDIO_FILES_EXTENSIONS, IMAGE_RS_BROKEN_FILES_EXTENSIONS, PDF_FILES_EXTENSIONS, ZIP_FILES_EXTENSIONS,
check_folder_children, create_crash_message, load_cache_from_file_generalized, prepare_thread_handler_common, save_cache_to_file_generalized,
send_info_and_wait_for_ending_all_threads, AUDIO_FILES_EXTENSIONS, IMAGE_RS_BROKEN_FILES_EXTENSIONS, PDF_FILES_EXTENSIONS, ZIP_FILES_EXTENSIONS,
};
use crate::common_dir_traversal::{common_get_entry_data_metadata, common_read_dir, get_lowercase_name, get_modified_time, CheckingMethod, ProgressData, ToolType};
use crate::common_messages::Messages;
use crate::common_tool::{CommonData, CommonToolData};
use crate::common_traits::*;

@@ -44,6 +43,12 @@ impl ResultEntry for FileEntry {
fn get_path(&self) -> &Path {
&self.path
}
fn get_modified_date(&self) -> u64 {
self.modified_date
}
fn get_size(&self) -> u64 {
self.size
}
}

#[derive(Copy, Clone, PartialEq, Eq, Serialize, Deserialize, Debug)]
@@ -444,8 +449,9 @@ impl BrokenFiles {
for (_name, file_entry) in loaded_hash_map {
all_results.insert(file_entry.path.to_string_lossy().to_string(), file_entry);
}
let save_as_json = self.get_save_also_as_json();
save_cache_to_file(&all_results, &mut self.common_data.text_messages, save_as_json);

let messages = save_cache_to_file_generalized(&get_cache_file(), &all_results, self.common_data.save_also_as_json);
self.get_text_messages_mut().extend_with_another_messages(messages);
}
debug!("save_to_cache - end");
}
@@ -541,43 +547,8 @@ impl PrintResults for BrokenFiles {
}
}

fn save_cache_to_file(old_hashmap: &BTreeMap<String, FileEntry>, text_messages: &mut Messages, save_also_as_json: bool) {
let mut hashmap: BTreeMap<String, FileEntry> = Default::default();
for (path, fe) in old_hashmap {
if fe.size > 1024 {
hashmap.insert(path.clone(), fe.clone());
}
}
let hashmap = &hashmap;

if let Some(((file_handler, cache_file), (file_handler_json, cache_file_json))) = open_cache_folder(&get_cache_file(), true, save_also_as_json, &mut text_messages.warnings) {
{
let writer = BufWriter::new(file_handler.unwrap()); // Unwrap because cannot fail here
if let Err(e) = bincode::serialize_into(writer, hashmap) {
text_messages
.warnings
.push(format!("Cannot write data to cache file {}, reason {}", cache_file.display(), e));
return;
}
}
if save_also_as_json {
if let Some(file_handler_json) = file_handler_json {
let writer = BufWriter::new(file_handler_json);
if let Err(e) = serde_json::to_writer(writer, hashmap) {
text_messages
.warnings
.push(format!("Cannot write data to cache file {}, reason {}", cache_file_json.display(), e));
return;
}
}
}

text_messages.messages.push(format!("Properly saved to file {} cache entries.", hashmap.len()));
}
}

fn get_cache_file() -> String {
"cache_broken_files.bin".to_string()
"cache_broken_files_61.bin".to_string()
}

fn check_extension_availability(file_name_lowercase: &str) -> TypeOfFile {
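Note on the refactor above: the tool-local save_cache_to_file helper is gone; the tool now builds its result map, hands it to the shared saver together with the save_also_as_json flag, and merges the returned messages back into its own state. Below is a minimal, self-contained sketch of that caller-side flow; the Messages, FileEntry, and save_cache_to_file_generalized definitions here are simplified stand-ins for illustration, not the czkawka implementations.

use std::collections::BTreeMap;
use std::path::PathBuf;

// Hypothetical stand-ins for czkawka's Messages and FileEntry types.
#[derive(Default)]
struct Messages {
    messages: Vec<String>,
    warnings: Vec<String>,
}

impl Messages {
    // Mirrors extend_with_another_messages from the diff: fold another batch of messages in.
    fn extend_with_another_messages(&mut self, other: Messages) {
        self.messages.extend(other.messages);
        self.warnings.extend(other.warnings);
    }
}

struct FileEntry {
    path: PathBuf,
    size: u64,
}

// Simplified saver: the real save_cache_to_file_generalized also writes bincode and optional JSON files.
fn save_cache_to_file_generalized(_cache_file_name: &str, entries: &BTreeMap<String, FileEntry>, _save_also_as_json: bool) -> Messages {
    let mut messages = Messages::default();
    // File I/O is elided in this sketch; only the message flow is shown.
    messages.messages.push(format!("Properly saved to file {} cache entries.", entries.len()));
    messages
}

fn main() {
    let mut text_messages = Messages::default();

    // Collect results keyed by the path rendered as a string, as the tools do.
    let mut all_results: BTreeMap<String, FileEntry> = BTreeMap::new();
    let entry = FileEntry { path: PathBuf::from("/tmp/example.zip"), size: 2048 };
    all_results.insert(entry.path.to_string_lossy().to_string(), entry);

    // Hand everything to the shared saver and merge its messages back.
    let messages = save_cache_to_file_generalized("cache_broken_files_61.bin", &all_results, false);
    text_messages.extend_with_another_messages(messages);

    for m in &text_messages.messages {
        println!("{m}");
    }
}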
82 changes: 75 additions & 7 deletions czkawka_core/src/common.rs
@@ -2,12 +2,12 @@ use rayon::iter::ParallelIterator;
use std::collections::BTreeMap;
use std::ffi::OsString;
use std::fs::{DirEntry, File, OpenOptions};
use std::io::BufReader;
use std::io::{BufReader, BufWriter};
use std::path::{Path, PathBuf};
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
use std::sync::Arc;
use std::thread::{sleep, JoinHandle};
use std::time::{Duration, SystemTime};
use std::time::{Duration, SystemTime, UNIX_EPOCH};
use std::{fs, thread};

#[cfg(feature = "heif")]
@@ -21,7 +21,7 @@ use imagepipe::{ImageSource, Pipeline};
use libheif_rs::{ColorSpace, HeifContext, RgbChroma};
use log::{debug, LevelFilter, Record};
use rayon::prelude::*;
use serde::Deserialize;
use serde::{Deserialize, Serialize};

// #[cfg(feature = "heif")]
// use libheif_rs::LibHeif;
@@ -160,6 +160,45 @@ pub fn open_cache_folder(cache_file_name: &str, save_to_cache: bool, use_json: b
None
}

pub fn save_cache_to_file_generalized<T>(cache_file_name: &str, hashmap: &BTreeMap<String, T>, save_also_as_json: bool) -> Messages
where
T: Serialize + ResultEntry + Sized + Send + Sync,
{
debug!("Saving cache to file {} (or also json alternative) - {} results", cache_file_name, hashmap.len());
let mut text_messages = Messages::new();
if let Some(((file_handler, cache_file), (file_handler_json, cache_file_json))) = open_cache_folder(cache_file_name, true, save_also_as_json, &mut text_messages.warnings) {
{
let writer = BufWriter::new(file_handler.unwrap()); // Unwrap because cannot fail here
if let Err(e) = bincode::serialize_into(writer, &hashmap.values().collect::<Vec<_>>()) {
text_messages
.warnings
.push(format!("Cannot write data to cache file {}, reason {}", cache_file.display(), e));
debug!("Failed to save cache to file {:?}", cache_file);
return text_messages;
}
debug!("Saved binary to file {:?}", cache_file);
}
if save_also_as_json {
if let Some(file_handler_json) = file_handler_json {
let writer = BufWriter::new(file_handler_json);
if let Err(e) = serde_json::to_writer(writer, &hashmap.values().collect::<Vec<_>>()) {
text_messages
.warnings
.push(format!("Cannot write data to cache file {}, reason {}", cache_file_json.display(), e));
debug!("Failed to save cache to file {:?}", cache_file_json);
return text_messages;
}
debug!("Saved json to file {:?}", cache_file_json);
}
}

text_messages.messages.push(format!("Properly saved to file {} cache entries.", hashmap.len()));
} else {
debug!("Failed to save cache to file {cache_file_name} because not exists");
}
text_messages
}

pub fn load_cache_from_file_generalized<T>(cache_file_name: &str, delete_outdated_cache: bool) -> (Messages, Option<BTreeMap<String, T>>)
where
for<'a> T: Deserialize<'a> + ResultEntry + Sized + Send + Sync,
@@ -199,17 +238,46 @@ where
// Don't load cache data if destination file not exists
if delete_outdated_cache {
debug!("Starting to removing outdated cache entries");
vec_loaded_entries = vec_loaded_entries.into_par_iter().filter(|file_entry| !file_entry.get_path().exists()).collect();
debug!("Completed removing outdated cache entries");
let initial_number_of_entries = vec_loaded_entries.len();
vec_loaded_entries = vec_loaded_entries
.into_par_iter()
.filter(|file_entry| {
if !file_entry.get_path().exists() {
return false;
}
let Ok(metadata) = file_entry.get_path().metadata() else {
return false;
};
if metadata.len() != file_entry.get_size() {
return false;
}
let Ok(modified) = metadata.modified() else {
return false;
};
let Ok(secs) = modified.duration_since(UNIX_EPOCH) else {
return false;
};
if secs.as_secs() != file_entry.get_modified_date() {
return false;
}

true
})
.collect();
debug!(
"Completed removing outdated cache entries, removed {} out of all {} entries",
initial_number_of_entries - vec_loaded_entries.len(),
initial_number_of_entries
);
}

text_messages.messages.push(format!("Properly loaded {} cache entries.", vec_loaded_entries.len()));

let map_loaded_entries = vec_loaded_entries
let map_loaded_entries: BTreeMap<_, _> = vec_loaded_entries
.into_iter()
.map(|file_entry| (file_entry.get_path().to_string_lossy().into_owned(), file_entry))
.collect();
debug!("Loaded cache from file {cache_file_name} (or json alternative)");
debug!("Loaded cache from file {cache_file_name} (or json alternative) - {} results", map_loaded_entries.len());
return (text_messages, Some(map_loaded_entries));
}
debug!("Failed to load cache from file {cache_file_name} because not exists");
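Note on the loading change above: a cached entry is now kept only if the file still exists and both its size and modification time match the values recorded in the cache. The predicate below is a standalone sketch of that check using only the standard library; is_cache_entry_still_valid is a hypothetical name chosen for this example.

use std::path::Path;
use std::time::UNIX_EPOCH;

// Returns true only if the file at `path` still exists and its current size and
// modification time (seconds since the Unix epoch) match the cached values,
// mirroring the filter added to load_cache_from_file_generalized.
fn is_cache_entry_still_valid(path: &Path, cached_size: u64, cached_modified_date: u64) -> bool {
    let Ok(metadata) = path.metadata() else {
        return false; // missing file or unreadable metadata -> entry is outdated
    };
    if metadata.len() != cached_size {
        return false; // file was rewritten with different contents
    }
    let Ok(modified) = metadata.modified() else {
        return false;
    };
    let Ok(secs) = modified.duration_since(UNIX_EPOCH) else {
        return false;
    };
    secs.as_secs() == cached_modified_date
}

fn main() {
    // Example: deliberately stale cached values, so this prints "false" for any real file.
    let valid = is_cache_entry_still_valid(Path::new("Cargo.toml"), 0, 0);
    println!("cache entry still valid: {valid}");
}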
6 changes: 6 additions & 0 deletions czkawka_core/src/common_dir_traversal.rs
@@ -70,6 +70,12 @@ impl ResultEntry for FileEntry {
fn get_path(&self) -> &Path {
&self.path
}
fn get_modified_date(&self) -> u64 {
self.modified_date
}
fn get_size(&self) -> u64 {
self.size
}
}

// Symlinks
2 changes: 2 additions & 0 deletions czkawka_core/src/common_traits.rs
@@ -14,4 +14,6 @@ pub trait PrintResults {

pub trait ResultEntry {
fn get_path(&self) -> &Path;
fn get_modified_date(&self) -> u64;
fn get_size(&self) -> u64;
}
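Note on the trait extension above: any entry type that goes through the generalized cache code must now expose its path, size, and modification date. The snippet below is a minimal, self-contained illustration; CustomEntry is a hypothetical type and the trait is re-declared locally so the example compiles on its own (czkawka's real implementations, shown in the diffs above, follow the same shape).

use std::path::{Path, PathBuf};

// Local re-declaration of the trait shown above, so this example is self-contained.
pub trait ResultEntry {
    fn get_path(&self) -> &Path;
    fn get_modified_date(&self) -> u64;
    fn get_size(&self) -> u64;
}

// Hypothetical entry type for a new tool.
pub struct CustomEntry {
    pub path: PathBuf,
    pub size: u64,
    pub modified_date: u64,
}

impl ResultEntry for CustomEntry {
    fn get_path(&self) -> &Path {
        &self.path
    }
    fn get_modified_date(&self) -> u64 {
        self.modified_date
    }
    fn get_size(&self) -> u64 {
        self.size
    }
}

fn main() {
    let entry = CustomEntry {
        path: PathBuf::from("/tmp/file.bin"),
        size: 4096,
        modified_date: 1_696_464_000, // seconds since the Unix epoch
    };
    println!("{} ({} bytes)", entry.get_path().display(), entry.get_size());
}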
47 changes: 12 additions & 35 deletions czkawka_core/src/same_music.rs
@@ -24,11 +24,10 @@ use symphonia::core::meta::MetadataOptions;
use symphonia::core::probe::Hint;

use crate::common::{
create_crash_message, filter_reference_folders_generic, load_cache_from_file_generalized, open_cache_folder, prepare_thread_handler_common,
create_crash_message, filter_reference_folders_generic, load_cache_from_file_generalized, prepare_thread_handler_common, save_cache_to_file_generalized,
send_info_and_wait_for_ending_all_threads, AUDIO_FILES_EXTENSIONS,
};
use crate::common_dir_traversal::{CheckingMethod, DirTraversalBuilder, DirTraversalResult, FileEntry, ProgressData, ToolType};
use crate::common_messages::Messages;
use crate::common_tool::{CommonData, CommonToolData};
use crate::common_traits::*;

@@ -72,6 +71,12 @@ impl ResultEntry for MusicEntry {
fn get_path(&self) -> &Path {
&self.path
}
fn get_modified_date(&self) -> u64 {
self.modified_date
}
fn get_size(&self) -> u64 {
self.size
}
}

impl FileEntry {
@@ -268,8 +273,9 @@ impl SameMusic {
for file_entry in vec_file_entry {
all_results.insert(file_entry.path.to_string_lossy().to_string(), file_entry);
}
let save_also_as_json = self.get_save_also_as_json();
save_cache_to_file(&all_results, &mut self.common_data.text_messages, save_also_as_json, checking_tags);

let messages = save_cache_to_file_generalized(get_cache_file(checking_tags), &all_results, self.common_data.save_also_as_json);
self.get_text_messages_mut().extend_with_another_messages(messages);
debug!("save_cache - end");
}

@@ -746,35 +752,6 @@ impl SameMusic {
}
}

fn save_cache_to_file(hashmap: &BTreeMap<String, MusicEntry>, text_messages: &mut Messages, save_also_as_json: bool, checking_tags: bool) {
if let Some(((file_handler, cache_file), (file_handler_json, cache_file_json))) =
open_cache_folder(get_cache_file(checking_tags), true, save_also_as_json, &mut text_messages.warnings)
{
{
let writer = BufWriter::new(file_handler.unwrap()); // Unwrap because cannot fail here
if let Err(e) = bincode::serialize_into(writer, hashmap) {
text_messages
.warnings
.push(format!("Cannot write data to cache file {}, reason {}", cache_file.display(), e));
return;
}
}
if save_also_as_json {
if let Some(file_handler_json) = file_handler_json {
let writer = BufWriter::new(file_handler_json);
if let Err(e) = serde_json::to_writer(writer, hashmap) {
text_messages
.warnings
.push(format!("Cannot write data to cache file {}, reason {}", cache_file_json.display(), e));
return;
}
}
}

text_messages.messages.push(format!("Properly saved to file {} cache entries.", hashmap.len()));
}
}

// TODO this should be taken from rusty-chromaprint repo, not reimplemented here
fn calc_fingerprint_helper(path: impl AsRef<Path>, config: &Configuration) -> anyhow::Result<Vec<u32>> {
let path = path.as_ref();
@@ -941,9 +918,9 @@ fn read_single_file_tag(path: &str, music_entry: &mut MusicEntry) -> bool {
// Using different cache folders, because loading cache just for finding duplicated tags would be really slow
fn get_cache_file(checking_tags: bool) -> &'static str {
if checking_tags {
"cache_same_music_tags.bin"
"cache_same_music_tags_61.bin"
} else {
"cache_same_music_fingerprints.bin"
"cache_same_music_fingerprints_61.bin"
}
}
