diff --git a/.vscode/launch.json b/.vscode/launch.json
index 9c9e006..a28bbde 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -205,35 +205,5 @@
             ],
             "cwd": "${workspaceFolder}"
         },
-        {
-            "type": "lldb",
-            "request": "launch",
-            "name": "Test Danbooru async download",
-            "cargo": {
-                "args": [
-                    "build",
-                    "--bin=imageboard_downloader",
-                    "--package=imageboard_downloader"
-                ],
-                "filter": {
-                    "name": "imageboard_downloader",
-                    "kind": "bin"
-                }
-            },
-            "args": [
-                "--async",
-                "-e",
-                "lowres",
-                "--id",
-                "--ignore-unknown",
-                "--no-animated",
-                "-O",
-                "/mnt/ram/tst",
-                "folinic_(arknights)",
-                "solo",
-                "--annotate"
-            ],
-            "cwd": "${workspaceFolder}"
-        }
     ]
 }
\ No newline at end of file
diff --git a/Cargo.toml b/Cargo.toml
index 3c1f2ee..eed9047 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -4,7 +4,7 @@ members = ["ibdl-common", "ibdl-core", "ibdl-extractors"]
 [package]
 name = "imageboard_downloader"
 authors = ["Ferrah Aiko Wolf "]
-version = "1.3.2"
+version = "1.4.0"
 edition = "2021"
 description = "Cli utility to bulk download images from popular imageboard sites (Boorus) using their APIs"
 license = "MIT"
diff --git a/ibdl-core/Cargo.toml b/ibdl-core/Cargo.toml
index 80f1868..b6640f0 100644
--- a/ibdl-core/Cargo.toml
+++ b/ibdl-core/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "ibdl-core"
-version = "1.3.2"
+version = "1.4.0"
 edition = "2021"
 description = "Main download infrastructure for imageboard_downloader"
 license = "MIT"
diff --git a/ibdl-core/src/async_queue/mod.rs b/ibdl-core/src/async_queue/mod.rs
index 9e5253c..ac00cff 100644
--- a/ibdl-core/src/async_queue/mod.rs
+++ b/ibdl-core/src/async_queue/mod.rs
@@ -84,8 +84,7 @@ use zip::ZipWriter;
 
 use crate::progress_bars::ProgressCounter;
 
-use crate::queue::error::QueueError;
-use crate::queue::summary::{SummaryFile, SummaryType};
+use crate::error::QueueError;
 
 static PROGRESS_COUNTERS: OnceCell<Arc<ProgressCounter>> = OnceCell::new();
diff --git a/ibdl-core/src/async_queue/summary.rs b/ibdl-core/src/async_queue/summary.rs
deleted file mode 100644
index 69af167..0000000
--- a/ibdl-core/src/async_queue/summary.rs
+++ /dev/null
@@ -1,205 +0,0 @@
-use chrono::{serde::ts_seconds, DateTime, Utc};
-use ibdl_common::serde::{self, Deserialize, Serialize};
-use ibdl_common::serde_json::from_str;
-use ibdl_common::tokio::sync::mpsc::UnboundedReceiver;
-use ibdl_common::{
-    bincode::{deserialize, serialize},
-    post::{NameType, Post},
-    serde_json::{from_slice, to_string_pretty},
-    tokio,
-    zstd::{decode_all, encode_all},
-    ImageBoards,
-};
-use std::{
-    fs::File,
-    io::{Read, Write},
-    path::Path,
-};
-use tokio::{
-    fs::File as AsyncFile,
-    io::{AsyncReadExt, AsyncWriteExt},
-    task::spawn_blocking,
-};
-use zip::{write::FileOptions, CompressionMethod, ZipArchive, ZipWriter};
-
-use crate::queue::error::QueueError;
-
-/// The download summary can be saved in two formats:
-/// - As a ZSTD-compressed bincode file
-/// - As a generic JSON file.
-#[derive(Debug, Serialize, Deserialize)]
-#[serde(crate = "self::serde")]
-pub enum SummaryType {
-    ZSTDBincode,
-    JSON,
-}
-
-/// The generic information of a [Post](ibdl_common::post) along with the name of the file saved in the output directory.
-#[derive(Debug, Serialize, Deserialize, Eq, PartialOrd, Ord)]
-#[serde(crate = "self::serde")]
-pub struct PostInfo {
-    pub saved_as: String,
-    pub post: Post,
-}
-
-impl PartialEq for PostInfo {
-    fn eq(&self, other: &PostInfo) -> bool {
-        self.post.id == other.post.id
-    }
-}
-
-/// The final summary file. It contains common information for the user to read and the necessary data to filter posts on certain occasions.
-#[derive(Debug, Serialize, Deserialize)]
-#[serde(crate = "self::serde")]
-pub struct SummaryFile {
-    pub file_mode: SummaryType,
-    pub imageboard: ImageBoards,
-    pub name_mode: NameType,
-    pub tags: Vec<String>,
-    #[serde(with = "ts_seconds")]
-    pub last_updated: DateTime<Utc>,
-    pub last_downloaded: u64,
-    pub posts: Vec<PostInfo>,
-}
-
-impl SummaryFile {
-    /// Create a [SummaryFile] from the supplied information about all downloaded posts.
-    pub fn new(
-        imageboard: ImageBoards,
-        tags: &[String],
-        name_mode: NameType,
-        file_mode: SummaryType,
-    ) -> Self {
-        Self {
-            file_mode,
-            imageboard,
-            name_mode,
-            tags: tags.to_vec(),
-            last_updated: Utc::now(),
-            last_downloaded: 0,
-            posts: vec![],
-        }
-    }
-
-    /// Writes this struct as a summary file inside a supplied zip file.
-    pub fn write_zip_summary(&self, zip: &mut ZipWriter<File>) -> Result<(), QueueError> {
-        let serialized = self.to_json()?;
-
-        zip.start_file(
-            "00_summary.json",
-            FileOptions::default()
-                .compression_method(CompressionMethod::Deflated)
-                .compression_level(Some(9)),
-        )?;
-
-        zip.write_all(serialized.as_bytes())?;
-        Ok(())
-    }
-
-    /// Writes this struct as a summary file in the given [Path].
-    pub async fn write_summary(&self, path: &Path) -> Result<(), QueueError> {
-        let mut dsum = AsyncFile::create(path).await?;
-
-        let string = self.to_bincode()?;
-
-        dsum.write_all(&string).await?;
-
-        Ok(())
-    }
-
-    /// Read the summary file from the supplied [Path].
-    pub async fn read_summary(path: &Path, summary_type: SummaryType) -> Result<Self, QueueError> {
-        let mut raw_data: Vec<u8> = vec![];
-        let mut dsum = AsyncFile::open(path).await?;
-
-        dsum.read_to_end(&mut raw_data).await?;
-
-        match summary_type {
-            SummaryType::ZSTDBincode => Ok(Self::from_bincode(&raw_data)?),
-            SummaryType::JSON => Ok(Self::from_json_slice(&raw_data)?),
-        }
-    }
-
-    /// Read the bincode summary and decode it into a [SummaryFile]
-    #[inline]
-    pub fn from_bincode(slice: &[u8]) -> Result<Self, QueueError> {
-        match deserialize::<Self>(&decode_all(slice)?) {
-            Ok(summary) => Ok(summary),
-            Err(err) => Err(QueueError::SummaryDeserializeFail {
-                error: err.to_string(),
-            }),
-        }
-    }
-
-    /// Read the summary as a raw JSON slice and decode it into a [SummaryFile]
-    #[inline]
-    pub fn from_json_slice(slice: &[u8]) -> Result<Self, QueueError> {
-        match from_slice::<Self>(slice) {
-            Ok(sum) => Ok(sum),
-            Err(error) => Err(QueueError::SummaryDeserializeFail {
-                error: error.to_string(),
-            }),
-        }
-    }
-
-    /// Read the summary as a raw JSON string and decode it into a [SummaryFile]
-    #[inline]
-    pub fn from_json_str(text: &str) -> Result<Self, QueueError> {
-        match from_str::<Self>(text) {
-            Ok(sum) => Ok(sum),
-            Err(error) => Err(QueueError::SummaryDeserializeFail {
-                error: error.to_string(),
-            }),
-        }
-    }
-
-    pub async fn read_zip_summary(
-        path: &Path,
-        summary_type: SummaryType,
-    ) -> Result<Self, QueueError> {
-        let path = path.to_path_buf();
-        spawn_blocking(move || -> Result<Self, QueueError> {
-            let file = File::open(&path)?;
-            let mut zip = ZipArchive::new(file)?;
-            let mut raw_bytes = match zip.by_name("00_summary.json") {
-                Ok(bytes) => bytes,
-                Err(_) => {
-                    return Err(QueueError::ZipSummaryReadError {
-                        file: path.display().to_string(),
-                    })
-                }
-            };
-
-            let mut summary_slice = vec![];
-
-            raw_bytes.read_to_end(&mut summary_slice)?;
-
-            match summary_type {
-                SummaryType::ZSTDBincode => Ok(Self::from_bincode(&summary_slice)?),
-                SummaryType::JSON => Ok(Self::from_json_slice(&summary_slice)?),
-            }
-        })
-        .await
-        .unwrap()
-    }
-
-    #[inline]
-    pub fn to_json(&self) -> Result<String, QueueError> {
-        match to_string_pretty(self) {
-            Ok(json) => Ok(json),
-            Err(err) => Err(QueueError::SummarySerializeFail {
-                error: err.to_string(),
-            }),
-        }
-    }
-
-    #[inline]
-    pub fn to_bincode(&self) -> Result<Vec<u8>, QueueError> {
-        match serialize(&self) {
-            Ok(data) => Ok(encode_all(&*data, 9)?),
-            Err(err) => Err(QueueError::SummarySerializeFail {
-                error: err.to_string(),
-            }),
-        }
-    }
-}
diff --git a/ibdl-core/src/cli.rs b/ibdl-core/src/cli.rs
index 064b2b9..52edd73 100644
--- a/ibdl-core/src/cli.rs
+++ b/ibdl-core/src/cli.rs
@@ -86,15 +86,6 @@ pub struct Cli {
     #[clap(short, long, value_parser, help_heading = "DOWNLOAD")]
     pub limit: Option<u16>,
 
-    /// Enable experimental async downloader (currently only available for Danbooru)
-    #[clap(
-        long = "async",
-        value_parser,
-        default_value_t = false,
-        help_heading = "DOWNLOAD"
-    )]
-    pub async_download: bool,
-
     /// Disable blacklist filtering
     #[clap(long, value_parser, default_value_t = false, help_heading = "GENERAL")]
     pub disable_blacklist: bool,
@@ -117,18 +108,6 @@ pub struct Cli {
     )]
     pub start_page: Option<u16>,
 
-    /// Download only the latest images for tag selection.
-    ///
-    /// Will not re-download already present or deleted images from the destination directory
-    #[clap(
-        short,
-        long,
-        value_parser,
-        default_value_t = false,
-        help_heading = "SAVE"
-    )]
-    pub update: bool,
-
     /// Download posts with the selected rating. Can be used multiple times to download posts with other ratings
     #[clap(
         short,
diff --git a/ibdl-core/src/queue/error.rs b/ibdl-core/src/error.rs
similarity index 100%
rename from ibdl-core/src/queue/error.rs
rename to ibdl-core/src/error.rs
diff --git a/ibdl-core/src/lib.rs b/ibdl-core/src/lib.rs
index d61e865..adf2286 100644
--- a/ibdl-core/src/lib.rs
+++ b/ibdl-core/src/lib.rs
@@ -7,8 +7,8 @@ use std::path::{Path, PathBuf};
 
 pub mod async_queue;
 pub mod cli;
+pub mod error;
 pub mod progress_bars;
-pub mod queue;
 
 #[derive(Debug, Clone, Copy)]
 #[repr(transparent)]
diff --git a/ibdl-core/src/queue/mod.rs b/ibdl-core/src/queue/mod.rs
deleted file mode 100644
index b2d5e99..0000000
--- a/ibdl-core/src/queue/mod.rs
+++ /dev/null
@@ -1,578 +0,0 @@
-//! Queue used specifically to download, filter and save posts found by an [`Extractor`](ibdl-extractors::websites).
-//!
-//! # Example usage
-//!
-//! Conveniently using the same example from [here](ibdl-extractors::websites)
-//!
-//! ```rust
-//! use imageboard_downloader::*;
-//! use std::path::PathBuf;
-//!
-//! async fn download_posts() {
-//!     let tags = ["umbreon", "espeon"]; // The tags to search
-//!
-//!     let safe_mode = false; // Setting this to true will ignore searching NSFW posts
-//!
-//!     let disable_blacklist = false; // Will filter all items according to what's set in GBL
-//!
-//!     let mut unit = DanbooruExtractor::new(&tags, safe_mode, disable_blacklist); // Initialize
-//!
-//!     let prompt = true; // If true, will ask the user to input their username and API key.
-//!
-//!     unit.auth(prompt).await.unwrap(); // Try to authenticate
-//!
-//!     let start_page = Some(1); // Start searching from the first page
-//!
-//!     let limit = Some(50); // Max number of posts to download
-//!
-//!     let posts = unit.full_search(start_page, limit).await.unwrap(); // and then, finally, search
-//!
-//!     let sd = 10; // Number of simultaneous downloads.
-//!
-//!     let limit = Some(1000); // Max number of posts to download
-//!
-//!     let cbz = false; // Set to true to download everything into a .cbz file
-//!
-//!     let mut qw = Queue::new( // Initialize the queue
-//!         ImageBoards::Danbooru,
-//!         posts,
-//!         sd,
-//!         Some(unit.client()), // Re-use the client from the extractor
-//!         limit,
-//!         cbz,
-//!     );
-//!
-//!     let output = Some(PathBuf::from("./")); // Where to save the downloaded files or .cbz file
-//!
-//!     let id = true; // Save files with their ID as the filename instead of MD5
-//!
-//!     qw.download(output, id).await.unwrap(); // Start downloading
-//! }
-//! ```
-use futures::stream::iter;
-use futures::StreamExt;
-use ibdl_common::log::debug;
-use ibdl_common::post::error::PostError;
-use ibdl_common::post::rating::Rating;
-use ibdl_common::post::{NameType, Post, PostQueue};
-use ibdl_common::reqwest::Client;
-use ibdl_common::{client, tokio, ImageBoards};
-use md5::compute;
-use owo_colors::OwoColorize;
-use std::convert::TryInto;
-use std::fs::File;
-use std::io::Write;
-use std::path::{Path, PathBuf};
-use std::sync::atomic::Ordering;
-use std::sync::Arc;
-use std::sync::Mutex;
-use tokio::fs::{create_dir_all, read, remove_file, rename, OpenOptions};
-use tokio::io::{AsyncWriteExt, BufWriter};
-use tokio::task::{self, spawn_blocking};
-use zip::write::FileOptions;
-use zip::CompressionMethod;
-use zip::ZipWriter;
-
-use crate::progress_bars::ProgressCounter;
-
-use self::error::QueueError;
-use self::summary::{SummaryFile, SummaryType};
-
-pub(crate) mod error;
-pub mod summary;
-
-macro_rules! finish_and_increment {
-    ($x:expr) => {{
-        $x.main.inc(1);
-        $x.downloaded_mtx.fetch_add(1, Ordering::SeqCst);
-        $x.total_mtx.fetch_add(1, Ordering::SeqCst);
-    }};
-}
-
-/// Struct where all the downloading and filtering will take place
-pub struct Queue {
-    list: Vec<Post>,
-    tags: Vec<String>,
-    imageboard: ImageBoards,
-    sim_downloads: u8,
-    client: Client,
-    cbz: bool,
-    annotate: bool,
-}
-
-impl Queue {
-    /// Set up the queue for download
-    #[must_use]
-    pub fn new(
-        imageboard: ImageBoards,
-        posts: PostQueue,
-        sim_downloads: u8,
-        custom_client: Option<Client>,
-        save_as_cbz: bool,
-        annotate: bool,
-    ) -> Self {
-        let client = if let Some(cli) = custom_client {
-            cli
-        } else {
-            client!(imageboard)
-        };
-
-        Self {
-            list: posts.posts,
-            tags: posts.tags,
-            cbz: save_as_cbz,
-            imageboard,
-            sim_downloads,
-            client,
-            annotate,
-        }
-    }
-
-    async fn create_out(&self, dir: &Path) -> Result<(), QueueError> {
-        if self.cbz {
-            let output_file = dir.parent().unwrap().to_path_buf();
-
-            match create_dir_all(&output_file).await {
-                Ok(_) => (),
-                Err(error) => {
-                    return Err(QueueError::DirCreationError {
-                        message: error.to_string(),
-                    })
-                }
-            };
-            return Ok(());
-        }
-
-        debug!("Target dir: {}", dir.display());
-        match create_dir_all(&dir).await {
-            Ok(_) => (),
-            Err(error) => {
-                return Err(QueueError::DirCreationError {
-                    message: error.to_string(),
-                })
-            }
-        };
-
-        Ok(())
-    }
-
-    /// Starts the download of all posts collected inside a [`PostQueue`]
-    pub async fn download(
-        self,
-        output_dir: PathBuf,
-        name_type: NameType,
-    ) -> Result<u64, QueueError> {
-        let counters = Arc::new(ProgressCounter::initialize(
-            self.list.len().try_into()?,
-            self.imageboard,
-        ));
-
-        self.create_out(&output_dir).await?;
-
-        if self.cbz {
-            self.cbz_path(output_dir, counters.clone(), name_type)
-                .await?;
-
-            counters.main.finish_and_clear();
-
-            return Ok(counters.downloaded_mtx.load(Ordering::SeqCst));
-        }
-
-        debug!("Fetching {} posts", self.list.len());
-
-        iter(self.list)
-            .map(|d| {
-                let cli = self.client.clone();
-                let output = output_dir.clone();
-                let file_path = output_dir.join(d.file_name(name_type));
-                let variant = self.imageboard;
-                let counters = counters.clone();
-                let annotate = self.annotate;
-
-                task::spawn(async move {
-                    if !Self::check_file_exists(&d, &file_path, counters.clone(), name_type).await?
-                    {
-                        Self::fetch(cli, variant, &d, counters, &output, name_type, annotate)
-                            .await?;
-                    }
-                    Ok::<(), QueueError>(())
-                })
-            })
-            .buffer_unordered(self.sim_downloads as usize)
-            .for_each(|_| async {})
-            .await;
-
-        counters.main.finish_and_clear();
-
-        let tot = counters.downloaded_mtx.load(Ordering::SeqCst);
-
-        Ok(tot)
-    }
-
-    async fn cbz_path(
-        &self,
-        path: PathBuf,
-        counters: Arc<ProgressCounter>,
-        name_type: NameType,
-    ) -> Result<(), QueueError> {
-        debug!("Target file: {}", path.display());
-
-        let file = File::create(&path)?;
-        let zip = Arc::new(Mutex::new(ZipWriter::new(file)));
-
-        self.write_zip_structure(zip.clone(), &self.list.clone(), name_type)?;
-
-        debug!("Fetching {} posts", self.list.len());
-
-        iter(self.list.clone())
-            .map(|d| {
-                let cli = self.client.clone();
-                let variant = self.imageboard;
-                let counters = counters.clone();
-                let zip = zip.clone();
-                let annotate = self.annotate;
-
-                task::spawn(async move {
-                    Self::fetch_cbz(cli, variant, name_type, d, annotate, counters, zip).await?;
-                    Ok::<(), QueueError>(())
-                })
-            })
-            .buffer_unordered(self.sim_downloads.into())
-            .for_each(|_| async {})
-            .await;
-
-        let mut mtx = zip.lock().unwrap();
-
-        mtx.finish()?;
-        Ok(())
-    }
-
-    fn write_zip_structure(
-        &self,
-        zip: Arc<Mutex<ZipWriter<File>>>,
-        posts: &[Post],
-        name_type: NameType,
-    ) -> Result<(), QueueError> {
-        let ap = SummaryFile::new(
-            self.imageboard,
-            &self.tags,
-            posts,
-            name_type,
-            SummaryType::JSON,
-        )
-        .to_json()?;
-
-        let mut z_1 = zip.lock().unwrap();
-
-        posts
-            .iter()
-            .any(|post| post.rating == Rating::Unknown)
-            .then(|| -> Result<(), QueueError> {
-                z_1.add_directory(Rating::Unknown.to_string(), FileOptions::default())?;
-                Ok(())
-            });
-
-        z_1.add_directory(Rating::Safe.to_string(), FileOptions::default())?;
-        z_1.add_directory(Rating::Questionable.to_string(), FileOptions::default())?;
-        z_1.add_directory(Rating::Explicit.to_string(), FileOptions::default())?;
-
-        z_1.start_file(
-            "00_summary.json",
-            FileOptions::default()
-                .compression_method(CompressionMethod::Deflated)
-                .compression_level(Some(9)),
-        )?;
-
-        z_1.write_all(ap.as_bytes())?;
-        Ok(())
-    }
-
-    async fn check_file_exists(
-        post: &Post,
-        output: &Path,
-        counters: Arc<ProgressCounter>,
-        name_type: NameType,
-    ) -> Result<bool, QueueError> {
-        let id_name = post.file_name(NameType::ID);
-        let md5_name = post.file_name(NameType::MD5);
-
-        let name = post.file_name(name_type);
-
-        let raw_path = output.parent().unwrap();
-
-        let (actual, file_is_same) = if output.exists() {
-            debug!("File {} found.", &name);
-            (output.to_path_buf(), false)
-        } else if name_type == NameType::ID {
-            debug!("File {} not found.", &name);
-            debug!("Trying possibly matching file: {}", &md5_name);
-            (raw_path.join(Path::new(&md5_name)), true)
-        } else {
-            debug!("File {} not found.", &name);
-            debug!("Trying possibly matching file: {}", &id_name);
-            (raw_path.join(Path::new(&id_name)), true)
-        };
-
-        if actual.exists() {
-            debug!(
-                "Found file {}",
-                actual.file_name().unwrap().to_str().unwrap()
-            );
-            let file_digest = compute(read(&actual).await?);
-            let hash = format!("{:x}", file_digest);
-            if hash == post.md5 {
-                if file_is_same {
-                    match counters.multi.println(format!(
-                        "{} {} {}",
-                        "A file similar to".bold().green(),
-                        name.bold().blue().italic(),
-                        "already exists and will be renamed accordingly."
-                            .bold()
-                            .green()
-                    )) {
-                        Ok(_) => {
-                            rename(&actual, output).await?;
-                        }
-                        Err(error) => {
-                            return Err(QueueError::ProgressBarPrintFail {
-                                message: error.to_string(),
-                            })
-                        }
-                    };
-
-                    counters.main.inc(1);
-                    counters.total_mtx.fetch_add(1, Ordering::SeqCst);
-                    return Ok(true);
-                }
-                match counters.multi.println(format!(
-                    "{} {} {}",
-                    "File".bold().green(),
-                    name.bold().blue().italic(),
-                    "already exists. Skipping.".bold().green()
-                )) {
-                    Ok(_) => (),
-                    Err(error) => {
-                        return Err(QueueError::ProgressBarPrintFail {
-                            message: error.to_string(),
-                        })
-                    }
-                };
-
-                counters.main.inc(1);
-                counters.total_mtx.fetch_add(1, Ordering::SeqCst);
-                return Ok(true);
-            }
-            remove_file(&actual).await?;
-            counters.multi.println(format!(
-                "{} {} {}",
-                "File".bold().red(),
-                name.bold().yellow().italic(),
-                "is corrupted. Re-downloading...".bold().red()
-            ))?;
-
-            Ok(false)
-        } else {
-            Ok(false)
-        }
-    }
-
-    async fn fetch_cbz(
-        client: Client,
-        variant: ImageBoards,
-        name_type: NameType,
-        post: Post,
-        annotate: bool,
-        counters: Arc<ProgressCounter>,
-        zip: Arc<Mutex<ZipWriter<File>>>,
-    ) -> Result<(), PostError> {
-        let filename = post.file_name(name_type);
-        debug!("Fetching {}", &post.url);
-        let res = client.get(&post.url).send().await?;
-
-        if res.status().is_client_error() {
-            counters.multi.println(format!(
-                "{} {}{}",
-                "Image source returned status".bold().red(),
-                res.status().as_str().bold().red(),
-                ". Skipping download.".bold().red()
-            ))?;
-            counters.main.inc(1);
-            return Err(PostError::RemoteFileNotFound);
-        }
-
-        let size = res.content_length().unwrap_or_default();
-
-        let pb = counters.add_download_bar(size, variant);
-
-        // Download the file chunk by chunk.
-        debug!("Retrieving chunks for {}", &filename);
-        let mut stream = res.bytes_stream();
-
-        let buf_size: usize = size.try_into()?;
-
-        let mut fvec: Vec<u8> = Vec::with_capacity(buf_size);
-
-        let options = FileOptions::default().compression_method(CompressionMethod::Stored);
-        let cap_options = FileOptions::default()
-            .compression_method(CompressionMethod::Deflated)
-            .compression_level(Some(5));
-
-        while let Some(item) = stream.next().await {
-            // Retrieve chunk.
-            let chunk = match item {
-                Ok(chunk) => chunk,
-                Err(e) => {
-                    return Err(PostError::ChunkDownloadFail {
-                        message: e.to_string(),
-                    })
-                }
-            };
-            pb.inc(chunk.len().try_into()?);
-
-            // Write to file.
-            AsyncWriteExt::write_all(&mut fvec, &chunk).await?;
-        }
-
-        spawn_blocking(move || -> Result<(), PostError> {
-            let mut un_mut = zip.lock().unwrap();
-
-            debug!("Writing {} to cbz file", filename);
-            match un_mut.start_file(format!("{}/{}", post.rating.to_string(), filename), options) {
-                Ok(_) => {}
-                Err(error) => {
-                    return Err(PostError::ZipFileWriteError {
-                        message: error.to_string(),
-                    })
-                }
-            };
-
-            un_mut.write_all(&fvec)?;
-
-            if annotate {
-                debug!("Writing caption for {} to cbz file", filename);
-                match un_mut.start_file(
-                    format!("{}/{}.txt", post.rating.to_string(), post.name(name_type)),
-                    cap_options,
-                ) {
-                    Ok(_) => {}
-                    Err(error) => {
-                        return Err(PostError::ZipFileWriteError {
-                            message: error.to_string(),
-                        })
-                    }
-                };
-
-                let tag_list = Vec::from_iter(
-                    post.tags
-                        .iter()
-                        .filter(|t| t.is_prompt_tag())
-                        .map(|tag| tag.tag()),
-                );
-
-                let prompt = tag_list.join(", ");
-
-                let f1 = prompt.replace('_', " ");
-                //let f2 = f1.replace('(', "\\(");
-                //let final_prompt = f2.replace(')', "\\)");
-                un_mut.write_all(f1.as_bytes())?;
-            }
-            Ok(())
-        })
-        .await??;
-
-        pb.finish_and_clear();
-
-        finish_and_increment!(counters);
-
-        Ok(())
-    }
-
-    async fn fetch(
-        client: Client,
-        variant: ImageBoards,
-        post: &Post,
-        counters: Arc<ProgressCounter>,
-        output: &Path,
-        name_type: NameType,
-        annotate: bool,
-    ) -> Result<(), PostError> {
-        debug!("Fetching {}", &post.url);
-        let res = client.get(&post.url).send().await?;
-
-        if res.status().is_client_error() {
-            counters.multi.println(format!(
-                "{} {}{}",
-                "Image source returned status".bold().red(),
-                res.status().as_str().bold().red(),
-                ". Skipping download.".bold().red()
-            ))?;
-            counters.main.inc(1);
-            return Err(PostError::RemoteFileNotFound);
-        }
-
-        let size = res.content_length().unwrap_or_default();
-
-        let pb = counters.add_download_bar(size, variant);
-
-        // Download the file chunk by chunk.
-        let mut stream = res.bytes_stream();
-
-        let buf_size: usize = size.try_into()?;
-
-        let out = output.join(post.file_name(name_type));
-
-        debug!("Creating {:?}", &out);
-        let file = OpenOptions::new()
-            .append(true)
-            .create(true)
-            .open(out)
-            .await?;
-
-        let mut bw = BufWriter::with_capacity(buf_size, file);
-
-        while let Some(item) = stream.next().await {
-            // Retrieve chunk.
-            let mut chunk = match item {
-                Ok(chunk) => chunk,
-                Err(e) => {
-                    return Err(PostError::ChunkDownloadFail {
-                        message: e.to_string(),
-                    })
-                }
-            };
-            pb.inc(chunk.len().try_into()?);
-
-            // Write to file.
-            bw.write_all_buf(&mut chunk).await?;
-        }
-        bw.flush().await?;
-
-        if annotate {
-            let mut prompt_file = OpenOptions::new()
-                .append(true)
-                .create(true)
-                .open(output.join(format!("{}.txt", post.name(name_type))))
-                .await?;
-
-            let tag_list = Vec::from_iter(
-                post.tags
-                    .iter()
-                    .filter(|t| t.is_prompt_tag())
-                    .map(|tag| tag.tag()),
-            );
-
-            let prompt = tag_list.join(", ");
-
-            let f1 = prompt.replace('_', " ");
-            //let f2 = f1.replace('(', "\\(");
-            //let final_prompt = f2.replace(')', "\\)");
-            prompt_file.write_all(f1.as_bytes()).await?;
-        }
-
-        pb.finish_and_clear();
-
-        finish_and_increment!(counters);
-
-        Ok(())
-    }
-}
diff --git a/ibdl-core/src/queue/summary.rs b/ibdl-core/src/queue/summary.rs
deleted file mode 100644
index 0fae43e..0000000
--- a/ibdl-core/src/queue/summary.rs
+++ /dev/null
@@ -1,220 +0,0 @@
-use super::error::QueueError;
-use chrono::{serde::ts_seconds, DateTime, Utc};
-use ibdl_common::serde::{self, Deserialize, Serialize};
-use ibdl_common::serde_json::from_str;
-use ibdl_common::{
-    bincode::{deserialize, serialize},
-    post::{NameType, Post},
-    serde_json::{from_slice, to_string_pretty},
-    tokio,
-    zstd::{decode_all, encode_all},
-    ImageBoards,
-};
-use std::{
-    fs::File,
-    io::{Read, Write},
-    path::Path,
-};
-use tokio::{
-    fs::File as AsyncFile,
-    io::{AsyncReadExt, AsyncWriteExt},
-    task::spawn_blocking,
-};
-use zip::{write::FileOptions, CompressionMethod, ZipArchive, ZipWriter};
-
-/// The download summary can be saved in two formats:
-/// - As a ZSTD-compressed bincode file
-/// - As a generic JSON file.
-#[derive(Debug, Serialize, Deserialize)]
-#[serde(crate = "self::serde")]
-pub enum SummaryType {
-    ZSTDBincode,
-    JSON,
-}
-
-/// The generic information of a [Post](ibdl_common::post) along with the name of the file saved in the output directory.
-#[derive(Debug, Serialize, Deserialize, Eq, PartialOrd, Ord)]
-#[serde(crate = "self::serde")]
-pub struct PostInfo {
-    pub saved_as: String,
-    pub post: Post,
-}
-
-impl PartialEq for PostInfo {
-    fn eq(&self, other: &PostInfo) -> bool {
-        self.post.id == other.post.id
-    }
-}
-
-/// The final summary file. It contains common information for the user to read and the necessary data to filter posts on certain occasions.
-#[derive(Debug, Serialize, Deserialize)]
-#[serde(crate = "self::serde")]
-pub struct SummaryFile {
-    pub file_mode: SummaryType,
-    pub imageboard: ImageBoards,
-    pub name_mode: NameType,
-    pub tags: Vec<String>,
-    #[serde(with = "ts_seconds")]
-    pub last_updated: DateTime<Utc>,
-    pub last_downloaded: u64,
-    pub posts: Vec<PostInfo>,
-}
-
-impl SummaryFile {
-    /// Create a [SummaryFile] from the supplied information about all downloaded posts.
-    pub fn new(
-        imageboard: ImageBoards,
-        tags: &[String],
-        posts: &[Post],
-        name_mode: NameType,
-        file_mode: SummaryType,
-    ) -> Self {
-        let last_down = posts.first().unwrap().clone();
-
-        let mut post_list: Vec<PostInfo> = Vec::with_capacity(posts.len());
-
-        posts.iter().for_each(|post| {
-            let info = PostInfo {
-                saved_as: post.file_name(name_mode),
-                post: post.clone(),
-            };
-
-            post_list.push(info);
-        });
-
-        post_list.sort();
-        post_list.reverse();
-
-        Self {
-            file_mode,
-            imageboard,
-            name_mode,
-            tags: tags.to_vec(),
-            last_updated: Utc::now(),
-            last_downloaded: last_down.id,
-            posts: post_list,
-        }
-    }
-
-    /// Writes this struct as a summary file inside a supplied zip file.
-    pub fn write_zip_summary(&self, zip: &mut ZipWriter<File>) -> Result<(), QueueError> {
-        let serialized = self.to_json()?;
-
-        zip.start_file(
-            "00_summary.json",
-            FileOptions::default()
-                .compression_method(CompressionMethod::Deflated)
-                .compression_level(Some(9)),
-        )?;
-
-        zip.write_all(serialized.as_bytes())?;
-        Ok(())
-    }
-
-    /// Writes this struct as a summary file in the given [Path].
-    pub async fn write_summary(&self, path: &Path) -> Result<(), QueueError> {
-        let mut dsum = AsyncFile::create(path).await?;
-
-        let string = self.to_bincode()?;
-
-        dsum.write_all(&string).await?;
-
-        Ok(())
-    }
-
-    /// Read the summary file from the supplied [Path].
-    pub async fn read_summary(path: &Path, summary_type: SummaryType) -> Result<Self, QueueError> {
-        let mut raw_data: Vec<u8> = vec![];
-        let mut dsum = AsyncFile::open(path).await?;
-
-        dsum.read_to_end(&mut raw_data).await?;
-
-        match summary_type {
-            SummaryType::ZSTDBincode => Ok(Self::from_bincode(&raw_data)?),
-            SummaryType::JSON => Ok(Self::from_json_slice(&raw_data)?),
-        }
-    }
-
-    /// Read the bincode summary and decode it into a [SummaryFile]
-    #[inline]
-    pub fn from_bincode(slice: &[u8]) -> Result<Self, QueueError> {
-        match deserialize::<Self>(&decode_all(slice)?) {
-            Ok(summary) => Ok(summary),
-            Err(err) => Err(QueueError::SummaryDeserializeFail {
-                error: err.to_string(),
-            }),
-        }
-    }
-
-    /// Read the summary as a raw JSON slice and decode it into a [SummaryFile]
-    #[inline]
-    pub fn from_json_slice(slice: &[u8]) -> Result<Self, QueueError> {
-        match from_slice::<Self>(slice) {
-            Ok(sum) => Ok(sum),
-            Err(error) => Err(QueueError::SummaryDeserializeFail {
-                error: error.to_string(),
-            }),
-        }
-    }
-
-    /// Read the summary as a raw JSON string and decode it into a [SummaryFile]
-    #[inline]
-    pub fn from_json_str(text: &str) -> Result<Self, QueueError> {
-        match from_str::<Self>(text) {
-            Ok(sum) => Ok(sum),
-            Err(error) => Err(QueueError::SummaryDeserializeFail {
-                error: error.to_string(),
-            }),
-        }
-    }
-
-    pub async fn read_zip_summary(
-        path: &Path,
-        summary_type: SummaryType,
-    ) -> Result<Self, QueueError> {
-        let path = path.to_path_buf();
-        spawn_blocking(move || -> Result<Self, QueueError> {
-            let file = File::open(&path)?;
-            let mut zip = ZipArchive::new(file)?;
-            let mut raw_bytes = match zip.by_name("00_summary.json") {
-                Ok(bytes) => bytes,
-                Err(_) => {
-                    return Err(QueueError::ZipSummaryReadError {
-                        file: path.display().to_string(),
-                    })
-                }
-            };
-
-            let mut summary_slice = vec![];
-
-            raw_bytes.read_to_end(&mut summary_slice)?;
-
-            match summary_type {
-                SummaryType::ZSTDBincode => Ok(Self::from_bincode(&summary_slice)?),
-                SummaryType::JSON => Ok(Self::from_json_slice(&summary_slice)?),
-            }
-        })
-        .await
-        .unwrap()
-    }
-
-    #[inline]
-    pub fn to_json(&self) -> Result<String, QueueError> {
-        match to_string_pretty(self) {
-            Ok(json) => Ok(json),
-            Err(err) => Err(QueueError::SummarySerializeFail {
-                error: err.to_string(),
-            }),
-        }
-    }
-
-    #[inline]
-    pub fn to_bincode(&self) -> Result<Vec<u8>, QueueError> {
-        match serialize(&self) {
-            Ok(data) => Ok(encode_all(&*data, 9)?),
-            Err(err) => Err(QueueError::SummarySerializeFail {
-                error: err.to_string(),
-            }),
-        }
-    }
-}
diff --git a/src/default_path.rs b/src/default_path.rs
deleted file mode 100644
index 4d240bf..0000000
--- a/src/default_path.rs
+++ /dev/null
@@ -1,178 +0,0 @@
-use std::path::Path;
-
-use color_eyre::{eyre::Result, owo_colors::OwoColorize};
-use ibdl_common::{
-    log::debug, post::PostQueue, reqwest::Client, tokio::fs::remove_file, ImageBoards,
-};
-use ibdl_core::{
-    cli::Cli,
-    queue::{
-        summary::{SummaryFile, SummaryType},
-        Queue,
-    },
-};
-use ibdl_extractors::websites::{
-    danbooru::DanbooruExtractor, e621::E621Extractor, gelbooru::GelbooruExtractor,
-    moebooru::MoebooruExtractor, Extractor, MultiWebsite,
-};
-use spinoff::{spinners, Color, Spinner};
-
-use crate::utils::{auth_imgboard, print_results};
-
-pub async fn default_path(args: Cli) -> Result<()> {
-    let mut spinner = Spinner::new_with_stream(
-        spinners::SimpleDotsScrolling,
-        "Scanning for posts, please wait".bold().to_string(),
-        Color::Blue,
-        spinoff::Streams::Stderr,
-    );
-
-    let mut nt = args.name_type();
-
-    let (mut post_queue, total_black, client) = search_args(&args).await?;
-
-    if post_queue.posts.is_empty() {
-        println!("{}", "No posts left to download!".bold());
-        spinner.clear();
-        return Ok(());
-    }
-
-    post_queue.prepare(args.limit);
-
-    spinner.clear();
-
-    let dirname = args.generate_save_path()?;
-
-    let summary_path = dirname.join(Path::new(".00_download_summary.bin"));
-
-    if args.update && summary_path.exists() {
-        let summary_file = SummaryFile::read_summary(&summary_path, SummaryType::ZSTDBincode).await;
-        if let Ok(post) = summary_file {
-            debug!("Latest post found: {}", post.last_downloaded);
-            post_queue.posts.retain(|c| c.id > post.last_downloaded);
-            post_queue.posts.shrink_to_fit();
-            nt = post.name_mode;
-        } else {
-            debug!("Summary file is corrupted, ignoring...");
-            remove_file(&summary_path).await?;
-        }
-    }
-
-    let post_list = post_queue.posts.clone();
-
-    let qw = Queue::new(
-        *args.imageboard,
-        post_queue,
-        args.simultaneous_downloads,
-        Some(client),
-        args.cbz,
-        args.annotate,
-    );
-
-    let total_down = qw.download(dirname, nt).await?;
-
-    if !args.cbz {
-        let summary = SummaryFile::new(
-            *args.imageboard,
-            &args.tags,
-            &post_list,
-            nt,
-            SummaryType::ZSTDBincode,
-        );
-        summary.write_summary(&summary_path).await?;
-    }
-
-    print_results(total_down, total_black);
-
-    Ok(())
-}
-
-async fn search_args(args: &Cli) -> Result<(PostQueue, u64, Client)> {
-    let ratings = args.selected_ratings();
-
-    match *args.imageboard {
-        ImageBoards::Danbooru => {
-            let mut unit = DanbooruExtractor::new(
-                &args.tags,
-                &ratings,
-                args.disable_blacklist,
-                !args.no_animated,
-            );
-            auth_imgboard(args.auth, &mut unit).await?;
-
-            unit.exclude_tags(&args.exclude);
-
-            if let Some(ext) = args.get_extension() {
-                unit.force_extension(ext);
-            }
-
-            let posts = unit.full_search(args.start_page, args.limit).await?;
-
-            debug!("Collected {} valid posts", posts.posts.len());
-
-            Ok((posts, unit.total_removed(), unit.client()))
-        }
-        ImageBoards::E621 => {
-            let mut unit = E621Extractor::new(
-                &args.tags,
-                &ratings,
-                args.disable_blacklist,
-                !args.no_animated,
-            );
-            auth_imgboard(args.auth, &mut unit).await?;
-
-            unit.exclude_tags(&args.exclude);
-
-            if let Some(ext) = args.get_extension() {
-                unit.force_extension(ext);
-            }
-
-            let posts = unit.full_search(args.start_page, args.limit).await?;
-
-            debug!("Collected {} valid posts", posts.posts.len());
-
-            Ok((posts, unit.total_removed(), unit.client()))
-        }
-        ImageBoards::Rule34 | ImageBoards::Realbooru | ImageBoards::Gelbooru => {
-            let mut unit = GelbooruExtractor::new(
-                &args.tags,
-                &ratings,
-                args.disable_blacklist,
-                !args.no_animated,
-            );
-
-            unit.exclude_tags(&args.exclude)
-                .set_imageboard(*args.imageboard);
-
-            let posts = unit.full_search(args.start_page, args.limit).await?;
-
-            if let Some(ext) = args.get_extension() {
-                unit.force_extension(ext);
-            }
-
-            debug!("Collected {} valid posts", posts.posts.len());
-
-            Ok((posts, unit.total_removed(), unit.client()))
-        }
-        ImageBoards::Konachan => {
-            let mut unit = MoebooruExtractor::new(
-                &args.tags,
-                &ratings,
-                args.disable_blacklist,
-                !args.no_animated,
-            );
-
-            unit.exclude_tags(&args.exclude);
-
-            let posts = unit.full_search(args.start_page, args.limit).await?;
-
-            if let Some(ext) = args.get_extension() {
-                unit.force_extension(ext);
-            }
-
-            debug!("Collected {} valid posts", posts.posts.len());
-
-            Ok((posts, unit.total_removed(), unit.client()))
-        }
-    }
-}
diff --git a/src/main.rs b/src/main.rs
index 6685733..3c886c7 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,12 +1,10 @@
 use crate::async_path::async_path;
 use color_eyre::eyre::Result;
-use default_path::default_path;
 use ibdl_common::tokio;
 use ibdl_core::clap::Parser;
 use ibdl_core::cli::Cli;
 
 mod async_path;
-mod default_path;
 mod utils;
 
 #[tokio::main]
@@ -15,9 +13,5 @@ async fn main() -> Result<()> {
     env_logger::builder().format_timestamp(None).init();
     color_eyre::install()?;
 
-    if args.async_download {
-        async_path(&args).await
-    } else {
-        default_path(args).await
-    }
+    async_path(&args).await
 }
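For reference, this is roughly what src/main.rs looks like once the two hunks above are applied. It is a sketch reconstructed from the diff: the argument-parsing line sits in the region elided between the hunks and is assumed here from the `clap::Parser` import and the later use of `args`.

use crate::async_path::async_path;
use color_eyre::eyre::Result;
use ibdl_common::tokio;
use ibdl_core::clap::Parser;
use ibdl_core::cli::Cli;

mod async_path;
mod utils;

#[tokio::main]
async fn main() -> Result<()> {
    // Assumed from the elided region between the hunks: parse CLI args with clap.
    let args = Cli::parse();
    env_logger::builder().format_timestamp(None).init();
    color_eyre::install()?;

    // The `--async` branch is gone; the async pipeline is now the only code path.
    async_path(&args).await
}

Per the rename hunk above, downstream code that previously imported ibdl_core::queue::error::QueueError now imports ibdl_core::error::QueueError.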