Skip to content

Commit

Permalink
clean up a little
Browse files Browse the repository at this point in the history
  • Loading branch information
bluegenes committed Jan 31, 2024
1 parent f5216f8 commit 893e0a7
Show file tree
Hide file tree
Showing 3 changed files with 1 addition and 111 deletions.
4 changes: 1 addition & 3 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ use pyo3::prelude::*;
extern crate simple_error;

mod utils;
use crate::utils::build_selection;
use crate::utils::is_revindex_database;
use crate::utils::{build_selection, build_template};
mod check;
mod fastgather;
mod fastmultigather;
Expand Down Expand Up @@ -212,8 +212,6 @@ fn do_multisearch(
let queryfile_path: camino::Utf8PathBuf = querylist_path.into();
let againstfile_path: camino::Utf8PathBuf = siglist_path.into();
let selection = build_selection(ksize, scaled, &moltype);
// let selection = build_selection(ksize, scaled, &moltype);
let template = build_template(ksize, scaled, &moltype);
match multisearch::multisearch(
&queryfile_path,
&againstfile_path,
Expand Down
2 changes: 0 additions & 2 deletions src/python/tests/test_pairwise.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,8 +150,6 @@ def test_bad_query(runtmp, capfd):
assert "WARNING: 1 query paths failed to load. See error messages above." in captured.err




def test_bad_query_2(runtmp, capfd):
# test with a bad query (a .sig.gz file renamed as zip file)

Expand Down
106 changes: 0 additions & 106 deletions src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -420,69 +420,6 @@ pub fn load_sketches_above_threshold(
Ok((matchlist, skipped_paths, failed_paths))
}

/// Loads all compatible sketches from a ZIP archive at the given path into memory.
/// Currently not parallelized; use a different zip crate to enable parallelization.
///
/// Only archive entries whose file name contains `.sig` are read (this also
/// covers `.sig.gz`). Entries whose name has no final file-name component
/// (for example an empty name or one ending in `..`) are ignored instead of
/// causing a panic.
///
/// # Arguments
///
/// * `zip_path` - Path to the ZIP archive.
/// * `template` - Reference to the Sketch template.
///
/// # Returns
///
/// Returns a tuple containing:
/// * A vector of `SmallSignature`s.
/// * Number of entries that loaded but for which `prepare_query` produced no
///   sketch (i.e. nothing matching the template).
/// * Number of entries that failed to load as signatures.
///
/// # Errors
///
/// Returns an error if:
/// * Unable to open the ZIP file.
/// * ZIP archive is malformed.
/// * An entry cannot be accessed by index.
pub fn load_sketches_from_zip<P: AsRef<Path>>(
    zip_path: P,
    template: &Sketch,
) -> Result<(Vec<SmallSignature>, usize, usize)> {
    let mut sketchlist = Vec::new();
    let zip_file = File::open(&zip_path)?;
    let mut zip_archive = ZipArchive::new(zip_file)?;
    let mut skipped_paths = 0;
    let mut failed_paths = 0;

    // The archive location is the same for every entry; render it once
    // rather than once per loop iteration.
    let location = zip_path.as_ref().display().to_string();

    // loop through, loading signatures
    for i in 0..zip_archive.len() {
        let mut file = zip_archive.by_index(i)?;

        // Take an owned copy of the final path component so the entry can be
        // borrowed mutably for reading below. Entries without a usable file
        // name cannot be signature files, so skip them.
        let file_name = match Path::new(file.name())
            .file_name()
            .and_then(|name| name.to_str())
        {
            Some(name) => name.to_owned(),
            None => continue,
        };

        // ".sig.gz" always contains ".sig", so a single check is sufficient.
        if !file_name.contains(".sig") {
            continue;
        }

        match Signature::from_reader(&mut file) {
            Ok(sigs) => match prepare_query(&sigs, template, &location) {
                Some(sm) => sketchlist.push(sm),
                // track number of paths that have no matching sigs
                None => skipped_paths += 1,
            },
            Err(_) => {
                // failed to load from this path - print error & track.
                eprintln!("WARNING: could not load sketches from path '{}'", file_name);
                failed_paths += 1;
            }
        }
    }
    drop(zip_archive);
    println!("loaded {} signatures", sketchlist.len());
    Ok((sketchlist, skipped_paths, failed_paths))
}

pub enum ReportType {
Query,
Against,
Expand All @@ -500,49 +437,6 @@ impl std::fmt::Display for ReportType {
}
}

/// Entry point for loading compatible signatures from a single input file.
///
/// The input is dispatched on its file extension:
/// * exactly `zip` - the file is read with `load_sketches_from_zip`;
/// * anything else (including no extension) - the file is treated as a
///   `fromfile` sketchlist: its paths are read with
///   `load_sketchlist_filenames` and loaded with `load_sketches`.
///
/// After loading, the skipped/failed counts are handed to
/// `report_on_sketch_loading`, whose error (if any) is propagated.
///
/// # Arguments
///
/// * `sketchlist_path` - Path to either a ZIP archive or a list of signature file paths.
/// * `template` - Reference to the Sketch template (used to load only compatible signatures).
/// * `report_type` - `ReportType` value used in the progress message and the
///   loading report (e.g. whether these are "query" or "against" signatures).
///
/// # Returns
///
/// Returns a vector of `SmallSignature`s.
pub fn load_sketches_from_zip_or_pathlist<P: AsRef<Path>>(
    sketchlist_path: P,
    template: &Sketch,
    report_type: ReportType,
) -> Result<Vec<SmallSignature>> {
    eprintln!(
        "Reading list of {} paths from: '{}'",
        report_type,
        sketchlist_path.as_ref().display()
    );

    // Decide up front which loader applies; the comparison is case-sensitive.
    let is_zip = sketchlist_path
        .as_ref()
        .extension()
        .map_or(false, |ext| ext == "zip");

    let (loaded, n_skipped, n_failed) = if is_zip {
        load_sketches_from_zip(sketchlist_path, template)?
    } else {
        let listed_paths = load_sketchlist_filenames(&sketchlist_path)?;
        load_sketches(listed_paths, template)?
    };

    report_on_sketch_loading(&loaded, n_skipped, n_failed, report_type)?;

    Ok(loaded)
}

pub fn load_collection(
sigpath: &camino::Utf8PathBuf,
selection: &Selection,
Expand Down

0 comments on commit 893e0a7

Please sign in to comment.