Skip to content

Commit

Permalink
Lint and format code
Browse files Browse the repository at this point in the history
  • Loading branch information
nsyzrantsev committed Sep 1, 2024
1 parent e0207ef commit 0553aa0
Show file tree
Hide file tree
Showing 8 changed files with 72 additions and 55 deletions.
2 changes: 2 additions & 0 deletions barkit-extract/src/fastq.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#![allow(clippy::result_large_err)]

use std::fs::{File, OpenOptions};
use std::io::{self, BufRead, BufReader, BufWriter, Read, Write};
use std::path::Path;
Expand Down
2 changes: 1 addition & 1 deletion barkit-extract/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
pub mod parse;
pub mod error;
pub mod fastq;
pub mod logger;
pub mod parse;
pub mod pattern;
pub mod run;
25 changes: 16 additions & 9 deletions barkit-extract/src/logger.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ use console::{style, Emoji};
use indicatif::{HumanDuration, ProgressBar, ProgressStyle};

pub static SPARKLE: Emoji<'_, '_> = Emoji("✨ ", ":-)");
static PROGRESS_BAR_TEMPLATE: &str = "{spinner:.green} [{elapsed_precise}] {bar:40.cyan/blue} {pos:>7}/{len:7} {per_sec} ({eta})";
static PROGRESS_BAR_TEMPLATE: &str =
"{spinner:.green} [{elapsed_precise}] {bar:40.cyan/blue} {pos:>7}/{len:7} {per_sec} ({eta})";

pub struct Logger {
/// Index of the current step
Expand All @@ -18,7 +19,7 @@ pub struct Logger {

/// Start time of execution
execution_start: Instant,

/// Progress bar
progress_bar: Option<ProgressBar>,
}
Expand All @@ -30,7 +31,7 @@ impl Logger {
///
/// ```
/// use barkit_extract::logger::Logger;
///
///
/// let logger = Logger::new(3, false);
/// ```
pub fn new(total: usize, quiet: bool) -> Self {

Check warning on line 37 in barkit-extract/src/logger.rs

View check run for this annotation

Codecov / codecov/patch

barkit-extract/src/logger.rs#L37

Added line #L37 was not covered by tests
Expand All @@ -39,7 +40,7 @@ impl Logger {
total,
quiet,
execution_start: Instant::now(),

Check warning on line 42 in barkit-extract/src/logger.rs

View check run for this annotation

Codecov / codecov/patch

barkit-extract/src/logger.rs#L42

Added line #L42 was not covered by tests
progress_bar: None
progress_bar: None,
}
}

Expand All @@ -49,12 +50,12 @@ impl Logger {
///
/// ```
/// use barkit_extract::logger::Logger;
///
///
/// let mut logger = Logger::new(2, false);
///
///
/// logger.message("first logging message");
/// // Output: "[1/2] first logging message"
///
///
/// logger.message("second logging message");
/// // Output: "[2/2] second logging message"
///
Expand All @@ -65,7 +66,13 @@ impl Logger {
if self.current < self.total {
self.current += 1;
if !self.quiet {
println!("{} {}", style(format!("[{}/{}]", self.current, self.total)).bold().dim(), text);
println!(

Check warning on line 69 in barkit-extract/src/logger.rs

View check run for this annotation

Codecov / codecov/patch

barkit-extract/src/logger.rs#L65-L69

Added lines #L65 - L69 were not covered by tests
"{} {}",
style(format!("[{}/{}]", self.current, self.total))
.bold()
.dim(),
text
);
}
} else {
// Optionally handle the case where `current` exceeds `total`
Expand Down Expand Up @@ -104,4 +111,4 @@ impl Logger {
)
}
}
}
}
7 changes: 7 additions & 0 deletions barkit-extract/src/parse.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#![allow(clippy::result_large_err)]

use crate::pattern::BarcodeRegex;
use regex::bytes::Captures;

Expand Down Expand Up @@ -30,8 +32,13 @@ const TRANSLATION_TABLE: [u8; 256] = {
};

/// Bundles the prepared barcode regex with the flags that control parsing.
///
/// NOTE(review): how these fields are consumed is not visible in this view;
/// the descriptions below restate the intent recorded on each field.
pub struct BarcodeParser {
/// Prepared regex pattern used to parse barcodes.
barcode_regex: BarcodeRegex,

/// If `true`, captured patterns are not trimmed.
/// (Presumably trimmed from the read sequence; confirm against the parsing code.)
skip_trimming: bool,

/// If `true`, the barcode pattern is also matched against the reverse
/// complement sequence.
rc_barcodes: bool,
}

Expand Down
71 changes: 35 additions & 36 deletions barkit-extract/src/pattern.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#![allow(clippy::result_large_err)]

use std::{fmt, mem::size_of};

use fancy_regex::Regex as FancyRegex;
Expand All @@ -11,15 +13,15 @@ const ADAPTER_PATTERN_REGEX: &str = r"(?<!\[)\b[atgcryswkmbdhvn]+\b(?!\])";
pub struct BarcodePattern {
adapter_pattern: FancyRegex,
barcode_pattern: String,
max_error: usize
max_error: usize,
}

impl BarcodePattern {
pub fn new(pattern: &str, max_error: &usize) -> Result<Self, Error> {
Ok(Self {
adapter_pattern: FancyRegex::new(ADAPTER_PATTERN_REGEX)?,
barcode_pattern: pattern.to_owned(),
max_error: *max_error
max_error: *max_error,

Check warning on line 24 in barkit-extract/src/pattern.rs

View check run for this annotation

Codecov / codecov/patch

barkit-extract/src/pattern.rs#L20-L24

Added lines #L20 - L24 were not covered by tests
})
}

Expand All @@ -29,36 +31,36 @@ impl BarcodePattern {
///
/// ```
/// use barkit_extract::pattern::BarcodePattern;
///
///
/// let barcode_pattern = BarcodePattern::new("^atgc(?<UMI>[ATGCN]{12})", &1).unwrap();
///
///
/// let sequences_with_errors = barcode_pattern.get_sequence_with_errors("ATGC");
/// assert_eq!(vec!["ATG.", "AT.C", "A.GC", ".TGC"], sequences_with_errors);
/// ```
pub fn get_sequence_with_errors(&self, sequence: &str) -> Vec<String> {
if self.max_error == 0 {
return vec![sequence.to_string().to_ascii_uppercase()];

Check warning on line 42 in barkit-extract/src/pattern.rs

View check run for this annotation

Codecov / codecov/patch

barkit-extract/src/pattern.rs#L40-L42

Added lines #L40 - L42 were not covered by tests
}

if sequence.is_empty() {
return Vec::new();

Check warning on line 46 in barkit-extract/src/pattern.rs

View check run for this annotation

Codecov / codecov/patch

barkit-extract/src/pattern.rs#L45-L46

Added lines #L45 - L46 were not covered by tests
}

if self.max_error >= sequence.len() {
return vec![FUZZY_CHARACTER.repeat(sequence.len())];

Check warning on line 50 in barkit-extract/src/pattern.rs

View check run for this annotation

Codecov / codecov/patch

barkit-extract/src/pattern.rs#L49-L50

Added lines #L49 - L50 were not covered by tests
}

let num_chars = sequence.chars().count();
assert!(num_chars <= usize::BITS as usize * 8, "too many characters");

Check warning on line 54 in barkit-extract/src/pattern.rs

View check run for this annotation

Codecov / codecov/patch

barkit-extract/src/pattern.rs#L53-L54

Added lines #L53 - L54 were not covered by tests

let max_permutation_mask = usize::MAX
.checked_shr(size_of::<usize>() as u32 * 8 - num_chars as u32)

Check warning on line 57 in barkit-extract/src/pattern.rs

View check run for this annotation

Codecov / codecov/patch

barkit-extract/src/pattern.rs#L56-L57

Added lines #L56 - L57 were not covered by tests
.unwrap();

let mut cases = Vec::new();

Check warning on line 60 in barkit-extract/src/pattern.rs

View check run for this annotation

Codecov / codecov/patch

barkit-extract/src/pattern.rs#L60

Added line #L60 was not covered by tests

let upper: Vec<char> = sequence.chars().map(|c| c.to_ascii_uppercase()).collect();

Check warning on line 62 in barkit-extract/src/pattern.rs

View check run for this annotation

Codecov / codecov/patch

barkit-extract/src/pattern.rs#L62

Added line #L62 was not covered by tests

for permutation_mask in 0..=max_permutation_mask {
if permutation_mask.count_ones() as usize != num_chars - self.max_error {

Check warning on line 65 in barkit-extract/src/pattern.rs

View check run for this annotation

Codecov / codecov/patch

barkit-extract/src/pattern.rs#L64-L65

Added lines #L64 - L65 were not covered by tests
continue;
Expand All @@ -73,7 +75,6 @@ impl BarcodePattern {
}
cases.push(s);

Check warning on line 76 in barkit-extract/src/pattern.rs

View check run for this annotation

Codecov / codecov/patch

barkit-extract/src/pattern.rs#L76

Added line #L76 was not covered by tests
}

cases

Check warning on line 78 in barkit-extract/src/pattern.rs

View check run for this annotation

Codecov / codecov/patch

barkit-extract/src/pattern.rs#L78

Added line #L78 was not covered by tests
}

Expand All @@ -83,9 +84,9 @@ impl BarcodePattern {
///
/// ```
/// use barkit_extract::pattern::BarcodePattern;
///
///
/// let barcode_pattern = BarcodePattern::new("^atgc(?<UMI>[ATGCN]{12})", &1).unwrap();
///
///
/// let pattern_with_pcr_errors = barcode_pattern.get_pattern_with_errors().unwrap();
/// assert_eq!("^(ATG.|AT.C|A.GC|.TGC)(?<UMI>[ATGCN]{12})", pattern_with_pcr_errors);
/// ```
Expand All @@ -108,8 +109,7 @@ impl BarcodePattern {
}
}


#[derive(Clone, Debug, PartialEq)]
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum BarcodeType {
/// Molecular barcode (UMI)
Umi,
Expand All @@ -125,11 +125,11 @@ impl BarcodeType {
/// Parses type of barcode
///
/// # Example
///
///
/// ```
/// use barkit_extract::pattern::BarcodeType;
/// use barkit_extract::error::Error::UnexpectedCaptureGroupName;
///
///
/// assert_eq!(BarcodeType::Umi, BarcodeType::parse_type("UMI").unwrap());
/// assert_eq!(BarcodeType::Sample, BarcodeType::parse_type("SB").unwrap());
/// assert_eq!(BarcodeType::Cell, BarcodeType::parse_type("CB").unwrap());
Expand All @@ -146,15 +146,18 @@ impl BarcodeType {

impl fmt::Display for BarcodeType {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", match self {
BarcodeType::Umi => "UMI",
BarcodeType::Sample => "SB",
BarcodeType::Cell => "CB",
})
write!(

Check warning on line 149 in barkit-extract/src/pattern.rs

View check run for this annotation

Codecov / codecov/patch

barkit-extract/src/pattern.rs#L148-L149

Added lines #L148 - L149 were not covered by tests
f,
"{}",
match self {
BarcodeType::Umi => "UMI",
BarcodeType::Sample => "SB",
BarcodeType::Cell => "CB",

Check warning on line 155 in barkit-extract/src/pattern.rs

View check run for this annotation

Codecov / codecov/patch

barkit-extract/src/pattern.rs#L152-L155

Added lines #L152 - L155 were not covered by tests
}
)
}
}


#[derive(Clone)]
pub struct BarcodeRegex {
/// Regex pattern to parse barcode(s) from read sequence
Expand All @@ -166,11 +169,11 @@ pub struct BarcodeRegex {

impl BarcodeRegex {
/// Creates `BarcodeRegex` instance
///
///
/// Example
/// ```
/// use barkit_extract::pattern::BarcodeRegex;
///
///
/// let barcode_regex = BarcodeRegex::new("^atgc(?<UMI>[ATGCN]{6})", 1);
/// ```
pub fn new(pattern: &str, max_error: usize) -> Result<Self, Error> {
Expand All @@ -184,7 +187,7 @@ impl BarcodeRegex {
})
}

/// Parses capture groups from regex pattern
/// Parses capture groups from regex pattern
fn parse_capture_groups(regex: &Regex) -> Result<Vec<BarcodeType>, Error> {
let mut capture_groups = Vec::<BarcodeType>::new();
for capture_group in regex

Check warning on line 193 in barkit-extract/src/pattern.rs

View check run for this annotation

Codecov / codecov/patch

barkit-extract/src/pattern.rs#L191-L193

Added lines #L191 - L193 were not covered by tests
Expand All @@ -202,13 +205,13 @@ impl BarcodeRegex {
}

/// Captures barcodes in read sequence
///
///
/// Example
/// ```
/// use barkit_extract::pattern::BarcodeRegex;
///
///
/// let barcode_regex = BarcodeRegex::new("^atgc(?<UMI>[ATGCN]{6})", 1).unwrap();
///
///
/// assert_eq!(
/// b"NNNNNN",
/// barcode_regex
Expand All @@ -231,7 +234,6 @@ impl BarcodeRegex {
}
}


#[cfg(test)]
mod tests {
use rstest::rstest;
Expand All @@ -251,10 +253,7 @@ mod tests {
#[case] max_error: usize,
) {
let barcode_pattern = pattern::BarcodePattern::new("", &max_error).unwrap();
assert_eq!(
expected,
barcode_pattern.get_sequence_with_errors(text)
);
assert_eq!(expected, barcode_pattern.get_sequence_with_errors(text));
}

#[rstest]
Expand All @@ -272,4 +271,4 @@ mod tests {
let barcode_pattern = pattern::BarcodePattern::new(pattern, &max_error).unwrap();
assert_eq!(expected, barcode_pattern.get_pattern_with_errors().unwrap())
}
}
}
8 changes: 4 additions & 4 deletions barkit-extract/src/run.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
use rayon::prelude::*;

use crate::parse;
use crate::pattern::BarcodeRegex;
use crate::fastq::{self, CompressionType};
use crate::logger;
use crate::parse;
use crate::pattern::BarcodeRegex;

#[allow(clippy::too_many_arguments)]
pub fn run(
Expand Down Expand Up @@ -74,7 +74,7 @@ fn process_single_end_fastq(
) {
let mut logger = logger::Logger::new(3, quiet);
logger.message("Estimating reads count...");

Check warning on line 76 in barkit-extract/src/run.rs

View check run for this annotation

Codecov / codecov/patch

barkit-extract/src/run.rs#L75-L76

Added lines #L75 - L76 were not covered by tests

let lines_number = fastq::get_reads_count(&fq, threads, max_memory);
logger.set_progress_bar(lines_number);

Check warning on line 79 in barkit-extract/src/run.rs

View check run for this annotation

Codecov / codecov/patch

barkit-extract/src/run.rs#L78-L79

Added lines #L78 - L79 were not covered by tests

Expand Down Expand Up @@ -140,7 +140,7 @@ fn process_pair_end_fastq(
) {
let mut logger = logger::Logger::new(3, quiet);
logger.message("Estimating reads count...");

Check warning on line 142 in barkit-extract/src/run.rs

View check run for this annotation

Codecov / codecov/patch

barkit-extract/src/run.rs#L141-L142

Added lines #L141 - L142 were not covered by tests

let lines_number = fastq::get_reads_count(&fq1, threads, max_memory);
logger.set_progress_bar(lines_number);

Check warning on line 145 in barkit-extract/src/run.rs

View check run for this annotation

Codecov / codecov/patch

barkit-extract/src/run.rs#L144-L145

Added lines #L144 - L145 were not covered by tests

Expand Down
2 changes: 1 addition & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -117,4 +117,4 @@ pub struct AdditionalParamsGroup {
/// Max error (mismatch) between provided pattern and read sequence
#[arg(short = 'e', long, default_value = "1")]
pub max_error: usize,
}
}
10 changes: 6 additions & 4 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,12 @@ fn main() {
quiet,
force,
} => {
let output_compression =
barkit_extract::fastq::CompressionType::select(
&compression.gz, &compression.bgz, &compression.mgz, &compression.lz4,
);
let output_compression = barkit_extract::fastq::CompressionType::select(
&compression.gz,
&compression.bgz,
&compression.mgz,
&compression.lz4,

Check warning on line 21 in src/main.rs

View check run for this annotation

Codecov / codecov/patch

src/main.rs#L19-L21

Added lines #L19 - L21 were not covered by tests
);
barkit_extract::run::run(
input_fastqs.fq1.to_string(),
input_fastqs.fq2.clone(),

Check warning on line 25 in src/main.rs

View check run for this annotation

Codecov / codecov/patch

src/main.rs#L24-L25

Added lines #L24 - L25 were not covered by tests
Expand Down

0 comments on commit 0553aa0

Please sign in to comment.