Skip to content

Commit

Permalink
2024/10/28-10:20:03 (Linux VDI0092.zit.bam.de x86_64)
Browse files Browse the repository at this point in the history
  • Loading branch information
pbenner committed Oct 28, 2024
1 parent 90f7b74 commit b57c021
Show file tree
Hide file tree
Showing 2 changed files with 156 additions and 4 deletions.
4 changes: 2 additions & 2 deletions src/granges_gtf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ use crate::error::ArgumentError;
use crate::granges::GRanges;
use crate::meta::MetaData;
use crate::range::Range;
use crate::utility::is_gzip;
use crate::utility::{is_gzip, trim_and_unquote};

/* -------------------------------------------------------------------------- */

Expand Down Expand Up @@ -82,7 +82,7 @@ impl GRanges {

for i in (0..fields.len()).step_by(2) {
let name = &fields[i];
let value_str = &fields[i + 1];
let value_str = trim_and_unquote(&fields[i + 1]);

// Retrieve expected type from the type_map
if let Some(expected_type) = type_map.get(name) {
Expand Down
156 changes: 154 additions & 2 deletions src/utility.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,40 @@ use num::traits::PrimInt;

/* -------------------------------------------------------------------------- */

/// Removes duplicate integers from a slice, preserving the order of first occurrences.
///
/// This function iterates over a slice of `usize` values, and returns a `Vec<usize>` with duplicate
/// values removed. Only the first occurrence of each unique value is retained in the result, and the
/// order of appearance in the input slice is preserved.
///
/// # Parameters
/// - `s`: A slice of `usize` values from which duplicates will be removed.
///
/// # Returns
/// A `Vec<usize>` containing only the unique values from the input slice `s`, in the order they first appear.
///
/// # Examples
///
/// ```rust,ignore
/// let input = vec![1, 2, 2, 3, 4, 4, 5];
/// let result = remove_duplicates_int(&input);
/// assert_eq!(result, vec![1, 2, 3, 4, 5]);
///
/// let input = vec![10, 10, 20, 30, 20];
/// let result = remove_duplicates_int(&input);
/// assert_eq!(result, vec![10, 20, 30]);
///
/// let input = vec![1, 1, 1, 1];
/// let result = remove_duplicates_int(&input);
/// assert_eq!(result, vec![1]);
/// ```
///
/// # Complexity
/// This function has a time complexity of approximately O(n), where `n` is the length of the input slice,
/// due to the use of a `HashSet` to track unique elements.
///
/// # Note
/// The function requires the `HashSet` from the standard library to track elements seen so far.
pub fn remove_duplicates_int(s: &[usize]) -> Vec<usize> {
let mut m: HashSet<usize> = HashSet::new();
let mut r: Vec<usize> = Vec::new();
Expand All @@ -40,18 +74,136 @@ pub fn remove_duplicates_int(s: &[usize]) -> Vec<usize> {

/* -------------------------------------------------------------------------- */

// Helper function for integer division rounding up
/// Trims trailing whitespace and strips one pair of matching outer quotes.
///
/// Only *trailing* whitespace is removed. Leading whitespace is kept as-is, which also means
/// that a quote preceded by whitespace is not treated as an opening quote.
///
/// # Parameters
/// - `input`: A string slice that may carry trailing whitespace and/or enclosing quotes.
///
/// # Returns
/// A new `String` with trailing whitespace removed. If the remaining text both starts and ends
/// with the same quote character (`"` or `'`) and consists of at least two characters, that
/// outermost pair is removed as well. Otherwise only the trailing whitespace is removed.
///
/// # Examples
///
/// ```rust,ignore
/// assert_eq!(trim_and_unquote("'example text' "), "example text");
/// assert_eq!(trim_and_unquote("\"hello world\" "), "hello world");
/// assert_eq!(trim_and_unquote("no quotes here "), "no quotes here");
/// assert_eq!(trim_and_unquote("'unmatched quotes"), "'unmatched quotes");
///
/// // A single quote character is not a quoted string and is returned unchanged.
/// assert_eq!(trim_and_unquote("\""), "\"");
///
/// // Leading whitespace is preserved and hides the opening quote.
/// assert_eq!(trim_and_unquote(" 'text' "), " 'text'");
/// ```
pub fn trim_and_unquote(input: &str) -> String {
    let trimmed = input.trim_end();

    // `strip_prefix` followed by `strip_suffix` only succeeds when the opening and the closing
    // quote are two distinct characters, so a lone `"` or `'` can no longer produce an
    // out-of-range slice.
    let unquoted = trimmed
        .strip_prefix('"')
        .and_then(|rest| rest.strip_suffix('"'))
        .or_else(|| {
            trimmed
                .strip_prefix('\'')
                .and_then(|rest| rest.strip_suffix('\''))
        });

    unquoted.unwrap_or(trimmed).to_string()
}

/* -------------------------------------------------------------------------- */

/// Performs integer division, rounding the result up (towards positive infinity).
///
/// The result is derived from the truncated quotient and the remainder instead of the
/// `(a + b - 1) / b` formula, so no intermediate sum is formed that could overflow for
/// operands close to the maximum of `T`.
///
/// # Type Parameters
/// - `T`: A type that implements the `PrimInt` trait, representing a primitive integer type.
///
/// # Parameters
/// - `a`: The dividend.
/// - `b`: The divisor.
///
/// # Returns
/// The smallest integer that is greater than or equal to the exact quotient `a / b`.
///
/// # Panics
/// Panics if `b` is zero, as division by zero is undefined.
///
/// # Examples
///
/// ```rust,ignore
/// assert_eq!(div_int_up(7, 3), 3);       // 7 / 3 rounded up is 3
/// assert_eq!(div_int_up(10, 2), 5);      // 10 / 2 is exactly 5
/// assert_eq!(div_int_up(0, 4), 0);       // nothing to round
/// assert_eq!(div_int_up(255u8, 2), 128); // no overflow near the type maximum
/// assert_eq!(div_int_up(-7, 3), -2);     // rounds towards positive infinity
/// ```
pub fn div_int_up<T : PrimInt>(a: T, b: T) -> T {
    let quotient = a / b;
    let remainder = a % b;
    let zero = T::zero();

    // `/` truncates towards zero, so the truncated quotient lies below the exact one only when
    // the exact quotient is positive and not an integer. That is the case when the remainder is
    // non-zero and has the same sign as the divisor.
    let needs_bump = (remainder > zero && b > zero) || (remainder < zero && b < zero);

    if needs_bump {
        quotient + T::one()
    } else {
        quotient
    }
}

// Helper function for integer division rounding down
/// Divides `n` by `d` using the plain integer division operator.
///
/// This is a thin, named counterpart to `div_int_up`: the remainder is discarded. For the
/// primitive integer types the `/` operator truncates towards zero, which equals rounding
/// down whenever the operands are non-negative.
///
/// # Type Parameters
/// - `T`: A type that implements the `PrimInt` trait, representing a primitive integer type.
///
/// # Parameters
/// - `n`: The dividend.
/// - `d`: The divisor.
///
/// # Returns
/// The quotient `n / d` with the remainder dropped.
///
/// # Panics
/// Panics if `d` is zero, as division by zero is undefined.
///
/// # Examples
///
/// ```rust,ignore
/// assert_eq!(div_int_down(7, 3), 2);  // remainder 1 is discarded
/// assert_eq!(div_int_down(10, 2), 5); // exact division
/// ```
pub fn div_int_down<T : PrimInt>(n: T, d: T) -> T {
    let quotient = n / d;
    quotient
}

/* -------------------------------------------------------------------------- */

/// Reports whether a path ends in the `gz` extension.
///
/// Only the path itself is inspected; the file is neither opened nor required to exist.
/// The comparison is exact, so `GZ` or `Gz` do not count as a match.
///
/// # Type Parameters
/// - `P`: Anything that can be viewed as a `Path`, e.g. `&str`, `String`, `Path` or `PathBuf`.
///
/// # Parameters
/// - `filename`: The path whose extension is examined.
///
/// # Returns
/// `true` if the path has an extension and that extension equals `gz`; `false` otherwise,
/// including when the path has no extension at all.
///
/// # Examples
///
/// ```rust,ignore
/// assert!(is_gzip("file.txt.gz"));  // last extension is `gz`
/// assert!(!is_gzip("file.txt"));    // different extension
/// assert!(!is_gzip("archive"));     // no extension
/// ```
pub fn is_gzip<P: AsRef<Path>>(filename: P) -> bool {
    match filename.as_ref().extension() {
        Some(extension) => extension == "gz",
        None => false,
    }
}

0 comments on commit b57c021

Please sign in to comment.