diff --git a/src/track_generic.rs b/src/track_generic.rs index e5c8b30..1681e5c 100644 --- a/src/track_generic.rs +++ b/src/track_generic.rs @@ -274,11 +274,26 @@ impl<'a> GenericMutableTrack<'a> { Self{track} } - // Add a single read to the track by incrementing the value of each bin that - // overlaps with the read. Single end reads are extended in 3' direction - // to have a length of [d]. This is the same as the macs2 `extsize' parameter. - // Reads are not extended if [d] is zero. - // The function returns an error if the read's position is out of range + /// Adds a single read to the coverage track by incrementing the value of each bin that + /// overlaps with the read. If the read is single-end, it is extended in the 3' direction + /// to have a length of `d`. This behavior mimics the `extsize` parameter in macs2. + /// Reads are not extended if `d` is zero. + /// + /// # Arguments + /// + /// * `read` - A reference to the read to be added. Contains sequence name and position data. + /// * `d` - The extension size. If greater than zero, the read will be extended in the 3' direction; + /// otherwise, the read is used as is. + /// + /// # Returns + /// + /// Returns `Ok(())` if the read was successfully added to the track. + /// If the read's position is out of range, it returns an error. + /// + /// # Errors + /// + /// This function will return an error if the read's position falls outside of the track's bin range. + /// Specifically, a `ReadOutOfRangeError` is returned if the read cannot be mapped to any valid bins. pub fn add_read(&mut self, read: &Read, d: usize) -> Result<(), Box> { let bin_size = self.track.get_bin_size(); @@ -303,11 +318,30 @@ impl<'a> GenericMutableTrack<'a> { Ok(()) } - // Add a single read to the track by adding the fraction of overlap between - // the read and each bin. Single end reads are extended in 3' direction - // to have a length of [d]. This is the same as the macs2 `extsize' parameter. - // Reads are not extended if [d] is zero. - // The function returns an error if the read's position is out of range + /// Adds a single read to the coverage track by calculating and adding the fraction of overlap + /// between the read and each bin. If the read is single-end, it is extended in the 3' direction + /// to have a length of `d`, similar to the `extsize` parameter in macs2. If `d` is zero, the read + /// is not extended. + /// + /// The amount added to each bin is proportional to the fraction of the bin that overlaps with the + /// read. For example, if a bin partially overlaps with the read, only the fraction of overlap will + /// be added to the bin's value. + /// + /// # Arguments + /// + /// * `read` - A reference to the read to be added. Contains sequence name and position data. + /// * `d` - The extension size. If greater than zero, the read will be extended in the 3' direction; + /// otherwise, the read is used as is. + /// + /// # Returns + /// + /// Returns `Ok(())` if the read was successfully added to the track. + /// If the read's position is out of range, an error is returned. + /// + /// # Errors + /// + /// This function returns an error if the read's position is outside of the valid bin range. + /// Specifically, a `ReadOutOfRangeError` is returned if the read cannot be mapped to any valid bins. fn add_read_mean_overlap(&mut self, read: &Read, d: usize) -> Result<(), Box> { let bin_size = self.track.get_bin_size(); @@ -335,11 +369,24 @@ impl<'a> GenericMutableTrack<'a> { Ok(()) } - // Add a single read to the track by adding the number of overlapping nucleotides - // between the read and each bin. Single end reads are extended in 3' direction - // to have a length of [d]. This is the same as the macs2 `extsize' parameter. - // Reads are not extended if [d] is zero. - // The function returns an error if the read's position is out of range + /// Adds a single read to the coverage track by calculating and adding the number of overlapping + /// nucleotides between the read and each bin. If the read is single-end, it is extended in the 3' direction + /// to have a length of `d`, similar to the `extsize` parameter in macs2. If `d` is zero, the read is not extended. + /// + /// The value added to each bin corresponds to the total number of nucleotides that overlap between the read + /// and the bin. For instance, if a read spans several bins, the function will increment each bin by the number + /// of nucleotides that fall within that bin. + /// + /// # Arguments + /// + /// * `read` - A reference to the read to be added. Contains sequence name and position data. + /// * `d` - The extension size. If greater than zero, the read will be extended in the 3' direction; + /// otherwise, the read is used as is. + /// + /// # Returns + /// + /// Returns `Ok(())` if the read was successfully added to the track. + /// If the read's position is out of range, an error is returned. fn add_read_overlap(&mut self, read: &Read, d: usize) -> Result<(), Box> { let bin_size = self.track.get_bin_size(); @@ -366,15 +413,39 @@ impl<'a> GenericMutableTrack<'a> { Ok(()) } - // Add reads to track. All single end reads are extended in 3' direction - // to have a length of [d]. This is the same as the macs2 `extsize' parameter. - // Reads are not extended if [d] is zero. - // If [method] is "default", the value of each bin that overlaps the read - // is incremented. If [method] is "overlap", each bin that overlaps the read is - // incremented by the number of overlapping nucleotides. If [method] is "mean - // overlap", each bin that overlaps the read is incremented by the fraction - // of overlapping nucleotides within the bin. - // The function returns an error if the read's position is out of range + /// Adds multiple reads to the coverage track, applying an extension in the 3' direction for single-end reads + /// according to the given extension size `d`. This behavior is similar to the `extsize` parameter in macs2. + /// If `d` is zero, the reads are not extended. + /// + /// The method of how bins are incremented depends on the `method` parameter: + /// + /// - `"default"` or `"simple"`: Increments the value of each bin that overlaps the read by 1. + /// - `"overlap"`: Increments the value of each overlapping bin by the number of nucleotides that overlap the bin. + /// - `"mean overlap"`: Increments the value of each overlapping bin by the fraction of overlapping nucleotides within the bin. + /// + /// # Arguments + /// + /// * `reads` - An iterator over reads to be added to the track. + /// * `d` - The extension size. If greater than zero, each read will be extended in the 3' direction. + /// If zero, reads are used as is. + /// * `method` - A string specifying how the coverage should be computed. Possible values are: + /// - `"default"` or `"simple"`: Increments the value of overlapping bins by 1. + /// - `"overlap"`: Increments overlapping bins by the number of overlapping nucleotides. + /// - `"mean overlap"`: Increments overlapping bins by the fraction of overlapping nucleotides. + /// + /// # Returns + /// + /// Returns the number of reads successfully added to the track. + /// + /// # Panics + /// + /// This function will panic if an invalid `method` is provided (i.e., any value other than `"default"`, `"overlap"`, + /// or `"mean overlap"`). + /// + /// # Errors + /// + /// The function will return an error internally if a read's position is out of the track's valid bin range. + /// pub fn add_reads( &mut self, reads : impl Iterator, @@ -411,11 +482,28 @@ impl<'a> GenericMutableTrack<'a> { n } - // Combine treatment and control from a ChIP-seq experiment into a single track. - // At each genomic location, the number of binned reads from the treatment - // experiment is divided by the number of control reads. To avoid division by - // zero, a pseudocount is added to both treatment and control. The parameter - // d determines the extension of reads. + /// Combines treatment and control tracks from a ChIP-seq experiment into a single normalized track. + /// At each genomic location, the number of binned reads from the treatment track is divided by the number + /// of control reads, and a pseudocount is added to both treatment and control values to avoid division by zero. + /// + /// The normalization is performed across all sequences in the track, and the result is stored in the treatment track. + /// The method supports an optional logarithmic transformation of the normalized values. + /// + /// # Arguments + /// + /// * `control` - A reference to the control track against which the treatment track will be normalized. + /// * `c1` - The pseudocount added to the treatment track to avoid division by zero. Must be strictly positive. + /// * `c2` - The pseudocount added to the control track to avoid division by zero. Must be strictly positive. + /// * `log_scale` - If `true`, the result is transformed to the natural logarithm of the ratio; otherwise, the raw ratio is used. + /// + /// # Returns + /// + /// Returns `Ok(())` if the normalization is successful. + /// + /// # Errors + /// + /// Returns an error if either `c1` or `c2` are non-positive pseudocount values, as pseudocounts must be strictly positive. + /// Additionally, an error may occur if sequences cannot be retrieved from either the treatment or control track. pub fn normalize( &mut self, control : &dyn Track, @@ -554,9 +642,32 @@ impl<'a> GenericMutableTrack<'a> { Ok(()) } - // Smoothen track data with an adaptive window method. For each region the smallest window - // size among windowSizes is selected which contains at least minCounts counts. If the - // minimum number of counts is not reached, the larges window size is selected. + /// Smoothens the track data using an adaptive window method. For each bin, the function selects + /// the smallest window size from the provided `window_sizes` that contains at least `min_counts` counts + /// within the window. If no window size can satisfy the minimum count, the largest window size is used. + /// + /// This method loops over the track's sequences and applies the smoothing operation to each bin, + /// adjusting the values based on the selected window size. + /// + /// # Arguments + /// + /// * `min_counts` - The minimum number of counts required in a window for it to be considered valid. + /// * `window_sizes` - A list of window sizes to select from. The function tries to use the smallest window + /// size that contains at least `min_counts` counts. If none of the windows satisfy + /// this condition, the largest window size is applied. + /// + /// # Returns + /// + /// Returns `Ok(())` if the smoothing operation is completed successfully. + /// + /// # Errors + /// + /// Returns an error if there are issues retrieving sequences or updating bins in the track. + /// + /// # Panics + /// + /// This function does not panic. However, if `window_sizes` is empty, the function will immediately return + /// without modifying the track. pub fn smoothen(&mut self, min_counts: f64, window_sizes: Vec) -> Result<(), Box> { if window_sizes.is_empty() { return Ok(()); @@ -642,6 +753,28 @@ impl<'a> GenericMutableTrack<'a> { Ok(()) } + /// Applies a user-defined function to sliding windows of data from two tracks, modifying the bins of the current track (`self`) based on the values from the provided `track`. + /// The user-defined function `f` is called on each window of data and should return a new value for the corresponding bin in the current track. + /// + /// The function operates over each sequence in the tracks, applying the sliding window approach to each bin. It ensures that the bin sizes of the two tracks match and that + /// the lengths of their sequences are consistent. If the sequences differ in length or if the bin sizes do not match, an error is returned. + /// + /// # Arguments + /// + /// * `track` - A reference to the second track from which windowed values are retrieved for comparison or computation. + /// * `window_size` - The size of the window (number of bins) for the sliding window operation. Each window contains values from `track` centered around the current bin. + /// * `f` - A user-defined function that takes the sequence name (`&str`), the genomic position of the current bin (`usize`), and the array of values in the window (`&[f64]`). + /// The function should return a new value to be set in the current track for that bin. + /// + /// # Returns + /// + /// Returns `Ok(())` if the windowed mapping is applied successfully to all sequences and bins. + /// + /// # Errors + /// + /// * Returns an error if `window_size` is zero. + /// * Returns an error if the bin sizes of the two tracks do not match. + /// * Returns an error if the sequences in the two tracks have different numbers of bins. pub fn window_map( &mut self, track : &dyn Track, @@ -686,6 +819,25 @@ impl<'a> GenericMutableTrack<'a> { Ok(()) } + /// Applies a user-defined function to corresponding bins across multiple tracks, modifying the bins of the current track (`self`) based on the values from the provided list of tracks. + /// The function loops over all bins in all tracks and calls the user-defined function `f` to compute a new value for each bin in the current track. + /// + /// This function checks that the bin sizes of all tracks match and ensures that each sequence has the same number of bins across all tracks before applying the function. + /// + /// # Arguments + /// + /// * `tracks` - A slice of references to the tracks that will be used for the computation. Each track should have the same bin size as the current track. + /// * `f` - A user-defined function that takes the sequence name (`&str`), the genomic position (`usize`), and an array of bin values (`&[f64]`) from the tracks. The function should return a new value to be set in the current track for that bin. + /// + /// # Returns + /// + /// Returns `Ok(())` if the function is applied successfully to all sequences and bins. + /// + /// # Errors + /// + /// * Returns an error if the list of tracks is empty. + /// * Returns an error if the bin sizes of any track do not match the bin size of the current track. + /// * Returns an error if any sequence in the provided tracks has a different number of bins than the corresponding sequence in the current track. pub fn map_list( &mut self, tracks: &[&dyn Track], @@ -748,6 +900,25 @@ impl<'a> GenericMutableTrack<'a> { Ok(()) } + /// Applies a user-defined function to a sliding window of bins across multiple tracks, modifying the bins of the current track (`self`). + /// The function processes each bin by considering the values from a window of surrounding bins in each track. The size of the sliding window is specified by `window_size`. + /// The user-defined function `f` computes a new value for each bin in the current track, based on the corresponding windows from the other tracks. + /// + /// # Arguments + /// + /// * `tracks` - A slice of references to the tracks that will be used for the computation. Each track should have the same bin size as the current track. + /// * `window_size` - The size of the sliding window used to extract bins from the other tracks for each bin in the current track. Must be greater than 0. + /// * `f` - A user-defined function that takes the sequence name (`&str`), the genomic position (`usize`), and a slice of windows (`&[Vec]`) from the tracks. Each window is a `Vec` representing the values of the bins within the window for a specific track. The function should return a new value to be set in the current track for that bin. + /// + /// # Returns + /// + /// Returns `Ok(())` if the function is applied successfully to all sequences and bins. + /// + /// # Errors + /// + /// * Returns an error if `window_size` is 0. + /// * Returns an error if the bin sizes of any track do not match the bin size of the current track. + /// * Returns an error if any sequence in the provided tracks has a different number of bins than the corresponding sequence in the current track. pub fn window_map_list( &mut self, tracks : &[&dyn Track],