Skip to content

Commit

Permalink
1. fix bdg output file str/bytes type issue; 2. fix PE/FW track pileu…
Browse files Browse the repository at this point in the history
…p parameters
  • Loading branch information
taoliu committed Feb 5, 2025
1 parent 0599ae0 commit 9dbb534
Show file tree
Hide file tree
Showing 7 changed files with 102 additions and 70 deletions.
4 changes: 2 additions & 2 deletions MACS3/Commands/hmmratac_cmd.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Time-stamp: <2025-02-04 16:47:43 Tao Liu>
# Time-stamp: <2025-02-05 09:31:12 Tao Liu>


"""Description: Main HMMR command
Expand Down Expand Up @@ -207,7 +207,7 @@ def run(args):
fc_bdg = digested_atac_signals[0]
else:
options.info("# Pile up all fragments")
fc_bdg = petrack.pileup_bdg([1.0, ], baseline_value=0)
fc_bdg = petrack.pileup_bdg(baseline_value=0)
(sum_v, n_v, max_v, min_v, mean_v, std_v) = fc_bdg.summary()
options.info("# Convert pileup to fold-change over average signal")
fc_bdg.apply_func(lambda x: x/mean_v)
Expand Down
8 changes: 4 additions & 4 deletions MACS3/Commands/pileup_cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
under the terms of the BSD License (see the file LICENSE included with
the distribution).
"""
# Time-stamp: <2024-11-30 00:04:48 Tao Liu>
# Time-stamp: <2025-02-05 09:55:28 Tao Liu>
# ------------------------------------
# python modules
# ------------------------------------
Expand Down Expand Up @@ -39,7 +39,7 @@ def run(o_options):
options.PE_MODE = options.format in ('BAMPE', 'BEDPE', 'FRAG')

# 0 prepare output file
outfile = os.path.join(options.outdir, options.outputfile).encode()
outfile = os.path.join(options.outdir, options.outputfile)
if os.path.isfile(outfile):
info("# Existing file %s will be replaced!" % outfile)
os.unlink(outfile)
Expand All @@ -65,10 +65,10 @@ def run(o_options):

if options.bothdirection:
info("# Pileup alignment file, extend each read towards up/downstream direction with %d bps" % options.extsize)
pileup_and_write_se(treat, outfile, options.extsize * 2, 1, directional=False, halfextension=False)
pileup_and_write_se(treat, outfile.encode(), options.extsize * 2, 1, directional=False, halfextension=False)
else:
info("# Pileup alignment file, extend each read towards downstream direction with %d bps" % options.extsize)
pileup_and_write_se(treat, outfile, options.extsize, 1, directional=True, halfextension=False)
pileup_and_write_se(treat, outfile.encode(), options.extsize, 1, directional=True, halfextension=False)

info("# Done! Check %s" % options.outputfile)

Expand Down
18 changes: 9 additions & 9 deletions MACS3/Signal/CallPeakUnit.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# cython: language_level=3
# cython: profile=True
# cython: linetrace=True
# Time-stamp: <2024-10-22 11:42:37 Tao Liu>
# Time-stamp: <2025-02-05 10:15:46 Tao Liu>

"""Module for Calculate Scores.
Expand Down Expand Up @@ -565,12 +565,12 @@ def pileup_treat_ctrl_a_chromosome(self, chrom: bytes):

if self.PE_mode:
treat_pv = self.treat.pileup_a_chromosome(chrom,
[self.treat_scaling_factor,],
self.treat_scaling_factor,
baseline_value=0.0)
else:
treat_pv = self.treat.pileup_a_chromosome(chrom,
[self.d,],
[self.treat_scaling_factor,],
self.d,
self.treat_scaling_factor,
baseline_value=0.0,
directional=True,
end_shift=self.end_shift)
Expand All @@ -585,11 +585,11 @@ def pileup_treat_ctrl_a_chromosome(self, chrom: bytes):
self.ctrl_scaling_factor_s,
baseline_value=self.lambda_bg)
else:
ctrl_pv = self.ctrl.pileup_a_chromosome(chrom,
self.ctrl_d_s,
self.ctrl_scaling_factor_s,
baseline_value=self.lambda_bg,
directional=False)
ctrl_pv = self.ctrl.pileup_a_chromosome_c(chrom,
self.ctrl_d_s,
self.ctrl_scaling_factor_s,
baseline_value=self.lambda_bg,
directional=False)
else:
# a: set global lambda
ctrl_pv = [treat_pv[0][-1:], np.array([self.lambda_bg,],
Expand Down
66 changes: 63 additions & 3 deletions MACS3/Signal/FixWidthTrack.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# cython: language_level=3
# cython: profile=True
# Time-stamp: <2024-10-14 14:53:06 Tao Liu>
# Time-stamp: <2025-02-05 10:23:13 Tao Liu>

"""Module for FWTrack classes.
Expand Down Expand Up @@ -587,14 +587,73 @@ def compute_region_tags_from_peaks(self, peaks: PeakIO,
return retval

@cython.ccall
def pileup_a_chromosome(self, chrom: bytes, ds: list,
scale_factor_s: list,
def pileup_a_chromosome(self, chrom: bytes,
d: cython.long,
scale_factor: cython.float = 1.0,
baseline_value: cython.float = 0.0,
directional: bool = True,
end_shift: cython.int = 0) -> list:
"""pileup a certain chromosome, return [p,v] (end position and
value) list.
d : tag will be extended to this value to 3' direction,
unless directional is False.
scale_factor : linearly scale the pileup value.
baseline_value : a value to be filled for missing values, and
will be the minimum pileup.
directional : if False, the strand or direction of tag will be
ignored, so that extension will be both sides
with d/2.
end_shift : move cutting ends towards 5->3 direction if value
is positive, or towards 3->5 direction if
negative. Default is 0 -- no shift at all.
p and v are numpy.ndarray objects.
"""
five_shift: cython.long
# adjustment to 5' end and 3' end positions to make a fragment
three_shift: cython.long
rlength: cython.long
chrlengths: dict
tmp_pileup: list

chrlengths = self.get_rlengths()
rlength = chrlengths[chrom]

# adjust extension length according to 'directional' and
# 'halfextension' setting.
if directional:
# only extend to 3' side
five_shift = - end_shift
three_shift = end_shift + d
else:
# both sides
five_shift = d//2 - end_shift
three_shift = end_shift + d - d//2

tmp_pileup = se_all_in_one_pileup(self.locations[chrom][0],
self.locations[chrom][1],
five_shift,
three_shift,
rlength,
scale_factor,
baseline_value)
return tmp_pileup

@cython.ccall
def pileup_a_chromosome_c(self, chrom: bytes, ds: list,
scale_factor_s: list,
baseline_value: cython.float = 0.0,
directional: bool = True,
end_shift: cython.int = 0) -> list:
"""pileup a certain chromosome, return [p,v] (end position and
value) list. This is for control data for which we have
multiple different `d` and `scale_factor`.
ds : tag will be extended to this value to 3' direction,
unless directional is False. Can contain multiple
extension values. Final pileup will the maximum.
Expand All @@ -615,6 +674,7 @@ def pileup_a_chromosome(self, chrom: bytes, ds: list,
negative. Default is 0 -- no shift at all.
p and v are numpy.ndarray objects.
"""
d: cython.long
five_shift: cython.long
Expand Down
66 changes: 16 additions & 50 deletions MACS3/Signal/PairedEndTrack.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# cython: language_level=3
# cython: profile=True
# Time-stamp: <2024-11-29 23:37:25 Tao Liu>
# Time-stamp: <2025-02-05 10:07:02 Tao Liu>

"""Module for filter duplicate tags from paired-end data
Expand Down Expand Up @@ -475,41 +475,24 @@ def print_to_bed(self, fhd=None):
@cython.ccall
def pileup_a_chromosome(self,
chrom: bytes,
scale_factor_s: list,
scale_factor: cython.float = 1.0,
baseline_value: cython.float = 0.0) -> list:
"""pileup a certain chromosome, return [p,v] (end position and
value) list.
scale_factor_s : linearly scale the pileup value applied to
each d in ds. The list should have the same
length as ds.
scale_factor : linearly scale the pileup value.
baseline_value : a value to be filled for missing values, and
will be the minimum pileup.
"""
tmp_pileup: list
prev_pileup: list
scale_factor: cython.float

prev_pileup = None

for i in range(len(scale_factor_s)):
scale_factor = scale_factor_s[i]

# Can't directly pass partial nparray there since that will mess up with pointer calculation.
tmp_pileup = quick_pileup(np.sort(self.locations[chrom]['l']),
np.sort(self.locations[chrom]['r']),
scale_factor, baseline_value)

if prev_pileup:
prev_pileup = over_two_pv_array(prev_pileup,
tmp_pileup,
func="max")
else:
prev_pileup = tmp_pileup
tmp_pileup = quick_pileup(np.sort(self.locations[chrom]['l']),
np.sort(self.locations[chrom]['r']),
scale_factor, baseline_value)

return prev_pileup
return tmp_pileup

@cython.ccall
def pileup_a_chromosome_c(self,
Expand Down Expand Up @@ -574,48 +557,31 @@ def pileup_a_chromosome_c(self,

@cython.ccall
def pileup_bdg(self,
scale_factor_s: list,
scale_factor: cython.float = 1.0,
baseline_value: cython.float = 0.0):
"""pileup all chromosomes, and return a bedGraphTrackI object.
scale_factor_s : linearly scale the pileup value applied to
each d in ds. The list should have the same
length as ds.
scale_factor : a value to scale the pileup values.
baseline_value : a value to be filled for missing values, and
will be the minimum pileup.
"""
tmp_pileup: list
prev_pileup: list
scale_factor: cython.float
chrom: bytes
bdg: bedGraphTrackI

bdg = bedGraphTrackI(baseline_value=baseline_value)

for chrom in sorted(self.get_chr_names()):
prev_pileup = None
for i in range(len(scale_factor_s)):
scale_factor = scale_factor_s[i]

# Can't directly pass partial nparray there since that
# will mess up with pointer calculation.
tmp_pileup = quick_pileup(np.sort(self.locations[chrom]['l']),
np.sort(self.locations[chrom]['r']),
scale_factor,
baseline_value)

if prev_pileup:
prev_pileup = over_two_pv_array(prev_pileup,
tmp_pileup,
func="max")
else:
prev_pileup = tmp_pileup
tmp_pileup = quick_pileup(np.sort(self.locations[chrom]['l']),
np.sort(self.locations[chrom]['r']),
scale_factor,
baseline_value)

# save to bedGraph
bdg.add_chrom_data(chrom,
pyarray('i', prev_pileup[0]),
pyarray('f', prev_pileup[1]))
pyarray('i', tmp_pileup[0]),
pyarray('f', tmp_pileup[1]))
return bdg

@cython.ccall
Expand Down
2 changes: 1 addition & 1 deletion MACS3/Signal/Pileup.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# cython: language_level=3
# cython: profile=True
# Time-stamp: <2024-10-22 10:35:32 Tao Liu>
# Time-stamp: <2025-02-05 09:54:56 Tao Liu>

"""Module Description: For pileup functions.
Expand Down
8 changes: 7 additions & 1 deletion test/test_PairedEndTrack.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/usr/bin/env python
# Time-stamp: <2024-10-15 16:07:27 Tao Liu>
# Time-stamp: <2025-02-05 09:28:39 Tao Liu>

import unittest
from MACS3.Signal.PairedEndTrack import PETrackI, PETrackII
Expand Down Expand Up @@ -74,6 +74,12 @@ def test_sample_percent(self):
pe.sample_percent(0.5)
self.assertEqual(pe.total, 8)

def test_pileupbdg(self):
pe = PETrackI()
for (c, l, r) in self.input_regions:
pe.add_loc(c, l, r)
pe.finalize()
pe.pileup_bdg()

class Test_PETrackII(unittest.TestCase):
def setUp(self):
Expand Down

0 comments on commit 9dbb534

Please sign in to comment.