Skip to content

Commit

Permalink
Merge pull request #690 from ARTbio/optimize_bowtie
Browse files Browse the repository at this point in the history
Optimize bowtie2 task in repenrich2 tool
  • Loading branch information
drosofff authored Apr 25, 2024
2 parents b3b166a + 9c8ae25 commit 961a14c
Show file tree
Hide file tree
Showing 5 changed files with 31 additions and 30 deletions.
17 changes: 9 additions & 8 deletions tools/repenrich2/RepEnrich2.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,9 +109,13 @@ def run_bowtie(args):
'''
write to files to save memory
'''
metagenome, fastqfile = args
b_opt = "-k 1 -p 1 --quiet --no-hd --no-unal"
command = shlex.split(f"bowtie2 {b_opt} -x {metagenome} {fastqfile}")
metagenome = args
b_opt = "-k 1 -p 2 --quiet --no-hd --no-unal"
if paired_end is True:
command = shlex.split(f"bowtie2 {b_opt} -x {metagenome}"
f" -1 {fastqfile_1} -2 {fastqfile_1}")
else:
command = shlex.split(f"bowtie2 {b_opt} -x {metagenome} {fastqfile_1}")
bowtie_align = subprocess.run(command, check=True,
capture_output=True, text=True).stdout
bowtie_align = bowtie_align.rstrip('\r\n').split('\n')
Expand All @@ -123,17 +127,14 @@ def run_bowtie(args):


# multimapper parsing
args_list = [(metagenome, fastqfile_1) for metagenome in repeat_list]
if paired_end:
args_list.extend([(metagenome, fastqfile_2) for
metagenome in repeat_list])
args_list = [metagenome for metagenome in repeat_list]
with ProcessPoolExecutor(max_workers=cpus) as executor:
results = executor.map(run_bowtie, args_list)

# Aggregate results (avoiding race conditions)
metagenome_reads = defaultdict(list) # metagenome: list of multimap reads

# Now we read .reads file to populate metagenome_reads
# Now we read .reads files to populate metagenome_reads
for metagenome in repeat_list:
with open(f"{metagenome}.reads") as readfile:
for read in readfile:
Expand Down
2 changes: 1 addition & 1 deletion tools/repenrich2/macros.xml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<macros>
<token name="@TOOL_VERSION@">2.31.1</token>
<token name="@VERSION_SUFFIX@">8</token>
<token name="@VERSION_SUFFIX@">9</token>
<token name="@PROFILE@">23.0</token>

<xml name="repenrich_requirements">
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
DNA 184.0
LINE 227.0
LTR 27179.0
Low_complexity 0.67
LTR 27175.0
Low_complexity 0.0
RC 0.0
Simple_repeat 90.33
Simple_repeat 91.0
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
CMC-Transib 30.0
CR1 4.0
Copia 25880.0
Gypsy 1238.0
Gypsy 1234.0
Helitron 0.0
Jockey 107.0
LOA 0.0
Low_complexity 0.67
Low_complexity 0.0
P 60.0
Pao 61.0
R1 116.0
Simple_repeat 90.33
Simple_repeat 91.0
TcMar-Tc1 94.0
30 changes: 15 additions & 15 deletions tools/repenrich2/test-data/chrY_paired_fraction_counts.tab
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ BS LINE Jockey 0.0
BS2 LINE Jockey 58.0
BURDOCK_I-int LTR Gypsy 0.0
Baggins1 LINE LOA 0.0
Bica_I-int LTR Gypsy 49.0
Bica_LTR LTR Gypsy 1.0
Bica_I-int LTR Gypsy 50.0
Bica_LTR LTR Gypsy 0.0
CIRCE LTR Gypsy 0.0
Chouto_I-int LTR Gypsy 1.5
Copia1-I_DM LTR Copia 0.0
Expand Down Expand Up @@ -37,7 +37,7 @@ G3_DM LINE Jockey 0.0
G5A_DM LINE Jockey 0.0
G5_DM LINE Jockey 0.0
G6_DM LINE Jockey 0.0
GA-rich Low_complexity Low_complexity 0.67
GA-rich Low_complexity Low_complexity 0.0
GTWIN_I-int LTR Gypsy 12.5
G_DM LINE Jockey 0.0
Gypsy11_I-int LTR Gypsy 0.0
Expand All @@ -48,7 +48,7 @@ Gypsy2-LTR_DM LTR Gypsy 0.0
Gypsy3_LTR LTR Gypsy 0.0
Gypsy4_I-int LTR Gypsy 0.0
Gypsy5_I-int LTR Gypsy 0.0
Gypsy6A_LTR LTR Gypsy 1.0
Gypsy6A_LTR LTR Gypsy 0.0
Gypsy6_I-int LTR Gypsy 31.0
Gypsy8_I-int LTR Gypsy 0.0
Gypsy8_LTR LTR Gypsy 0.0
Expand All @@ -73,29 +73,29 @@ MAX_I-int LTR Pao 56.0
MAX_LTR LTR Pao 2.0
MDG1_I-int LTR Gypsy 0.0
MDG1_LTR LTR Gypsy 0.0
MDG3_I-int LTR Gypsy 156.5
MDG3_LTR LTR Gypsy 2.5
MDG3_I-int LTR Gypsy 156.0
MDG3_LTR LTR Gypsy 3.0
MICROPIA_I-int LTR Gypsy 51.0
MICROPIA_LTR LTR Gypsy 2.0
Mariner2_DM DNA TcMar-Tc1 0.0
NINJA_I-int LTR Pao 0.0
NOMAD_I-int LTR Gypsy 0.0
PROTOP_A DNA P 50.0
PROTOP_B DNA P 10.0
PROTOP_A DNA P 55.0
PROTOP_B DNA P 5.0
QUASIMODO2-I_DM LTR Gypsy 43.0
QUASIMODO2-LTR_DM LTR Gypsy 0.0
QUASIMODO_I-int LTR Gypsy 108.0
QUASIMODO_LTR LTR Gypsy 23.0
QUASIMODO_I-int LTR Gypsy 105.0
QUASIMODO_LTR LTR Gypsy 25.0
R1_DM LINE R1 0.0
ROOA_I-int LTR Pao 0.0
ROOA_LTR LTR Pao 0.0
ROVER-I_DM LTR Gypsy 414.0
ROVER-LTR_DM LTR Gypsy 6.0
ROVER-LTR_DM LTR Gypsy 5.0
S2_DM DNA TcMar-Tc1 0.0
STALKER4_I-int LTR Gypsy 143.5
STALKER4_I-int LTR Gypsy 146.5
STALKER4_LTR LTR Gypsy 25.0
S_DM DNA TcMar-Tc1 53.0
Stalker2_I-int LTR Gypsy 103.0
Stalker2_I-int LTR Gypsy 99.0
Stalker2_LTR LTR Gypsy 3.0
TART-A LINE Jockey 4.0
TART_B1 LINE Jockey 21.0
Expand All @@ -109,7 +109,7 @@ _AACACA_n Simple_repeat Simple_repeat 0.0
_AAT_n Simple_repeat Simple_repeat 0.0
_ACAATAG_n Simple_repeat Simple_repeat 0.0
_ACC_n Simple_repeat Simple_repeat 0.0
_AGAGAAG_n Simple_repeat Simple_repeat 2.17
_AGAGAAG_n Simple_repeat Simple_repeat 2.5
_AGAGA_n Simple_repeat Simple_repeat 43.0
_ATAAT_n Simple_repeat Simple_repeat 0.0
_ATATATT_n Simple_repeat Simple_repeat 0.0
Expand All @@ -120,7 +120,7 @@ _AT_n Simple_repeat Simple_repeat 0.0
_A_n Simple_repeat Simple_repeat 0.0
_CATA_n Simple_repeat Simple_repeat 0.0
_CTTTT_n Simple_repeat Simple_repeat 0.0
_GAGAA_n Simple_repeat Simple_repeat 45.17
_GAGAA_n Simple_repeat Simple_repeat 45.5
_GCCTTT_n Simple_repeat Simple_repeat 0.0
_TAATAT_n Simple_repeat Simple_repeat 0.0
_TAATA_n Simple_repeat Simple_repeat 0.0
Expand Down

0 comments on commit 961a14c

Please sign in to comment.