Skip to content

Commit

Permalink
Merge pull request #684 from ARTbio/fix_repenrich2
Browse files Browse the repository at this point in the history
fix bowtie2-inspect and metagenome concatenation
  • Loading branch information
drosofff authored Apr 20, 2024
2 parents 191ee27 + 83eca28 commit 6b3b119
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 20 deletions.
23 changes: 5 additions & 18 deletions tools/repenrich2/RepEnrich2_setup.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#!/usr/bin/env python
import argparse
import csv
import os
import shlex
import subprocess
import sys
Expand Down Expand Up @@ -48,15 +47,6 @@
genomefasta = args.genomefasta
cpus = args.cpus

# check that the programs we need are available
try:
subprocess.call(shlex.split("bowtie2 --version"),
stdout=open(os.devnull, 'wb'),
stderr=open(os.devnull, 'wb'))
except OSError:
print("Error: Bowtie2 not available in the path")
raise


def starts_with_numerical(list):
try:
Expand All @@ -68,7 +58,7 @@ def starts_with_numerical(list):
return False


# define a text importer for .out/.txt format of repbase
# text import function for .out/.txt format of repbase
def import_text(filename, separator):
csv.field_size_limit(sys.maxsize)
file = csv.reader(open(filename), delimiter=separator,
Expand All @@ -81,7 +71,7 @@ def import_text(filename, separator):
genome = defaultdict(dict)

for chr in g.keys():
genome[chr]['sequence'] = g[chr].seq
genome[chr]['sequence'] = str(g[chr].seq)
genome[chr]['length'] = len(g[chr].seq)

# Build a BED file of repeat coordinates for use by RepEnrich region_sorter
Expand Down Expand Up @@ -110,7 +100,7 @@ def import_text(filename, separator):

# generate metagenomes and save them to FASTA files for bowtie build
for repname in rep_coords:
metagenome = ''
genomes_list = []
# iterate over the coordinate list in blocks of 3 (chr, start, end)
block = 3
for i in range(0, len(rep_coords[repname]) - block + 1, block):
Expand All @@ -119,11 +109,8 @@ def import_text(filename, separator):
start = max(int(batch[1]) - flankingl, 0)
end = min(int(batch[2]) + flankingl,
int(genome[chromosome]['length'])-1) + 1
metagenome = (
f"{metagenome}{spacer}"
f"{genome[chromosome]['sequence'][start:end]}"
)

genomes_list.append(genome[chromosome]['sequence'][start:end])
metagenome = spacer.join(genomes_list)
# Create Fasta of repeat pseudogenome
fastafilename = f"{repname}.fa"
record = SeqRecord(Seq(metagenome), id=repname, name='', description='')
Expand Down
2 changes: 1 addition & 1 deletion tools/repenrich2/macros.xml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<macros>
<token name="@TOOL_VERSION@">2.31.1</token>
<token name="@VERSION_SUFFIX@">3</token>
<token name="@VERSION_SUFFIX@">4</token>
<token name="@PROFILE@">23.0</token>

<xml name="repenrich_requirements">
Expand Down
2 changes: 1 addition & 1 deletion tools/repenrich2/repenrich2.xml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
#set index_path = 'genome'
#else:
#set index_path = $refGenomeSource.genome.fields.path
bowtie-inspect $index_path > genome.fa &&
ln -s '${index_path}.fa' 'genome.fa' &&
#end if
python $__tool_directory__/RepEnrich2_setup.py
Expand Down

0 comments on commit 6b3b119

Please sign in to comment.