Skip to content

Commit

Permalink
Merge pull request #23 from OpenOmics/update_assembly_resources
Browse files (browse the repository at this point in the history)
Update assembly resources
  • Loading branch information
skchronicles authored Jul 18, 2024
2 parents e799352 + 4ca8d1d commit 953ccbe
Show file tree
Hide file tree
Showing 15 changed files with 186 additions and 101 deletions.
34 changes: 31 additions & 3 deletions .github/workflows/main.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,21 +10,49 @@ on:
branches_ignore: []

jobs:
Dry_Run_and_Lint:
Dry_Run_and_Lint_DNA&RNA:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: docker://snakemake/snakemake:latest
- name: Dry Run with test data
- name: Dry Run
run: |
docker run -v $PWD:/opt2 snakemake/snakemake:latest \
/opt2/metamorph run --input /opt2/.tests/test_cohort.txt \
--output /opt2/output --mode local --dry-run
- name: View the pipeline config file
- name: View the pipeline config file [DNA & RNA modes]
run: |
echo "Generated config file for pipeline...." && cat $PWD/output/config.json
- name: Lint Workflow
continue-on-error: true
run: |
docker run -v $PWD:/opt2 snakemake/snakemake:latest snakemake --lint -s /opt2/output/workflow/Snakefile -d /opt2/output || \
echo 'There may have been a few warnings or errors. Please read through the log to determine if its harmless.'
Dry_Run_and_Lint_DNAonly1:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: docker://snakemake/snakemake:latest
- name: Dry Run [DNA only mode]
run: |
docker run -v $PWD:/opt2 snakemake/snakemake:latest \
/opt2/metamorph run --input /opt2/.tests/test_cohort_DNAonly.txt \
--output /opt2/output --mode local --dry-run
- name: View the pipeline config file [DNA only mode]
run: |
echo "Generated config file for pipeline...." && cat $PWD/output/config.json
- name: Lint Workflow
continue-on-error: true
run: |
docker run -v $PWD:/opt2 snakemake/snakemake:latest snakemake --lint -s /opt2/output/workflow/Snakefile -d /opt2/output || \
echo 'There may have been a few warnings or errors. Please read through the log to determine if its harmless.'
Dry_Run_DNA2:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: docker://snakemake/snakemake:latest
- name: Dry Run [DNA only mode w RNA column]
run: |
docker run -v $PWD:/opt2 snakemake/snakemake:latest \
/opt2/metamorph run --input /opt2/.tests/test_cohort_DNAonly_RNAcolexists.txt \
--output /opt2/output --mode local --dry-run
5 changes: 5 additions & 0 deletions .tests/test_cohort_DNAonly.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
DNA
/opt2/.tests/WT_S1_R1.fastq.gz
/opt2/.tests/WT_S1_R2.fastq.gz
/opt2/.tests/WT_S2_R1.fastq.gz
/opt2/.tests/WT_S2_R2.fastq.gz
5 changes: 5 additions & 0 deletions .tests/test_cohort_DNAonly_RNAcolexists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
DNA RNA
/opt2/.tests/WT_S1_R1.fastq.gz
/opt2/.tests/WT_S1_R2.fastq.gz
/opt2/.tests/WT_S2_R1.fastq.gz
/opt2/.tests/WT_S2_R2.fastq.gz
15 changes: 8 additions & 7 deletions config/cluster.json
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@
"gres": "lscratch:400"
},
"metawrap_genome_assembly": {
"threads": 128,
"mem": "128G",
"threads": 56,
"mem": "240G",
"partition": "norm",
"time": "10-00:00:00"
},
Expand All @@ -36,7 +36,8 @@
"threads": 32,
"mem": "64G",
"partition": "norm",
"time": "2-00:00:00"
"time": "2-00:00:00",
"gres": "lscratch:600"
},
"derep_bins": {
"threads": 32,
Expand All @@ -52,8 +53,8 @@
"time": "2-00:00:00"
},
"bbtools_index_map": {
"threads": 48,
"mem": "64G",
"threads": 28,
"mem": "220G",
"partition": "norm",
"time": "2-00:00:00"
},
Expand Down Expand Up @@ -92,10 +93,10 @@
},
"rna_humann_classify": {
"threads": 32,
"mem": "64G",
"mem": "128G",
"partition": "norm",
"time": "10-00:00:00",
"gres": "lscratch:600"
"gres": "lscratch:800"
},
"map_to_rna_to_mag": {
"threads": 32,
Expand Down
4 changes: 2 additions & 2 deletions config/images.json
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
{
"images": {
"metawrap": "docker://rroutsong/metamorph_metawrap:0.0.9"
"metawrap": "docker://rroutsong/metamorph_metawrap:0.0.11"
},
"containers": {
"metawrap": "/data/OpenOmics/SIFs/metamorph_metawrap_0.0.9.sif"
"metawrap": "/data/OpenOmics/SIFs/metamorph_metawrap_0.0.11.sif"
}
}
14 changes: 7 additions & 7 deletions config/resources.json
Original file line number Diff line number Diff line change
Expand Up @@ -53,37 +53,37 @@
}, {
"name": "CAT_db",
"to": "/data2/CAT",
"from": "/vf/users/OpenOmics/references/metamorph/CAT_pack_prepare/cat_db/db",
"from": "/data/OpenOmics/references/metamorph/CAT_pack_prepare/db",
"mode": "rw"
}, {
"name": "CAT_tax",
"to": "/data2/CAT_tax",
"from": "/vf/users/OpenOmics/references/metamorph/CAT_pack_prepare/cat_db/db",
"from": "/data/OpenOmics/references/metamorph/CAT_pack_prepare/tax",
"mode": "rw"
}, {
"name": "humann3_protein_db",
"to": "/data2/uniref",
"from": "/vf/users/OpenOmics/references/metamorph/uniref",
"from": "/data/OpenOmics/references/metamorph/uniref",
"mode": "rw"
}, {
"name": "humann3_nuc_db",
"to": "/data2/chocophlan",
"from": "/vf/users/OpenOmics/references/metamorph/chocophlan",
"from": "/data/OpenOmics/references/metamorph/chocophlan",
"mode": "rw"
}, {
"name": "humann3_config",
"to": "/opt/conda/envs/bb3/lib/python3.7/site-packages/humann",
"from": "/vf/users/OpenOmics/references/metamorph/humann",
"from": "/data/OpenOmics/references/metamorph/humann",
"mode": "rw"
}, {
"name": "utility_mapping",
"to": "/data2/um",
"from": "/vf/users/OpenOmics/references/metamorph/utility_mapping",
"from": "/data/OpenOmics/references/metamorph/utility_mapping",
"mode": "rw"
}, {
"name": "metaphlan_db",
"to": "/data2/metaphlan",
"from": "/vf/users/OpenOmics/references/metamorph/metaphlan",
"from": "/data/OpenOmics/references/metamorph/metaphlan",
"mode": "rw"
}
]
Expand Down
2 changes: 1 addition & 1 deletion docker/metawrap/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ RUN mamba install -y -n bb3 -c biobakery humann
RUN mkdir /install; cd /install; wget https://carlowood.github.io/which/which-2.21.tar.gz; tar xvf which-2.21.tar.gz
RUN cd /install/which-2.21; ./configure; make && make install
RUN rm /usr/bin/which; ln -s /usr/local/bin/which /usr/bin/which
RUN cd /home; git clone https://github.com/dutilh/CAT.git; chmod +x CAT_pack/CAT_pack; ln -s /home/CAT/CAT_pack/CAT_pack /usr/bin/CAT; chmod +x /usr/bin/CAT
RUN cd /home; git clone https://github.com/rroutsong/CAT_pack.git; ln -s /home/CAT_pack/CAT_pack/CAT_pack /usr/bin/CAT
RUN cd /install; wget https://github.com/hyattpd/Prodigal/archive/refs/tags/v2.6.3.tar.gz; tar xvf v2.6.3.tar.gz
RUN cd /install/Prodigal-2.6.3; make install INSTALLDIR=/usr/bin
RUN cd /install; wget https://github.com/bbuchfink/diamond/archive/refs/tags/v2.1.9.tar.gz; tar xvf v2.1.9.tar.gz
Expand Down
16 changes: 10 additions & 6 deletions metamorph
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,8 @@ from src.utils import (
exists,
fatal,
check_cache,
require
require,
permissions
)


Expand Down Expand Up @@ -114,19 +115,21 @@ def run(sub_args):

# Step 1. Parse the input sample sheet into downstream
# data structures consistent with the argparse parser
validated, sample_map = valid_input(sub_args.input)
validated, rna_included = valid_input(sub_args.input)

delattr(sub_args, 'input')
setattr(sub_args, 'input', [row['DNA'] for row in validated])
setattr(sub_args, 'rna', [row['RNA'] for row in validated])
if rna_included:
setattr(sub_args, 'rna', [row['RNA'] for row in validated])

# Step 2. Initialize working directory,
# copy over required resources to run
# the pipeline
git_repo = __home__

fastq_inputs = [sub_args.input]
if sub_args.rna:

if getattr(sub_args, 'rna', False):
fastq_inputs.append(sub_args.rna)

input_files = init(
Expand All @@ -142,7 +145,8 @@ def run(sub_args):
config = setup(sub_args,
ifiles = input_files,
repo_path = git_repo,
output_path = sub_args.output
output_path = sub_args.output,
rna=rna_included
)

# Step 4. Resolve docker/singularity bind
Expand Down
70 changes: 42 additions & 28 deletions src/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def init(repo_path, output_path, links=[], required=['workflow', 'resources', 'c
try:
os.mkdir(os.path.join(output_path, 'rna'))
except FileExistsError:
pass
pass
inputs['rna'] = sym_safe(input_data = links[1], target = os.path.join(output_path, 'rna'))

return inputs
Expand Down Expand Up @@ -112,7 +112,7 @@ def sym_safe(input_data, target):
if not exists(renamed):
# Create a symlink if it does not already exist
# Link to the absolute source path; the updated call no longer resolves source symlinks via realpath
os.symlink(os.path.abspath(os.path.realpath(file)), renamed)
os.symlink(os.path.abspath(file), renamed)

return input_fastqs

Expand Down Expand Up @@ -182,7 +182,7 @@ def get_sid(filepath):
return


def setup(sub_args, ifiles, repo_path, output_path):
def setup(sub_args, ifiles, repo_path, output_path, rna=True):
"""Setup the pipeline for execution and creates config file from templates
@param sub_args <parser.parse_args() object>:
Parsed arguments for run sub-command
Expand Down Expand Up @@ -238,13 +238,16 @@ def setup(sub_args, ifiles, repo_path, output_path):
config['options'][opt] = v

# RNA -> DNA mapping
sample_map = {}
dna_files, rna_files = ifiles['dna'], ifiles['rna']
for i in range(len(dna_files)):
r_sid = get_sid(rna_files[i])
d_sid = get_sid(dna_files[i])
sample_map[r_sid] = d_sid
config['sample_map'] = sample_map
if rna:
sample_map = {}
dna_files, rna_files = ifiles['dna'], ifiles['rna']
for i in range(len(dna_files)):
r_sid = get_sid(rna_files[i])
d_sid = get_sid(dna_files[i])
sample_map[r_sid] = d_sid
config['sample_map'] = sample_map
else:
config['rna'] = False

return config

Expand Down Expand Up @@ -339,7 +342,7 @@ def bind(sub_args, config):

if 'databases' in config:
dbs = config.pop('databases')
bindpaths.extend([resolve(mount['from'])+':'+resolve(mount['to'])+':'+mount['mode'] for mount in dbs])
bindpaths.extend([resolve(mount['from'])+':'+mount['to']+':'+mount['mode'] for mount in dbs])

if 'options' in config and 'input' in config['options']:
inrents = list(set([os.path.abspath(os.path.dirname(p)) for p in config['options']['input'] if os.path.exists(os.path.dirname(p)) and os.path.isdir(os.path.dirname(p))]))
Expand All @@ -351,7 +354,11 @@ def bind(sub_args, config):

if 'options' in config and 'rna' in config['options']:
rnarents = list(set([os.path.abspath(os.path.dirname(p)) for p in config['options']['rna'] if os.path.exists(os.path.dirname(p)) and os.path.isdir(os.path.dirname(p))]))
bindpaths.extend(rnarents)
common_parent = longest_common_parent_path(rnarents)
if common_parent:
bindpaths.extend([common_parent])
else:
bindpaths.extend(rnarents)

if 'options' in config and 'output' in config['options']:
if os.path.exists(config['options']['output']) and os.path.isdir(config['options']['output']):
Expand All @@ -360,8 +367,8 @@ def bind(sub_args, config):
if 'tmp_dir' in config:
bindpaths.append(config['tmp_dir'])

rawdata_bind_paths = [os.path.abspath(p) for p in config['project']['datapath'].split(',')]
working_directory = os.path.realpath(config['project']['workpath'])
# rawdata_bind_paths = [os.path.abspath(p) for p in config['project']['datapath'].split(',')]
# working_directory = os.path.realpath(config['project']['workpath'])

return list(set(bindpaths))

Expand Down Expand Up @@ -486,7 +493,7 @@ def add_rawdata_information(sub_args, config, ifiles):

# Finds the set of rawdata directories to bind
config['project']['datapath'] = ','.join(get_rawdata_bind_paths(input_files = sub_args.input))
if sub_args.rna:
if getattr(sub_args, 'rna', False):
config["project"]["rna_datapath"] = ','.join(get_rawdata_bind_paths(input_files = sub_args.rna))

# Add each sample's basename
Expand Down Expand Up @@ -613,7 +620,7 @@ def get_rawdata_bind_paths(input_files):
bindpaths = []
for file in input_files:
# Get directory of input file
rawdata_src_path = os.path.dirname(os.path.abspath(os.path.realpath(file)))
rawdata_src_path = os.path.dirname(os.path.abspath(file))
if rawdata_src_path not in bindpaths:
bindpaths.append(rawdata_src_path)

Expand Down Expand Up @@ -675,26 +682,33 @@ def valid_input(sheet):
raise argparse.ArgumentTypeError(f"Path `{sheet}` exists, but cannot read path due to permissions!")

# check format to make sure it's correct
sheet = open(sheet, 'r')
dialect = Sniffer().sniff(sheet.read(), [',', "\t"])
sheet.seek(0)
rdr = DictReader(sheet, delimiter=dialect.delimiter)
if sheet.endswith('.tsv') or sheet.endswith('.txt'):
delim = '\t'
elif sheet.endswith('.csv'):
delim = '\t'

rdr = DictReader(open(sheet, 'r'), delimiter=delim)

if 'DNA' not in rdr.fieldnames:
raise argparse.ArgumentTypeError("Sample sheet does not contain `DNA` column")
if 'RNA' not in rdr.fieldnames:
raise argparse.ArgumentTypeError("Sample sheet does not contain `RNA` column")
data = [row for row in rdr]
print("-- Running in DNA only mode --")
else:
print("-- Running in paired DNA & RNA mode --")

this_map = {}
data = [row for row in rdr]
RNA_included = False
for row in data:
row['RNA'] = os.path.abspath(row['RNA'])
row['DNA'] = os.path.abspath(row['DNA'])
if not os.path.exists(row['RNA']):
raise argparse.ArgumentTypeError(f"Sample sheet path `{row['RNA']}` does not exist")
if not os.path.exists(row['DNA']):
raise argparse.ArgumentTypeError(f"Sample sheet path `{row['DNA']}` does not exist")

return data, this_map
if 'RNA' in row and not row['RNA'] in ('', None, 'None'):
RNA_included = True
row['RNA'] = os.path.abspath(row['RNA'])
if not os.path.exists(row['RNA']):
raise argparse.ArgumentTypeError(f"Sample sheet path `{row['RNA']}` does not exist")

return data, RNA_included


try:
Expand Down
2 changes: 1 addition & 1 deletion src/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ snakemake \\
--jobs 500 \\
--keep-remote \\
--stats "$3/logfiles/runtime_statistics.json" \\
--restart-times 0 \\
--restart-times 1 \\
--keep-incomplete \\
--local-cores "14" 2>&1
# Create summary report
Expand Down
Loading

0 comments on commit 953ccbe

Please sign in to comment.