Skip to content

Commit

Permalink
Run MT without MG
Browse files Browse the repository at this point in the history
* MOSCA is now capable of running MT without MG
* Combined with approach to not use assembly
* In the future might implement the use of assembly to analyze MT
Also added rejected and accepted files from rRNA removal to list of files to delete at preprocessing

closes #3
  • Loading branch information
iquasere committed Feb 1, 2021
1 parent 01004a9 commit 71e2cd1
Show file tree
Hide file tree
Showing 5 changed files with 49 additions and 18 deletions.
6 changes: 0 additions & 6 deletions .idea/rSettings.xml

This file was deleted.

51 changes: 42 additions & 9 deletions workflow/Snakefile
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import pandas as pd
import shutil
import sys
from mosca_tools import parse_blast, multi_sheet_excel

scripts_dir = sys.path[0]

Expand Down Expand Up @@ -56,8 +57,7 @@ def all_input(wildcards):
)
else:
return (
expand("{output}/Annotation/{sample}/aligned.blast", output=config["output"],
sample=set(experiments['Sample']))
"{}/MOSCA_Entry_Counts_Report.xlsx".format(config["output"])
)

def preprocess_input(wildcards):
Expand All @@ -83,6 +83,13 @@ def annotation_input(wildcards):
return expand("{output}/Preprocess/piled_{name}.fasta", output = config["output"],
name = wildcards.sample)

def upimapi_input(wildcards):
if config['do_assembly']:
return expand("{output}/Annotation/{sample}/aligned.blast",output=config["output"],
sample = set(experiments['Sample']))
return expand("{output}/Annotation/{name}/aligned.blast",output=config["output"],
name = set(experiments['Name']))

rule all:
input:
all_input
Expand Down Expand Up @@ -184,16 +191,18 @@ rule annotation:

rule upimapi:
input:
expand("{output}/Annotation/{sample}/aligned.blast", output = config["output"],
sample = set(experiments["Sample"]))
upimapi_input
output:
expand("{output}/Annotation/uniprotinfo.tsv", output = config["output"])
expand("{output}/Annotation/uniprotinfo.tsv", output = config["output"]),
threads:
1
run:
shell("upimapi.py -i {input} -o {output}/Annotation/uniprotinfo --blast --full-id",
output = config["output"])
shell("echo 'done' > {output}/Annotation/{{sample}}.txt", output = config["output"])
if config["do_assembly"]:
shell("upimapi.py -i {input} -o {output}/Annotation/uniprotinfo --blast --full-id", output = config["output"])
else:
for blast in input:
shell("upimapi.py -i {blast} -o {output}/Annotation/uniprotinfo --blast --full-id",
output=config["output"])

rule recognizer:
input:
Expand Down Expand Up @@ -259,7 +268,7 @@ rule join_information:
expand("{output}/Metatranscriptomics/{name}.readcounts", output = config["output"],
name = set(mt_experiments['Name'])),
expand("{output}/Annotation/{name}.readcounts", output = config["output"],
name = set(mg_experiments['Name'])),
name = set(mg_experiments['Name']))
output:
expand("{output}/MOSCA_Protein_Report.xlsx", output = config["output"]),
expand("{output}/MOSCA_Entry_Report.xlsx", output = config["output"]),
Expand All @@ -272,6 +281,30 @@ rule join_information:
experiments = config["experiments"], output = config["output"],
normalization_method = config["normalization_method"])

rule entry_count:
input:
uniprotinfo=expand("{output}/Annotation/uniprotinfo.tsv",output=config["output"]),
blasts=expand("{output}/Annotation/{name}/aligned.blast",output=config["output"], name=set(experiments['Name']))
output:
"{}/MOSCA_Entry_Counts_Report.xlsx".format(config["output"])
threads:
1
run:
uniprotinfo=pd.read_csv(input.uniprotinfo[0], sep='\t')
result = pd.DataFrame(columns=['sseqid'])
i = 1
for blast in input.blasts:
print('[{}/{}] Quantifying entries for: {}'.format(i, len(input.blasts), blast))
data = parse_blast(blast).groupby('sseqid').size().reset_index(name=blast.split('/')[-2])
data['sseqid'] = [ide.split('|')[1] if ide != '*' else ide for ide in data['sseqid']]
result = pd.merge(result, data, on='sseqid', how='outer')
i += 1
result.columns = ['Entry'] + result.columns.to_list()[1:]
print('Merging entry counts with info at {}'.format(input.uniprotinfo[0]))
result = pd.merge(result, uniprotinfo, on='Entry', how='left')
multi_sheet_excel("{}/MOSCA_Entry_Counts_Report.xlsx".format(config["output"]), result, sheet_name='Sheet')
result.to_csv("{}/MOSCA_Entry_Counts_Report.tsv".format(config["output"]), index=False, sep='\t')

rule differential_expression:
input:
expand("{output}/Metatranscriptomics/expression_matrix.tsv", output = config["output"])
Expand Down
2 changes: 1 addition & 1 deletion workflow/envs/meta.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{% set name = "mosca" %}
{% set version = "1.2.3" %}
{% set version = "1.3.0" %}
{% set sha256 = "87cbca039ea9b9c85f417543f2426b2b2acffebe58179878ee6872a32ae949ba" %}

package:
Expand Down
2 changes: 1 addition & 1 deletion workflow/mosca.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import multiprocessing
import sys

__version__ = '1.2.3'
__version__ = '1.3.0'

parser = argparse.ArgumentParser(description="MOSCA's main script")
parser.add_argument("-s", "--snakefile", type=str, default="{}/Snakefile".format(sys.path[0]), help="Snakefile file")
Expand Down
6 changes: 5 additions & 1 deletion workflow/scripts/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,6 @@ def rrna_removal(self, files, out_dir, name, databases, threads='12', original_f
self.remove_orphans(basename + '_forward.fastq',
basename + '_reverse.fastq')
'''

for file in files_to_delete:
os.remove(file)
print('Removed: {}'.format(file))
Expand Down Expand Up @@ -356,9 +355,14 @@ def run(self):
if self.paired:
args.input = ['{}/SortMeRNA/{}_{}.fastq'.format(
args.output, name, fr) for fr in ['forward', 'reverse']]
files_to_delete = ['{}/SortMeRNA/{}_{}.fastq'.format(
args.output, name, rejection) for rejection in ['accepted', 'rejected']]
else:
args.input = ['{}/SortMeRNA/{}_rejected.fq'.format(
args.output, name)]
files_to_delete = ['{}/SortMeRNA/{}_{}.fastq'.format(args.output, name, 'accepted')]
for file in files_to_delete:
os.remove(file)

self.run_fastqc(args.input, '{}/FastQC'.format(args.output), threads=args.threads)

Expand Down

0 comments on commit 71e2cd1

Please sign in to comment.