Skip to content

Commit

Permalink
Merge MultiQC results by strandedness
Browse files Browse the repository at this point in the history
  • Loading branch information
Redmar-van-den-Berg committed Dec 5, 2024
1 parent 16e6656 commit 99e0a38
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 39 deletions.
17 changes: 3 additions & 14 deletions includes/expression/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ rule merge_samples:
src=srcdir("scripts/multiqc.py"),
params:
samples=[sample.sample for sample in samples],
strandedness=[get_strand(sample) for sample in samples],
output:
unstranded="merged_expression_unstranded_mqc.tsv",
forward="merged_expression_forward_mqc.tsv",
Expand All @@ -94,20 +95,8 @@ rule merge_samples:
python3 {input.src} \
--counts {input.counts} \
--samples {params.samples} \
--strand unstranded \
> {output.unstranded} 2> {log}
python3 {input.src} \
--counts {input.counts} \
--samples {params.samples} \
--strand forward \
> {output.forward} 2>> {log}
python3 {input.src} \
--counts {input.counts} \
--samples {params.samples} \
--strand reverse \
> {output.reverse_} 2>> {log}
--strandedness {params.strandedness}\
2> {log}
"""


Expand Down
71 changes: 50 additions & 21 deletions includes/expression/scripts/multiqc.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#!/usr/bin/env python3

import argparse
from collections import defaultdict


def read_expression(fname, strand):
Expand All @@ -12,31 +13,52 @@ def read_expression(fname, strand):
yield spline[0], spline[column]


def main(samples, countfiles, strand):
# Did we already write the gene header?
gene_header = None
def main(samples, countfiles, strandedness):
# Group the samples by strand
grouped = defaultdict(list)
for sample, fname, strand in zip(samples, countfiles, strandedness):
grouped[strand].append((sample, fname))

# First, we print the multiqc header
multiqc_header = f"""# plot_type: "table"
for strand in ["unstranded", "forward", "reverse"]:
samples = [x[0] for x in grouped[strand]]
fnames = [x[1] for x in grouped[strand]]

write_multiqc(samples, fnames, strand)


def write_multiqc(samples, countfiles, strand):
outfile = f"merged_expression_{strand}_mqc.tsv"

with open(outfile, "wt") as fout:
# If there are no samples, we don't even write the header
if not samples:
return
# Did we already write the gene header?
gene_header = None

# First, we print the multiqc header
multiqc_header = f"""# plot_type: "table"
# id: mqc_expression_{strand}
# section_name: "{strand.capitalize()}"
# description: "Normalized gene expression assuming <b>{strand} library preparation</b>." """

print(multiqc_header)
print(multiqc_header, file=fout)

for sample, fname in zip(samples, countfiles):
# Store the normalized expression for each gene per strand
expression = dict()
for values in read_expression(fname, args.strand):
gene, level = values
expression[gene] = level
for sample, fname in zip(samples, countfiles):
# Store the normalized expression for each gene per strand
expression = dict()
for values in read_expression(fname, strand):
gene, level = values
expression[gene] = level

# IF this is the first time, write the header
if not gene_header:
gene_header = list(expression.keys())
print("Sample", *gene_header, sep="\t")
# IF this is the first time, write the header
if not gene_header:
gene_header = list(expression.keys())
print("Sample", *gene_header, sep="\t", file=fout)

print(sample, *(expression[gene] for gene in gene_header), sep="\t")
print(
sample, *(expression[gene] for gene in gene_header), sep="\t", file=fout
)


if __name__ == "__main__":
Expand All @@ -50,13 +72,20 @@ def main(samples, countfiles, strand):
help="Files with normalized expression levels",
)
parser.add_argument(
"--strand", required=True, choices=["unstranded", "forward", "reverse"]
"--strandedness",
required=True,
nargs="+",
choices=["unstranded", "forward", "reverse"],
)

args = parser.parse_args()

if len(args.samples) != len(args.counts):
msg = "--counts and --samples must have the same number of items"
if len(args.samples) != len(args.counts) or len(args.samples) != len(
args.strandedness
):
msg = (
"--counts, --samples and --strandedness must have the same number of items"
)
raise RuntimeError(msg)

main(args.samples, args.counts, args.strand)
main(args.samples, args.counts, args.strandedness)
12 changes: 8 additions & 4 deletions test/test_expression.yml
Original file line number Diff line number Diff line change
Expand Up @@ -88,12 +88,10 @@
- path: merged_expression_unstranded_mqc.tsv
contains:
- "id: mqc_expression_unstranded"
# Empty file since this sample is unstranded
- path: merged_expression_forward_mqc.tsv
contains:
- "id: mqc_expression_forward"
# Empty file since this sample is unstranded
- path: merged_expression_reverse_mqc.tsv
contains:
- "id: mqc_expression_reverse"
# Test that we create the multiqc report
- path: multiqc_expression.html
contains:
Expand Down Expand Up @@ -160,6 +158,9 @@
- "X\tSRR8615409\tSRR8615409"
# Results for the forward strand
- "ENSG00000211459\t18\t18"
- path: merged_expression_forward_mqc.tsv
contains:
- "id: mqc_expression_forward"

- name: Test running the expression module with a reverse stranded sample
tags:
Expand All @@ -179,6 +180,9 @@
- "X\tSRR8615409\tSRR8615409"
# Results for the reverse strand
- "ENSG00000211459\t12\t12"
- path: merged_expression_reverse_mqc.tsv
contains:
- "id: mqc_expression_reverse"


- name: Check formatting of Snakemake files
Expand Down

0 comments on commit 99e0a38

Please sign in to comment.