From 2e099d3be5bd9c45fadd311456a462aaf47eba42 Mon Sep 17 00:00:00 2001
From: Nico Trummer
Date: Mon, 18 Mar 2024 14:37:49 +0100
Subject: [PATCH] Implement combining of rankings

---
 bin/combine_rankings.py                   | 46 +++++++++++++++++++++++
 conf/modules.config                       |  8 ++++
 modules/local/ranking/combine_rankings.nf | 32 ++++++++++++++++
 subworkflows/local/ranking.nf             |  5 +++
 4 files changed, 91 insertions(+)
 create mode 100755 bin/combine_rankings.py
 create mode 100644 modules/local/ranking/combine_rankings.nf

diff --git a/bin/combine_rankings.py b/bin/combine_rankings.py
new file mode 100755
index 0000000..965595e
--- /dev/null
+++ b/bin/combine_rankings.py
@@ -0,0 +1,46 @@
+#!/usr/bin/env python3
+"""Combine assay-specific TF rankings into one overall ranking.
+
+Each input is a tab-separated table whose first column is used as the index
+and which contains a ``dcg`` column. The ``dcg`` values are summed per index
+label across all inputs, sorted in descending order and numbered from 1 in a
+``rank`` column.
+"""
+
+import argparse
+
+import pandas as pd
+
+
+def parse_args(argv=None):
+    """Parse the command line; ``argv=None`` lets argparse read ``sys.argv``."""
+    parser = argparse.ArgumentParser(description='Combine TF rankings')
+    parser.add_argument('--input', type=str, nargs='+', help='Assay specific score files', required=True)
+    parser.add_argument('--output', type=str, help='Output file', required=True)
+    return parser.parse_args(argv)
+
+
+def combine_rankings(tables):
+    """Sum the ``dcg`` column of ``tables`` per index label and rank the sums.
+
+    Returns a DataFrame holding the summed ``dcg`` in descending order plus a
+    1-based ``rank`` column.
+    """
+    stacked = pd.concat([table[['dcg']] for table in tables])
+    combined = stacked.groupby(stacked.index).sum()
+    # groupby leaves the rows ordered by index label; a stable sort keeps that
+    # order among equal scores, so ties are always ranked the same way.
+    combined = combined.sort_values(by=['dcg'], ascending=False, kind='mergesort')
+    combined['rank'] = range(1, len(combined.index) + 1)
+    return combined
+
+
+def main(argv=None):
+    """Read the input score files, combine them and write the output table."""
+    args = parse_args(argv)
+    tables = [pd.read_csv(path, sep='\t', header=0, index_col=0) for path in args.input]
+    combine_rankings(tables).to_csv(args.output, sep='\t', index=True)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/conf/modules.config b/conf/modules.config
index 3042899..3297b7f 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -68,6 +68,14 @@ process {
         ]
     }
 
+    withName: COMBINE_RANKINGS {
+        publishDir = [
+            path: { "${params.outdir}" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+        ]
+    }
+
     withName: CUSTOM_DUMPSOFTWAREVERSIONS {
         publishDir = [
             path: { "${params.outdir}/pipeline_info" },
diff --git a/modules/local/ranking/combine_rankings.nf b/modules/local/ranking/combine_rankings.nf
new file mode 100644
index 0000000..fe8b8a0
--- /dev/null
+++ b/modules/local/ranking/combine_rankings.nf
@@ -0,0 +1,32 @@
+// Runs combine_rankings.py on all staged ranking files and writes combined.ranking.tsv.
+process COMBINE_RANKINGS {
+    tag "$meta.id"
+    label "process_single"
+
+    // NOTE(review): the conda spec below looks like a mulled container name, not
+    // a conda package list -- confirm it resolves when running with conda.
+    conda "bioconda::mulled-v2-cd5249a47f81a81b2e7785172c240f12497f55b4==c5c6cff7c28d3260400f938602ee600b1acf0323-0"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/mulled-v2-cd5249a47f81a81b2e7785172c240f12497f55b4:c5c6cff7c28d3260400f938602ee600b1acf0323-0':
+        'biocontainers/mulled-v2-cd5249a47f81a81b2e7785172c240f12497f55b4:c5c6cff7c28d3260400f938602ee600b1acf0323-0' }"
+
+    input:
+    tuple val(meta), path(rankings)
+
+    output:
+    tuple val(meta), path("combined.ranking.tsv"), emit: ranking
+
+    path "versions.yml" , emit: versions
+
+    script:
+    """
+    combine_rankings.py --input ${rankings} --output combined.ranking.tsv
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        python: \$(python --version | sed 's/Python //g')
+        pandas: \$(python -c "import pandas; print(pandas.__version__)")
+        numpy: \$(python -c "import numpy; print(numpy.__version__)")
+    END_VERSIONS
+    """
+}
diff --git a/subworkflows/local/ranking.nf b/subworkflows/local/ranking.nf
index 2773dda..3fc67cc 100644
--- a/subworkflows/local/ranking.nf
+++ b/subworkflows/local/ranking.nf
@@ -1,5 +1,6 @@
 include { TF_TG_SCORE } from '../../modules/local/ranking/tf_tg_score'
 include { RANKING as CREATE_RANKING } from '../../modules/local/ranking/ranking'
+include { COMBINE_RANKINGS } from '../../modules/local/ranking/combine_rankings'
 
 workflow RANKING {
 
@@ -26,6 +27,10 @@ workflow RANKING {
 
     TF_TG_SCORE(ch_combined)
     CREATE_RANKING(TF_TG_SCORE.out.score, alpha)
+    COMBINE_RANKINGS(CREATE_RANKING.out.ranking .map{ meta, ranking -> ranking }
+                                                .collect()
+                                                .map{ rankings -> [[id: "all"], rankings]}
+    )
 
     emit: