From 2e099d3be5bd9c45fadd311456a462aaf47eba42 Mon Sep 17 00:00:00 2001
From: Nico Trummer <nictru32@gmail.com>
Date: Mon, 18 Mar 2024 14:37:49 +0100
Subject: [PATCH] Implement combining of rankings

---
 bin/combine_rankings.py                   | 20 ++++++++++++++++
 conf/modules.config                       |  8 +++++++
 modules/local/ranking/combine_rankings.nf | 29 +++++++++++++++++++++++
 subworkflows/local/ranking.nf             |  5 ++++
 4 files changed, 62 insertions(+)
 create mode 100755 bin/combine_rankings.py
 create mode 100644 modules/local/ranking/combine_rankings.nf

diff --git a/bin/combine_rankings.py b/bin/combine_rankings.py
new file mode 100755
index 0000000..965595e
--- /dev/null
+++ b/bin/combine_rankings.py
@@ -0,0 +1,20 @@
+#!/usr/bin/env python3
+"""Combine per-assay TF rankings: sum the 'dcg' column per index label and re-rank."""
+
+import argparse
+import pandas as pd
+
+cli = argparse.ArgumentParser(description='Combine TF rankings')
+cli.add_argument('--input', type=str, nargs='+', help='Assay specific score files', required=True)
+cli.add_argument('--output', type=str, help='Output file', required=True)
+opts = cli.parse_args()
+
+# Read every tab-separated input (first column is the index), then stack their 'dcg' columns.
+tables = [pd.read_csv(path, sep='\t', header=0, index_col=0) for path in opts.input]
+stacked = pd.concat([table[['dcg']] for table in tables])
+
+# Sum scores that share an index label, then order from highest to lowest total.
+combined = stacked.groupby(stacked.index).sum().sort_values(by=['dcg'], ascending=False)
+combined['rank'] = range(1, len(combined) + 1)  # 1-based position after sorting
+
+combined.to_csv(opts.output, sep='\t', index=True)
\ No newline at end of file
diff --git a/conf/modules.config b/conf/modules.config
index 3042899..3297b7f 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -68,6 +68,14 @@ process {
         ]
     }
 
+    withName: COMBINE_RANKINGS {  // publishing rules for the COMBINE_RANKINGS process
+        publishDir = [
+            path: { "${params.outdir}" },  // publish directly into the output root, no subdirectory
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }  // null for versions.yml, so it is not published
+        ]
+    }
+
     withName: CUSTOM_DUMPSOFTWAREVERSIONS {
         publishDir = [
             path: { "${params.outdir}/pipeline_info" },
diff --git a/modules/local/ranking/combine_rankings.nf b/modules/local/ranking/combine_rankings.nf
new file mode 100644
index 0000000..fe8b8a0
--- /dev/null
+++ b/modules/local/ranking/combine_rankings.nf
@@ -0,0 +1,29 @@
+process COMBINE_RANKINGS {
+    tag "$meta.id"
+    label "process_single"
+    // Runs combine_rankings.py on the staged ranking files; emits the merged table plus versions.yml.
+    conda "bioconda::mulled-v2-cd5249a47f81a81b2e7785172c240f12497f55b4==c5c6cff7c28d3260400f938602ee600b1acf0323-0"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/mulled-v2-cd5249a47f81a81b2e7785172c240f12497f55b4:c5c6cff7c28d3260400f938602ee600b1acf0323-0':
+        'biocontainers/mulled-v2-cd5249a47f81a81b2e7785172c240f12497f55b4:c5c6cff7c28d3260400f938602ee600b1acf0323-0' }"
+    // NOTE(review): the conda spec above reuses the mulled container name/hash - confirm it resolves as a conda package.
+    input:
+    tuple val(meta), path(rankings)
+    // `rankings` may hold several files; it is interpolated into the --input argument of the script.
+    output:
+    tuple val(meta), path("combined.ranking.tsv"), emit: ranking
+    // versions.yml is written by the heredoc at the end of the script block.
+    path  "versions.yml"                  , emit: versions
+    // The --output file name used below has to match the path declared for the `ranking` output.
+    script:
+    """
+    combine_rankings.py --input ${rankings} --output combined.ranking.tsv
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        python: \$(python --version | sed 's/Python //g')
+        pandas: \$(python -c "import pandas; print(pandas.__version__)")
+        numpy: \$(python -c "import numpy; print(numpy.__version__)")
+    END_VERSIONS
+    """
+}
\ No newline at end of file
diff --git a/subworkflows/local/ranking.nf b/subworkflows/local/ranking.nf
index 2773dda..3fc67cc 100644
--- a/subworkflows/local/ranking.nf
+++ b/subworkflows/local/ranking.nf
@@ -1,5 +1,6 @@
 include { TF_TG_SCORE               } from '../../modules/local/ranking/tf_tg_score'
 include { RANKING as CREATE_RANKING } from '../../modules/local/ranking/ranking'
+include { COMBINE_RANKINGS          } from '../../modules/local/ranking/combine_rankings'
 
 workflow RANKING {
 
@@ -26,6 +27,10 @@ workflow RANKING {
 
     TF_TG_SCORE(ch_combined)
     CREATE_RANKING(TF_TG_SCORE.out.score, alpha)
+    COMBINE_RANKINGS(CREATE_RANKING.out.ranking .map{ meta, ranking -> ranking }  // drop the per-item meta, keep only the ranking file
+                                                .collect()  // gather every ranking into a single list emission
+                                                .map{ rankings -> [[id: "all"], rankings]}  // pair the list with a shared meta map, id "all"
+    )
 
 
     emit: