From f2dc7f51fca8edef4ee91e7b9f55f01881a3f7b2 Mon Sep 17 00:00:00 2001 From: Nikhil Kumar Date: Thu, 6 Mar 2025 16:44:46 -0500 Subject: [PATCH 1/6] Removed all netmhcpan module --- modules.json | 5 - modules/msk/netmhcpan/environment.yml | 9 -- modules/msk/netmhcpan/main.nf | 67 ----------- modules/msk/netmhcpan/meta.yml | 67 ----------- modules/msk/netmhcpan/tests/main.nf.test | 106 ------------------ modules/msk/netmhcpan/tests/main.nf.test.snap | 62 ---------- modules/msk/netmhcpan/tests/tags.yml | 2 - workflows/neoantigenpipeline.nf | 1 - 8 files changed, 319 deletions(-) delete mode 100644 modules/msk/netmhcpan/environment.yml delete mode 100644 modules/msk/netmhcpan/main.nf delete mode 100644 modules/msk/netmhcpan/meta.yml delete mode 100644 modules/msk/netmhcpan/tests/main.nf.test delete mode 100644 modules/msk/netmhcpan/tests/main.nf.test.snap delete mode 100644 modules/msk/netmhcpan/tests/tags.yml diff --git a/modules.json b/modules.json index 7879555..67994ef 100644 --- a/modules.json +++ b/modules.json @@ -45,11 +45,6 @@ "git_sha": "0ccb264740ff8fe681f0e298133e52e51bf68994", "installed_by": ["modules", "netmhcstabandpan"] }, - "netmhcpan": { - "branch": "main", - "git_sha": "503abeb67260f060d8228221b07d743aa4180345", - "installed_by": ["modules"] - }, "netmhcpan4": { "branch": "develop", "git_sha": "9e23d0e98023dec8b7a9b0faa20eea2f1f9cee3f", diff --git a/modules/msk/netmhcpan/environment.yml b/modules/msk/netmhcpan/environment.yml deleted file mode 100644 index dcbf0e4..0000000 --- a/modules/msk/netmhcpan/environment.yml +++ /dev/null @@ -1,9 +0,0 @@ ---- -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json -name: "netmhcpan" -channels: - - conda-forge - - bioconda - - defaults -dependencies: - - "NETMHCPAN" diff --git a/modules/msk/netmhcpan/main.nf b/modules/msk/netmhcpan/main.nf deleted file mode 100644 index bc54431..0000000 --- a/modules/msk/netmhcpan/main.nf +++ /dev/null @@ -1,67 
+0,0 @@ -process NETMHCPAN { - tag "$meta.id" - label 'process_medium' - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'docker://mskcc/netmhctools:1.0.0': - 'docker.io/mskcc/netmhctools:1.0.0' }" - - input: - tuple val(meta), path(inputFasta), val(hlaString), val(inputType) - - output: - tuple val(output_meta), path("*.xls"), emit: xls - tuple val(output_meta), path("*.netmhcpan.output"), emit: netmhcpanoutput - path "versions.yml", emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def hla = hlaString.trim() - output_meta = meta.clone() - output_meta.typeMut = inputType == "MUT" ? true : false - output_meta.fromStab = false - def NETMHCPAN_VERSION = "4.1" - - """ - /usr/local/bin/netMHCpan-${NETMHCPAN_VERSION}/netMHCpan \ - -s 0 \ - -BA 1 \ - -f ${inputFasta} \ - -a ${hla} \ - -l 9,10 \ - -inptype 0 \ - -xls \ - ${args} \ - -xlsfile \ - ${prefix}.${inputType}.xls > ${prefix}.${inputType}.netmhcpan.output - - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - netmhcpan: v${NETMHCPAN_VERSION} - END_VERSIONS - - """ - - stub: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def NETMHCPAN_VERSION = "4.1" - output_meta = meta.clone() - output_meta.typeMut = inputType == "MUT" ? 
true : false - output_meta.fromStab = false - """ - touch ${prefix}.MUT.netmhcpan.output - touch ${prefix}.MUT.xls - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - netmhcpan: v${NETMHCPAN_VERSION} - END_VERSIONS - """ -} diff --git a/modules/msk/netmhcpan/meta.yml b/modules/msk/netmhcpan/meta.yml deleted file mode 100644 index 5c0885e..0000000 --- a/modules/msk/netmhcpan/meta.yml +++ /dev/null @@ -1,67 +0,0 @@ ---- -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json -name: "netmhcpan" -description: write your description here -keywords: - - immune - - netmhcpan - - genomics -tools: - - "netmhcpan": - description: " Runs netMHCpan and outputs tsvs and STDout for mutated and wild type neoantigens" - homepage: "https://services.healthtech.dtu.dk/services/NetMHCpan-4.1/" - documentation: "https://services.healthtech.dtu.dk/services/NetMHCpan-4.1/" - licence: ["MIT"] - -input: - # Only when we have meta - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'sample1', single_end:false ]` - - - inputMaf: - type: file - description: Maf outputtted by Tempo that was run through phyloWGS - pattern: "*.{maf}" - - - hlaFile: - type: file - description: HLA tsv outputtted by Polysolver - pattern: "winners.{tsv}" - - - inputType: - type: string - description: Allows netmhcpan to run in parallel. Should be 'MUT' or 'WT', it will kick off two jobs. make a Channel.Of('MUT','WT') outside the module as an input. Running them in series is kicked off by putting in anything other than MUT or WT. - pattern: "*" - -output: - #Only when we have meta - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'sample1', single_end:false ]` - - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - - xls: - type: file - description: TSV/XLS file of netMHCpan. 
A poorly formated file of neoantigens. This contains the MUT or WT antigens - pattern: "*.xls" - - - netmhcpanoutput: - type: file - description: STDOUT file of netMHCpan. A poorly formated file of neoantigens. This contains either the MUT or WT neoantigens. Neoantigenutils contains a parser for this file. - pattern: "*.WT.netmhcpan.output,*.MUT.netmhcpan.output" - -authors: - - "@johnoooh" - - "@nikhil" -maintainers: - - "@johnoooh" - - "@nikhil" diff --git a/modules/msk/netmhcpan/tests/main.nf.test b/modules/msk/netmhcpan/tests/main.nf.test deleted file mode 100644 index 092e7c5..0000000 --- a/modules/msk/netmhcpan/tests/main.nf.test +++ /dev/null @@ -1,106 +0,0 @@ -nextflow_process { - - name "Test Process NETMHCPAN" - script "../main.nf" - process "NETMHCPAN" - - tag "modules" - tag "modules_nfcore" - tag "netmhcpan" - tag "modules_msk" - - test("netmhcpan - MUT - xls,output,fa") { - - when { - process { - """ - input[0] = [ - [ id:'test', single_end:false ], // meta map - file(file(params.test_data_mskcc['neoantigen']['MUT_sequence_fa']), checkIfExists: true), - "HLA-A24:02,HLA-A24:02,HLA-B39:01,HLA-B39:01,HLA-C07:01,HLA-C06:02", - "MUT" - ] - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot( - process.out.versions, - process.out.netmhcpanoutput[0][0], - file(process.out.xls[0][1]).name, - file(process.out.netmhcpanoutput[0][1]).name - ).match() - } - ) - } - - } - - test("netmhcpan - WT - xls,output,fa") { - - when { - process { - """ - input[0] = [ - [ id:'test', single_end:false ], // meta map - file(file(params.test_data_mskcc['neoantigen']['WT_sequence_fa']), checkIfExists: true), - "HLA-A24:02,HLA-A24:02,HLA-B39:01,HLA-B39:01,HLA-C07:01,HLA-C06:02", - "WT" - ] - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot( - process.out.versions, - process.out.netmhcpanoutput[0][0], - file(process.out.xls[0][1]).name, - file(process.out.netmhcpanoutput[0][1]).name - ).match() - } - ) - } - - } 
- - - - test("netmhcpan - xls,output,fa - stub") { - - options "-stub" - - when { - process { - """ - input[0] = [ - [ id:'test', single_end:false ], // meta map - file('MUT_sequence_fa'), - "HLA", - "MUT" - ] - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot( - process.out.versions, - process.out.netmhcpanoutput[0][0], - file(process.out.xls[0][1]).name, - file(process.out.netmhcpanoutput[0][1]).name - ).match() - } - ) - } - - } - -} diff --git a/modules/msk/netmhcpan/tests/main.nf.test.snap b/modules/msk/netmhcpan/tests/main.nf.test.snap deleted file mode 100644 index d8f9f0e..0000000 --- a/modules/msk/netmhcpan/tests/main.nf.test.snap +++ /dev/null @@ -1,62 +0,0 @@ -{ - "netmhcpan - WT - xls,output,fa": { - "content": [ - [ - "versions.yml:md5,35ec563839ee27410cf9f8d134c6e8e5" - ], - { - "id": "test", - "single_end": false, - "typeMut": false, - "fromStab": false - }, - "test.WT.xls", - "test.WT.netmhcpan.output" - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" - }, - "timestamp": "2024-06-13T11:02:31.338253" - }, - "netmhcpan - xls,output,fa - stub": { - "content": [ - [ - "versions.yml:md5,35ec563839ee27410cf9f8d134c6e8e5" - ], - { - "id": "test", - "single_end": false, - "typeMut": true, - "fromStab": false - }, - "test.MUT.xls", - "test.MUT.netmhcpan.output" - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" - }, - "timestamp": "2024-06-13T11:02:35.582061" - }, - "netmhcpan - MUT - xls,output,fa": { - "content": [ - [ - "versions.yml:md5,35ec563839ee27410cf9f8d134c6e8e5" - ], - { - "id": "test", - "single_end": false, - "typeMut": true, - "fromStab": false - }, - "test.MUT.xls", - "test.MUT.netmhcpan.output" - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" - }, - "timestamp": "2024-06-13T11:02:12.753672" - } -} \ No newline at end of file diff --git a/modules/msk/netmhcpan/tests/tags.yml b/modules/msk/netmhcpan/tests/tags.yml deleted file mode 100644 index b66af53..0000000 --- 
a/modules/msk/netmhcpan/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -netmhcpan: - - "modules/msk/netmhcpan/**" diff --git a/workflows/neoantigenpipeline.nf b/workflows/neoantigenpipeline.nf index f43e229..81af76f 100644 --- a/workflows/neoantigenpipeline.nf +++ b/workflows/neoantigenpipeline.nf @@ -13,7 +13,6 @@ include { PHYLOWGS_PARSECNVS } from '../modules/msk/phylowgs/parsecnvs/main' include { PHYLOWGS_WRITERESULTS } from '../modules/msk/phylowgs/writeresults/main' include { PHYLOWGS } from '../subworkflows/msk/phylowgs' include { NETMHCSTABANDPAN } from '../subworkflows/msk/netmhcstabandpan/main' -include { NETMHCPAN } from '../modules/msk/netmhcpan/main' include { NEOANTIGENUTILS_NEOANTIGENINPUT } from '../modules/msk/neoantigenutils/neoantigeninput' include { NEOANTIGEN_EDITING } from '../subworkflows/msk/neoantigen_editing' include { NEOANTIGENUTILS_CONVERTANNOTJSON } from '../modules/msk/neoantigenutils/convertannotjson' From d3dd8a56b8f47bd5d029532b7a4e2ee32b73f53d Mon Sep 17 00:00:00 2001 From: Nikhil Kumar Date: Thu, 6 Mar 2025 16:45:59 -0500 Subject: [PATCH 2/6] Updated modules for 1.3.0 --- modules.json | 20 +- .../neoantigenediting/computefitness/main.nf | 4 +- .../resources/usr/bin/compute_fitness.py | 14 +- .../computefitness/tests/main.nf.test | 7 + .../computefitness/tests/nextflow.config | 9 + .../neoantigenutils/convertannotjson/main.nf | 4 +- .../resources/usr/bin/convertannotjson.py | 35 ++- .../convertannotjson/tests/main.nf.test.snap | 24 +- .../neoantigenutils/formatnetmhcpan/main.nf | 4 +- .../usr/bin/format_netmhcpan_output.py | 2 +- .../formatnetmhcpan/tests/main.nf.test.snap | 20 +- .../neoantigenutils/generatehlastring/main.nf | 4 +- .../neoantigenutils/generatemutfasta/main.nf | 12 +- .../neoantigenutils/generatemutfasta/meta.yml | 8 +- .../resources/usr/bin/generateMutFasta.py | 26 +- .../generatemutfasta/tests/main.nf.test.snap | 24 +- .../neoantigenutils/neoantigeninput/main.nf | 11 +- .../neoantigenutils/neoantigeninput/meta.yml 
| 2 +- .../resources/usr/bin/generate_input.py | 257 ++++++++++++++++-- .../neoantigeninput/tests/main.nf.test | 31 ++- .../neoantigeninput/tests/main.nf.test.snap | 42 +-- modules/msk/netmhc3/meta.yml | 2 +- .../createinput/tests/main.nf.test.snap | 20 +- .../msk/neoantigen_editing/tests/main.nf.test | 1 + .../neoantigen_editing/tests/nextflow.config | 9 + .../netmhcstabandpan/tests/main.nf.test.snap | 50 ++-- 26 files changed, 472 insertions(+), 170 deletions(-) create mode 100644 modules/msk/neoantigenediting/computefitness/tests/nextflow.config create mode 100644 subworkflows/msk/neoantigen_editing/tests/nextflow.config diff --git a/modules.json b/modules.json index 67994ef..4f09566 100644 --- a/modules.json +++ b/modules.json @@ -12,37 +12,37 @@ }, "neoantigenediting/computefitness": { "branch": "develop", - "git_sha": "05e49188ee9407e1b51dfb1a49d8b6133e9276bc", + "git_sha": "6aee6083154c666798e779c0aeb305614f4dcdd3", "installed_by": ["modules", "neoantigen_editing"] }, "neoantigenutils/convertannotjson": { "branch": "develop", - "git_sha": "34505c4c67eabebab927561d3a8fca87c9efe788", + "git_sha": "e8ae6eb203cb5a49f98cacd4734090c182ad5f44", "installed_by": ["modules"] }, "neoantigenutils/formatnetmhcpan": { "branch": "develop", - "git_sha": "939507da8ef974fa8b9f53abd5f9d54492bafab0", + "git_sha": "84f51051ee6735fc9dc8eea733eb84a54dc350bc", "installed_by": ["modules", "netmhcstabandpan"] }, "neoantigenutils/generatehlastring": { "branch": "develop", - "git_sha": "34505c4c67eabebab927561d3a8fca87c9efe788", + "git_sha": "84f51051ee6735fc9dc8eea733eb84a54dc350bc", "installed_by": ["modules", "netmhcstabandpan"] }, "neoantigenutils/generatemutfasta": { "branch": "develop", - "git_sha": "e6bbb12a2dc237b9ea18163e96dbe9d780ddce5f", + "git_sha": "e8ae6eb203cb5a49f98cacd4734090c182ad5f44", "installed_by": ["modules", "netmhcstabandpan"] }, "neoantigenutils/neoantigeninput": { "branch": "develop", - "git_sha": "ba014f40a3aaccd6a78db44f62d697b77a790eb8", + "git_sha": 
"e8ae6eb203cb5a49f98cacd4734090c182ad5f44", "installed_by": ["modules"] }, "netmhc3": { "branch": "develop", - "git_sha": "0ccb264740ff8fe681f0e298133e52e51bf68994", + "git_sha": "26ac6cb270844a72838220ca239a22fcbe7f71f2", "installed_by": ["modules", "netmhcstabandpan"] }, "netmhcpan4": { @@ -57,7 +57,7 @@ }, "phylowgs/createinput": { "branch": "develop", - "git_sha": "05e49188ee9407e1b51dfb1a49d8b6133e9276bc", + "git_sha": "e8ae6eb203cb5a49f98cacd4734090c182ad5f44", "installed_by": ["phylowgs"] }, "phylowgs/multievolve": { @@ -81,12 +81,12 @@ "msk": { "neoantigen_editing": { "branch": "develop", - "git_sha": "56a628201401866096d6307b9e8c690c5eb46ac2", + "git_sha": "3284d77f55f0ef756d9ccd0519826fd7edd7d2ec", "installed_by": ["subworkflows"] }, "netmhcstabandpan": { "branch": "develop", - "git_sha": "848ad2b5cffd9a4bd15a628cfe59aba93807a252", + "git_sha": "e8ae6eb203cb5a49f98cacd4734090c182ad5f44", "installed_by": ["subworkflows"] }, "phylowgs": { diff --git a/modules/msk/neoantigenediting/computefitness/main.nf b/modules/msk/neoantigenediting/computefitness/main.nf index 3e15400..0a4cbba 100644 --- a/modules/msk/neoantigenediting/computefitness/main.nf +++ b/modules/msk/neoantigenediting/computefitness/main.nf @@ -23,8 +23,8 @@ process NEOANTIGENEDITING_COMPUTEFITNESS { """ compute_fitness.py \\ --alignment ${alignment_file} \\ - --input ${patient_data} - + --input ${patient_data} \\ + ${args} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/msk/neoantigenediting/computefitness/resources/usr/bin/compute_fitness.py b/modules/msk/neoantigenediting/computefitness/resources/usr/bin/compute_fitness.py index da19f4d..28c0d2b 100755 --- a/modules/msk/neoantigenediting/computefitness/resources/usr/bin/compute_fitness.py +++ b/modules/msk/neoantigenediting/computefitness/resources/usr/bin/compute_fitness.py @@ -285,19 +285,23 @@ def clean_data(tree): """ - a = 22.897590714815188 - k = 1 - w = 0.22402192838740312 - parser = 
argparse.ArgumentParser(prog="align_neoantigens_to_IEDB") parser.add_argument("--alignment", help="neoantigen alignment file", required=True) parser.add_argument("--input", help="patient_data file", required=True) - + parser.add_argument("--a_param", help="weight corresponding to a", default = 22.897590714815188) + parser.add_argument("--k_param", help="weight corresponding to k", default = 1) + parser.add_argument("--w_param", help="weight corresponding to w", default = 0.22402192838740312) + args = parser.parse_args() alignment_file = args.alignment patient_file = args.input + a = float(args.a_param) + k = float(args.k_param) + w = float(args.w_param) + + epidist = EpitopeDistance() sample_file = patient_file diff --git a/modules/msk/neoantigenediting/computefitness/tests/main.nf.test b/modules/msk/neoantigenediting/computefitness/tests/main.nf.test index 236520e..b0d14c7 100644 --- a/modules/msk/neoantigenediting/computefitness/tests/main.nf.test +++ b/modules/msk/neoantigenediting/computefitness/tests/main.nf.test @@ -3,6 +3,7 @@ nextflow_process { name "Test Process NEOANTIGENEDITING_COMPUTEFITNESS" script "../main.nf" process "NEOANTIGENEDITING_COMPUTEFITNESS" + config "./nextflow.config" tag "modules" tag "modules_nfcore" @@ -20,6 +21,12 @@ nextflow_process { file(params.test_data_mskcc['neoantigen']['patient_data'], checkIfExists: true), file(params.test_data_mskcc['neoantigen']['iedb_alignments'], checkIfExists: true) ] + input[0] = [ + [ id:'test', single_end:false ], // meta map, + file(params.test_data_mskcc['neoantigen']['patient_data'], checkIfExists: true), + file(params.test_data_mskcc['neoantigen']['iedb_alignments'], checkIfExists: true) + ] + """ } diff --git a/modules/msk/neoantigenediting/computefitness/tests/nextflow.config b/modules/msk/neoantigenediting/computefitness/tests/nextflow.config new file mode 100644 index 0000000..33d9f71 --- /dev/null +++ b/modules/msk/neoantigenediting/computefitness/tests/nextflow.config @@ -0,0 +1,9 @@ +params 
{ + enable_conda = false +} + +process { + withName: 'NEOANTIGENEDITING_COMPUTEFITNESS' { + ext.args = '--a_param 22.897590714815188 --k_param 1 --w_param 0.22402192838740312' + } +} diff --git a/modules/msk/neoantigenutils/convertannotjson/main.nf b/modules/msk/neoantigenutils/convertannotjson/main.nf index 4aa6b9f..953b0e4 100644 --- a/modules/msk/neoantigenutils/convertannotjson/main.nf +++ b/modules/msk/neoantigenutils/convertannotjson/main.nf @@ -2,8 +2,8 @@ process NEOANTIGENUTILS_CONVERTANNOTJSON { tag "$meta.id" label 'process_single' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'docker://mskcc/neoantigen-utils-base:1.0.0': - 'docker.io/mskcc/neoantigen-utils-base:1.0.0' }" + 'docker://mskcc/neoantigen-utils-base:1.3.0': + 'docker.io/mskcc/neoantigen-utils-base:1.3.0' }" input: tuple val(meta), path(annotatedJSON) diff --git a/modules/msk/neoantigenutils/convertannotjson/resources/usr/bin/convertannotjson.py b/modules/msk/neoantigenutils/convertannotjson/resources/usr/bin/convertannotjson.py index 60c9d8a..c927b52 100755 --- a/modules/msk/neoantigenutils/convertannotjson/resources/usr/bin/convertannotjson.py +++ b/modules/msk/neoantigenutils/convertannotjson/resources/usr/bin/convertannotjson.py @@ -3,21 +3,40 @@ import json import argparse -VERSION = 1.0 +VERSION = 1.1 -def process_json_file(json_file_path,output_file_path): + +def process_json_file(json_file_path, output_file_path): with open(json_file_path, "r") as json_file: data = json.load(json_file) # Define the TSV header - tsv_header = ["id", "mutation_id", "HLA_gene_id", "sequence", "WT_sequence", "mutated_position", "Kd", "KdWT", "R", "logC", "logA", "quality", "git_branch"] + tsv_header = [ + "id", + "mutation_id", + "Gene", + "HLA_gene_id", + "sequence", + "WT_sequence", + "mutated_position", + "Kd", + "KdWT", + "R", + "logC", + "logA", + "quality", + "NMD", + "git_branch", + ] # Convert JSON to TSV tsv_lines = [] 
tsv_lines.append("\t".join(tsv_header)) for neoantigen in data["neoantigens"]: - tsv_lines.append("\t".join(str(neoantigen.get(field, "")) for field in tsv_header[:-1])) + tsv_lines.append( + "\t".join(str(neoantigen.get(field, "")) for field in tsv_header[:-1]) + ) tsv_output = "\n".join(tsv_lines) @@ -25,8 +44,11 @@ def process_json_file(json_file_path,output_file_path): with open(output_file_path, "w") as tsv_file: tsv_file.write(tsv_output) + def main(): - parser = argparse.ArgumentParser(description="Process an annotated JSON file and output TSV format.") + parser = argparse.ArgumentParser( + description="Process an annotated JSON file and output TSV format." + ) parser.add_argument("--json_file", help="Path to the annotated JSON file") parser.add_argument("--output_file", help="Path to the output TSV file") parser.add_argument( @@ -34,7 +56,8 @@ def main(): ) args = parser.parse_args() - process_json_file(args.json_file,args.output_file) + process_json_file(args.json_file, args.output_file) + if __name__ == "__main__": main() diff --git a/modules/msk/neoantigenutils/convertannotjson/tests/main.nf.test.snap b/modules/msk/neoantigenutils/convertannotjson/tests/main.nf.test.snap index 061ab9d..b0b04d4 100644 --- a/modules/msk/neoantigenutils/convertannotjson/tests/main.nf.test.snap +++ b/modules/msk/neoantigenutils/convertannotjson/tests/main.nf.test.snap @@ -7,30 +7,30 @@ { "id": "test" }, - "test_neoantigens.tsv:md5,4931fb72bba8bbeb3bc6cef19b99b01a" + "test_neoantigens.tsv:md5,38705f1ccbbeb9298037beb871e2e87c" ] ], "1": [ - "versions.yml:md5,e36ea44c5cc6130f40b1a100f7e84fb1" + "versions.yml:md5,07818f514d7f8471de49a39923d6921e" ], "neoantigenTSV": [ [ { "id": "test" }, - "test_neoantigens.tsv:md5,4931fb72bba8bbeb3bc6cef19b99b01a" + "test_neoantigens.tsv:md5,38705f1ccbbeb9298037beb871e2e87c" ] ], "versions": [ - "versions.yml:md5,e36ea44c5cc6130f40b1a100f7e84fb1" + "versions.yml:md5,07818f514d7f8471de49a39923d6921e" ] } ], "meta": { - "nf-test": "0.8.4", - 
"nextflow": "24.04.2" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-07-24T16:49:37.133693" + "timestamp": "2025-03-03T15:54:28.541430188" }, "neoantigenutils_convertannotjson - output(test) - tsv - stub": { "content": [ @@ -44,7 +44,7 @@ ] ], "1": [ - "versions.yml:md5,e36ea44c5cc6130f40b1a100f7e84fb1" + "versions.yml:md5,07818f514d7f8471de49a39923d6921e" ], "neoantigenTSV": [ [ @@ -55,14 +55,14 @@ ] ], "versions": [ - "versions.yml:md5,e36ea44c5cc6130f40b1a100f7e84fb1" + "versions.yml:md5,07818f514d7f8471de49a39923d6921e" ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-07-24T16:49:41.629164" + "timestamp": "2025-03-03T15:40:34.220309133" } } \ No newline at end of file diff --git a/modules/msk/neoantigenutils/formatnetmhcpan/main.nf b/modules/msk/neoantigenutils/formatnetmhcpan/main.nf index b113c2b..010e1b8 100644 --- a/modules/msk/neoantigenutils/formatnetmhcpan/main.nf +++ b/modules/msk/neoantigenutils/formatnetmhcpan/main.nf @@ -2,8 +2,8 @@ process NEOANTIGENUTILS_FORMATNETMHCPAN { tag "$meta.id" label 'process_single' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'docker://mskcc/neoantigen-utils-base:1.0.0': - 'docker.io/mskcc/neoantigen-utils-base:1.0.0' }" + 'docker://mskcc/neoantigen-utils-base:1.3.0': + 'docker.io/mskcc/neoantigen-utils-base:1.3.0' }" input: tuple val(meta), path(netmhcPanOutput) diff --git a/modules/msk/neoantigenutils/formatnetmhcpan/resources/usr/bin/format_netmhcpan_output.py b/modules/msk/neoantigenutils/formatnetmhcpan/resources/usr/bin/format_netmhcpan_output.py index 2bce405..1c773de 100755 --- a/modules/msk/neoantigenutils/formatnetmhcpan/resources/usr/bin/format_netmhcpan_output.py +++ b/modules/msk/neoantigenutils/formatnetmhcpan/resources/usr/bin/format_netmhcpan_output.py @@ -46,7 +46,7 @@ def netMHCpan_out_reformat(netMHCoutput, mut, stab, netmhc3, prefix): if netmhc3: pan_prefix = "" outfilename = "{}_netmhc{}{}.output.{}.tsv".format( - prefix, pan_prefix, stab_prefix, type_prefix + prefix, stab_prefix, pan_prefix, type_prefix ) with open(netMHCoutput, "r") as file: # data = file.read() diff --git a/modules/msk/neoantigenutils/formatnetmhcpan/tests/main.nf.test.snap b/modules/msk/neoantigenutils/formatnetmhcpan/tests/main.nf.test.snap index b84f7d6..6afe92f 100644 --- a/modules/msk/neoantigenutils/formatnetmhcpan/tests/main.nf.test.snap +++ b/modules/msk/neoantigenutils/formatnetmhcpan/tests/main.nf.test.snap @@ -205,7 +205,7 @@ "fromStab": true, "fromPan": true }, - "test_netmhcpanstab.output.MUT.tsv:md5,246eb723691371ad49bd080071475740" + "test_netmhcstabpan.output.MUT.tsv:md5,f1745797f5dc9db19c6b8124a3a12b81" ] ], "1": [ @@ -219,7 +219,7 @@ "fromStab": true, "fromPan": true }, - "test_netmhcpanstab.output.MUT.tsv:md5,246eb723691371ad49bd080071475740" + "test_netmhcstabpan.output.MUT.tsv:md5,f1745797f5dc9db19c6b8124a3a12b81" ] ], "versions": [ @@ -228,10 +228,10 @@ } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-12-17T14:46:10.256554716" + "timestamp": "2025-03-03T14:28:35.420434" }, 
"neoantigenutils_formatnetmhcpan - output(WT,netmhcpanstab) - tsv": { "content": [ @@ -244,7 +244,7 @@ "fromStab": false, "fromPan": true }, - "test_netmhcpan.output.WT.tsv:md5,b95a6624d4010eb6517ca880a13e670d" + "test_netmhcpan.output.WT.tsv:md5,da55aef51d69fcc86da81472920861ab" ] ], "1": [ @@ -258,7 +258,7 @@ "fromStab": false, "fromPan": true }, - "test_netmhcpan.output.WT.tsv:md5,b95a6624d4010eb6517ca880a13e670d" + "test_netmhcpan.output.WT.tsv:md5,da55aef51d69fcc86da81472920861ab" ] ], "versions": [ @@ -267,9 +267,9 @@ } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-12-17T14:46:17.955083074" + "timestamp": "2025-03-03T14:28:39.82547" } } \ No newline at end of file diff --git a/modules/msk/neoantigenutils/generatehlastring/main.nf b/modules/msk/neoantigenutils/generatehlastring/main.nf index 2362e69..d3ae83d 100644 --- a/modules/msk/neoantigenutils/generatehlastring/main.nf +++ b/modules/msk/neoantigenutils/generatehlastring/main.nf @@ -2,8 +2,8 @@ process NEOANTIGENUTILS_GENERATEHLASTRING { tag "$meta.id" label 'process_single' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'docker://mskcc/neoantigen-utils-base:1.0.0': - 'docker.io/mskcc/neoantigen-utils-base:1.0.0' }" + 'docker://mskcc/neoantigen-utils-base:1.3.0': + 'docker.io/mskcc/neoantigen-utils-base:1.3.0' }" input: tuple val(meta), path(inputHLA) diff --git a/modules/msk/neoantigenutils/generatemutfasta/main.nf b/modules/msk/neoantigenutils/generatemutfasta/main.nf index f9051a9..0efdfbe 100644 --- a/modules/msk/neoantigenutils/generatemutfasta/main.nf +++ b/modules/msk/neoantigenutils/generatemutfasta/main.nf @@ -2,16 +2,16 @@ process NEOANTIGENUTILS_GENERATEMUTFASTA { tag "$meta.id" label 'process_single' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'docker://mskcc/neoantigen-utils-base:1.0.0': - 'docker.io/mskcc/neoantigen-utils-base:1.0.0' }" + 'docker://mskcc/neoantigen-utils-base:1.3.0': + 'docker.io/mskcc/neoantigen-utils-base:1.3.0' }" input: tuple val(meta), path(inputMaf) tuple path(cds), path(cdna) output: - tuple val(meta), path("*_out/*.MUT_sequences.fa"), emit: mut_fasta - tuple val(meta), path("*_out/*.WT_sequences.fa"), emit: wt_fasta + tuple val(meta), path("*_out/*.MUT.sequences.fa"), emit: mut_fasta + tuple val(meta), path("*_out/*.WT.sequences.fa"), emit: wt_fasta path "versions.yml", emit: versions when: @@ -43,8 +43,8 @@ process NEOANTIGENUTILS_GENERATEMUTFASTA { """ mkdir ${prefix}_out - touch ${prefix}_out/${prefix}.MUT_sequences.fa - touch ${prefix}_out/${prefix}.WT_sequences.fa + touch ${prefix}_out/${prefix}.MUT.sequences.fa + touch ${prefix}_out/${prefix}.WT.sequences.fa cat <<-END_VERSIONS > versions.yml "${task.process}": generateMutFasta: \$(echo \$(generateMutFasta.py -v)) diff --git a/modules/msk/neoantigenutils/generatemutfasta/meta.yml b/modules/msk/neoantigenutils/generatemutfasta/meta.yml index ba57423..9c131bc 100644 --- a/modules/msk/neoantigenutils/generatemutfasta/meta.yml +++ b/modules/msk/neoantigenutils/generatemutfasta/meta.yml @@ -38,20 +38,20 @@ output: description: | Groovy Map containing sample information e.g. `[ id:'sample1', single_end:false ]` - - "*_out/*.MUT_sequences.fa": + - "*_out/*.MUT.sequences.fa": type: file description: Mutated fasta sequence - pattern: "*.{MUT_sequences.fa}" + pattern: "*.{MUT.sequences.fa}" - wt_fasta: - meta: type: map description: | Groovy Map containing sample information e.g. 
`[ id:'sample1', single_end:false ]` - - "*_out/*.WT_sequences.fa": + - "*_out/*.WT.sequences.fa": type: file description: Wildtype fasta sequence - pattern: "*.{WT_sequences.fa}" + pattern: "*.{WT.sequences.fa}" - versions: - versions.yml: type: file diff --git a/modules/msk/neoantigenutils/generatemutfasta/resources/usr/bin/generateMutFasta.py b/modules/msk/neoantigenutils/generatemutfasta/resources/usr/bin/generateMutFasta.py index 253a41b..df2bdd9 100755 --- a/modules/msk/neoantigenutils/generatemutfasta/resources/usr/bin/generateMutFasta.py +++ b/modules/msk/neoantigenutils/generatemutfasta/resources/usr/bin/generateMutFasta.py @@ -72,8 +72,8 @@ def main(): reference_cdna_file = str(args.CDNA_file) peptide_lengths = [9, 10, 11] sample_path_pfx = output_dir + "/" + sample_id - mutated_sequences_fa = sample_path_pfx + ".MUT_sequences.fa" - WT_sequences_fa = sample_path_pfx + ".WT_sequences.fa" + mutated_sequences_fa = sample_path_pfx + ".MUT.sequences.fa" + WT_sequences_fa = sample_path_pfx + ".WT.sequences.fa" mutations = [] out_fa = open(mutated_sequences_fa, "w") @@ -350,15 +350,15 @@ def __init__(self, maf_row, cds_seq, cdna_seq): ] variant_type_map = { - "Missense_Mutation": "M", - "Nonsense_Mutation": "X", - "Silent_Mutation": "S", - "Silent": "S", - "Frame_shift_Ins": "I+", - "Frame_shift_Del": "I-", - "In_Frame_Ins": "If", - "In_Frame_Del": "Id", - "Splice_Site": "Sp", + "missense_mutation": "M", + "nonsense_nutation": "X", + "silent_mutation": "S", + "silent": "S", + "frame_shift_ins": "I+", + "frame_shift_del": "I-", + "in_frame_ins": "If", + "in_frame_del": "Id", + "splice_site": "Sp", } position = int(str(self.maf_row["Start_Position"])[0:2]) @@ -399,12 +399,12 @@ def __init__(self, maf_row, cds_seq, cdna_seq): # SNPs Allele2code = self.maf_row["Tumor_Seq_Allele2"] - if self.maf_row["Variant_Classification"] in variant_type_map: + if self.maf_row["Variant_Classification"].lower() in variant_type_map: self.identifier_key = ( 
str(self.maf_row["Chromosome"]) + encoded_position + "_" - + variant_type_map[self.maf_row["Variant_Classification"]] + + variant_type_map[(self.maf_row["Variant_Classification"]).lower()] + Allele2code ) else: diff --git a/modules/msk/neoantigenutils/generatemutfasta/tests/main.nf.test.snap b/modules/msk/neoantigenutils/generatemutfasta/tests/main.nf.test.snap index 132e856..5c9edc8 100644 --- a/modules/msk/neoantigenutils/generatemutfasta/tests/main.nf.test.snap +++ b/modules/msk/neoantigenutils/generatemutfasta/tests/main.nf.test.snap @@ -8,7 +8,7 @@ "id": "test", "single_end": false }, - "test.MUT_sequences.fa:md5,c236ca28e7c658b74377c19437d7c5ab" + "test.MUT.sequences.fa:md5,b668c05330e78204620421dccbb80c40" ] ], "1": [ @@ -17,7 +17,7 @@ "id": "test", "single_end": false }, - "test.WT_sequences.fa:md5,b1a6ea2978a6624c98112e8118658495" + "test.WT.sequences.fa:md5,4fcfee0a32d3c3b75c3ed24d1c462f48" ] ], "2": [ @@ -29,7 +29,7 @@ "id": "test", "single_end": false }, - "test.MUT_sequences.fa:md5,c236ca28e7c658b74377c19437d7c5ab" + "test.MUT.sequences.fa:md5,b668c05330e78204620421dccbb80c40" ] ], "versions": [ @@ -41,16 +41,16 @@ "id": "test", "single_end": false }, - "test.WT_sequences.fa:md5,b1a6ea2978a6624c98112e8118658495" + "test.WT.sequences.fa:md5,4fcfee0a32d3c3b75c3ed24d1c462f48" ] ] } ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.1" + "nextflow": "24.10.4" }, - "timestamp": "2024-11-22T17:16:33.363826904" + "timestamp": "2025-03-03T15:41:55.31143705" }, "neoantigenutils_generatemutfasta - maf - fasta - stub": { "content": [ @@ -61,7 +61,7 @@ "id": "test", "single_end": false }, - "test.MUT_sequences.fa:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.MUT.sequences.fa:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "1": [ @@ -70,7 +70,7 @@ "id": "test", "single_end": false }, - "test.WT_sequences.fa:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.WT.sequences.fa:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "2": [ @@ -82,7 +82,7 @@ "id": "test", "single_end": 
false }, - "test.MUT_sequences.fa:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.MUT.sequences.fa:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "versions": [ @@ -94,15 +94,15 @@ "id": "test", "single_end": false }, - "test.WT_sequences.fa:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.WT.sequences.fa:md5,d41d8cd98f00b204e9800998ecf8427e" ] ] } ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.1" + "nextflow": "24.10.4" }, - "timestamp": "2024-11-22T17:16:40.580416341" + "timestamp": "2025-03-03T15:42:03.290988947" } } \ No newline at end of file diff --git a/modules/msk/neoantigenutils/neoantigeninput/main.nf b/modules/msk/neoantigenutils/neoantigeninput/main.nf index 90147b3..015f822 100644 --- a/modules/msk/neoantigenutils/neoantigeninput/main.nf +++ b/modules/msk/neoantigenutils/neoantigeninput/main.nf @@ -2,16 +2,17 @@ process NEOANTIGENUTILS_NEOANTIGENINPUT { tag "$meta.id" label 'process_single' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'docker://mskcc/neoantigen-utils-base:1.1.0': - 'docker.io/mskcc/neoantigen-utils-base:1.1.0' }" + 'docker://mskcc/neoantigen-utils-base:1.3.0': + 'docker.io/mskcc/neoantigen-utils-base:1.3.0' }" input: tuple val(meta), path(inputMaf), path(inputBedpe, arity: '0..*'), path(hlaFile) tuple val(meta2), path(phyloWGSsumm), path(phyloWGSmut), path(phyloWGSfolder) tuple val(meta3), path(mutNetMHCpan), path(wtNetMHCpan) + tuple path(gtf), path(cdna) output: - tuple val(meta), path("*_.json"), emit: json + tuple val(meta), path("*_*.json"), emit: json path "versions.yml", emit: versions when: @@ -42,6 +43,8 @@ process NEOANTIGENUTILS_NEOANTIGENINPUT { --cohort ${cohort} --HLA_genes ${hlaFile} \ --netMHCpan_MUT_input ${mutNetMHCpan} \ --netMHCpan_WT_input ${wtNetMHCpan} \ + --gtf-file ${gtf} \ + --cdna-file ${cdna} \ ${args} cat <<-END_VERSIONS > versions.yml @@ -57,7 +60,7 @@ process NEOANTIGENUTILS_NEOANTIGENINPUT { def cohort =task.ext.cohort ?: "${meta.id}_cohort" """ - touch ${patientid}_${id}_.json + touch ${patientid}_${id}.json cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/msk/neoantigenutils/neoantigeninput/meta.yml b/modules/msk/neoantigenutils/neoantigeninput/meta.yml index 6e25bd9..cb03c6d 100644 --- a/modules/msk/neoantigenutils/neoantigeninput/meta.yml +++ b/modules/msk/neoantigenutils/neoantigeninput/meta.yml @@ -73,7 +73,7 @@ output: description: | Groovy Map containing sample information e.g. 
`[ id:'sample1', single_end:false ]` - - "*_.json": + - "*.json": type: file description: output combined Json ready for input into the neoantigen pipeline pattern: "*.{json}" diff --git a/modules/msk/neoantigenutils/neoantigeninput/resources/usr/bin/generate_input.py b/modules/msk/neoantigenutils/neoantigeninput/resources/usr/bin/generate_input.py index 7491ba5..435fe26 100755 --- a/modules/msk/neoantigenutils/neoantigeninput/resources/usr/bin/generate_input.py +++ b/modules/msk/neoantigenutils/neoantigeninput/resources/usr/bin/generate_input.py @@ -4,9 +4,12 @@ import pandas as pd import argparse import os +import Bio from Bio import pairwise2 from Bio.pairwise2 import format_alignment import numpy as np +from pyensembl.genome import Genome +from pyensembl import EnsemblRelease VERSION = 1.9 @@ -102,6 +105,7 @@ def makeChild(subTree, start): mutation_dict = ( {} ) # Used for matching mutation without the subsititution information from netMHCpan to phyloWGS output + gene_dict = {} mafdf = pd.read_csv(args.maf_file, delimiter="\t") @@ -119,6 +123,8 @@ def makeChild(subTree, start): else: missense = 0 + + if ( row["Variant_Type"] == "SNP" or row["Variant_Type"] == "DNP" @@ -142,6 +148,7 @@ def makeChild(subTree, start): + row["Tumor_Seq_Allele2"], "gene": row["Hugo_Symbol"], "missense": missense, + "transcript": row["Feature"] } mutation_list.append( @@ -187,6 +194,7 @@ def makeChild(subTree, start): + "D", "gene": row["Hugo_Symbol"], "missense": missense, + "transcript": row["Feature"] } mutation_list.append( @@ -240,6 +248,7 @@ def makeChild(subTree, start): + row["Tumor_Seq_Allele2"], "gene": row["Hugo_Symbol"], "missense": missense, + "transcript": row["Feature"] } mutation_list.append( @@ -326,6 +335,7 @@ def convert_polysolver_hla(polyHLA): bedpe_match_dict = {} + ensembl = ensembl_load(args.release, args.gtf_file, args.cdna_file) neoantigen_mut_in = pd.read_csv(args.netMHCpan_MUT_input, sep="\t") neoantigen_WT_in = pd.read_csv(args.netMHCpan_WT_input, 
sep="\t") @@ -345,6 +355,7 @@ def find_first_difference_index(str1, str2): wtsvid = "" row_WT_identity = trim_id(row_WT["Identity"]) IDsplit = row_WT_identity.split("_") + print(row_WT_identity) if len(IDsplit[0]) < 3: # it is from neoSV IDsplit = row_WT_identity.split("_") @@ -452,7 +463,9 @@ def find_most_similar_string(target, strings): first_AA_same_score, max_score, ) - + + NMD_dict = {} + for index_mut, row_mut in neoantigen_mut_in.iterrows(): row_MUT_identity = trim_id(row_mut["Identity"]) IDsplit = row_MUT_identity.split("_") @@ -461,6 +474,7 @@ def find_most_similar_string(target, strings): peplen = len(row_mut["peptide"]) matchfound = False frameshift= False + print(row_MUT_identity) if IDsplit[1][0] == "S" and IDsplit[1][1] != "p": # If it is a silent mutation. Silent mutations can either be S or SY. These include intron mutations. Splices can be Sp continue @@ -526,7 +540,6 @@ def find_most_similar_string(target, strings): if ("-" in IDsplit[1] or "+" in IDsplit[1]): frameshift = True - ( best_pepmatch, best_pepmatch2, @@ -566,9 +579,38 @@ def find_most_similar_string(target, strings): + 1 ) + + chrom, pos = mutation_dict[row_mut["Identity"]].split("_")[0:2] + + + if frameshift: mut_pos = "Frameshifted peptide" - + num_windows = len(list(WTdict[no_positon_ID]["peptides"].keys())) + num_windows_li = len(list(WTdict[no_positon_ID]["peptides"].values())) + # [pep.split(",") for pep in num_windows_li] + + # print(WTdict[no_positon_ID]["peptides"]) + print("FRAMESHIFT") + + if no_positon_ID in NMD_dict: + #NMD must only be calculated once per mutation + pass + else: + split_mutation_dict_ID = mutation_dict[row_MUT_identity].split("_") + print(split_mutation_dict_ID) + if split_mutation_dict_ID[2] == "I": + len_indel = len(split_mutation_dict_ID[3]) + elif split_mutation_dict_ID[3] == "D": + len_indel = 0 - len(split_mutation_dict_ID[2]) + else: + len_indel = 0 + transcriptID = chrom_pos_dict[mutation_dict[row_MUT_identity]]["transcript"] + 
NMD_dict[no_positon_ID] = determine_NMD(chrom, pos,num_windows,len_indel,ensembl,transcriptID) + + else: + NMD_dict[no_positon_ID] = "False" + if SV: neo_dict = { "id": row_MUT_identity @@ -582,12 +624,14 @@ def find_most_similar_string(target, strings): "mutation_id": bedpe_dict[ bedpe_match_dict[row_MUT_identity] ].id, + "Gene": chrom_pos_dict[mutation_dict[row_MUT_identity]]["gene"], "HLA_gene_id": row_mut["MHC"], "sequence": row_mut["peptide"], "WT_sequence": best_pepmatch, # WTdict[WTid]["peptide"], "mutated_position": mut_pos, "Kd": float(row_mut["affinity"]), "KdWT": float(WTdict[WTid]["affinity"]), + "NMD" : NMD_dict[no_positon_ID] } else: neo_dict = { @@ -599,16 +643,18 @@ def find_most_similar_string(target, strings): + "_" + row_mut["MHC"].split("-")[1].replace(":", "").replace("*", ""), "mutation_id": mutation_dict[row_MUT_identity], + "Gene": chrom_pos_dict[mutation_dict[row_MUT_identity]]["gene"], "HLA_gene_id": row_mut["MHC"], "sequence": row_mut["peptide"], "WT_sequence": best_pepmatch, # WTdict[WTid]["peptide"], "mutated_position": mut_pos, "Kd": float(row_mut["affinity"]), "KdWT": float(WTdict[WTid]["affinity"]), + "NMD" : NMD_dict[no_positon_ID] } outer_dict["neoantigens"].append(neo_dict) - outjson = args.patient_id + "_" + args.id + "_" + ".json" + outjson = args.patient_id + "_" + args.id + ".json" with open(outjson, "w") as tstout: json.dump(outer_dict, tstout, indent=1) @@ -654,16 +700,16 @@ def makeID(maf_row): ] variant_type_map = { - "Missense_Mutation": "M", - "Nonsense_Mutation": "X", - "Silent_Mutation": "S", - "Silent": "S", - "Frame_shift_Ins": "I+", - "Frame_shift_Del": "I-", - "In_Frame_Ins": "If", - "In_Frame_Del": "Id", - "Splice_Site": "Sp", - } + "missense_mutation": "M", + "nonsense_mutation": "X", + "silent_mutation": "S", + "silent": "S", + "frame_shift_ins": "I+", + "frame_shift_del": "I-", + "in_frame_ins": "If", + "in_frame_del": "Id", + "splice_site": "Sp", + } position = int(str(maf_row["Start_Position"])[0:2]) @@ 
-701,17 +747,16 @@ def makeID(maf_row): # SNPs Allele2code = maf_row["Tumor_Seq_Allele2"] - if maf_row["Variant_Classification"] in variant_type_map: + if maf_row["Variant_Classification"].lower() in variant_type_map: identifier_key = ( str(maf_row["Chromosome"]) + encoded_position + "_" - + variant_type_map[maf_row["Variant_Classification"]] + + variant_type_map[maf_row["Variant_Classification"].lower()] + Allele2code + "_M" # This indicates mutated. It is added in the generateMutFasta script as well but not in this function. ) else: - identifier_key = ( str(maf_row["Chromosome"]) + encoded_position @@ -889,6 +934,172 @@ def makeID_bedpe(chrom1, pos1, svclass): return identifier_key +def get_exon_range(transcript): + """ + :param transcript: transcript instance in pyensembl + :return: exon intervals of this transcript + from 5' to 3', exon 1, exon 2, ... + [start, end], start < end + """ + exon_ranges = [] + for exon in transcript.exons: + exon_ranges.append((exon.start, exon.end)) + return exon_ranges + +def get_longest_transcript(transcripts): + """ + :param transcripts: a list of Transcript(pyensembl) instances + :return: the longest transcript + """ + transcripts = sorted(transcripts, key=lambda t: t.end-t.start) + transcript = transcripts[-1] + return transcript + + +def get_transcript(chrom, pos, ensembl, complete=True): + """ + :param chrom: chromosome with no chr + :param pos: position of mutation + :param ensembl: Genome instance in pyensembl + :param complete: only consider complete transcripts + :return: firstly return the longest complete transcript, + if there is no complete transcript and + complete = False, return the longest transcript + """ + transcripts = ensembl.transcripts_at_locus(contig=str(chrom), position=int(pos)) + transcripts_comp = [transcript for transcript in transcripts if transcript.complete] + if transcripts_comp: + return get_longest_transcript(transcripts_comp) + else: + if complete: + return None + else: + if transcripts: + 
return get_longest_transcript(transcripts) + else: + return None + + +def ensembl_load(release, gtf_file, cdna_file): + """ + :param release: the release number in ENSEMBL, could be custom + :param gtf_file: the path of gtf file if release == custom + :param cdna_file: the path of cdna file if release == custom + :param cache_dir: directory for pyensembl downloading + :return: a Genome class in pyensembl + """ + # if release != 'custom': + # print("doing a new one") + # ensembl = EnsemblRelease(int(release)) + # ensembl.download() + # ensembl.index() + + # else: + ensembl = Genome(gtf_path_or_url=gtf_file, + transcript_fasta_paths_or_urls=cdna_file, + reference_name='User-defined', + annotation_name='User-defined') + ensembl.index() + return ensembl + + + +def get_exons_from_transcriptID(transcriptid, ensembl, complete=True): + """ + :param transcriptid: transcriptid from MAF col + :param ensembl: Genome instance in pyensembl + :param complete: only consider complete transcripts + :return: a list of tuples of exon ranges + """ + + transcripts= ensembl.exon_ids_of_transcript_id(transcriptid) + + exon_ranges = [] + for exonid in transcripts: + exon = ensembl.exon_by_id(exonid) + exon_ranges.append((exon.start, exon.end)) + + return exon_ranges + + + +def determine_NMD(chrom, pos,num_windows,len_indel, ensembl, transcriptID=None): + """ + :param chrom: chromosome where alteration takes place + :param pos: position where alteration takes place + :param transcriptID: transcriptID from the MAF. If it isn't annotated, then use longest transcript + :param num_windows: number of windows created by the peptide + :param len_indel: Length of the indel. 
Negative if del, positive if ins + :param gtf_file: the path of gtf file if release == custom + :param cdna_file: the path of cdna file if release == custom + :return: NMD value + """ + + if transcriptID == None: + #do these if maf was not annotated + transcript = get_transcript(chrom, pos, ensembl) + exon_ranges = get_exon_range(transcript) + else: + exon_ranges = get_exons_from_transcriptID(transcriptID,ensembl) + + NMD = "False" + + pos = int(pos) + for i in range(0,len(exon_ranges)): + if pos>exon_ranges[i][0] and pos= 0: + + PTC_exon = exon_ranges[d] + PTC_pos = exon_ranges[d][0] + mut_to_stop_dist + # print(("Found everything!",PTC_exon,PTC_pos,mut_to_stop_dist)) + else: + mut_to_stop_dist = mut_to_stop_dist - dist + # print((exon_ranges_dist[d],mut_to_stop_dist)) + + if PTC_exon == exon_ranges[-1]: + # "on the last exon" + NMD = "Last Exon" + print("on the last exon") + else: + if exon_ranges[0][0] - PTC_pos < 150: + # less than 150 nt away from the start exon + NMD = "Start-proximal" + # print((exon_ranges[0][0] , PTC_pos,exon_ranges[0][0] - PTC_pos)) + # print("less than 150 nt away from the start exon") + else: + if (PTC_exon[1] - PTC_exon[0]) > 407: + # in a long exon with more than 407 nt + NMD = "Long Exon" + print("in a long exon with more than 407 nt") + else: + # it is in the last 50 nt of the penultimate exon + if PTC_exon == exon_ranges[-2] and (exon_ranges[-2][0] - PTC_pos) < 50 : + NMD = "50nt Rule" + # print(num_windows) + # print(exon_ranges) + # print(("mut EXON:",exon_ranges[i])) + # print(("PTC EXON",PTC_exon)) + # print(exon_ranges[i][-2:]) + print("within 50 nt of the penultimate exon") + else: + NMD = "Trigger NMD" + + return NMD + + + def parse_args(): parser = argparse.ArgumentParser(description="Process input files and parameters") parser.add_argument("--maf_file", required=True, help="Path to the MAF file") @@ -904,6 +1115,17 @@ def parse_args(): required=True, help="Path to the tree directory containing json files", ) + 
parser.add_argument('-r', '--release', dest='release', metavar='RELEASE', default='75', + help='Which reference (ENSEMBL release) you want to use. Ensembl releases that ' + 'correspond to hg18/NCBI36, hg19/GRCh37, hg38/GRCh38 are 54, 75, 95. ' + 'If your data are from other species(custom), please download the gtf ' + 'file and the cdna file from ENSEMBL website ftp://ftp.ensembl.org/pub' + ' and specify them using --gtf-file and --cdna-file.') + parser.add_argument('-gf', '--gtf-file', dest='gtf_file', metavar='GTF_FILE', default=None, + help='GTF file for the reference.') + parser.add_argument('-cf', '--cdna-file', dest='cdna_file', metavar='CDNA_FILE', default=None, + help='cDNA file for the reference.') + parser.add_argument("--id", required=True, help="ID") parser.add_argument("--patient_id", required=True, help="Patient ID") parser.add_argument("--cohort", required=True, help="Cohort") @@ -948,6 +1170,7 @@ def parse_args(): print("HLA Genes File:", args.HLA_genes) print("netMHCpan Files:", args.netMHCpan_MUT_input, args.netMHCpan_WT_input) print("kD Cutoff Value:", args.kD_cutoff) + print("Ensembl files:", args.gtf_file, args.cdna_file) if args.patient_data_file: print("patient_data_file File:", args.patient_data_file) diff --git a/modules/msk/neoantigenutils/neoantigeninput/tests/main.nf.test b/modules/msk/neoantigenutils/neoantigeninput/tests/main.nf.test index 577790e..6582671 100644 --- a/modules/msk/neoantigenutils/neoantigeninput/tests/main.nf.test +++ b/modules/msk/neoantigenutils/neoantigeninput/tests/main.nf.test @@ -29,9 +29,9 @@ nextflow_process { input[1] = [ [ id:'test', single_end:false ], // meta map - file(params.test_data_mskcc['neoantigen']['test4_summ_json'], checkIfExists: true), - file(params.test_data_mskcc['neoantigen']['test4_muts_json'], checkIfExists: true), - file(params.test_data_mskcc['neoantigen']['test4_mutass_zip'], checkIfExists: true) + file(params.test_data_mskcc['neoantigen']['test_summ_json'], checkIfExists: true), + 
file(params.test_data_mskcc['neoantigen']['test_muts_json'], checkIfExists: true), + file(params.test_data_mskcc['neoantigen']['test_mutass_zip'], checkIfExists: true) ] input[2] = [ @@ -40,6 +40,11 @@ nextflow_process { file(params.test_data_mskcc['neoantigen']['WTnetMHCpan_tsv'], checkIfExists: true) ] + input[3] = [ + file(params.test_data_mskcc['neoantigen']['gtf'], checkIfExists: true), + file(params.test_data_mskcc['neoantigen']['cdna'], checkIfExists: true) + ] + """ } } @@ -72,9 +77,9 @@ nextflow_process { input[1] = [ [ id:'test', single_end:false ], // meta map - file(params.test_data_mskcc['neoantigen']['test4_summ_json'], checkIfExists: true), - file(params.test_data_mskcc['neoantigen']['test4_muts_json'], checkIfExists: true), - file(params.test_data_mskcc['neoantigen']['test4_mutass_zip'], checkIfExists: true) + file(params.test_data_mskcc['neoantigen']['test_summ_json'], checkIfExists: true), + file(params.test_data_mskcc['neoantigen']['test_muts_json'], checkIfExists: true), + file(params.test_data_mskcc['neoantigen']['test_mutass_zip'], checkIfExists: true) ] input[2] = [ @@ -83,6 +88,11 @@ nextflow_process { file(params.test_data_mskcc['neoantigen']['WTnetMHC_tsv'], checkIfExists: true) ] + input[3] = [ + file(params.test_data_mskcc['neoantigen']['gtf'], checkIfExists: true), + file(params.test_data_mskcc['neoantigen']['cdna'], checkIfExists: true) + ] + """ } } @@ -118,8 +128,8 @@ nextflow_process { input[1] = [ [ id:'test', single_end:false ], // meta map - file('test4_summ_json'), - file('test4_muts_json'), + file('test_summ_json'), + file('test_muts_json'), file(params.folderPath) ] @@ -129,6 +139,11 @@ nextflow_process { file('WTnetMHCpan.tsv') ] + input[3] = [ + file('gtf.tsv'), + file('cdna.tsv') + ] + """ } } diff --git a/modules/msk/neoantigenutils/neoantigeninput/tests/main.nf.test.snap b/modules/msk/neoantigenutils/neoantigeninput/tests/main.nf.test.snap index 6ba57a7..67f8e0b 100644 --- 
a/modules/msk/neoantigenutils/neoantigeninput/tests/main.nf.test.snap +++ b/modules/msk/neoantigenutils/neoantigeninput/tests/main.nf.test.snap @@ -8,11 +8,11 @@ "id": "test", "single_end": false }, - "test_patient_test_.json:md5,d41d8cd98f00b204e9800998ecf8427e" + "test_patient_test.json:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "1": [ - "versions.yml:md5,a1bf48540f950da8922ef24da42f6ec5" + "versions.yml:md5,54d926e53336463100f72cc431eeaf09" ], "json": [ [ @@ -20,19 +20,19 @@ "id": "test", "single_end": false }, - "test_patient_test_.json:md5,d41d8cd98f00b204e9800998ecf8427e" + "test_patient_test.json:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "versions": [ - "versions.yml:md5,a1bf48540f950da8922ef24da42f6ec5" + "versions.yml:md5,54d926e53336463100f72cc431eeaf09" ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2025-02-05T14:38:57.427385617" + "timestamp": "2025-03-03T15:49:38.266725941" }, "neoantigenutils_neoantigeninput - bedpe,json,tsv": { "content": [ @@ -43,11 +43,11 @@ "id": "test", "single_end": false }, - "test_patient_test_.json:md5,7fdb25ccc6ed41f53aa03507d982ea05" + "test_patient_test.json:md5,ba459d11af4195b9cbc2e3ac63792893" ] ], "1": [ - "versions.yml:md5,a1bf48540f950da8922ef24da42f6ec5" + "versions.yml:md5,54d926e53336463100f72cc431eeaf09" ], "json": [ [ @@ -55,19 +55,19 @@ "id": "test", "single_end": false }, - "test_patient_test_.json:md5,7fdb25ccc6ed41f53aa03507d982ea05" + "test_patient_test.json:md5,ba459d11af4195b9cbc2e3ac63792893" ] ], "versions": [ - "versions.yml:md5,a1bf48540f950da8922ef24da42f6ec5" + "versions.yml:md5,54d926e53336463100f72cc431eeaf09" ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2025-02-05T14:38:35.2939696" + "timestamp": "2025-03-03T15:44:12.409474458" }, "neoantigenutils_neoantigeninput - json,tsv": { "content": [ @@ -78,11 +78,11 @@ "id": "test", 
"single_end": false }, - "test_patient_test_.json:md5,a81ee3977fa393850e0b7b36321d1143" + "test_patient_test.json:md5,f386dd295154740155b34883a40ae54e" ] ], "1": [ - "versions.yml:md5,a1bf48540f950da8922ef24da42f6ec5" + "versions.yml:md5,54d926e53336463100f72cc431eeaf09" ], "json": [ [ @@ -90,18 +90,18 @@ "id": "test", "single_end": false }, - "test_patient_test_.json:md5,a81ee3977fa393850e0b7b36321d1143" + "test_patient_test.json:md5,f386dd295154740155b34883a40ae54e" ] ], "versions": [ - "versions.yml:md5,a1bf48540f950da8922ef24da42f6ec5" + "versions.yml:md5,54d926e53336463100f72cc431eeaf09" ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2025-02-05T14:38:48.778108545" + "timestamp": "2025-03-03T15:49:31.503843906" } } \ No newline at end of file diff --git a/modules/msk/netmhc3/meta.yml b/modules/msk/netmhc3/meta.yml index f920ab2..9fdca61 100644 --- a/modules/msk/netmhc3/meta.yml +++ b/modules/msk/netmhc3/meta.yml @@ -58,7 +58,7 @@ output: - "*.netmhc.output": type: file description: - STDOUT file of netMHCpan. A poorly formated file of neoantigens. This + STDOUT file of netMHC. A poorly formated file of neoantigens. This contains either the MUT or WT neoantigens. Neoantigenutils contains a parser for this file. 
pattern: "*.WT.netmhc.output,*.MUT.netmhc.output" diff --git a/modules/msk/phylowgs/createinput/tests/main.nf.test.snap b/modules/msk/phylowgs/createinput/tests/main.nf.test.snap index 377f422..5784e31 100644 --- a/modules/msk/phylowgs/createinput/tests/main.nf.test.snap +++ b/modules/msk/phylowgs/createinput/tests/main.nf.test.snap @@ -30,7 +30,11 @@ ] } ], - "timestamp": "2024-06-11T18:00:07.554362" + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.4" + }, + "timestamp": "2025-03-03T15:59:43.605713284" }, "PHYLOWGS_CREATEINPUT - txt": { "content": [ @@ -41,8 +45,8 @@ "id": "test", "single_end": false }, - "cnv_data.txt:md5,9228c0a7bce478e06db5d4304be3dbf7", - "ssm_data.txt:md5,055b7b5359a50eefd86fc85fcc023c6f" + "cnv_data.txt:md5,1a3d26484389d5b3b35d20ac5e01f32c", + "ssm_data.txt:md5,3a23e8d67f32ea13edb62882952f4122" ] ], "1": [ @@ -54,8 +58,8 @@ "id": "test", "single_end": false }, - "cnv_data.txt:md5,9228c0a7bce478e06db5d4304be3dbf7", - "ssm_data.txt:md5,055b7b5359a50eefd86fc85fcc023c6f" + "cnv_data.txt:md5,1a3d26484389d5b3b35d20ac5e01f32c", + "ssm_data.txt:md5,3a23e8d67f32ea13edb62882952f4122" ] ], "versions": [ @@ -63,6 +67,10 @@ ] } ], - "timestamp": "2024-06-11T18:00:00.11222" + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.4" + }, + "timestamp": "2025-03-03T15:59:38.659729442" } } \ No newline at end of file diff --git a/subworkflows/msk/neoantigen_editing/tests/main.nf.test b/subworkflows/msk/neoantigen_editing/tests/main.nf.test index 349be79..774875a 100644 --- a/subworkflows/msk/neoantigen_editing/tests/main.nf.test +++ b/subworkflows/msk/neoantigen_editing/tests/main.nf.test @@ -3,6 +3,7 @@ nextflow_workflow { name "Test Workflow NEOANTIGEN_EDITING" script "../main.nf" workflow "NEOANTIGEN_EDITING" + config "./nextflow.config" tag "subworkflows" tag "subworkflows_nfcore" diff --git a/subworkflows/msk/neoantigen_editing/tests/nextflow.config b/subworkflows/msk/neoantigen_editing/tests/nextflow.config new file mode 100644 index 
0000000..33d9f71 --- /dev/null +++ b/subworkflows/msk/neoantigen_editing/tests/nextflow.config @@ -0,0 +1,9 @@ +params { + enable_conda = false +} + +process { + withName: 'NEOANTIGENEDITING_COMPUTEFITNESS' { + ext.args = '--a_param 22.897590714815188 --k_param 1 --w_param 0.22402192838740312' + } +} diff --git a/subworkflows/msk/netmhcstabandpan/tests/main.nf.test.snap b/subworkflows/msk/netmhcstabandpan/tests/main.nf.test.snap index aeeee53..7c00772 100644 --- a/subworkflows/msk/netmhcstabandpan/tests/main.nf.test.snap +++ b/subworkflows/msk/netmhcstabandpan/tests/main.nf.test.snap @@ -2,61 +2,61 @@ "netmhcstabandpan - tsv,xls,fa": { "content": [ "test_netmhcpan.output.WT.tsv", - "test.MUT_sequences.fa:md5,7fdb7d3f0fe5a6f439ed294b612c2d70", - "test.WT_sequences.fa:md5,7595ed6cf0c98500b00c9ad027125b38" + "test.MUT.sequences.fa:md5,7fdb7d3f0fe5a6f439ed294b612c2d70", + "test.WT.sequences.fa:md5,7595ed6cf0c98500b00c9ad027125b38" ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-12-17T15:32:48.122038795" + "timestamp": "2025-03-03T16:12:54.843023535" }, "netmhcstabandpan - tsv,xls,fa - stub": { "content": [ "h", - "test.MUT_sequences.fa:md5,d41d8cd98f00b204e9800998ecf8427e", - "test.WT_sequences.fa:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.MUT.sequences.fa:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.WT.sequences.fa:md5,d41d8cd98f00b204e9800998ecf8427e" ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-12-17T15:33:59.951435808" + "timestamp": "2025-03-03T16:13:44.059191142" }, "netmhcstabandnetmhc3 - SV - tsv,xls,fa": { "content": [ "test_netmhc.output.WT.tsv", - "test.MUT_sequences.fa:md5,118b48df96c7217675b9f9ac14309a25", - "test.WT_sequences.fa:md5,bb7dfff23ae47cf64ec4854ee48ec78d" + "test.MUT.sequences.fa:md5,118b48df96c7217675b9f9ac14309a25", + 
"test.WT.sequences.fa:md5,bb7dfff23ae47cf64ec4854ee48ec78d" ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-12-17T15:31:41.007610492" + "timestamp": "2025-03-03T16:12:14.551189054" }, "netmhcstabandpan - SV - tsv,xls,fa": { "content": [ "test_netmhcpan.output.WT.tsv", - "test.MUT_sequences.fa:md5,118b48df96c7217675b9f9ac14309a25", - "test.WT_sequences.fa:md5,bb7dfff23ae47cf64ec4854ee48ec78d" + "test.MUT.sequences.fa:md5,118b48df96c7217675b9f9ac14309a25", + "test.WT.sequences.fa:md5,bb7dfff23ae47cf64ec4854ee48ec78d" ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-12-17T15:29:06.330489514" + "timestamp": "2025-03-03T16:10:51.864889003" }, "netmhcstabandnetmhc3 - tsv,xls,fa": { "content": [ "test_netmhc.output.WT.tsv", - "test.MUT_sequences.fa:md5,7fdb7d3f0fe5a6f439ed294b612c2d70", - "test.WT_sequences.fa:md5,7595ed6cf0c98500b00c9ad027125b38" + "test.MUT.sequences.fa:md5,7fdb7d3f0fe5a6f439ed294b612c2d70", + "test.WT.sequences.fa:md5,7595ed6cf0c98500b00c9ad027125b38" ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-12-17T15:33:45.014740679" + "timestamp": "2025-03-03T16:13:34.85619969" } } \ No newline at end of file From aee5695a098652270684f708e2b11a20c3611081 Mon Sep 17 00:00:00 2001 From: Nikhil Kumar Date: Thu, 6 Mar 2025 17:04:11 -0500 Subject: [PATCH 3/6] Updated config files --- conf/modules.config | 5 ++++- conf/prod.config | 4 ++++ conf/test.config | 4 ++++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index 3595557..d446082 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -32,4 +32,7 @@ process { withName: 'NEOANTIGENUTILS_NEOANTIGENINPUT' { ext.args = "--kD_cutoff ${params.kd_cutoff}" } -} + + withName: 'NEOANTIGENEDITING_COMPUTEFITNESS' { + ext.args 
= "--a_param ${params.compute_fitness_a} --k_param ${params.compute_fitness_k} --w_param ${params.compute_fitness_w}" + } diff --git a/conf/prod.config b/conf/prod.config index f92277d..a30917f 100644 --- a/conf/prod.config +++ b/conf/prod.config @@ -37,8 +37,12 @@ params { phylo_mcmc_samples = 2500 phylo_num_chains = 15 kd_cutoff = 500 + compute_fitness_a = 22.897590714815188 + compute_fitness_k = 1 + compute_fitness_w = 0.22402192838740312 iedbfasta = 'https://raw.githubusercontent.com/mskcc/NeoantigenEditing/refs/heads/main/data/iedb.fasta' cds = 'https://github.com/mskcc-omics-workflows/test-datasets/raw/neoantigen/neoantigen/Homo_sapiens.GRCh37.75.cds.all.fa.gz' cdna = 'https://github.com/mskcc-omics-workflows/test-datasets/raw/neoantigen/neoantigen/Homo_sapiens.GRCh37.75.cdna.all.fa.gz' + gtf = 'https://github.com/mskcc-omics-workflows/test-datasets/raw/neoantigen/neoantigen/Homo_sapiens.GRCh37.75.gtf.gz' } diff --git a/conf/test.config b/conf/test.config index 9528af1..4a0665e 100644 --- a/conf/test.config +++ b/conf/test.config @@ -33,8 +33,12 @@ params { phylo_num_chains = 2 netmhc3 = true kd_cutoff = 500 + compute_fitness_a = 22.897590714815188 + compute_fitness_k = 1 + compute_fitness_w = 0.22402192838740312 iedbfasta = 'https://raw.githubusercontent.com/mskcc-omics-workflows/test-datasets/neoantigen/neoantigen/neoantigenEditing/data/iedb.fasta' cds = 'https://github.com/mskcc-omics-workflows/test-datasets/raw/neoantigen/neoantigen/Homo_sapiens.GRCh37.75.cds.all.fa.gz' cdna = 'https://github.com/mskcc-omics-workflows/test-datasets/raw/neoantigen/neoantigen/Homo_sapiens.GRCh37.75.cdna.all.fa.gz' + gtf = 'https://github.com/mskcc-omics-workflows/test-datasets/raw/neoantigen/neoantigen/Homo_sapiens.GRCh37.75.gtf.gz' } From faef2cf59d362e6302a47fe98e0d5be80ee09577 Mon Sep 17 00:00:00 2001 From: Nikhil Kumar Date: Thu, 6 Mar 2025 18:30:22 -0500 Subject: [PATCH 4/6] Fix modules config format --- conf/modules.config | 1 + 1 file changed, 1 insertion(+) diff 
--git a/conf/modules.config b/conf/modules.config index d446082..73f9e26 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -36,3 +36,4 @@ process { withName: 'NEOANTIGENEDITING_COMPUTEFITNESS' { ext.args = "--a_param ${params.compute_fitness_a} --k_param ${params.compute_fitness_k} --w_param ${params.compute_fitness_w}" } +} From 0491bbe39307dfb496695df261479aa54d1970c2 Mon Sep 17 00:00:00 2001 From: Nikhil Kumar Date: Thu, 6 Mar 2025 19:12:36 -0500 Subject: [PATCH 5/6] Update workflow to work with modules --- workflows/neoantigenpipeline.nf | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/workflows/neoantigenpipeline.nf b/workflows/neoantigenpipeline.nf index 81af76f..6549419 100644 --- a/workflows/neoantigenpipeline.nf +++ b/workflows/neoantigenpipeline.nf @@ -35,6 +35,8 @@ workflow NEOANTIGENPIPELINE { ch_cds_and_cdna = Channel.value([file(params.cds), file(params.cdna)]) + ch_gtf_and_cdna = Channel.value([file(params.gtf), file(params.cdna)]) + ch_samplesheet.map { meta, maf, facets_hisens_cncf, hla_file -> [meta, maf, hla_file] @@ -90,7 +92,7 @@ workflow NEOANTIGENPIPELINE { new Tuple(it[0], it[6], it[7]) } - NEOANTIGENUTILS_NEOANTIGENINPUT(merged_netMHC_input,merged_phylo_output,merged_netmhc_tsv) + NEOANTIGENUTILS_NEOANTIGENINPUT(merged_netMHC_input,merged_phylo_output,merged_netmhc_tsv,ch_gtf_and_cdna) ch_versions = ch_versions.mix(NEOANTIGENUTILS_NEOANTIGENINPUT.out.versions) From f22ccfde5d3bad35eb251c8b7dc8da144ce9d2d8 Mon Sep 17 00:00:00 2001 From: Nikhil Kumar Date: Fri, 7 Mar 2025 12:47:41 -0500 Subject: [PATCH 6/6] Update neoantigeninput module to be more specific in output json selection --- modules.json | 2 +- .../neoantigenutils/neoantigeninput/main.nf | 4 ++-- .../resources/usr/bin/generate_input.py | 2 +- .../neoantigeninput/tests/main.nf.test.snap | 24 +++++++++---------- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/modules.json b/modules.json index 4f09566..02e00db 100644 --- a/modules.json 
+++ b/modules.json @@ -37,7 +37,7 @@ }, "neoantigenutils/neoantigeninput": { "branch": "develop", - "git_sha": "e8ae6eb203cb5a49f98cacd4734090c182ad5f44", + "git_sha": "8b8e8d28b628b6244f4539cc405f2561fd733967", "installed_by": ["modules"] }, "netmhc3": { diff --git a/modules/msk/neoantigenutils/neoantigeninput/main.nf b/modules/msk/neoantigenutils/neoantigeninput/main.nf index 015f822..e013516 100644 --- a/modules/msk/neoantigenutils/neoantigeninput/main.nf +++ b/modules/msk/neoantigenutils/neoantigeninput/main.nf @@ -12,7 +12,7 @@ process NEOANTIGENUTILS_NEOANTIGENINPUT { tuple path(gtf), path(cdna) output: - tuple val(meta), path("*_*.json"), emit: json + tuple val(meta), path("*_input.json"), emit: json path "versions.yml", emit: versions when: @@ -60,7 +60,7 @@ process NEOANTIGENUTILS_NEOANTIGENINPUT { def cohort =task.ext.cohort ?: "${meta.id}_cohort" """ - touch ${patientid}_${id}.json + touch ${patientid}_${id}_input.json cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/msk/neoantigenutils/neoantigeninput/resources/usr/bin/generate_input.py b/modules/msk/neoantigenutils/neoantigeninput/resources/usr/bin/generate_input.py index 435fe26..c50eaab 100755 --- a/modules/msk/neoantigenutils/neoantigeninput/resources/usr/bin/generate_input.py +++ b/modules/msk/neoantigenutils/neoantigeninput/resources/usr/bin/generate_input.py @@ -654,7 +654,7 @@ def find_most_similar_string(target, strings): } outer_dict["neoantigens"].append(neo_dict) - outjson = args.patient_id + "_" + args.id + ".json" + outjson = args.patient_id + "_" + args.id + "_" + "input.json" with open(outjson, "w") as tstout: json.dump(outer_dict, tstout, indent=1) diff --git a/modules/msk/neoantigenutils/neoantigeninput/tests/main.nf.test.snap b/modules/msk/neoantigenutils/neoantigeninput/tests/main.nf.test.snap index 67f8e0b..64a8a9f 100644 --- a/modules/msk/neoantigenutils/neoantigeninput/tests/main.nf.test.snap +++ 
b/modules/msk/neoantigenutils/neoantigeninput/tests/main.nf.test.snap @@ -8,7 +8,7 @@ "id": "test", "single_end": false }, - "test_patient_test.json:md5,d41d8cd98f00b204e9800998ecf8427e" + "test_patient_test_input.json:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "1": [ @@ -20,7 +20,7 @@ "id": "test", "single_end": false }, - "test_patient_test.json:md5,d41d8cd98f00b204e9800998ecf8427e" + "test_patient_test_input.json:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "versions": [ @@ -30,9 +30,9 @@ ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.4" + "nextflow": "24.04.4" }, - "timestamp": "2025-03-03T15:49:38.266725941" + "timestamp": "2025-03-07T11:55:57.067412639" }, "neoantigenutils_neoantigeninput - bedpe,json,tsv": { "content": [ @@ -43,7 +43,7 @@ "id": "test", "single_end": false }, - "test_patient_test.json:md5,ba459d11af4195b9cbc2e3ac63792893" + "test_patient_test_input.json:md5,ba459d11af4195b9cbc2e3ac63792893" ] ], "1": [ @@ -55,7 +55,7 @@ "id": "test", "single_end": false }, - "test_patient_test.json:md5,ba459d11af4195b9cbc2e3ac63792893" + "test_patient_test_input.json:md5,ba459d11af4195b9cbc2e3ac63792893" ] ], "versions": [ @@ -65,9 +65,9 @@ ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.4" + "nextflow": "24.04.4" }, - "timestamp": "2025-03-03T15:44:12.409474458" + "timestamp": "2025-03-07T11:52:19.151192995" }, "neoantigenutils_neoantigeninput - json,tsv": { "content": [ @@ -78,7 +78,7 @@ "id": "test", "single_end": false }, - "test_patient_test.json:md5,f386dd295154740155b34883a40ae54e" + "test_patient_test_input.json:md5,f386dd295154740155b34883a40ae54e" ] ], "1": [ @@ -90,7 +90,7 @@ "id": "test", "single_end": false }, - "test_patient_test.json:md5,f386dd295154740155b34883a40ae54e" + "test_patient_test_input.json:md5,f386dd295154740155b34883a40ae54e" ] ], "versions": [ @@ -100,8 +100,8 @@ ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.4" + "nextflow": "24.04.4" }, - "timestamp": "2025-03-03T15:49:31.503843906" + "timestamp": 
"2025-03-07T11:55:45.944757678" } } \ No newline at end of file