diff --git a/CHANGELOG.md b/CHANGELOG.md index e62628fc6..ffc3b4db4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -106,6 +106,7 @@ Thank you to everyone else that has contributed by reporting bugs, enhancements - [PR #1341](https://github.com/nf-core/rnaseq/pull/1341) - Add rename in the MultiQC report for samples without techreps - [PR #1342](https://github.com/nf-core/rnaseq/pull/1342) - Factor out preprocessing - [PR #1345](https://github.com/nf-core/rnaseq/pull/1345) - Fix preprocessing call +- [PR #1350](https://github.com/nf-core/rnaseq/pull/1350) - Reduce resource usage for sort process in bedtools/genomecov ### Parameters diff --git a/modules.json b/modules.json index f197e3f65..87e2d176c 100644 --- a/modules.json +++ b/modules.json @@ -12,7 +12,7 @@ }, "bedtools/genomecov": { "branch": "master", - "git_sha": "81b90194ce9911dbd55bba2c65c6919f6677abc4", + "git_sha": "9ba6b02bbcb322ff4265cc51fca0ee5d8420b929", "installed_by": ["modules"] }, "cat/fastq": { diff --git a/modules/nf-core/bedtools/genomecov/main.nf b/modules/nf-core/bedtools/genomecov/main.nf index 954e8913d..8403c5303 100644 --- a/modules/nf-core/bedtools/genomecov/main.nf +++ b/modules/nf-core/bedtools/genomecov/main.nf @@ -4,8 +4,8 @@ process BEDTOOLS_GENOMECOV { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bedtools:2.31.1--hf5e1c6e_0' : - 'biocontainers/bedtools:2.31.1--hf5e1c6e_0' }" + 'oras://community.wave.seqera.io/library/bedtools_coreutils:ba273c06a3909a15': + 'community.wave.seqera.io/library/bedtools_coreutils:a623c13f66d5262b' }" input: tuple val(meta), path(intervals), val(scale) @@ -21,13 +21,16 @@ process BEDTOOLS_GENOMECOV { task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' + def args = task.ext.args ?: '' def args_list = args.tokenize() args += (scale > 0 && scale != 1) ? " -scale $scale" : "" if (!args_list.contains('-bg') && (scale > 0 && scale != 1)) { args += " -bg" } - def sort_cmd = sort ? '| bedtools sort' : '' + // Sorts output file by chromosome and position using additional options for performance and consistency + // See https://www.biostars.org/p/66927/ for further details + def buffer = task.memory ? "--buffer-size=${task.memory.toGiga().intdiv(2)}G" : '' + def sort_cmd = sort ? "| LC_ALL=C sort --parallel=$task.cpus $buffer -k1,1 -k2,2n" : '' def prefix = task.ext.prefix ?: "${meta.id}" if (intervals.name =~ /\.bam/) { diff --git a/modules/nf-core/bedtools/genomecov/tests/main.nf.test b/modules/nf-core/bedtools/genomecov/tests/main.nf.test index fe1567d89..16a03492c 100644 --- a/modules/nf-core/bedtools/genomecov/tests/main.nf.test +++ b/modules/nf-core/bedtools/genomecov/tests/main.nf.test @@ -4,10 +4,15 @@ nextflow_process { process "BEDTOOLS_GENOMECOV" config "./nextflow.config" - test("sarscov2 - no scale") { + tag "modules" + tag "modules_nfcore" + tag "bedtools" + tag "bedtools/genomecov" + + test("sarscov2 - no scale") { when { process { - """ + """ input[0] = [ [ id:'test' ], // meta map file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.bam", checkIfExists: true), @@ -25,16 +30,15 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out).match("no_scale") } + { assert snapshot(process.out).match() } ) } - } test("sarscov2 - dummy sizes") { when { process { - """ + """ input[0] = [ [ id:'test'], file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.bam", checkIfExists: true), @@ -52,16 +56,15 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out).match("dummy_sizes") } + { assert snapshot(process.out).match() } ) } - } test("sarscov2 - scale") { when { process { - """ + """ input[0] = [ [ id:'test'], file(params.modules_testdata_base_path + "genomics/sarscov2/genome/bed/baits.bed", checkIfExists: true), @@ -79,18 +82,17 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out).match("scale") } + { assert snapshot(process.out).match() } ) } - } - test("stub") { + test("sarscov2 - no scale - stub") { options "-stub" when { process { - """ + """ input[0] = [ [ id:'test' ], // meta map file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.bam", checkIfExists: true), @@ -99,6 +101,34 @@ nextflow_process { // sizes input[1] = [] // extension + input[2] = "txt" + input[3] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - dummy sizes - stub") { + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.bam", checkIfExists: true), + 0.5 + ] + // sizes + input[1] = file('dummy_chromosome_sizes') + // extension input[2] = 'txt' input[3] = false """ @@ -108,10 +138,37 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(file(process.out.genomecov[0][1]).name).match("stub") } + { assert snapshot(process.out).match() } ) } + } + + test("sarscov2 - scale - stub") { + options "-stub" + when { + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + "genomics/sarscov2/genome/bed/baits.bed", checkIfExists: true), + 0.5 + ] + // sizes + input[1] = file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.sizes", checkIfExists: true) + // extension + input[2] = 'txt' + input[3] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } } } diff --git a/modules/nf-core/bedtools/genomecov/tests/main.nf.test.snap b/modules/nf-core/bedtools/genomecov/tests/main.nf.test.snap index 8f9191e4c..da6dbe875 100644 --- a/modules/nf-core/bedtools/genomecov/tests/main.nf.test.snap +++ b/modules/nf-core/bedtools/genomecov/tests/main.nf.test.snap @@ -1,5 +1,5 @@ { - "dummy_sizes": { + "sarscov2 - dummy sizes": { "content": [ { "0": [ @@ -26,9 +26,13 @@ ] } ], - "timestamp": "2023-12-05T17:35:58.35232" + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T11:59:33.898146" }, - "no_scale": { + "sarscov2 - no scale - stub": { "content": [ { "0": [ @@ -36,7 +40,7 @@ { "id": "test" }, - "test.coverage.txt:md5,66083198daca6c001d328ba9616e9b53" + "test.coverage.txt:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "1": [ @@ -47,7 +51,7 @@ { "id": "test" }, - "test.coverage.txt:md5,66083198daca6c001d328ba9616e9b53" + "test.coverage.txt:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "versions": [ @@ -55,15 +59,46 @@ ] } ], - "timestamp": "2023-12-05T17:35:51.142496" + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T11:59:52.483371" }, - "stub": { + "sarscov2 - scale": { "content": [ - "test.coverage.txt" + { + "0": [ + [ + { + "id": "test" + }, + "test.coverage.txt:md5,de3c59c0ea123bcdbbad27bc0a0a601e" + ] + ], + "1": [ + "versions.yml:md5,5fd44452613992a6f71f2c73d2e117f2" + ], + "genomecov": [ + [ + { + "id": "test" + }, + "test.coverage.txt:md5,de3c59c0ea123bcdbbad27bc0a0a601e" + ] + ], + "versions": [ + "versions.yml:md5,5fd44452613992a6f71f2c73d2e117f2" + ] + } ], - "timestamp": "2023-12-05T17:36:13.084709" + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T11:59:43.69501" }, - "scale": { + "sarscov2 - scale - stub": { "content": [ { "0": [ @@ -71,7 +106,7 @@ { "id": "test" }, - "test.coverage.txt:md5,de3c59c0ea123bcdbbad27bc0a0a601e" + "test.coverage.txt:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "1": [ @@ -82,7 +117,73 @@ { "id": "test" }, - "test.coverage.txt:md5,de3c59c0ea123bcdbbad27bc0a0a601e" + "test.coverage.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,5fd44452613992a6f71f2c73d2e117f2" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T12:00:09.930036" + }, + "sarscov2 - no scale": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.coverage.txt:md5,66083198daca6c001d328ba9616e9b53" + ] + ], + "1": [ + "versions.yml:md5,5fd44452613992a6f71f2c73d2e117f2" + ], + "genomecov": [ + [ + { + "id": "test" + }, + "test.coverage.txt:md5,66083198daca6c001d328ba9616e9b53" + ] + ], + "versions": [ + "versions.yml:md5,5fd44452613992a6f71f2c73d2e117f2" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T11:59:25.448817" + }, + "sarscov2 - dummy sizes - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.coverage.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,5fd44452613992a6f71f2c73d2e117f2" + ], + "genomecov": [ + [ + { + "id": "test" + }, + "test.coverage.txt:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "versions": [ @@ -90,6 +191,10 @@ ] } ], - "timestamp": "2023-12-05T17:36:05.962006" + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T12:00:01.086433" } } \ No newline at end of file