From 2c837064a666c77a5049ec9e31e68cef24d3ef8f Mon Sep 17 00:00:00 2001 From: Adam Talbot <12817534+adamrtalbot@users.noreply.github.com> Date: Tue, 30 Jul 2024 10:00:47 +0100 Subject: [PATCH 1/6] Replace bedtools sort with unix sort in BEDTOOLS_GENOMECOV `bedtools sort` uses a large amount of CPU and memory, but its use here does not require the additional genome-based features of `bedtools`. Replacing it should speed up the process and make it many times more efficient. --- modules/nf-core/bedtools/genomecov/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/nf-core/bedtools/genomecov/main.nf b/modules/nf-core/bedtools/genomecov/main.nf index 954e8913d3b..b02bf1cfeeb 100644 --- a/modules/nf-core/bedtools/genomecov/main.nf +++ b/modules/nf-core/bedtools/genomecov/main.nf @@ -27,7 +27,7 @@ process BEDTOOLS_GENOMECOV { if (!args_list.contains('-bg') && (scale > 0 && scale != 1)) { args += " -bg" } - def sort_cmd = sort ? '| bedtools sort' : '' + def sort_cmd = sort ? 
'| sort -k1,1 -k2,2n' : '' def prefix = task.ext.prefix ?: "${meta.id}" if (intervals.name =~ /\.bam/) { From 78647c0e2439f10f63a9e041a0b2720dee93bb83 Mon Sep 17 00:00:00 2001 From: Adam Talbot <12817534+adamrtalbot@users.noreply.github.com> Date: Tue, 30 Jul 2024 10:19:07 +0100 Subject: [PATCH 2/6] add args2 for customisation of GNU sort command Allows customisation of the GNU sort command via task.ext.args2. --- modules/nf-core/bedtools/genomecov/main.nf | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/modules/nf-core/bedtools/genomecov/main.nf b/modules/nf-core/bedtools/genomecov/main.nf index b02bf1cfeeb..6da7dc31f9e 100644 --- a/modules/nf-core/bedtools/genomecov/main.nf +++ b/modules/nf-core/bedtools/genomecov/main.nf @@ -21,13 +21,14 @@ process BEDTOOLS_GENOMECOV { task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' def args_list = args.tokenize() args += (scale > 0 && scale != 1) ? " -scale $scale" : "" if (!args_list.contains('-bg') && (scale > 0 && scale != 1)) { args += " -bg" } - def sort_cmd = sort ? '| sort -k1,1 -k2,2n' : '' + def sort_cmd = sort ? '| sort $args2 -k1,1 -k2,2n' : '' def prefix = task.ext.prefix ?: "${meta.id}" if (intervals.name =~ /\.bam/) { From 73543a923bab9d75be5eeaaf316f7ce2d0c33820 Mon Sep 17 00:00:00 2001 From: Adam Talbot <12817534+adamrtalbot@users.noreply.github.com> Date: Tue, 30 Jul 2024 10:38:30 +0100 Subject: [PATCH 3/6] quoting for args2 --- modules/nf-core/bedtools/genomecov/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/nf-core/bedtools/genomecov/main.nf b/modules/nf-core/bedtools/genomecov/main.nf index 6da7dc31f9e..e7cc761d0ae 100644 --- a/modules/nf-core/bedtools/genomecov/main.nf +++ b/modules/nf-core/bedtools/genomecov/main.nf @@ -28,7 +28,7 @@ process BEDTOOLS_GENOMECOV { if (!args_list.contains('-bg') && (scale > 0 && scale != 1)) { args += " -bg" } - def sort_cmd = sort ? 
'| sort $args2 -k1,1 -k2,2n' : '' + def sort_cmd = sort ? "| sort $args2 -k1,1 -k2,2n" : '' def prefix = task.ext.prefix ?: "${meta.id}" if (intervals.name =~ /\.bam/) { From b4b7466529f6ec25ae9174d922c711a6b13518a7 Mon Sep 17 00:00:00 2001 From: adamrtalbot <12817534+adamrtalbot@users.noreply.github.com> Date: Tue, 30 Jul 2024 10:51:17 +0100 Subject: [PATCH 4/6] Use LC_ALL and default options for performance and consistency --- modules/nf-core/bedtools/genomecov/main.nf | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/modules/nf-core/bedtools/genomecov/main.nf b/modules/nf-core/bedtools/genomecov/main.nf index e7cc761d0ae..7e6eb35c3e4 100644 --- a/modules/nf-core/bedtools/genomecov/main.nf +++ b/modules/nf-core/bedtools/genomecov/main.nf @@ -4,8 +4,8 @@ process BEDTOOLS_GENOMECOV { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bedtools:2.31.1--hf5e1c6e_0' : - 'biocontainers/bedtools:2.31.1--hf5e1c6e_0' }" + 'oras://community.wave.seqera.io/library/bedtools_coreutils:ba273c06a3909a15': + 'community.wave.seqera.io/library/bedtools_coreutils:a623c13f66d5262b' }" input: tuple val(meta), path(intervals), val(scale) @@ -24,11 +24,14 @@ process BEDTOOLS_GENOMECOV { def args = task.ext.args ?: '' def args2 = task.ext.args2 ?: '' def args_list = args.tokenize() + def buffer = task.memory.toGiga().intdiv(2) args += (scale > 0 && scale != 1) ? " -scale $scale" : "" if (!args_list.contains('-bg') && (scale > 0 && scale != 1)) { args += " -bg" } - def sort_cmd = sort ? "| sort $args2 -k1,1 -k2,2n" : '' + // Sorts output file by chromosome and position using additional options for performance and consistency + // See https://www.biostars.org/p/66927/ for further details + def sort_cmd = sort ? 
"| LC_ALL=C sort --parallel=$task.cpus --buffer-size=${buffer}G -k1,1 -k2,2n" : '' def prefix = task.ext.prefix ?: "${meta.id}" if (intervals.name =~ /\.bam/) { From c1204bab92bc8ccfa0f19c897941187e2e5022ed Mon Sep 17 00:00:00 2001 From: adamrtalbot <12817534+adamrtalbot@users.noreply.github.com> Date: Tue, 30 Jul 2024 11:25:50 +0100 Subject: [PATCH 5/6] Handle null memory value --- modules/nf-core/bedtools/genomecov/main.nf | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/modules/nf-core/bedtools/genomecov/main.nf b/modules/nf-core/bedtools/genomecov/main.nf index 7e6eb35c3e4..8403c530380 100644 --- a/modules/nf-core/bedtools/genomecov/main.nf +++ b/modules/nf-core/bedtools/genomecov/main.nf @@ -21,17 +21,16 @@ process BEDTOOLS_GENOMECOV { task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - def args2 = task.ext.args2 ?: '' + def args = task.ext.args ?: '' def args_list = args.tokenize() - def buffer = task.memory.toGiga().intdiv(2) args += (scale > 0 && scale != 1) ? " -scale $scale" : "" if (!args_list.contains('-bg') && (scale > 0 && scale != 1)) { args += " -bg" } // Sorts output file by chromosome and position using additional options for performance and consistency // See https://www.biostars.org/p/66927/ for further details - def sort_cmd = sort ? "| LC_ALL=C sort --parallel=$task.cpus --buffer-size=${buffer}G -k1,1 -k2,2n" : '' + def buffer = task.memory ? "--buffer-size=${task.memory.toGiga().intdiv(2)}G" : '' + def sort_cmd = sort ? 
"| LC_ALL=C sort --parallel=$task.cpus $buffer -k1,1 -k2,2n" : '' def prefix = task.ext.prefix ?: "${meta.id}" if (intervals.name =~ /\.bam/) { From 402c5bba62cc38703dfa9cd102756fffdea9fc89 Mon Sep 17 00:00:00 2001 From: adamrtalbot <12817534+adamrtalbot@users.noreply.github.com> Date: Tue, 30 Jul 2024 11:26:15 +0100 Subject: [PATCH 6/6] Remove tags.yml --- modules/nf-core/bedtools/genomecov/tests/tags.yml | 2 -- 1 file changed, 2 deletions(-) delete mode 100644 modules/nf-core/bedtools/genomecov/tests/tags.yml diff --git a/modules/nf-core/bedtools/genomecov/tests/tags.yml b/modules/nf-core/bedtools/genomecov/tests/tags.yml deleted file mode 100644 index 55fce47800f..00000000000 --- a/modules/nf-core/bedtools/genomecov/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -bedtools/genomecov: - - "modules/nf-core/bedtools/genomecov/**"