From e4ff020adc42886f7624636d997a9906786dad51 Mon Sep 17 00:00:00 2001 From: Shaun Regenbaum Date: Wed, 27 Dec 2023 12:52:13 +0200 Subject: [PATCH 01/18] added filtering step and tested --- modules/local/filterbed/main.nf | 57 + .../execution_report_2023-12-27_12-50-57.html | 1075 +++++++++++++++++ ...xecution_timeline_2023-12-27_12-50-57.html | 227 ++++ .../execution_trace_2023-12-27_12-42-15.txt | 1 + .../execution_trace_2023-12-27_12-43-55.txt | 1 + .../execution_trace_2023-12-27_12-44-43.txt | 1 + .../execution_trace_2023-12-27_12-50-57.txt | 6 + .../pipeline_dag_2023-12-27_12-50-57.html | 325 +++++ workflows/rnavar.nf | 20 +- 9 files changed, 1709 insertions(+), 4 deletions(-) create mode 100644 modules/local/filterbed/main.nf create mode 100644 out/pipeline_info/execution_report_2023-12-27_12-50-57.html create mode 100644 out/pipeline_info/execution_timeline_2023-12-27_12-50-57.html create mode 100644 out/pipeline_info/execution_trace_2023-12-27_12-42-15.txt create mode 100644 out/pipeline_info/execution_trace_2023-12-27_12-43-55.txt create mode 100644 out/pipeline_info/execution_trace_2023-12-27_12-44-43.txt create mode 100644 out/pipeline_info/execution_trace_2023-12-27_12-50-57.txt create mode 100644 out/pipeline_info/pipeline_dag_2023-12-27_12-50-57.html diff --git a/modules/local/filterbed/main.nf b/modules/local/filterbed/main.nf new file mode 100644 index 00000000..71d0cdc4 --- /dev/null +++ b/modules/local/filterbed/main.nf @@ -0,0 +1,57 @@ +process FILTERBEDFILE { + tag "${genome_bed} -> ${filtered_bed}" + label 'process_medium' + + conda (params.enable_conda ? "conda-forge::python=3.8.3" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/python:3.8.3' : + 'quay.io/biocontainers/python:3.8.3' }" + + input: + path genome_bed + path genome_dict + + output: + path 'filtered_exome.bed', emit: filtered_bed + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + """ + python - < versions.yml + "${task.process}": + python: \$(python --version | sed 's/Python //g') + END_VERSIONS + """ +} diff --git a/out/pipeline_info/execution_report_2023-12-27_12-50-57.html b/out/pipeline_info/execution_report_2023-12-27_12-50-57.html new file mode 100644 index 00000000..9abaffb4 --- /dev/null +++ b/out/pipeline_info/execution_report_2023-12-27_12-50-57.html @@ -0,0 +1,1075 @@ + + + + + + + + + + + [dreamy_heisenberg] Nextflow Workflow Report + + + + + + + +
+
+ +

Nextflow workflow report

+

[dreamy_heisenberg]

+ + +
+

Workflow execution completed unsuccessfully!

+

The exit status of the task that caused the workflow execution to fail was: 139.

+

The full error message was:

+
Error executing process > 'NFCORE_RNAVAR:RNAVAR:PREPARE_GENOME:SAMTOOLS_FAIDX (genome.fasta)'
+
+Caused by:
+  Process `NFCORE_RNAVAR:RNAVAR:PREPARE_GENOME:SAMTOOLS_FAIDX (genome.fasta)` terminated with an error exit status (139)
+
+Command executed:
+
+  samtools \
+      faidx \
+      genome.fasta
+  
+  cat <<-END_VERSIONS > versions.yml
+  "NFCORE_RNAVAR:RNAVAR:PREPARE_GENOME:SAMTOOLS_FAIDX":
+      samtools: $(echo $(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*$//')
+  END_VERSIONS
+
+Command exit status:
+  139
+
+Command output:
+  (empty)
+
+Command error:
+  WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested
+
+Work dir:
+  /Users/shaunie/Desktop/rnavar_edit/work/3d/dbd836d06cd92f49ad17b17aeedec0
+
+Tip: when you have fixed the problem you can continue the execution adding the option `-resume` to the run command line
+
+ + +
+
Run times
+
+ 27-Dec-2023 12:50:57 - 27-Dec-2023 12:51:18 + (duration: 20.3s) +
+ +
+
+
  1 succeeded  
+
  0 cached  
+
  0 ignored  
+
  2 failed  
+
+
+ +
Nextflow command
+
nextflow run main.nf -profile test,docker --outdir out/
+
+ +
+
CPU-Hours
+
(a few seconds)
+ +
Launch directory
+
/Users/shaunie/Desktop/rnavar_edit
+ +
Work directory
+
/Users/shaunie/Desktop/rnavar_edit/work
+ +
Project directory
+
/Users/shaunie/Desktop/rnavar_edit
+ + +
Script name
+
main.nf
+ + + +
Script ID
+
e58604470acecd0bb0ce67da4f63a0b0
+ + +
Workflow session
+
fcf68bc5-c8a6-4d5a-8396-5874d3221de0
+ + + +
Workflow profile
+
test,docker
+ + +
Workflow container
+
[ENSEMBLVEP:nfcore/vep:104.3.null, SNPEFF:(dynamic resolved)]
+ +
Container engine
+
docker
+ + +
Nextflow version
+
version 23.10.0, build 5889 (15-10-2023 15:07 UTC)
+
+
+
+ +
+

Resource Usage

+

These plots give an overview of the distribution of resource usage for each process.

+ +

CPU

+ +
+
+
+
+
+
+
+ +
+ +

Memory

+ +
+
+
+
+
+
+
+
+
+
+
+ +

Job Duration

+ +
+
+
+
+
+
+
+
+ +

I/O

+ +
+
+
+
+
+
+
+
+
+ +
+
+

Tasks

+

This table shows information about each task in the workflow. Use the search box on the right + to filter rows for specific values. Clicking headers will sort the table by that value and + scrolling side to side will reveal more columns.

+
+ + +
+
+
+
+
+ +
+ (tasks table omitted because the dataset is too big) +
+
+ +
+
+ Generated by Nextflow, version 23.10.0 +
+
+ + + + + diff --git a/out/pipeline_info/execution_timeline_2023-12-27_12-50-57.html b/out/pipeline_info/execution_timeline_2023-12-27_12-50-57.html new file mode 100644 index 00000000..8a3e4614 --- /dev/null +++ b/out/pipeline_info/execution_timeline_2023-12-27_12-50-57.html @@ -0,0 +1,227 @@ + + + + + + + + + + + + + +
+

Processes execution timeline

+

+ Launch time:
+ Elapsed time:
+ Legend: job wall time / memory usage (RAM) +

+
+
+ + + + + + + diff --git a/out/pipeline_info/execution_trace_2023-12-27_12-42-15.txt b/out/pipeline_info/execution_trace_2023-12-27_12-42-15.txt new file mode 100644 index 00000000..6b739acd --- /dev/null +++ b/out/pipeline_info/execution_trace_2023-12-27_12-42-15.txt @@ -0,0 +1 @@ +task_id hash native_id name status exit submit duration realtime %cpu peak_rss peak_vmem rchar wchar diff --git a/out/pipeline_info/execution_trace_2023-12-27_12-43-55.txt b/out/pipeline_info/execution_trace_2023-12-27_12-43-55.txt new file mode 100644 index 00000000..6b739acd --- /dev/null +++ b/out/pipeline_info/execution_trace_2023-12-27_12-43-55.txt @@ -0,0 +1 @@ +task_id hash native_id name status exit submit duration realtime %cpu peak_rss peak_vmem rchar wchar diff --git a/out/pipeline_info/execution_trace_2023-12-27_12-44-43.txt b/out/pipeline_info/execution_trace_2023-12-27_12-44-43.txt new file mode 100644 index 00000000..6b739acd --- /dev/null +++ b/out/pipeline_info/execution_trace_2023-12-27_12-44-43.txt @@ -0,0 +1 @@ +task_id hash native_id name status exit submit duration realtime %cpu peak_rss peak_vmem rchar wchar diff --git a/out/pipeline_info/execution_trace_2023-12-27_12-50-57.txt b/out/pipeline_info/execution_trace_2023-12-27_12-50-57.txt new file mode 100644 index 00000000..c185f302 --- /dev/null +++ b/out/pipeline_info/execution_trace_2023-12-27_12-50-57.txt @@ -0,0 +1,6 @@ +task_id hash native_id name status exit submit duration realtime %cpu peak_rss peak_vmem rchar wchar +4 f4/35bf24 68515 NFCORE_RNAVAR:RNAVAR:PREPARE_GENOME:UNTAR_STAR_INDEX (star.tar.gz) COMPLETED 0 2023-12-27 12:51:06.245 7.9s 107ms 26.5% 5.6 MB 9.9 MB 1021.8 KB 1.2 MB +3 eb/45e380 68502 NFCORE_RNAVAR:RNAVAR:PREPARE_GENOME:SAMTOOLS_FAIDX (genome.fasta) FAILED 139 2023-12-27 12:51:06.147 11.5s 11.5s - - - - - +5 3d/dbd836 68718 NFCORE_RNAVAR:RNAVAR:PREPARE_GENOME:SAMTOOLS_FAIDX (genome.fasta) FAILED 139 2023-12-27 12:51:17.695 320ms 236ms - - - - - +1 bb/f66435 68525 
NFCORE_RNAVAR:RNAVAR:INPUT_CHECK:SAMPLESHEET_CHECK (samplesheet.csv) ABORTED - 2023-12-27 12:51:06.270 - - - - - - - +2 80/2da648 68535 NFCORE_RNAVAR:RNAVAR:PREPARE_GENOME:GTF2BED (genome.gtf) ABORTED - 2023-12-27 12:51:06.322 - - - - - - - diff --git a/out/pipeline_info/pipeline_dag_2023-12-27_12-50-57.html b/out/pipeline_info/pipeline_dag_2023-12-27_12-50-57.html new file mode 100644 index 00000000..fc80ca02 --- /dev/null +++ b/out/pipeline_info/pipeline_dag_2023-12-27_12-50-57.html @@ -0,0 +1,325 @@ + + + + + + +
+flowchart TB
+    subgraph " "
+    v0["Channel.fromPath"]
+    v2["Channel.fromPath"]
+    v3["Channel.fromPath"]
+    v5["Channel.fromPath"]
+    v7["Channel.fromPath"]
+    v9["Channel.fromPath"]
+    v14["Channel.fromPath"]
+    v18["Channel.fromPath"]
+    v32["Channel.fromPath"]
+    v36["Channel.fromPath"]
+    v43["Channel.from"]
+    v46["samplesheet"]
+    v99["star_ignore_sjdbgtf"]
+    v100["seq_platform"]
+    v101["seq_center"]
+    v126["fasta"]
+    v164["fasta"]
+    v256["fasta"]
+    v305["ch_workflow_summary"]
+    end
+    subgraph NFCORE_RNAVAR
+    subgraph RNAVAR
+    subgraph PREPARE_GENOME
+    v20([GTF2BED])
+    v26([SAMTOOLS_FAIDX])
+    v38([UNTAR_STAR_INDEX])
+    v19(( ))
+    v21(( ))
+    v27(( ))
+    v37(( ))
+    v39(( ))
+    end
+    subgraph INPUT_CHECK
+    v47([SAMPLESHEET_CHECK])
+    v48(( ))
+    end
+    v54([CAT_FASTQ])
+    v59([FASTQC])
+    v68([FILTERBEDFILE])
+    v72([GATK4_BEDTOINTERVALLIST])
+    v78([GATK4_INTERVALLISTTOOLS])
+    subgraph ALIGN_STAR
+    v102([STAR_ALIGN])
+    subgraph BAM_SORT_SAMTOOLS
+    v115([SAMTOOLS_SORT])
+    v118([SAMTOOLS_INDEX])
+    subgraph BAM_STATS_SAMTOOLS
+    v127([SAMTOOLS_STATS])
+    v130([SAMTOOLS_FLAGSTAT])
+    v134([SAMTOOLS_IDXSTATS])
+    end
+    v122(( ))
+    end
+    end
+    subgraph MARKDUPLICATES
+    v152([GATK4_MARKDUPLICATES])
+    v156([SAMTOOLS_INDEX])
+    subgraph BAM_STATS_SAMTOOLS
+    v165([SAMTOOLS_STATS])
+    v168([SAMTOOLS_FLAGSTAT])
+    v172([SAMTOOLS_IDXSTATS])
+    end
+    end
+    subgraph SPLITNCIGAR
+    v192([GATK4_SPLITNCIGARREADS])
+    v197([SAMTOOLS_MERGE])
+    v201([SAMTOOLS_INDEX])
+    v15(( ))
+    v195(( ))
+    end
+    v223([GATK4_BASERECALIBRATOR])
+    subgraph RECALIBRATE
+    v243([APPLYBQSR])
+    v247([SAMTOOLS_INDEX])
+    v257([SAMTOOLS_STATS])
+    v203(( ))
+    end
+    v270([GATK4_HAPLOTYPECALLER])
+    v277([GATK4_MERGEVCFS])
+    v281([TABIX])
+    v288([GATK4_VARIANTFILTRATION])
+    v298([CUSTOM_DUMPSOFTWAREVERSIONS])
+    v310([MULTIQC])
+    v4(( ))
+    v6(( ))
+    v22(( ))
+    v33(( ))
+    v61(( ))
+    v80(( ))
+    v272(( ))
+    v282(( ))
+    v314(( ))
+    end
+    end
+    subgraph " "
+    v60[" "]
+    v69[" "]
+    v79[" "]
+    v103[" "]
+    v104[" "]
+    v105["tab"]
+    v106["fastq"]
+    v107[" "]
+    v108["ch_transcriptome_bam"]
+    v109["bam_sorted"]
+    v110["log_progress"]
+    v111["log_out"]
+    v119[" "]
+    v131["flagstat"]
+    v135["idxstats"]
+    v153[" "]
+    v157[" "]
+    v169["flagstat"]
+    v173["idxstats"]
+    v198[" "]
+    v202[" "]
+    v244[" "]
+    v248[" "]
+    v271[" "]
+    v289[" "]
+    v290["ch_final_vcf"]
+    v299[" "]
+    v300[" "]
+    v311[" "]
+    v312[" "]
+    v313[" "]
+    v315[" "]
+    end
+    v0 --> v61
+    v2 --> v61
+    v3 --> v4
+    v5 --> v6
+    v7 --> v4
+    v9 --> v6
+    v14 --> v15
+    v18 --> v19
+    v19 --> v20
+    v20 --> v21
+    v20 --> v22
+    v15 --> v26
+    v26 --> v22
+    v26 --> v27
+    v32 --> v33
+    v36 --> v37
+    v37 --> v38
+    v38 --> v22
+    v38 --> v39
+    v43 --> v21
+    v46 --> v47
+    v47 --> v22
+    v47 --> v48
+    v48 --> v54
+    v54 --> v22
+    v54 --> v48
+    v48 --> v59
+    v59 --> v60
+    v59 --> v22
+    v59 --> v61
+    v21 --> v68
+    v33 --> v68
+    v68 --> v72
+    v68 --> v69
+    v33 --> v72
+    v72 --> v78
+    v72 --> v22
+    v72 --> v203
+    v78 --> v79
+    v78 --> v80
+    v99 --> v102
+    v100 --> v102
+    v101 --> v102
+    v19 --> v102
+    v39 --> v102
+    v48 --> v102
+    v102 --> v115
+    v102 --> v111
+    v102 --> v110
+    v102 --> v109
+    v102 --> v108
+    v102 --> v107
+    v102 --> v106
+    v102 --> v105
+    v102 --> v104
+    v102 --> v103
+    v102 --> v22
+    v102 --> v61
+    v115 --> v118
+    v115 --> v152
+    v115 --> v22
+    v115 --> v122
+    v118 --> v119
+    v118 --> v22
+    v118 --> v122
+    v126 --> v127
+    v122 --> v127
+    v127 --> v22
+    v127 --> v61
+    v122 --> v130
+    v130 --> v131
+    v130 --> v22
+    v122 --> v134
+    v134 --> v135
+    v134 --> v22
+    v152 --> v156
+    v152 --> v153
+    v152 --> v22
+    v152 --> v61
+    v152 --> v80
+    v156 --> v157
+    v156 --> v22
+    v156 --> v80
+    v164 --> v165
+    v80 --> v165
+    v165 --> v22
+    v165 --> v61
+    v80 --> v168
+    v168 --> v169
+    v168 --> v22
+    v80 --> v172
+    v172 --> v173
+    v172 --> v22
+    v15 --> v192
+    v27 --> v192
+    v33 --> v192
+    v80 --> v192
+    v192 --> v22
+    v192 --> v195
+    v15 --> v197
+    v195 --> v197
+    v197 --> v201
+    v197 --> v198
+    v197 --> v22
+    v197 --> v203
+    v201 --> v202
+    v201 --> v22
+    v201 --> v203
+    v4 --> v223
+    v6 --> v223
+    v15 --> v223
+    v27 --> v223
+    v33 --> v223
+    v203 --> v223
+    v223 --> v22
+    v223 --> v61
+    v223 --> v203
+    v15 --> v243
+    v27 --> v243
+    v33 --> v243
+    v203 --> v243
+    v243 --> v247
+    v243 --> v244
+    v243 --> v22
+    v243 --> v80
+    v247 --> v248
+    v247 --> v22
+    v247 --> v80
+    v256 --> v257
+    v80 --> v257
+    v257 --> v22
+    v257 --> v61
+    v4 --> v270
+    v6 --> v270
+    v15 --> v270
+    v27 --> v270
+    v33 --> v270
+    v80 --> v270
+    v270 --> v271
+    v270 --> v22
+    v270 --> v272
+    v33 --> v277
+    v272 --> v277
+    v277 --> v281
+    v277 --> v22
+    v277 --> v282
+    v281 --> v22
+    v281 --> v282
+    v15 --> v288
+    v27 --> v288
+    v33 --> v288
+    v282 --> v288
+    v288 --> v290
+    v288 --> v289
+    v288 --> v22
+    v22 --> v298
+    v298 --> v300
+    v298 --> v299
+    v298 --> v61
+    v305 --> v61
+    v61 --> v310
+    v310 --> v313
+    v310 --> v312
+    v310 --> v311
+    v310 --> v314
+    v314 --> v315
+
+
+ + + diff --git a/workflows/rnavar.nf b/workflows/rnavar.nf index 34688263..15df17e4 100644 --- a/workflows/rnavar.nf +++ b/workflows/rnavar.nf @@ -47,9 +47,11 @@ ch_rnavar_logo = Channel.fromPath(file("$projectDir/assets/nf-core-rna ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { INPUT_CHECK } from '../subworkflows/local/input_check' // Validate the input samplesheet.csv and prepare input channels -include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome' // Build the genome index and other reference files -include { ANNOTATE } from '../subworkflows/local/annotate' // Annotate variants using snpEff or VEP or both +include { INPUT_CHECK } from '../subworkflows/local/input_check' // Validate the input samplesheet.csv and prepare input channels +include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome' // Build the genome index and other reference files +include { ANNOTATE } from '../subworkflows/local/annotate' // Annotate variants using snpEff or VEP or both +include { FILTERBEDFILE } from '../modules/local/filterbed/main' // Filter a BED file based on the available genome.dict file to prevent errors for extra chromosomes + /* ======================================================================================== @@ -176,12 +178,22 @@ workflow RNAVAR { ch_reports = ch_reports.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) ch_versions = ch_versions.mix(FASTQC.out.versions.first()) + // + // PROCESS: Filter BED file before BedToIntervalList + // + ch_filtered_genome_bed = Channel.empty() + FILTERBEDFILE ( + ch_genome_bed, // This should be the channel containing your exome.bed file + PREPARE_GENOME.out.dict // This should be the channel containing your genome.dict file + ) + ch_filtered_genome_bed = FILTERBEDFILE.out.filtered_bed + // // MODULE: Prepare the interval list from the GTF file using GATK4 BedToIntervalList // ch_interval_list = Channel.empty() GATK4_BEDTOINTERVALLIST( - 
ch_genome_bed, + ch_filtered_genome_bed, PREPARE_GENOME.out.dict ) ch_interval_list = GATK4_BEDTOINTERVALLIST.out.interval_list From 98e7dfd9faff2506e7328c719256ea5cc4d4299c Mon Sep 17 00:00:00 2001 From: Shaun Regenbaum Date: Wed, 27 Dec 2023 12:54:16 +0200 Subject: [PATCH 02/18] removed generated test files --- .../execution_report_2023-12-27_12-50-57.html | 1075 ----------------- ...xecution_timeline_2023-12-27_12-50-57.html | 227 ---- .../execution_trace_2023-12-27_12-42-15.txt | 1 - .../execution_trace_2023-12-27_12-43-55.txt | 1 - .../execution_trace_2023-12-27_12-44-43.txt | 1 - .../execution_trace_2023-12-27_12-50-57.txt | 6 - .../pipeline_dag_2023-12-27_12-50-57.html | 325 ----- 7 files changed, 1636 deletions(-) delete mode 100644 out/pipeline_info/execution_report_2023-12-27_12-50-57.html delete mode 100644 out/pipeline_info/execution_timeline_2023-12-27_12-50-57.html delete mode 100644 out/pipeline_info/execution_trace_2023-12-27_12-42-15.txt delete mode 100644 out/pipeline_info/execution_trace_2023-12-27_12-43-55.txt delete mode 100644 out/pipeline_info/execution_trace_2023-12-27_12-44-43.txt delete mode 100644 out/pipeline_info/execution_trace_2023-12-27_12-50-57.txt delete mode 100644 out/pipeline_info/pipeline_dag_2023-12-27_12-50-57.html diff --git a/out/pipeline_info/execution_report_2023-12-27_12-50-57.html b/out/pipeline_info/execution_report_2023-12-27_12-50-57.html deleted file mode 100644 index 9abaffb4..00000000 --- a/out/pipeline_info/execution_report_2023-12-27_12-50-57.html +++ /dev/null @@ -1,1075 +0,0 @@ - - - - - - - - - - - [dreamy_heisenberg] Nextflow Workflow Report - - - - - - - -
-
- -

Nextflow workflow report

-

[dreamy_heisenberg]

- - -
-

Workflow execution completed unsuccessfully!

-

The exit status of the task that caused the workflow execution to fail was: 139.

-

The full error message was:

-
Error executing process > 'NFCORE_RNAVAR:RNAVAR:PREPARE_GENOME:SAMTOOLS_FAIDX (genome.fasta)'
-
-Caused by:
-  Process `NFCORE_RNAVAR:RNAVAR:PREPARE_GENOME:SAMTOOLS_FAIDX (genome.fasta)` terminated with an error exit status (139)
-
-Command executed:
-
-  samtools \
-      faidx \
-      genome.fasta
-  
-  cat <<-END_VERSIONS > versions.yml
-  "NFCORE_RNAVAR:RNAVAR:PREPARE_GENOME:SAMTOOLS_FAIDX":
-      samtools: $(echo $(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*$//')
-  END_VERSIONS
-
-Command exit status:
-  139
-
-Command output:
-  (empty)
-
-Command error:
-  WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested
-
-Work dir:
-  /Users/shaunie/Desktop/rnavar_edit/work/3d/dbd836d06cd92f49ad17b17aeedec0
-
-Tip: when you have fixed the problem you can continue the execution adding the option `-resume` to the run command line
-
- - -
-
Run times
-
- 27-Dec-2023 12:50:57 - 27-Dec-2023 12:51:18 - (duration: 20.3s) -
- -
-
-
  1 succeeded  
-
  0 cached  
-
  0 ignored  
-
  2 failed  
-
-
- -
Nextflow command
-
nextflow run main.nf -profile test,docker --outdir out/
-
- -
-
CPU-Hours
-
(a few seconds)
- -
Launch directory
-
/Users/shaunie/Desktop/rnavar_edit
- -
Work directory
-
/Users/shaunie/Desktop/rnavar_edit/work
- -
Project directory
-
/Users/shaunie/Desktop/rnavar_edit
- - -
Script name
-
main.nf
- - - -
Script ID
-
e58604470acecd0bb0ce67da4f63a0b0
- - -
Workflow session
-
fcf68bc5-c8a6-4d5a-8396-5874d3221de0
- - - -
Workflow profile
-
test,docker
- - -
Workflow container
-
[ENSEMBLVEP:nfcore/vep:104.3.null, SNPEFF:(dynamic resolved)]
- -
Container engine
-
docker
- - -
Nextflow version
-
version 23.10.0, build 5889 (15-10-2023 15:07 UTC)
-
-
-
- -
-

Resource Usage

-

These plots give an overview of the distribution of resource usage for each process.

- -

CPU

- -
-
-
-
-
-
-
- -
- -

Memory

- -
-
-
-
-
-
-
-
-
-
-
- -

Job Duration

- -
-
-
-
-
-
-
-
- -

I/O

- -
-
-
-
-
-
-
-
-
- -
-
-

Tasks

-

This table shows information about each task in the workflow. Use the search box on the right - to filter rows for specific values. Clicking headers will sort the table by that value and - scrolling side to side will reveal more columns.

-
- - -
-
-
-
-
- -
- (tasks table omitted because the dataset is too big) -
-
- -
-
- Generated by Nextflow, version 23.10.0 -
-
- - - - - diff --git a/out/pipeline_info/execution_timeline_2023-12-27_12-50-57.html b/out/pipeline_info/execution_timeline_2023-12-27_12-50-57.html deleted file mode 100644 index 8a3e4614..00000000 --- a/out/pipeline_info/execution_timeline_2023-12-27_12-50-57.html +++ /dev/null @@ -1,227 +0,0 @@ - - - - - - - - - - - - - -
-

Processes execution timeline

-

- Launch time:
- Elapsed time:
- Legend: job wall time / memory usage (RAM) -

-
-
- - - - - - - diff --git a/out/pipeline_info/execution_trace_2023-12-27_12-42-15.txt b/out/pipeline_info/execution_trace_2023-12-27_12-42-15.txt deleted file mode 100644 index 6b739acd..00000000 --- a/out/pipeline_info/execution_trace_2023-12-27_12-42-15.txt +++ /dev/null @@ -1 +0,0 @@ -task_id hash native_id name status exit submit duration realtime %cpu peak_rss peak_vmem rchar wchar diff --git a/out/pipeline_info/execution_trace_2023-12-27_12-43-55.txt b/out/pipeline_info/execution_trace_2023-12-27_12-43-55.txt deleted file mode 100644 index 6b739acd..00000000 --- a/out/pipeline_info/execution_trace_2023-12-27_12-43-55.txt +++ /dev/null @@ -1 +0,0 @@ -task_id hash native_id name status exit submit duration realtime %cpu peak_rss peak_vmem rchar wchar diff --git a/out/pipeline_info/execution_trace_2023-12-27_12-44-43.txt b/out/pipeline_info/execution_trace_2023-12-27_12-44-43.txt deleted file mode 100644 index 6b739acd..00000000 --- a/out/pipeline_info/execution_trace_2023-12-27_12-44-43.txt +++ /dev/null @@ -1 +0,0 @@ -task_id hash native_id name status exit submit duration realtime %cpu peak_rss peak_vmem rchar wchar diff --git a/out/pipeline_info/execution_trace_2023-12-27_12-50-57.txt b/out/pipeline_info/execution_trace_2023-12-27_12-50-57.txt deleted file mode 100644 index c185f302..00000000 --- a/out/pipeline_info/execution_trace_2023-12-27_12-50-57.txt +++ /dev/null @@ -1,6 +0,0 @@ -task_id hash native_id name status exit submit duration realtime %cpu peak_rss peak_vmem rchar wchar -4 f4/35bf24 68515 NFCORE_RNAVAR:RNAVAR:PREPARE_GENOME:UNTAR_STAR_INDEX (star.tar.gz) COMPLETED 0 2023-12-27 12:51:06.245 7.9s 107ms 26.5% 5.6 MB 9.9 MB 1021.8 KB 1.2 MB -3 eb/45e380 68502 NFCORE_RNAVAR:RNAVAR:PREPARE_GENOME:SAMTOOLS_FAIDX (genome.fasta) FAILED 139 2023-12-27 12:51:06.147 11.5s 11.5s - - - - - -5 3d/dbd836 68718 NFCORE_RNAVAR:RNAVAR:PREPARE_GENOME:SAMTOOLS_FAIDX (genome.fasta) FAILED 139 2023-12-27 12:51:17.695 320ms 236ms - - - - - -1 bb/f66435 68525 
NFCORE_RNAVAR:RNAVAR:INPUT_CHECK:SAMPLESHEET_CHECK (samplesheet.csv) ABORTED - 2023-12-27 12:51:06.270 - - - - - - - -2 80/2da648 68535 NFCORE_RNAVAR:RNAVAR:PREPARE_GENOME:GTF2BED (genome.gtf) ABORTED - 2023-12-27 12:51:06.322 - - - - - - - diff --git a/out/pipeline_info/pipeline_dag_2023-12-27_12-50-57.html b/out/pipeline_info/pipeline_dag_2023-12-27_12-50-57.html deleted file mode 100644 index fc80ca02..00000000 --- a/out/pipeline_info/pipeline_dag_2023-12-27_12-50-57.html +++ /dev/null @@ -1,325 +0,0 @@ - - - - - - -
-flowchart TB
-    subgraph " "
-    v0["Channel.fromPath"]
-    v2["Channel.fromPath"]
-    v3["Channel.fromPath"]
-    v5["Channel.fromPath"]
-    v7["Channel.fromPath"]
-    v9["Channel.fromPath"]
-    v14["Channel.fromPath"]
-    v18["Channel.fromPath"]
-    v32["Channel.fromPath"]
-    v36["Channel.fromPath"]
-    v43["Channel.from"]
-    v46["samplesheet"]
-    v99["star_ignore_sjdbgtf"]
-    v100["seq_platform"]
-    v101["seq_center"]
-    v126["fasta"]
-    v164["fasta"]
-    v256["fasta"]
-    v305["ch_workflow_summary"]
-    end
-    subgraph NFCORE_RNAVAR
-    subgraph RNAVAR
-    subgraph PREPARE_GENOME
-    v20([GTF2BED])
-    v26([SAMTOOLS_FAIDX])
-    v38([UNTAR_STAR_INDEX])
-    v19(( ))
-    v21(( ))
-    v27(( ))
-    v37(( ))
-    v39(( ))
-    end
-    subgraph INPUT_CHECK
-    v47([SAMPLESHEET_CHECK])
-    v48(( ))
-    end
-    v54([CAT_FASTQ])
-    v59([FASTQC])
-    v68([FILTERBEDFILE])
-    v72([GATK4_BEDTOINTERVALLIST])
-    v78([GATK4_INTERVALLISTTOOLS])
-    subgraph ALIGN_STAR
-    v102([STAR_ALIGN])
-    subgraph BAM_SORT_SAMTOOLS
-    v115([SAMTOOLS_SORT])
-    v118([SAMTOOLS_INDEX])
-    subgraph BAM_STATS_SAMTOOLS
-    v127([SAMTOOLS_STATS])
-    v130([SAMTOOLS_FLAGSTAT])
-    v134([SAMTOOLS_IDXSTATS])
-    end
-    v122(( ))
-    end
-    end
-    subgraph MARKDUPLICATES
-    v152([GATK4_MARKDUPLICATES])
-    v156([SAMTOOLS_INDEX])
-    subgraph BAM_STATS_SAMTOOLS
-    v165([SAMTOOLS_STATS])
-    v168([SAMTOOLS_FLAGSTAT])
-    v172([SAMTOOLS_IDXSTATS])
-    end
-    end
-    subgraph SPLITNCIGAR
-    v192([GATK4_SPLITNCIGARREADS])
-    v197([SAMTOOLS_MERGE])
-    v201([SAMTOOLS_INDEX])
-    v15(( ))
-    v195(( ))
-    end
-    v223([GATK4_BASERECALIBRATOR])
-    subgraph RECALIBRATE
-    v243([APPLYBQSR])
-    v247([SAMTOOLS_INDEX])
-    v257([SAMTOOLS_STATS])
-    v203(( ))
-    end
-    v270([GATK4_HAPLOTYPECALLER])
-    v277([GATK4_MERGEVCFS])
-    v281([TABIX])
-    v288([GATK4_VARIANTFILTRATION])
-    v298([CUSTOM_DUMPSOFTWAREVERSIONS])
-    v310([MULTIQC])
-    v4(( ))
-    v6(( ))
-    v22(( ))
-    v33(( ))
-    v61(( ))
-    v80(( ))
-    v272(( ))
-    v282(( ))
-    v314(( ))
-    end
-    end
-    subgraph " "
-    v60[" "]
-    v69[" "]
-    v79[" "]
-    v103[" "]
-    v104[" "]
-    v105["tab"]
-    v106["fastq"]
-    v107[" "]
-    v108["ch_transcriptome_bam"]
-    v109["bam_sorted"]
-    v110["log_progress"]
-    v111["log_out"]
-    v119[" "]
-    v131["flagstat"]
-    v135["idxstats"]
-    v153[" "]
-    v157[" "]
-    v169["flagstat"]
-    v173["idxstats"]
-    v198[" "]
-    v202[" "]
-    v244[" "]
-    v248[" "]
-    v271[" "]
-    v289[" "]
-    v290["ch_final_vcf"]
-    v299[" "]
-    v300[" "]
-    v311[" "]
-    v312[" "]
-    v313[" "]
-    v315[" "]
-    end
-    v0 --> v61
-    v2 --> v61
-    v3 --> v4
-    v5 --> v6
-    v7 --> v4
-    v9 --> v6
-    v14 --> v15
-    v18 --> v19
-    v19 --> v20
-    v20 --> v21
-    v20 --> v22
-    v15 --> v26
-    v26 --> v22
-    v26 --> v27
-    v32 --> v33
-    v36 --> v37
-    v37 --> v38
-    v38 --> v22
-    v38 --> v39
-    v43 --> v21
-    v46 --> v47
-    v47 --> v22
-    v47 --> v48
-    v48 --> v54
-    v54 --> v22
-    v54 --> v48
-    v48 --> v59
-    v59 --> v60
-    v59 --> v22
-    v59 --> v61
-    v21 --> v68
-    v33 --> v68
-    v68 --> v72
-    v68 --> v69
-    v33 --> v72
-    v72 --> v78
-    v72 --> v22
-    v72 --> v203
-    v78 --> v79
-    v78 --> v80
-    v99 --> v102
-    v100 --> v102
-    v101 --> v102
-    v19 --> v102
-    v39 --> v102
-    v48 --> v102
-    v102 --> v115
-    v102 --> v111
-    v102 --> v110
-    v102 --> v109
-    v102 --> v108
-    v102 --> v107
-    v102 --> v106
-    v102 --> v105
-    v102 --> v104
-    v102 --> v103
-    v102 --> v22
-    v102 --> v61
-    v115 --> v118
-    v115 --> v152
-    v115 --> v22
-    v115 --> v122
-    v118 --> v119
-    v118 --> v22
-    v118 --> v122
-    v126 --> v127
-    v122 --> v127
-    v127 --> v22
-    v127 --> v61
-    v122 --> v130
-    v130 --> v131
-    v130 --> v22
-    v122 --> v134
-    v134 --> v135
-    v134 --> v22
-    v152 --> v156
-    v152 --> v153
-    v152 --> v22
-    v152 --> v61
-    v152 --> v80
-    v156 --> v157
-    v156 --> v22
-    v156 --> v80
-    v164 --> v165
-    v80 --> v165
-    v165 --> v22
-    v165 --> v61
-    v80 --> v168
-    v168 --> v169
-    v168 --> v22
-    v80 --> v172
-    v172 --> v173
-    v172 --> v22
-    v15 --> v192
-    v27 --> v192
-    v33 --> v192
-    v80 --> v192
-    v192 --> v22
-    v192 --> v195
-    v15 --> v197
-    v195 --> v197
-    v197 --> v201
-    v197 --> v198
-    v197 --> v22
-    v197 --> v203
-    v201 --> v202
-    v201 --> v22
-    v201 --> v203
-    v4 --> v223
-    v6 --> v223
-    v15 --> v223
-    v27 --> v223
-    v33 --> v223
-    v203 --> v223
-    v223 --> v22
-    v223 --> v61
-    v223 --> v203
-    v15 --> v243
-    v27 --> v243
-    v33 --> v243
-    v203 --> v243
-    v243 --> v247
-    v243 --> v244
-    v243 --> v22
-    v243 --> v80
-    v247 --> v248
-    v247 --> v22
-    v247 --> v80
-    v256 --> v257
-    v80 --> v257
-    v257 --> v22
-    v257 --> v61
-    v4 --> v270
-    v6 --> v270
-    v15 --> v270
-    v27 --> v270
-    v33 --> v270
-    v80 --> v270
-    v270 --> v271
-    v270 --> v22
-    v270 --> v272
-    v33 --> v277
-    v272 --> v277
-    v277 --> v281
-    v277 --> v22
-    v277 --> v282
-    v281 --> v22
-    v281 --> v282
-    v15 --> v288
-    v27 --> v288
-    v33 --> v288
-    v282 --> v288
-    v288 --> v290
-    v288 --> v289
-    v288 --> v22
-    v22 --> v298
-    v298 --> v300
-    v298 --> v299
-    v298 --> v61
-    v305 --> v61
-    v61 --> v310
-    v310 --> v313
-    v310 --> v312
-    v310 --> v311
-    v310 --> v314
-    v314 --> v315
-
-
- - - From a3d04cc6800e58e40279537902aeadacd9d9274b Mon Sep 17 00:00:00 2001 From: Shaun Regenbaum Date: Wed, 27 Dec 2023 14:20:32 +0200 Subject: [PATCH 03/18] fixes --- modules/local/filterbed/main.nf | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/modules/local/filterbed/main.nf b/modules/local/filterbed/main.nf index 71d0cdc4..00bf34d9 100644 --- a/modules/local/filterbed/main.nf +++ b/modules/local/filterbed/main.nf @@ -1,5 +1,5 @@ process FILTERBEDFILE { - tag "${genome_bed} -> ${filtered_bed}" + tag "$meta.id" label 'process_medium' conda (params.enable_conda ? "conda-forge::python=3.8.3" : null) @@ -7,13 +7,12 @@ process FILTERBEDFILE { 'https://depot.galaxyproject.org/singularity/python:3.8.3' : 'quay.io/biocontainers/python:3.8.3' }" - input: - path genome_bed - path genome_dict + input: + tuple val(meta), path(bed) + path dict_file output: - path 'filtered_exome.bed', emit: filtered_bed - path "versions.yml", emit: versions + tuple val(meta), path('filtered_${bed.simpleName}') when: task.ext.when == null || task.ext.when @@ -41,12 +40,13 @@ process FILTERBEDFILE { if sequence in sequences: out.write(line) - def main(): - sequences = load_sequences_from_dict("${genome_dict}") - filter_bed_file("${genome_bed}", sequences, "filtered_exome.bed") + def main(bed_file, dict_file, output_file): + sequences = load_sequences_from_dict(dict_file) + filter_bed_file(bed_file, sequences, output_file) if __name__ == "__main__": - main() + main("${bed}", "${dict_file}", "filtered_${bed.simpleName}") + PYCODE cat <<-END_VERSIONS > versions.yml From d2f156ce2151258d274089fe2e9620920b1753f9 Mon Sep 17 00:00:00 2001 From: Shaun Regenbaum Date: Wed, 27 Dec 2023 14:30:24 +0200 Subject: [PATCH 04/18] emitting correctly now --- modules/local/filterbed/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/filterbed/main.nf b/modules/local/filterbed/main.nf index 00bf34d9..78bc7104 100644 --- 
a/modules/local/filterbed/main.nf +++ b/modules/local/filterbed/main.nf @@ -12,7 +12,7 @@ process FILTERBEDFILE { path dict_file output: - tuple val(meta), path('filtered_${bed.simpleName}') + tuple val(meta), path('filtered_${bed.simpleName}'), emit: filtered_bed when: task.ext.when == null || task.ext.when From 1ded8ae6a554d60a66980fc9f425fd1c33db389a Mon Sep 17 00:00:00 2001 From: Shaun Regenbaum Date: Wed, 27 Dec 2023 14:52:11 +0200 Subject: [PATCH 05/18] printing --- modules/local/filterbed/main.nf | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modules/local/filterbed/main.nf b/modules/local/filterbed/main.nf index 78bc7104..15fd3fa3 100644 --- a/modules/local/filterbed/main.nf +++ b/modules/local/filterbed/main.nf @@ -21,6 +21,7 @@ process FILTERBEDFILE { """ python - < Date: Wed, 27 Dec 2023 15:00:10 +0200 Subject: [PATCH 06/18] no dynamic name wizardry --- modules/local/filterbed/main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/local/filterbed/main.nf b/modules/local/filterbed/main.nf index 15fd3fa3..6bb7c2b3 100644 --- a/modules/local/filterbed/main.nf +++ b/modules/local/filterbed/main.nf @@ -12,7 +12,7 @@ process FILTERBEDFILE { path dict_file output: - tuple val(meta), path('filtered_${bed.simpleName}'), emit: filtered_bed + tuple val(meta), path('filtered.bed'), emit: filtered_bed when: task.ext.when == null || task.ext.when @@ -47,7 +47,7 @@ process FILTERBEDFILE { print(f"Output file {output_file} created in {os.getcwd()}") if __name__ == "__main__": - main("${bed}", "${dict_file}", "filtered_${bed.simpleName}") + main("${bed}", "${dict_file}", "filtered.bed") PYCODE From 03c0ed1ea3c3830a6414593ff356b932ef593a54 Mon Sep 17 00:00:00 2001 From: Shaun Regenbaum Date: Mon, 1 Jan 2024 15:36:41 +0200 Subject: [PATCH 07/18] added preparegenome --- .../nf-core/genomegenerate/environment.yaml | 11 ++ subworkflows/nf-core/genomegenerate/main.nf | 119 ++++++++++++++++++ subworkflows/nf-core/genomegenerate/meta.yaml | 53 
++++++++
 workflows/rnavar.nf                           |  13 +-
 4 files changed, 195 insertions(+), 1 deletion(-)
 create mode 100644 subworkflows/nf-core/genomegenerate/environment.yaml
 create mode 100644 subworkflows/nf-core/genomegenerate/main.nf
 create mode 100644 subworkflows/nf-core/genomegenerate/meta.yaml

diff --git a/subworkflows/nf-core/genomegenerate/environment.yaml b/subworkflows/nf-core/genomegenerate/environment.yaml
new file mode 100644
index 00000000..93e4476a
--- /dev/null
+++ b/subworkflows/nf-core/genomegenerate/environment.yaml
@@ -0,0 +1,11 @@
+name: star_genomegenerate
+
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+
+dependencies:
+  - bioconda::samtools=1.18
+  - bioconda::star=2.7.10a
+  - conda-forge::gawk=5.1.0
diff --git a/subworkflows/nf-core/genomegenerate/main.nf b/subworkflows/nf-core/genomegenerate/main.nf
new file mode 100644
index 00000000..b8855715
--- /dev/null
+++ b/subworkflows/nf-core/genomegenerate/main.nf
@@ -0,0 +1,119 @@
+process STAR_GENOMEGENERATE { // Builds a STAR genome index directory ("star/") from a FASTA and an optional GTF
+    tag "$fasta"
+    label 'process_high'
+
+    conda "${moduleDir}/environment.yaml" // must match the environment file name shipped next to this main.nf (.yaml, not .yml)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:ded3841da0194af2701c780e9b3d653a85d27489-0' :
+        'biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:ded3841da0194af2701c780e9b3d653a85d27489-0' }"
+
+    input:
+    tuple val(meta), path(fasta)
+    tuple val(meta2), path(gtf)
+
+    output:
+    tuple val(meta), path("star") , emit: index
+    path "versions.yml" , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def args_list = args.tokenize()
+    def memory = task.memory ? "--limitGenomeGenerateRAM ${task.memory.toBytes() - 100000000}" : '' // keep 100000000 bytes below the task memory limit
+    def include_gtf = gtf ? "--sjdbGTFfile $gtf" : '' // only pass the annotation when a GTF was supplied
+    if (args_list.contains('--genomeSAindexNbases')) { // caller set --genomeSAindexNbases in ext.args: pass args through unchanged
+        """
+        mkdir star
+        STAR \\
+            --runMode genomeGenerate \\
+            --genomeDir star/ \\
+            --genomeFastaFiles $fasta \\
+            $include_gtf \\
+            --runThreadN $task.cpus \\
+            $memory \\
+            $args
+
+        cat <<-END_VERSIONS > versions.yml
+        "${task.process}":
+            star: \$(STAR --version | sed -e "s/STAR_//g")
+            samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+            gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//')
+        END_VERSIONS
+        """
+    } else { // otherwise derive --genomeSAindexNbases as log2(total bases from the .fai)/2 - 1, capped at 14
+        """
+        samtools faidx $fasta
+        NUM_BASES=`gawk '{sum = sum + \$2}END{if ((log(sum)/log(2))/2 - 1 > 14) {printf "%.0f", 14} else {printf "%.0f", (log(sum)/log(2))/2 - 1}}' ${fasta}.fai`
+
+        mkdir star
+        STAR \\
+            --runMode genomeGenerate \\
+            --genomeDir star/ \\
+            --genomeFastaFiles $fasta \\
+            $include_gtf \\
+            --runThreadN $task.cpus \\
+            --genomeSAindexNbases \$NUM_BASES \\
+            $memory \\
+            $args
+
+        cat <<-END_VERSIONS > versions.yml
+        "${task.process}":
+            star: \$(STAR --version | sed -e "s/STAR_//g")
+            samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+            gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//')
+        END_VERSIONS
+        """
+    }
+
+    stub:
+    if (gtf) { // with a GTF, the stub also creates the exon/gene/sjdb/transcript placeholder files
+        """
+        mkdir star
+        touch star/Genome
+        touch star/Log.out
+        touch star/SA
+        touch star/SAindex
+        touch star/chrLength.txt
+        touch star/chrName.txt
+        touch star/chrNameLength.txt
+        touch star/chrStart.txt
+        touch star/exonGeTrInfo.tab
+        touch star/exonInfo.tab
+        touch star/geneInfo.tab
+        touch star/genomeParameters.txt
+        touch star/sjdbInfo.txt
+        touch star/sjdbList.fromGTF.out.tab
+        touch star/sjdbList.out.tab
+        touch star/transcriptInfo.tab
+
+        cat <<-END_VERSIONS > versions.yml
+        "${task.process}":
+            star: \$(STAR --version | sed -e "s/STAR_//g")
+            samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+            gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//')
+        END_VERSIONS
+        """
+    } else { // without a GTF, only the genome-level placeholder files are created
+        """
+        mkdir star
+        touch star/Genome
+        touch star/Log.out
+        touch star/SA
+        touch star/SAindex
+        touch star/chrLength.txt
+        touch star/chrName.txt
+        touch star/chrNameLength.txt
+        touch star/chrStart.txt
+        touch star/genomeParameters.txt
+
+        cat <<-END_VERSIONS > versions.yml
+        "${task.process}":
+            star: \$(STAR --version | sed -e "s/STAR_//g")
+            samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+            gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//')
+        END_VERSIONS
+        """
+    }
+}
diff --git a/subworkflows/nf-core/genomegenerate/meta.yaml b/subworkflows/nf-core/genomegenerate/meta.yaml
new file mode 100644
index 00000000..1061e1b8
--- /dev/null
+++ b/subworkflows/nf-core/genomegenerate/meta.yaml
@@ -0,0 +1,53 @@
+name: star_genomegenerate
+description: Create index for STAR
+keywords:
+  - index
+  - fasta
+  - genome
+  - reference
+tools:
+  - star:
+      description: |
+        STAR is a software package for mapping DNA sequences against
+        a large reference genome, such as the human genome.
+      homepage: https://github.com/alexdobin/STAR
+      manual: https://github.com/alexdobin/STAR/blob/master/doc/STARmanual.pdf
+      doi: 10.1093/bioinformatics/bts635
+      licence: ["MIT"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - fasta:
+      type: file
+      description: Fasta file of the reference genome
+  - meta2:
+      type: map
+      description: |
+        Groovy Map containing reference information
+        e.g. [ id:'test' ]
+  - gtf:
+      type: file
+      description: GTF file of the reference genome
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g.
[ id:'test', single_end:false ] + - index: + type: directory + description: Folder containing the star index files + pattern: "star" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@kevinmenden" + - "@drpatelh" +maintainers: + - "@kevinmenden" + - "@drpatelh" diff --git a/workflows/rnavar.nf b/workflows/rnavar.nf index 15df17e4..6808ba59 100644 --- a/workflows/rnavar.nf +++ b/workflows/rnavar.nf @@ -80,6 +80,7 @@ include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/modules/custo */ include { ALIGN_STAR } from '../subworkflows/nf-core/align_star' // Align reads to genome and sort and index the alignment file +include { STAR_GENOMEGENERATE } from '../subworkflows/nf-core/genomegenerate' // Generate genome index for STAR include { MARKDUPLICATES } from '../subworkflows/nf-core/markduplicates' // Mark duplicates in the BAM file include { SPLITNCIGAR } from '../subworkflows/nf-core/splitncigar' // Splits reads that contain Ns in their cigar string include { RECALIBRATE } from '../subworkflows/nf-core/recalibrate' // Estimate and correct systematic bias @@ -222,11 +223,21 @@ workflow RNAVAR { ch_star_multiqc = Channel.empty() ch_aligner_pca_multiqc = Channel.empty() ch_aligner_clustering_multiqc = Channel.empty() + ch_star_index = Channel.empty() if (params.aligner == 'star') { + + // Generate the STAR index + STAR_GENOMEGENERATE ( + PREPARE_GENOME.out.fasta, + PREPARE_GENOME.out.gtf, + + ) + ch_star_index = STAR_GENOMEGENERATE.out.index + ALIGN_STAR ( ch_cat_fastq, - PREPARE_GENOME.out.star_index, + ch_star_index, PREPARE_GENOME.out.gtf, params.star_ignore_sjdbgtf, seq_platform, From ef20e4bd0b8ad480a0c351ab457a99aa4561871c Mon Sep 17 00:00:00 2001 From: Shaun Regenbaum Date: Mon, 1 Jan 2024 15:46:24 +0200 Subject: [PATCH 08/18] uncomment --- subworkflows/local/prepare_genome.nf | 8 ++++---- workflows/rnavar.nf | 13 ++++++------- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git 
a/subworkflows/local/prepare_genome.nf b/subworkflows/local/prepare_genome.nf index e4760b57..fd7db408 100644 --- a/subworkflows/local/prepare_genome.nf +++ b/subworkflows/local/prepare_genome.nf @@ -128,10 +128,10 @@ workflow PREPARE_GENOME { ch_versions = ch_versions.mix(STAR_GENOMEGENERATE.out.versions) } - //if((!ch_star_index) || getIndexVersion(ch_star_index) != '2.7.4a'){ - // ch_star_index = STAR_GENOMEGENERATE(ch_fasta,ch_gtf).index - // ch_versions = ch_versions.mix(STAR_GENOMEGENERATE.out.versions) - //} + if((!ch_star_index) || getIndexVersion(ch_star_index) != '2.7.4a'){ + ch_star_index = STAR_GENOMEGENERATE(ch_fasta,ch_gtf).index + ch_versions = ch_versions.mix(STAR_GENOMEGENERATE.out.versions) + } } diff --git a/workflows/rnavar.nf b/workflows/rnavar.nf index 6808ba59..ec4a2504 100644 --- a/workflows/rnavar.nf +++ b/workflows/rnavar.nf @@ -223,21 +223,20 @@ workflow RNAVAR { ch_star_multiqc = Channel.empty() ch_aligner_pca_multiqc = Channel.empty() ch_aligner_clustering_multiqc = Channel.empty() - ch_star_index = Channel.empty() if (params.aligner == 'star') { // Generate the STAR index - STAR_GENOMEGENERATE ( - PREPARE_GENOME.out.fasta, - PREPARE_GENOME.out.gtf, + // STAR_GENOMEGENERATE ( + // PREPARE_GENOME.out.fasta, + // PREPARE_GENOME.out.gtf, - ) - ch_star_index = STAR_GENOMEGENERATE.out.index + // ) + // ch_star_index = STAR_GENOMEGENERATE.out.index ALIGN_STAR ( ch_cat_fastq, - ch_star_index, + PREPARE_GENOME.out.star_index, PREPARE_GENOME.out.gtf, params.star_ignore_sjdbgtf, seq_platform, From bf3efcd00fcc3070eca16b769d00f2230a988466 Mon Sep 17 00:00:00 2001 From: Shaun Regenbaum Date: Thu, 15 Feb 2024 09:56:36 +0200 Subject: [PATCH 09/18] Update environment.yml to snpeff5.1 --- modules/nf-core/modules/snpeff/environment.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/nf-core/modules/snpeff/environment.yml b/modules/nf-core/modules/snpeff/environment.yml index ad0523fb..e0527690 100644 --- 
a/modules/nf-core/modules/snpeff/environment.yml +++ b/modules/nf-core/modules/snpeff/environment.yml @@ -1,10 +1,10 @@ # You can use this file to create a conda environment for this module: # conda env create -f environment.yml -name: nf-core-snpeff-5.0 +name: nf-core-snpeff-5.1 channels: - conda-forge - bioconda - defaults dependencies: - - bioconda::snpeff=5.0 + - bioconda::snpeff=5.1 From 7017fa7c413e4fa738929b0562e9d2fe4482dee2 Mon Sep 17 00:00:00 2001 From: Shaun Regenbaum Date: Thu, 15 Feb 2024 09:57:14 +0200 Subject: [PATCH 10/18] Update main.nf to snpeff5.1 --- modules/nf-core/modules/snpeff/main.nf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/nf-core/modules/snpeff/main.nf b/modules/nf-core/modules/snpeff/main.nf index 1b4d5f43..93cf29a6 100644 --- a/modules/nf-core/modules/snpeff/main.nf +++ b/modules/nf-core/modules/snpeff/main.nf @@ -2,10 +2,10 @@ process SNPEFF { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::snpeff=5.0" : null) + conda (params.enable_conda ? "bioconda::snpeff=5.1" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/snpeff:5.0--hdfd78af_1' : - 'quay.io/biocontainers/snpeff:5.0--hdfd78af_1' }" + 'https://depot.galaxyproject.org/singularity/snpeff:5.1--hdfd78af_1' : + 'quay.io/biocontainers/snpeff:5.1--hdfd78af_1' }" input: tuple val(meta), path(vcf) From 91b4307ea2f4af9a06193cdf304482334510a117 Mon Sep 17 00:00:00 2001 From: Shaun Regenbaum Date: Thu, 15 Feb 2024 09:59:01 +0200 Subject: [PATCH 11/18] Update build.sh to v 5.1 and add UMD3.1 --- modules/nf-core/modules/snpeff/build.sh | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/modules/nf-core/modules/snpeff/build.sh b/modules/nf-core/modules/snpeff/build.sh index 2fccf9a8..0a9922a7 100644 --- a/modules/nf-core/modules/snpeff/build.sh +++ b/modules/nf-core/modules/snpeff/build.sh @@ -18,8 +18,9 @@ build_push() { docker push nfcore/snpeff:${SNPEFF_TAG}.${GENOME} } -build_push "GRCh37" "75" "5.0" -build_push "GRCh38" "99" "5.0" -build_push "GRCm38" "99" "5.0" -build_push "CanFam3.1" "99" "5.0" -build_push "WBcel235" "99" "5.0" +build_push "GRCh37" "75" "5.1" +build_push "GRCh38" "99" "5.1" +build_push "GRCm38" "99" "5.1" +build_push "CanFam3.1" "99" "5.1" +build_push "WBcel235" "99" "5.1" +build_push "UMD3.1" "99" "5.1" From 377e45dc7944fc2dc6ec6c76e1734a6b61352f32 Mon Sep 17 00:00:00 2001 From: Shaun Regenbaum Date: Thu, 15 Feb 2024 10:12:02 +0200 Subject: [PATCH 12/18] Update modules.config updated snpeff to 5.1 --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index 61cf9ba1..bdcb8ff3 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -373,7 +373,7 @@ process { withName: 'SNPEFF' { ext.args = '-nodownload -canon -v' if(!params.snpeff_cache){ - container = { params.genome ? "nfcore/snpeff:5.0.${params.genome}" : "nfcore/snpeff:5.0.${params.snpeff_db.split('\\.')[0]}" } + container = { params.genome ? 
"nfcore/snpeff:5.1.${params.genome}" : "nfcore/snpeff:5.1.${params.snpeff_db.split('\\.')[0]}" } } publishDir = [ mode: params.publish_dir_mode, From b47d97ee036d43090935e8d00359be7cbcd0c342 Mon Sep 17 00:00:00 2001 From: Shaun Regenbaum Date: Thu, 15 Feb 2024 10:27:45 +0200 Subject: [PATCH 13/18] Update modules.config in snpeff, default to trying snpeff_db for container pull vs genome. --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index bdcb8ff3..d9a6cf5b 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -373,7 +373,7 @@ process { withName: 'SNPEFF' { ext.args = '-nodownload -canon -v' if(!params.snpeff_cache){ - container = { params.genome ? "nfcore/snpeff:5.1.${params.genome}" : "nfcore/snpeff:5.1.${params.snpeff_db.split('\\.')[0]}" } + container = { params.snpeff_db ? "nfcore/snpeff:5.1.${params.snpeff_db.split('\\.')[0]}" : "nfcore/snpeff:5.1.${params.genome}"} } publishDir = [ mode: params.publish_dir_mode, From 5ffb6ed83f76da22f2594b29bf50bc0979d443c2 Mon Sep 17 00:00:00 2001 From: Shaun Regenbaum Date: Thu, 15 Feb 2024 10:36:21 +0200 Subject: [PATCH 14/18] Update build.sh should be v75 per dockerhub --- modules/nf-core/modules/snpeff/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/nf-core/modules/snpeff/build.sh b/modules/nf-core/modules/snpeff/build.sh index 0a9922a7..5842c40f 100644 --- a/modules/nf-core/modules/snpeff/build.sh +++ b/modules/nf-core/modules/snpeff/build.sh @@ -23,4 +23,4 @@ build_push "GRCh38" "99" "5.1" build_push "GRCm38" "99" "5.1" build_push "CanFam3.1" "99" "5.1" build_push "WBcel235" "99" "5.1" -build_push "UMD3.1" "99" "5.1" +build_push "UMD3.1" "75" "5.1" From 8709c191bca39ee05d2ed926e05fbc0799ff3ab5 Mon Sep 17 00:00:00 2001 From: Shaun Regenbaum Date: Thu, 15 Feb 2024 10:39:17 +0200 Subject: [PATCH 15/18] Update modules.config reverted back to using genome --- conf/modules.config | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index d9a6cf5b..45d56500 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -373,7 +373,7 @@ process { withName: 'SNPEFF' { ext.args = '-nodownload -canon -v' if(!params.snpeff_cache){ - container = { params.snpeff_db ? "nfcore/snpeff:5.1.${params.snpeff_db.split('\\.')[0]}" : "nfcore/snpeff:5.1.${params.genome}"} + container = { params.genome ? "nfcore/snpeff:5.1.${params.genome}" : "nfcore/snpeff:5.1.${params.snpeff_db.split('\\.')[0]}"} } publishDir = [ mode: params.publish_dir_mode, From 87bcbfbdd0a6b867abb99279d2ac8ffbfbff9648 Mon Sep 17 00:00:00 2001 From: Shaun Regenbaum Date: Thu, 15 Feb 2024 10:52:37 +0200 Subject: [PATCH 16/18] Update build.sh update build push --- modules/nf-core/modules/snpeff/build.sh | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/modules/nf-core/modules/snpeff/build.sh b/modules/nf-core/modules/snpeff/build.sh index 5842c40f..d16fd939 100644 --- a/modules/nf-core/modules/snpeff/build.sh +++ b/modules/nf-core/modules/snpeff/build.sh @@ -17,10 +17,11 @@ build_push() { docker push nfcore/snpeff:${SNPEFF_TAG}.${GENOME} } - -build_push "GRCh37" "75" "5.1" -build_push "GRCh38" "99" "5.1" -build_push "GRCm38" "99" "5.1" -build_push "CanFam3.1" "99" "5.1" -build_push "WBcel235" "99" "5.1" -build_push "UMD3.1" "75" "5.1" +build_push "CanFam3.1" "99" "5.1" +build_push "GRCh37" "87" "5.1" +build_push "GRCh38" "105" "5.1" +build_push "GRCm38" "99" "5.1" +build_push "GRCm39" "105" "5.1" +build_push "R64-1-1" "105" "5.1" +build_push "UMD3.1" "75" "5.1" +build_push "WBcel235" "105" "5.1" From d840720f4d98fa090dcf7adc0f0646cff40512d9 Mon Sep 17 00:00:00 2001 From: Shaun Regenbaum Date: Thu, 15 Feb 2024 10:55:48 +0200 Subject: [PATCH 17/18] updated tag version to 2 --- modules/nf-core/modules/snpeff/main.nf | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git 
a/modules/nf-core/modules/snpeff/main.nf b/modules/nf-core/modules/snpeff/main.nf index 93cf29a6..ba02fbe9 100644 --- a/modules/nf-core/modules/snpeff/main.nf +++ b/modules/nf-core/modules/snpeff/main.nf @@ -4,8 +4,9 @@ process SNPEFF { conda (params.enable_conda ? "bioconda::snpeff=5.1" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/snpeff:5.1--hdfd78af_1' : - 'quay.io/biocontainers/snpeff:5.1--hdfd78af_1' }" + 'https://depot.galaxyproject.org/singularity/snpeff:5.1--hdfd78af_2' : + 'quay.io/biocontainers/snpeff:5.1--hdfd78af_2' }" + input: tuple val(meta), path(vcf) From 21ddc2a94b4504f64b791ddb2a4f8e51400a962a Mon Sep 17 00:00:00 2001 From: Shaun Regenbaum Date: Wed, 28 Aug 2024 14:28:03 +0300 Subject: [PATCH 18/18] Set commit d840720f4d98fa090dcf7adc0f0646cff40512d9 as new head