From 2db5ceae3ab5b89091c12bfa3a1c50a3a5ac9d34 Mon Sep 17 00:00:00 2001 From: Florian Date: Fri, 30 Jun 2023 11:20:46 +0200 Subject: [PATCH 01/89] Changes to check_samplesheet --- bin/check_samplesheet.py | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index 47d1b446..59d39ca5 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -84,10 +84,12 @@ def check_samplesheet(file_in, file_out): sample_mapping_dict = {} with open(file_in, "r") as fin: + ## Check header MIN_COLS = 2 MIN_HEADER = ["sample", "fastq_1", "fastq_2"] - OPT_HEADER = ["expected_cells", "seq_center"] + OPT_HEADER = ["expected_cells", "seq_center", "fastq_barcode", "sample_type"] + SAMPLE_TYPES = ["gex", "atac"] header = [x.strip('"') for x in fin.readline().strip().split(",")] unknown_header = 0 @@ -101,8 +103,7 @@ def check_samplesheet(file_in, file_out): min_header_count = min_header_count + 1 colmap[h] = i i = i + 1 - if min_header_count < len(MIN_HEADER): - # code was checking for unknown_header or min_header_count however looking at the ifelse, unknown_header does not seem that it should be tested + if unknown_header or min_header_count < len(MIN_HEADER): given = ",".join(header) wanted = ",".join(MIN_HEADER) print(f"ERROR: Please check samplesheet header -> {given} != {wanted}") @@ -147,7 +148,20 @@ def check_samplesheet(file_in, file_out): seq_center = seq_center.replace(" ", "_") ## Check FastQ file extension - for fastq in [fastq_1, fastq_2]: + fastq_list = [fastq_1, fastq_2] + + fastq_barcode = "" + if "fastq_barcode" in header: + fastq_barcode = lspl[colmap["fastq_barcode"]] + fastq_list.append(fastq_barcode) + + sample_type = "" + if "sample_type" in header: + sample_type = lspl[colmap["sample_type"]] + if (sample_type not in SAMPLE_TYPES): + print_error("Sample type {} is not supported! 
Please specify either {}".format(sample_type, " or ".join(SAMPLE_TYPES)), "Line", line) + + for fastq in fastq_list: if fastq: if fastq.find(" ") != -1: print_error("FastQ file contains spaces!", "Line", line) @@ -161,9 +175,9 @@ def check_samplesheet(file_in, file_out): ## Auto-detect paired-end/single-end sample_info = [] ## [single_end, fastq_1, fastq_2] if sample and fastq_1 and fastq_2: ## Paired-end short reads - sample_info = ["0", fastq_1, fastq_2, expected_cells, seq_center] + sample_info = ["0", fastq_1, fastq_2, expected_cells, seq_center, fastq_barcode, sample_type] elif sample and fastq_1 and not fastq_2: ## Single-end short reads - sample_info = ["1", fastq_1, fastq_2, expected_cells, seq_center] + sample_info = ["1", fastq_1, fastq_2, expected_cells, seq_center, fastq_barcode, sample_type] else: print_error("Invalid combination of columns provided!", "Line", line) @@ -180,8 +194,9 @@ def check_samplesheet(file_in, file_out): ## Write validated samplesheet with appropriate columns if len(sample_mapping_dict) > 0: with open(file_out, "w") as fout: - fout.write(",".join(["sample", "single_end", "fastq_1", "fastq_2", "expected_cells", "seq_center"]) + "\n") + fout.write(",".join(["sample", "single_end", "fastq_1", "fastq_2", "expected_cells", "seq_center" , "fastq_barcode", "sample_type"]) + "\n") for sample in sorted(sample_mapping_dict.keys()): + ## Check that multiple runs of the same sample are of the same datatype if not all(x[0] == sample_mapping_dict[sample][0][0] for x in sample_mapping_dict[sample]): print_error( From d197871014f9fc0998b414162b4616a03079d72e Mon Sep 17 00:00:00 2001 From: Florian Date: Fri, 30 Jun 2023 11:20:56 +0200 Subject: [PATCH 02/89] Changes to nextflow.config --- nextflow.config | 3 +++ 1 file changed, 3 insertions(+) diff --git a/nextflow.config b/nextflow.config index 3367748d..16425abf 100644 --- a/nextflow.config +++ b/nextflow.config @@ -40,6 +40,9 @@ params { // Cellranger parameters cellranger_index = null + // 
Cellranger ARC parameters + motifs = null + // UniverSC paramaters universc_index = null universc_technology = '10x' From f36d7fb010d7aa61db2a83cc3a330a4a9cb13dde Mon Sep 17 00:00:00 2001 From: Florian Date: Fri, 30 Jun 2023 11:21:07 +0200 Subject: [PATCH 03/89] Changes to input_check --- subworkflows/local/input_check.nf | 80 ++++++++++++++++++++++++++----- 1 file changed, 68 insertions(+), 12 deletions(-) diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index f5a11b18..55eec787 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -10,21 +10,44 @@ workflow INPUT_CHECK { samplesheet // file: /path/to/samplesheet.csv main: - SAMPLESHEET_CHECK ( samplesheet ) - .csv - .splitCsv ( header:true, sep:',' ) - .map { create_fastq_channel(it) } - .groupTuple(by: [0]) // group replicate files together, modifies channel to [ val(meta), [ [reads_rep1], [reads_repN] ] ] - .map { meta, reads -> [ meta, reads.flatten() ] } // needs to flatten due to last "groupTuple", so we now have reads as a single array as expected by nf-core modules: [ val(meta), [ reads ] ] - .set { reads } + + reads = null + versions = null + + if (params.aligner == "cellranger-arc"){ + SAMPLESHEET_CHECK ( samplesheet ) + .csv + .splitCsv ( header:true, sep:',' ) + .map { create_fastq_channel(it) } + // group replicate files together, modifies channel to + // [ val(meta), [ multimeta_s1, multimeta_s1 ], [ [reads_rep1], [reads_repN] ] ] + .groupTuple(by: [0]) + // needs to flatten due to last "groupTuple", so we now have reads as a single array as expected by + // nf-core modules: [ val(meta), [multi_meta], [ reads ] ] + .map { meta, multi_meta, reads -> [ meta, multi_meta.flatten(), reads.flatten() ] } + .set { reads } + versions = SAMPLESHEET_CHECK.out.versions + } else { + SAMPLESHEET_CHECK ( samplesheet ) + .csv + .splitCsv ( header:true, sep:',' ) + .map { create_fastq_channel(it) } + // group replicate files together, modifies 
channel to [ val(meta), [ [reads_rep1], [reads_repN] ] ] + .groupTuple(by: [0]) + // needs to flatten due to last "groupTuple", so we now have reads as a single array as expected by + // nf-core modules: [ val(meta), [ reads ] ] + .map { meta, reads -> [ meta, reads.flatten() ] } + .set { reads } + versions = SAMPLESHEET_CHECK.out.versions + } emit: - reads // channel: [ val(meta), [ reads ] ] + reads // channel: [ val(meta), [multi_meta], [ reads ] ] versions = SAMPLESHEET_CHECK.out.versions // channel: [ versions.yml ] } -// Function to get list of [ meta, [ fastq_1, fastq_2 ] ] +// Function to get list of [ meta, [ multimeta ] , [ fastq_1, fastq_2 ] ] def create_fastq_channel(LinkedHashMap row) { // create meta map def meta = [:] @@ -35,16 +58,49 @@ def create_fastq_channel(LinkedHashMap row) { // add path(s) of the fastq file(s) to the meta map def fastq_meta = [] + def fastqs = [] if (!file(row.fastq_1).exists()) { exit 1, "ERROR: Please check input samplesheet -> Read 1 FastQ file does not exist!\n${row.fastq_1}" } if (meta.single_end) { - fastq_meta = [ meta, [ file(row.fastq_1) ] ] + fastqs = [ file(row.fastq_1) ] } else { if (!file(row.fastq_2).exists()) { exit 1, "ERROR: Please check input samplesheet -> Read 2 FastQ file does not exist!\n${row.fastq_2}" } - fastq_meta = [ meta, [ file(row.fastq_1), file(row.fastq_2) ] ] + fastqs = [ file(row.fastq_1), file(row.fastq_2) ] + if (row.sample_type == "atac") { + if (row.fastq_barcode == "") { + exit 1, "ERROR: Please check input samplesheet -> Barcode FastQ (Dual index i5 read) file is missing!\n" + } + if (!file(row.fastq_barcode).exists()) { + exit 1, "ERROR: Please check input samplesheet -> Barcode FastQ (Dual index i5 read) file does not exist!" + + "\n${row.fastq_barcode}" + } + fastqs.add(file(row.fastq_barcode)) + } + } + + // define meta_data for multiome + def multi_meta = [] + multi_meta = row.sample_type ? 
[row.sample_type] : [param.sample_type] + + if (params.aligner == "cellranger-arc"){ + sub_sample = row.fastq_1.split("/")[-1].replaceAll("_S[0-9]+_L[0-9]+_R1_[0-9]+.fastq.gz","") + fastqs.each{ + if(!it.name.contains(sub_sample)){ + exit 1, "ERROR: Please check input samplesheet -> Some files do not have the same sample name " + + "${sub_sample} in common!\n${it}" + } + } + multi_meta.add(sub_sample) } + + fastq_meta = [ meta, fastqs ] + + if (params.aligner == "cellranger-arc"){ + fastq_meta = [ meta, multi_meta, fastqs ] + } + return fastq_meta -} +} \ No newline at end of file From beeca28d60309b5954ce63488c6353a30cdd1d2b Mon Sep 17 00:00:00 2001 From: Florian Date: Fri, 30 Jun 2023 11:21:24 +0200 Subject: [PATCH 04/89] Changes to scranseq.nf --- workflows/scrnaseq.nf | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf index 70265642..4407ca2e 100644 --- a/workflows/scrnaseq.nf +++ b/workflows/scrnaseq.nf @@ -44,6 +44,7 @@ include { KALLISTO_BUSTOOLS } from '../subworkflows/local/kallisto_bustools' include { SCRNASEQ_ALEVIN } from '../subworkflows/local/alevin' include { STARSOLO } from '../subworkflows/local/starsolo' include { CELLRANGER_ALIGN } from "../subworkflows/local/align_cellranger" +include { CELLRANGERARC_ALIGN } from "../subworkflows/local/align_cellranger_arc" include { UNIVERSC_ALIGN } from "../subworkflows/local/align_universc" include { MTX_CONVERSION } from "../subworkflows/local/mtx_conversion" include { GTF_GENE_FILTER } from '../modules/local/gtf_gene_filter' @@ -116,6 +117,7 @@ workflow SCRNASEQ { ch_versions = ch_versions.mix(INPUT_CHECK.out.versions) // Run FastQC + /* ch_multiqc_fastqc = Channel.empty() if (!params.skip_fastqc) { FASTQC_CHECK ( ch_fastq ) @@ -124,6 +126,7 @@ workflow SCRNASEQ { } else { ch_multiqc_fastqc = Channel.empty() } + */ ch_filter_gtf = GTF_GENE_FILTER ( ch_genome_fasta, ch_gtf ).gtf @@ -206,6 +209,20 @@ workflow SCRNASEQ { ch_mtx_matrices = 
ch_mtx_matrices.mix(UNIVERSC_ALIGN.out.universc_out) } + // Run cellranger pipeline + if (params.aligner == "cellranger-arc") { + CELLRANGER_ARC_ALIGN( + ch_genome_fasta, + ch_filter_gtf, + ch_motifs, + ch_cellranger_index, + ch_fastq + ) + ch_versions = ch_versions.mix(CELLRANGER_ARC_ALIGN.out.ch_versions) + ch_mtx_matrices = ch_mtx_matrices.mix(CELLRANGER_ARC_ALIGN.out.cellranger_arc_out) + } + + /* // Run mtx to h5ad conversion subworkflow MTX_CONVERSION ( ch_mtx_matrices, @@ -213,6 +230,7 @@ workflow SCRNASEQ { ch_txp2gene, ch_star_index ) + */ //Add Versions from MTX Conversion workflow too ch_versions.mix(MTX_CONVERSION.out.ch_versions) From ee520a589d3dd504055cac7c2483dbb2445b10e8 Mon Sep 17 00:00:00 2001 From: Florian Date: Fri, 30 Jun 2023 11:21:58 +0200 Subject: [PATCH 05/89] Adding script for config and lib.csv generation --- bin/generate_config.py | 29 +++++++++++++++++++++++++++++ bin/generate_lib_csv.py | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) create mode 100755 bin/generate_config.py create mode 100755 bin/generate_lib_csv.py diff --git a/bin/generate_config.py b/bin/generate_config.py new file mode 100755 index 00000000..93077a1f --- /dev/null +++ b/bin/generate_config.py @@ -0,0 +1,29 @@ +#!/usr/bin/env python +import argparse + +if __name__ == "__main__": + + parser = argparse.ArgumentParser(description="Generate the lib.csv for cellranger-arc.") + + parser.add_argument("-f", "--fasta", dest="fasta", help="Name of the fasta file.") + parser.add_argument("-g", "--gtf", dest="gtf", help="Name of the gtf file.") + parser.add_argument("-m", "--motifs", dest="motifs", help="Name of the motifs file.") + parser.add_argument("-a", "--add", dest="add", help="Additional filter line.") + + args = vars(parser.parse_args()) + + print(args) + + config = open("config", "w") + config.write("{\n") + config.write("\torganism: \"scrnaseq\"\n") + config.write("\tgenome: [\"cellranger_arc_reference\"]\n") + 
config.write("\tinput_fasta: [\"{}\"]\n".format(args["fasta"])) + config.write("\tinput_gtf: [\"{}\"]\n".format(args["gtf"])) + config.write("\tinput_motifs: \"{}\"\n".format(args["motifs"])) + if(args["add"] != "none"): + config.write(args["add"] + "\n") + config.write("}") + config.close() + + print("Wrote config file") diff --git a/bin/generate_lib_csv.py b/bin/generate_lib_csv.py new file mode 100755 index 00000000..f93a23eb --- /dev/null +++ b/bin/generate_lib_csv.py @@ -0,0 +1,35 @@ +#!/usr/bin/env python +import argparse +import os + +if __name__ == "__main__": + + parser = argparse.ArgumentParser(description="Generate the lib.csv for cellranger-arc.") + + parser.add_argument("-t", "--sample_types", dest="sample_types", help="Comma seperated list of sample types.") + parser.add_argument("-n", "--sample_names", dest="sample_names", help="Comma seperated list of sample names.") + parser.add_argument("-f", "--fastq_folder", dest="fastq_folder", help="Folder of FASTQ files.") + parser.add_argument("-o", "--out", dest="out", help="Output path.") + + args = vars(parser.parse_args()) + + print(args) + + sample_types = args["sample_types"].split(",") + sample_names = args["sample_names"].split(",") + unique_samples_names = set(sample_names) + + lib_csv = open(args["out"], "w") + lib_csv.write("fastqs,sample,library_type") + + for i in range(0,len(sample_types)): + if (sample_names[i] in unique_samples_names): + unique_samples_names.remove(sample_names[i]) # this has to be done to account for different Lane files (e.g., L002) + if(sample_types[i] == "gex"): + lib_csv.write("\n{},{},{}".format(args["fastq_folder"], sample_names[i],"Gene Expression")) + else: + lib_csv.write("\n{},{},{}".format(args["fastq_folder"], sample_names[i],"Chromatin Accessibility")) + + lib_csv.close() + + print("Wrote lib.csv file to {}".format(args["out"])) From 87716edd295a4a4cdf197cd4cec7dfc5c72d5365 Mon Sep 17 00:00:00 2001 From: Florian Date: Fri, 30 Jun 2023 11:22:35 +0200 Subject: 
[PATCH 06/89] Adding cellranger-arc modules --- modules/local/cellrangerarc/Dockerfile | 28 +++++++++ modules/local/cellrangerarc/README.md | 19 ++++++ modules/local/cellrangerarc/count/main.nf | 72 ++++++++++++++++++++++ modules/local/cellrangerarc/count/meta.yml | 39 ++++++++++++ modules/local/cellrangerarc/mkgtf/main.nf | 36 +++++++++++ modules/local/cellrangerarc/mkgtf/meta.yml | 31 ++++++++++ modules/local/cellrangerarc/mkref/main.nf | 39 ++++++++++++ modules/local/cellrangerarc/mkref/meta.yml | 45 ++++++++++++++ 8 files changed, 309 insertions(+) create mode 100644 modules/local/cellrangerarc/Dockerfile create mode 100644 modules/local/cellrangerarc/README.md create mode 100644 modules/local/cellrangerarc/count/main.nf create mode 100644 modules/local/cellrangerarc/count/meta.yml create mode 100644 modules/local/cellrangerarc/mkgtf/main.nf create mode 100644 modules/local/cellrangerarc/mkgtf/meta.yml create mode 100644 modules/local/cellrangerarc/mkref/main.nf create mode 100644 modules/local/cellrangerarc/mkref/meta.yml diff --git a/modules/local/cellrangerarc/Dockerfile b/modules/local/cellrangerarc/Dockerfile new file mode 100644 index 00000000..ccf55582 --- /dev/null +++ b/modules/local/cellrangerarc/Dockerfile @@ -0,0 +1,28 @@ +# Dockerfile to create container with Cell Ranger v2.0.2 +# Push to nfcore/cellranger-arc: + +FROM continuumio/miniconda3:4.8.2 +LABEL authors="Gisela Gabernet , Florian Heyl" \ + description="Docker image containing Cell Ranger Arc" +# Disclaimer: this container is not provided nor supported by Illumina or 10x Genomics. 
+ +# Install procps and clean apt cache +RUN apt-get update --allow-releaseinfo-change \ + && apt-get install -y \ + cpio \ + procps \ + rpm2cpio \ + unzip \ + && apt-get clean -y && rm -rf /var/lib/apt/lists/* + +# Copy pre-downloaded cellranger-arc file +ENV CELLRANGER_ARC_VER=2.0.2 +COPY cellranger-arc-$CELLRANGER_ARC_VER.tar.gz /opt/cellranger-arc-$CELLRANGER_ARC_VER.tar.gz + +# Install cellranger-arc +RUN \ + cd /opt && \ + tar -xzvf cellranger-arc-$CELLRANGER_ARC_VER.tar.gz && \ + export PATH=/opt/cellranger-arc-$CELLRANGER_ARC_VER:$PATH && \ + ln -s /opt/cellranger-arc-$CELLRANGER_ARC_VER/cellranger-arc /usr/bin/cellranger-arc && \ + rm -rf /opt/cellranger-arc-$CELLRANGER_ARC_VER.tar.gz diff --git a/modules/local/cellrangerarc/README.md b/modules/local/cellrangerarc/README.md new file mode 100644 index 00000000..d4192553 --- /dev/null +++ b/modules/local/cellrangerarc/README.md @@ -0,0 +1,19 @@ +# Updating the docker container and making a new module release + +Cell Ranger Arc is a commercial tool from 10X Genomics. The container provided for the cellranger-arc nf-core module is not provided nor supported by 10x Genomics. Updating the Cell Ranger Arc versions in the container and pushing the update to Dockerhub needs to be done manually. + +1. Navigate to the appropriate download page. - [Cell Ranger Arc](https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/installation): download the tar ball of the desired Cell Ranger Arc version with `curl` or `wget`. Place this file in the same folder where the Dockerfile lies. + +2. Edit the Dockerfile. Update the Cell Ranger Arc versions in this line: + +```bash +ENV CELLRANGER_ARC_VER= +``` + +3. Create and test the container: + +```bash +docker build . -t nfcore/cellranger-arc: +``` + +4. 
**Access rights are needed to push the container to the Dockerhub nfcore organization, please ask a core team member to do so.** \ No newline at end of file diff --git a/modules/local/cellrangerarc/count/main.nf b/modules/local/cellrangerarc/count/main.nf new file mode 100644 index 00000000..a858a2eb --- /dev/null +++ b/modules/local/cellrangerarc/count/main.nf @@ -0,0 +1,72 @@ +process CELLRANGERARC_COUNT { + tag "$meta.id" + label 'process_high' + + container "nf-core/cellranger-arc:2.0.2" + + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + exit 1, "CELLRANGERARC_COUNT module does not support Conda. Please use Docker / Singularity / Podman instead." + } + + input: + tuple val(meta), val(multi_meta), path(reads) + path reference + + output: + tuple val(meta), path("${meta.id}/outs/*"), emit: outs + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def reference_name = reference.name + + def multi_meta_info = multi_meta.collate(2).transpose() + def sample_types = multi_meta_info[0].join(",") + def sample_names = multi_meta_info[1].join(",") + def lib_csv = meta.id + "_lib.csv" + + """ + # The following ugly three commands (mkdir, mv, generate_lib_csv) + # are required because cellranger-arc only deals with abolsute paths + if [ ! 
-d "fastqs" ]; then + mkdir fastqs + fi + + mv *.fastq.gz fastqs/ + + generate_lib_csv.py \\ + --sample_types $sample_types \\ + --sample_names $sample_names \\ + --fastq_folder \$(readlink -f fastqs)\\ + --out $lib_csv + + cellranger-arc \\ + count \\ + --id='${meta.id}' \\ + --libraries=$lib_csv \\ + --reference=$reference_name \\ + --localcores=$task.cpus \\ + --localmem=${task.memory.toGiga()} \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cellrangerarc: \$(echo \$( cellranger-arc --version 2>&1) | sed 's/^.*[^0-9]\\([0-9]*\\.[0-9]*\\.[0-9]*\\).*\$/\\1/' ) + END_VERSIONS + """ + + stub: + """ + mkdir -p "${meta.id}/outs/" + touch ${meta.id}/outs/fake_file.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cellrangerarc: \$(echo \$( cellranger-arc --version 2>&1) | sed 's/^.*[^0-9]\\([0-9]*\\.[0-9]*\\.[0-9]*\\).*\$/\\1/' ) + END_VERSIONS + """ +} diff --git a/modules/local/cellrangerarc/count/meta.yml b/modules/local/cellrangerarc/count/meta.yml new file mode 100644 index 00000000..f69bc1fa --- /dev/null +++ b/modules/local/cellrangerarc/count/meta.yml @@ -0,0 +1,39 @@ +name: cellrangerarc_count +description: Module to use Cell Ranger's ARC pipelines analyze sequencing data produced from Chromium Single Cell ARC. Uses the cellranger-arc count command. +keywords: + - align + - count + - reference +tools: + - cellrangerarc: + description: Cell Ranger ARC is a set of analysis pipelines that process Chromium Single Cell ARC data. 
+ homepage: https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/what-is-cell-ranger-arc + documentation: https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/what-is-cell-ranger-arc + tool_dev_url: https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/what-is-cell-ranger-arc + licence: 10x Genomics EULA +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - lib_csv: + type: file + description: | + Path to a 3-column CSV file declaring FASTQ paths, sample names and library types of input ATAC and GEX FASTQs. + - reference: + type: directory + description: Directory containing all the reference indices needed by Cell Ranger ARC +output: + - outs: + type: file + description: Files containing the outputs of Cell Ranger ARC + pattern: "${meta.id}/outs/*" + - versions: + type: file + description: File containing software version + pattern: "versions.yml" +authors: + - "@ggabernet" + - "@Emiller88" + - "@heylf" diff --git a/modules/local/cellrangerarc/mkgtf/main.nf b/modules/local/cellrangerarc/mkgtf/main.nf new file mode 100644 index 00000000..f304c6bc --- /dev/null +++ b/modules/local/cellrangerarc/mkgtf/main.nf @@ -0,0 +1,36 @@ +process CELLRANGERARC_MKGTF { + tag "$gtf" + label 'process_low' + + container "nf-core/cellranger-arc:2.0.2" + + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + exit 1, "CELLRANGERARC_COUNT module does not support Conda. Please use Docker / Singularity / Podman instead." 
+ } + + input: + path gtf + + output: + path "*.filtered.gtf", emit: gtf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + cellranger-arc \\ + mkgtf \\ + $gtf \\ + ${gtf.baseName}.filtered.gtf \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cellrangerarc: \$(echo \$( cellranger-arc --version 2>&1) | sed 's/^.*[^0-9]\\([0-9]*\\.[0-9]*\\.[0-9]*\\).*\$/\\1/' ) + END_VERSIONS + """ +} diff --git a/modules/local/cellrangerarc/mkgtf/meta.yml b/modules/local/cellrangerarc/mkgtf/meta.yml new file mode 100644 index 00000000..7ce211eb --- /dev/null +++ b/modules/local/cellrangerarc/mkgtf/meta.yml @@ -0,0 +1,31 @@ +name: cellrangerarc_mkgtf +description: Module to build a filtered gtf needed by the 10x Genomics Cell Ranger Arc tool. Uses the cellranger-arc mkgtf command. +keywords: + - reference + - mkref + - index +tools: + - cellrangerarc: + description: Cell Ranger Arc by 10x Genomics is a set of analysis pipelines that process Chromium single-cell data to align reads, generate feature-barcode matrices, perform clustering and other secondary analysis, and more. 
+ homepage: https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/what-is-cell-ranger-arc + documentation: https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/what-is-cell-ranger-arc + tool_dev_url: https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/what-is-cell-ranger-arc + licence: 10x Genomics EULA +input: + - gtf: + type: file + description: The reference GTF transcriptome file + pattern: "*.gtf" +output: + - gtf: + type: directory + description: The filtered GTF transcriptome file + pattern: "*.filtered.gtf" + - versions: + type: file + description: File containing software version + pattern: "versions.yml" +authors: + - "@ggabernet" + - "@Emiller88" + - "@heylf" diff --git a/modules/local/cellrangerarc/mkref/main.nf b/modules/local/cellrangerarc/mkref/main.nf new file mode 100644 index 00000000..41e9db30 --- /dev/null +++ b/modules/local/cellrangerarc/mkref/main.nf @@ -0,0 +1,39 @@ +process CELLRANGERARC_MKREF { + tag "$reference_config" + label 'process_medium' + + container "nf-core/cellranger-arc:2.0.2" + + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + exit 1, "CELLRANGERARC_COUNT module does not support Conda. Please use Docker / Singularity / Podman instead." 
+ } + + input: + path fasta + path gtf + path motifs + path reference_config + val reference_name + + output: + path "${reference_name}", emit: reference + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + cellranger-arc \\ + mkref \\ + --config=$reference_config \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cellrangerarc: \$(echo \$( cellranger-arc --version 2>&1) | sed 's/^.*[^0-9]\\([0-9]*\\.[0-9]*\\.[0-9]*\\).*\$/\\1/' ) + END_VERSIONS + """ +} diff --git a/modules/local/cellrangerarc/mkref/meta.yml b/modules/local/cellrangerarc/mkref/meta.yml new file mode 100644 index 00000000..a68adf96 --- /dev/null +++ b/modules/local/cellrangerarc/mkref/meta.yml @@ -0,0 +1,45 @@ +name: cellrangerarc_mkref +description: Module to build the reference needed by the 10x Genomics Cell Ranger Arc tool. Uses the cellranger-arc mkref command. +keywords: + - reference + - mkref + - index +tools: + - cellrangerarc: + description: Cell Ranger Arc is a set of analysis pipelines that process Chromium Single Cell Arc data. 
+ homepage: https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/what-is-cell-ranger-arc + documentation: https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/what-is-cell-ranger-arc + tool_dev_url: https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/what-is-cell-ranger-arc + licence: 10x Genomics EULA +input: + - fasta: + type: file + description: Reference genome FASTA file + pattern: "*.{fasta,fa}" + - gtf: + type: file + description: Reference transcriptome GTF file + pattern: "*.gtf" + - motifs: + type: file + description: Sequence motif file (e.g., from transcription factors) + pattern: "*.txt" + - reference_config: + type: file + description: JSON-like file holding organism, genome, reference fasta path, reference annotation gtf path, contigs that should be excluded and sequence format motif file path + pattern: config + - reference_name: + type: val + description: The name to give the new reference folder + pattern: str +output: + - reference: + type: folder + description: Folder called like the reference_name containing all the reference indices needed by Cell Ranger Arc + - versions: + type: file + description: File containing software version + pattern: "versions.yml" +authors: + - "@ggabernet" + - "@heylf" From 57e590c2f77407f8626272d9c24e1154e4d31e12 Mon Sep 17 00:00:00 2001 From: Florian Date: Fri, 30 Jun 2023 11:23:34 +0200 Subject: [PATCH 07/89] Adding cellranger-arc subworkflow --- subworkflows/local/align_cellranger_arc.nf | 52 ++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 subworkflows/local/align_cellranger_arc.nf diff --git a/subworkflows/local/align_cellranger_arc.nf b/subworkflows/local/align_cellranger_arc.nf new file mode 100644 index 00000000..2bbc8a27 --- /dev/null +++ b/subworkflows/local/align_cellranger_arc.nf @@ -0,0 +1,52 @@ +/* + * Alignment with Cellranger Arc + */ + +include {CELLRANGERARC_MKGTF} 
from "../../modules/local/cellrangerarc/mkgtf/main.nf" +include {CELLRANGERARC_MKREF} from "../../modules/local/cellrangerarc/mkref/main.nf" +include {GENERATELIBCSV} from "../../modules/local/generate_cellranger_lib_csv.nf" +include {GENERATECONFIG} from "../../modules/local/generate_cellranger_mkref_config.nf" +include {CELLRANGERARC_COUNT} from "../../modules/local/cellrangerarc/count/main.nf" + +// Define workflow to subset and index a genome region fasta file +workflow CELLRANGERARC_ALIGN { + take: + fasta + gtf + motifs + cellranger_index + ch_fastq + + main: + ch_versions = Channel.empty() + + assert cellranger_index || (fasta && gtf && motifs): + "Must provide either a cellranger-atac index or a bundle of a fasta file ('--fasta') + gtf file ('--gtf') + motif file (--motifs)." + + if (!cellranger_index) { + // Filter GTF based on gene biotypes passed in params.modules + CELLRANGERARC_MKGTF( gtf ) + filtered_gtf = CELLRANGERARC_MKGTF.out.gtf + ch_versions = ch_versions.mix(CELLRANGERARC_MKGTF.out.versions) + + // Generate the config for mkref + GENERATECONFIG(fasta.name, filtered_gtf.name, motifs.name) + ch_versions.mix(GENERATECONFIG.out.versions) + + // Make reference genome + CELLRANGERARC_MKREF( fasta, filtered_gtf, motifs, GENERATECONFIG.out.config, "cellranger_arc_reference" ) + ch_versions = ch_versions.mix(CELLRANGERARC_MKREF.out.versions) + cellranger_index = CELLRANGERARC_MKREF.out.reference + } + + // Obtain read counts + CELLRANGERARC_COUNT ( + ch_fastq, + cellranger_index + ) + ch_versions = ch_versions.mix(CELLRANGERARC_COUNT.out.versions) + + emit: + ch_versions + cellranger_arc_out = CELLRANGERARC_COUNT.out.outs +} \ No newline at end of file From 6ba304249f16314c129d6d0e933fae7827832c4b Mon Sep 17 00:00:00 2001 From: Florian Date: Fri, 30 Jun 2023 11:31:41 +0200 Subject: [PATCH 08/89] Adding cellrangerarc to nextflow schema --- nextflow_schema.json | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git 
a/nextflow_schema.json b/nextflow_schema.json index 2061e7c0..c7d155bd 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -57,7 +57,7 @@ "default": "alevin", "help_text": "The workflow can handle three types of methods:\n\n- Kallisto/Bustools\n- Salmon Alevin + AlevinQC\n- STARsolo\n\nTo choose which one to use, please specify either `alevin`, `star` or `kallisto` as a parameter option for `--aligner`. By default, the pipeline runs the `alevin` option. Note that specifying another aligner option also requires choosing appropriate parameters (see below) for the selected option.", "fa_icon": "fas fa-align-center", - "enum": ["kallisto", "star", "alevin", "cellranger", "universc"] + "enum": ["kallisto", "star", "alevin", "cellranger", "cellrangerarc", "universc"] }, "protocol": { "type": "string", @@ -232,6 +232,22 @@ } } }, + "cellrangerarc_options": { + "title": "Cellranger ARC Options", + "type": "object", + "description": "Params related to the Cellranger pipeline", + "default": "", + "properties": { + "cellranger_index": { + "type": "string", + "description": "Specify a pre-calculated cellranger index. Readily prepared indexes can be obtained from the 10x Genomics website. " + }, + "motifs": { + "type": "string", + "description": "Specify a motif file to create a cellranger-arc index. Can be taken, e.g., from the JASPAR database." 
+ } + } + }, "universc_options": { "title": "UniverSC Options", "type": "object", @@ -461,6 +477,9 @@ { "$ref": "#/definitions/cellranger_options" }, + { + "$ref": "#/definitions/cellrangerarc_options" + }, { "$ref": "#/definitions/universc_options" }, From ef4b284930890a9a4ac4d56ea0d30878c4d96a39 Mon Sep 17 00:00:00 2001 From: Florian Date: Fri, 30 Jun 2023 11:40:16 +0200 Subject: [PATCH 09/89] Adding cellrangerarc to modules.config --- conf/modules.config | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/conf/modules.config b/conf/modules.config index b9d907cf..cdce4f1c 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -73,6 +73,39 @@ if(params.aligner == "cellranger") { } } +if(params.aligner == "cellranger-arc") { + process { + withName: CELLRANGERARC_MKGTF { + publishDir = [ + path: "${params.outdir}/${params.aligner}/mkgtf", + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.args = "--attribute=gene_biotype:protein_coding --attribute=gene_biotype:lncRNA --attribute=gene_biotype:pseudogene" + } + withName: GENERATECONFIG { + publishDir = [ + path: "${params.outdir}/${params.aligner}/config", + mode: params.publish_dir_mode + ] + ext.args = "--add none" + } + withName: CELLRANGERARC_MKREF { + publishDir = [ + path: "${params.outdir}/${params.aligner}/mkref", + mode: params.publish_dir_mode + ] + } + withName: CELLRANGERARC_COUNT { + publishDir = [ + path: "${params.outdir}/${params.aligner}/count", + mode: params.publish_dir_mode + ] + ext.args = {meta.expected_cells ? 
"--expect-cells ${meta.expected_cells}" : ''} + } + } +} + if(params.aligner == "universc") { process { publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } From 5c51b1d58a329e001a89a905586bbe5bf3186f7a Mon Sep 17 00:00:00 2001 From: Florian Date: Fri, 30 Jun 2023 11:40:37 +0200 Subject: [PATCH 10/89] Removing genrate lib csv module --- subworkflows/local/align_cellranger_arc.nf | 1 - 1 file changed, 1 deletion(-) diff --git a/subworkflows/local/align_cellranger_arc.nf b/subworkflows/local/align_cellranger_arc.nf index 2bbc8a27..a58f4748 100644 --- a/subworkflows/local/align_cellranger_arc.nf +++ b/subworkflows/local/align_cellranger_arc.nf @@ -4,7 +4,6 @@ include {CELLRANGERARC_MKGTF} from "../../modules/local/cellrangerarc/mkgtf/main.nf" include {CELLRANGERARC_MKREF} from "../../modules/local/cellrangerarc/mkref/main.nf" -include {GENERATELIBCSV} from "../../modules/local/generate_cellranger_lib_csv.nf" include {GENERATECONFIG} from "../../modules/local/generate_cellranger_mkref_config.nf" include {CELLRANGERARC_COUNT} from "../../modules/local/cellrangerarc/count/main.nf" From 0b86e18097eb5970bfb6c9ccbc4d8ca2d44a3986 Mon Sep 17 00:00:00 2001 From: Florian Date: Fri, 30 Jun 2023 16:56:30 +0200 Subject: [PATCH 11/89] Rename cellrangerarc subworkflow --- subworkflows/local/align_cellranger_arc.nf | 51 ---------------------- 1 file changed, 51 deletions(-) delete mode 100644 subworkflows/local/align_cellranger_arc.nf diff --git a/subworkflows/local/align_cellranger_arc.nf b/subworkflows/local/align_cellranger_arc.nf deleted file mode 100644 index a58f4748..00000000 --- a/subworkflows/local/align_cellranger_arc.nf +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Alignment with Cellranger Arc - */ - -include {CELLRANGERARC_MKGTF} from "../../modules/local/cellrangerarc/mkgtf/main.nf" -include {CELLRANGERARC_MKREF} from "../../modules/local/cellrangerarc/mkref/main.nf" -include {GENERATECONFIG} from 
"../../modules/local/generate_cellranger_mkref_config.nf" -include {CELLRANGERARC_COUNT} from "../../modules/local/cellrangerarc/count/main.nf" - -// Define workflow to subset and index a genome region fasta file -workflow CELLRANGERARC_ALIGN { - take: - fasta - gtf - motifs - cellranger_index - ch_fastq - - main: - ch_versions = Channel.empty() - - assert cellranger_index || (fasta && gtf && motifs): - "Must provide either a cellranger-atac index or a bundle of a fasta file ('--fasta') + gtf file ('--gtf') + motif file (--motifs)." - - if (!cellranger_index) { - // Filter GTF based on gene biotypes passed in params.modules - CELLRANGERARC_MKGTF( gtf ) - filtered_gtf = CELLRANGERARC_MKGTF.out.gtf - ch_versions = ch_versions.mix(CELLRANGERARC_MKGTF.out.versions) - - // Generate the config for mkref - GENERATECONFIG(fasta.name, filtered_gtf.name, motifs.name) - ch_versions.mix(GENERATECONFIG.out.versions) - - // Make reference genome - CELLRANGERARC_MKREF( fasta, filtered_gtf, motifs, GENERATECONFIG.out.config, "cellranger_arc_reference" ) - ch_versions = ch_versions.mix(CELLRANGERARC_MKREF.out.versions) - cellranger_index = CELLRANGERARC_MKREF.out.reference - } - - // Obtain read counts - CELLRANGERARC_COUNT ( - ch_fastq, - cellranger_index - ) - ch_versions = ch_versions.mix(CELLRANGERARC_COUNT.out.versions) - - emit: - ch_versions - cellranger_arc_out = CELLRANGERARC_COUNT.out.outs -} \ No newline at end of file From 4b343d71bd6be577120d73baa7d73f258ee1b6d7 Mon Sep 17 00:00:00 2001 From: Florian Date: Fri, 30 Jun 2023 16:57:01 +0200 Subject: [PATCH 12/89] update cellrangerarc modules --- modules/local/cellrangerarc/count/main.nf | 4 ++-- modules/local/cellrangerarc/mkgtf/main.nf | 4 ++-- modules/local/cellrangerarc/mkref/main.nf | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/modules/local/cellrangerarc/count/main.nf b/modules/local/cellrangerarc/count/main.nf index a858a2eb..df8e8fd1 100644 --- a/modules/local/cellrangerarc/count/main.nf 
+++ b/modules/local/cellrangerarc/count/main.nf @@ -1,8 +1,8 @@ process CELLRANGERARC_COUNT { tag "$meta.id" - label 'process_high' + label 'process_low' //TOFLO turn to high - container "nf-core/cellranger-arc:2.0.2" + container "heylf/cellranger-arc:2.0.2" // Exit if running this module with -profile conda / -profile mamba if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { diff --git a/modules/local/cellrangerarc/mkgtf/main.nf b/modules/local/cellrangerarc/mkgtf/main.nf index f304c6bc..bb3cbc8f 100644 --- a/modules/local/cellrangerarc/mkgtf/main.nf +++ b/modules/local/cellrangerarc/mkgtf/main.nf @@ -2,7 +2,7 @@ process CELLRANGERARC_MKGTF { tag "$gtf" label 'process_low' - container "nf-core/cellranger-arc:2.0.2" + container "heylf/cellranger-arc:2.0.2" // Exit if running this module with -profile conda / -profile mamba if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { @@ -33,4 +33,4 @@ process CELLRANGERARC_MKGTF { cellrangerarc: \$(echo \$( cellranger-arc --version 2>&1) | sed 's/^.*[^0-9]\\([0-9]*\\.[0-9]*\\.[0-9]*\\).*\$/\\1/' ) END_VERSIONS """ -} +} \ No newline at end of file diff --git a/modules/local/cellrangerarc/mkref/main.nf b/modules/local/cellrangerarc/mkref/main.nf index 41e9db30..27efb80a 100644 --- a/modules/local/cellrangerarc/mkref/main.nf +++ b/modules/local/cellrangerarc/mkref/main.nf @@ -1,8 +1,8 @@ process CELLRANGERARC_MKREF { tag "$reference_config" - label 'process_medium' + label 'process_low' //TOFLO change to medium - container "nf-core/cellranger-arc:2.0.2" + container "heylf/cellranger-arc:2.0.2" // Exit if running this module with -profile conda / -profile mamba if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { @@ -36,4 +36,4 @@ process CELLRANGERARC_MKREF { cellrangerarc: \$(echo \$( cellranger-arc --version 2>&1) | sed 's/^.*[^0-9]\\([0-9]*\\.[0-9]*\\.[0-9]*\\).*\$/\\1/' ) END_VERSIONS """ -} +} \ No newline at end of file From 
68a265485b666e8bc1c5419bb2233741f740e5e4 Mon Sep 17 00:00:00 2001 From: Florian Date: Fri, 30 Jun 2023 16:58:04 +0200 Subject: [PATCH 13/89] Adding generate config workflow and align cellrangerarc subworkflow --- .../local/generate_cellranger_mkref_config.nf | 36 +++++++++++++ subworkflows/local/align_cellrangerarc.nf | 51 +++++++++++++++++++ 2 files changed, 87 insertions(+) create mode 100644 modules/local/generate_cellranger_mkref_config.nf create mode 100644 subworkflows/local/align_cellrangerarc.nf diff --git a/modules/local/generate_cellranger_mkref_config.nf b/modules/local/generate_cellranger_mkref_config.nf new file mode 100644 index 00000000..f28f7d92 --- /dev/null +++ b/modules/local/generate_cellranger_mkref_config.nf @@ -0,0 +1,36 @@ +process CELLRANGERARC_GENERATECONFIG { + tag "$samplesheet" + label 'process_low' + + conda (params.enable_conda ? "conda-forge::python=3.8.3" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/python:3.8.3' : + 'quay.io/biocontainers/python:3.8.3' }" + + input: + val(fasta) + val(gtf) + val(motifs) + + output: + path '*config' , emit: config + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: // This script is bundled with the pipeline, in nf-core/scrnaseq/bin/ + def args = task.ext.args ?: '' + """ + generate_config.py \\ + --fasta $fasta \\ + --gtf $gtf \\ + --motifs $motifs \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | sed 's/Python //g') + END_VERSIONS + """ +} diff --git a/subworkflows/local/align_cellrangerarc.nf b/subworkflows/local/align_cellrangerarc.nf new file mode 100644 index 00000000..b8acd2dd --- /dev/null +++ b/subworkflows/local/align_cellrangerarc.nf @@ -0,0 +1,51 @@ +/* + * Alignment with Cellranger Arc + */ + +include {CELLRANGERARC_MKGTF} from "../../modules/local/cellrangerarc/mkgtf/main.nf" +include {CELLRANGERARC_MKREF} from "../../modules/local/cellrangerarc/mkref/main.nf" +include {CELLRANGERARC_GENERATECONFIG} from "../../modules/local/generate_cellranger_mkref_config.nf" +include {CELLRANGERARC_COUNT} from "../../modules/local/cellrangerarc/count/main.nf" + +// Define workflow to subset and index a genome region fasta file +workflow CELLRANGERARC_ALIGN { + take: + fasta + gtf + motifs + cellranger_index + ch_fastq + + main: + ch_versions = Channel.empty() + + assert cellranger_index || (fasta && gtf && motifs): + "Must provide either a cellranger-atac index or a bundle of a fasta file ('--fasta') + gtf file ('--gtf') + motif file (--motifs)." 
+ + if (!cellranger_index) { + // Filter GTF based on gene biotypes passed in params.modules + CELLRANGERARC_MKGTF( gtf ) + filtered_gtf = CELLRANGERARC_MKGTF.out.gtf + ch_versions = ch_versions.mix(CELLRANGERARC_MKGTF.out.versions) + + // Generate the config for mkref + CELLRANGERARC_GENERATECONFIG(fasta.name, filtered_gtf.name, motifs.name) + ch_versions = ch_versions.mix(CELLRANGERARC_GENERATECONFIG.out.versions) + + // Make reference genome + CELLRANGERARC_MKREF( fasta, filtered_gtf, motifs, CELLRANGERARC_GENERATECONFIG.out.config, "cellrangerarc_reference" ) + ch_versions = ch_versions.mix(CELLRANGERARC_MKREF.out.versions) + cellranger_index = CELLRANGERARC_MKREF.out.reference + } + + // Obtain read counts + CELLRANGERARC_COUNT ( + ch_fastq, + cellranger_index + ) + ch_versions = ch_versions.mix(CELLRANGERARC_COUNT.out.versions) + + emit: + ch_versions + cellranger_arc_out = CELLRANGERARC_COUNT.out.outs +} \ No newline at end of file From a81849c28e48d10ca744a915818cf05a2c8ea658 Mon Sep 17 00:00:00 2001 From: Florian Date: Fri, 30 Jun 2023 16:59:19 +0200 Subject: [PATCH 14/89] Updating mtx conversion scripts --- modules/local/mtx_to_h5ad.nf | 9 +++++---- modules/local/mtx_to_seurat.nf | 13 +++++++------ subworkflows/local/mtx_conversion.nf | 2 +- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf index 7961e057..bc329e4c 100644 --- a/modules/local/mtx_to_h5ad.nf +++ b/modules/local/mtx_to_h5ad.nf @@ -1,11 +1,12 @@ process MTX_TO_H5AD { tag "$meta.id" - label 'process_medium' + label 'process_low' //TOFLO set to medium + //TOFLO quay.io/ conda "conda-forge::scanpy conda-forge::python-igraph conda-forge::leidenalg" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/scanpy:1.7.2--pyhdfd78af_0' : - 'biocontainers/scanpy:1.7.2--pyhdfd78af_0' }" + 'quay.io/biocontainers/scanpy:1.7.2--pyhdfd78af_0' }" input: // inputs from cellranger nf-core module does not come in a single sample dir @@ -41,11 +42,11 @@ process MTX_TO_H5AD { // // run script // - if (params.aligner == 'cellranger') + if (params.aligner == 'cellranger' || params.aligner == 'cellrangerarc') """ # convert file types mtx_to_h5ad.py \\ - --aligner ${params.aligner} \\ + --aligner cellranger \\ --input filtered_feature_bc_matrix.h5 \\ --sample ${meta.id} \\ --out ${meta.id}/${meta.id}_matrix.h5ad diff --git a/modules/local/mtx_to_seurat.nf b/modules/local/mtx_to_seurat.nf index 4351f4b3..c1f40640 100644 --- a/modules/local/mtx_to_seurat.nf +++ b/modules/local/mtx_to_seurat.nf @@ -1,9 +1,10 @@ process MTX_TO_SEURAT { tag "$meta.id" - label 'process_medium' + label 'process_low' //TOFLO set to medium conda "r-seurat" - container "nf-core/seurat:4.3.0" + //TOFLO remove quay.io + container "quay.io/nf-core/seurat:4.3.0" input: // inputs from cellranger nf-core module does not come in a single sample dir @@ -19,10 +20,10 @@ process MTX_TO_SEURAT { script: def aligner = params.aligner - if (params.aligner == "cellranger") { - matrix = "matrix.mtx.gz" - barcodes = "barcodes.tsv.gz" - features = "features.tsv.gz" + if (params.aligner == "cellranger" || params.aligner == "cellrangerarc") { + matrix = "filtered_feature_bc_matrix/matrix.mtx.gz" + barcodes = "filtered_feature_bc_matrix/barcodes.tsv.gz" + features = "filtered_feature_bc_matrix/features.tsv.gz" } else if (params.aligner == "kallisto") { matrix = "*count/counts_unfiltered/*.mtx" barcodes = "*count/counts_unfiltered/*.barcodes.txt" diff --git a/subworkflows/local/mtx_conversion.nf b/subworkflows/local/mtx_conversion.nf index 5286a1b5..956285c0 100644 --- a/subworkflows/local/mtx_conversion.nf +++ b/subworkflows/local/mtx_conversion.nf @@ -15,7 +15,7 @@ workflow 
MTX_CONVERSION { ch_versions = Channel.empty() // Cellranger module output contains too many files which cause path collisions, we filter to the ones we need. - if ( params.aligner == "cellranger" ) { + if ( params.aligner == "cellranger" || params.aligner == "cellrangerarc" ) { mtx_matrices = mtx_matrices.map { meta, mtx_files -> [ meta, mtx_files.findAll { it.toString().contains("filtered_feature_bc_matrix") } ] } From f7c07a44cb788153deaf7e4db6c7c524ec0ee7a2 Mon Sep 17 00:00:00 2001 From: Florian Date: Fri, 30 Jun 2023 16:59:54 +0200 Subject: [PATCH 15/89] Changing module.config and nextflow.config --- conf/modules.config | 4 ++-- nextflow.config | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index cdce4f1c..f2051e41 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -73,7 +73,7 @@ if(params.aligner == "cellranger") { } } -if(params.aligner == "cellranger-arc") { +if(params.aligner == "cellrangerarc") { process { withName: CELLRANGERARC_MKGTF { publishDir = [ @@ -83,7 +83,7 @@ if(params.aligner == "cellranger-arc") { ] ext.args = "--attribute=gene_biotype:protein_coding --attribute=gene_biotype:lncRNA --attribute=gene_biotype:pseudogene" } - withName: GENERATECONFIG { + withName: CELLRANGERARC_GENERATECONFIG { publishDir = [ path: "${params.outdir}/${params.aligner}/config", mode: params.publish_dir_mode diff --git a/nextflow.config b/nextflow.config index 16425abf..01c72714 100644 --- a/nextflow.config +++ b/nextflow.config @@ -230,7 +230,7 @@ process.shell = ['/bin/bash', '-euo', 'pipefail'] // Will not be used unless Docker / Podman are enabled // Set to your registry if you have a mirror of containers singularity.registry = 'quay.io' -docker.registry = 'quay.io' +docker.registry = '' podman.registry = 'quay.io' def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') From bf180dadc299d2e6c04a77c0cdab7182ee9c4705 Mon Sep 17 00:00:00 2001 From: Florian Date: Fri, 30 Jun 
2023 17:00:36 +0200 Subject: [PATCH 16/89] Changing scripts for the input check --- modules/local/samplesheet_check.nf | 3 ++- subworkflows/local/input_check.nf | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/modules/local/samplesheet_check.nf b/modules/local/samplesheet_check.nf index feaf3dfc..f8dabac6 100644 --- a/modules/local/samplesheet_check.nf +++ b/modules/local/samplesheet_check.nf @@ -2,10 +2,11 @@ process SAMPLESHEET_CHECK { tag "$samplesheet" label 'process_low' + //TOFLO quay.io/ conda "conda-forge::python=3.8.3" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/python:3.8.3' : - 'biocontainers/python:3.8.3' }" + 'quay.io/biocontainers/python:3.8.3' }" input: path samplesheet diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index 55eec787..80b3cbae 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -14,7 +14,7 @@ workflow INPUT_CHECK { reads = null versions = null - if (params.aligner == "cellranger-arc"){ + if (params.aligner == "cellrangerarc"){ SAMPLESHEET_CHECK ( samplesheet ) .csv .splitCsv ( header:true, sep:',' ) @@ -85,7 +85,7 @@ def create_fastq_channel(LinkedHashMap row) { def multi_meta = [] multi_meta = row.sample_type ? 
[row.sample_type] : [param.sample_type] - if (params.aligner == "cellranger-arc"){ + if (params.aligner == "cellrangerarc"){ sub_sample = row.fastq_1.split("/")[-1].replaceAll("_S[0-9]+_L[0-9]+_R1_[0-9]+.fastq.gz","") fastqs.each{ if(!it.name.contains(sub_sample)){ @@ -98,7 +98,7 @@ def create_fastq_channel(LinkedHashMap row) { fastq_meta = [ meta, fastqs ] - if (params.aligner == "cellranger-arc"){ + if (params.aligner == "cellrangerarc"){ fastq_meta = [ meta, multi_meta, fastqs ] } From 4d4ed84e2986eab0e9343651cb13c7345f14b3d6 Mon Sep 17 00:00:00 2001 From: Florian Date: Fri, 30 Jun 2023 17:01:00 +0200 Subject: [PATCH 17/89] Bugfix for generate config python script --- bin/generate_config.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/generate_config.py b/bin/generate_config.py index 93077a1f..e9c9d45e 100755 --- a/bin/generate_config.py +++ b/bin/generate_config.py @@ -16,8 +16,8 @@ config = open("config", "w") config.write("{\n") - config.write("\torganism: \"scrnaseq\"\n") - config.write("\tgenome: [\"cellranger_arc_reference\"]\n") + config.write("\torganism: \"{}\"\n".format(args["fasta"].split(".")[0])) + config.write("\tgenome: [\"cellrangerarc_reference\"]\n") config.write("\tinput_fasta: [\"{}\"]\n".format(args["fasta"])) config.write("\tinput_gtf: [\"{}\"]\n".format(args["gtf"])) config.write("\tinput_motifs: \"{}\"\n".format(args["motifs"])) From cad902400595ca3d68f3a8a3e83469e95da1f5cf Mon Sep 17 00:00:00 2001 From: Florian Date: Fri, 30 Jun 2023 17:01:34 +0200 Subject: [PATCH 18/89] Changing multiqc and dumsoftware scripts for containers --- modules/nf-core/custom/dumpsoftwareversions/main.nf | 3 ++- modules/nf-core/multiqc/main.nf | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf index ebc87273..536b282c 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/main.nf +++ 
b/modules/nf-core/custom/dumpsoftwareversions/main.nf @@ -1,11 +1,12 @@ process CUSTOM_DUMPSOFTWAREVERSIONS { label 'process_single' + //TOFLO remove https://quay.io/ // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container conda "bioconda::multiqc=1.14" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : - 'biocontainers/multiqc:1.14--pyhdfd78af_0' }" + 'quay.io/biocontainers/multiqc:1.14--pyhdfd78af_0' }" input: path versions diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index 1fc387be..c673ee05 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -1,10 +1,11 @@ process MULTIQC { label 'process_single' + //TOFLO remove https://quay.io/ conda "bioconda::multiqc=1.14" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : - 'biocontainers/multiqc:1.14--pyhdfd78af_0' }" + 'quay.io/biocontainers/multiqc:1.14--pyhdfd78af_0' }" input: path multiqc_files, stageAs: "?/*" From 37e61d6676561f5fe5d7a38c26bbffb3f057b20d Mon Sep 17 00:00:00 2001 From: Florian Date: Fri, 30 Jun 2023 17:02:01 +0200 Subject: [PATCH 19/89] Changing concat_h5ad.nf --- modules/local/concat_h5ad.nf | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/modules/local/concat_h5ad.nf b/modules/local/concat_h5ad.nf index 96920f9e..5f0870f8 100644 --- a/modules/local/concat_h5ad.nf +++ b/modules/local/concat_h5ad.nf @@ -1,10 +1,11 @@ process CONCAT_H5AD { - label 'process_medium' + label 'process_low' //TOFLO set to medium + //TOFLO quay.io/ conda "conda-forge::scanpy conda-forge::python-igraph conda-forge::leidenalg" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/scanpy:1.7.2--pyhdfd78af_0' : - 'biocontainers/scanpy:1.7.2--pyhdfd78af_0' }" + 'quay.io/biocontainers/scanpy:1.7.2--pyhdfd78af_0' }" input: path h5ad From f884d00d30e6a9334d16f8f944039ba97de0c1ec Mon Sep 17 00:00:00 2001 From: Florian Date: Fri, 30 Jun 2023 17:02:20 +0200 Subject: [PATCH 20/89] Changing gtf_gene_filter.nf --- modules/local/gtf_gene_filter.nf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/local/gtf_gene_filter.nf b/modules/local/gtf_gene_filter.nf index 063bd228..bd775711 100644 --- a/modules/local/gtf_gene_filter.nf +++ b/modules/local/gtf_gene_filter.nf @@ -2,10 +2,11 @@ process GTF_GENE_FILTER { tag "$fasta" label 'process_low' + //TOFLO remove https://quay.io/ conda "conda-forge::python=3.9.5" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/python:3.9--1' : - 'biocontainers/python:3.9--1' }" + 'quay.io/biocontainers/python:3.9--1' }" input: path fasta From fe187a63d6ef7f027f6db8a25856425fd7ea4fd2 Mon Sep 17 00:00:00 2001 From: Florian Date: Fri, 30 Jun 2023 17:02:35 +0200 Subject: [PATCH 21/89] Chaning scrnaseq.nf --- workflows/scrnaseq.nf | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf index 4407ca2e..09e16127 100644 --- a/workflows/scrnaseq.nf +++ b/workflows/scrnaseq.nf @@ -44,7 +44,7 @@ include { KALLISTO_BUSTOOLS } from '../subworkflows/local/kallisto_bustools' include { SCRNASEQ_ALEVIN } from '../subworkflows/local/alevin' include { STARSOLO } from '../subworkflows/local/starsolo' include { CELLRANGER_ALIGN } from "../subworkflows/local/align_cellranger" -include { CELLRANGERARC_ALIGN } from "../subworkflows/local/align_cellranger_arc" +include { CELLRANGERARC_ALIGN } from "../subworkflows/local/align_cellrangerarc" include { UNIVERSC_ALIGN } from "../subworkflows/local/align_universc" 
include { MTX_CONVERSION } from "../subworkflows/local/mtx_conversion" include { GTF_GENE_FILTER } from '../modules/local/gtf_gene_filter' @@ -77,6 +77,7 @@ ch_input = file(params.input) ch_genome_fasta = params.fasta ? file(params.fasta) : [] ch_gtf = params.gtf ? file(params.gtf) : [] ch_transcript_fasta = params.transcript_fasta ? file(params.transcript_fasta): [] +ch_motifs = params.motifs ? file(params.motifs) : [] ch_txp2gene = params.txp2gene ? file(params.txp2gene) : [] ch_multiqc_alevin = Channel.empty() ch_multiqc_star = Channel.empty() @@ -117,7 +118,6 @@ workflow SCRNASEQ { ch_versions = ch_versions.mix(INPUT_CHECK.out.versions) // Run FastQC - /* ch_multiqc_fastqc = Channel.empty() if (!params.skip_fastqc) { FASTQC_CHECK ( ch_fastq ) @@ -126,7 +126,6 @@ workflow SCRNASEQ { } else { ch_multiqc_fastqc = Channel.empty() } - */ ch_filter_gtf = GTF_GENE_FILTER ( ch_genome_fasta, ch_gtf ).gtf @@ -210,19 +209,18 @@ workflow SCRNASEQ { } // Run cellranger pipeline - if (params.aligner == "cellranger-arc") { - CELLRANGER_ARC_ALIGN( + if (params.aligner == "cellrangerarc") { + CELLRANGERARC_ALIGN( ch_genome_fasta, ch_filter_gtf, ch_motifs, ch_cellranger_index, ch_fastq ) - ch_versions = ch_versions.mix(CELLRANGER_ARC_ALIGN.out.ch_versions) - ch_mtx_matrices = ch_mtx_matrices.mix(CELLRANGER_ARC_ALIGN.out.cellranger_arc_out) + ch_versions = ch_versions.mix(CELLRANGERARC_ALIGN.out.ch_versions) + ch_mtx_matrices = ch_mtx_matrices.mix(CELLRANGERARC_ALIGN.out.cellranger_arc_out) } - /* // Run mtx to h5ad conversion subworkflow MTX_CONVERSION ( ch_mtx_matrices, @@ -230,7 +228,6 @@ workflow SCRNASEQ { ch_txp2gene, ch_star_index ) - */ //Add Versions from MTX Conversion workflow too ch_versions.mix(MTX_CONVERSION.out.ch_versions) From f9f581e809cbee68245ed00698db30018ee9537a Mon Sep 17 00:00:00 2001 From: Gregor Sturm Date: Mon, 2 Oct 2023 14:20:30 +0200 Subject: [PATCH 22/89] Bump version to 2.5.0dev --- CHANGELOG.md | 2 ++ assets/multiqc_config.yml | 4 ++-- 
nextflow.config | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1344dcb0..67c6606f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [Unreleased] + ## v2.4.1 - 2023-09-28 - Fix whitelist logic for dropseq ([#267](https://github.com/nf-core/scrnaseq/pull/267)) diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index b788419d..163c952f 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,8 +1,8 @@ report_comment: > - This report has been generated by the nf-core/scrnaseq + This report has been generated by the nf-core/scrnaseq analysis pipeline. For information about how to interpret these results, please see the - documentation. + documentation. report_section_order: "nf-core-scrnaseq-methods-description": diff --git a/nextflow.config b/nextflow.config index ee3ba62d..b911ce28 100644 --- a/nextflow.config +++ b/nextflow.config @@ -263,7 +263,7 @@ manifest { description = """Pipeline for processing 10x Genomics single cell rnaseq data""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '2.4.1' + version = '2.5.0dev' doi = '10.5281/zenodo.3568187' } From 0cbe7c616430dfb4bdffdd0827a6c540d07d08d3 Mon Sep 17 00:00:00 2001 From: Gregor Sturm Date: Fri, 27 Oct 2023 10:02:53 +0200 Subject: [PATCH 23/89] Update nextflow schema and switch to protocol mapping via JSON --- assets/protocols.json | 44 +++++++++++++++ lib/WorkflowScrnaseq.groovy | 97 +++++---------------------------- modules/local/simpleaf_quant.nf | 2 +- nextflow.config | 3 +- nextflow_schema.json | 13 ++--- workflows/scrnaseq.nf | 2 +- 6 files changed, 65 insertions(+), 96 deletions(-) create mode 100644 assets/protocols.json diff --git a/assets/protocols.json b/assets/protocols.json new file mode 100644 index 
00000000..5efba1a1 --- /dev/null +++ b/assets/protocols.json @@ -0,0 +1,44 @@ +{ + "alevin": { + "10XV1": {"protocol": "10xv1"}, + "10XV2": {"protocol": "10xv2"}, + "10XV3": {"protocol": "10xv3"}, + "dropseq": {"protocol": "dropseq"} + }, + "cellranger": { + "auto": {"protocol": "auto"}, + "10XV1": {"protocol": "SC3Pv1"}, + "10XV2": {"protocol": "SC3Pv2"}, + "10XV3": {"protocol": "SC3Pv3"} + }, + "star": { + "10XV1": { + "protocol": "CB_UMI_Simple", + "args": "--soloUMIlen 10" + }, + "10XV2": { + "protocol": "CB_UMI_Simple", + "args": "--soloUMIlen 10" + }, + "10XV3": { + "protocol": "CB_UMI_Simple", + "args": "--soloUMIlen 12" + }, + "dropseq": {"protocol": "CB_UMI_Simple"}, + "smartseq": {"protocol": "SmartSeq"} + }, + "kallisto": { + "10XV1": {"protocol": "10XV1"}, + "10XV2": {"protocol": "10XV2"}, + "10XV3": {"protocol": "10XV3"}, + "dropseq": {"protocol": "DROPSEQ"}, + "smartseq": {"protocol": "SMARTSEQ"} + }, + "universc": { + "auto": {"protocol": "10x"}, + "10XV1": {"protocol": "10x-v1"}, + "10XV2": {"protocol": "10x-v2"}, + "10XV3": {"protocol": "10x-v3"}, + "dropseq": {"protocol": "dropseq"} + } + } diff --git a/lib/WorkflowScrnaseq.groovy b/lib/WorkflowScrnaseq.groovy index b58a89db..e2735260 100755 --- a/lib/WorkflowScrnaseq.groovy +++ b/lib/WorkflowScrnaseq.groovy @@ -4,6 +4,8 @@ import nextflow.Nextflow import groovy.text.SimpleTemplateEngine +import groovy.json.JsonSlurper + class WorkflowScrnaseq { @@ -121,90 +123,19 @@ class WorkflowScrnaseq { } } - /* - * Format the protocol - * Given the protocol paramter (params.protocol) and the aligner (params.aligner), - * this function formats the protocol such that it is fit for the respective - * subworkflow - */ - static formatProtocol(protocol, aligner) { - String new_protocol = protocol - String chemistry = '' - String other_parameters = '' - - // alevin - if (aligner == 'alevin') { - switch (protocol) { - case '10XV1': - new_protocol = '10xv1' - chemistry = 'V1' - break - case '10XV2': - new_protocol = 
'10xv2' - chemistry = 'V2' - break - case '10XV3': - new_protocol = '10xv3' - chemistry = 'V3' - break - // case 'dropseq': - // new_protocol = 'dropseq' - } - } - - // star - else if (aligner == 'star') { - switch (protocol) { - case '10XV1': - new_protocol = 'CB_UMI_Simple' - chemistry = 'V1' - other_parameters = '--soloUMIlen 10' - break - case '10XV2': - new_protocol = 'CB_UMI_Simple' - chemistry = 'V2' - other_parameters = '--soloUMIlen 10' - break - case '10XV3': - new_protocol = 'CB_UMI_Simple' - chemistry = 'V3' - other_parameters = '--soloUMIlen 12' - break - case 'dropseq': - new_protocol = 'CB_UMI_Simple' - break - case 'smartseq': - new_protocol = 'SmartSeq' - } - } - - // kallisto bustools - else if (aligner = 'kallisto' ) { - switch (protocol) { - case '10XV1': - new_protocol = '10XV1' - chemistry = 'V1' - break - case '10XV2': - new_protocol = '10XV2' - chemistry = 'V2' - break - case '10XV3': - new_protocol = '10XV3' - chemistry = 'V3' - break - case 'dropseq': - new_protocol = 'DROPSEQ' - break - case 'smartseq': - new_protocol = 'SMARTSEQ' - } - } - else { - exit 1, 'Aligner not recognized.' + // + // Retrieve the aligner-specific protocol based on the specified protocol. + // Returns a tuple [protocol, extra_args] + static getProtocol(workflow, String aligner, String protocol) { + def jsonSlurper = new JsonSlurper() + def protocols = jsonSlurper.parse(new File("${workflow.projectDir}/assets/protocols.json")) + aligner_map = protocols[aligner] + if aligner_map.containsKey(protocol) { + return [aligner_map[protocol]["protocol"], aligner_map[protocol].get("args", "")] + } else { + Nextflow.warn("Protocol '${protocol}' not recognized by the pipeline. 
Passing on the protocol to the aligner unmodified.") + return [protocol, ""] } - - return [new_protocol, chemistry, other_parameters] } } diff --git a/modules/local/simpleaf_quant.nf b/modules/local/simpleaf_quant.nf index 0c879ceb..f350acf3 100644 --- a/modules/local/simpleaf_quant.nf +++ b/modules/local/simpleaf_quant.nf @@ -66,7 +66,7 @@ process SIMPLEAF_QUANT { -o ${prefix}_alevin_results \\ -m $txp2gene \\ -t $task.cpus \\ - -c $protocol \\ + -c "$protocol" \\ $expect_cells \\ $unfiltered_command \\ $args diff --git a/nextflow.config b/nextflow.config index b911ce28..a4a24255 100644 --- a/nextflow.config +++ b/nextflow.config @@ -14,7 +14,7 @@ params { outdir = null input = null save_reference = false - protocol = '10XV3' + protocol = 'auto' // reference files genome = null @@ -42,7 +42,6 @@ params { // UniverSC paramaters universc_index = null - universc_technology = '10x' // Template Boilerplate options skip_multiqc = false diff --git a/nextflow_schema.json b/nextflow_schema.json index c2642a1b..d3d5fec6 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -62,10 +62,10 @@ }, "protocol": { "type": "string", - "description": "The protocol that was used to generate the single cell data, e.g. 10XV2 (default).", - "default": "10XV2", - "fa_icon": "fas fa-cogs", - "enum": ["10XV3", "10XV2", "10XV1", "dropseq"] + "description": "The protocol that was used to generate the single cell data, e.g. 10x Genomics v2 Chemistry.\n\n Can be 'auto' (cellranger only), '10XV1', '10XV2', '10XV3', or any other protocol string that will get directly passed the respective aligner.", + "help_text": "The default is to auto-detect the protocol when running cellranger. For all other aligners the protocol MUST be manually specified. \n\n The following protocols are recognized by the pipeline and mapped to the corresponding protocol name of the respective aligner: '10XV1', '10XV2', '10XV3'. 
\n\nAny other protocol value is passed to the aligner in verbatim to support other sequencing platforms. See the [kallisto](https://pachterlab.github.io/kallisto/manual#bus), [simpleaf](https://simpleaf.readthedocs.io/en/latest/quant-command.html#a-note-on-the-chemistry-flag), [starsolo](https://gensoft.pasteur.fr/docs/STAR/2.7.9a/STARsolo.html), and [universc](https://github.com/minoda-lab/universc#pre-set-configurations) documentations for more details.", + "default": "auto", + "fa_icon": "fas fa-cogs" } }, "fa_icon": "fas fa-terminal" @@ -243,11 +243,6 @@ "universc_index": { "type": "string", "description": "Specify a pre-calculated cellranger index. Readily prepared indexes can be obtained from the 10x Genomics website." - }, - "universc_technology": { - "type": "string", - "description": "Specify a single-cell technology, vendor, or platform. See the UniverSC documentation or GitHub repository for more details.", - "default": "10x" } } }, diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf index 581bf2c4..a99d4b96 100644 --- a/workflows/scrnaseq.nf +++ b/workflows/scrnaseq.nf @@ -205,7 +205,7 @@ workflow SCRNASEQ { ch_genome_fasta, ch_filter_gtf, ch_universc_index, - params.universc_technology, + protocol, ch_fastq ) ch_versions = ch_versions.mix(UNIVERSC_ALIGN.out.ch_versions) From ad5e8bacf8d53a8cd4c3e377697c66c3da28e7bc Mon Sep 17 00:00:00 2001 From: Gregor Sturm Date: Fri, 27 Oct 2023 10:30:29 +0200 Subject: [PATCH 24/89] Support protocol in all aligners --- assets/protocols.json | 112 +++++++++++++++++------- conf/modules.config | 2 +- lib/WorkflowScrnaseq.groovy | 7 +- subworkflows/local/alevin.nf | 1 - subworkflows/local/align_cellranger.nf | 3 +- subworkflows/local/kallisto_bustools.nf | 1 - workflows/scrnaseq.nf | 21 +++-- 7 files changed, 96 insertions(+), 51 deletions(-) diff --git a/assets/protocols.json b/assets/protocols.json index 5efba1a1..73a2c4a5 100644 --- a/assets/protocols.json +++ b/assets/protocols.json @@ -1,44 +1,90 @@ { 
"alevin": { - "10XV1": {"protocol": "10xv1"}, - "10XV2": {"protocol": "10xv2"}, - "10XV3": {"protocol": "10xv3"}, - "dropseq": {"protocol": "dropseq"} + "10XV1": { + "protocol": "10xv1", + "whitelist": "assets/whitelist/10x_V1_barcode_whitelist.txt.gz" + }, + "10XV2": { + "protocol": "10xv2", + "whitelist": "assets/whitelist/10x_V2_barcode_whitelist.txt.gz" + }, + "10XV3": { + "protocol": "10xv3", + "whitelist": "assets/whitelist/10x_V3_barcode_whitelist.txt.gz" + }, + "dropseq": { + "protocol": "dropseq" + } }, "cellranger": { - "auto": {"protocol": "auto"}, - "10XV1": {"protocol": "SC3Pv1"}, - "10XV2": {"protocol": "SC3Pv2"}, - "10XV3": {"protocol": "SC3Pv3"} + "auto": { + "protocol": "auto" + }, + "10XV1": { + "protocol": "SC3Pv1" + }, + "10XV2": { + "protocol": "SC3Pv2" + }, + "10XV3": { + "protocol": "SC3Pv3" + } }, "star": { - "10XV1": { - "protocol": "CB_UMI_Simple", - "args": "--soloUMIlen 10" - }, - "10XV2": { - "protocol": "CB_UMI_Simple", - "args": "--soloUMIlen 10" - }, - "10XV3": { - "protocol": "CB_UMI_Simple", - "args": "--soloUMIlen 12" - }, - "dropseq": {"protocol": "CB_UMI_Simple"}, - "smartseq": {"protocol": "SmartSeq"} + "10XV1": { + "protocol": "CB_UMI_Simple", + "extra_args": "--soloUMIlen 10", + "whitelist": "assets/whitelist/10x_V1_barcode_whitelist.txt.gz" + }, + "10XV2": { + "protocol": "CB_UMI_Simple", + "extra_args": "--soloUMIlen 10", + "whitelist": "assets/whitelist/10x_V2_barcode_whitelist.txt.gz" + }, + "10XV3": { + "protocol": "CB_UMI_Simple", + "extra_args": "--soloUMIlen 12", + "whitelist": "assets/whitelist/10x_V3_barcode_whitelist.txt.gz" + }, + "dropseq": { + "protocol": "CB_UMI_Simple" + }, + "smartseq": { + "protocol": "SmartSeq" + } }, "kallisto": { - "10XV1": {"protocol": "10XV1"}, - "10XV2": {"protocol": "10XV2"}, - "10XV3": {"protocol": "10XV3"}, - "dropseq": {"protocol": "DROPSEQ"}, - "smartseq": {"protocol": "SMARTSEQ"} + "10XV1": { + "protocol": "10XV1" + }, + "10XV2": { + "protocol": "10XV2" + }, + "10XV3": { + 
"protocol": "10XV3" + }, + "dropseq": { + "protocol": "DROPSEQ" + }, + "smartseq": { + "protocol": "SMARTSEQ" + } }, "universc": { - "auto": {"protocol": "10x"}, - "10XV1": {"protocol": "10x-v1"}, - "10XV2": {"protocol": "10x-v2"}, - "10XV3": {"protocol": "10x-v3"}, - "dropseq": {"protocol": "dropseq"} + "auto": { + "protocol": "10x" + }, + "10XV1": { + "protocol": "10x-v1" + }, + "10XV2": { + "protocol": "10x-v2" + }, + "10XV3": { + "protocol": "10x-v3" + }, + "dropseq": { + "protocol": "dropseq" + } } - } +} \ No newline at end of file diff --git a/conf/modules.config b/conf/modules.config index 2fd974c5..c60949fc 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -78,7 +78,7 @@ if(params.aligner == "cellranger") { path: "${params.outdir}/${params.aligner}/count", mode: params.publish_dir_mode ] - ext.args = {meta.expected_cells ? "--expect-cells ${meta.expected_cells}" : ''} + ext.args = {"--chemistry ${meta.chemistry} " + (meta.expected_cells ? "--expect-cells ${meta.expected_cells}" : '')} } } } diff --git a/lib/WorkflowScrnaseq.groovy b/lib/WorkflowScrnaseq.groovy index e2735260..48c8667f 100755 --- a/lib/WorkflowScrnaseq.groovy +++ b/lib/WorkflowScrnaseq.groovy @@ -125,16 +125,17 @@ class WorkflowScrnaseq { // // Retrieve the aligner-specific protocol based on the specified protocol. - // Returns a tuple [protocol, extra_args] + // Returns a map ["protocol": protocol, "extra_args": , "whitelist": ] + // extra_args and whitelist are optional. static getProtocol(workflow, String aligner, String protocol) { def jsonSlurper = new JsonSlurper() def protocols = jsonSlurper.parse(new File("${workflow.projectDir}/assets/protocols.json")) aligner_map = protocols[aligner] if aligner_map.containsKey(protocol) { - return [aligner_map[protocol]["protocol"], aligner_map[protocol].get("args", "")] + return aligner_map[protocol] } else { Nextflow.warn("Protocol '${protocol}' not recognized by the pipeline. 
Passing on the protocol to the aligner unmodified.") - return [protocol, ""] + return ["protocol": protocol] } } diff --git a/subworkflows/local/alevin.nf b/subworkflows/local/alevin.nf index 8fc0a983..764c08f8 100644 --- a/subworkflows/local/alevin.nf +++ b/subworkflows/local/alevin.nf @@ -20,7 +20,6 @@ workflow SCRNASEQ_ALEVIN { txp2gene barcode_whitelist protocol - chemistry ch_fastq diff --git a/subworkflows/local/align_cellranger.nf b/subworkflows/local/align_cellranger.nf index 228edb06..bfdd533e 100644 --- a/subworkflows/local/align_cellranger.nf +++ b/subworkflows/local/align_cellranger.nf @@ -13,6 +13,7 @@ workflow CELLRANGER_ALIGN { gtf cellranger_index ch_fastq + protocol main: ch_versions = Channel.empty() @@ -34,7 +35,7 @@ workflow CELLRANGER_ALIGN { // Obtain read counts CELLRANGER_COUNT ( // TODO what is `gem` and why is it needed? - ch_fastq.map{ meta, reads -> [meta + ["gem": meta.id, "samples": [meta.id]], reads] }, + ch_fastq.map{ meta, reads -> [meta + ["chemistry": protocol, "gem": meta.id, "samples": [meta.id]], reads] }, cellranger_index ) ch_versions = ch_versions.mix(CELLRANGER_COUNT.out.versions) diff --git a/subworkflows/local/kallisto_bustools.nf b/subworkflows/local/kallisto_bustools.nf index 9d63ef1e..3210e47a 100644 --- a/subworkflows/local/kallisto_bustools.nf +++ b/subworkflows/local/kallisto_bustools.nf @@ -15,7 +15,6 @@ workflow KALLISTO_BUSTOOLS { kallisto_index txp2gene protocol - chemistry kb_workflow ch_fastq diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf index a99d4b96..5edd1afa 100644 --- a/workflows/scrnaseq.nf +++ b/workflows/scrnaseq.nf @@ -68,7 +68,7 @@ include { MULTIQC } from '../modules/nf-core/multiqc/main' // TODO: Are this channels still necessary? 
ch_output_docs = file("$projectDir/docs/output.md", checkIfExists: true) ch_output_docs_images = file("$projectDir/docs/images/", checkIfExists: true) -(protocol, chemistry, other_parameters) = WorkflowScrnaseq.formatProtocol(params.protocol, params.aligner) +protocol_config = WorkflowScrnaseq.getProtocol(workflow, params.aligner, params.protocol) // general input and params ch_input = file(params.input) @@ -81,8 +81,8 @@ ch_multiqc_star = Channel.empty() ch_multiqc_cellranger = Channel.empty() if (params.barcode_whitelist) { ch_barcode_whitelist = file(params.barcode_whitelist) -} else if (params.protocol.contains("10X")) { - ch_barcode_whitelist = file("$baseDir/assets/whitelist/10x_${chemistry}_barcode_whitelist.txt.gz", checkIfExists: true) +} else if (protocol_config.containsKey("whitelist")) { + ch_barcode_whitelist = file("$projectDir/${protocol_config['whitelist']}") } else { ch_barcode_whitelist = [] } @@ -137,8 +137,7 @@ workflow SCRNASEQ { ch_filter_gtf, ch_kallisto_index, ch_txp2gene, - protocol, - chemistry, + protocol_config['protocol'], kb_workflow, ch_fastq ) @@ -156,8 +155,7 @@ workflow SCRNASEQ { ch_salmon_index, ch_txp2gene, ch_barcode_whitelist, - protocol, - chemistry, + protocol_config['protocol'], ch_fastq ) ch_versions = ch_versions.mix(SCRNASEQ_ALEVIN.out.ch_versions) @@ -171,11 +169,11 @@ workflow SCRNASEQ { ch_genome_fasta, ch_filter_gtf, ch_star_index, - protocol, + protocol_config['protocol'], ch_barcode_whitelist, ch_fastq, star_feature, - other_parameters + protocol_config.get('extra_args', ""), ) ch_versions = ch_versions.mix(STARSOLO.out.ch_versions) ch_mtx_matrices = ch_mtx_matrices.mix(STARSOLO.out.star_counts) @@ -189,7 +187,8 @@ workflow SCRNASEQ { ch_genome_fasta, ch_filter_gtf, ch_cellranger_index, - ch_fastq + ch_fastq, + protocol_config['protocol'] ) ch_versions = ch_versions.mix(CELLRANGER_ALIGN.out.ch_versions) ch_mtx_matrices = ch_mtx_matrices.mix(CELLRANGER_ALIGN.out.cellranger_out) @@ -205,7 +204,7 @@ workflow SCRNASEQ 
{ ch_genome_fasta, ch_filter_gtf, ch_universc_index, - protocol, + protocol_config['protocol'], ch_fastq ) ch_versions = ch_versions.mix(UNIVERSC_ALIGN.out.ch_versions) From 9a619ecf53cfc86fad5062d9d82975287d34676f Mon Sep 17 00:00:00 2001 From: Gregor Sturm Date: Fri, 27 Oct 2023 10:56:07 +0200 Subject: [PATCH 25/89] Update documentation --- docs/usage.md | 50 +++++++++++++++++++++++++++---------------- workflows/scrnaseq.nf | 3 +++ 2 files changed, 35 insertions(+), 18 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index f90dc242..abe170fd 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -25,19 +25,6 @@ CONTROL_REP1,AEG588A1_S1_L004_R1_001.fastq.gz,AEG588A1_S1_L004_R2_001.fastq.gz There is a strict requirement for the first 3 columns to match those defined in the table below. -A final samplesheet file consisting of both single- and paired-end data may look something like the one below. This is for 6 samples, where `TREATMENT_REP3` has been sequenced twice. - -```console -sample,fastq_1,fastq_2 -CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz -CONTROL_REP2,AEG588A2_S2_L002_R1_001.fastq.gz,AEG588A2_S2_L002_R2_001.fastq.gz -CONTROL_REP3,AEG588A3_S3_L002_R1_001.fastq.gz,AEG588A3_S3_L002_R2_001.fastq.gz -TREATMENT_REP1,AEG588A4_S4_L003_R1_001.fastq.gz, -TREATMENT_REP2,AEG588A5_S5_L003_R1_001.fastq.gz, -TREATMENT_REP3,AEG588A6_S6_L003_R1_001.fastq.gz, -TREATMENT_REP3,AEG588A6_S6_L004_R1_001.fastq.gz, -``` - | Column | Description | | ---------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | `sample` | Required. Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. 
Spaces in sample names are automatically converted to underscores (`_`). | @@ -54,9 +41,9 @@ This parameter is currently supported by - [Salmon Alevin](https://salmon.readthedocs.io/en/latest/alevin.html#expectcells) - [STARsolo](https://github.com/alexdobin/STAR/blob/master/docs/STARsolo.md) +- [Cellranger](https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/what-is-cell-ranger) -In the future, support for this paramter will be added to cellranger and UniverSC. Note that since cellranger v7, -it is not recommended anymore to supply the `--expected-cells` parameter. +Note that since cellranger v7, it is **not recommended** anymore to supply the `--expected-cells` parameter. ## Aligning options @@ -71,7 +58,7 @@ Other aligner options for running the pipeline are: - [Cellranger](https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/what-is-cell-ranger) to perform both alignment and downstream analysis. - `--aligner cellranger` - [UniverSC](https://github.com/minoda-lab/universc) to run an open-source version of Cell Ranger on any technology - - '--aligner universc' + - `--aligner universc` ### If using cellranger or universc @@ -91,9 +78,36 @@ For more details, see As a sanity check, we verify that filenames of a pair of FASTQ files only differ by `R1`/`R2`. -#### UniverSC technology configuration +### Support for different scRNA-seq protocols + +The single-cell protocol used in the experiment can be specified using the `--protocol` flag. +For cellranger, it is recommended to stick with the default value `'auto'` for automatic detection of the protocol. +For all other aligners, you need to specify the protocol manually. + +The three 10x Genomics protocols 3' v1 (`10XV1`), 3' v2 (`10XV2`) and 3' v3 (`10XV3`) are universally supported +by all aligners in the pipeline and mapped to the correct options automatically.
If the protocol is unknown to the +nf-core pipeline, the value specified to `--protocol` is passed to the aligner _verbatim_ to support additional protocols. + +Here are some hints on running the various aligners with different protocols + +#### Kallisto/bustools + +The command `kb --list` shows all supported, preconfigured protocols. Additionally, a custom technology string such as +`0,0,16:0,16,26:1,0,0` can be specified: + +> Additionally kallisto bus will accept a string specifying a new technology in the format of bc:umi:seq where each of bc,umi and seq are a triplet of integers separated by a comma, denoting the file index, start and stop of the sequence used. For example to specify the 10xV2 technology we would use 0,0,16:0,16,26:1,0,0 + +For more details, please refer to the [Kallisto/bustools documentation](https://pachterlab.github.io/kallisto/manual#bus). + +#### Alevin/fry + +Alevin/fry also supports custom chemistries in a slightly different format, e.g. `1{b[16]u[12]x:}2{r:}`. + +For more details, see the [simpleaf documentation](https://simpleaf.readthedocs.io/en/latest/quant-command.html#a-note-on-the-chemistry-flag) + +#### UniverSC -UniverSC automatically updates the barcode whitelist and chemistry parameters. Use "universc_technology" to set the 'technology' parameter to configure the run. +See the [UniverSC GitHub page](https://github.com/minoda-lab/universc#pre-set-configurations) for all supported protocols. Currently only 3\' scRNA-Seq parameters are supported in nextflow, although chemistry parameters for 5\' scRNA-Seq and full-length scRNA-Seq libraries are supported by teh container.
diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf index 5edd1afa..01ce787f 100644 --- a/workflows/scrnaseq.nf +++ b/workflows/scrnaseq.nf @@ -69,6 +69,9 @@ include { MULTIQC } from '../modules/nf-core/multiqc/main' ch_output_docs = file("$projectDir/docs/output.md", checkIfExists: true) ch_output_docs_images = file("$projectDir/docs/images/", checkIfExists: true) protocol_config = WorkflowScrnaseq.getProtocol(workflow, params.aligner, params.protocol) +if (protocol_config['protocol'] == 'auto' && aligner != "cellranger") { + error "Only cellranger supports `protocol = 'auto'`. Please specify the protocol manually!" +} // general input and params ch_input = file(params.input) From 386bcfde78f2d1d094ce8158bfafe61f3ac94fe9 Mon Sep 17 00:00:00 2001 From: Gregor Sturm Date: Fri, 27 Oct 2023 10:58:14 +0200 Subject: [PATCH 26/89] Update changelog --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 67c6606f..c732afa7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +- Better support for custom protocols ([#273](https://github.com/nf-core/scrnaseq/pull/273)). 
+ - The universc protocol is now specified via the `--protocol` flag + - Any protocol specified is now passed to the respective aligner + - Added a section to the documentation + ## v2.4.1 - 2023-09-28 - Fix whitelist logic for dropseq ([#267](https://github.com/nf-core/scrnaseq/pull/267)) From 41f14817880fd0494aad4ae96646989daba0a1cb Mon Sep 17 00:00:00 2001 From: Gregor Sturm Date: Fri, 27 Oct 2023 11:12:06 +0200 Subject: [PATCH 27/89] Fix syntax error --- lib/WorkflowScrnaseq.groovy | 11 ++++++----- workflows/scrnaseq.nf | 2 +- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/lib/WorkflowScrnaseq.groovy b/lib/WorkflowScrnaseq.groovy index 48c8667f..e4273887 100755 --- a/lib/WorkflowScrnaseq.groovy +++ b/lib/WorkflowScrnaseq.groovy @@ -127,14 +127,15 @@ class WorkflowScrnaseq { // Retrieve the aligner-specific protocol based on the specified protocol. // Returns a map ["protocol": protocol, "extra_args": , "whitelist": ] // extra_args and whitelist are optional. - static getProtocol(workflow, String aligner, String protocol) { + public static Map getProtocol(workflow, log, aligner, protocol) { def jsonSlurper = new JsonSlurper() - def protocols = jsonSlurper.parse(new File("${workflow.projectDir}/assets/protocols.json")) - aligner_map = protocols[aligner] - if aligner_map.containsKey(protocol) { + def json = new File("${workflow.projectDir}/assets/protocols.json").text + def protocols = jsonSlurper.parseText(json) + def aligner_map = protocols[aligner] + if(aligner_map.containsKey(protocol)) { return aligner_map[protocol] } else { - Nextflow.warn("Protocol '${protocol}' not recognized by the pipeline. Passing on the protocol to the aligner unmodified.") + log.warn("Protocol '${protocol}' not recognized by the pipeline. 
Passing on the protocol to the aligner unmodified.") return ["protocol": protocol] } } diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf index 01ce787f..aeed5a0a 100644 --- a/workflows/scrnaseq.nf +++ b/workflows/scrnaseq.nf @@ -68,7 +68,7 @@ include { MULTIQC } from '../modules/nf-core/multiqc/main' // TODO: Are this channels still necessary? ch_output_docs = file("$projectDir/docs/output.md", checkIfExists: true) ch_output_docs_images = file("$projectDir/docs/images/", checkIfExists: true) -protocol_config = WorkflowScrnaseq.getProtocol(workflow, params.aligner, params.protocol) +protocol_config = WorkflowScrnaseq.getProtocol(workflow, log, params.aligner, params.protocol) if (protocol_config['protocol'] == 'auto' && aligner != "cellranger") { error "Only cellranger supports `protocol = 'auto'`. Please specify the protocol manually!" } From af63d84a1d3542b710b738c7e891d68385ffa0f0 Mon Sep 17 00:00:00 2001 From: Gregor Sturm Date: Fri, 27 Oct 2023 11:13:50 +0200 Subject: [PATCH 28/89] Fix prettier --- assets/protocols.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/protocols.json b/assets/protocols.json index 73a2c4a5..23ff1328 100644 --- a/assets/protocols.json +++ b/assets/protocols.json @@ -87,4 +87,4 @@ "protocol": "dropseq" } } -} \ No newline at end of file +} From e6cd7989bc311869e4da286bd9b7b982351efe73 Mon Sep 17 00:00:00 2001 From: Florian Date: Mon, 30 Oct 2023 18:05:18 +0100 Subject: [PATCH 29/89] Changes to Dockerfile and README for cellrangerarc --- modules/local/cellrangerarc/Dockerfile | 4 ++-- modules/local/cellrangerarc/README.md | 8 ++++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/modules/local/cellrangerarc/Dockerfile b/modules/local/cellrangerarc/Dockerfile index ccf55582..081dc7bd 100644 --- a/modules/local/cellrangerarc/Dockerfile +++ b/modules/local/cellrangerarc/Dockerfile @@ -1,5 +1,5 @@ # Dockerfile to create container with Cell Ranger v2.0.2 -# Push to nfcore/cellranger-arc: 
+# Push to quay.io/nf-core/cellranger-arc: FROM continuumio/miniconda3:4.8.2 LABEL authors="Gisela Gabernet , Florian Heyl" \ @@ -25,4 +25,4 @@ RUN \ tar -xzvf cellranger-arc-$CELLRANGER_ARC_VER.tar.gz && \ export PATH=/opt/cellranger-arc-$CELLRANGER_ARC_VER:$PATH && \ ln -s /opt/cellranger-arc-$CELLRANGER_ARC_VER/cellranger-arc /usr/bin/cellranger-arc && \ - rm -rf /opt/cellranger-arc-$CELLRANGER_ARC_VER.tar.gz + rm -rf /opt/cellranger-arc-$CELLRANGER_ARC_VER.tar.gz \ No newline at end of file diff --git a/modules/local/cellrangerarc/README.md b/modules/local/cellrangerarc/README.md index d4192553..9f4358f1 100644 --- a/modules/local/cellrangerarc/README.md +++ b/modules/local/cellrangerarc/README.md @@ -13,7 +13,11 @@ ENV CELLRANGER_ARC_VER= 3. Create and test the container: ```bash -docker build . -t nfcore/cellranger-arc: +docker build . -t quay.io/nf-core/cellranger-arc: ``` -4. **Access rights are needed to push the container to the Dockerhub nfcore organization, please ask a core team member to do so.** \ No newline at end of file +4. Access rights are needed to push the container to the quay.io nf-core organization; please ask a core team member to do so.
+ +```bash +docker push quay.io/nf-core/cellranger-arc: +``` \ No newline at end of file From 2c4a83238b4385b0e17fd0f8c79cd9a7cf168293 Mon Sep 17 00:00:00 2001 From: Florian Date: Mon, 30 Oct 2023 18:05:39 +0100 Subject: [PATCH 30/89] Changes to meta.yml for mkref --- modules/local/cellrangerarc/mkref/meta.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/local/cellrangerarc/mkref/meta.yml b/modules/local/cellrangerarc/mkref/meta.yml index a68adf96..1eac878a 100644 --- a/modules/local/cellrangerarc/mkref/meta.yml +++ b/modules/local/cellrangerarc/mkref/meta.yml @@ -29,12 +29,12 @@ input: description: JSON-like file holding organism, genome, reference fasta path, reference annotation gtf path, contigs that should be excluded and sequence format motif file path pattern: config - reference_name: - type: val + type: string description: The name to give the new reference folder pattern: str output: - reference: - type: folder + type: directory description: Folder called like the reference_name containing all the reference indices needed by Cell Ranger Arc - versions: type: file From 7ed40fdd6a315516453250588985289d43447597 Mon Sep 17 00:00:00 2001 From: Florian Date: Mon, 30 Oct 2023 18:11:02 +0100 Subject: [PATCH 31/89] Remove TODOS --- modules/local/cellrangerarc/count/main.nf | 2 +- modules/local/cellrangerarc/mkref/main.nf | 2 +- modules/local/concat_h5ad.nf | 5 ++--- modules/local/gtf_gene_filter.nf | 3 +-- modules/local/mtx_to_h5ad.nf | 5 ++--- modules/local/mtx_to_seurat.nf | 6 +++--- modules/local/samplesheet_check.nf | 3 +-- modules/nf-core/custom/dumpsoftwareversions/main.nf | 3 +-- modules/nf-core/multiqc/main.nf | 3 +-- nextflow.config | 2 +- 10 files changed, 14 insertions(+), 20 deletions(-) diff --git a/modules/local/cellrangerarc/count/main.nf b/modules/local/cellrangerarc/count/main.nf index df8e8fd1..56057a61 100644 --- a/modules/local/cellrangerarc/count/main.nf +++ b/modules/local/cellrangerarc/count/main.nf @@ -1,6 
+1,6 @@ process CELLRANGERARC_COUNT { tag "$meta.id" - label 'process_low' //TOFLO turn to high + label 'process_high' container "heylf/cellranger-arc:2.0.2" diff --git a/modules/local/cellrangerarc/mkref/main.nf b/modules/local/cellrangerarc/mkref/main.nf index 27efb80a..45c2f09f 100644 --- a/modules/local/cellrangerarc/mkref/main.nf +++ b/modules/local/cellrangerarc/mkref/main.nf @@ -1,6 +1,6 @@ process CELLRANGERARC_MKREF { tag "$reference_config" - label 'process_low' //TOFLO change to medium + label 'process_medium' container "heylf/cellranger-arc:2.0.2" diff --git a/modules/local/concat_h5ad.nf b/modules/local/concat_h5ad.nf index 5f0870f8..96920f9e 100644 --- a/modules/local/concat_h5ad.nf +++ b/modules/local/concat_h5ad.nf @@ -1,11 +1,10 @@ process CONCAT_H5AD { - label 'process_low' //TOFLO set to medium + label 'process_medium' - //TOFLO quay.io/ conda "conda-forge::scanpy conda-forge::python-igraph conda-forge::leidenalg" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/scanpy:1.7.2--pyhdfd78af_0' : - 'quay.io/biocontainers/scanpy:1.7.2--pyhdfd78af_0' }" + 'biocontainers/scanpy:1.7.2--pyhdfd78af_0' }" input: path h5ad diff --git a/modules/local/gtf_gene_filter.nf b/modules/local/gtf_gene_filter.nf index bd775711..063bd228 100644 --- a/modules/local/gtf_gene_filter.nf +++ b/modules/local/gtf_gene_filter.nf @@ -2,11 +2,10 @@ process GTF_GENE_FILTER { tag "$fasta" label 'process_low' - //TOFLO remove https://quay.io/ conda "conda-forge::python=3.9.5" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/python:3.9--1' : - 'quay.io/biocontainers/python:3.9--1' }" + 'biocontainers/python:3.9--1' }" input: path fasta diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf index bc329e4c..ca9b1d48 100644 --- a/modules/local/mtx_to_h5ad.nf +++ b/modules/local/mtx_to_h5ad.nf @@ -1,12 +1,11 @@ process MTX_TO_H5AD { tag "$meta.id" - label 'process_low' //TOFLO set to medium + label 'process_medium' - //TOFLO quay.io/ conda "conda-forge::scanpy conda-forge::python-igraph conda-forge::leidenalg" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/scanpy:1.7.2--pyhdfd78af_0' : - 'quay.io/biocontainers/scanpy:1.7.2--pyhdfd78af_0' }" + 'biocontainers/scanpy:1.7.2--pyhdfd78af_0' }" input: // inputs from cellranger nf-core module does not come in a single sample dir diff --git a/modules/local/mtx_to_seurat.nf b/modules/local/mtx_to_seurat.nf index c1f40640..b85864ee 100644 --- a/modules/local/mtx_to_seurat.nf +++ b/modules/local/mtx_to_seurat.nf @@ -1,10 +1,10 @@ process MTX_TO_SEURAT { tag "$meta.id" - label 'process_low' //TOFLO set to medium + label 'process_medium' conda "r-seurat" - //TOFLO remove quay.io - container "quay.io/nf-core/seurat:4.3.0" + + container "nf-core/seurat:4.3.0" input: // inputs from cellranger nf-core module does not come in a single sample dir diff --git a/modules/local/samplesheet_check.nf b/modules/local/samplesheet_check.nf index f8dabac6..feaf3dfc 100644 --- a/modules/local/samplesheet_check.nf +++ b/modules/local/samplesheet_check.nf @@ -2,11 +2,10 @@ process SAMPLESHEET_CHECK { tag "$samplesheet" label 'process_low' - //TOFLO quay.io/ conda "conda-forge::python=3.8.3" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/python:3.8.3' : - 'quay.io/biocontainers/python:3.8.3' }" + 'biocontainers/python:3.8.3' }" input: path samplesheet diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf index 536b282c..ebc87273 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/main.nf +++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf @@ -1,12 +1,11 @@ process CUSTOM_DUMPSOFTWAREVERSIONS { label 'process_single' - //TOFLO remove https://quay.io/ // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container conda "bioconda::multiqc=1.14" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.14--pyhdfd78af_0' }" + 'biocontainers/multiqc:1.14--pyhdfd78af_0' }" input: path versions diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index c673ee05..1fc387be 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -1,11 +1,10 @@ process MULTIQC { label 'process_single' - //TOFLO remove https://quay.io/ conda "bioconda::multiqc=1.14" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.14--pyhdfd78af_0' }" + 'biocontainers/multiqc:1.14--pyhdfd78af_0' }" input: path multiqc_files, stageAs: "?/*" diff --git a/nextflow.config b/nextflow.config index 01c72714..16425abf 100644 --- a/nextflow.config +++ b/nextflow.config @@ -230,7 +230,7 @@ process.shell = ['/bin/bash', '-euo', 'pipefail'] // Will not be used unless Docker / Podman are enabled // Set to your registry if you have a mirror of containers singularity.registry = 'quay.io' -docker.registry = '' +docker.registry = 'quay.io' podman.registry = 'quay.io' def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') From d8a4e8e78c273b2cdaa0c432097e66dc1d4418b4 Mon Sep 17 00:00:00 2001 From: Florian Date: Mon, 30 Oct 2023 18:14:04 +0100 Subject: [PATCH 32/89] Changing containers for cellrangerarc modules --- modules/local/cellrangerarc/count/main.nf | 2 +- modules/local/cellrangerarc/mkgtf/main.nf | 2 +- modules/local/cellrangerarc/mkref/main.nf | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/local/cellrangerarc/count/main.nf b/modules/local/cellrangerarc/count/main.nf index 56057a61..a858a2eb 100644 --- a/modules/local/cellrangerarc/count/main.nf +++ b/modules/local/cellrangerarc/count/main.nf @@ -2,7 +2,7 @@ process CELLRANGERARC_COUNT { tag "$meta.id" label 'process_high' - container "heylf/cellranger-arc:2.0.2" + container "nf-core/cellranger-arc:2.0.2" // Exit if running this module with -profile conda / -profile mamba if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { diff --git a/modules/local/cellrangerarc/mkgtf/main.nf b/modules/local/cellrangerarc/mkgtf/main.nf index bb3cbc8f..bb96af64 100644 --- a/modules/local/cellrangerarc/mkgtf/main.nf +++ b/modules/local/cellrangerarc/mkgtf/main.nf @@ -2,7 +2,7 @@ process CELLRANGERARC_MKGTF { tag "$gtf" label 'process_low' - container "heylf/cellranger-arc:2.0.2" + 
container "nf-core/cellranger-arc:2.0.2" // Exit if running this module with -profile conda / -profile mamba if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { diff --git a/modules/local/cellrangerarc/mkref/main.nf b/modules/local/cellrangerarc/mkref/main.nf index 45c2f09f..4dab010c 100644 --- a/modules/local/cellrangerarc/mkref/main.nf +++ b/modules/local/cellrangerarc/mkref/main.nf @@ -2,7 +2,7 @@ process CELLRANGERARC_MKREF { tag "$reference_config" label 'process_medium' - container "heylf/cellranger-arc:2.0.2" + container "nf-core/cellranger-arc:2.0.2" // Exit if running this module with -profile conda / -profile mamba if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { From c1d4e47059a2ad4e2104c4f63187778aa6e74ab7 Mon Sep 17 00:00:00 2001 From: Florian Date: Mon, 30 Oct 2023 19:07:07 +0100 Subject: [PATCH 33/89] Adding cellrangerarc_index to nextflow.config nextflow_schema.json align_cellrangerarc.nf and scrnaseq.nf --- nextflow.config | 1 + nextflow_schema.json | 2 +- subworkflows/local/align_cellrangerarc.nf | 10 +++++----- workflows/scrnaseq.nf | 7 +++++-- 4 files changed, 12 insertions(+), 8 deletions(-) diff --git a/nextflow.config b/nextflow.config index 8e344df2..c6c4fa11 100644 --- a/nextflow.config +++ b/nextflow.config @@ -41,6 +41,7 @@ params { cellranger_index = null // Cellranger ARC parameters + cellrangerarc_index = null motifs = null // UniverSC paramaters diff --git a/nextflow_schema.json b/nextflow_schema.json index c0353ce4..351cee8a 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -240,7 +240,7 @@ "description": "Params related to the Cellranger pipeline", "default": "", "properties": { - "cellranger_index": { + "cellrangerarc_index": { "type": "string", "description": "Specify a pre-calculated cellranger index. Readily prepared indexes can be obtained from the 10x Genomics website. 
" }, diff --git a/subworkflows/local/align_cellrangerarc.nf b/subworkflows/local/align_cellrangerarc.nf index b8acd2dd..c2dffde6 100644 --- a/subworkflows/local/align_cellrangerarc.nf +++ b/subworkflows/local/align_cellrangerarc.nf @@ -13,16 +13,16 @@ workflow CELLRANGERARC_ALIGN { fasta gtf motifs - cellranger_index + cellrangerarc_index ch_fastq main: ch_versions = Channel.empty() - assert cellranger_index || (fasta && gtf && motifs): + assert cellrangerarc_index || (fasta && gtf && motifs): "Must provide either a cellranger-atac index or a bundle of a fasta file ('--fasta') + gtf file ('--gtf') + motif file (--motifs)." - if (!cellranger_index) { + if (!cellrangerarc_index) { // Filter GTF based on gene biotypes passed in params.modules CELLRANGERARC_MKGTF( gtf ) filtered_gtf = CELLRANGERARC_MKGTF.out.gtf @@ -35,13 +35,13 @@ workflow CELLRANGERARC_ALIGN { // Make reference genome CELLRANGERARC_MKREF( fasta, filtered_gtf, motifs, CELLRANGERARC_GENERATECONFIG.out.config, "cellrangerarc_reference" ) ch_versions = ch_versions.mix(CELLRANGERARC_MKREF.out.versions) - cellranger_index = CELLRANGERARC_MKREF.out.reference + cellrangerarc_index = CELLRANGERARC_MKREF.out.reference } // Obtain read counts CELLRANGERARC_COUNT ( ch_fastq, - cellranger_index + cellrangerarc_index ) ch_versions = ch_versions.mix(CELLRANGERARC_COUNT.out.versions) diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf index 7039478e..65895dc6 100644 --- a/workflows/scrnaseq.nf +++ b/workflows/scrnaseq.nf @@ -12,7 +12,7 @@ def checkPathParamList = [ params.input, params.multiqc_config, params.fasta, params.gtf, params.transcript_fasta, params.salmon_index, params.kallisto_index, params.star_index, params.txp2gene, params.barcode_whitelist, params.cellranger_index, - params.universc_index + params.cellrangerarc_index, params.universc_index ] for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } @@ -104,6 +104,9 @@ star_feature = params.star_feature //cellranger 
params ch_cellranger_index = params.cellranger_index ? file(params.cellranger_index) : [] +//cellrangerarc params +ch_cellrangerarc_index = params.cellrangerarc_index ? file(params.cellrangerarc_index) : [] + //universc params ch_universc_index = params.universc_index ? file(params.universc_index) : [] @@ -220,7 +223,7 @@ workflow SCRNASEQ { ch_genome_fasta, ch_filter_gtf, ch_motifs, - ch_cellranger_index, + ch_cellrangerarc_index, ch_fastq ) ch_versions = ch_versions.mix(CELLRANGERARC_ALIGN.out.ch_versions) From 78b06b1c11b33db4711f0be4c507c291abe819e4 Mon Sep 17 00:00:00 2001 From: Florian Date: Tue, 31 Oct 2023 10:32:28 +0100 Subject: [PATCH 34/89] Linting fix --- modules/local/cellrangerarc/Dockerfile | 2 +- modules/local/cellrangerarc/mkgtf/main.nf | 2 +- modules/local/cellrangerarc/mkref/main.nf | 2 +- modules/local/generate_cellranger_mkref_config.nf | 2 +- modules/local/mtx_to_seurat.nf | 1 - 5 files changed, 4 insertions(+), 5 deletions(-) diff --git a/modules/local/cellrangerarc/Dockerfile b/modules/local/cellrangerarc/Dockerfile index 081dc7bd..812b64ba 100644 --- a/modules/local/cellrangerarc/Dockerfile +++ b/modules/local/cellrangerarc/Dockerfile @@ -25,4 +25,4 @@ RUN \ tar -xzvf cellranger-arc-$CELLRANGER_ARC_VER.tar.gz && \ export PATH=/opt/cellranger-arc-$CELLRANGER_ARC_VER:$PATH && \ ln -s /opt/cellranger-arc-$CELLRANGER_ARC_VER/cellranger-arc /usr/bin/cellranger-arc && \ - rm -rf /opt/cellranger-arc-$CELLRANGER_ARC_VER.tar.gz \ No newline at end of file + rm -rf /opt/cellranger-arc-$CELLRANGER_ARC_VER.tar.gz diff --git a/modules/local/cellrangerarc/mkgtf/main.nf b/modules/local/cellrangerarc/mkgtf/main.nf index bb96af64..f304c6bc 100644 --- a/modules/local/cellrangerarc/mkgtf/main.nf +++ b/modules/local/cellrangerarc/mkgtf/main.nf @@ -33,4 +33,4 @@ process CELLRANGERARC_MKGTF { cellrangerarc: \$(echo \$( cellranger-arc --version 2>&1) | sed 's/^.*[^0-9]\\([0-9]*\\.[0-9]*\\.[0-9]*\\).*\$/\\1/' ) END_VERSIONS """ -} \ No newline at end of file +} 
diff --git a/modules/local/cellrangerarc/mkref/main.nf b/modules/local/cellrangerarc/mkref/main.nf index 4dab010c..41e9db30 100644 --- a/modules/local/cellrangerarc/mkref/main.nf +++ b/modules/local/cellrangerarc/mkref/main.nf @@ -36,4 +36,4 @@ process CELLRANGERARC_MKREF { cellrangerarc: \$(echo \$( cellranger-arc --version 2>&1) | sed 's/^.*[^0-9]\\([0-9]*\\.[0-9]*\\.[0-9]*\\).*\$/\\1/' ) END_VERSIONS """ -} \ No newline at end of file +} diff --git a/modules/local/generate_cellranger_mkref_config.nf b/modules/local/generate_cellranger_mkref_config.nf index f28f7d92..4bc474c8 100644 --- a/modules/local/generate_cellranger_mkref_config.nf +++ b/modules/local/generate_cellranger_mkref_config.nf @@ -8,7 +8,7 @@ process CELLRANGERARC_GENERATECONFIG { 'quay.io/biocontainers/python:3.8.3' }" input: - val(fasta) + val(fasta) val(gtf) val(motifs) diff --git a/modules/local/mtx_to_seurat.nf b/modules/local/mtx_to_seurat.nf index b85864ee..8d344035 100644 --- a/modules/local/mtx_to_seurat.nf +++ b/modules/local/mtx_to_seurat.nf @@ -3,7 +3,6 @@ process MTX_TO_SEURAT { label 'process_medium' conda "r-seurat" - container "nf-core/seurat:4.3.0" input: From 00ccaaa3bbec2f15823c45cba99883e041943e30 Mon Sep 17 00:00:00 2001 From: Florian Date: Tue, 31 Oct 2023 10:42:43 +0100 Subject: [PATCH 35/89] Fixing run for test data --- nextflow.config | 1 + nextflow_schema.json | 4 ++++ subworkflows/local/input_check.nf | 2 +- 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index c6c4fa11..8c2488e3 100644 --- a/nextflow.config +++ b/nextflow.config @@ -43,6 +43,7 @@ params { // Cellranger ARC parameters cellrangerarc_index = null motifs = null + sample_type = 'gex' // UniverSC paramaters universc_index = null diff --git a/nextflow_schema.json b/nextflow_schema.json index 351cee8a..31f95e5f 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -247,6 +247,10 @@ "motifs": { "type": "string", "description": "Specify a motif file to 
create a cellranger-arc index. Can be taken, e.g., from the JASPAR database." + }, + "sample_type": { + "type": "string", + "description": "Specify the type of data (gex or atac)." } } }, diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index 80b3cbae..0de0ac59 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -83,7 +83,7 @@ def create_fastq_channel(LinkedHashMap row) { // define meta_data for multiome def multi_meta = [] - multi_meta = row.sample_type ? [row.sample_type] : [param.sample_type] + multi_meta = row.sample_type ? [row.sample_type] : [params.sample_type] if (params.aligner == "cellrangerarc"){ sub_sample = row.fastq_1.split("/")[-1].replaceAll("_S[0-9]+_L[0-9]+_R1_[0-9]+.fastq.gz","") From 44d8cab3989eec415cfb23cb997beec78b9598c8 Mon Sep 17 00:00:00 2001 From: Florian Date: Tue, 31 Oct 2023 10:44:07 +0100 Subject: [PATCH 36/89] Prettier --- modules/local/cellrangerarc/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/cellrangerarc/README.md b/modules/local/cellrangerarc/README.md index 9f4358f1..6089d994 100644 --- a/modules/local/cellrangerarc/README.md +++ b/modules/local/cellrangerarc/README.md @@ -20,4 +20,4 @@ docker build . 
-t quay.io/nf-core/cellranger-arc: ```bash docker push quay.io/nf-core/cellranger-arc: -``` \ No newline at end of file +``` From 75eb1637e4eb84f223e70d65be435128338fe28e Mon Sep 17 00:00:00 2001 From: Florian Date: Tue, 31 Oct 2023 10:44:53 +0100 Subject: [PATCH 37/89] PythonBlack --- bin/check_samplesheet.py | 26 +++++++++++++++++++++++--- bin/generate_config.py | 12 ++++++------ bin/generate_lib_csv.py | 14 ++++++++------ 3 files changed, 37 insertions(+), 15 deletions(-) diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index 59d39ca5..3544dab9 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -158,8 +158,14 @@ def check_samplesheet(file_in, file_out): sample_type = "" if "sample_type" in header: sample_type = lspl[colmap["sample_type"]] - if (sample_type not in SAMPLE_TYPES): - print_error("Sample type {} is not supported! Please specify either {}".format(sample_type, " or ".join(SAMPLE_TYPES)), "Line", line) + if sample_type not in SAMPLE_TYPES: + print_error( + "Sample type {} is not supported! 
Please specify either {}".format( + sample_type, " or ".join(SAMPLE_TYPES) + ), + "Line", + line, + ) for fastq in fastq_list: if fastq: @@ -194,7 +200,21 @@ def check_samplesheet(file_in, file_out): ## Write validated samplesheet with appropriate columns if len(sample_mapping_dict) > 0: with open(file_out, "w") as fout: - fout.write(",".join(["sample", "single_end", "fastq_1", "fastq_2", "expected_cells", "seq_center" , "fastq_barcode", "sample_type"]) + "\n") + fout.write( + ",".join( + [ + "sample", + "single_end", + "fastq_1", + "fastq_2", + "expected_cells", + "seq_center", + "fastq_barcode", + "sample_type", + ] + ) + + "\n" + ) for sample in sorted(sample_mapping_dict.keys()): ## Check that multiple runs of the same sample are of the same datatype diff --git a/bin/generate_config.py b/bin/generate_config.py index e9c9d45e..85647826 100755 --- a/bin/generate_config.py +++ b/bin/generate_config.py @@ -16,12 +16,12 @@ config = open("config", "w") config.write("{\n") - config.write("\torganism: \"{}\"\n".format(args["fasta"].split(".")[0])) - config.write("\tgenome: [\"cellrangerarc_reference\"]\n") - config.write("\tinput_fasta: [\"{}\"]\n".format(args["fasta"])) - config.write("\tinput_gtf: [\"{}\"]\n".format(args["gtf"])) - config.write("\tinput_motifs: \"{}\"\n".format(args["motifs"])) - if(args["add"] != "none"): + config.write('\torganism: "{}"\n'.format(args["fasta"].split(".")[0])) + config.write('\tgenome: ["cellrangerarc_reference"]\n') + config.write('\tinput_fasta: ["{}"]\n'.format(args["fasta"])) + config.write('\tinput_gtf: ["{}"]\n'.format(args["gtf"])) + config.write('\tinput_motifs: "{}"\n'.format(args["motifs"])) + if args["add"] != "none": config.write(args["add"] + "\n") config.write("}") config.close() diff --git a/bin/generate_lib_csv.py b/bin/generate_lib_csv.py index f93a23eb..07ab9661 100755 --- a/bin/generate_lib_csv.py +++ b/bin/generate_lib_csv.py @@ -22,13 +22,15 @@ lib_csv = open(args["out"], "w") 
lib_csv.write("fastqs,sample,library_type") - for i in range(0,len(sample_types)): - if (sample_names[i] in unique_samples_names): - unique_samples_names.remove(sample_names[i]) # this has to be done to account for different Lane files (e.g., L002) - if(sample_types[i] == "gex"): - lib_csv.write("\n{},{},{}".format(args["fastq_folder"], sample_names[i],"Gene Expression")) + for i in range(0, len(sample_types)): + if sample_names[i] in unique_samples_names: + unique_samples_names.remove( + sample_names[i] + ) # this has to be done to account for different Lane files (e.g., L002) + if sample_types[i] == "gex": + lib_csv.write("\n{},{},{}".format(args["fastq_folder"], sample_names[i], "Gene Expression")) else: - lib_csv.write("\n{},{},{}".format(args["fastq_folder"], sample_names[i],"Chromatin Accessibility")) + lib_csv.write("\n{},{},{}".format(args["fastq_folder"], sample_names[i], "Chromatin Accessibility")) lib_csv.close() From 129ad8c11dd5520552755707a2fc8c1f4c77b76c Mon Sep 17 00:00:00 2001 From: Florian Date: Tue, 31 Oct 2023 10:53:53 +0100 Subject: [PATCH 38/89] Python Black --- modules.json | 28 +++--- nextflow_schema.json | 233 +++++++++++++++++-------------------------- 2 files changed, 108 insertions(+), 153 deletions(-) diff --git a/modules.json b/modules.json index 5b4e4a3f..06849b69 100644 --- a/modules.json +++ b/modules.json @@ -8,65 +8,65 @@ "cellranger/count": { "branch": "master", "git_sha": "5df79e0383386a9e43462a6e81bf978ce0a6db09", - "installed_by": ["modules"] + "installed_by": ["modules"], }, "cellranger/mkgtf": { "branch": "master", "git_sha": "716ef3019b66772a817b417078edce2f7b337858", - "installed_by": ["modules"] + "installed_by": ["modules"], }, "cellranger/mkref": { "branch": "master", "git_sha": "716ef3019b66772a817b417078edce2f7b337858", - "installed_by": ["modules"] + "installed_by": ["modules"], }, "custom/dumpsoftwareversions": { "branch": "master", "git_sha": "05c280924b6c768d484c7c443dad5e605c4ff4b4", - "installed_by": ["modules"] 
+ "installed_by": ["modules"], }, "fastqc": { "branch": "master", "git_sha": "9a4517e720bc812e95b56d23d15a1653b6db4f53", - "installed_by": ["modules"] + "installed_by": ["modules"], }, "gffread": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": ["modules"], }, "gunzip": { "branch": "master", "git_sha": "e06548bfa36ee31869b81041879dd6b3a83b1d57", - "installed_by": ["modules"] + "installed_by": ["modules"], }, "kallistobustools/count": { "branch": "master", "git_sha": "de204d3c950f091336539ad74f0e47ddffe69ed4", - "installed_by": ["modules"] + "installed_by": ["modules"], }, "kallistobustools/ref": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": ["modules"], }, "multiqc": { "branch": "master", "git_sha": "a6e11ac655e744f7ebc724be669dd568ffdc0e80", - "installed_by": ["modules"] + "installed_by": ["modules"], }, "star/genomegenerate": { "branch": "master", "git_sha": "cc08a888069f67cab8120259bddab8032d4c0fe3", - "installed_by": ["modules"] + "installed_by": ["modules"], }, "universc": { "branch": "master", "git_sha": "cf67a6d7d043e2bd6a3099be84c72046fc71508f", - "installed_by": ["modules"] - } + "installed_by": ["modules"], + }, } } } - } + }, } diff --git a/nextflow_schema.json b/nextflow_schema.json index 31f95e5f..c850796d 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -19,27 +19,27 @@ "mimetype": "text/csv", "pattern": "^\\S+\\.csv$", "description": "Path to comma-separated file containing information about the samples in the experiment.", - "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 4 columns, and a header row. See [usage docs](https://nf-co.re/rnaseq/usage#samplesheet-input)." 
+ "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 4 columns, and a header row. See [usage docs](https://nf-co.re/rnaseq/usage#samplesheet-input).", }, "outdir": { "type": "string", "format": "directory-path", "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.", - "fa_icon": "fas fa-folder-open" + "fa_icon": "fas fa-folder-open", }, "email": { "type": "string", "description": "Email address for completion summary.", "fa_icon": "fas fa-envelope", "help_text": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.", - "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$" + "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$", }, "multiqc_title": { "type": "string", "description": "MultiQC report title. 
Printed as page header, used for filename if not otherwise specified.", - "fa_icon": "fas fa-file-signature" - } - } + "fa_icon": "fas fa-file-signature", + }, + }, }, "mandatory_arguments": { "title": "Mandatory arguments", @@ -50,7 +50,7 @@ "barcode_whitelist": { "type": "string", "description": "If not using the 10X Genomics platform, a custom barcode whitelist can be used with `--barcode_whitelist`.", - "fa_icon": "fas fa-barcode" + "fa_icon": "fas fa-barcode", }, "aligner": { "type": "string", @@ -58,17 +58,17 @@ "default": "alevin", "help_text": "The workflow can handle three types of methods:\n\n- Kallisto/Bustools\n- Salmon Alevin + AlevinQC\n- STARsolo\n\nTo choose which one to use, please specify either `alevin`, `star` or `kallisto` as a parameter option for `--aligner`. By default, the pipeline runs the `alevin` option. Note that specifying another aligner option also requires choosing appropriate parameters (see below) for the selected option.", "fa_icon": "fas fa-align-center", - "enum": ["kallisto", "star", "alevin", "cellranger", "cellrangerarc", "universc"] + "enum": ["kallisto", "star", "alevin", "cellranger", "cellrangerarc", "universc"], }, "protocol": { "type": "string", "description": "The protocol that was used to generate the single cell data, e.g. 
10XV2 (default).", "default": "10XV2", "fa_icon": "fas fa-cogs", - "enum": ["10XV3", "10XV2", "10XV1", "dropseq"] - } + "enum": ["10XV3", "10XV2", "10XV1", "dropseq"], + }, }, - "fa_icon": "fas fa-terminal" + "fa_icon": "fas fa-terminal", }, "skip_tools": { "title": "Skip Tools", @@ -77,15 +77,9 @@ "default": "", "fa_icon": "fas fa-forward", "properties": { - "skip_multiqc": { - "type": "boolean", - "description": "Skip MultiQC Report" - }, - "skip_fastqc": { - "type": "boolean", - "description": "Skip FastQC" - } - } + "skip_multiqc": {"type": "boolean", "description": "Skip MultiQC Report"}, + "skip_fastqc": {"type": "boolean", "description": "Skip FastQC"}, + }, }, "reference_genome_options": { "title": "Reference genome options", @@ -97,7 +91,7 @@ "type": "string", "description": "Name of iGenomes reference.", "fa_icon": "fas fa-book", - "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details." + "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details.", }, "fasta": { "type": "string", @@ -107,29 +101,25 @@ "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$", "description": "Path to FASTA genome file.", "help_text": "This parameter is *mandatory* if `--genome` is not specified. If you don't have a BWA index available this will be generated for you automatically. 
Combine with `--save_reference` to save BWA index for future runs.", - "fa_icon": "far fa-file-code" + "fa_icon": "far fa-file-code", }, "igenomes_ignore": { "type": "boolean", "description": "Do not load the iGenomes reference config.", "fa_icon": "fas fa-ban", "hidden": true, - "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`." - }, - "transcript_fasta": { - "type": "string", - "description": "A cDNA FASTA file", - "fa_icon": "fas fa-dna" + "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`.", }, + "transcript_fasta": {"type": "string", "description": "A cDNA FASTA file", "fa_icon": "fas fa-dna"}, "gtf": { "type": "string", "description": "Reference GTF annotation file", - "fa_icon": "fas fa-code-branch" + "fa_icon": "fas fa-code-branch", }, "save_reference": { "type": "boolean", "description": "Specify this parameter to save the indices created (STAR, Kallisto, Salmon) to the results.", - "fa_icon": "fas fa-bookmark" + "fa_icon": "fas fa-bookmark", }, "igenomes_base": { "type": "string", @@ -137,9 +127,9 @@ "description": "Directory / URL base for iGenomes references.", "default": "s3://ngi-igenomes/igenomes", "fa_icon": "fas fa-cloud-download-alt", - "hidden": true - } - } + "hidden": true, + }, + }, }, "alevin_options": { "title": "Alevin Options", @@ -150,21 +140,21 @@ "salmon_index": { "type": "string", "description": "This can be used to specify a precomputed Salmon index in the pipeline, in order to skip the generation of required indices by Salmon itself.", - "fa_icon": "fas fa-fish" + "fa_icon": "fas fa-fish", }, "txp2gene": { "type": "string", "description": "Path to transcript to gene mapping file. 
This allows the specification of a transcript to gene mapping file for Salmon Alevin and AlevinQC.", "help_text": "> This is not the same as the `kallisto_gene_map` parameter down below and is only used by the Salmon Alevin workflow.", - "fa_icon": "fas fa-map-marked-alt" + "fa_icon": "fas fa-map-marked-alt", }, "simpleaf_rlen": { "type": "integer", "default": 91, "description": "It is the target read length the index will be built for, using simpleaf.", - "fa_icon": "fas fa-map-marked-alt" - } - } + "fa_icon": "fas fa-map-marked-alt", + }, + }, }, "starsolo_options": { "title": "STARSolo Options", @@ -176,25 +166,19 @@ "type": "string", "description": "Specify a path to the precomputed STAR index.", "help_text": "> NB: This has to be computed with STAR Version 2.7 or later, as STARsolo was only first supported by STAR Version 2.7.", - "fa_icon": "fas fa-asterisk" - }, - "star_ignore_sjdbgtf": { - "type": "string", - "description": "Ignore the SJDB GTF file." - }, - "seq_center": { - "type": "string", - "description": "Name of sequencing center for BAM read group tag." + "fa_icon": "fas fa-asterisk", }, + "star_ignore_sjdbgtf": {"type": "string", "description": "Ignore the SJDB GTF file."}, + "seq_center": {"type": "string", "description": "Name of sequencing center for BAM read group tag."}, "star_feature": { "type": "string", "default": "Gene", "enum": ["Gene", "GeneFull", "Gene Velocyto"], "description": "Quantification type of different transcriptomic feature. Use `GeneFull` on pre-mRNA count for single-nucleus RNA-seq reads. Use `Gene Velocyto` to generate RNA velocity matrix.", - "fa_icon": "fas fa-asterisk" - } + "fa_icon": "fas fa-asterisk", + }, }, - "fa_icon": "fas fa-star" + "fa_icon": "fas fa-star", }, "kallisto_bus_options": { "title": "Kallisto/BUS Options", @@ -206,21 +190,21 @@ "kallisto_gene_map": { "type": "string", "description": "Specify a Kallisto gene mapping file here. 
If you don't, this will be automatically created in the Kallisto workflow when specifying a valid `--gtf` file.", - "fa_icon": "fas fa-fish" + "fa_icon": "fas fa-fish", }, "kallisto_index": { "type": "string", "description": "Specify a path to the precomputed Kallisto index.", - "fa_icon": "fas fa-fish" + "fa_icon": "fas fa-fish", }, "kb_workflow": { "type": "string", "default": "standard", "description": "Type of workflow. Use `lamanno` for RNA velocity based on La Manno et al. 2018 logic. Use `nucleus` for RNA velocity on single-nucleus RNA-seq reads. Use `kite` for feature barcoding. Use `kite: 10xFB` for 10x Genomics Feature Barcoding technology. (default: standard)", "fa_icon": "fas fa-fish", - "enum": ["standard", "lamanno", "nucleus", "kite", "kite: 10xFB"] - } - } + "enum": ["standard", "lamanno", "nucleus", "kite", "kite: 10xFB"], + }, + }, }, "cellranger_options": { "title": "Cellranger Options", @@ -230,9 +214,9 @@ "properties": { "cellranger_index": { "type": "string", - "description": "Specify a pre-calculated cellranger index. Readily prepared indexes can be obtained from the 10x Genomics website. " + "description": "Specify a pre-calculated cellranger index. Readily prepared indexes can be obtained from the 10x Genomics website. ", } - } + }, }, "cellrangerarc_options": { "title": "Cellranger ARC Options", @@ -242,17 +226,14 @@ "properties": { "cellrangerarc_index": { "type": "string", - "description": "Specify a pre-calculated cellranger index. Readily prepared indexes can be obtained from the 10x Genomics website. " + "description": "Specify a pre-calculated cellranger index. Readily prepared indexes can be obtained from the 10x Genomics website. ", }, "motifs": { "type": "string", - "description": "Specify a motif file to create a cellranger-arc index. Can be taken, e.g., from the JASPAR database." + "description": "Specify a motif file to create a cellranger-arc index. 
Can be taken, e.g., from the JASPAR database.", }, - "sample_type": { - "type": "string", - "description": "Specify the type of data (gex or atac)." - } - } + "sample_type": {"type": "string", "description": "Specify the type of data (gex or atac)."}, + }, }, "universc_options": { "title": "UniverSC Options", @@ -262,14 +243,14 @@ "properties": { "universc_index": { "type": "string", - "description": "Specify a pre-calculated cellranger index. Readily prepared indexes can be obtained from the 10x Genomics website." + "description": "Specify a pre-calculated cellranger index. Readily prepared indexes can be obtained from the 10x Genomics website.", }, "universc_technology": { "type": "string", "description": "Specify a single-cell technology, vendor, or platform. See the UniverSC documentation or GitHub repository for more details.", - "default": "10x" - } - } + "default": "10x", + }, + }, }, "institutional_config_options": { "title": "Institutional config options", @@ -283,7 +264,7 @@ "description": "Git commit id for Institutional configs.", "default": "master", "hidden": true, - "fa_icon": "fas fa-users-cog" + "fa_icon": "fas fa-users-cog", }, "custom_config_base": { "type": "string", @@ -291,33 +272,33 @@ "default": "https://raw.githubusercontent.com/nf-core/configs/master", "hidden": true, "help_text": "If you're running offline, Nextflow will not be able to fetch the institutional config files from the internet. If you don't need them, then this is not a problem. 
If you do need them, you should download the files from the repo and tell Nextflow where to find them with this parameter.", - "fa_icon": "fas fa-users-cog" + "fa_icon": "fas fa-users-cog", }, "config_profile_name": { "type": "string", "description": "Institutional config name.", "hidden": true, - "fa_icon": "fas fa-users-cog" + "fa_icon": "fas fa-users-cog", }, "config_profile_description": { "type": "string", "description": "Institutional config description.", "hidden": true, - "fa_icon": "fas fa-users-cog" + "fa_icon": "fas fa-users-cog", }, "config_profile_contact": { "type": "string", "description": "Institutional config contact information.", "hidden": true, - "fa_icon": "fas fa-users-cog" + "fa_icon": "fas fa-users-cog", }, "config_profile_url": { "type": "string", "description": "Institutional config URL link.", "hidden": true, - "fa_icon": "fas fa-users-cog" - } - } + "fa_icon": "fas fa-users-cog", + }, + }, }, "max_job_request_options": { "title": "Max job request options", @@ -332,7 +313,7 @@ "default": 16, "fa_icon": "fas fa-microchip", "hidden": true, - "help_text": "Use to set an upper-limit for the CPU requirement for each process. Should be an integer e.g. `--max_cpus 1`" + "help_text": "Use to set an upper-limit for the CPU requirement for each process. Should be an integer e.g. `--max_cpus 1`", }, "max_memory": { "type": "string", @@ -341,7 +322,7 @@ "fa_icon": "fas fa-memory", "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$", "hidden": true, - "help_text": "Use to set an upper-limit for the memory requirement for each process. Should be a string in the format integer-unit e.g. `--max_memory '8.GB'`" + "help_text": "Use to set an upper-limit for the memory requirement for each process. Should be a string in the format integer-unit e.g. 
`--max_memory '8.GB'`", }, "max_time": { "type": "string", @@ -350,9 +331,9 @@ "fa_icon": "far fa-clock", "pattern": "^(\\d+\\.?\\s*(s|m|h|d|day)\\s*)+$", "hidden": true, - "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`" - } - } + "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`", + }, + }, }, "generic_options": { "title": "Generic options", @@ -365,13 +346,13 @@ "type": "boolean", "description": "Display help text.", "fa_icon": "fas fa-question-circle", - "hidden": true + "hidden": true, }, "version": { "type": "boolean", "description": "Display version and exit.", "fa_icon": "fas fa-question-circle", - "hidden": true + "hidden": true, }, "publish_dir_mode": { "type": "string", @@ -380,7 +361,7 @@ "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. 
See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], - "hidden": true + "hidden": true, }, "email_on_fail": { "type": "string", @@ -388,13 +369,13 @@ "fa_icon": "fas fa-exclamation-triangle", "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$", "help_text": "An email address to send a summary email to when the pipeline is completed - ONLY sent if the pipeline does not exit successfully.", - "hidden": true + "hidden": true, }, "plaintext_email": { "type": "boolean", "description": "Send plain-text email instead of HTML.", "fa_icon": "fas fa-remove-format", - "hidden": true + "hidden": true, }, "max_multiqc_email_size": { "type": "string", @@ -402,109 +383,83 @@ "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$", "default": "25.MB", "fa_icon": "fas fa-file-upload", - "hidden": true + "hidden": true, }, "monochrome_logs": { "type": "boolean", "description": "Do not use coloured log outputs.", "fa_icon": "fas fa-palette", - "hidden": true + "hidden": true, }, "hook_url": { "type": "string", "description": "Incoming hook URL for messaging service", "fa_icon": "fas fa-people-group", "help_text": "Incoming hook URL for messaging service. Currently, MS Teams and Slack are supported.", - "hidden": true + "hidden": true, }, "multiqc_config": { "type": "string", "format": "file-path", "description": "Custom config file to supply to MultiQC.", "fa_icon": "fas fa-cog", - "hidden": true + "hidden": true, }, "multiqc_logo": { "type": "string", "description": "Custom logo file to supply to MultiQC. 
File name must also be set in the MultiQC config file", "fa_icon": "fas fa-image", - "hidden": true + "hidden": true, }, "multiqc_methods_description": { "type": "string", "description": "Custom MultiQC yaml file containing HTML including a methods description.", - "fa_icon": "fas fa-cog" + "fa_icon": "fas fa-cog", }, "validate_params": { "type": "boolean", "description": "Boolean whether to validate parameters against the schema at runtime", "default": true, "fa_icon": "fas fa-check-square", - "hidden": true + "hidden": true, }, "validationShowHiddenParams": { "type": "boolean", "fa_icon": "far fa-eye-slash", "description": "Show all params when using `--help`", "hidden": true, - "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters." + "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters.", }, "validationFailUnrecognisedParams": { "type": "boolean", "fa_icon": "far fa-check-circle", "description": "Validation of parameters fails when an unrecognised parameter is found.", "hidden": true, - "help_text": "By default, when an unrecognised parameter is found, it returns a warinig." + "help_text": "By default, when an unrecognised parameter is found, it returns a warinig.", }, "validationLenientMode": { "type": "boolean", "fa_icon": "far fa-check-circle", "description": "Validation of parameters in lenient more.", "hidden": true, - "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)." - } - } - } + "help_text": "Allows string values that are parseable as numbers or booleans. 
For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode).", + }, + }, + }, }, "allOf": [ - { - "$ref": "#/definitions/input_output_options" - }, - { - "$ref": "#/definitions/mandatory_arguments" - }, - { - "$ref": "#/definitions/skip_tools" - }, - { - "$ref": "#/definitions/reference_genome_options" - }, - { - "$ref": "#/definitions/alevin_options" - }, - { - "$ref": "#/definitions/starsolo_options" - }, - { - "$ref": "#/definitions/kallisto_bus_options" - }, - { - "$ref": "#/definitions/cellranger_options" - }, - { - "$ref": "#/definitions/cellrangerarc_options" - }, - { - "$ref": "#/definitions/universc_options" - }, - { - "$ref": "#/definitions/institutional_config_options" - }, - { - "$ref": "#/definitions/max_job_request_options" - }, - { - "$ref": "#/definitions/generic_options" - } - ] + {"$ref": "#/definitions/input_output_options"}, + {"$ref": "#/definitions/mandatory_arguments"}, + {"$ref": "#/definitions/skip_tools"}, + {"$ref": "#/definitions/reference_genome_options"}, + {"$ref": "#/definitions/alevin_options"}, + {"$ref": "#/definitions/starsolo_options"}, + {"$ref": "#/definitions/kallisto_bus_options"}, + {"$ref": "#/definitions/cellranger_options"}, + {"$ref": "#/definitions/cellrangerarc_options"}, + {"$ref": "#/definitions/universc_options"}, + {"$ref": "#/definitions/institutional_config_options"}, + {"$ref": "#/definitions/max_job_request_options"}, + {"$ref": "#/definitions/generic_options"}, + ], } From afbfd040b043c00566c57d94eab5834ad739cf42 Mon Sep 17 00:00:00 2001 From: Florian Date: Tue, 31 Oct 2023 11:01:45 +0100 Subject: [PATCH 39/89] Remove pythonblack correction --- modules.json | 24 ++--- nextflow_schema.json | 233 ++++++++++++++++++++++++++----------------- 2 files changed, 151 insertions(+), 106 deletions(-) diff --git a/modules.json b/modules.json index 06849b69..c44a2a15 100644 --- a/modules.json +++ b/modules.json @@ -8,62 +8,62 @@ "cellranger/count": { "branch": "master", "git_sha": 
"5df79e0383386a9e43462a6e81bf978ce0a6db09", - "installed_by": ["modules"], + "installed_by": ["modules"] }, "cellranger/mkgtf": { "branch": "master", "git_sha": "716ef3019b66772a817b417078edce2f7b337858", - "installed_by": ["modules"], + "installed_by": ["modules"] }, "cellranger/mkref": { "branch": "master", "git_sha": "716ef3019b66772a817b417078edce2f7b337858", - "installed_by": ["modules"], + "installed_by": ["modules"] }, "custom/dumpsoftwareversions": { "branch": "master", "git_sha": "05c280924b6c768d484c7c443dad5e605c4ff4b4", - "installed_by": ["modules"], + "installed_by": ["modules"] }, "fastqc": { "branch": "master", "git_sha": "9a4517e720bc812e95b56d23d15a1653b6db4f53", - "installed_by": ["modules"], + "installed_by": ["modules"] }, "gffread": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"], + "installed_by": ["modules"] }, "gunzip": { "branch": "master", "git_sha": "e06548bfa36ee31869b81041879dd6b3a83b1d57", - "installed_by": ["modules"], + "installed_by": ["modules"] }, "kallistobustools/count": { "branch": "master", "git_sha": "de204d3c950f091336539ad74f0e47ddffe69ed4", - "installed_by": ["modules"], + "installed_by": ["modules"] }, "kallistobustools/ref": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"], + "installed_by": ["modules"] }, "multiqc": { "branch": "master", "git_sha": "a6e11ac655e744f7ebc724be669dd568ffdc0e80", - "installed_by": ["modules"], + "installed_by": ["modules"] }, "star/genomegenerate": { "branch": "master", "git_sha": "cc08a888069f67cab8120259bddab8032d4c0fe3", - "installed_by": ["modules"], + "installed_by": ["modules"] }, "universc": { "branch": "master", "git_sha": "cf67a6d7d043e2bd6a3099be84c72046fc71508f", - "installed_by": ["modules"], + "installed_by": ["modules"] }, } } diff --git a/nextflow_schema.json b/nextflow_schema.json index c850796d..31f95e5f 100644 --- a/nextflow_schema.json +++ 
b/nextflow_schema.json @@ -19,27 +19,27 @@ "mimetype": "text/csv", "pattern": "^\\S+\\.csv$", "description": "Path to comma-separated file containing information about the samples in the experiment.", - "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 4 columns, and a header row. See [usage docs](https://nf-co.re/rnaseq/usage#samplesheet-input).", + "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 4 columns, and a header row. See [usage docs](https://nf-co.re/rnaseq/usage#samplesheet-input)." }, "outdir": { "type": "string", "format": "directory-path", "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.", - "fa_icon": "fas fa-folder-open", + "fa_icon": "fas fa-folder-open" }, "email": { "type": "string", "description": "Email address for completion summary.", "fa_icon": "fas fa-envelope", "help_text": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.", - "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$", + "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$" }, "multiqc_title": { "type": "string", "description": "MultiQC report title. 
Printed as page header, used for filename if not otherwise specified.", - "fa_icon": "fas fa-file-signature", - }, - }, + "fa_icon": "fas fa-file-signature" + } + } }, "mandatory_arguments": { "title": "Mandatory arguments", @@ -50,7 +50,7 @@ "barcode_whitelist": { "type": "string", "description": "If not using the 10X Genomics platform, a custom barcode whitelist can be used with `--barcode_whitelist`.", - "fa_icon": "fas fa-barcode", + "fa_icon": "fas fa-barcode" }, "aligner": { "type": "string", @@ -58,17 +58,17 @@ "default": "alevin", "help_text": "The workflow can handle three types of methods:\n\n- Kallisto/Bustools\n- Salmon Alevin + AlevinQC\n- STARsolo\n\nTo choose which one to use, please specify either `alevin`, `star` or `kallisto` as a parameter option for `--aligner`. By default, the pipeline runs the `alevin` option. Note that specifying another aligner option also requires choosing appropriate parameters (see below) for the selected option.", "fa_icon": "fas fa-align-center", - "enum": ["kallisto", "star", "alevin", "cellranger", "cellrangerarc", "universc"], + "enum": ["kallisto", "star", "alevin", "cellranger", "cellrangerarc", "universc"] }, "protocol": { "type": "string", "description": "The protocol that was used to generate the single cell data, e.g. 
10XV2 (default).", "default": "10XV2", "fa_icon": "fas fa-cogs", - "enum": ["10XV3", "10XV2", "10XV1", "dropseq"], - }, + "enum": ["10XV3", "10XV2", "10XV1", "dropseq"] + } }, - "fa_icon": "fas fa-terminal", + "fa_icon": "fas fa-terminal" }, "skip_tools": { "title": "Skip Tools", @@ -77,9 +77,15 @@ "default": "", "fa_icon": "fas fa-forward", "properties": { - "skip_multiqc": {"type": "boolean", "description": "Skip MultiQC Report"}, - "skip_fastqc": {"type": "boolean", "description": "Skip FastQC"}, - }, + "skip_multiqc": { + "type": "boolean", + "description": "Skip MultiQC Report" + }, + "skip_fastqc": { + "type": "boolean", + "description": "Skip FastQC" + } + } }, "reference_genome_options": { "title": "Reference genome options", @@ -91,7 +97,7 @@ "type": "string", "description": "Name of iGenomes reference.", "fa_icon": "fas fa-book", - "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details.", + "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details." }, "fasta": { "type": "string", @@ -101,25 +107,29 @@ "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$", "description": "Path to FASTA genome file.", "help_text": "This parameter is *mandatory* if `--genome` is not specified. If you don't have a BWA index available this will be generated for you automatically. 
Combine with `--save_reference` to save BWA index for future runs.", - "fa_icon": "far fa-file-code", + "fa_icon": "far fa-file-code" }, "igenomes_ignore": { "type": "boolean", "description": "Do not load the iGenomes reference config.", "fa_icon": "fas fa-ban", "hidden": true, - "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`.", + "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`." + }, + "transcript_fasta": { + "type": "string", + "description": "A cDNA FASTA file", + "fa_icon": "fas fa-dna" }, - "transcript_fasta": {"type": "string", "description": "A cDNA FASTA file", "fa_icon": "fas fa-dna"}, "gtf": { "type": "string", "description": "Reference GTF annotation file", - "fa_icon": "fas fa-code-branch", + "fa_icon": "fas fa-code-branch" }, "save_reference": { "type": "boolean", "description": "Specify this parameter to save the indices created (STAR, Kallisto, Salmon) to the results.", - "fa_icon": "fas fa-bookmark", + "fa_icon": "fas fa-bookmark" }, "igenomes_base": { "type": "string", @@ -127,9 +137,9 @@ "description": "Directory / URL base for iGenomes references.", "default": "s3://ngi-igenomes/igenomes", "fa_icon": "fas fa-cloud-download-alt", - "hidden": true, - }, - }, + "hidden": true + } + } }, "alevin_options": { "title": "Alevin Options", @@ -140,21 +150,21 @@ "salmon_index": { "type": "string", "description": "This can be used to specify a precomputed Salmon index in the pipeline, in order to skip the generation of required indices by Salmon itself.", - "fa_icon": "fas fa-fish", + "fa_icon": "fas fa-fish" }, "txp2gene": { "type": "string", "description": "Path to transcript to gene mapping file. 
This allows the specification of a transcript to gene mapping file for Salmon Alevin and AlevinQC.", "help_text": "> This is not the same as the `kallisto_gene_map` parameter down below and is only used by the Salmon Alevin workflow.", - "fa_icon": "fas fa-map-marked-alt", + "fa_icon": "fas fa-map-marked-alt" }, "simpleaf_rlen": { "type": "integer", "default": 91, "description": "It is the target read length the index will be built for, using simpleaf.", - "fa_icon": "fas fa-map-marked-alt", - }, - }, + "fa_icon": "fas fa-map-marked-alt" + } + } }, "starsolo_options": { "title": "STARSolo Options", @@ -166,19 +176,25 @@ "type": "string", "description": "Specify a path to the precomputed STAR index.", "help_text": "> NB: This has to be computed with STAR Version 2.7 or later, as STARsolo was only first supported by STAR Version 2.7.", - "fa_icon": "fas fa-asterisk", + "fa_icon": "fas fa-asterisk" + }, + "star_ignore_sjdbgtf": { + "type": "string", + "description": "Ignore the SJDB GTF file." + }, + "seq_center": { + "type": "string", + "description": "Name of sequencing center for BAM read group tag." }, - "star_ignore_sjdbgtf": {"type": "string", "description": "Ignore the SJDB GTF file."}, - "seq_center": {"type": "string", "description": "Name of sequencing center for BAM read group tag."}, "star_feature": { "type": "string", "default": "Gene", "enum": ["Gene", "GeneFull", "Gene Velocyto"], "description": "Quantification type of different transcriptomic feature. Use `GeneFull` on pre-mRNA count for single-nucleus RNA-seq reads. Use `Gene Velocyto` to generate RNA velocity matrix.", - "fa_icon": "fas fa-asterisk", - }, + "fa_icon": "fas fa-asterisk" + } }, - "fa_icon": "fas fa-star", + "fa_icon": "fas fa-star" }, "kallisto_bus_options": { "title": "Kallisto/BUS Options", @@ -190,21 +206,21 @@ "kallisto_gene_map": { "type": "string", "description": "Specify a Kallisto gene mapping file here. 
If you don't, this will be automatically created in the Kallisto workflow when specifying a valid `--gtf` file.", - "fa_icon": "fas fa-fish", + "fa_icon": "fas fa-fish" }, "kallisto_index": { "type": "string", "description": "Specify a path to the precomputed Kallisto index.", - "fa_icon": "fas fa-fish", + "fa_icon": "fas fa-fish" }, "kb_workflow": { "type": "string", "default": "standard", "description": "Type of workflow. Use `lamanno` for RNA velocity based on La Manno et al. 2018 logic. Use `nucleus` for RNA velocity on single-nucleus RNA-seq reads. Use `kite` for feature barcoding. Use `kite: 10xFB` for 10x Genomics Feature Barcoding technology. (default: standard)", "fa_icon": "fas fa-fish", - "enum": ["standard", "lamanno", "nucleus", "kite", "kite: 10xFB"], - }, - }, + "enum": ["standard", "lamanno", "nucleus", "kite", "kite: 10xFB"] + } + } }, "cellranger_options": { "title": "Cellranger Options", @@ -214,9 +230,9 @@ "properties": { "cellranger_index": { "type": "string", - "description": "Specify a pre-calculated cellranger index. Readily prepared indexes can be obtained from the 10x Genomics website. ", + "description": "Specify a pre-calculated cellranger index. Readily prepared indexes can be obtained from the 10x Genomics website. " } - }, + } }, "cellrangerarc_options": { "title": "Cellranger ARC Options", @@ -226,14 +242,17 @@ "properties": { "cellrangerarc_index": { "type": "string", - "description": "Specify a pre-calculated cellranger index. Readily prepared indexes can be obtained from the 10x Genomics website. ", + "description": "Specify a pre-calculated cellranger index. Readily prepared indexes can be obtained from the 10x Genomics website. " }, "motifs": { "type": "string", - "description": "Specify a motif file to create a cellranger-arc index. Can be taken, e.g., from the JASPAR database.", + "description": "Specify a motif file to create a cellranger-arc index. Can be taken, e.g., from the JASPAR database." 
}, - "sample_type": {"type": "string", "description": "Specify the type of data (gex or atac)."}, - }, + "sample_type": { + "type": "string", + "description": "Specify the type of data (gex or atac)." + } + } }, "universc_options": { "title": "UniverSC Options", @@ -243,14 +262,14 @@ "properties": { "universc_index": { "type": "string", - "description": "Specify a pre-calculated cellranger index. Readily prepared indexes can be obtained from the 10x Genomics website.", + "description": "Specify a pre-calculated cellranger index. Readily prepared indexes can be obtained from the 10x Genomics website." }, "universc_technology": { "type": "string", "description": "Specify a single-cell technology, vendor, or platform. See the UniverSC documentation or GitHub repository for more details.", - "default": "10x", - }, - }, + "default": "10x" + } + } }, "institutional_config_options": { "title": "Institutional config options", @@ -264,7 +283,7 @@ "description": "Git commit id for Institutional configs.", "default": "master", "hidden": true, - "fa_icon": "fas fa-users-cog", + "fa_icon": "fas fa-users-cog" }, "custom_config_base": { "type": "string", @@ -272,33 +291,33 @@ "default": "https://raw.githubusercontent.com/nf-core/configs/master", "hidden": true, "help_text": "If you're running offline, Nextflow will not be able to fetch the institutional config files from the internet. If you don't need them, then this is not a problem. 
If you do need them, you should download the files from the repo and tell Nextflow where to find them with this parameter.", - "fa_icon": "fas fa-users-cog", + "fa_icon": "fas fa-users-cog" }, "config_profile_name": { "type": "string", "description": "Institutional config name.", "hidden": true, - "fa_icon": "fas fa-users-cog", + "fa_icon": "fas fa-users-cog" }, "config_profile_description": { "type": "string", "description": "Institutional config description.", "hidden": true, - "fa_icon": "fas fa-users-cog", + "fa_icon": "fas fa-users-cog" }, "config_profile_contact": { "type": "string", "description": "Institutional config contact information.", "hidden": true, - "fa_icon": "fas fa-users-cog", + "fa_icon": "fas fa-users-cog" }, "config_profile_url": { "type": "string", "description": "Institutional config URL link.", "hidden": true, - "fa_icon": "fas fa-users-cog", - }, - }, + "fa_icon": "fas fa-users-cog" + } + } }, "max_job_request_options": { "title": "Max job request options", @@ -313,7 +332,7 @@ "default": 16, "fa_icon": "fas fa-microchip", "hidden": true, - "help_text": "Use to set an upper-limit for the CPU requirement for each process. Should be an integer e.g. `--max_cpus 1`", + "help_text": "Use to set an upper-limit for the CPU requirement for each process. Should be an integer e.g. `--max_cpus 1`" }, "max_memory": { "type": "string", @@ -322,7 +341,7 @@ "fa_icon": "fas fa-memory", "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$", "hidden": true, - "help_text": "Use to set an upper-limit for the memory requirement for each process. Should be a string in the format integer-unit e.g. `--max_memory '8.GB'`", + "help_text": "Use to set an upper-limit for the memory requirement for each process. Should be a string in the format integer-unit e.g. 
`--max_memory '8.GB'`" }, "max_time": { "type": "string", @@ -331,9 +350,9 @@ "fa_icon": "far fa-clock", "pattern": "^(\\d+\\.?\\s*(s|m|h|d|day)\\s*)+$", "hidden": true, - "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`", - }, - }, + "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`" + } + } }, "generic_options": { "title": "Generic options", @@ -346,13 +365,13 @@ "type": "boolean", "description": "Display help text.", "fa_icon": "fas fa-question-circle", - "hidden": true, + "hidden": true }, "version": { "type": "boolean", "description": "Display version and exit.", "fa_icon": "fas fa-question-circle", - "hidden": true, + "hidden": true }, "publish_dir_mode": { "type": "string", @@ -361,7 +380,7 @@ "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. 
See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], - "hidden": true, + "hidden": true }, "email_on_fail": { "type": "string", @@ -369,13 +388,13 @@ "fa_icon": "fas fa-exclamation-triangle", "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$", "help_text": "An email address to send a summary email to when the pipeline is completed - ONLY sent if the pipeline does not exit successfully.", - "hidden": true, + "hidden": true }, "plaintext_email": { "type": "boolean", "description": "Send plain-text email instead of HTML.", "fa_icon": "fas fa-remove-format", - "hidden": true, + "hidden": true }, "max_multiqc_email_size": { "type": "string", @@ -383,83 +402,109 @@ "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$", "default": "25.MB", "fa_icon": "fas fa-file-upload", - "hidden": true, + "hidden": true }, "monochrome_logs": { "type": "boolean", "description": "Do not use coloured log outputs.", "fa_icon": "fas fa-palette", - "hidden": true, + "hidden": true }, "hook_url": { "type": "string", "description": "Incoming hook URL for messaging service", "fa_icon": "fas fa-people-group", "help_text": "Incoming hook URL for messaging service. Currently, MS Teams and Slack are supported.", - "hidden": true, + "hidden": true }, "multiqc_config": { "type": "string", "format": "file-path", "description": "Custom config file to supply to MultiQC.", "fa_icon": "fas fa-cog", - "hidden": true, + "hidden": true }, "multiqc_logo": { "type": "string", "description": "Custom logo file to supply to MultiQC. 
File name must also be set in the MultiQC config file", "fa_icon": "fas fa-image", - "hidden": true, + "hidden": true }, "multiqc_methods_description": { "type": "string", "description": "Custom MultiQC yaml file containing HTML including a methods description.", - "fa_icon": "fas fa-cog", + "fa_icon": "fas fa-cog" }, "validate_params": { "type": "boolean", "description": "Boolean whether to validate parameters against the schema at runtime", "default": true, "fa_icon": "fas fa-check-square", - "hidden": true, + "hidden": true }, "validationShowHiddenParams": { "type": "boolean", "fa_icon": "far fa-eye-slash", "description": "Show all params when using `--help`", "hidden": true, - "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters.", + "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters." }, "validationFailUnrecognisedParams": { "type": "boolean", "fa_icon": "far fa-check-circle", "description": "Validation of parameters fails when an unrecognised parameter is found.", "hidden": true, - "help_text": "By default, when an unrecognised parameter is found, it returns a warinig.", + "help_text": "By default, when an unrecognised parameter is found, it returns a warinig." }, "validationLenientMode": { "type": "boolean", "fa_icon": "far fa-check-circle", "description": "Validation of parameters in lenient more.", "hidden": true, - "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode).", - }, - }, - }, + "help_text": "Allows string values that are parseable as numbers or booleans. 
For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)." + } + } + } }, "allOf": [ - {"$ref": "#/definitions/input_output_options"}, - {"$ref": "#/definitions/mandatory_arguments"}, - {"$ref": "#/definitions/skip_tools"}, - {"$ref": "#/definitions/reference_genome_options"}, - {"$ref": "#/definitions/alevin_options"}, - {"$ref": "#/definitions/starsolo_options"}, - {"$ref": "#/definitions/kallisto_bus_options"}, - {"$ref": "#/definitions/cellranger_options"}, - {"$ref": "#/definitions/cellrangerarc_options"}, - {"$ref": "#/definitions/universc_options"}, - {"$ref": "#/definitions/institutional_config_options"}, - {"$ref": "#/definitions/max_job_request_options"}, - {"$ref": "#/definitions/generic_options"}, - ], + { + "$ref": "#/definitions/input_output_options" + }, + { + "$ref": "#/definitions/mandatory_arguments" + }, + { + "$ref": "#/definitions/skip_tools" + }, + { + "$ref": "#/definitions/reference_genome_options" + }, + { + "$ref": "#/definitions/alevin_options" + }, + { + "$ref": "#/definitions/starsolo_options" + }, + { + "$ref": "#/definitions/kallisto_bus_options" + }, + { + "$ref": "#/definitions/cellranger_options" + }, + { + "$ref": "#/definitions/cellrangerarc_options" + }, + { + "$ref": "#/definitions/universc_options" + }, + { + "$ref": "#/definitions/institutional_config_options" + }, + { + "$ref": "#/definitions/max_job_request_options" + }, + { + "$ref": "#/definitions/generic_options" + } + ] } From 6884791d02eb7d100b6a3a77bee4192640bde059 Mon Sep 17 00:00:00 2001 From: Florian Date: Tue, 31 Oct 2023 11:03:08 +0100 Subject: [PATCH 40/89] Prettier modules json --- modules.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules.json b/modules.json index c44a2a15..5b4e4a3f 100644 --- a/modules.json +++ b/modules.json @@ -64,9 +64,9 @@ "branch": "master", "git_sha": "cf67a6d7d043e2bd6a3099be84c72046fc71508f", "installed_by": ["modules"] - }, + } } } } - }, + } } From 
c2004eb1fba63774cf6d00fdfa2ad29d4ec3a880 Mon Sep 17 00:00:00 2001 From: Florian Date: Tue, 31 Oct 2023 11:24:06 +0100 Subject: [PATCH 41/89] Python black for bin/check_samplesheet.py bin/generate_config.py bin/generate_lib_csv.py --- bin/check_samplesheet.py | 2 -- bin/generate_config.py | 1 - bin/generate_lib_csv.py | 1 - 3 files changed, 4 deletions(-) diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index 3544dab9..bd713438 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -84,7 +84,6 @@ def check_samplesheet(file_in, file_out): sample_mapping_dict = {} with open(file_in, "r") as fin: - ## Check header MIN_COLS = 2 MIN_HEADER = ["sample", "fastq_1", "fastq_2"] @@ -216,7 +215,6 @@ def check_samplesheet(file_in, file_out): + "\n" ) for sample in sorted(sample_mapping_dict.keys()): - ## Check that multiple runs of the same sample are of the same datatype if not all(x[0] == sample_mapping_dict[sample][0][0] for x in sample_mapping_dict[sample]): print_error( diff --git a/bin/generate_config.py b/bin/generate_config.py index 85647826..62434aef 100755 --- a/bin/generate_config.py +++ b/bin/generate_config.py @@ -2,7 +2,6 @@ import argparse if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Generate the lib.csv for cellranger-arc.") parser.add_argument("-f", "--fasta", dest="fasta", help="Name of the fasta file.") diff --git a/bin/generate_lib_csv.py b/bin/generate_lib_csv.py index 07ab9661..5c1c0c4f 100755 --- a/bin/generate_lib_csv.py +++ b/bin/generate_lib_csv.py @@ -3,7 +3,6 @@ import os if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Generate the lib.csv for cellranger-arc.") parser.add_argument("-t", "--sample_types", dest="sample_types", help="Comma seperated list of sample types.") From cfc00fb151cb70a8ddd7db612edc47887b9a8660 Mon Sep 17 00:00:00 2001 From: Florian Date: Tue, 31 Oct 2023 11:45:59 +0100 Subject: [PATCH 42/89] Forgot to output the lib.csv from count --- 
modules/local/cellrangerarc/count/main.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/local/cellrangerarc/count/main.nf b/modules/local/cellrangerarc/count/main.nf index a858a2eb..988caec5 100644 --- a/modules/local/cellrangerarc/count/main.nf +++ b/modules/local/cellrangerarc/count/main.nf @@ -15,6 +15,7 @@ process CELLRANGERARC_COUNT { output: tuple val(meta), path("${meta.id}/outs/*"), emit: outs + path("${meta.id}_lib.csv") , emit: lib path "versions.yml" , emit: versions when: From a97b8586321d6709a849b41a8fad232296e7dd99 Mon Sep 17 00:00:00 2001 From: Florian Date: Fri, 10 Nov 2023 16:21:25 +0100 Subject: [PATCH 43/89] Adding more documentation for the generat config python script --- bin/generate_config.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/bin/generate_config.py b/bin/generate_config.py index 62434aef..2eef125e 100755 --- a/bin/generate_config.py +++ b/bin/generate_config.py @@ -2,7 +2,10 @@ import argparse if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Generate the lib.csv for cellranger-arc.") + parser = argparse.ArgumentParser(description="Generate the config for cellranger-arc mkref. \ + cellranger-arc mkref takes as input a configuration file that bundles various inputs to the tool. 
\ + You can also create a config file on your own, please find more information here:\ + https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/advanced/references") parser.add_argument("-f", "--fasta", dest="fasta", help="Name of the fasta file.") parser.add_argument("-g", "--gtf", dest="gtf", help="Name of the gtf file.") From 1d2b48a39d0ec8285ecd2f2a5f44bb49f9ed9683 Mon Sep 17 00:00:00 2001 From: Florian Date: Fri, 10 Nov 2023 16:27:24 +0100 Subject: [PATCH 44/89] Changes to cellrangarc count main-nf from review --- modules/local/cellrangerarc/count/main.nf | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/modules/local/cellrangerarc/count/main.nf b/modules/local/cellrangerarc/count/main.nf index 988caec5..ed42527d 100644 --- a/modules/local/cellrangerarc/count/main.nf +++ b/modules/local/cellrangerarc/count/main.nf @@ -10,7 +10,7 @@ process CELLRANGERARC_COUNT { } input: - tuple val(meta), val(multi_meta), path(reads) + tuple val(meta), val(multi_meta), path(reads, stageAs: "fastqs/*") path reference output: @@ -31,14 +31,6 @@ process CELLRANGERARC_COUNT { def lib_csv = meta.id + "_lib.csv" """ - # The following ugly three commands (mkdir, mv, generate_lib_csv) - # are required because cellranger-arc only deals with abolsute paths - if [ ! 
-d "fastqs" ]; then - mkdir fastqs - fi - - mv *.fastq.gz fastqs/ - generate_lib_csv.py \\ --sample_types $sample_types \\ --sample_names $sample_names \\ From d4411f8ceeb462a8d67ba8d12d055966ac30c978 Mon Sep 17 00:00:00 2001 From: Florian Date: Wed, 22 Nov 2023 17:50:39 +0100 Subject: [PATCH 45/89] Changing multi_meta to sub_sample and sample_type --- modules/local/cellrangerarc/count/main.nf | 8 ++++---- subworkflows/local/input_check.nf | 14 +++++++------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/modules/local/cellrangerarc/count/main.nf b/modules/local/cellrangerarc/count/main.nf index ed42527d..dbba4815 100644 --- a/modules/local/cellrangerarc/count/main.nf +++ b/modules/local/cellrangerarc/count/main.nf @@ -10,7 +10,7 @@ process CELLRANGERARC_COUNT { } input: - tuple val(meta), val(multi_meta), path(reads, stageAs: "fastqs/*") + tuple val(meta), val(sample_type), val(sub_sample), path(reads, stageAs: "fastqs/*") path reference output: @@ -25,9 +25,9 @@ process CELLRANGERARC_COUNT { def args = task.ext.args ?: '' def reference_name = reference.name - def multi_meta_info = multi_meta.collate(2).transpose() - def sample_types = multi_meta_info[0].join(",") - def sample_names = multi_meta_info[1].join(",") + //def multi_meta_info = multi_meta.collate(2).transpose() + def sample_types = sample_type.join(",") + def sample_names = sub_sample.join(",") def lib_csv = meta.id + "_lib.csv" """ diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index 0de0ac59..619787d6 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -23,8 +23,9 @@ workflow INPUT_CHECK { // [ val(meta), [ multimeta_s1, multimeta_s1 ], [ [reads_rep1], [reads_repN] ] ] .groupTuple(by: [0]) // needs to flatten due to last "groupTuple", so we now have reads as a single array as expected by - // nf-core modules: [ val(meta), [multi_meta], [ reads ] ] - .map { meta, multi_meta, reads -> [ meta, multi_meta.flatten(), 
reads.flatten() ] } + // nf-core modules: [ val(meta), [sample_type], [sub_sample], [ reads ] ] + .map { meta, sample_type, sub_sample, reads -> [ meta, sample_type.flatten(), sub_sample.flatten(), + reads.flatten() ] } .set { reads } versions = SAMPLESHEET_CHECK.out.versions } else { @@ -42,7 +43,7 @@ workflow INPUT_CHECK { } emit: - reads // channel: [ val(meta), [multi_meta], [ reads ] ] + reads // channel: [ val(meta), [*], [ reads ] ] versions = SAMPLESHEET_CHECK.out.versions // channel: [ versions.yml ] } @@ -82,9 +83,9 @@ def create_fastq_channel(LinkedHashMap row) { } // define meta_data for multiome - def multi_meta = [] - multi_meta = row.sample_type ? [row.sample_type] : [params.sample_type] + def sample_type = row.sample_type ? [row.sample_type] : [params.sample_type] + def sub_sample = "" if (params.aligner == "cellrangerarc"){ sub_sample = row.fastq_1.split("/")[-1].replaceAll("_S[0-9]+_L[0-9]+_R1_[0-9]+.fastq.gz","") fastqs.each{ @@ -93,13 +94,12 @@ def create_fastq_channel(LinkedHashMap row) { "${sub_sample} in common!\n${it}" } } - multi_meta.add(sub_sample) } fastq_meta = [ meta, fastqs ] if (params.aligner == "cellrangerarc"){ - fastq_meta = [ meta, multi_meta, fastqs ] + fastq_meta = [ meta, sample_type, sub_sample, fastqs ] } return fastq_meta From 0e049ce0587a6cd614132db9744a0b03fbe5e0bb Mon Sep 17 00:00:00 2001 From: Florian Date: Wed, 22 Nov 2023 18:08:32 +0100 Subject: [PATCH 46/89] black cleanup --- bin/generate_config.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/bin/generate_config.py b/bin/generate_config.py index 2eef125e..f2d818d1 100755 --- a/bin/generate_config.py +++ b/bin/generate_config.py @@ -2,10 +2,12 @@ import argparse if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Generate the config for cellranger-arc mkref. \ + parser = argparse.ArgumentParser( + description="Generate the config for cellranger-arc mkref. 
\ cellranger-arc mkref takes as input a configuration file that bundles various inputs to the tool. \ You can also create a config file on your own, please find more information here:\ - https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/advanced/references") + https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/advanced/references" + ) parser.add_argument("-f", "--fasta", dest="fasta", help="Name of the fasta file.") parser.add_argument("-g", "--gtf", dest="gtf", help="Name of the gtf file.") From a8923c615f9d3c4b2976ab0e43f0de0ff052f6d5 Mon Sep 17 00:00:00 2001 From: Florian Date: Wed, 22 Nov 2023 18:27:30 +0100 Subject: [PATCH 47/89] Adapting fastqc for cellrangerarc main.nf of fastqc and scrnaseq.nf --- subworkflows/local/fastqc.nf | 13 ++++++++++--- workflows/scrnaseq.nf | 1 + 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/subworkflows/local/fastqc.nf b/subworkflows/local/fastqc.nf index f18214a1..53e20857 100644 --- a/subworkflows/local/fastqc.nf +++ b/subworkflows/local/fastqc.nf @@ -8,9 +8,16 @@ workflow FASTQC_CHECK { ch_fastq main: - ch_fastq - .map { ch -> [ ch[0], ch[1] ] } - .set { ch_fastq } + + if( params.aligner == 'cellrangerarc'){ + ch_fastq + .map { ch -> [ ch[0], ch[3] ] } + .set { ch_fastq } + } else { + ch_fastq + .map { ch -> [ ch[0], ch[1] ] } + .set { ch_fastq } + } /* * FastQ QC using FASTQC diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf index 65895dc6..8b64e0f2 100644 --- a/workflows/scrnaseq.nf +++ b/workflows/scrnaseq.nf @@ -47,6 +47,7 @@ include { CELLRANGERARC_ALIGN } from "../subworkflows/local/align_cellrangerarc include { UNIVERSC_ALIGN } from "../subworkflows/local/align_universc" include { MTX_CONVERSION } from "../subworkflows/local/mtx_conversion" include { GTF_GENE_FILTER } from '../modules/local/gtf_gene_filter' + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IMPORT NF-CORE 
MODULES/SUBWORKFLOWS From 1d020ba8ddf490106eade2d7418d0991c6fc3239 Mon Sep 17 00:00:00 2001 From: Florian Date: Wed, 22 Nov 2023 18:27:48 +0100 Subject: [PATCH 48/89] Comment change to input_check.nf --- subworkflows/local/input_check.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index 619787d6..30b9cf2e 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -20,7 +20,7 @@ workflow INPUT_CHECK { .splitCsv ( header:true, sep:',' ) .map { create_fastq_channel(it) } // group replicate files together, modifies channel to - // [ val(meta), [ multimeta_s1, multimeta_s1 ], [ [reads_rep1], [reads_repN] ] ] + // [ val(meta), [sample_type], [sub_sample], [ [reads_rep1], [reads_repN] ] ] .groupTuple(by: [0]) // needs to flatten due to last "groupTuple", so we now have reads as a single array as expected by // nf-core modules: [ val(meta), [sample_type], [sub_sample], [ reads ] ] From 5fc6c051b23ef5efa355c138dcbe63003d406954 Mon Sep 17 00:00:00 2001 From: Florian Date: Fri, 24 Nov 2023 17:24:36 +0100 Subject: [PATCH 49/89] Changes to mtx to h5ad --- modules/local/mtx_to_h5ad.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf index ca9b1d48..84d98608 100644 --- a/modules/local/mtx_to_h5ad.nf +++ b/modules/local/mtx_to_h5ad.nf @@ -41,7 +41,7 @@ process MTX_TO_H5AD { // // run script // - if (params.aligner == 'cellranger' || params.aligner == 'cellrangerarc') + if (params.aligner in [ 'cellranger', 'cellrangerarc' ]) """ # convert file types mtx_to_h5ad.py \\ From 050de407bf66d771876057a1c96a2b48f30b5c29 Mon Sep 17 00:00:00 2001 From: Florian Date: Fri, 24 Nov 2023 17:24:54 +0100 Subject: [PATCH 50/89] Changes to mtx to seurat --- modules/local/mtx_to_seurat.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/mtx_to_seurat.nf 
b/modules/local/mtx_to_seurat.nf index 8d344035..73e260d2 100644 --- a/modules/local/mtx_to_seurat.nf +++ b/modules/local/mtx_to_seurat.nf @@ -19,7 +19,7 @@ process MTX_TO_SEURAT { script: def aligner = params.aligner - if (params.aligner == "cellranger" || params.aligner == "cellrangerarc") { + if (params.aligner in [ 'cellranger', 'cellrangerarc' ]) { matrix = "filtered_feature_bc_matrix/matrix.mtx.gz" barcodes = "filtered_feature_bc_matrix/barcodes.tsv.gz" features = "filtered_feature_bc_matrix/features.tsv.gz" From cbe54933960d68d1cbb4be5a35c2f5d3ec2c0ce1 Mon Sep 17 00:00:00 2001 From: Florian Date: Fri, 24 Nov 2023 17:41:04 +0100 Subject: [PATCH 51/89] Pretty fastqc.main --- subworkflows/local/fastqc.nf | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/subworkflows/local/fastqc.nf b/subworkflows/local/fastqc.nf index 53e20857..6825a9e0 100644 --- a/subworkflows/local/fastqc.nf +++ b/subworkflows/local/fastqc.nf @@ -9,15 +9,8 @@ workflow FASTQC_CHECK { main: - if( params.aligner == 'cellrangerarc'){ - ch_fastq - .map { ch -> [ ch[0], ch[3] ] } - .set { ch_fastq } - } else { - ch_fastq - .map { ch -> [ ch[0], ch[1] ] } - .set { ch_fastq } - } + def n = (params.aligner == 'cellrangerarc') ? 
3 : 1 + ch_fastq.map { ch -> [ ch[0], ch[n] ] }.set { ch_fastq } /* * FastQ QC using FASTQC From fce8e26f26ce892a9c5dc8684dd4c1ee1f8ccb50 Mon Sep 17 00:00:00 2001 From: Florian Date: Fri, 24 Nov 2023 17:41:28 +0100 Subject: [PATCH 52/89] Remove cellrangerarc_index --- nextflow.config | 2 +- nextflow_schema.json | 4 ---- subworkflows/local/align_cellrangerarc.nf | 10 +++++----- workflows/scrnaseq.nf | 7 ++----- 4 files changed, 8 insertions(+), 15 deletions(-) diff --git a/nextflow.config b/nextflow.config index 8c2488e3..9cc0a2d0 100644 --- a/nextflow.config +++ b/nextflow.config @@ -41,7 +41,6 @@ params { cellranger_index = null // Cellranger ARC parameters - cellrangerarc_index = null motifs = null sample_type = 'gex' @@ -208,6 +207,7 @@ profiles { } test { includeConfig 'conf/test.config' } test_full { includeConfig 'conf/test_full.config' } + test_multiome { includeConfig 'conf/test_multiome.config' } } // Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile diff --git a/nextflow_schema.json b/nextflow_schema.json index 31f95e5f..ea6df842 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -240,10 +240,6 @@ "description": "Params related to the Cellranger pipeline", "default": "", "properties": { - "cellrangerarc_index": { - "type": "string", - "description": "Specify a pre-calculated cellranger index. Readily prepared indexes can be obtained from the 10x Genomics website. " - }, "motifs": { "type": "string", "description": "Specify a motif file to create a cellranger-arc index. Can be taken, e.g., from the JASPAR database." 
diff --git a/subworkflows/local/align_cellrangerarc.nf b/subworkflows/local/align_cellrangerarc.nf index c2dffde6..b8acd2dd 100644 --- a/subworkflows/local/align_cellrangerarc.nf +++ b/subworkflows/local/align_cellrangerarc.nf @@ -13,16 +13,16 @@ workflow CELLRANGERARC_ALIGN { fasta gtf motifs - cellrangerarc_index + cellranger_index ch_fastq main: ch_versions = Channel.empty() - assert cellrangerarc_index || (fasta && gtf && motifs): + assert cellranger_index || (fasta && gtf && motifs): "Must provide either a cellranger-atac index or a bundle of a fasta file ('--fasta') + gtf file ('--gtf') + motif file (--motifs)." - if (!cellrangerarc_index) { + if (!cellranger_index) { // Filter GTF based on gene biotypes passed in params.modules CELLRANGERARC_MKGTF( gtf ) filtered_gtf = CELLRANGERARC_MKGTF.out.gtf @@ -35,13 +35,13 @@ workflow CELLRANGERARC_ALIGN { // Make reference genome CELLRANGERARC_MKREF( fasta, filtered_gtf, motifs, CELLRANGERARC_GENERATECONFIG.out.config, "cellrangerarc_reference" ) ch_versions = ch_versions.mix(CELLRANGERARC_MKREF.out.versions) - cellrangerarc_index = CELLRANGERARC_MKREF.out.reference + cellranger_index = CELLRANGERARC_MKREF.out.reference } // Obtain read counts CELLRANGERARC_COUNT ( ch_fastq, - cellrangerarc_index + cellranger_index ) ch_versions = ch_versions.mix(CELLRANGERARC_COUNT.out.versions) diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf index 8b64e0f2..053865a6 100644 --- a/workflows/scrnaseq.nf +++ b/workflows/scrnaseq.nf @@ -12,7 +12,7 @@ def checkPathParamList = [ params.input, params.multiqc_config, params.fasta, params.gtf, params.transcript_fasta, params.salmon_index, params.kallisto_index, params.star_index, params.txp2gene, params.barcode_whitelist, params.cellranger_index, - params.cellrangerarc_index, params.universc_index + params.universc_index ] for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } @@ -105,9 +105,6 @@ star_feature = params.star_feature //cellranger params 
ch_cellranger_index = params.cellranger_index ? file(params.cellranger_index) : [] -//cellrangerarc params -ch_cellrangerarc_index = params.cellrangerarc_index ? file(params.cellrangerarc_index) : [] - //universc params ch_universc_index = params.universc_index ? file(params.universc_index) : [] @@ -224,7 +221,7 @@ workflow SCRNASEQ { ch_genome_fasta, ch_filter_gtf, ch_motifs, - ch_cellrangerarc_index, + ch_cellranger_index, ch_fastq ) ch_versions = ch_versions.mix(CELLRANGERARC_ALIGN.out.ch_versions) From a46f63d090e1f4a996a0f00df5f727de5c3c6199 Mon Sep 17 00:00:00 2001 From: Florian Date: Fri, 24 Nov 2023 17:56:42 +0100 Subject: [PATCH 53/89] Pretty input:_check --- subworkflows/local/input_check.nf | 40 ++++++++++++------------------- 1 file changed, 15 insertions(+), 25 deletions(-) diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index 30b9cf2e..56fd4ddf 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -14,32 +14,22 @@ workflow INPUT_CHECK { reads = null versions = null - if (params.aligner == "cellrangerarc"){ - SAMPLESHEET_CHECK ( samplesheet ) - .csv - .splitCsv ( header:true, sep:',' ) - .map { create_fastq_channel(it) } - // group replicate files together, modifies channel to - // [ val(meta), [sample_type], [sub_sample], [ [reads_rep1], [reads_repN] ] ] - .groupTuple(by: [0]) - // needs to flatten due to last "groupTuple", so we now have reads as a single array as expected by - // nf-core modules: [ val(meta), [sample_type], [sub_sample], [ reads ] ] - .map { meta, sample_type, sub_sample, reads -> [ meta, sample_type.flatten(), sub_sample.flatten(), - reads.flatten() ] } - .set { reads } - versions = SAMPLESHEET_CHECK.out.versions + grouped_ch = + SAMPLESHEET_CHECK ( samplesheet ) + .csv + .splitCsv ( header:true, sep:',' ) + .map { create_fastq_channel(it) } + // group replicate files together, modifies channel to [ val(meta), [ [reads_rep1], [reads_repN] ] ] + .groupTuple(by: 
[0]) + + if (params.aligner == 'cellrangerarc' ) { + grouped_ch + .map { meta, sample_type, sub_sample, reads -> [ meta, sample_type.flatten(), sub_sample.flatten(), reads.flatten() ] } + .set { reads } } else { - SAMPLESHEET_CHECK ( samplesheet ) - .csv - .splitCsv ( header:true, sep:',' ) - .map { create_fastq_channel(it) } - // group replicate files together, modifies channel to [ val(meta), [ [reads_rep1], [reads_repN] ] ] - .groupTuple(by: [0]) - // needs to flatten due to last "groupTuple", so we now have reads as a single array as expected by - // nf-core modules: [ val(meta), [ reads ] ] - .map { meta, reads -> [ meta, reads.flatten() ] } - .set { reads } - versions = SAMPLESHEET_CHECK.out.versions + grouped_ch + .map { meta, reads -> [ meta, reads.flatten() ] } + .set { reads } } emit: From 509450ad71defd437d2026bdc744b0d2b915bb63 Mon Sep 17 00:00:00 2001 From: Florian Date: Fri, 24 Nov 2023 17:57:03 +0100 Subject: [PATCH 54/89] Pretty mtx_converstion --- subworkflows/local/mtx_conversion.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/mtx_conversion.nf b/subworkflows/local/mtx_conversion.nf index d1f8345a..958da400 100644 --- a/subworkflows/local/mtx_conversion.nf +++ b/subworkflows/local/mtx_conversion.nf @@ -15,7 +15,7 @@ workflow MTX_CONVERSION { ch_versions = Channel.empty() // Cellranger module output contains too many files which cause path collisions, we filter to the ones we need. 
- if ( params.aligner == "cellranger" || params.aligner == "cellrangerarc" ) { + if (params.aligner in [ 'cellranger', 'cellrangerarc' ]) { mtx_matrices = mtx_matrices.map { meta, mtx_files -> [ meta, mtx_files.findAll { it.toString().contains("filtered_feature_bc_matrix") } ] } From d2c0eee23150a8155edb8b2829d3828773f23b25 Mon Sep 17 00:00:00 2001 From: Florian Date: Mon, 27 Nov 2023 15:47:13 +0100 Subject: [PATCH 55/89] Removing global parameter sample_type --- nextflow.config | 1 - nextflow_schema.json | 4 ---- subworkflows/local/align_cellrangerarc.nf | 2 +- subworkflows/local/input_check.nf | 2 +- 4 files changed, 2 insertions(+), 7 deletions(-) diff --git a/nextflow.config b/nextflow.config index 9cc0a2d0..9b105f27 100644 --- a/nextflow.config +++ b/nextflow.config @@ -42,7 +42,6 @@ params { // Cellranger ARC parameters motifs = null - sample_type = 'gex' // UniverSC paramaters universc_index = null diff --git a/nextflow_schema.json b/nextflow_schema.json index ea6df842..493ca47c 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -243,10 +243,6 @@ "motifs": { "type": "string", "description": "Specify a motif file to create a cellranger-arc index. Can be taken, e.g., from the JASPAR database." - }, - "sample_type": { - "type": "string", - "description": "Specify the type of data (gex or atac)." } } }, diff --git a/subworkflows/local/align_cellrangerarc.nf b/subworkflows/local/align_cellrangerarc.nf index b8acd2dd..8313fd47 100644 --- a/subworkflows/local/align_cellrangerarc.nf +++ b/subworkflows/local/align_cellrangerarc.nf @@ -20,7 +20,7 @@ workflow CELLRANGERARC_ALIGN { ch_versions = Channel.empty() assert cellranger_index || (fasta && gtf && motifs): - "Must provide either a cellranger-atac index or a bundle of a fasta file ('--fasta') + gtf file ('--gtf') + motif file (--motifs)." + "Must provide either a cellranger index or a bundle of a fasta file ('--fasta') + gtf file ('--gtf') + motif file (--motifs)." 
if (!cellranger_index) { // Filter GTF based on gene biotypes passed in params.modules diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index 56fd4ddf..2e06e889 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -73,7 +73,7 @@ def create_fastq_channel(LinkedHashMap row) { } // define meta_data for multiome - def sample_type = row.sample_type ? [row.sample_type] : [params.sample_type] + def sample_type = row.sample_type ? [row.sample_type] : ['gex'] def sub_sample = "" if (params.aligner == "cellrangerarc"){ From 7ffe8b07a15064377766913798bbd8086c67bc94 Mon Sep 17 00:00:00 2001 From: Florian Date: Tue, 28 Nov 2023 18:21:24 +0100 Subject: [PATCH 56/89] Addig the config generation into the mkref module and also adding the option to procide your config file --- bin/generate_config.py | 2 +- modules.json | 5 +++ modules/local/cellrangerarc/mkref/main.nf | 26 ++++++++++++-- .../local/generate_cellranger_mkref_config.nf | 36 ------------------- nextflow.config | 2 ++ nextflow_schema.json | 8 +++++ subworkflows/local/align_cellrangerarc.nf | 19 ++++++---- workflows/scrnaseq.nf | 4 ++- 8 files changed, 56 insertions(+), 46 deletions(-) delete mode 100644 modules/local/generate_cellranger_mkref_config.nf diff --git a/bin/generate_config.py b/bin/generate_config.py index f2d818d1..aae903a4 100755 --- a/bin/generate_config.py +++ b/bin/generate_config.py @@ -25,7 +25,7 @@ config.write('\tinput_fasta: ["{}"]\n'.format(args["fasta"])) config.write('\tinput_gtf: ["{}"]\n'.format(args["gtf"])) config.write('\tinput_motifs: "{}"\n'.format(args["motifs"])) - if args["add"] != "none": + if args["add"] != None: config.write(args["add"] + "\n") config.write("}") config.close() diff --git a/modules.json b/modules.json index 5b4e4a3f..0d7aefd8 100644 --- a/modules.json +++ b/modules.json @@ -64,6 +64,11 @@ "branch": "master", "git_sha": "cf67a6d7d043e2bd6a3099be84c72046fc71508f", "installed_by": ["modules"] + }, + 
"unzip": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] } } } diff --git a/modules/local/cellrangerarc/mkref/main.nf b/modules/local/cellrangerarc/mkref/main.nf index 41e9db30..88029ebd 100644 --- a/modules/local/cellrangerarc/mkref/main.nf +++ b/modules/local/cellrangerarc/mkref/main.nf @@ -1,5 +1,5 @@ process CELLRANGERARC_MKREF { - tag "$reference_config" + tag "$reference_name" label 'process_medium' container "nf-core/cellranger-arc:2.0.2" @@ -18,17 +18,39 @@ process CELLRANGERARC_MKREF { output: path "${reference_name}", emit: reference + path "config" , emit: config path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: + def fast_name = fasta.name + def gtf_name = gtf.name + def motifs_name = motifs.name + def reference_config = reference_config.name def args = task.ext.args ?: '' + + if ( !reference_name ){ + reference_name = "cellrangerarc_reference" + } + """ + if [ $reference_config == [] ]; then + generate_config.py \\ + --fasta $fast_name \\ + --gtf $gtf_name \\ + --motifs $motifs_name \\ + $args + else + if [ ! -f config ]; then + mv -i $reference_config config + fi + fi + cellranger-arc \\ mkref \\ - --config=$reference_config \\ + --config=config \\ $args cat <<-END_VERSIONS > versions.yml diff --git a/modules/local/generate_cellranger_mkref_config.nf b/modules/local/generate_cellranger_mkref_config.nf deleted file mode 100644 index 4bc474c8..00000000 --- a/modules/local/generate_cellranger_mkref_config.nf +++ /dev/null @@ -1,36 +0,0 @@ -process CELLRANGERARC_GENERATECONFIG { - tag "$samplesheet" - label 'process_low' - - conda (params.enable_conda ? "conda-forge::python=3.8.3" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/python:3.8.3' : - 'quay.io/biocontainers/python:3.8.3' }" - - input: - val(fasta) - val(gtf) - val(motifs) - - output: - path '*config' , emit: config - path "versions.yml", emit: versions - - when: - task.ext.when == null || task.ext.when - - script: // This script is bundled with the pipeline, in nf-core/scrnaseq/bin/ - def args = task.ext.args ?: '' - """ - generate_config.py \\ - --fasta $fasta \\ - --gtf $gtf \\ - --motifs $motifs \\ - $args - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - python: \$(python --version | sed 's/Python //g') - END_VERSIONS - """ -} diff --git a/nextflow.config b/nextflow.config index 9b105f27..6d35d957 100644 --- a/nextflow.config +++ b/nextflow.config @@ -42,6 +42,8 @@ params { // Cellranger ARC parameters motifs = null + cellrangerarc_config = null + cellrangerarc_reference = null // UniverSC paramaters universc_index = null diff --git a/nextflow_schema.json b/nextflow_schema.json index 493ca47c..b94728fc 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -243,6 +243,14 @@ "motifs": { "type": "string", "description": "Specify a motif file to create a cellranger-arc index. Can be taken, e.g., from the JASPAR database." + }, + "cellrangerarc_config": { + "type": "string", + "description": "Specify a config file to create the cellranger-arc index." + }, + "cellrangerarc_reference": { + "type": "string", + "description": "Specify the genome reference name in the config file to create a cellranger-arc index." 
} } }, diff --git a/subworkflows/local/align_cellrangerarc.nf b/subworkflows/local/align_cellrangerarc.nf index 8313fd47..8355e0a3 100644 --- a/subworkflows/local/align_cellrangerarc.nf +++ b/subworkflows/local/align_cellrangerarc.nf @@ -4,7 +4,6 @@ include {CELLRANGERARC_MKGTF} from "../../modules/local/cellrangerarc/mkgtf/main.nf" include {CELLRANGERARC_MKREF} from "../../modules/local/cellrangerarc/mkref/main.nf" -include {CELLRANGERARC_GENERATECONFIG} from "../../modules/local/generate_cellranger_mkref_config.nf" include {CELLRANGERARC_COUNT} from "../../modules/local/cellrangerarc/count/main.nf" // Define workflow to subset and index a genome region fasta file @@ -15,6 +14,7 @@ workflow CELLRANGERARC_ALIGN { motifs cellranger_index ch_fastq + cellrangerarc_config main: ch_versions = Channel.empty() @@ -28,12 +28,19 @@ workflow CELLRANGERARC_ALIGN { filtered_gtf = CELLRANGERARC_MKGTF.out.gtf ch_versions = ch_versions.mix(CELLRANGERARC_MKGTF.out.versions) - // Generate the config for mkref - CELLRANGERARC_GENERATECONFIG(fasta.name, filtered_gtf.name, motifs.name) - ch_versions = ch_versions.mix(CELLRANGERARC_GENERATECONFIG.out.versions) - // Make reference genome - CELLRANGERARC_MKREF( fasta, filtered_gtf, motifs, CELLRANGERARC_GENERATECONFIG.out.config, "cellrangerarc_reference" ) + if ( ( params.cellrangerarc_reference && !cellrangerarc_config ) || + ( !params.cellrangerarc_reference && cellrangerarc_config ) ) { + exit 1, "ERROR: If you provide a config file you also have to specific the reference name and vice versa." 
+ } else { + + cellrangerarc_reference = 'cellrangerarc_reference' + if ( params.cellrangerarc_reference ){ + cellrangerarc_reference = params.cellrangerarc_reference + } + + CELLRANGERARC_MKREF( fasta, filtered_gtf, motifs, cellrangerarc_config, cellrangerarc_reference ) + } ch_versions = ch_versions.mix(CELLRANGERARC_MKREF.out.versions) cellranger_index = CELLRANGERARC_MKREF.out.reference } diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf index 053865a6..ffdb8421 100644 --- a/workflows/scrnaseq.nf +++ b/workflows/scrnaseq.nf @@ -78,6 +78,7 @@ ch_genome_fasta = Channel.value(params.fasta ? file(params.fasta) : []) ch_gtf = params.gtf ? file(params.gtf) : [] ch_transcript_fasta = params.transcript_fasta ? file(params.transcript_fasta): [] ch_motifs = params.motifs ? file(params.motifs) : [] +ch_cellrangerarc_config = params.cellrangerarc_config ? file(params.cellrangerarc_config) : [] ch_txp2gene = params.txp2gene ? file(params.txp2gene) : [] ch_multiqc_alevin = Channel.empty() ch_multiqc_star = Channel.empty() @@ -222,7 +223,8 @@ workflow SCRNASEQ { ch_filter_gtf, ch_motifs, ch_cellranger_index, - ch_fastq + ch_fastq, + ch_cellrangerarc_config ) ch_versions = ch_versions.mix(CELLRANGERARC_ALIGN.out.ch_versions) ch_mtx_matrices = ch_mtx_matrices.mix(CELLRANGERARC_ALIGN.out.cellranger_arc_out) From 3f9218994974fd8900545984bf66097f5ec0f361 Mon Sep 17 00:00:00 2001 From: Florian Date: Tue, 28 Nov 2023 18:41:32 +0100 Subject: [PATCH 57/89] Documentation added to output.md --- docs/output.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/docs/output.md b/docs/output.md index c1e2b013..1fca8b5c 100644 --- a/docs/output.md +++ b/docs/output.md @@ -17,6 +17,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - [STARsolo](#starsolo) - [Salmon Alevin & AlevinQC](#salmon-alevin--alevinqc) - [Cellranger](#cellranger) + - [Cellranger ARC](#cellrangerarc) - [UniverSC](#universc) - [Other output data](#other-output-data) - 
[MultiQC](#multiqc) @@ -103,6 +104,14 @@ Cell Ranger is a set of analysis scripts that processes 10X Chromium single cell - Contains the mapped BAM files, filtered and unfiltered HDF5 matrices and output metrics created by Cellranger +## Cellranger ARC + +Cell Ranger ARC is a set of analysis pipelines that process Chromium Single Cell Multiome ATAC + Gene Expression sequencing data to generate a variety of analyses pertaining to gene expression (GEX), chromatin accessibility, and their linkage. Furthermore, since the ATAC and GEX measurements are on the very same cell, we are able to perform analyses that link chromatin accessibility and GEX. See [Cellranger ARC](https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/what-is-cell-ranger-arc) for more information on Cellranger. + +**Output directory: `results/cellrangerarc`** + +- Contains the mapped BAM files, filtered and unfiltered HDF5 matrices and output metrics created by Cellranger ARC + ## UniverSC UniverSC is a wrapper that calls an open-source implementation of Cell Ranger v3.0.2 and adjusts run parameters for compatibility with a wide ranger of technologies. 
From 2852db356e1373995b0d926d4f73b14fbee44a03 Mon Sep 17 00:00:00 2001 From: Florian Date: Tue, 28 Nov 2023 18:42:47 +0100 Subject: [PATCH 58/89] Documentation added to output.md --- docs/output.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/output.md b/docs/output.md index 1fca8b5c..7e9f0cd8 100644 --- a/docs/output.md +++ b/docs/output.md @@ -17,7 +17,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - [STARsolo](#starsolo) - [Salmon Alevin & AlevinQC](#salmon-alevin--alevinqc) - [Cellranger](#cellranger) - - [Cellranger ARC](#cellrangerarc) + - [Cellranger ARC](#cellranger-arc) - [UniverSC](#universc) - [Other output data](#other-output-data) - [MultiQC](#multiqc) From a39c4d9dd71e22ce66930a8b9e76fb53a4ad4ad0 Mon Sep 17 00:00:00 2001 From: Florian Date: Wed, 29 Nov 2023 11:53:31 +0100 Subject: [PATCH 59/89] Update usage.md for cellranger-arc --- docs/usage.md | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/docs/usage.md b/docs/usage.md index f90dc242..0de496a8 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -97,6 +97,55 @@ UniverSC automatically updates the barcode whitelist and chemistry parameters. U Currently only 3\' scRNA-Seq parameters are supported in nextflow, although chemistry parameters for 5\' scRNA-Seq and full-length scRNA-Seq libraries are supported by teh container. 
+### If using cellranger-arc + +#### Automatic file name detection + +This pipeline currently **does not** automatically renames input FASTQ files to follow the +[naming convention by 10x](https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/fastq-input): + +``` +[Sample Name]_S1_L00[Lane Number]_[Read Type]_001.fastq.gz +``` + +For more details, see + +- [this issue](https://github.com/nf-core/scrnaseq/issues/241), discussing various mechanisms to deal with non-conformant filenames +- [the README of the cellranger/count module](https://github.com/nf-core/modules/blob/master/modules/nf-core/cellranger/count/README.md) which demonstrates that renaming files does not affect the results. +- [the code for renaming files in the cellranger/count module](https://github.com/nf-core/modules/blob/master/modules/nf-core/cellranger/count/templates/cellranger_count.py) +- [the code for renaming files in UniverSC](https://github.com/minoda-lab/universc/blob/99a20652430c1dc9f962536a2793536f643810b7/launch_universc.sh#L1411-L1609) + +#### Sample sheet definition + +If you are using cellranger-arc you have to add the column *sample_type* (atac for scATAC or gex for scRNA) and *fastq_barcode* (part of the scATAC data) to your samplesheet as an input. + +**Beware of the following points:** + - It is important that you give your scRNA and scATAC different [Sample Name]s. + - Check first which file is your barcode fastq file for your scATAC data [see](https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/using/using/fastq-input). + - If you have more than one sequencing run then you have to give them another suffix (e.g., rep*) to your [Sample Name] [see](https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/using/fastq-input#atac_quick_start). 
+ +An example samplesheet for a dataset called test_scARC that has two sequencing runs for the scATAC and one seqeuncing run +from two lanes for the scRNA could look like this: + +sample,fastq_1,fastq_2,fastq_barcode,sample_type +test_scARC,path/test_scARC_atac_rep1_S1_L001_R1_001.fastq.gz,path/test_scARC_atac_rep1_S1_L001_R2_001.fastq.gz,path/test_scARC_atac_rep1_S1_L001_I2_001.fastq.gz,atac +test_scARC,path/test_scARC_atac_rep2_S2_L001_R1_001.fastq.gz,path/test_scARC_atac_rep2_S2_L001_R2_001.fastq.gz,path/test_scARC_atac_rep2_S2_L001_I2_001.fastq.gz,atac +test_scARC,path/test_scARC_gex_S1_L001_R1_001.fastq.gz,path/test_scARC_gex_S1_L001_R2_001.fastq.gz,,gex +test_scARC,path/test_scARC_gex_S1_L002_R1_001.fastq.gz,path/test_scARC_gex_S1_L002_R2_001.fastq.gz,,gex + +#### Config file and index + +Cellranger-arc needs a reference index directory that you can provide with `--cellranger_index`. Be aware, you can use +for cellranger-arc the same index you use for cellranger [see](https://kb.10xgenomics.com/hc/en-us/articles/4408281606797-Are-the-references-interchangeable-between-pipelines). +Yet, a cellranger-arc index might include additional data (e.g., TF binding motifs). Therefore, please check first if +you have to create a new cellranger-arc index ([see here](https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/advanced/references) for +more information) + +If you decide to create a cellranger-arc index, then you need to create a config file to generate the index. The pipeline +can do this autmatically for you if you provide a `--fasta`, `--gtf`, and an optional `--motif` file. However, you can +also decide to provide your own config file with `--cellrangerarc_config`, then you also have to specify with `--cellrangerarc_reference` +the reference genome name that you have used and stated as genome: [""] in your config file. 
+ ## Running the pipeline The minimum typical command for running the pipeline is as follows: From e24df897ab104dd4fd4a64c33ae9b5e69baeb2a4 Mon Sep 17 00:00:00 2001 From: Florian Date: Wed, 29 Nov 2023 12:00:37 +0100 Subject: [PATCH 60/89] Update usage.md for cellranger-arc --- docs/usage.md | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 0de496a8..6c393861 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -108,12 +108,7 @@ This pipeline currently **does not** automatically renames input FASTQ files to [Sample Name]_S1_L00[Lane Number]_[Read Type]_001.fastq.gz ``` -For more details, see - -- [this issue](https://github.com/nf-core/scrnaseq/issues/241), discussing various mechanisms to deal with non-conformant filenames -- [the README of the cellranger/count module](https://github.com/nf-core/modules/blob/master/modules/nf-core/cellranger/count/README.md) which demonstrates that renaming files does not affect the results. -- [the code for renaming files in the cellranger/count module](https://github.com/nf-core/modules/blob/master/modules/nf-core/cellranger/count/templates/cellranger_count.py) -- [the code for renaming files in UniverSC](https://github.com/minoda-lab/universc/blob/99a20652430c1dc9f962536a2793536f643810b7/launch_universc.sh#L1411-L1609) +Thus please make sure your files follow this naming convention. #### Sample sheet definition @@ -121,8 +116,8 @@ If you are using cellranger-arc you have to add the column *sample_type* (atac f **Beware of the following points:** - It is important that you give your scRNA and scATAC different [Sample Name]s. - - Check first which file is your barcode fastq file for your scATAC data [see](https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/using/using/fastq-input). 
- - If you have more than one sequencing run then you have to give them another suffix (e.g., rep*) to your [Sample Name] [see](https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/using/fastq-input#atac_quick_start). + - Check first which file is your barcode fastq file for your scATAC data ([see](https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/using/using/fastq-input)). + - If you have more than one sequencing run then you have to give them another suffix (e.g., rep*) to your [Sample Name] ([see](https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/using/fastq-input#atac_quick_start)). An example samplesheet for a dataset called test_scARC that has two sequencing runs for the scATAC and one seqeuncing run from two lanes for the scRNA could look like this: @@ -136,15 +131,15 @@ test_scARC,path/test_scARC_gex_S1_L002_R1_001.fastq.gz,path/test_scARC_gex_S1_L0 #### Config file and index Cellranger-arc needs a reference index directory that you can provide with `--cellranger_index`. Be aware, you can use -for cellranger-arc the same index you use for cellranger [see](https://kb.10xgenomics.com/hc/en-us/articles/4408281606797-Are-the-references-interchangeable-between-pipelines). -Yet, a cellranger-arc index might include additional data (e.g., TF binding motifs). Therefore, please check first if +for cellranger-arc the same index you use for cellranger ((see](https://kb.10xgenomics.com/hc/en-us/articles/4408281606797-Are-the-references-interchangeable-between-pipelines)). +Yet, a cellranger-arc index might include additional data (e.g., TF binding motifs). 
Therefore, please first check if you have to create a new cellranger-arc index ([see here](https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/advanced/references) for more information) If you decide to create a cellranger-arc index, then you need to create a config file to generate the index. The pipeline can do this autmatically for you if you provide a `--fasta`, `--gtf`, and an optional `--motif` file. However, you can also decide to provide your own config file with `--cellrangerarc_config`, then you also have to specify with `--cellrangerarc_reference` -the reference genome name that you have used and stated as genome: [""] in your config file. +the reference genome name that you have used and stated as *genome:* in your config file. ## Running the pipeline From 561720cb1121e123c77073b3e37e47a5ce4b4bfd Mon Sep 17 00:00:00 2001 From: Florian Date: Wed, 29 Nov 2023 12:00:58 +0100 Subject: [PATCH 61/89] Update usage.md for cellranger-arc --- docs/usage.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/usage.md b/docs/usage.md index 6c393861..07a6124e 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -131,7 +131,7 @@ test_scARC,path/test_scARC_gex_S1_L002_R1_001.fastq.gz,path/test_scARC_gex_S1_L0 #### Config file and index Cellranger-arc needs a reference index directory that you can provide with `--cellranger_index`. Be aware, you can use -for cellranger-arc the same index you use for cellranger ((see](https://kb.10xgenomics.com/hc/en-us/articles/4408281606797-Are-the-references-interchangeable-between-pipelines)). +for cellranger-arc the same index you use for cellranger ([see](https://kb.10xgenomics.com/hc/en-us/articles/4408281606797-Are-the-references-interchangeable-between-pipelines)). Yet, a cellranger-arc index might include additional data (e.g., TF binding motifs). 
Therefore, please first check if you have to create a new cellranger-arc index ([see here](https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/advanced/references) for more information) From d7f100cc173db30752d86e24b442595f89feadad Mon Sep 17 00:00:00 2001 From: Florian Date: Wed, 29 Nov 2023 12:58:01 +0100 Subject: [PATCH 62/89] Making motif file optional --- bin/generate_config.py | 7 ++++--- subworkflows/local/align_cellrangerarc.nf | 22 ++++++++++------------ 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/bin/generate_config.py b/bin/generate_config.py index aae903a4..91d0a209 100755 --- a/bin/generate_config.py +++ b/bin/generate_config.py @@ -9,8 +9,8 @@ https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/advanced/references" ) - parser.add_argument("-f", "--fasta", dest="fasta", help="Name of the fasta file.") - parser.add_argument("-g", "--gtf", dest="gtf", help="Name of the gtf file.") + parser.add_argument("-f", "--fasta", dest="fasta", help="Name of the fasta file.", required=True) + parser.add_argument("-g", "--gtf", dest="gtf", help="Name of the gtf file.", required=True) parser.add_argument("-m", "--motifs", dest="motifs", help="Name of the motifs file.") parser.add_argument("-a", "--add", dest="add", help="Additional filter line.") @@ -24,7 +24,8 @@ config.write('\tgenome: ["cellrangerarc_reference"]\n') config.write('\tinput_fasta: ["{}"]\n'.format(args["fasta"])) config.write('\tinput_gtf: ["{}"]\n'.format(args["gtf"])) - config.write('\tinput_motifs: "{}"\n'.format(args["motifs"])) + if args["motifs"] != '[]': + config.write('\tinput_motifs: "{}"\n'.format(args["motifs"])) if args["add"] != None: config.write(args["add"] + "\n") config.write("}") diff --git a/subworkflows/local/align_cellrangerarc.nf b/subworkflows/local/align_cellrangerarc.nf index 8355e0a3..b33cd61b 100644 --- a/subworkflows/local/align_cellrangerarc.nf +++ 
b/subworkflows/local/align_cellrangerarc.nf @@ -19,8 +19,8 @@ workflow CELLRANGERARC_ALIGN { main: ch_versions = Channel.empty() - assert cellranger_index || (fasta && gtf && motifs): - "Must provide either a cellranger index or a bundle of a fasta file ('--fasta') + gtf file ('--gtf') + motif file (--motifs)." + assert cellranger_index || (fasta && gtf): + "Must provide either a cellranger index or a bundle of a fasta file ('--fasta') + gtf file ('--gtf')." if (!cellranger_index) { // Filter GTF based on gene biotypes passed in params.modules @@ -29,18 +29,16 @@ workflow CELLRANGERARC_ALIGN { ch_versions = ch_versions.mix(CELLRANGERARC_MKGTF.out.versions) // Make reference genome - if ( ( params.cellrangerarc_reference && !cellrangerarc_config ) || - ( !params.cellrangerarc_reference && cellrangerarc_config ) ) { - exit 1, "ERROR: If you provide a config file you also have to specific the reference name and vice versa." - } else { + assert ( ( !params.cellrangerarc_reference && !cellrangerarc_config ) || + ( params.cellrangerarc_reference && cellrangerarc_config ) ) : + "If you provide a config file you also have to specific the reference name and vice versa." 
- cellrangerarc_reference = 'cellrangerarc_reference' - if ( params.cellrangerarc_reference ){ - cellrangerarc_reference = params.cellrangerarc_reference - } - - CELLRANGERARC_MKREF( fasta, filtered_gtf, motifs, cellrangerarc_config, cellrangerarc_reference ) + cellrangerarc_reference = 'cellrangerarc_reference' + if ( params.cellrangerarc_reference ){ + cellrangerarc_reference = params.cellrangerarc_reference } + + CELLRANGERARC_MKREF( fasta, filtered_gtf, motifs, cellrangerarc_config, cellrangerarc_reference ) ch_versions = ch_versions.mix(CELLRANGERARC_MKREF.out.versions) cellranger_index = CELLRANGERARC_MKREF.out.reference } From 67aa26da63ab072ba5644a4f2f4eb529a1c08f6d Mon Sep 17 00:00:00 2001 From: Florian Date: Wed, 29 Nov 2023 13:01:08 +0100 Subject: [PATCH 63/89] Prettier for usage.md --- docs/usage.md | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 07a6124e..9b37ee6f 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -112,12 +112,13 @@ Thus please make sure your files follow this naming convention. #### Sample sheet definition -If you are using cellranger-arc you have to add the column *sample_type* (atac for scATAC or gex for scRNA) and *fastq_barcode* (part of the scATAC data) to your samplesheet as an input. +If you are using cellranger-arc you have to add the column _sample_type_ (atac for scATAC or gex for scRNA) and _fastq_barcode_ (part of the scATAC data) to your samplesheet as an input. **Beware of the following points:** - - It is important that you give your scRNA and scATAC different [Sample Name]s. - - Check first which file is your barcode fastq file for your scATAC data ([see](https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/using/using/fastq-input)). 
- - If you have more than one sequencing run then you have to give them another suffix (e.g., rep*) to your [Sample Name] ([see](https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/using/fastq-input#atac_quick_start)). + +- It is important that you give your scRNA and scATAC different [Sample Name]s. +- Check first which file is your barcode fastq file for your scATAC data ([see](https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/using/using/fastq-input)). +- If you have more than one sequencing run then you have to give them another suffix (e.g., rep\*) to your [Sample Name] ([see](https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/using/fastq-input#atac_quick_start)). An example samplesheet for a dataset called test_scARC that has two sequencing runs for the scATAC and one seqeuncing run from two lanes for the scRNA could look like this: @@ -130,16 +131,16 @@ test_scARC,path/test_scARC_gex_S1_L002_R1_001.fastq.gz,path/test_scARC_gex_S1_L0 #### Config file and index -Cellranger-arc needs a reference index directory that you can provide with `--cellranger_index`. Be aware, you can use +Cellranger-arc needs a reference index directory that you can provide with `--cellranger_index`. Be aware, you can use for cellranger-arc the same index you use for cellranger ([see](https://kb.10xgenomics.com/hc/en-us/articles/4408281606797-Are-the-references-interchangeable-between-pipelines)). -Yet, a cellranger-arc index might include additional data (e.g., TF binding motifs). Therefore, please first check if -you have to create a new cellranger-arc index ([see here](https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/advanced/references) for +Yet, a cellranger-arc index might include additional data (e.g., TF binding motifs). 
Therefore, please first check if +you have to create a new cellranger-arc index ([see here](https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/advanced/references) for more information) If you decide to create a cellranger-arc index, then you need to create a config file to generate the index. The pipeline can do this autmatically for you if you provide a `--fasta`, `--gtf`, and an optional `--motif` file. However, you can -also decide to provide your own config file with `--cellrangerarc_config`, then you also have to specify with `--cellrangerarc_reference` -the reference genome name that you have used and stated as *genome:* in your config file. +also decide to provide your own config file with `--cellrangerarc_config`, then you also have to specify with `--cellrangerarc_reference` +the reference genome name that you have used and stated as _genome:_ in your config file. ## Running the pipeline From df964d81d539e0202b85df5340fee2952073b3f0 Mon Sep 17 00:00:00 2001 From: Florian Date: Wed, 29 Nov 2023 13:01:51 +0100 Subject: [PATCH 64/89] Black generate_config.py --- bin/generate_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/generate_config.py b/bin/generate_config.py index 91d0a209..c01260dd 100755 --- a/bin/generate_config.py +++ b/bin/generate_config.py @@ -24,7 +24,7 @@ config.write('\tgenome: ["cellrangerarc_reference"]\n') config.write('\tinput_fasta: ["{}"]\n'.format(args["fasta"])) config.write('\tinput_gtf: ["{}"]\n'.format(args["gtf"])) - if args["motifs"] != '[]': + if args["motifs"] != "[]": config.write('\tinput_motifs: "{}"\n'.format(args["motifs"])) if args["add"] != None: config.write(args["add"] + "\n") From a74de9edb80a50e0625fda7ff3b1580fea480f7f Mon Sep 17 00:00:00 2001 From: heylf Date: Sun, 3 Dec 2023 16:41:33 +0100 Subject: [PATCH 65/89] Update docs/usage.md Co-authored-by: Gregor Sturm --- docs/usage.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/usage.md 
b/docs/usage.md index 9b37ee6f..c9e96eb0 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -123,11 +123,13 @@ If you are using cellranger-arc you have to add the column _sample_type_ (atac f An example samplesheet for a dataset called test_scARC that has two sequencing runs for the scATAC and one seqeuncing run from two lanes for the scRNA could look like this: +```csv sample,fastq_1,fastq_2,fastq_barcode,sample_type test_scARC,path/test_scARC_atac_rep1_S1_L001_R1_001.fastq.gz,path/test_scARC_atac_rep1_S1_L001_R2_001.fastq.gz,path/test_scARC_atac_rep1_S1_L001_I2_001.fastq.gz,atac test_scARC,path/test_scARC_atac_rep2_S2_L001_R1_001.fastq.gz,path/test_scARC_atac_rep2_S2_L001_R2_001.fastq.gz,path/test_scARC_atac_rep2_S2_L001_I2_001.fastq.gz,atac test_scARC,path/test_scARC_gex_S1_L001_R1_001.fastq.gz,path/test_scARC_gex_S1_L001_R2_001.fastq.gz,,gex test_scARC,path/test_scARC_gex_S1_L002_R1_001.fastq.gz,path/test_scARC_gex_S1_L002_R2_001.fastq.gz,,gex +``` #### Config file and index From 7dba557b599f5e43f77d13efe978bcb11559cb97 Mon Sep 17 00:00:00 2001 From: heylf Date: Sun, 3 Dec 2023 16:41:55 +0100 Subject: [PATCH 66/89] Update nextflow_schema.json Co-authored-by: Gregor Sturm --- nextflow_schema.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index b94728fc..b9758014 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -250,7 +250,7 @@ }, "cellrangerarc_reference": { "type": "string", - "description": "Specify the genome reference name in the config file to create a cellranger-arc index." + "description": "Specify the genome reference name used in the config file to create a cellranger-arc index." 
} } }, From 64bdd3d3592aa2db7203fc118fb713983ff42179 Mon Sep 17 00:00:00 2001 From: nf-core-bot Date: Tue, 19 Dec 2023 18:36:47 +0000 Subject: [PATCH 67/89] Template update for nf-core/tools version 2.11 --- .github/CONTRIBUTING.md | 3 + .github/PULL_REQUEST_TEMPLATE.md | 1 + .github/workflows/ci.yml | 2 +- .github/workflows/fix-linting.yml | 4 +- .github/workflows/linting.yml | 12 +- .gitpod.yml | 4 +- CHANGELOG.md | 2 +- README.md | 17 +-- assets/multiqc_config.yml | 6 +- assets/slackreport.json | 2 +- conf/modules.config | 2 +- docs/usage.md | 4 +- lib/NfcoreTemplate.groovy | 32 ++--- modules.json | 6 +- .../dumpsoftwareversions/environment.yml | 7 ++ .../custom/dumpsoftwareversions/main.nf | 6 +- .../custom/dumpsoftwareversions/meta.yml | 7 +- .../dumpsoftwareversions/tests/main.nf.test | 38 ++++++ .../tests/main.nf.test.snap | 27 +++++ .../dumpsoftwareversions/tests/tags.yml | 2 + modules/nf-core/fastqc/environment.yml | 7 ++ modules/nf-core/fastqc/main.nf | 10 +- modules/nf-core/fastqc/meta.yml | 5 + modules/nf-core/fastqc/tests/main.nf.test | 109 ++++++++++++++++++ .../nf-core/fastqc/tests/main.nf.test.snap | 10 ++ modules/nf-core/fastqc/tests/tags.yml | 2 + modules/nf-core/multiqc/environment.yml | 7 ++ modules/nf-core/multiqc/main.nf | 8 +- modules/nf-core/multiqc/meta.yml | 11 +- modules/nf-core/multiqc/tests/main.nf.test | 63 ++++++++++ modules/nf-core/multiqc/tests/tags.yml | 2 + nextflow.config | 12 +- 32 files changed, 361 insertions(+), 69 deletions(-) create mode 100644 modules/nf-core/custom/dumpsoftwareversions/environment.yml create mode 100644 modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test create mode 100644 modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap create mode 100644 modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml create mode 100644 modules/nf-core/fastqc/environment.yml create mode 100644 modules/nf-core/fastqc/tests/main.nf.test create mode 100644 
modules/nf-core/fastqc/tests/main.nf.test.snap create mode 100644 modules/nf-core/fastqc/tests/tags.yml create mode 100644 modules/nf-core/multiqc/environment.yml create mode 100644 modules/nf-core/multiqc/tests/main.nf.test create mode 100644 modules/nf-core/multiqc/tests/tags.yml diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index a66c8d8b..597c854d 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -27,6 +27,9 @@ If you're not used to this workflow with git, you can start with some [docs from ## Tests +You can optionally test your changes by running the pipeline locally. Then it is recommended to use the `debug` profile to +receive warnings about process selectors and other debug info. Example: `nextflow run . -profile debug,test,docker --outdir `. + When you create a pull request with changes, [GitHub Actions](https://github.com/features/actions) will run automatic tests. Typically, pull-requests are only fully reviewed when these tests are passing, though of course we can help out before then. diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index e66587f6..073b2953 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -19,6 +19,7 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/scrn - [ ] If necessary, also make a PR on the nf-core/scrnaseq _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. - [ ] Make sure your code lints (`nf-core lint`). - [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir `). +- [ ] Check for unexpected warnings in debug mode (`nextflow run . -profile debug,test,docker --outdir `). - [ ] Usage Documentation in `docs/usage.md` is updated. - [ ] Output Documentation in `docs/output.md` is updated. - [ ] `CHANGELOG.md` is updated. 
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d466269a..de0c4594 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -28,7 +28,7 @@ jobs: - "latest-everything" steps: - name: Check out pipeline code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Install Nextflow uses: nf-core/setup-nextflow@v1 diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix-linting.yml index 9adbb159..05b700e4 100644 --- a/.github/workflows/fix-linting.yml +++ b/.github/workflows/fix-linting.yml @@ -13,7 +13,7 @@ jobs: runs-on: ubuntu-latest steps: # Use the @nf-core-bot token to check out so we can push later - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: token: ${{ secrets.nf_core_bot_auth_token }} @@ -24,7 +24,7 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }} - - uses: actions/setup-node@v3 + - uses: actions/setup-node@v4 - name: Install Prettier run: npm install -g prettier @prettier/plugin-php diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index b8bdd214..905c58e4 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -14,9 +14,9 @@ jobs: EditorConfig: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - - uses: actions/setup-node@v3 + - uses: actions/setup-node@v4 - name: Install editorconfig-checker run: npm install -g editorconfig-checker @@ -27,9 +27,9 @@ jobs: Prettier: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - - uses: actions/setup-node@v3 + - uses: actions/setup-node@v4 - name: Install Prettier run: npm install -g prettier @@ -40,7 +40,7 @@ jobs: PythonBlack: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Check code lints with Black uses: psf/black@stable @@ -71,7 +71,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out pipeline code - uses: actions/checkout@v3 + uses: 
actions/checkout@v4 - name: Install Nextflow uses: nf-core/setup-nextflow@v1 diff --git a/.gitpod.yml b/.gitpod.yml index 25488dcc..acf72695 100644 --- a/.gitpod.yml +++ b/.gitpod.yml @@ -4,7 +4,9 @@ tasks: command: | pre-commit install --install-hooks nextflow self-update - + - name: unset JAVA_TOOL_OPTIONS + command: | + unset JAVA_TOOL_OPTIONS vscode: extensions: # based on nf-core.nf-core-extensionpack - codezombiech.gitignore # Language support for .gitignore files diff --git a/CHANGELOG.md b/CHANGELOG.md index 6d0db033..8839e0f7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## v2.4.1 - [date] +## v2.5.0dev - [date] Initial release of nf-core/scrnaseq, created with the [nf-core](https://nf-co.re/) template. diff --git a/README.md b/README.md index 3f35dfd8..4736c821 100644 --- a/README.md +++ b/README.md @@ -30,11 +30,8 @@ ## Usage -:::note -If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how -to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) -with `-profile test` before running the workflow on actual data. -::: +> [!NOTE] +> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data. - + diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index f550a97b..051ed54e 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,9 +1,7 @@ report_comment: > - - This report has been generated by the nf-core/scrnaseq + This report has been generated by the nf-core/scrnaseq analysis pipeline. 
For information about how to interpret these results, please see the - documentation. - + documentation. report_section_order: "nf-core-scrnaseq-methods-description": order: -1000 diff --git a/assets/slackreport.json b/assets/slackreport.json index f6ba6baf..9538f235 100644 --- a/assets/slackreport.json +++ b/assets/slackreport.json @@ -3,7 +3,7 @@ { "fallback": "Plain-text summary of the attachment.", "color": "<% if (success) { %>good<% } else { %>danger<%} %>", - "author_name": "nf-core/scrnaseq v${version} - ${runName}", + "author_name": "nf-core/scrnaseq ${version} - ${runName}", "author_icon": "https://www.nextflow.io/docs/latest/_static/favicon.ico", "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors<% } %>", "fields": [ diff --git a/conf/modules.config b/conf/modules.config index 39e81386..d91c6aba 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -39,7 +39,7 @@ process { } withName: 'MULTIQC' { - ext.args = params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' + ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } publishDir = [ path: { "${params.outdir}/multiqc" }, mode: params.publish_dir_mode, diff --git a/docs/usage.md b/docs/usage.md index 03ade981..e9b83795 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -20,7 +20,7 @@ You will need to create a samplesheet with information about the samples you wou The `sample` identifiers have to be the same when you have re-sequenced the same sample more than once e.g. to increase sequencing depth. The pipeline will concatenate the raw reads before performing any downstream analysis. 
Below is an example for the same sample sequenced across 3 lanes: -```console +```csv title="samplesheet.csv" sample,fastq_1,fastq_2 CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz CONTROL_REP1,AEG588A1_S1_L003_R1_001.fastq.gz,AEG588A1_S1_L003_R2_001.fastq.gz @@ -33,7 +33,7 @@ The pipeline will auto-detect whether a sample is single- or paired-end using th A final samplesheet file consisting of both single- and paired-end data may look something like the one below. This is for 6 samples, where `TREATMENT_REP3` has been sequenced twice. -```console +```csv title="samplesheet.csv" sample,fastq_1,fastq_2 CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz CONTROL_REP2,AEG588A2_S2_L002_R1_001.fastq.gz,AEG588A2_S2_L002_R2_001.fastq.gz diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy index 01b8653d..e248e4c3 100755 --- a/lib/NfcoreTemplate.groovy +++ b/lib/NfcoreTemplate.groovy @@ -4,6 +4,7 @@ import org.yaml.snakeyaml.Yaml import groovy.json.JsonOutput +import nextflow.extension.FilesEx class NfcoreTemplate { @@ -141,12 +142,14 @@ class NfcoreTemplate { try { if (params.plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } // Try to send HTML e-mail using sendmail + def sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html") + sendmail_tf.withWriter { w -> w << sendmail_html } [ 'sendmail', '-t' ].execute() << sendmail_html log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" } catch (all) { // Catch failures and try with plaintext def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] - if ( mqc_report.size() <= max_multiqc_email_size.toBytes() ) { + if ( mqc_report != null && mqc_report.size() <= max_multiqc_email_size.toBytes() ) { mail_cmd += [ '-A', mqc_report ] } mail_cmd.execute() << email_html @@ -155,14 +158,16 @@ class NfcoreTemplate { } // Write 
summary e-mail HTML to a file - def output_d = new File("${params.outdir}/pipeline_info/") - if (!output_d.exists()) { - output_d.mkdirs() - } - def output_hf = new File(output_d, "pipeline_report.html") + def output_hf = new File(workflow.launchDir.toString(), ".pipeline_report.html") output_hf.withWriter { w -> w << email_html } - def output_tf = new File(output_d, "pipeline_report.txt") + FilesEx.copyTo(output_hf.toPath(), "${params.outdir}/pipeline_info/pipeline_report.html"); + output_hf.delete() + + // Write summary e-mail TXT to a file + def output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt") output_tf.withWriter { w -> w << email_txt } + FilesEx.copyTo(output_tf.toPath(), "${params.outdir}/pipeline_info/pipeline_report.txt"); + output_tf.delete() } // @@ -227,15 +232,14 @@ class NfcoreTemplate { // Dump pipeline parameters in a json file // public static void dump_parameters(workflow, params) { - def output_d = new File("${params.outdir}/pipeline_info/") - if (!output_d.exists()) { - output_d.mkdirs() - } - def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') - def output_pf = new File(output_d, "params_${timestamp}.json") + def filename = "params_${timestamp}.json" + def temp_pf = new File(workflow.launchDir.toString(), ".${filename}") def jsonStr = JsonOutput.toJson(params) - output_pf.text = JsonOutput.prettyPrint(jsonStr) + temp_pf.text = JsonOutput.prettyPrint(jsonStr) + + FilesEx.copyTo(temp_pf.toPath(), "${params.outdir}/pipeline_info/params_${timestamp}.json") + temp_pf.delete() } // diff --git a/modules.json b/modules.json index 13919f04..47755872 100644 --- a/modules.json +++ b/modules.json @@ -7,17 +7,17 @@ "nf-core": { "custom/dumpsoftwareversions": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "bba7e362e4afead70653f84d8700588ea28d0f9e", "installed_by": ["modules"] }, "fastqc": { "branch": "master", - "git_sha": "bd8092b67b5103bdd52e300f75889442275c3117", + 
"git_sha": "65ad3e0b9a4099592e1102e92e10455dc661cf53", "installed_by": ["modules"] }, "multiqc": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "4ab13872435962dadc239979554d13709e20bf29", "installed_by": ["modules"] } } diff --git a/modules/nf-core/custom/dumpsoftwareversions/environment.yml b/modules/nf-core/custom/dumpsoftwareversions/environment.yml new file mode 100644 index 00000000..f0c63f69 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/environment.yml @@ -0,0 +1,7 @@ +name: custom_dumpsoftwareversions +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::multiqc=1.17 diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf index ebc87273..7685b33c 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/main.nf +++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf @@ -2,10 +2,10 @@ process CUSTOM_DUMPSOFTWAREVERSIONS { label 'process_single' // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container - conda "bioconda::multiqc=1.14" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : - 'biocontainers/multiqc:1.14--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.17--pyhdfd78af_0' : + 'biocontainers/multiqc:1.17--pyhdfd78af_0' }" input: path versions diff --git a/modules/nf-core/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml index c32657de..5f15a5fd 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/meta.yml +++ b/modules/nf-core/custom/dumpsoftwareversions/meta.yml @@ -1,4 +1,4 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: custom_dumpsoftwareversions description: Custom module used to dump software versions within the nf-core pipeline template keywords: @@ -16,7 +16,6 @@ input: type: file description: YML file containing software versions pattern: "*.yml" - output: - yml: type: file @@ -30,7 +29,9 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@drpatelh" - "@grst" +maintainers: + - "@drpatelh" + - "@grst" diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test new file mode 100644 index 00000000..eec1db10 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test @@ -0,0 +1,38 @@ +nextflow_process { + + name "Test Process CUSTOM_DUMPSOFTWAREVERSIONS" + script "../main.nf" + process "CUSTOM_DUMPSOFTWAREVERSIONS" + tag "modules" + tag "modules_nfcore" + tag "custom" + tag "dumpsoftwareversions" + tag "custom/dumpsoftwareversions" + + test("Should run without failures") { + when { + process { + """ + def tool1_version = ''' + TOOL1: + tool1: 0.11.9 + '''.stripIndent() + + def tool2_version = ''' + TOOL2: + tool2: 1.9 + '''.stripIndent() + + 
input[0] = Channel.of(tool1_version, tool2_version).collectFile() + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap new file mode 100644 index 00000000..4274ed57 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap @@ -0,0 +1,27 @@ +{ + "Should run without failures": { + "content": [ + { + "0": [ + "software_versions.yml:md5,1c851188476409cda5752ce971b20b58" + ], + "1": [ + "software_versions_mqc.yml:md5,2570f4ba271ad08357b0d3d32a9cf84d" + ], + "2": [ + "versions.yml:md5,3843ac526e762117eedf8825b40683df" + ], + "mqc_yml": [ + "software_versions_mqc.yml:md5,2570f4ba271ad08357b0d3d32a9cf84d" + ], + "versions": [ + "versions.yml:md5,3843ac526e762117eedf8825b40683df" + ], + "yml": [ + "software_versions.yml:md5,1c851188476409cda5752ce971b20b58" + ] + } + ], + "timestamp": "2023-11-03T14:43:22.157011" + } +} diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml b/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml new file mode 100644 index 00000000..405aa24a --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml @@ -0,0 +1,2 @@ +custom/dumpsoftwareversions: + - modules/nf-core/custom/dumpsoftwareversions/** diff --git a/modules/nf-core/fastqc/environment.yml b/modules/nf-core/fastqc/environment.yml new file mode 100644 index 00000000..1787b38a --- /dev/null +++ b/modules/nf-core/fastqc/environment.yml @@ -0,0 +1,7 @@ +name: fastqc +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::fastqc=0.12.1 diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf index 249f9064..9e19a74c 100644 --- a/modules/nf-core/fastqc/main.nf +++ b/modules/nf-core/fastqc/main.nf @@ -2,10 +2,10 @@ process FASTQC { tag "$meta.id" label 
'process_medium' - conda "bioconda::fastqc=0.11.9" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0' : - 'biocontainers/fastqc:0.11.9--0' }" + 'https://depot.galaxyproject.org/singularity/fastqc:0.12.1--hdfd78af_0' : + 'biocontainers/fastqc:0.12.1--hdfd78af_0' }" input: tuple val(meta), path(reads) @@ -37,7 +37,7 @@ process FASTQC { cat <<-END_VERSIONS > versions.yml "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) END_VERSIONS """ @@ -49,7 +49,7 @@ process FASTQC { cat <<-END_VERSIONS > versions.yml "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) END_VERSIONS """ } diff --git a/modules/nf-core/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml index 4da5bb5a..ee5507e0 100644 --- a/modules/nf-core/fastqc/meta.yml +++ b/modules/nf-core/fastqc/meta.yml @@ -50,3 +50,8 @@ authors: - "@grst" - "@ewels" - "@FelixKrueger" +maintainers: + - "@drpatelh" + - "@grst" + - "@ewels" + - "@FelixKrueger" diff --git a/modules/nf-core/fastqc/tests/main.nf.test b/modules/nf-core/fastqc/tests/main.nf.test new file mode 100644 index 00000000..b9e8f926 --- /dev/null +++ b/modules/nf-core/fastqc/tests/main.nf.test @@ -0,0 +1,109 @@ +nextflow_process { + + name "Test Process FASTQC" + script "../main.nf" + process "FASTQC" + tag "modules" + tag "modules_nfcore" + tag "fastqc" + + test("Single-Read") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id: 'test', single_end:true ], + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + // NOTE The report contains the date inside it, which means that the 
md5sum is stable per day, but not longer than that. So you can't md5sum it. + // looks like this:
Mon 2 Oct 2023
test.gz
+ // https://github.com/nf-core/modules/pull/3903#issuecomment-1743620039 + { assert process.out.html.get(0).get(1) ==~ ".*/test_fastqc.html" }, + { assert path(process.out.html.get(0).get(1)).getText().contains("File typeConventional base calls") }, + { assert snapshot(process.out.versions).match("versions") }, + { assert process.out.zip.get(0).get(1) ==~ ".*/test_fastqc.zip" } + ) + } + } +// TODO +// // +// // Test with paired-end data +// // +// workflow test_fastqc_paired_end { +// input = [ +// [id: 'test', single_end: false], // meta map +// [ +// file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), +// file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) +// ] +// ] + +// FASTQC ( input ) +// } + +// // +// // Test with interleaved data +// // +// workflow test_fastqc_interleaved { +// input = [ +// [id: 'test', single_end: false], // meta map +// file(params.test_data['sarscov2']['illumina']['test_interleaved_fastq_gz'], checkIfExists: true) +// ] + +// FASTQC ( input ) +// } + +// // +// // Test with bam data +// // +// workflow test_fastqc_bam { +// input = [ +// [id: 'test', single_end: false], // meta map +// file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) +// ] + +// FASTQC ( input ) +// } + +// // +// // Test with multiple samples +// // +// workflow test_fastqc_multiple { +// input = [ +// [id: 'test', single_end: false], // meta map +// [ +// file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), +// file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true), +// file(params.test_data['sarscov2']['illumina']['test2_1_fastq_gz'], checkIfExists: true), +// file(params.test_data['sarscov2']['illumina']['test2_2_fastq_gz'], checkIfExists: true) +// ] +// ] + +// FASTQC ( input ) +// } + +// // +// // Test with custom prefix +// // +// workflow test_fastqc_custom_prefix { +// 
input = [ +// [ id:'mysample', single_end:true ], // meta map +// file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) +// ] + +// FASTQC ( input ) +// } +} diff --git a/modules/nf-core/fastqc/tests/main.nf.test.snap b/modules/nf-core/fastqc/tests/main.nf.test.snap new file mode 100644 index 00000000..636a32ce --- /dev/null +++ b/modules/nf-core/fastqc/tests/main.nf.test.snap @@ -0,0 +1,10 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "timestamp": "2023-10-09T23:40:54+0000" + } +} \ No newline at end of file diff --git a/modules/nf-core/fastqc/tests/tags.yml b/modules/nf-core/fastqc/tests/tags.yml new file mode 100644 index 00000000..7834294b --- /dev/null +++ b/modules/nf-core/fastqc/tests/tags.yml @@ -0,0 +1,2 @@ +fastqc: + - modules/nf-core/fastqc/** diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml new file mode 100644 index 00000000..bc0bdb5b --- /dev/null +++ b/modules/nf-core/multiqc/environment.yml @@ -0,0 +1,7 @@ +name: multiqc +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::multiqc=1.18 diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index 1fc387be..00cc48d2 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -1,10 +1,10 @@ process MULTIQC { label 'process_single' - conda "bioconda::multiqc=1.14" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : - 'biocontainers/multiqc:1.14--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.18--pyhdfd78af_0' : + 'biocontainers/multiqc:1.18--pyhdfd78af_0' }" input: path multiqc_files, stageAs: "?/*" @@ -25,12 +25,14 @@ process MULTIQC { def args = task.ext.args ?: '' def config = multiqc_config ? 
"--config $multiqc_config" : '' def extra_config = extra_multiqc_config ? "--config $extra_multiqc_config" : '' + def logo = multiqc_logo ? /--cl-config 'custom_logo: "${multiqc_logo}"'/ : '' """ multiqc \\ --force \\ $args \\ $config \\ $extra_config \\ + $logo \\ . cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml index f93b5ee5..f1aa660e 100644 --- a/modules/nf-core/multiqc/meta.yml +++ b/modules/nf-core/multiqc/meta.yml @@ -1,5 +1,5 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json -name: MultiQC +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: multiqc description: Aggregate results from bioinformatics analyses across many samples into a single report keywords: - QC @@ -13,7 +13,6 @@ tools: homepage: https://multiqc.info/ documentation: https://multiqc.info/docs/ licence: ["GPL-3.0-or-later"] - input: - multiqc_files: type: file @@ -31,7 +30,6 @@ input: type: file description: Optional logo file for MultiQC pattern: "*.{png}" - output: - report: type: file @@ -54,3 +52,8 @@ authors: - "@bunop" - "@drpatelh" - "@jfy133" +maintainers: + - "@abhi18av" + - "@bunop" + - "@drpatelh" + - "@jfy133" diff --git a/modules/nf-core/multiqc/tests/main.nf.test b/modules/nf-core/multiqc/tests/main.nf.test new file mode 100644 index 00000000..c2dad217 --- /dev/null +++ b/modules/nf-core/multiqc/tests/main.nf.test @@ -0,0 +1,63 @@ +nextflow_process { + + name "Test Process MULTIQC" + script "../main.nf" + process "MULTIQC" + tag "modules" + tag "modules_nfcore" + tag "multiqc" + + test("MULTIQC: FASTQC") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz_fastqc_zip'], checkIfExists: true)]) + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + 
assertAll( + { assert process.success }, + { assert path(process.out.report.get(0)).exists() }, + { assert path(process.out.data.get(0)).exists() }, + { assert path(process.out.versions.get(0)).getText().contains("multiqc") } + ) + } + + } + + test("MULTIQC: FASTQC and a config file") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz_fastqc_zip'], checkIfExists: true)]) + input[1] = Channel.of(file("https://github.com/nf-core/tools/raw/dev/nf_core/pipeline-template/assets/multiqc_config.yml", checkIfExists: true)) + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.report.get(0)).exists() }, + { assert path(process.out.data.get(0)).exists() }, + { assert path(process.out.versions.get(0)).getText().contains("multiqc") } + ) + } + + } +} diff --git a/modules/nf-core/multiqc/tests/tags.yml b/modules/nf-core/multiqc/tests/tags.yml new file mode 100644 index 00000000..bea6c0d3 --- /dev/null +++ b/modules/nf-core/multiqc/tests/tags.yml @@ -0,0 +1,2 @@ +multiqc: + - modules/nf-core/multiqc/** diff --git a/nextflow.config b/nextflow.config index 0d250ffd..41b7daba 100644 --- a/nextflow.config +++ b/nextflow.config @@ -14,7 +14,7 @@ params { input = null // References genome = null - igenomes_base = 's3://ngi-igenomes/igenomes' + igenomes_base = 's3://ngi-igenomes/igenomes/' igenomes_ignore = false @@ -82,6 +82,7 @@ profiles { dumpHashes = true process.beforeScript = 'echo $HOSTNAME' cleanup = false + nextflow.enable.configProcessNamesValidation = true } conda { conda.enabled = true @@ -104,13 +105,13 @@ profiles { } docker { docker.enabled = true - docker.userEmulation = true conda.enabled = false singularity.enabled = false podman.enabled = false shifter.enabled = false charliecloud.enabled = false apptainer.enabled = false + runOptions = '-u $(id -u):$(id -g)' } arm { docker.runOptions = 
'-u $(id -u):$(id -g) --platform=linux/amd64' @@ -181,7 +182,7 @@ singularity.registry = 'quay.io' // Nextflow plugins plugins { - id 'nf-validation' // Validation of pipeline parameters and creation of an input channel from a sample sheet + id 'nf-validation@1.1.3' // Validation of pipeline parameters and creation of an input channel from a sample sheet } // Load igenomes.config if required @@ -204,6 +205,9 @@ env { // Capture exit codes from upstream processes when piping process.shell = ['/bin/bash', '-euo', 'pipefail'] +// Disable process selector warnings by default. Use debug profile to enable warnings. +nextflow.enable.configProcessNamesValidation = false + def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') timeline { enabled = true @@ -229,7 +233,7 @@ manifest { description = """Pipeline for processing 10x Genomics single cell rnaseq data""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '2.4.1' + version = '2.5.0dev' doi = '' } From 4a3231abf6b66dbd425a6d0056f5d48c9e475619 Mon Sep 17 00:00:00 2001 From: nf-core-bot Date: Wed, 20 Dec 2023 17:05:31 +0000 Subject: [PATCH 68/89] Template update for nf-core/tools version 2.11.1 --- .../{release-announcments.yml => release-announcements.yml} | 0 nextflow.config | 4 ++-- 2 files changed, 2 insertions(+), 2 deletions(-) rename .github/workflows/{release-announcments.yml => release-announcements.yml} (100%) diff --git a/.github/workflows/release-announcments.yml b/.github/workflows/release-announcements.yml similarity index 100% rename from .github/workflows/release-announcments.yml rename to .github/workflows/release-announcements.yml diff --git a/nextflow.config b/nextflow.config index 41b7daba..68563110 100644 --- a/nextflow.config +++ b/nextflow.config @@ -111,10 +111,10 @@ profiles { shifter.enabled = false charliecloud.enabled = false apptainer.enabled = false - runOptions = '-u $(id -u):$(id -g)' + docker.runOptions = '-u $(id -u):$(id -g)' } arm { - 
docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' + docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' } singularity { singularity.enabled = true From 6c9151aca9dd68465d3ec4573ab4d79494e6f606 Mon Sep 17 00:00:00 2001 From: heylf Date: Wed, 20 Dec 2023 18:16:02 +0100 Subject: [PATCH 69/89] Delete unnecessary files and update file naming convention --- bin/generate_config.py | 34 -------- .../count/templates/cellranger_count.py | 84 ------------------- 2 files changed, 118 deletions(-) delete mode 100755 bin/generate_config.py delete mode 100644 modules/nf-core/cellranger/count/templates/cellranger_count.py diff --git a/bin/generate_config.py b/bin/generate_config.py deleted file mode 100755 index c01260dd..00000000 --- a/bin/generate_config.py +++ /dev/null @@ -1,34 +0,0 @@ -#!/usr/bin/env python -import argparse - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description="Generate the config for cellranger-arc mkref. \ - cellranger-arc mkref takes as input a configuration file that bundles various inputs to the tool. 
\ - You can also create a config file on your own, please find more information here:\ - https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/advanced/references" - ) - - parser.add_argument("-f", "--fasta", dest="fasta", help="Name of the fasta file.", required=True) - parser.add_argument("-g", "--gtf", dest="gtf", help="Name of the gtf file.", required=True) - parser.add_argument("-m", "--motifs", dest="motifs", help="Name of the motifs file.") - parser.add_argument("-a", "--add", dest="add", help="Additional filter line.") - - args = vars(parser.parse_args()) - - print(args) - - config = open("config", "w") - config.write("{\n") - config.write('\torganism: "{}"\n'.format(args["fasta"].split(".")[0])) - config.write('\tgenome: ["cellrangerarc_reference"]\n') - config.write('\tinput_fasta: ["{}"]\n'.format(args["fasta"])) - config.write('\tinput_gtf: ["{}"]\n'.format(args["gtf"])) - if args["motifs"] != "[]": - config.write('\tinput_motifs: "{}"\n'.format(args["motifs"])) - if args["add"] != None: - config.write(args["add"] + "\n") - config.write("}") - config.close() - - print("Wrote config file") diff --git a/modules/nf-core/cellranger/count/templates/cellranger_count.py b/modules/nf-core/cellranger/count/templates/cellranger_count.py deleted file mode 100644 index 4bfb9f4f..00000000 --- a/modules/nf-core/cellranger/count/templates/cellranger_count.py +++ /dev/null @@ -1,84 +0,0 @@ -#!/usr/bin/env python3 -""" -Automatically rename staged files for input into cellranger count. - -Copyright (c) Gregor Sturm 2023 - MIT License -""" -from subprocess import run -from pathlib import Path -from textwrap import dedent -import shlex -import re - - -def chunk_iter(seq, size): - """iterate over `seq` in chunks of `size`""" - return (seq[pos : pos + size] for pos in range(0, len(seq), size)) - - -sample_id = "${meta.id}" - -# get fastqs, ordered by path. 
Files are staged into -# - "fastq_001/{original_name.fastq.gz}" -# - "fastq_002/{oritinal_name.fastq.gz}" -# - ... -# Since we require fastq files in the input channel to be ordered such that a R1/R2 pair -# of files follows each other, ordering will get us a sequence of [R1, R2, R1, R2, ...] -fastqs = sorted(Path(".").glob("fastq_*/*")) -assert len(fastqs) % 2 == 0 - -# target directory in which the renamed fastqs will be placed -fastq_all = Path("./fastq_all") -fastq_all.mkdir(exist_ok=True) - -# Match R1 in the filename, but only if it is followed by a non-digit or non-character -# match "file_R1.fastq.gz", "file.R1_000.fastq.gz", etc. but -# do not match "SRR12345", "file_INFIXR12", etc -filename_pattern = r"([^a-zA-Z0-9])R1([^a-zA-Z0-9])" - -for i, (r1, r2) in enumerate(chunk_iter(fastqs, 2)): - # double escapes are required because nextflow processes this python 'template' - if re.sub(filename_pattern, r"\\1R2\\2", r1.name) != r2.name: - raise AssertionError( - dedent( - f"""\ - We expect R1 and R2 of the same sample to have the same filename except for R1/R2. - This has been checked by replacing "R1" with "R2" in the first filename and comparing it to the second filename. - If you believe this check shouldn't have failed on your filenames, please report an issue on GitHub! 
- - Files involved: - - {r1} - - {r2} - """ - ) - ) - r1.rename(fastq_all / f"{sample_id}_S1_L{i:03d}_R1_001.fastq.gz") - r2.rename(fastq_all / f"{sample_id}_S1_L{i:03d}_R2_001.fastq.gz") - -run( - # fmt: off - [ - "cellranger", "count", - "--id", "${prefix}", - "--fastqs", str(fastq_all), - "--transcriptome", "${reference.name}", - "--localcores", "${task.cpus}", - "--localmem", "${task.memory.toGiga()}", - *shlex.split("""${args}""") - ], - # fmt: on - check=True, -) - -# Output version information -version = run( - ["cellranger", "-V"], - text=True, - check=True, - capture_output=True, -).stdout.replace("cellranger cellranger-", "") - -# alas, no `pyyaml` pre-installed in the cellranger container -with open("versions.yml", "w") as f: - f.write('"${task.process}":\\n') - f.write(f' cellranger: "{version}"\\n') From c0bad15494221af9c3c58cb05dccc8c66e012996 Mon Sep 17 00:00:00 2001 From: heylf Date: Wed, 20 Dec 2023 18:17:10 +0100 Subject: [PATCH 70/89] Remove CELLRANGERARC_GENERATECONFIG module --- conf/modules.config | 7 ------- 1 file changed, 7 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 8a8c7b0d..b6e4e8c8 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -93,13 +93,6 @@ if(params.aligner == "cellrangerarc") { ] ext.args = "--attribute=gene_biotype:protein_coding --attribute=gene_biotype:lncRNA --attribute=gene_biotype:pseudogene" } - withName: CELLRANGERARC_GENERATECONFIG { - publishDir = [ - path: "${params.outdir}/${params.aligner}/config", - mode: params.publish_dir_mode - ] - ext.args = "--add none" - } withName: CELLRANGERARC_MKREF { publishDir = [ path: "${params.outdir}/${params.aligner}/mkref", From 83830142b4ec2f95bb6a5fd16a78f9755456d8c8 Mon Sep 17 00:00:00 2001 From: heylf Date: Wed, 20 Dec 2023 18:17:38 +0100 Subject: [PATCH 71/89] Wirte python script into count module --- modules/local/cellrangerarc/count/main.nf | 33 ++++++++++++++++++----- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git 
a/modules/local/cellrangerarc/count/main.nf b/modules/local/cellrangerarc/count/main.nf index dbba4815..2bf0193a 100644 --- a/modules/local/cellrangerarc/count/main.nf +++ b/modules/local/cellrangerarc/count/main.nf @@ -24,18 +24,37 @@ process CELLRANGERARC_COUNT { script: def args = task.ext.args ?: '' def reference_name = reference.name - - //def multi_meta_info = multi_meta.collate(2).transpose() def sample_types = sample_type.join(",") def sample_names = sub_sample.join(",") def lib_csv = meta.id + "_lib.csv" """ - generate_lib_csv.py \\ - --sample_types $sample_types \\ - --sample_names $sample_names \\ - --fastq_folder \$(readlink -f fastqs)\\ - --out $lib_csv + fastq_folder=\$(readlink -f fastqs) + + python3 < Date: Wed, 20 Dec 2023 18:17:56 +0100 Subject: [PATCH 72/89] Write python script into mkref module --- modules/local/cellrangerarc/mkref/main.nf | 45 +++++++++++++++++------ 1 file changed, 34 insertions(+), 11 deletions(-) diff --git a/modules/local/cellrangerarc/mkref/main.nf b/modules/local/cellrangerarc/mkref/main.nf index 88029ebd..c8cc3d2b 100644 --- a/modules/local/cellrangerarc/mkref/main.nf +++ b/modules/local/cellrangerarc/mkref/main.nf @@ -36,17 +36,39 @@ process CELLRANGERARC_MKREF { } """ - if [ $reference_config == [] ]; then - generate_config.py \\ - --fasta $fast_name \\ - --gtf $gtf_name \\ - --motifs $motifs_name \\ - $args - else - if [ ! 
-f config ]; then - mv -i $reference_config config - fi - fi + + python3 < Date: Wed, 20 Dec 2023 18:20:00 +0100 Subject: [PATCH 73/89] eclint mkref main --- modules/local/cellrangerarc/mkref/main.nf | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/modules/local/cellrangerarc/mkref/main.nf b/modules/local/cellrangerarc/mkref/main.nf index c8cc3d2b..079776ba 100644 --- a/modules/local/cellrangerarc/mkref/main.nf +++ b/modules/local/cellrangerarc/mkref/main.nf @@ -41,7 +41,7 @@ process CELLRANGERARC_MKREF { from os.path import exists import shutil - + fasta = "${fast_name}" gtf = "${gtf_name}" motifs = "${motifs_name}" @@ -81,4 +81,3 @@ process CELLRANGERARC_MKREF { END_VERSIONS """ } - From 46a076381f92ce6ef5358293e46067b37d8409a2 Mon Sep 17 00:00:00 2001 From: heylf Date: Thu, 21 Dec 2023 14:02:48 +0100 Subject: [PATCH 74/89] Adding nf-core cellrangerarc and remove local cellrangearc --- modules/{local => nf-core}/cellrangerarc/Dockerfile | 0 modules/{local => nf-core}/cellrangerarc/README.md | 0 modules/{local => nf-core}/cellrangerarc/count/main.nf | 0 modules/{local => nf-core}/cellrangerarc/count/meta.yml | 3 ++- modules/{local => nf-core}/cellrangerarc/mkgtf/main.nf | 0 modules/{local => nf-core}/cellrangerarc/mkgtf/meta.yml | 3 ++- modules/{local => nf-core}/cellrangerarc/mkref/main.nf | 0 modules/{local => nf-core}/cellrangerarc/mkref/meta.yml | 3 ++- 8 files changed, 6 insertions(+), 3 deletions(-) rename modules/{local => nf-core}/cellrangerarc/Dockerfile (100%) rename modules/{local => nf-core}/cellrangerarc/README.md (100%) rename modules/{local => nf-core}/cellrangerarc/count/main.nf (100%) rename modules/{local => nf-core}/cellrangerarc/count/meta.yml (97%) rename modules/{local => nf-core}/cellrangerarc/mkgtf/main.nf (100%) rename modules/{local => nf-core}/cellrangerarc/mkgtf/meta.yml (96%) rename modules/{local => nf-core}/cellrangerarc/mkref/main.nf (100%) rename modules/{local => nf-core}/cellrangerarc/mkref/meta.yml (97%) diff 
--git a/modules/local/cellrangerarc/Dockerfile b/modules/nf-core/cellrangerarc/Dockerfile similarity index 100% rename from modules/local/cellrangerarc/Dockerfile rename to modules/nf-core/cellrangerarc/Dockerfile diff --git a/modules/local/cellrangerarc/README.md b/modules/nf-core/cellrangerarc/README.md similarity index 100% rename from modules/local/cellrangerarc/README.md rename to modules/nf-core/cellrangerarc/README.md diff --git a/modules/local/cellrangerarc/count/main.nf b/modules/nf-core/cellrangerarc/count/main.nf similarity index 100% rename from modules/local/cellrangerarc/count/main.nf rename to modules/nf-core/cellrangerarc/count/main.nf diff --git a/modules/local/cellrangerarc/count/meta.yml b/modules/nf-core/cellrangerarc/count/meta.yml similarity index 97% rename from modules/local/cellrangerarc/count/meta.yml rename to modules/nf-core/cellrangerarc/count/meta.yml index f69bc1fa..919de4dc 100644 --- a/modules/local/cellrangerarc/count/meta.yml +++ b/modules/nf-core/cellrangerarc/count/meta.yml @@ -10,7 +10,8 @@ tools: homepage: https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/what-is-cell-ranger-arc documentation: https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/what-is-cell-ranger-arc tool_dev_url: https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/what-is-cell-ranger-arc - licence: 10x Genomics EULA + licence: + - 10x Genomics EULA input: - meta: type: map diff --git a/modules/local/cellrangerarc/mkgtf/main.nf b/modules/nf-core/cellrangerarc/mkgtf/main.nf similarity index 100% rename from modules/local/cellrangerarc/mkgtf/main.nf rename to modules/nf-core/cellrangerarc/mkgtf/main.nf diff --git a/modules/local/cellrangerarc/mkgtf/meta.yml b/modules/nf-core/cellrangerarc/mkgtf/meta.yml similarity index 96% rename from modules/local/cellrangerarc/mkgtf/meta.yml rename to modules/nf-core/cellrangerarc/mkgtf/meta.yml index 
7ce211eb..923c3e18 100644 --- a/modules/local/cellrangerarc/mkgtf/meta.yml +++ b/modules/nf-core/cellrangerarc/mkgtf/meta.yml @@ -10,7 +10,8 @@ tools: homepage: https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/what-is-cell-ranger-arc documentation: https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/what-is-cell-ranger-arc tool_dev_url: https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/what-is-cell-ranger-arc - licence: 10x Genomics EULA + licence: + - 10x Genomics EULA input: - gtf: type: file diff --git a/modules/local/cellrangerarc/mkref/main.nf b/modules/nf-core/cellrangerarc/mkref/main.nf similarity index 100% rename from modules/local/cellrangerarc/mkref/main.nf rename to modules/nf-core/cellrangerarc/mkref/main.nf diff --git a/modules/local/cellrangerarc/mkref/meta.yml b/modules/nf-core/cellrangerarc/mkref/meta.yml similarity index 97% rename from modules/local/cellrangerarc/mkref/meta.yml rename to modules/nf-core/cellrangerarc/mkref/meta.yml index 1eac878a..cf98e60c 100644 --- a/modules/local/cellrangerarc/mkref/meta.yml +++ b/modules/nf-core/cellrangerarc/mkref/meta.yml @@ -10,7 +10,8 @@ tools: homepage: https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/what-is-cell-ranger-arc documentation: https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/what-is-cell-ranger-arc tool_dev_url: https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/what-is-cell-ranger-arc - licence: 10x Genomics EULA + licence: + - 10x Genomics EULA input: - fasta: type: file From f0e34c13e12cfae7f1017321d6ae9aef4ca648cb Mon Sep 17 00:00:00 2001 From: heylf Date: Thu, 21 Dec 2023 14:27:42 +0100 Subject: [PATCH 75/89] Changes from local to nf-core cellrangerarc --- subworkflows/local/align_cellrangerarc.nf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff 
--git a/subworkflows/local/align_cellrangerarc.nf b/subworkflows/local/align_cellrangerarc.nf index b33cd61b..3232a020 100644 --- a/subworkflows/local/align_cellrangerarc.nf +++ b/subworkflows/local/align_cellrangerarc.nf @@ -2,9 +2,9 @@ * Alignment with Cellranger Arc */ -include {CELLRANGERARC_MKGTF} from "../../modules/local/cellrangerarc/mkgtf/main.nf" -include {CELLRANGERARC_MKREF} from "../../modules/local/cellrangerarc/mkref/main.nf" -include {CELLRANGERARC_COUNT} from "../../modules/local/cellrangerarc/count/main.nf" +include {CELLRANGERARC_MKGTF} from "../../modules/nf-core/cellrangerarc/mkgtf/main.nf" +include {CELLRANGERARC_MKREF} from "../../modules/nf-core/cellrangerarc/mkref/main.nf" +include {CELLRANGERARC_COUNT} from "../../modules/nf-core/cellrangerarc/count/main.nf" // Define workflow to subset and index a genome region fasta file workflow CELLRANGERARC_ALIGN { From 7f4a70c23ce540326782ce4fce621549a34543d1 Mon Sep 17 00:00:00 2001 From: Gregor Sturm Date: Fri, 22 Dec 2023 08:25:06 +0100 Subject: [PATCH 76/89] Modulese updates --- modules.json | 24 ++-- .../nf-core/cellranger/count/environment.yml | 5 + modules/nf-core/cellranger/count/meta.yml | 4 +- .../count/templates/cellranger_count.py | 4 +- .../nf-core/cellranger/mkgtf/environment.yml | 5 + modules/nf-core/cellranger/mkgtf/meta.yml | 4 +- .../nf-core/cellranger/mkref/environment.yml | 5 + modules/nf-core/cellranger/mkref/meta.yml | 3 +- .../custom/dumpsoftwareversions/main.nf | 6 +- .../custom/dumpsoftwareversions/meta.yml | 7 +- modules/nf-core/fastqc/main.nf | 10 +- modules/nf-core/fastqc/meta.yml | 5 + modules/nf-core/fastqc/tests/main.nf.test | 89 ++++++++++++- modules/nf-core/gffread/environment.yml | 7 ++ modules/nf-core/gffread/main.nf | 12 +- modules/nf-core/gffread/meta.yml | 11 +- modules/nf-core/gffread/tests/main.nf.test | 61 +++++++++ .../nf-core/gffread/tests/main.nf.test.snap | 52 ++++++++ modules/nf-core/gffread/tests/nextflow.config | 5 + 
modules/nf-core/gffread/tests/tags.yml | 2 + modules/nf-core/gunzip/environment.yml | 7 ++ modules/nf-core/gunzip/main.nf | 2 +- modules/nf-core/gunzip/meta.yml | 4 + modules/nf-core/gunzip/tests/main.nf.test | 35 ++++++ .../nf-core/gunzip/tests/main.nf.test.snap | 31 +++++ modules/nf-core/gunzip/tests/tags.yml | 2 + .../kallistobustools/count/environment.yml | 7 ++ .../nf-core/kallistobustools/count/main.nf | 2 +- .../nf-core/kallistobustools/count/meta.yml | 5 +- .../kallistobustools/ref/environment.yml | 8 ++ modules/nf-core/kallistobustools/ref/main.nf | 2 +- modules/nf-core/kallistobustools/ref/meta.yml | 13 +- modules/nf-core/multiqc/main.nf | 8 +- modules/nf-core/multiqc/meta.yml | 11 +- .../star/genomegenerate/environment.yml | 11 ++ modules/nf-core/star/genomegenerate/main.nf | 89 ++++++++----- modules/nf-core/star/genomegenerate/meta.yml | 5 +- .../star/genomegenerate/tests/main.nf.test | 117 ++++++++++++++++++ .../genomegenerate/tests/main.nf.test.snap | 22 ++++ .../star/genomegenerate/tests/tags.yml | 2 + modules/nf-core/universc/environment.yml | 5 + modules/nf-core/universc/meta.yml | 6 +- 42 files changed, 616 insertions(+), 99 deletions(-) create mode 100644 modules/nf-core/cellranger/count/environment.yml create mode 100644 modules/nf-core/cellranger/mkgtf/environment.yml create mode 100644 modules/nf-core/cellranger/mkref/environment.yml create mode 100644 modules/nf-core/gffread/environment.yml create mode 100644 modules/nf-core/gffread/tests/main.nf.test create mode 100644 modules/nf-core/gffread/tests/main.nf.test.snap create mode 100644 modules/nf-core/gffread/tests/nextflow.config create mode 100644 modules/nf-core/gffread/tests/tags.yml create mode 100644 modules/nf-core/gunzip/environment.yml create mode 100644 modules/nf-core/gunzip/tests/main.nf.test create mode 100644 modules/nf-core/gunzip/tests/main.nf.test.snap create mode 100644 modules/nf-core/gunzip/tests/tags.yml create mode 100644 
modules/nf-core/kallistobustools/count/environment.yml create mode 100644 modules/nf-core/kallistobustools/ref/environment.yml create mode 100644 modules/nf-core/star/genomegenerate/environment.yml create mode 100644 modules/nf-core/star/genomegenerate/tests/main.nf.test create mode 100644 modules/nf-core/star/genomegenerate/tests/main.nf.test.snap create mode 100644 modules/nf-core/star/genomegenerate/tests/tags.yml create mode 100644 modules/nf-core/universc/environment.yml diff --git a/modules.json b/modules.json index 5b4e4a3f..a05f65d8 100644 --- a/modules.json +++ b/modules.json @@ -7,62 +7,62 @@ "nf-core": { "cellranger/count": { "branch": "master", - "git_sha": "5df79e0383386a9e43462a6e81bf978ce0a6db09", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, "cellranger/mkgtf": { "branch": "master", - "git_sha": "716ef3019b66772a817b417078edce2f7b337858", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, "cellranger/mkref": { "branch": "master", - "git_sha": "716ef3019b66772a817b417078edce2f7b337858", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, "custom/dumpsoftwareversions": { "branch": "master", - "git_sha": "05c280924b6c768d484c7c443dad5e605c4ff4b4", + "git_sha": "bba7e362e4afead70653f84d8700588ea28d0f9e", "installed_by": ["modules"] }, "fastqc": { "branch": "master", - "git_sha": "9a4517e720bc812e95b56d23d15a1653b6db4f53", + "git_sha": "65ad3e0b9a4099592e1102e92e10455dc661cf53", "installed_by": ["modules"] }, "gffread": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "a2d6c3082c5c44b4155a3246daff36701ee49af8", "installed_by": ["modules"] }, "gunzip": { "branch": "master", - "git_sha": "e06548bfa36ee31869b81041879dd6b3a83b1d57", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, "kallistobustools/count": { "branch": "master", - "git_sha": 
"de204d3c950f091336539ad74f0e47ddffe69ed4", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, "kallistobustools/ref": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, "multiqc": { "branch": "master", - "git_sha": "a6e11ac655e744f7ebc724be669dd568ffdc0e80", + "git_sha": "4ab13872435962dadc239979554d13709e20bf29", "installed_by": ["modules"] }, "star/genomegenerate": { "branch": "master", - "git_sha": "cc08a888069f67cab8120259bddab8032d4c0fe3", + "git_sha": "d87a6e2156c2099c09280fa70776eaf0a824817a", "installed_by": ["modules"] }, "universc": { "branch": "master", - "git_sha": "cf67a6d7d043e2bd6a3099be84c72046fc71508f", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] } } diff --git a/modules/nf-core/cellranger/count/environment.yml b/modules/nf-core/cellranger/count/environment.yml new file mode 100644 index 00000000..662f747d --- /dev/null +++ b/modules/nf-core/cellranger/count/environment.yml @@ -0,0 +1,5 @@ +name: cellranger_count +channels: + - conda-forge + - bioconda + - defaults diff --git a/modules/nf-core/cellranger/count/meta.yml b/modules/nf-core/cellranger/count/meta.yml index c7d82bbc..a672180e 100644 --- a/modules/nf-core/cellranger/count/meta.yml +++ b/modules/nf-core/cellranger/count/meta.yml @@ -10,7 +10,6 @@ tools: homepage: https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/what-is-cell-ranger documentation: https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/tutorial_ov tool_dev_url: https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/tutorial_ov - licence: 10x Genomics EULA input: - meta: @@ -47,3 +46,6 @@ output: authors: - "@ggabernet" - "@Emiller88" +maintainers: + - "@ggabernet" + - "@Emiller88" diff --git 
a/modules/nf-core/cellranger/count/templates/cellranger_count.py b/modules/nf-core/cellranger/count/templates/cellranger_count.py index 4bfb9f4f..1527ba76 100644 --- a/modules/nf-core/cellranger/count/templates/cellranger_count.py +++ b/modules/nf-core/cellranger/count/templates/cellranger_count.py @@ -34,11 +34,11 @@ def chunk_iter(seq, size): # Match R1 in the filename, but only if it is followed by a non-digit or non-character # match "file_R1.fastq.gz", "file.R1_000.fastq.gz", etc. but # do not match "SRR12345", "file_INFIXR12", etc -filename_pattern = r"([^a-zA-Z0-9])R1([^a-zA-Z0-9])" +filename_pattern = r'([^a-zA-Z0-9])R1([^a-zA-Z0-9])' for i, (r1, r2) in enumerate(chunk_iter(fastqs, 2)): # double escapes are required because nextflow processes this python 'template' - if re.sub(filename_pattern, r"\\1R2\\2", r1.name) != r2.name: + if re.sub(filename_pattern, r'\\1R2\\2', r1.name) != r2.name: raise AssertionError( dedent( f"""\ diff --git a/modules/nf-core/cellranger/mkgtf/environment.yml b/modules/nf-core/cellranger/mkgtf/environment.yml new file mode 100644 index 00000000..c81ef3e4 --- /dev/null +++ b/modules/nf-core/cellranger/mkgtf/environment.yml @@ -0,0 +1,5 @@ +name: cellranger_mkgtf +channels: + - conda-forge + - bioconda + - defaults diff --git a/modules/nf-core/cellranger/mkgtf/meta.yml b/modules/nf-core/cellranger/mkgtf/meta.yml index e226e42d..7ec0e0a3 100644 --- a/modules/nf-core/cellranger/mkgtf/meta.yml +++ b/modules/nf-core/cellranger/mkgtf/meta.yml @@ -10,7 +10,6 @@ tools: homepage: https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/what-is-cell-ranger documentation: https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/tutorial_ov tool_dev_url: https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/tutorial_ov - licence: 10x Genomics EULA input: - gtf: @@ -29,3 +28,6 @@ output: authors: - "@ggabernet" - "@Emiller88" +maintainers: + - 
"@ggabernet" + - "@Emiller88" diff --git a/modules/nf-core/cellranger/mkref/environment.yml b/modules/nf-core/cellranger/mkref/environment.yml new file mode 100644 index 00000000..9ca3e88c --- /dev/null +++ b/modules/nf-core/cellranger/mkref/environment.yml @@ -0,0 +1,5 @@ +name: cellranger_mkref +channels: + - conda-forge + - bioconda + - defaults diff --git a/modules/nf-core/cellranger/mkref/meta.yml b/modules/nf-core/cellranger/mkref/meta.yml index 1ad5d6e3..4cd9091c 100644 --- a/modules/nf-core/cellranger/mkref/meta.yml +++ b/modules/nf-core/cellranger/mkref/meta.yml @@ -10,7 +10,6 @@ tools: homepage: https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/what-is-cell-ranger documentation: https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/tutorial_ov tool_dev_url: https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/tutorial_ov - licence: 10x Genomics EULA input: - fasta: @@ -35,3 +34,5 @@ output: pattern: "versions.yml" authors: - "@ggabernet" +maintainers: + - "@ggabernet" diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf index c9d014b1..7685b33c 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/main.nf +++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf @@ -2,10 +2,10 @@ process CUSTOM_DUMPSOFTWAREVERSIONS { label 'process_single' // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container - conda "bioconda::multiqc=1.15" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/multiqc:1.15--pyhdfd78af_0' : - 'biocontainers/multiqc:1.15--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.17--pyhdfd78af_0' : + 'biocontainers/multiqc:1.17--pyhdfd78af_0' }" input: path versions diff --git a/modules/nf-core/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml index c32657de..5f15a5fd 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/meta.yml +++ b/modules/nf-core/custom/dumpsoftwareversions/meta.yml @@ -1,4 +1,4 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: custom_dumpsoftwareversions description: Custom module used to dump software versions within the nf-core pipeline template keywords: @@ -16,7 +16,6 @@ input: type: file description: YML file containing software versions pattern: "*.yml" - output: - yml: type: file @@ -30,7 +29,9 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@drpatelh" - "@grst" +maintainers: + - "@drpatelh" + - "@grst" diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf index 249f9064..9e19a74c 100644 --- a/modules/nf-core/fastqc/main.nf +++ b/modules/nf-core/fastqc/main.nf @@ -2,10 +2,10 @@ process FASTQC { tag "$meta.id" label 'process_medium' - conda "bioconda::fastqc=0.11.9" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0' : - 'biocontainers/fastqc:0.11.9--0' }" + 'https://depot.galaxyproject.org/singularity/fastqc:0.12.1--hdfd78af_0' : + 'biocontainers/fastqc:0.12.1--hdfd78af_0' }" input: tuple val(meta), path(reads) @@ -37,7 +37,7 @@ process FASTQC { cat <<-END_VERSIONS > versions.yml "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) END_VERSIONS """ @@ -49,7 +49,7 @@ process FASTQC { cat <<-END_VERSIONS > versions.yml "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) END_VERSIONS """ } diff --git a/modules/nf-core/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml index 4da5bb5a..ee5507e0 100644 --- a/modules/nf-core/fastqc/meta.yml +++ b/modules/nf-core/fastqc/meta.yml @@ -50,3 +50,8 @@ authors: - "@grst" - "@ewels" - "@FelixKrueger" +maintainers: + - "@drpatelh" + - "@grst" + - "@ewels" + - "@FelixKrueger" diff --git a/modules/nf-core/fastqc/tests/main.nf.test b/modules/nf-core/fastqc/tests/main.nf.test index 3961de60..b9e8f926 100644 --- a/modules/nf-core/fastqc/tests/main.nf.test +++ b/modules/nf-core/fastqc/tests/main.nf.test @@ -1,13 +1,18 @@ nextflow_process { name "Test Process FASTQC" - script "modules/nf-core/fastqc/main.nf" + script "../main.nf" process "FASTQC" + tag "modules" + tag "modules_nfcore" tag "fastqc" test("Single-Read") { when { + params { + outdir = "$outputDir" + } process { """ input[0] = [ @@ -21,12 +26,84 @@ nextflow_process { } then { - assert process.success - assert process.out.html.get(0).get(1) ==~ ".*/test_fastqc.html" - assert path(process.out.html.get(0).get(1)).getText().contains("File typeConventional base calls") - assert process.out.zip.get(0).get(1) ==~ ".*/test_fastqc.zip" + assertAll ( + { assert process.success }, + // NOTE The report contains the date inside it, which means that the md5sum is 
stable per day, but not longer than that. So you can't md5sum it. + // looks like this:
Mon 2 Oct 2023
test.gz
+ // https://github.com/nf-core/modules/pull/3903#issuecomment-1743620039 + { assert process.out.html.get(0).get(1) ==~ ".*/test_fastqc.html" }, + { assert path(process.out.html.get(0).get(1)).getText().contains("File typeConventional base calls") }, + { assert snapshot(process.out.versions).match("versions") }, + { assert process.out.zip.get(0).get(1) ==~ ".*/test_fastqc.zip" } + ) } - } +// TODO +// // +// // Test with paired-end data +// // +// workflow test_fastqc_paired_end { +// input = [ +// [id: 'test', single_end: false], // meta map +// [ +// file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), +// file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) +// ] +// ] + +// FASTQC ( input ) +// } + +// // +// // Test with interleaved data +// // +// workflow test_fastqc_interleaved { +// input = [ +// [id: 'test', single_end: false], // meta map +// file(params.test_data['sarscov2']['illumina']['test_interleaved_fastq_gz'], checkIfExists: true) +// ] + +// FASTQC ( input ) +// } + +// // +// // Test with bam data +// // +// workflow test_fastqc_bam { +// input = [ +// [id: 'test', single_end: false], // meta map +// file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) +// ] + +// FASTQC ( input ) +// } + +// // +// // Test with multiple samples +// // +// workflow test_fastqc_multiple { +// input = [ +// [id: 'test', single_end: false], // meta map +// [ +// file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), +// file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true), +// file(params.test_data['sarscov2']['illumina']['test2_1_fastq_gz'], checkIfExists: true), +// file(params.test_data['sarscov2']['illumina']['test2_2_fastq_gz'], checkIfExists: true) +// ] +// ] + +// FASTQC ( input ) +// } + +// // +// // Test with custom prefix +// // +// workflow test_fastqc_custom_prefix { +// 
input = [ +// [ id:'mysample', single_end:true ], // meta map +// file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) +// ] +// FASTQC ( input ) +// } } diff --git a/modules/nf-core/gffread/environment.yml b/modules/nf-core/gffread/environment.yml new file mode 100644 index 00000000..5398f71c --- /dev/null +++ b/modules/nf-core/gffread/environment.yml @@ -0,0 +1,7 @@ +name: gffread +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gffread=0.12.1 diff --git a/modules/nf-core/gffread/main.nf b/modules/nf-core/gffread/main.nf index f4472b0e..d8a473e0 100644 --- a/modules/nf-core/gffread/main.nf +++ b/modules/nf-core/gffread/main.nf @@ -2,7 +2,7 @@ process GFFREAD { tag "$gff" label 'process_low' - conda "bioconda::gffread=0.12.1" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/gffread:0.12.1--h8b12597_0' : 'biocontainers/gffread:0.12.1--h8b12597_0' }" @@ -11,20 +11,22 @@ process GFFREAD { path gff output: - path "*.gtf" , emit: gtf + path "*.gtf" , emit: gtf , optional: true + path "*.gff3" , emit: gffread_gff , optional: true path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${gff.baseName}" + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${gff.baseName}" + def extension = args.contains("-T") ? 
'gtf' : 'gffread.gff3' """ gffread \\ $gff \\ $args \\ - -o ${prefix}.gtf + -o ${prefix}.${extension} cat <<-END_VERSIONS > versions.yml "${task.process}": gffread: \$(gffread --version 2>&1) diff --git a/modules/nf-core/gffread/meta.yml b/modules/nf-core/gffread/meta.yml index 20335747..27ac3105 100644 --- a/modules/nf-core/gffread/meta.yml +++ b/modules/nf-core/gffread/meta.yml @@ -12,22 +12,25 @@ tools: tool_dev_url: https://github.com/gpertea/gffread doi: 10.12688/f1000research.23297.1 licence: ["MIT"] - input: - gff: type: file description: A reference file in either the GFF3, GFF2 or GTF format. pattern: "*.{gff, gtf}" - output: - gtf: type: file - description: GTF file resulting from the conversion of the GFF input file + description: GTF file resulting from the conversion of the GFF input file if '-T' argument is present pattern: "*.{gtf}" + - gffread_gff: + type: file + description: GFF3 file resulting from the conversion of the GFF input file if '-T' argument is absent + pattern: "*.{gff3}" - versions: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@emiller88" +maintainers: + - "@emiller88" diff --git a/modules/nf-core/gffread/tests/main.nf.test b/modules/nf-core/gffread/tests/main.nf.test new file mode 100644 index 00000000..3c064b31 --- /dev/null +++ b/modules/nf-core/gffread/tests/main.nf.test @@ -0,0 +1,61 @@ +nextflow_process { + + name "Test Process GFFREAD" + script "../main.nf" + process "GFFREAD" + + tag "gffread" + tag "modules_nfcore" + tag "modules" + + test("sarscov2-gff3-gtf") { + + config "./nextflow.config" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = file(params.test_data['sarscov2']['genome']['genome_gff3'], checkIfExists: true) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.gtf != null }, + { assert process.out.gffread_gff == [] } + ) + } + + } + + 
test("sarscov2-gff3-gff3") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = file(params.test_data['sarscov2']['genome']['genome_gff3'], checkIfExists: true) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.gtf == [] }, + { assert process.out.gffread_gff != null }, + ) + } + + } + +} diff --git a/modules/nf-core/gffread/tests/main.nf.test.snap b/modules/nf-core/gffread/tests/main.nf.test.snap new file mode 100644 index 00000000..1f1342e1 --- /dev/null +++ b/modules/nf-core/gffread/tests/main.nf.test.snap @@ -0,0 +1,52 @@ +{ + "sarscov2-gff3-gtf": { + "content": [ + { + "0": [ + "genome.gtf:md5,2394072d7d31530dfd590c4a117bf6e3" + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,a71b6cdfa528dd206a238ec64bae13d6" + ], + "gffread_gff": [ + + ], + "gtf": [ + "genome.gtf:md5,2394072d7d31530dfd590c4a117bf6e3" + ], + "versions": [ + "versions.yml:md5,a71b6cdfa528dd206a238ec64bae13d6" + ] + } + ], + "timestamp": "2023-11-29T15:39:30.006985" + }, + "sarscov2-gff3-gff3": { + "content": [ + { + "0": [ + + ], + "1": [ + "genome.gffread.gff3:md5,a7d40d99dcddac23ac673c473279ea2d" + ], + "2": [ + "versions.yml:md5,a71b6cdfa528dd206a238ec64bae13d6" + ], + "gffread_gff": [ + "genome.gffread.gff3:md5,a7d40d99dcddac23ac673c473279ea2d" + ], + "gtf": [ + + ], + "versions": [ + "versions.yml:md5,a71b6cdfa528dd206a238ec64bae13d6" + ] + } + ], + "timestamp": "2023-11-29T15:39:34.636061" + } +} \ No newline at end of file diff --git a/modules/nf-core/gffread/tests/nextflow.config b/modules/nf-core/gffread/tests/nextflow.config new file mode 100644 index 00000000..74b25094 --- /dev/null +++ b/modules/nf-core/gffread/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: GFFREAD { + ext.args = '-T' + } +} diff --git a/modules/nf-core/gffread/tests/tags.yml b/modules/nf-core/gffread/tests/tags.yml new file mode 100644 index 00000000..05576065 --- /dev/null +++ 
b/modules/nf-core/gffread/tests/tags.yml @@ -0,0 +1,2 @@ +gffread: + - modules/nf-core/gffread/** diff --git a/modules/nf-core/gunzip/environment.yml b/modules/nf-core/gunzip/environment.yml new file mode 100644 index 00000000..25910b34 --- /dev/null +++ b/modules/nf-core/gunzip/environment.yml @@ -0,0 +1,7 @@ +name: gunzip +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - conda-forge::sed=4.7 diff --git a/modules/nf-core/gunzip/main.nf b/modules/nf-core/gunzip/main.nf index 73bf08cd..468a6f28 100644 --- a/modules/nf-core/gunzip/main.nf +++ b/modules/nf-core/gunzip/main.nf @@ -2,7 +2,7 @@ process GUNZIP { tag "$archive" label 'process_single' - conda "conda-forge::sed=4.7" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : 'nf-core/ubuntu:20.04' }" diff --git a/modules/nf-core/gunzip/meta.yml b/modules/nf-core/gunzip/meta.yml index 4cdcdf4c..231034f2 100644 --- a/modules/nf-core/gunzip/meta.yml +++ b/modules/nf-core/gunzip/meta.yml @@ -33,3 +33,7 @@ authors: - "@joseespinosa" - "@drpatelh" - "@jfy133" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@jfy133" diff --git a/modules/nf-core/gunzip/tests/main.nf.test b/modules/nf-core/gunzip/tests/main.nf.test new file mode 100644 index 00000000..d0317922 --- /dev/null +++ b/modules/nf-core/gunzip/tests/main.nf.test @@ -0,0 +1,35 @@ +nextflow_process { + + name "Test Process GUNZIP" + script "../main.nf" + process "GUNZIP" + tag "gunzip" + tag "modules_nfcore" + tag "modules" + + test("Should run without failures") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [], + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git 
a/modules/nf-core/gunzip/tests/main.nf.test.snap b/modules/nf-core/gunzip/tests/main.nf.test.snap new file mode 100644 index 00000000..720fd9ff --- /dev/null +++ b/modules/nf-core/gunzip/tests/main.nf.test.snap @@ -0,0 +1,31 @@ +{ + "Should run without failures": { + "content": [ + { + "0": [ + [ + [ + + ], + "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "1": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ], + "gunzip": [ + [ + [ + + ], + "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "versions": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ] + } + ], + "timestamp": "2023-10-17T15:35:37.690477896" + } +} \ No newline at end of file diff --git a/modules/nf-core/gunzip/tests/tags.yml b/modules/nf-core/gunzip/tests/tags.yml new file mode 100644 index 00000000..fd3f6915 --- /dev/null +++ b/modules/nf-core/gunzip/tests/tags.yml @@ -0,0 +1,2 @@ +gunzip: + - modules/nf-core/gunzip/** diff --git a/modules/nf-core/kallistobustools/count/environment.yml b/modules/nf-core/kallistobustools/count/environment.yml new file mode 100644 index 00000000..7ff8a2da --- /dev/null +++ b/modules/nf-core/kallistobustools/count/environment.yml @@ -0,0 +1,7 @@ +name: kallistobustools_count +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::kb-python=0.27.2 diff --git a/modules/nf-core/kallistobustools/count/main.nf b/modules/nf-core/kallistobustools/count/main.nf index b7942fc2..036bb35d 100644 --- a/modules/nf-core/kallistobustools/count/main.nf +++ b/modules/nf-core/kallistobustools/count/main.nf @@ -2,7 +2,7 @@ process KALLISTOBUSTOOLS_COUNT { tag "$meta.id" label 'process_medium' - conda "bioconda::kb-python=0.27.2" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/kb-python:0.27.2--pyhdfd78af_0' : 'biocontainers/kb-python:0.27.2--pyhdfd78af_0' }" diff --git a/modules/nf-core/kallistobustools/count/meta.yml b/modules/nf-core/kallistobustools/count/meta.yml index f25b7bc4..7491248c 100644 --- a/modules/nf-core/kallistobustools/count/meta.yml +++ b/modules/nf-core/kallistobustools/count/meta.yml @@ -13,7 +13,6 @@ tools: documentation: https://kb-python.readthedocs.io/en/latest/index.html tool_dev_url: https://github.com/pachterlab/kb_python licence: MIT License - input: - meta: type: map @@ -49,7 +48,6 @@ input: type: string description: String value defining the sequencing technology used. pattern: "{10XV1,10XV2,10XV3,CELSEQ,CELSEQ2,DROPSEQ,INDROPSV1,INDROPSV2,INDROPSV3,SCRUBSEQ,SURECELL,SMARTSEQ}" - output: - meta: type: map @@ -64,6 +62,7 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@flowuenne" +maintainers: + - "@flowuenne" diff --git a/modules/nf-core/kallistobustools/ref/environment.yml b/modules/nf-core/kallistobustools/ref/environment.yml new file mode 100644 index 00000000..acbd0e0a --- /dev/null +++ b/modules/nf-core/kallistobustools/ref/environment.yml @@ -0,0 +1,8 @@ +name: kallistobustools_ref +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::kb-python=0.27.2 + - conda-forge::requests>=2.23.0 diff --git a/modules/nf-core/kallistobustools/ref/main.nf b/modules/nf-core/kallistobustools/ref/main.nf index 9d7f1741..68d72ca9 100644 --- a/modules/nf-core/kallistobustools/ref/main.nf +++ b/modules/nf-core/kallistobustools/ref/main.nf @@ -2,7 +2,7 @@ process KALLISTOBUSTOOLS_REF { tag "$fasta" label 'process_medium' - conda "bioconda::kb-python=0.27.2" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/kb-python:0.27.2--pyhdfd78af_0' : 'biocontainers/kb-python:0.27.2--pyhdfd78af_0' }" diff --git a/modules/nf-core/kallistobustools/ref/meta.yml b/modules/nf-core/kallistobustools/ref/meta.yml index aca61082..00be5143 100644 --- a/modules/nf-core/kallistobustools/ref/meta.yml +++ b/modules/nf-core/kallistobustools/ref/meta.yml @@ -1,7 +1,11 @@ name: kallistobustools_ref description: index creation for kb count quantification of single-cell data. keywords: - - kallisto-bustools + - scRNA-seq + - count + - single-cell + - kallisto + - bustools - index tools: - kb: @@ -11,7 +15,6 @@ tools: tool_dev_url: https://github.com/pachterlab/kb_python doi: "10.22002/D1.1876" licence: MIT License - input: - fasta: type: file @@ -22,10 +25,9 @@ input: description: Genomic gtf file pattern: "*.{gtf,gtf.gz}" - workflow_mode: - type: value + type: string description: String value defining workflow to use, can be one of "standard", "lamanno", "nucleus" pattern: "{standard,lamanno,nucleus}" - output: - versions: type: file @@ -55,6 +57,7 @@ output: type: file description: intron transcript to capture file pattern: "*intron_t2c.{txt}" - authors: - "@flowuenne" +maintainers: + - "@flowuenne" diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index 65d7dd0d..00cc48d2 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -1,10 +1,10 @@ process MULTIQC { label 'process_single' - conda "bioconda::multiqc=1.15" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/multiqc:1.15--pyhdfd78af_0' : - 'biocontainers/multiqc:1.15--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.18--pyhdfd78af_0' : + 'biocontainers/multiqc:1.18--pyhdfd78af_0' }" input: path multiqc_files, stageAs: "?/*" @@ -25,12 +25,14 @@ process MULTIQC { def args = task.ext.args ?: '' def config = multiqc_config ? "--config $multiqc_config" : '' def extra_config = extra_multiqc_config ? "--config $extra_multiqc_config" : '' + def logo = multiqc_logo ? /--cl-config 'custom_logo: "${multiqc_logo}"'/ : '' """ multiqc \\ --force \\ $args \\ $config \\ $extra_config \\ + $logo \\ . cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml index f93b5ee5..f1aa660e 100644 --- a/modules/nf-core/multiqc/meta.yml +++ b/modules/nf-core/multiqc/meta.yml @@ -1,5 +1,5 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json -name: MultiQC +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: multiqc description: Aggregate results from bioinformatics analyses across many samples into a single report keywords: - QC @@ -13,7 +13,6 @@ tools: homepage: https://multiqc.info/ documentation: https://multiqc.info/docs/ licence: ["GPL-3.0-or-later"] - input: - multiqc_files: type: file @@ -31,7 +30,6 @@ input: type: file description: Optional logo file for MultiQC pattern: "*.{png}" - output: - report: type: file @@ -54,3 +52,8 @@ authors: - "@bunop" - "@drpatelh" - "@jfy133" +maintainers: + - "@abhi18av" + - "@bunop" + - "@drpatelh" + - "@jfy133" diff --git a/modules/nf-core/star/genomegenerate/environment.yml b/modules/nf-core/star/genomegenerate/environment.yml new file mode 100644 index 00000000..93e4476a --- /dev/null +++ b/modules/nf-core/star/genomegenerate/environment.yml @@ -0,0 +1,11 @@ +name: star_genomegenerate + 
+channels: + - conda-forge + - bioconda + - defaults + +dependencies: + - bioconda::samtools=1.18 + - bioconda::star=2.7.10a + - conda-forge::gawk=5.1.0 diff --git a/modules/nf-core/star/genomegenerate/main.nf b/modules/nf-core/star/genomegenerate/main.nf index 43424042..b8855715 100644 --- a/modules/nf-core/star/genomegenerate/main.nf +++ b/modules/nf-core/star/genomegenerate/main.nf @@ -2,10 +2,10 @@ process STAR_GENOMEGENERATE { tag "$fasta" label 'process_high' - conda "bioconda::star=2.7.10a bioconda::samtools=1.16.1 conda-forge::gawk=5.1.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:1df389393721fc66f3fd8778ad938ac711951107-0' : - 'biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:1df389393721fc66f3fd8778ad938ac711951107-0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:ded3841da0194af2701c780e9b3d653a85d27489-0' : + 'biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:ded3841da0194af2701c780e9b3d653a85d27489-0' }" input: tuple val(meta), path(fasta) @@ -19,9 +19,10 @@ process STAR_GENOMEGENERATE { task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - def args_list = args.tokenize() - def memory = task.memory ? "--limitGenomeGenerateRAM ${task.memory.toBytes() - 100000000}" : '' + def args = task.ext.args ?: '' + def args_list = args.tokenize() + def memory = task.memory ? "--limitGenomeGenerateRAM ${task.memory.toBytes() - 100000000}" : '' + def include_gtf = gtf ? 
"--sjdbGTFfile $gtf" : '' if (args_list.contains('--genomeSAindexNbases')) { """ mkdir star @@ -29,7 +30,7 @@ process STAR_GENOMEGENERATE { --runMode genomeGenerate \\ --genomeDir star/ \\ --genomeFastaFiles $fasta \\ - --sjdbGTFfile $gtf \\ + $include_gtf \\ --runThreadN $task.cpus \\ $memory \\ $args @@ -51,7 +52,7 @@ process STAR_GENOMEGENERATE { --runMode genomeGenerate \\ --genomeDir star/ \\ --genomeFastaFiles $fasta \\ - --sjdbGTFfile $gtf \\ + $include_gtf \\ --runThreadN $task.cpus \\ --genomeSAindexNbases \$NUM_BASES \\ $memory \\ @@ -67,30 +68,52 @@ process STAR_GENOMEGENERATE { } stub: - """ - mkdir star - touch star/Genome - touch star/Log.out - touch star/SA - touch star/SAindex - touch star/chrLength.txt - touch star/chrName.txt - touch star/chrNameLength.txt - touch star/chrStart.txt - touch star/exonGeTrInfo.tab - touch star/exonInfo.tab - touch star/geneInfo.tab - touch star/genomeParameters.txt - touch star/sjdbInfo.txt - touch star/sjdbList.fromGTF.out.tab - touch star/sjdbList.out.tab - touch star/transcriptInfo.tab + if (gtf) { + """ + mkdir star + touch star/Genome + touch star/Log.out + touch star/SA + touch star/SAindex + touch star/chrLength.txt + touch star/chrName.txt + touch star/chrNameLength.txt + touch star/chrStart.txt + touch star/exonGeTrInfo.tab + touch star/exonInfo.tab + touch star/geneInfo.tab + touch star/genomeParameters.txt + touch star/sjdbInfo.txt + touch star/sjdbList.fromGTF.out.tab + touch star/sjdbList.out.tab + touch star/transcriptInfo.tab - cat <<-END_VERSIONS > versions.yml - "${task.process}": - star: \$(STAR --version | sed -e "s/STAR_//g") - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') - END_VERSIONS - """ + cat <<-END_VERSIONS > versions.yml + "${task.process}": + star: \$(STAR --version | sed -e "s/STAR_//g") + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; 
s/Using.*\$//') + gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') + END_VERSIONS + """ + } else { + """ + mkdir star + touch star/Genome + touch star/Log.out + touch star/SA + touch star/SAindex + touch star/chrLength.txt + touch star/chrName.txt + touch star/chrNameLength.txt + touch star/chrStart.txt + touch star/genomeParameters.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + star: \$(STAR --version | sed -e "s/STAR_//g") + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') + END_VERSIONS + """ + } } diff --git a/modules/nf-core/star/genomegenerate/meta.yml b/modules/nf-core/star/genomegenerate/meta.yml index eba2d9cf..1061e1b8 100644 --- a/modules/nf-core/star/genomegenerate/meta.yml +++ b/modules/nf-core/star/genomegenerate/meta.yml @@ -31,7 +31,6 @@ input: - gtf: type: file description: GTF file of the reference genome - output: - meta: type: map @@ -46,7 +45,9 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@kevinmenden" - "@drpatelh" +maintainers: + - "@kevinmenden" + - "@drpatelh" diff --git a/modules/nf-core/star/genomegenerate/tests/main.nf.test b/modules/nf-core/star/genomegenerate/tests/main.nf.test new file mode 100644 index 00000000..af0c9421 --- /dev/null +++ b/modules/nf-core/star/genomegenerate/tests/main.nf.test @@ -0,0 +1,117 @@ +nextflow_process { + + name "Test Process STAR_GENOMEGENERATE" + script "../main.nf" + process "STAR_GENOMEGENERATE" + tag "modules" + tag "modules_nfcore" + tag "star" + tag "star/genomegenerate" + + test("homo_sapiens") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], 
checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("index_with_gtf") }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + + test("homo_sapiens-stub") { + + options '-stub' + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("index_with_gtf") }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + + test("homo_sapiens-without_gtf") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)] + ]) + input[1] = Channel.of([ [], [] ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("index_without_gtf") }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + + test("homo_sapiens-without_gtf-stub") { + + options '-stub' + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)] + ]) + input[1] = Channel.of([ [], [] ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("index_without_gtf") }, + { assert 
snapshot(process.out.versions).match("versions") } + ) + } + + } + +} \ No newline at end of file diff --git a/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap b/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap new file mode 100644 index 00000000..9de08c74 --- /dev/null +++ b/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap @@ -0,0 +1,22 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,46b8f1f34bb7f23892cd1eb249ed4d7f" + ] + ], + "timestamp": "2023-12-19T11:05:51.741109" + }, + "index_with_gtf": { + "content": [ + "[Genome, Log.out, SA, SAindex, chrLength.txt, chrName.txt, chrNameLength.txt, chrStart.txt, exonGeTrInfo.tab, exonInfo.tab, geneInfo.tab, genomeParameters.txt, sjdbInfo.txt, sjdbList.fromGTF.out.tab, sjdbList.out.tab, transcriptInfo.tab]" + ], + "timestamp": "2023-12-19T11:38:14.551548" + }, + "index_without_gtf": { + "content": [ + "[Genome, Log.out, SA, SAindex, chrLength.txt, chrName.txt, chrNameLength.txt, chrStart.txt, genomeParameters.txt]" + ], + "timestamp": "2023-12-19T11:38:22.382905" + } +} \ No newline at end of file diff --git a/modules/nf-core/star/genomegenerate/tests/tags.yml b/modules/nf-core/star/genomegenerate/tests/tags.yml new file mode 100644 index 00000000..79f619bf --- /dev/null +++ b/modules/nf-core/star/genomegenerate/tests/tags.yml @@ -0,0 +1,2 @@ +star/genomegenerate: + - modules/nf-core/star/genomegenerate/** diff --git a/modules/nf-core/universc/environment.yml b/modules/nf-core/universc/environment.yml new file mode 100644 index 00000000..e9cdf650 --- /dev/null +++ b/modules/nf-core/universc/environment.yml @@ -0,0 +1,5 @@ +name: universc +channels: + - conda-forge + - bioconda + - defaults diff --git a/modules/nf-core/universc/meta.yml b/modules/nf-core/universc/meta.yml index 681bb849..92a46bc6 100644 --- a/modules/nf-core/universc/meta.yml +++ b/modules/nf-core/universc/meta.yml @@ -15,7 +15,6 @@ tools: tool_dev_url: "https://github.com/minoda-lab/universc" doi: 
"10.1101/2021.01.19.427209" licence: ["GPL-3.0-or-later"] - input: - meta: type: map @@ -26,7 +25,6 @@ input: type: file description: FASTQ or FASTQ.GZ file, list of 2 files for paired-end data pattern: "*.{fastq,fq,fastq.gz,fq.gz}" - output: - outs: type: file @@ -36,7 +34,9 @@ output: type: file description: File containing software version pattern: "versions.yml" - authors: - "@kbattenb" - "@tomkellygenetics" +maintainers: + - "@kbattenb" + - "@tomkellygenetics" From 29eab6b46806f162d8da27c428894d023f22960e Mon Sep 17 00:00:00 2001 From: Gregor Sturm Date: Fri, 22 Dec 2023 08:32:40 +0100 Subject: [PATCH 77/89] Fix numba cache dir --- bin/concat_h5ad.py | 5 +++++ bin/mtx_to_h5ad.py | 6 +++++- modules/local/simpleaf_index.nf | 1 + 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/bin/concat_h5ad.py b/bin/concat_h5ad.py index e38ca80e..6ecc17fd 100755 --- a/bin/concat_h5ad.py +++ b/bin/concat_h5ad.py @@ -1,4 +1,9 @@ #!/usr/bin/env python + +# Set numba chache dir to current working directory (which is a writable mount also in containers) +import os +os.environ["NUMBA_CACHE_DIR"] = "." + import scanpy as sc, anndata as ad, pandas as pd from pathlib import Path import argparse diff --git a/bin/mtx_to_h5ad.py b/bin/mtx_to_h5ad.py index 40e1e025..62449f5f 100755 --- a/bin/mtx_to_h5ad.py +++ b/bin/mtx_to_h5ad.py @@ -1,8 +1,12 @@ #!/usr/bin/env python + +# Set numba chache dir to current working directory (which is a writable mount also in containers) +import os +os.environ["NUMBA_CACHE_DIR"] = "." + import scanpy as sc import pandas as pd import argparse -import os from scipy import io from anndata import AnnData diff --git a/modules/local/simpleaf_index.nf b/modules/local/simpleaf_index.nf index 5e8f5c42..8e8bd519 100644 --- a/modules/local/simpleaf_index.nf +++ b/modules/local/simpleaf_index.nf @@ -27,6 +27,7 @@ process SIMPLEAF_INDEX { """ # export required var export ALEVIN_FRY_HOME=. + export NUMBA_CACHE_DIR=. 
# prep simpleaf simpleaf set-paths From 06c79d3b34b0d7a5b922559d9eaa314570afe18b Mon Sep 17 00:00:00 2001 From: Gregor Sturm Date: Fri, 22 Dec 2023 08:35:47 +0100 Subject: [PATCH 78/89] Fix numba cache dir in simpleaf quant --- modules/local/simpleaf_quant.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/local/simpleaf_quant.nf b/modules/local/simpleaf_quant.nf index f350acf3..abb58404 100644 --- a/modules/local/simpleaf_quant.nf +++ b/modules/local/simpleaf_quant.nf @@ -53,6 +53,7 @@ process SIMPLEAF_QUANT { """ # export required var export ALEVIN_FRY_HOME=. + export NUMBA_CACHE_DIR=. # prep simpleaf simpleaf set-paths From 5e2273b9aa490ad8a3537dd138acd616e6d83a90 Mon Sep 17 00:00:00 2001 From: Gregor Sturm Date: Fri, 22 Dec 2023 08:37:57 +0100 Subject: [PATCH 79/89] black --- bin/concat_h5ad.py | 1 + bin/mtx_to_h5ad.py | 1 + .../nf-core/cellranger/count/templates/cellranger_count.py | 4 ++-- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/bin/concat_h5ad.py b/bin/concat_h5ad.py index 6ecc17fd..43ea071a 100755 --- a/bin/concat_h5ad.py +++ b/bin/concat_h5ad.py @@ -2,6 +2,7 @@ # Set numba chache dir to current working directory (which is a writable mount also in containers) import os + os.environ["NUMBA_CACHE_DIR"] = "." import scanpy as sc, anndata as ad, pandas as pd diff --git a/bin/mtx_to_h5ad.py b/bin/mtx_to_h5ad.py index 62449f5f..3282122d 100755 --- a/bin/mtx_to_h5ad.py +++ b/bin/mtx_to_h5ad.py @@ -2,6 +2,7 @@ # Set numba chache dir to current working directory (which is a writable mount also in containers) import os + os.environ["NUMBA_CACHE_DIR"] = "." 
import scanpy as sc diff --git a/modules/nf-core/cellranger/count/templates/cellranger_count.py b/modules/nf-core/cellranger/count/templates/cellranger_count.py index 1527ba76..4bfb9f4f 100644 --- a/modules/nf-core/cellranger/count/templates/cellranger_count.py +++ b/modules/nf-core/cellranger/count/templates/cellranger_count.py @@ -34,11 +34,11 @@ def chunk_iter(seq, size): # Match R1 in the filename, but only if it is followed by a non-digit or non-character # match "file_R1.fastq.gz", "file.R1_000.fastq.gz", etc. but # do not match "SRR12345", "file_INFIXR12", etc -filename_pattern = r'([^a-zA-Z0-9])R1([^a-zA-Z0-9])' +filename_pattern = r"([^a-zA-Z0-9])R1([^a-zA-Z0-9])" for i, (r1, r2) in enumerate(chunk_iter(fastqs, 2)): # double escapes are required because nextflow processes this python 'template' - if re.sub(filename_pattern, r'\\1R2\\2', r1.name) != r2.name: + if re.sub(filename_pattern, r"\\1R2\\2", r1.name) != r2.name: raise AssertionError( dedent( f"""\ From 49cfbd5fc7bb85b41c6219f2bd5b180f2deaa2d8 Mon Sep 17 00:00:00 2001 From: Gregor Sturm Date: Tue, 2 Jan 2024 11:51:42 +0100 Subject: [PATCH 80/89] Increase time limits --- conf/base.config | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/conf/base.config b/conf/base.config index 8c6f6db9..77f7a9a5 100644 --- a/conf/base.config +++ b/conf/base.config @@ -27,25 +27,25 @@ process { withLabel:process_single { cpus = { check_max( 1 , 'cpus' ) } memory = { check_max( 6.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } + time = { check_max( 8.h * task.attempt, 'time' ) } } withLabel:process_low { cpus = { check_max( 2 * task.attempt, 'cpus' ) } memory = { check_max( 12.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } + time = { check_max( 8.h * task.attempt, 'time' ) } } withLabel:process_medium { cpus = { check_max( 6 * task.attempt, 'cpus' ) } memory = { check_max( 36.GB * task.attempt, 'memory' ) } - time = { 
check_max( 8.h * task.attempt, 'time' ) } + time = { check_max( 120.h * task.attempt, 'time' ) } } withLabel:process_high { cpus = { check_max( 12 * task.attempt, 'cpus' ) } memory = { check_max( 72.GB * task.attempt, 'memory' ) } - time = { check_max( 16.h * task.attempt, 'time' ) } + time = { check_max( 240.h * task.attempt, 'time' ) } } withLabel:process_long { - time = { check_max( 20.h * task.attempt, 'time' ) } + time = { check_max( 240.h * task.attempt, 'time' ) } } withLabel:process_high_memory { memory = { check_max( 200.GB * task.attempt, 'memory' ) } From cc3aa971584020f30d6714deeabf9880bc557280 Mon Sep 17 00:00:00 2001 From: Gregor Sturm Date: Tue, 2 Jan 2024 11:52:44 +0100 Subject: [PATCH 81/89] trigger ci From 9df519a9f4fed6f9b567cba19bed6d37e4de3f4d Mon Sep 17 00:00:00 2001 From: Gregor Sturm Date: Tue, 2 Jan 2024 11:57:07 +0100 Subject: [PATCH 82/89] Update Changelog --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c732afa7..0a698b79 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +- Update template to v2.11.1 ([#279](https://github.com/nf-core/scrnaseq/pull/279)) +- Add support for paired GEX+ATAC sequencing using cellranger-arc ([#274](https://github.com/nf-core/scrnaseq/pull/274)) +- Increase default runtime limits for `medium` and `high` processes ([#281])(https://github.com/nf-core/scrnaseq/pull/281) - Better support for custom protocols ([#273](https://github.com/nf-core/scrnaseq/pull/273)). 
- The universc protocol is now specified via the `--protocol` flag - Any protocol specified is now passed to the respective aligner From 31ea547a44f922a53ba0c4164e78af56c3af9a64 Mon Sep 17 00:00:00 2001 From: Gregor Sturm Date: Tue, 2 Jan 2024 12:50:33 +0100 Subject: [PATCH 83/89] Update CHANGELOG.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Matthias Hörtenhuber --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0a698b79..706c9bfa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Update template to v2.11.1 ([#279](https://github.com/nf-core/scrnaseq/pull/279)) - Add support for paired GEX+ATAC sequencing using cellranger-arc ([#274](https://github.com/nf-core/scrnaseq/pull/274)) -- Increase default runtime limits for `medium` and `high` processes ([#281])(https://github.com/nf-core/scrnaseq/pull/281) +- Increase default runtime limits for all processes ([#281])(https://github.com/nf-core/scrnaseq/pull/281) - Better support for custom protocols ([#273](https://github.com/nf-core/scrnaseq/pull/273)). - The universc protocol is now specified via the `--protocol` flag - Any protocol specified is now passed to the respective aligner From f0d08ac4c22d683a54e11f078c06d35ef80b3fce Mon Sep 17 00:00:00 2001 From: Gregor Sturm Date: Tue, 2 Jan 2024 13:09:54 +0100 Subject: [PATCH 84/89] bump version to 2.5.0 for release --- CHANGELOG.md | 2 +- assets/multiqc_config.yml | 4 ++-- nextflow.config | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 706c9bfa..01fd484a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
-## [Unreleased] +## v2.5.0 - 2024-01-02 - Update template to v2.11.1 ([#279](https://github.com/nf-core/scrnaseq/pull/279)) - Add support for paired GEX+ATAC sequencing using cellranger-arc ([#274](https://github.com/nf-core/scrnaseq/pull/274)) diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 051ed54e..0db840ca 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,7 +1,7 @@ report_comment: > - This report has been generated by the nf-core/scrnaseq + This report has been generated by the nf-core/scrnaseq analysis pipeline. For information about how to interpret these results, please see the - documentation. + documentation. report_section_order: "nf-core-scrnaseq-methods-description": order: -1000 diff --git a/nextflow.config b/nextflow.config index 628bb881..6155b7c4 100644 --- a/nextflow.config +++ b/nextflow.config @@ -272,7 +272,7 @@ manifest { description = """Pipeline for processing 10x Genomics single cell rnaseq data""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '2.5.0dev' + version = '2.5.0' doi = '10.5281/zenodo.3568187' } From 584ddf60b37c1dc8f33b09aa57ba4e56812f540e Mon Sep 17 00:00:00 2001 From: Gregor Sturm Date: Fri, 5 Jan 2024 14:21:31 +0100 Subject: [PATCH 85/89] Revert "Increase time limits" This reverts commit 49cfbd5fc7bb85b41c6219f2bd5b180f2deaa2d8. 
--- conf/base.config | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/conf/base.config b/conf/base.config index 77f7a9a5..8c6f6db9 100644 --- a/conf/base.config +++ b/conf/base.config @@ -27,25 +27,25 @@ process { withLabel:process_single { cpus = { check_max( 1 , 'cpus' ) } memory = { check_max( 6.GB * task.attempt, 'memory' ) } - time = { check_max( 8.h * task.attempt, 'time' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } } withLabel:process_low { cpus = { check_max( 2 * task.attempt, 'cpus' ) } memory = { check_max( 12.GB * task.attempt, 'memory' ) } - time = { check_max( 8.h * task.attempt, 'time' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } } withLabel:process_medium { cpus = { check_max( 6 * task.attempt, 'cpus' ) } memory = { check_max( 36.GB * task.attempt, 'memory' ) } - time = { check_max( 120.h * task.attempt, 'time' ) } + time = { check_max( 8.h * task.attempt, 'time' ) } } withLabel:process_high { cpus = { check_max( 12 * task.attempt, 'cpus' ) } memory = { check_max( 72.GB * task.attempt, 'memory' ) } - time = { check_max( 240.h * task.attempt, 'time' ) } + time = { check_max( 16.h * task.attempt, 'time' ) } } withLabel:process_long { - time = { check_max( 240.h * task.attempt, 'time' ) } + time = { check_max( 20.h * task.attempt, 'time' ) } } withLabel:process_high_memory { memory = { check_max( 200.GB * task.attempt, 'memory' ) } From b5bcedbcc6c877d78f6d0ae7dd18fcb42e32c521 Mon Sep 17 00:00:00 2001 From: Gregor Sturm Date: Fri, 5 Jan 2024 14:27:36 +0100 Subject: [PATCH 86/89] Move increased runtime limits from base.config to modules.config --- CHANGELOG.md | 2 +- conf/base.config | 4 ---- conf/modules.config | 11 +++++++++++ 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 706c9bfa..a8954815 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Update template to 
v2.11.1 ([#279](https://github.com/nf-core/scrnaseq/pull/279)) - Add support for paired GEX+ATAC sequencing using cellranger-arc ([#274](https://github.com/nf-core/scrnaseq/pull/274)) -- Increase default runtime limits for all processes ([#281])(https://github.com/nf-core/scrnaseq/pull/281) +- Increase default runtime limits for some processes ([#281])(https://github.com/nf-core/scrnaseq/pull/281) - Better support for custom protocols ([#273](https://github.com/nf-core/scrnaseq/pull/273)). - The universc protocol is now specified via the `--protocol` flag - Any protocol specified is now passed to the respective aligner diff --git a/conf/base.config b/conf/base.config index 8c6f6db9..eea930a9 100644 --- a/conf/base.config +++ b/conf/base.config @@ -60,8 +60,4 @@ process { withName:CUSTOM_DUMPSOFTWAREVERSIONS { cache = false } - //Fix for issue 196 - withName: 'NFCORE_SCRNASEQ:SCRNASEQ:SCRNASEQ_ALEVIN:ALEVINQC' { - time = '20.h' - } } diff --git a/conf/modules.config b/conf/modules.config index 3bd0631b..5813926a 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -17,6 +17,10 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] + withName: FASTQC { + time = { check_max( 120.h * task.attempt, 'time' ) } + } + withName: SAMPLESHEET_CHECK { publishDir = [ path: { "${params.outdir}/pipeline_info" }, @@ -79,6 +83,7 @@ if(params.aligner == "cellranger") { mode: params.publish_dir_mode ] ext.args = {"--chemistry ${meta.chemistry} " + (meta.expected_cells ? "--expect-cells ${meta.expected_cells}" : '')} + time = { check_max( 240.h * task.attempt, 'time' ) } } } } @@ -105,6 +110,7 @@ if(params.aligner == "cellrangerarc") { mode: params.publish_dir_mode ] ext.args = {meta.expected_cells ? 
"--expect-cells ${meta.expected_cells}" : ''} + time = { check_max( 240.h * task.attempt, 'time' ) } } } } @@ -134,6 +140,7 @@ if(params.aligner == "universc") { path: "${params.outdir}/universc", mode: params.publish_dir_mode ] + time = { check_max( 240.h * task.attempt, 'time' ) } } } } @@ -159,6 +166,10 @@ if (params.aligner == "alevin") { ] ext.args = "-r cr-like" } + //Fix for issue 196 + withName: 'ALEVINQC' { + time = '120.h' + } } } From bc8825a136c92ff147afe7bbb3095125b99918c6 Mon Sep 17 00:00:00 2001 From: Gregor Sturm Date: Fri, 5 Jan 2024 14:29:05 +0100 Subject: [PATCH 87/89] Update changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a8954815..c8cacebb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Update template to v2.11.1 ([#279](https://github.com/nf-core/scrnaseq/pull/279)) - Add support for paired GEX+ATAC sequencing using cellranger-arc ([#274](https://github.com/nf-core/scrnaseq/pull/274)) -- Increase default runtime limits for some processes ([#281])(https://github.com/nf-core/scrnaseq/pull/281) +- Increase default runtime limits for some processes ([#281](https://github.com/nf-core/scrnaseq/pull/281), [#284](https://github.com/nf-core/scrnaseq/pull/284)) - Better support for custom protocols ([#273](https://github.com/nf-core/scrnaseq/pull/273)). 
- The universc protocol is now specified via the `--protocol` flag - Any protocol specified is now passed to the respective aligner From 04aadab705eb2d20528311209142f178b23555df Mon Sep 17 00:00:00 2001 From: heylf Date: Mon, 8 Jan 2024 16:24:15 +0100 Subject: [PATCH 88/89] Adding GHGA contribution --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index bf9dd2b8..5ac84a92 100644 --- a/README.md +++ b/README.md @@ -90,12 +90,13 @@ For more details about the output files and reports, please refer to the nf-core/scrnaseq was originally written by Bailey PJ, Botvinnik O, Marques de Almeida F, Gabernet G, Peltzer A, Sturm G. -We thank the following people for their extensive assistance in the development of this pipeline: +We thank the following people and teams for their extensive assistance in the development of this pipeline: - @heylf - @KevinMenden - @FloWuenne - @rob-p +- (GHGA)[https://www.ghga.de/] ## Contributions and Support From e7f01753e6364066d42bf192f76508a5116226b4 Mon Sep 17 00:00:00 2001 From: heylf Date: Mon, 8 Jan 2024 16:24:48 +0100 Subject: [PATCH 89/89] Adding GHGA contribution --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 5ac84a92..55044582 100644 --- a/README.md +++ b/README.md @@ -96,7 +96,7 @@ We thank the following people and teams for their extensive assistance in the de - @KevinMenden - @FloWuenne - @rob-p -- (GHGA)[https://www.ghga.de/] +- [GHGA](https://www.ghga.de/) ## Contributions and Support