diff --git a/README.md b/README.md index 71689af..f8736dd 100644 --- a/README.md +++ b/README.md @@ -57,8 +57,7 @@ nextflow run nf-core/spatialtranscriptomics \ ``` > [!WARNING] -> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; -> see [docs](https://nf-co.re/usage/configuration#custom-configuration-files). +> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/usage/configuration#custom-configuration-files). For more details and further functionality, please refer to the [usage documentation](https://nf-co.re/spatialtranscriptomics/usage) and the [parameter documentation](https://nf-co.re/spatialtranscriptomics/parameters). @@ -74,18 +73,18 @@ nf-core/spatialtranscriptomics was originally developed by the Jackson Laboratory1, up to the [0.1.0](https://github.com/nf-core/spatialtranscriptomics/releases/tag/0.1.0) tag. It was further developed in a collaboration between the [National Bioinformatics Infrastructure Sweden](https://nbis.se/) and [National Genomics -Infastructure](https://ngisweden.scilifelab.se/) within [SciLifeLab](https://scilifelab.se/); +Infrastructure](https://ngisweden.scilifelab.se/) within [SciLifeLab](https://scilifelab.se/); it is currently developed and maintained by [Erik Fasterius](https://github.com/fasterius) and [Christophe Avenel](https://github.com/cavenel). Many thanks to others who have helped out along the way too, especially [Gregor Sturm](https://github.com/grst)! 
-1 Supported by grants from the US National Institutes of Health +_1 Supported by grants from the US National Institutes of Health [U24CA224067](https://reporter.nih.gov/project-details/10261367) and [U54AG075941](https://reporter.nih.gov/project-details/10376627). Original authors [Dr. Sergii Domanskyi](https://github.com/sdomanskyi), Prof. Jeffrey -Chuang and Dr. Anuj Srivastava. +Chuang and Dr. Anuj Srivastava._ ## Contributions and Support diff --git a/bin/st_spatial_de.qmd b/bin/st_spatial_de.qmd index f1f3265..d8a407a 100644 --- a/bin/st_spatial_de.qmd +++ b/bin/st_spatial_de.qmd @@ -76,7 +76,7 @@ Then we can inspect significant genes that varies in space and visualize them wi ```{python} results_tab = st_adata.var.sort_values("qval", ascending=True) results_tab.to_csv(saveSpatialDEFileName) -results_tab.head(10) +results_tab.head(plotTopHVG) ``` ```{python} diff --git a/conf/analysis.config b/conf/analysis.config deleted file mode 100644 index 5e062eb..0000000 --- a/conf/analysis.config +++ /dev/null @@ -1,26 +0,0 @@ -/* -Default config options -*/ - -params { - - // Data loading - st_load_min_counts = 1 - st_load_min_cells = 1 - - // Preprocessing, QC and normalisation - st_preprocess_fig_size = 6 - st_preprocess_min_counts = 500 - st_preprocess_min_genes = 250 - st_preprocess_min_cells = 1 - st_preprocess_hist_qc_max_total_counts = 10000 - st_preprocess_hist_qc_min_gene_counts = 4000 - st_preprocess_hist_qc_bins = 40 - - // Clustering - st_cluster_resolution = 1 - - // Spatial differential expression - st_spatial_de_top_hgv = 15 - st_spatial_de_ncols = 5 -} diff --git a/docs/usage.md b/docs/usage.md index 8010490..e255a6f 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -14,13 +14,17 @@ in the examples below and depends on the input data type. Use this parameter to --input '[path to samplesheet file]' ``` -The workflow will automatically detect the samplesheet type and run the appropriate analysis steps. 
+There are two types of samplesheets that the pipeline can handle: those +specifying _raw data_ (to be analysed by Space Ranger) and _processed data_ +(_i.e._ already analysed by Space Ranger). The workflow will automatically +detect the samplesheet type and run the appropriate analysis steps. The two +types of samplesheet are described in the following sections. ### Raw spatial data -This section describes samplesheets for processing _raw spatial data_ yet to be analyzed with Space Ranger. +This section describes samplesheets for processing _raw spatial data_ yet to be analysed with Space Ranger. -Here is an example of a typical samplesheet for analyzing FFPE or fresh frozen (FF) data with bright field microscopy +Here is an example of a typical samplesheet for analysing FFPE or fresh frozen (FF) data with bright field microscopy imagery: ```no-highlight @@ -46,29 +50,31 @@ SAMPLE_1,fastqs_1/,cytassist_1.tif,V11J26,B1 SAMPLE_2,fastqs_2/,cytassist_2.tif,V11J26,B1 ``` -Depending on the experimental setup, (additional) color composite fluorescence images or dark background +Depending on the experimental setup, (additional) colour composite fluorescence images or dark background fluorescence images can be supplied using the `colorizedimage` or `darkimage` columns, respectively. Please refer to the following table for an overview of all supported columns: -| Column | Description | -| ------------------ | ------------------------------------------------------------------------------------------------------------------- | -| `sample` | Unique sample identifier. MUST match the prefix of the fastq files | -| `fastq_dir` | Path to directory where the sample FASTQ files are stored. May be a `.tar.gz` file instead of a directory. 
| -| `image` | Brightfield microscopy image | -| `cytaimage` | Brightfield tissue image captured with Cytassist device | -| `colorizedimage` | A color composite of one or more fluorescence image channels saved as a single-page, single-file color TIFF or JPEG | -| `darkimage` | Dark background fluorescence microscopy image | -| `slide` | The Visium slide ID used for the sequencing. | -| `area` | Which slide area contains the tissue sample. | -| `manual_alignment` | Path to the manual alignment file (optional) | -| `slidefile` | Slide specification as JSON. Overrides `slide` and `area` if specified. (optional) | - -> **NB:** +| Column | Description | +| ------------------ | --------------------------------------------------------------------------------------------------------------------- | +| `sample` | Unique sample identifier. MUST match the prefix of the fastq files | +| `fastq_dir` | Path to directory where the sample FASTQ files are stored. May be a `.tar.gz` file instead of a directory. | +| `image` | Brightfield microscopy image | +| `cytaimage` | Brightfield tissue image captured with Cytassist device | +| `colorizedimage` | A colour composite of one or more fluorescence image channels saved as a single-page, single-file colour TIFF or JPEG | +| `darkimage` | Dark background fluorescence microscopy image | +| `slide` | The Visium slide ID used for the sequencing. | +| `area` | Which slide area contains the tissue sample. | +| `manual_alignment` | Path to the manual alignment file (optional) | +| `slidefile` | Slide specification as JSON. Overrides `slide` and `area` if specified. (optional) | + +> [!NOTE] > -> - You need to specify _at least one_ of `image`, `cytaimage`, `darkimage`, `colorizedimage`. Most commonly, you'll -> specify `image` for bright field microscopy data, or `cytaimage` for tissue scans generated with the 10x Cyatassist -> device. 
Please refer to the [Space Ranger documentation](https://support.10xgenomics.com/spatial-gene-expression/software/pipelines/latest/what-is-space-ranger), how multiple image types can be combined. +> - You need to specify _at least one_ of `image`, `cytaimage`, `darkimage`, +> `colorizedimage`. Most commonly, you'll specify `image` for bright field +> microscopy data, or `cytaimage` for tissue scans generated with the 10x +> Cytassist device. Please refer to the [Space Ranger documentation](https://support.10xgenomics.com/spatial-gene-expression/software/pipelines/latest/what-is-space-ranger) +> for how multiple image types can be combined. > - The `manual_alignment` column is only required for samples for which a > manual alignment file is needed and can be ignored if you're using automatic > alignment. @@ -80,8 +86,8 @@ appropriate for your samples. ### Processed data -If your data has already been processed by Space Ranger and you are only interested in running downstream QC steps, -the samplesheet looks as follows: +If your data has already been processed by Space Ranger and you are only +interested in running downstream steps, the samplesheet looks as follows: ```no-highlight sample,spaceranger_dir @@ -118,15 +124,15 @@ path to its directory (or another link from the 10X website above) using the `--spaceranger_reference` parameter, otherwise the pipeline will download the default human reference for you automatically. -> **Important**: -> +> [!NOTE] > For FFPE and Cytassist experiments, you need to manually supply the appropriate probset using the `--spaceranger_probeset` parameter > Please refer to the [Spaceranger Downloads page](https://support.10xgenomics.com/spatial-gene-expression/software/downloads/latest) > to obtain the correct probeset. ## Analysis options -The pipeline is using Python and the scverse tools to do the downstream analysis (quality control, filtering, clustering, spatial differential equations). 
+The pipeline uses Python and the `scverse` tools to do the downstream analysis +(quality control, filtering, clustering, spatial differential expression). ### Parameters for Quality Control and Filtering: @@ -135,7 +141,7 @@ The following parameters are exposed for preprocessing: - `--st_preprocess_min_counts`: Minimum number of counts for a spot to be considered in the analysis. - `--st_preprocess_min_genes`: Minimum number of genes expressed in a spot for the spot to be considered. - `--st_preprocess_min_cells`: Minimum number of spots expressing a gene for the gene to be considered. -- `--st_preprocess_fig_size`: The figure size for the plots generated during preprocessing (e.g., quality control plots). +- `--st_preprocess_fig_size`: The figure size for the plots generated during preprocessing (_e.g._, quality control plots). - `--st_preprocess_hist_qc_max_total_counts`: Maximum total counts for the histogram plot in quality control. - `--st_preprocess_hist_qc_min_gene_counts`: Minimum gene counts for the histogram plot in quality control. - `--st_preprocess_hist_qc_bins`: Number of bins for the histogram plot in quality control. @@ -153,14 +159,14 @@ The following parameters are exposed for preprocessing: The typical command for running the pipeline is as follows: ```bash -# Run the pipeline with raw data yet to be processed by Space Ranger -nextflow run nf-core/spatialtranscriptomics --input samplesheet.csv --outdir <OUTDIR> -profile docker - -# Run pipeline with data already processed by Space Ranger -nextflow run nf-core/spatialtranscriptomics --input samplesheet.csv --outdir <OUTDIR> -profile docker +nextflow run \ + nf-core/spatialtranscriptomics \ + --input <SAMPLESHEET> \ + --outdir <OUTDIR> \ + -profile docker ``` -This will launch the pipeline with the docker configuration profile. See below for more information about profiles. +This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles. 
Note that the pipeline will create the following files in your working directory: @@ -188,8 +194,8 @@ nextflow run nf-core/spatialtranscriptomics -profile docker -params-file params. with `params.yaml` containing: ```yaml -input: './samplesheet.csv' -outdir: './results/' +input: '<SAMPLESHEET>' +outdir: '<OUTDIR>' <...> ``` @@ -211,7 +217,7 @@ First, go to the [nf-core/spatialtranscriptomics releases page](https://github.c This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. -To further assist in reproducbility, you can use share and re-use [parameter files](#running-the-pipeline) to repeat pipeline runs with the same settings without having to write out a command with every single parameter. +To further assist in reproducibility, you can share and re-use [parameter files](#running-the-pipeline) to repeat pipeline runs with the same settings without having to write out a command with every single parameter. :::tip If you wish to share such profile (such as upload as supplementary material for academic publications), make sure to NOT include cluster specific paths to files, nor institutional specific profiles. @@ -229,10 +235,11 @@ Use this parameter to choose a configuration profile. Profiles can give configur Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Apptainer, Conda) - see below. +> [!NOTE] > We highly recommend the use of Docker or Singularity containers for full -> pipeline reproducibility, however when this is not possible, Conda is also -> supported. Please note that Conda is not at all supported for Space Ranger -> processing, and only supported on non-ARM64 architectures for analyses +> pipeline reproducibility, however when this is not possible, Conda is +> partially supported. 
Please note that Conda is not at all supported for Space +> Ranger processing, and only supported on non-ARM64 architectures for analyses > downstream of Space Ranger. The pipeline also dynamically loads configurations from [https://github.com/nf-core/configs](https://github.com/nf-core/configs) when it runs, making multiple config profiles for various institutional clusters available at run time. For more information and to see if your system is available in these configs please see the [nf-core/configs documentation](https://github.com/nf-core/configs#documentation). @@ -240,7 +247,7 @@ The pipeline also dynamically loads configurations from [https://github.com/nf-c Note that multiple profiles can be loaded, for example: `-profile test,docker` - the order of arguments is important! They are loaded in sequence, so later profiles can overwrite earlier profiles. -If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended, since it can lead to different results on different machines dependent on the computer enviroment. +If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended, since it can lead to different results on different machines dependent on the computer environment. 
- `test` - A profile with a complete configuration for automated testing diff --git a/modules.json b/modules.json index b06cceb..ad78221 100644 --- a/modules.json +++ b/modules.json @@ -12,12 +12,12 @@ }, "fastqc": { "branch": "master", - "git_sha": "65ad3e0b9a4099592e1102e92e10455dc661cf53", + "git_sha": "617777a807a1770f73deb38c80004bac06807eef", "installed_by": ["modules"] }, "multiqc": { "branch": "master", - "git_sha": "4ab13872435962dadc239979554d13709e20bf29", + "git_sha": "642a0d8afe373ac45244a7947fb8a6c0a5a312d4", "installed_by": ["modules"] }, "spaceranger/count": { diff --git a/modules/local/st_spatial_de.nf b/modules/local/st_spatial_de.nf index 14dbc18..6f0f617 100644 --- a/modules/local/st_spatial_de.nf +++ b/modules/local/st_spatial_de.nf @@ -37,6 +37,7 @@ process ST_SPATIAL_DE { --output "st_spatial_de.html" \ -P fileNameST:${st_adata_norm} \ -P numberOfColumns:${params.st_spatial_de_ncols} \ + -P plotTopHVG:${params.st_spatial_de_top_hvg} \ -P saveDEFileName:st_gde.csv \ -P saveSpatialDEFileName:st_spatial_de.csv diff --git a/modules/nf-core/fastqc/tests/main.nf.test b/modules/nf-core/fastqc/tests/main.nf.test index 6437a14..ad9bc54 100644 --- a/modules/nf-core/fastqc/tests/main.nf.test +++ b/modules/nf-core/fastqc/tests/main.nf.test @@ -3,23 +3,21 @@ nextflow_process { name "Test Process FASTQC" script "../main.nf" process "FASTQC" + tag "modules" tag "modules_nfcore" tag "fastqc" - test("Single-Read") { + test("sarscov2 single-end [fastq]") { when { - params { - outdir = "$outputDir" - } process { """ input[0] = [ - [ id: 'test', single_end:true ], - [ - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) - ] + [ id: 'test', single_end:true ], + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] ] """ } @@ -28,14 +26,195 @@ nextflow_process { then { assertAll ( { assert process.success }, + // NOTE The report contains the date inside it, which means that the md5sum is 
stable per day, but not longer than that. So you can't md5sum it. // looks like this: <div id="header_filename">Mon 2 Oct 2023<br/>test.gz</div>
// https://github.com/nf-core/modules/pull/3903#issuecomment-1743620039 - { assert process.out.html.get(0).get(1) ==~ ".*/test_fastqc.html" }, - { assert path(process.out.html.get(0).get(1)).getText().contains("File typeConventional base calls") }, - { assert snapshot(process.out.versions).match("versions") }, - { assert process.out.zip.get(0).get(1) ==~ ".*/test_fastqc.zip" } + + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, + + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("sarscov2 paired-end [fastq]") { + + when { + process { + """ + input[0] = [ + [id: 'test', single_end: false], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, + { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, + { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, + { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, + { assert path(process.out.html[0][1][0]).text.contains("File typeConventional base calls") }, + { assert path(process.out.html[0][1][1]).text.contains("File typeConventional base calls") }, + + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("sarscov2 interleaved [fastq]") { + + when { + process { + """ + input[0] = [ + [id: 'test', single_end: false], // meta map + file(params.test_data['sarscov2']['illumina']['test_interleaved_fastq_gz'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert 
process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, + + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("sarscov2 paired-end [bam]") { + + when { + process { + """ + input[0] = [ + [id: 'test', single_end: false], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, + + { assert snapshot(process.out.versions).match("versions") } ) } } + + test("sarscov2 multiple [fastq]") { + + when { + process { + """ + input[0] = [ + [id: 'test', single_end: false], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test2_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test2_2_fastq_gz'], checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, + { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, + { assert process.out.html[0][1][2] ==~ ".*/test_3_fastqc.html" }, + { assert process.out.html[0][1][3] ==~ ".*/test_4_fastqc.html" }, + { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, + { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, + { assert process.out.zip[0][1][2] ==~ ".*/test_3_fastqc.zip" }, + { assert process.out.zip[0][1][3] ==~ ".*/test_4_fastqc.zip" }, + { assert path(process.out.html[0][1][0]).text.contains("File 
typeConventional base calls") }, + { assert path(process.out.html[0][1][1]).text.contains("File typeConventional base calls") }, + { assert path(process.out.html[0][1][2]).text.contains("File typeConventional base calls") }, + { assert path(process.out.html[0][1][3]).text.contains("File typeConventional base calls") }, + + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("sarscov2 custom_prefix") { + + when { + process { + """ + input[0] = [ + [ id:'mysample', single_end:true ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1] ==~ ".*/mysample_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/mysample_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, + + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("sarscov2 single-end [fastq] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id: 'test', single_end:true ], + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.html.collect { file(it[1]).getName() } + + process.out.zip.collect { file(it[1]).getName() } + + process.out.versions ).match() } + ) + } + } + } diff --git a/modules/nf-core/fastqc/tests/main.nf.test.snap b/modules/nf-core/fastqc/tests/main.nf.test.snap index 636a32c..5ef5afb 100644 --- a/modules/nf-core/fastqc/tests/main.nf.test.snap +++ b/modules/nf-core/fastqc/tests/main.nf.test.snap @@ -1,10 +1,20 @@ { + "sarscov2 single-end [fastq] - stub": { + "content": [ + [ + "test.html", + "test.zip", + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "timestamp": "2023-12-29T02:48:05.126117287" + }, "versions": { "content": [ [ 
"versions.yml:md5,e1cc25ca8af856014824abd842e93978" ] ], - "timestamp": "2023-10-09T23:40:54+0000" + "timestamp": "2023-12-29T02:46:49.507942667" } } \ No newline at end of file diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index 00cc48d..70708f3 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -43,7 +43,7 @@ process MULTIQC { stub: """ - touch multiqc_data + mkdir multiqc_data touch multiqc_plots touch multiqc_report.html diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml index f1aa660..45a9bc3 100644 --- a/modules/nf-core/multiqc/meta.yml +++ b/modules/nf-core/multiqc/meta.yml @@ -1,4 +1,3 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: multiqc description: Aggregate results from bioinformatics analyses across many samples into a single report keywords: diff --git a/modules/nf-core/multiqc/tests/main.nf.test b/modules/nf-core/multiqc/tests/main.nf.test index c2dad21..d0438ed 100644 --- a/modules/nf-core/multiqc/tests/main.nf.test +++ b/modules/nf-core/multiqc/tests/main.nf.test @@ -7,12 +7,9 @@ nextflow_process { tag "modules_nfcore" tag "multiqc" - test("MULTIQC: FASTQC") { + test("sarscov2 single-end [fastqc]") { when { - params { - outdir = "$outputDir" - } process { """ input[0] = Channel.of([file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz_fastqc_zip'], checkIfExists: true)]) @@ -26,20 +23,17 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert path(process.out.report.get(0)).exists() }, - { assert path(process.out.data.get(0)).exists() }, - { assert path(process.out.versions.get(0)).getText().contains("multiqc") } + { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, + { assert process.out.data[0] ==~ ".*/multiqc_data" }, + { assert snapshot(process.out.versions).match("versions") } ) } } - test("MULTIQC: FASTQC and a config file") { 
+ test("sarscov2 single-end [fastqc] [config]") { when { - params { - outdir = "$outputDir" - } process { """ input[0] = Channel.of([file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz_fastqc_zip'], checkIfExists: true)]) @@ -53,9 +47,35 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert path(process.out.report.get(0)).exists() }, - { assert path(process.out.data.get(0)).exists() }, - { assert path(process.out.versions.get(0)).getText().contains("multiqc") } + { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, + { assert process.out.data[0] ==~ ".*/multiqc_data" }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("sarscov2 single-end [fastqc] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz_fastqc_zip'], checkIfExists: true)]) + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.report.collect { file(it).getName() } + + process.out.data.collect { file(it).getName() } + + process.out.plots.collect { file(it).getName() } + + process.out.versions ).match() } ) } diff --git a/modules/nf-core/multiqc/tests/main.nf.test.snap b/modules/nf-core/multiqc/tests/main.nf.test.snap new file mode 100644 index 0000000..d087a9d --- /dev/null +++ b/modules/nf-core/multiqc/tests/main.nf.test.snap @@ -0,0 +1,21 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,f81e19ab3a8e2b6f2b5d22078117df71" + ] + ], + "timestamp": "2023-12-30T00:26:14.048089591" + }, + "sarscov2 single-end [fastqc] - stub": { + "content": [ + [ + "multiqc_report.html", + "multiqc_data", + "multiqc_plots", + "versions.yml:md5,f81e19ab3a8e2b6f2b5d22078117df71" + ] + ], + "timestamp": "2023-12-30T00:26:52.963964055" + } +} \ No newline at end of file diff --git a/nextflow.config b/nextflow.config index a801be0..e351f8b 100644 --- 
a/nextflow.config +++ b/nextflow.config @@ -17,6 +17,22 @@ params { spaceranger_probeset = null spaceranger_save_reference = false + // Preprocessing, QC and normalisation + st_preprocess_fig_size = 6 + st_preprocess_min_counts = 500 + st_preprocess_min_genes = 250 + st_preprocess_min_cells = 1 + st_preprocess_hist_qc_max_total_counts = 10000 + st_preprocess_hist_qc_min_gene_counts = 4000 + st_preprocess_hist_qc_bins = 40 + + // Clustering + st_cluster_resolution = 1 + + // Spatial differential expression + st_spatial_de_top_hvg = 15 + st_spatial_de_ncols = 5 + // MultiQC options multiqc_config = null multiqc_title = null @@ -64,9 +80,6 @@ params { // Load base.config by default for all pipelines includeConfig 'conf/base.config' -// Default analysis parameters -includeConfig 'conf/analysis.config' - // Load nf-core custom profiles from different Institutions try { includeConfig "${params.custom_config_base}/nfcore_custom.config" diff --git a/nextflow_schema.json b/nextflow_schema.json index b489a54..31648bf 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -19,7 +19,7 @@ "mimetype": "text/csv", "pattern": "^\\S+\\.csv$", "description": "Path to comma-separated file containing information about the samples in the experiment.", - "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/spatialtranscriptomics/usage#samplesheet-input).", + "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline, use this parameter to specify its location. It has to be a comma-separated file with 2 or 5 columns, plus a header row. 
See [usage docs](https://nf-co.re/spatialtranscriptomics/usage#samplesheet-input).", "fa_icon": "fas fa-file-csv" }, "outdir": { @@ -28,122 +28,133 @@ "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.", "fa_icon": "fas fa-folder-open" }, - "spaceranger_reference": { + "email": { "type": "string", - "format": "file-path", - "description": "Location of Space Ranger reference directory. May be packed as `tar.gz` file.", - "fa_icon": "fas fa-folder-open", - "default": "https://cf.10xgenomics.com/supp/spatial-exp/refdata-gex-GRCh38-2020-A.tar.gz" + "description": "Email address for completion summary.", + "fa_icon": "fas fa-envelope", + "help_text": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.", + "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$" }, + "multiqc_title": { + "type": "string", + "description": "MultiQC report title. Printed as page header, used for filename if not otherwise specified.", + "fa_icon": "fas fa-file-signature" + } + } + }, + + "spaceranger_options": { + "title": "Space Ranger options", + "type": "object", + "fa_icon": "fas fa-rocket", + "description": "Options related to Space Ranger execution and raw spatial data processing", + "properties": { "spaceranger_probeset": { "type": "string", "format": "file-path", "mimetype": "text/csv", "pattern": "^\\S+\\.csv$", - "description": "Location of Space Ranger probeset file", + "description": "Location of Space Ranger probeset file.", "fa_icon": "fas fa-file-csv" }, + "spaceranger_reference": { + "type": "string", + "format": "file-path", + "description": "Location of Space Ranger reference directory. 
May be packed as `tar.gz` file.", + "help_text": "Please see the [10x website](https://support.10xgenomics.com/spatial-gene-expression/software/downloads/latest) to download either of the supported human or mouse references. If not specified, the GRCh38 human reference is automatically downloaded and used.", + "fa_icon": "fas fa-folder-open", + "default": "https://cf.10xgenomics.com/supp/spatial-exp/refdata-gex-GRCh38-2020-A.tar.gz" + } + } + }, + + "optional_outputs": { + "title": "Optional outputs", + "type": "object", + "fa_icon": "fas fa-floppy-disk", + "description": "Additional intermediate output files that can be optionally saved.", + "properties": { "spaceranger_save_reference": { "type": "boolean", "description": "Save the extracted tar archive of the Space Ranger reference.", + "help_text": "By default, extracted versions of archived Space Ranger reference data will not be saved to the results directory. Specify this flag (or set to true in your config file) to copy these files to the results directory when complete.", "fa_icon": "fas fa-floppy-disk" }, "save_untar_output": { "type": "boolean", "description": "Save extracted tar archives of input data.", + "help_text": "By default, extracted versions of archived input data will not be saved to the results directory. Specify this flag (or set to true in your config file) to copy these files to the results directory when complete.", "fa_icon": "fas fa-floppy-disk" - }, - "email": { - "type": "string", - "description": "Email address for completion summary.", - "fa_icon": "fas fa-envelope", - "help_text": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. 
If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.", - "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$" - }, - "multiqc_title": { - "type": "string", - "description": "MultiQC report title. Printed as page header, used for filename if not otherwise specified.", - "fa_icon": "fas fa-file-signature" } } }, "analysis_options": { - "title": "Analysis option", + "title": "Analysis options", "type": "object", - "description": "Define options for each tool in the pipeline", - "default": "", + "fa_icon": "fas fa-magnifying-glass-chart", + "description": "Options related to the downstream analyses performed by the pipeline.", "properties": { - "st_load_min_counts": { - "type": "integer", - "default": 1, - "description": "Minimum genes count", - "fa_icon": "fas fa-hashtag" - }, - "st_load_min_cells": { - "type": "integer", - "default": 1, - "description": "Minimum cells count", - "fa_icon": "fas fa-hashtag" - }, "st_preprocess_fig_size": { "type": "integer", "default": 6, - "description": "Figure size, inches", + "description": "The size of the QC figures, in inches.", "fa_icon": "fas fa-up-right-and-down-left-from-center" }, "st_preprocess_min_counts": { "type": "integer", "default": 500, - "description": "Minimum UMI count", + "description": "The minimum number of UMIs needed in a spot for that spot to pass the filtering.", "fa_icon": "fas fa-hashtag" }, "st_preprocess_min_genes": { "type": "integer", "default": 250, - "description": "Minimum genes count", + "description": "The minimum number of expressed genes in a spot needed for that spot to pass the filtering.", "fa_icon": "fas fa-hashtag" }, "st_preprocess_min_cells": { "type": "integer", "default": 1, - "description": "Minimum cells count", + "description": "The minimum number of spots in which a gene is expressed for that gene to pass the filtering.", "fa_icon": "fas fa-hashtag" }, 
"st_preprocess_hist_qc_max_total_counts": { "type": "integer", "default": 10000, - "description": "Max total counts cutoff for histogram QC plot", + "description": "Max total counts cutoff for histogram QC plot.", "fa_icon": "fas fa-hashtag" }, "st_preprocess_hist_qc_min_gene_counts": { "type": "integer", "default": 4000, - "description": "Min total gene counts cutoff for histogram QC plot", + "description": "Min total gene counts cutoff for histogram QC plot.", "fa_icon": "fas fa-hashtag" }, "st_preprocess_hist_qc_bins": { "type": "integer", "default": 40, - "description": "Histogram QC plot number of bins", + "description": "The number of bins for the QC histogram plots.", "fa_icon": "fas fa-chart-simple" }, "st_cluster_resolution": { "type": "number", "default": 0.4, - "description": "Clustering resolution for ST spots", + "description": "The resolution for the clustering of the spots.", + "help_text": "The resolution controls the coarseness of the clustering, where a higher resolution leads to more clusters.", "fa_icon": "fas fa-circle-nodes" }, - "st_spatial_de_top_hgv": { + "st_spatial_de_top_hvg": { "type": "integer", "default": 15, - "description": "Number of top highly variable genes to plot", + "description": "The number of top spatially highly variable genes to plot.", "fa_icon": "fas fa-hashtag" }, "st_spatial_de_ncols": { "type": "integer", "default": 5, - "description": "Number of columns to group genes plots into", + "description": "Number of columns to group gene plots into.", + "help_text": "The default, 5, will plot the top spatially highly variable genes into groups of 5 plots per row. 
This, in combination with the default number of top HVGs to plot (15), will yield three rows with 5 plots each.", "fa_icon": "fas fa-hashtag" } } @@ -197,6 +208,7 @@ } } }, + "max_job_request_options": { "title": "Max job request options", "type": "object", @@ -232,6 +244,7 @@ } } }, + "generic_options": { "title": "Generic options", "type": "object", @@ -348,6 +361,12 @@ { "$ref": "#/definitions/input_output_options" }, + { + "$ref": "#/definitions/spaceranger_options" + }, + { + "$ref": "#/definitions/optional_outputs" + }, { "$ref": "#/definitions/analysis_options" },