From b998f6ab01629bae87047727fe86a8aad131bc84 Mon Sep 17 00:00:00 2001 From: Erik Fasterius Date: Thu, 12 Sep 2024 14:30:30 +0200 Subject: [PATCH 1/7] Add MultiQC custom content for QC metrics --- assets/multiqc_config.yml | 17 +++++++++++++++-- bin/quality_controls.qmd | 25 +++++++++++++++++++++++++ subworkflows/local/downstream.nf | 11 ++++++++++- workflows/spatialvi.nf | 3 +++ 4 files changed, 53 insertions(+), 3 deletions(-) diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 2449bbe..ad7eef9 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -5,11 +5,24 @@ report_comment: > report_section_order: "nf-core-spatialvi-methods-description": order: -1000 - software_versions: + "custom_data": order: -1001 - "nf-core-spatialvi-summary": + software_versions: order: -1002 + "nf-core-spatialvi-summary": + order: -1003 export_plots: true disable_version_detection: true + +custom_data: + quality_controls: + file_format: "csv" + section_name: "Quality controls" + description: "Quality control metrics from the report analyses" + plot_type: "table" + +sp: + quality_controls: + fn: "mqc_*.csv" diff --git a/bin/quality_controls.qmd b/bin/quality_controls.qmd index 6bfe9c3..f2eecb0 100644 --- a/bin/quality_controls.qmd +++ b/bin/quality_controls.qmd @@ -177,6 +177,7 @@ del adata.obs["in_tissue_str"] n_spots = adata.shape[0] adata = adata[adata.obs["in_tissue"] == 1] n_spots_in_tissue = adata.shape[0] +n_spots_outside_tissue = n_spots - n_spots_in_tissue Markdown(f"""A total of `{n_spots_in_tissue}` spots are situated inside the tissue, out of `{n_spots}` spots in total.""") ``` @@ -266,6 +267,8 @@ if (adata.shape[0] == 0 or adata.shape[1] == 0): ```{python} # Print filtering results +spots_filtered = n_spots - adata.shape[0] +genes_filtered = n_genes - adata.shape[1] Markdown(f""" The final results of all the filtering is as follows: @@ -287,3 +290,25 @@ del sdata.tables["table"] sdata.tables["table"] = adata sdata.write(os.path.join(artifact_dir, output_sdata)) ``` + +```{python} +#| echo: false +# Write QC metrics to file for MultiQC aggregation +mqc_dict = { + 'sample': [meta['id']], + 'total_spots': [n_spots], + 'total_genes': [n_genes], + 'spots_filtered_outside_tissue': [n_spots_outside_tissue], + 'spots_filtered_total_counts': [n_spots_filtered_min_counts], + 'spots_filtered_genes_expressed': [n_spots_filtered_min_genes], + 'spots_filtered_mito_content': [n_spots_filtered_mito], + 'spots_filtered_ribo_content': [n_spots_filtered_ribo], + 'spots_filtered_hb_content': [n_spots_filtered_hb], + 'genes_filtered_expressed_in_spots': [n_genes_filtered_min_spots], + 'total_spots_filtered': [spots_filtered], + 'total_genes_filtered': [genes_filtered] +} +mqc_data = pd.DataFrame(mqc_dict) +mqc_name = 'artifacts/mqc_quality_controls_' + meta['id'] + '.csv' +mqc_data.to_csv(mqc_name, index=False) +``` diff --git a/subworkflows/local/downstream.nf b/subworkflows/local/downstream.nf index a477bbc..2d90d3a 100644 --- a/subworkflows/local/downstream.nf +++ b/subworkflows/local/downstream.nf @@ -49,6 +49,14 @@ workflow DOWNSTREAM { extensions ) ch_versions = ch_versions.mix(QUALITY_CONTROLS.out.versions) + ch_qc = QUALITY_CONTROLS.out.artifacts + | map { meta, artifacts -> [meta, artifacts[0], meta, artifacts[1]] } + | flatten + | collate ( 2 ) + | branch { + sdata: it[1].name.endsWith('.zarr') + mqc: it[1].name.endsWith('.csv') + } // // Normalisation, dimensionality reduction and clustering @@ -99,7 +107,8 @@ workflow DOWNSTREAM { emit: qc_html = QUALITY_CONTROLS.out.html // channel: [ meta, html ] - qc_sdata = QUALITY_CONTROLS.out.artifacts // channel: [ meta, h5ad ] + qc_sdata = ch_qc.sdata // channel: [ meta, zarr ] + qc_mqc = ch_qc.mqc // channel: [ meta, csv ] qc_nb = QUALITY_CONTROLS.out.notebook // channel: [ meta, qmd ] qc_params = QUALITY_CONTROLS.out.params_yaml // channel: [ meta, yml ] diff --git a/workflows/spatialvi.nf b/workflows/spatialvi.nf index 7f611fe..9436096 100644 --- a/workflows/spatialvi.nf +++ b/workflows/spatialvi.nf @@ -116,6 +116,9 @@ workflow SPATIALVI { ch_methods_description = Channel.value( methodsDescriptionText(ch_multiqc_custom_methods_description)) + ch_multiqc_files = ch_multiqc_files.mix( + DOWNSTREAM.out.qc_mqc.map{it[1]}.collect() + ) ch_multiqc_files = ch_multiqc_files.mix( ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) From 363790cc9fc849015082afab403eea1ceb1953a3 Mon Sep 17 00:00:00 2001 From: Erik Fasterius Date: Thu, 12 Sep 2024 14:46:51 +0200 Subject: [PATCH 2/7] Clarify subworkflow outputs channel names --- subworkflows/local/downstream.nf | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/subworkflows/local/downstream.nf b/subworkflows/local/downstream.nf index 2d90d3a..3b5eb58 100644 --- a/subworkflows/local/downstream.nf +++ b/subworkflows/local/downstream.nf @@ -57,6 +57,11 @@ workflow DOWNSTREAM { sdata: it[1].name.endsWith('.zarr') mqc: it[1].name.endsWith('.csv') } + ch_qc_sdata = ch_qc.sdata + ch_qc_mqc = ch_qc.mqc + ch_qc_html = QUALITY_CONTROLS.out.html + ch_qc_nb = QUALITY_CONTROLS.out.notebook + ch_qc_yml = QUALITY_CONTROLS.out.params_yaml // // Normalisation, dimensionality reduction and clustering @@ -106,11 +111,11 @@ workflow DOWNSTREAM { ch_versions = ch_versions.mix(SPATIALLY_VARIABLE_GENES.out.versions) emit: - qc_html = QUALITY_CONTROLS.out.html // channel: [ meta, html ] - qc_sdata = ch_qc.sdata // channel: [ meta, zarr ] - qc_mqc = ch_qc.mqc // channel: [ meta, csv ] - qc_nb = QUALITY_CONTROLS.out.notebook // channel: [ meta, qmd ] - qc_params = QUALITY_CONTROLS.out.params_yaml // channel: [ meta, yml ] + qc_html = ch_qc_html // channel: [ meta, html ] + qc_sdata = ch_qc_sdata // channel: [ meta, zarr ] + qc_mqc = ch_qc_mqc // channel: [ meta, csv ] + qc_nb = ch_qc_nb // channel: [ meta, qmd ] + qc_params = ch_qc_yml // channel: [ meta, yml ] clustering_html = CLUSTERING.out.html // channel: [ html ] clustering_sdata = CLUSTERING.out.artifacts // channel: [ meta, h5ad] From c384af7dfcf9beb3a949c234439d67d9b275af3b Mon Sep 17 00:00:00 2001 From: Erik Fasterius Date: Thu, 12 Sep 2024 15:06:56 +0200 Subject: [PATCH 3/7] Also publish QC metrics file --- conf/modules.config | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/conf/modules.config b/conf/modules.config index 6740b30..4cda4e2 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -81,6 +81,12 @@ process { pattern: "artifacts/adata_processed.h5ad", saveAs: { "adata_processed.h5ad" } ], + [ + path: { "${params.outdir}/${meta.id}/data" }, + mode: params.publish_dir_mode, + pattern: "artifacts/mqc_*.csv", + saveAs: { filename -> filename.split('/')[1] } + ], [ path: { "${params.outdir}/${meta.id}/data" }, mode: params.publish_dir_mode, From 0374c7f496d1cfcae0439df0af54abb1590ca74c Mon Sep 17 00:00:00 2001 From: Erik Fasterius Date: Thu, 12 Sep 2024 15:10:31 +0200 Subject: [PATCH 4/7] Add patch to MultiQC for multi-platform image --- modules.json | 3 ++- modules/nf-core/multiqc/main.nf | 4 ++-- modules/nf-core/multiqc/multiqc.diff | 18 ++++++++++++++++++ 3 files changed, 22 insertions(+), 3 deletions(-) create mode 100644 modules/nf-core/multiqc/multiqc.diff diff --git a/modules.json b/modules.json index 9d1ddfb..5dd17e8 100644 --- a/modules.json +++ b/modules.json @@ -13,7 +13,8 @@ "multiqc": { "branch": "master", "git_sha": "19ca321db5d8bd48923262c2eca6422359633491", - "installed_by": ["modules"] + "installed_by": ["modules"], + "patch": "modules/nf-core/multiqc/multiqc.diff" }, "quartonotebook": { "branch": "master", diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index ceaec13..a9253b6 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -1,10 +1,10 @@ process MULTIQC { - label 'process_single' + label 'process_medium' conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/multiqc:1.24.1--pyhdfd78af_0' : - 'biocontainers/multiqc:1.24.1--pyhdfd78af_0' }" + 'docker.io/multiqc/multiqc:v1.24.1' }" input: path multiqc_files, stageAs: "?/*" diff --git a/modules/nf-core/multiqc/multiqc.diff b/modules/nf-core/multiqc/multiqc.diff new file mode 100644 index 0000000..a698258 --- /dev/null +++ b/modules/nf-core/multiqc/multiqc.diff @@ -0,0 +1,18 @@ +Changes in module 'nf-core/multiqc' +--- modules/nf-core/multiqc/main.nf ++++ modules/nf-core/multiqc/main.nf +@@ -1,10 +1,10 @@ + process MULTIQC { +- label 'process_single' ++ label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/multiqc:1.24.1--pyhdfd78af_0' : +- 'biocontainers/multiqc:1.24.1--pyhdfd78af_0' }" ++ 'docker.io/multiqc/multiqc:v1.24.1' }" + + input: + path multiqc_files, stageAs: "?/*" + +************************************************************ From 7123af01671971b845700542f50882fb89b035f9 Mon Sep 17 00:00:00 2001 From: Erik Fasterius Date: Thu, 12 Sep 2024 16:13:58 +0200 Subject: [PATCH 5/7] Update quality control MultiQC section description --- assets/multiqc_config.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index ad7eef9..8fa5338 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -18,10 +18,10 @@ disable_version_detection: true custom_data: quality_controls: - file_format: "csv" - section_name: "Quality controls" - description: "Quality control metrics from the report analyses" - plot_type: "table" + file_format: csv + section_name: Quality controls + description: Quality control metrics from the report analyses. + plot_type: table sp: quality_controls: From ba222527c0c52e61ecdb3af95f949c9977d765fb Mon Sep 17 00:00:00 2001 From: Erik Fasterius Date: Thu, 12 Sep 2024 16:16:38 +0200 Subject: [PATCH 6/7] Update and harmonise QC filtering numbers Update and harmonise the QC filtering numbers, which had slightly different wording and counting of the number of spots/genes that were filtered at each step. Now they follow the format of `Removed X [spots/genes] with Y [feature cut-off]` or similar. Each filtering step still depends on the former step. --- bin/quality_controls.qmd | 65 +++++++++++++++++++++++----------------- 1 file changed, 38 insertions(+), 27 deletions(-) diff --git a/bin/quality_controls.qmd b/bin/quality_controls.qmd index f2eecb0..0102be3 100644 --- a/bin/quality_controls.qmd +++ b/bin/quality_controls.qmd @@ -174,12 +174,13 @@ sc.pl.spatial(adata, color=["in_tissue_str"], title="Spots in tissue", size=1.25 del adata.obs["in_tissue_str"] # Remove spots outside tissue and print results -n_spots = adata.shape[0] +n_total_spots = adata.shape[0] adata = adata[adata.obs["in_tissue"] == 1] n_spots_in_tissue = adata.shape[0] -n_spots_outside_tissue = n_spots - n_spots_in_tissue -Markdown(f"""A total of `{n_spots_in_tissue}` spots are situated inside the -tissue, out of `{n_spots}` spots in total.""") +n_spots_outside_tissue = n_total_spots - n_spots_in_tissue +Markdown(f""" +- Removed `{n_spots_outside_tissue}` spots outside the tissue. +""") ``` ## Counts, genes and spots @@ -191,24 +192,28 @@ your knowledge of the specific tissue at hand. ```{python} #| warning: false # Filter spots based on counts -n_spots = adata.shape[0] -n_genes = adata.shape[1] +n_current_spots = adata.shape[0] sc.pp.filter_cells(adata, min_counts=min_counts) -n_spots_filtered_min_counts = adata.shape[0] +n_spots_filtered_min_counts = n_current_spots - adata.shape[0] # Filter spots based on genes +n_current_spots = adata.shape[0] sc.pp.filter_cells(adata, min_genes=min_genes) -n_spots_filtered_min_genes = adata.shape[0] +n_spots_filtered_min_genes = n_current_spots - adata.shape[0] # Filter genes based on spots +n_total_genes = adata.shape[1] sc.pp.filter_genes(adata, min_cells=min_spots) -n_genes_filtered_min_spots = adata.shape[1] +n_genes_filtered_min_spots = n_total_genes - adata.shape[1] # Print results Markdown(f""" -- Removed `{n_spots - n_spots_filtered_min_counts}` spots with less than `{min_counts}` total counts. -- Removed `{n_spots_filtered_min_counts - n_spots_filtered_min_genes}` spots with less than `{min_genes}` genes expressed. -- Removed `{n_genes - n_genes_filtered_min_spots}` genes expressed in less than `{min_spots}` spots. +- Removed `{n_spots_filtered_min_counts}` spots with less than `{min_counts}` +total counts. +- Removed `{n_spots_filtered_min_genes}` spots with less than `{min_genes}` +genes expressed. +- Removed `{n_genes_filtered_min_spots}` genes expressed in less than +`{min_spots}` spots. """) ``` @@ -221,18 +226,21 @@ ribosomal nor haemoglobin content is filtered by default. ```{python} # Filter spots +n_current_spots = adata.shape[0] adata = adata[adata.obs["pct_counts_mt"] <= mito_threshold] -n_spots_filtered_mito = adata.shape[0] +n_spots_filtered_mito = n_current_spots - adata.shape[0] +n_current_spots = adata.shape[0] adata = adata[adata.obs["pct_counts_ribo"] >= ribo_threshold] -n_spots_filtered_ribo = adata.shape[0] +n_spots_filtered_ribo = n_current_spots - adata.shape[0] +n_current_spots = adata.shape[0] adata = adata[adata.obs["pct_counts_hb"] <= hb_threshold] -n_spots_filtered_hb = adata.shape[0] +n_spots_filtered_hb = n_current_spots - adata.shape[0] # Print results Markdown(f""" -- Removed `{adata.shape[0] - n_spots_filtered_mito}` spots with more than `{mito_threshold}%` mitochondrial content. -- Removed `{n_spots_filtered_mito - n_spots_filtered_ribo}` spots with less than `{ribo_threshold}%` ribosomal content. -- Removed `{n_spots_filtered_ribo - n_spots_filtered_hb}` spots with more than `{hb_threshold}%` haemoglobin content. +- Removed `{n_spots_filtered_mito}` spots with more than `{mito_threshold}%` mitochondrial content. +- Removed `{n_spots_filtered_ribo}` spots with less than `{ribo_threshold}%` ribosomal content. +- Removed `{n_spots_filtered_hb}` spots with more than `{hb_threshold}%` haemoglobin content. """) ``` @@ -267,13 +275,15 @@ if (adata.shape[0] == 0 or adata.shape[1] == 0): ```{python} # Print filtering results -spots_filtered = n_spots - adata.shape[0] -genes_filtered = n_genes - adata.shape[1] +n_remaining_spots = adata.shape[0] +n_remaining_genes = adata.shape[1] +n_spots_filtered = n_total_spots - n_remaining_spots +n_genes_filtered = n_total_genes - n_remaining_genes Markdown(f""" The final results of all the filtering is as follows: -- A total of `{adata.shape[0]}` spots out of `{n_spots}` remain after filtering. -- A total of `{adata.shape[1]}` genes out of `{n_genes}` remain after filtering. +- A total of `{n_remaining_spots}` spots out of `{n_total_spots}` remain after filtering. +- A total of `{n_remaining_genes}` genes out of `{n_total_genes}` remain after filtering. """) ``` @@ -296,17 +306,18 @@ sdata.write(os.path.join(artifact_dir, output_sdata)) # Write QC metrics to file for MultiQC aggregation mqc_dict = { 'sample': [meta['id']], - 'total_spots': [n_spots], - 'total_genes': [n_genes], + 'total_spots': [n_total_spots], + 'spots_filtered': [n_spots_filtered], + 'spots_remaining': [n_remaining_spots], 'spots_filtered_outside_tissue': [n_spots_outside_tissue], 'spots_filtered_total_counts': [n_spots_filtered_min_counts], 'spots_filtered_genes_expressed': [n_spots_filtered_min_genes], 'spots_filtered_mito_content': [n_spots_filtered_mito], 'spots_filtered_ribo_content': [n_spots_filtered_ribo], 'spots_filtered_hb_content': [n_spots_filtered_hb], - 'genes_filtered_expressed_in_spots': [n_genes_filtered_min_spots], - 'total_spots_filtered': [spots_filtered], - 'total_genes_filtered': [genes_filtered] + 'total_genes': [n_total_genes], + 'genes_filtered': [n_genes_filtered], + 'genes_remaining': [n_remaining_genes] } mqc_data = pd.DataFrame(mqc_dict) mqc_name = 'artifacts/mqc_quality_controls_' + meta['id'] + '.csv' From ae0a9a8fd8b8b03bcc5c7f7bca272615a3531b96 Mon Sep 17 00:00:00 2001 From: Erik Fasterius Date: Fri, 13 Sep 2024 09:11:16 +0200 Subject: [PATCH 7/7] Update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 84cc97d..ec49e7c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ compatible with further downstream analyses and/or exploration in _e.g._ ### `Added` +- Add quality control metrics as custom MultiQC content [[#88](https://github.com/nf-core/spatialvi/pull/88)] - Add MultiQC support for Space Ranger outputs [[#70](https://github.com/nf-core/spatialvi/pull/70)] - Use the QUARTONOTEBOOK nf-core module instead of local Quarto-based modules [[#68](https://github.com/nf-core/spatialvi/pull/68)] - Add support for SpatialData [[$67](https://github.com/nf-core/spatialvi/pull/67)]