diff --git a/CHANGELOG.md b/CHANGELOG.md index 84cc97d..ec49e7c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ compatible with further downstream analyses and/or exploration in _e.g._ ### `Added` +- Add quality control metrics as custom MultiQC content [[#88](https://github.com/nf-core/spatialvi/pull/88)] - Add MultiQC support for Space Ranger outputs [[#70](https://github.com/nf-core/spatialvi/pull/70)] - Use the QUARTONOTEBOOK nf-core module instead of local Quarto-based modules [[#68](https://github.com/nf-core/spatialvi/pull/68)] - Add support for SpatialData [[$67](https://github.com/nf-core/spatialvi/pull/67)] diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 2449bbe..8fa5338 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -5,11 +5,24 @@ report_comment: > report_section_order: "nf-core-spatialvi-methods-description": order: -1000 - software_versions: + "custom_data": order: -1001 - "nf-core-spatialvi-summary": + software_versions: order: -1002 + "nf-core-spatialvi-summary": + order: -1003 export_plots: true disable_version_detection: true + +custom_data: + quality_controls: + file_format: csv + section_name: Quality controls + description: Quality control metrics from the report analyses. 
+ plot_type: table + +sp: + quality_controls: + fn: "mqc_*.csv" diff --git a/bin/quality_controls.qmd b/bin/quality_controls.qmd index 784ec78..c8c9109 100644 --- a/bin/quality_controls.qmd +++ b/bin/quality_controls.qmd @@ -173,11 +173,13 @@ sc.pl.spatial(adata, color=["in_tissue_str"], title="Spots in tissue", size=1.25 del adata.obs["in_tissue_str"] # Remove spots outside tissue and print results -n_spots = adata.shape[0] +n_total_spots = adata.shape[0] adata = adata[adata.obs["in_tissue"] == 1] n_spots_in_tissue = adata.shape[0] -Markdown(f"""A total of `{n_spots_in_tissue}` spots are situated inside the -tissue, out of `{n_spots}` spots in total.""") +n_spots_outside_tissue = n_total_spots - n_spots_in_tissue +Markdown(f""" +- Removed `{n_spots_outside_tissue}` spots outside the tissue. +""") ``` ## Counts, genes and spots @@ -189,24 +191,28 @@ your knowledge of the specific tissue at hand. ```{python} #| warning: false # Filter spots based on counts -n_spots = adata.shape[0] -n_genes = adata.shape[1] +n_current_spots = adata.shape[0] sc.pp.filter_cells(adata, min_counts=min_counts) -n_spots_filtered_min_counts = adata.shape[0] +n_spots_filtered_min_counts = n_current_spots - adata.shape[0] # Filter spots based on genes +n_current_spots = adata.shape[0] sc.pp.filter_cells(adata, min_genes=min_genes) -n_spots_filtered_min_genes = adata.shape[0] +n_spots_filtered_min_genes = n_current_spots - adata.shape[0] # Filter genes based on spots +n_total_genes = adata.shape[1] sc.pp.filter_genes(adata, min_cells=min_spots) -n_genes_filtered_min_spots = adata.shape[1] +n_genes_filtered_min_spots = n_total_genes - adata.shape[1] # Print results Markdown(f""" -- Removed `{n_spots - n_spots_filtered_min_counts}` spots with less than `{min_counts}` total counts. -- Removed `{n_spots_filtered_min_counts - n_spots_filtered_min_genes}` spots with less than `{min_genes}` genes expressed. 
-- Removed `{n_genes - n_genes_filtered_min_spots}` genes expressed in less than `{min_spots}` spots. +- Removed `{n_spots_filtered_min_counts}` spots with less than `{min_counts}` +total counts. +- Removed `{n_spots_filtered_min_genes}` spots with less than `{min_genes}` +genes expressed. +- Removed `{n_genes_filtered_min_spots}` genes expressed in less than +`{min_spots}` spots. """) ``` @@ -219,18 +225,21 @@ ribosomal nor haemoglobin content is filtered by default. ```{python} # Filter spots +n_current_spots = adata.shape[0] adata = adata[adata.obs["pct_counts_mt"] <= mito_threshold] -n_spots_filtered_mito = adata.shape[0] +n_spots_filtered_mito = n_current_spots - adata.shape[0] +n_current_spots = adata.shape[0] adata = adata[adata.obs["pct_counts_ribo"] >= ribo_threshold] -n_spots_filtered_ribo = adata.shape[0] +n_spots_filtered_ribo = n_current_spots - adata.shape[0] +n_current_spots = adata.shape[0] adata = adata[adata.obs["pct_counts_hb"] <= hb_threshold] -n_spots_filtered_hb = adata.shape[0] +n_spots_filtered_hb = n_current_spots - adata.shape[0] # Print results Markdown(f""" -- Removed `{adata.shape[0] - n_spots_filtered_mito}` spots with more than `{mito_threshold}%` mitochondrial content. -- Removed `{n_spots_filtered_mito - n_spots_filtered_ribo}` spots with less than `{ribo_threshold}%` ribosomal content. -- Removed `{n_spots_filtered_ribo - n_spots_filtered_hb}` spots with more than `{hb_threshold}%` haemoglobin content. +- Removed `{n_spots_filtered_mito}` spots with more than `{mito_threshold}%` mitochondrial content. +- Removed `{n_spots_filtered_ribo}` spots with less than `{ribo_threshold}%` ribosomal content. +- Removed `{n_spots_filtered_hb}` spots with more than `{hb_threshold}%` haemoglobin content. 
""") ``` @@ -265,11 +274,15 @@ if (adata.shape[0] == 0 or adata.shape[1] == 0): ```{python} # Print filtering results +n_remaining_spots = adata.shape[0] +n_remaining_genes = adata.shape[1] +n_spots_filtered = n_total_spots - n_remaining_spots +n_genes_filtered = n_total_genes - n_remaining_genes Markdown(f""" The final results of all the filtering is as follows: -- A total of `{adata.shape[0]}` spots out of `{n_spots}` remain after filtering. -- A total of `{adata.shape[1]}` genes out of `{n_genes}` remain after filtering. +- A total of `{n_remaining_spots}` spots out of `{n_total_spots}` remain after filtering. +- A total of `{n_remaining_genes}` genes out of `{n_total_genes}` remain after filtering. """) ``` @@ -286,3 +299,26 @@ del sdata.tables["table"] sdata.tables["table"] = adata sdata.write(os.path.join(artifact_dir, output_sdata)) ``` + +```{python} +#| echo: false +# Write one-row, per-sample QC metrics as CSV into the artifact directory for MultiQC aggregation +mqc_dict = { + 'sample': [meta['id']], + 'total_spots': [n_total_spots], + 'spots_filtered': [n_spots_filtered], + 'spots_remaining': [n_remaining_spots], + 'spots_filtered_outside_tissue': [n_spots_outside_tissue], + 'spots_filtered_total_counts': [n_spots_filtered_min_counts], + 'spots_filtered_genes_expressed': [n_spots_filtered_min_genes], + 'spots_filtered_mito_content': [n_spots_filtered_mito], + 'spots_filtered_ribo_content': [n_spots_filtered_ribo], + 'spots_filtered_hb_content': [n_spots_filtered_hb], + 'total_genes': [n_total_genes], + 'genes_filtered': [n_genes_filtered], + 'genes_remaining': [n_remaining_genes] +} +mqc_data = pd.DataFrame(mqc_dict) +mqc_name = os.path.join(artifact_dir, 'mqc_quality_controls_' + meta['id'] + '.csv') +mqc_data.to_csv(mqc_name, index=False) +``` diff --git a/conf/modules.config b/conf/modules.config index 4ec1fbf..2d1265d 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -82,6 +82,12 @@ process { pattern: "artifacts/adata_processed.h5ad", saveAs: { "adata_processed.h5ad" } ], + [ + path: { 
"${params.outdir}/${meta.id}/data" }, + mode: params.publish_dir_mode, + pattern: "artifacts/mqc_*.csv", + saveAs: { filename -> filename.split('/')[1] } + ], [ path: { "${params.outdir}/${meta.id}/data" }, mode: params.publish_dir_mode, diff --git a/modules.json b/modules.json index 9d1ddfb..5dd17e8 100644 --- a/modules.json +++ b/modules.json @@ -13,7 +13,8 @@ "multiqc": { "branch": "master", "git_sha": "19ca321db5d8bd48923262c2eca6422359633491", - "installed_by": ["modules"] + "installed_by": ["modules"], + "patch": "modules/nf-core/multiqc/multiqc.diff" }, "quartonotebook": { "branch": "master", diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index ceaec13..a9253b6 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -1,10 +1,10 @@ process MULTIQC { - label 'process_single' + label 'process_medium' conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/multiqc:1.24.1--pyhdfd78af_0' : - 'biocontainers/multiqc:1.24.1--pyhdfd78af_0' }" + 'docker.io/multiqc/multiqc:v1.24.1' }" input: path multiqc_files, stageAs: "?/*" diff --git a/modules/nf-core/multiqc/multiqc.diff b/modules/nf-core/multiqc/multiqc.diff new file mode 100644 index 0000000..a698258 --- /dev/null +++ b/modules/nf-core/multiqc/multiqc.diff @@ -0,0 +1,18 @@ +Changes in module 'nf-core/multiqc' +--- modules/nf-core/multiqc/main.nf ++++ modules/nf-core/multiqc/main.nf +@@ -1,10 +1,10 @@ + process MULTIQC { +- label 'process_single' ++ label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/multiqc:1.24.1--pyhdfd78af_0' : +- 'biocontainers/multiqc:1.24.1--pyhdfd78af_0' }" ++ 'docker.io/multiqc/multiqc:v1.24.1' }" + + input: + path multiqc_files, stageAs: "?/*" + +************************************************************ diff --git a/subworkflows/local/downstream.nf b/subworkflows/local/downstream.nf index 4f6d742..5e07478 100644 --- a/subworkflows/local/downstream.nf +++ b/subworkflows/local/downstream.nf @@ -49,6 +49,19 @@ workflow DOWNSTREAM { extensions ) ch_versions = ch_versions.mix(QUALITY_CONTROLS.out.versions) + ch_qc = QUALITY_CONTROLS.out.artifacts + | map { meta, artifacts -> [meta, artifacts[0], meta, artifacts[1]] } + | flatten + | collate ( 2 ) + | branch { + sdata: it[1].name.endsWith('.zarr') + mqc: it[1].name.endsWith('.csv') + } + ch_qc_sdata = ch_qc.sdata + ch_qc_mqc = ch_qc.mqc + ch_qc_html = QUALITY_CONTROLS.out.html + ch_qc_nb = QUALITY_CONTROLS.out.notebook + ch_qc_yml = QUALITY_CONTROLS.out.params_yaml // // Normalisation, dimensionality reduction and clustering @@ -98,10 +111,11 @@ workflow DOWNSTREAM { ch_versions = ch_versions.mix(SPATIALLY_VARIABLE_GENES.out.versions) emit: - qc_html = QUALITY_CONTROLS.out.html // channel: [ meta, html ] - qc_sdata = QUALITY_CONTROLS.out.artifacts // channel: [ meta, h5ad ] - qc_nb = QUALITY_CONTROLS.out.notebook // channel: [ meta, qmd ] - qc_params = QUALITY_CONTROLS.out.params_yaml // channel: [ meta, yml ] + qc_html = ch_qc_html // channel: [ meta, html ] + qc_sdata = ch_qc_sdata // channel: [ meta, zarr ] + qc_mqc = ch_qc_mqc // channel: [ meta, csv ] + qc_nb = ch_qc_nb // channel: [ meta, qmd ] + qc_params = ch_qc_yml // channel: [ meta, yml ] clustering_html = CLUSTERING.out.html // channel: [ html ] clustering_sdata = CLUSTERING.out.artifacts // channel: [ meta, h5ad] diff --git a/workflows/spatialvi.nf b/workflows/spatialvi.nf index 7f611fe..9436096 100644 --- a/workflows/spatialvi.nf +++ b/workflows/spatialvi.nf @@ -116,6 +116,9 @@ 
workflow SPATIALVI { ch_methods_description = Channel.value( methodsDescriptionText(ch_multiqc_custom_methods_description)) + ch_multiqc_files = ch_multiqc_files.mix( + DOWNSTREAM.out.qc_mqc.map{it[1]}.collect() + ) ch_multiqc_files = ch_multiqc_files.mix( ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions)