diff --git a/modules/nf-core/scanpy/hashsolo/environment.yml b/modules/nf-core/scanpy/hashsolo/environment.yml
new file mode 100644
index 00000000000..cc3350502a5
--- /dev/null
+++ b/modules/nf-core/scanpy/hashsolo/environment.yml
@@ -0,0 +1,9 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - conda-forge::python=3.12.11
+  - conda-forge::pyyaml=6.0.2
+  - conda-forge::scanpy=1.11.2
diff --git a/modules/nf-core/scanpy/hashsolo/main.nf b/modules/nf-core/scanpy/hashsolo/main.nf
new file mode 100644
index 00000000000..2ddb2af3af9
--- /dev/null
+++ b/modules/nf-core/scanpy/hashsolo/main.nf
@@ -0,0 +1,38 @@
+process SCANPY_HASHSOLO {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/16/168ecbbe27ccef766741ccbf937b0d2675be2e19b0565035e0719f1e9ea5ee95/data':
+        'community.wave.seqera.io/library/python_pyyaml_scanpy:b5509a698e9aae25' }"
+
+    input:
+    tuple val(meta), path(input_h5ad), val(cell_hashing_columns)
+
+    output:
+    tuple val(meta), path("*.h5ad"), emit: h5ad
+    path "versions.yml"            , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    prefix = task.ext.prefix ?: "${meta.id}"
+    // Comma-separated prior probabilities handed to hashsolo as `priors`
+    // (scanpy order: negative, singlet, doublet); override with `ext.priors`.
+    priors = task.ext.priors ?: '0.01,0.8,0.19'
+    template('hashsolo.py')
+
+    stub:
+    prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}.h5ad
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        python: \$(python3 --version | cut -f 2 -d " ")
+        scanpy: \$(python3 -c "import scanpy; print(scanpy.__version__)")
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/scanpy/hashsolo/meta.yml b/modules/nf-core/scanpy/hashsolo/meta.yml
new file mode 100644
index 00000000000..322cd0f2e17
--- /dev/null
+++ b/modules/nf-core/scanpy/hashsolo/meta.yml
@@ -0,0 +1,63 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "SCANPY_HASHSOLO"
+description: Probabilistic demultiplexing of cell hashing data
+keywords:
+  - anndata
+  - single-cell
+  - hashing
+  - demultiplexing
+  - scanpy
+tools:
+  - "scanpy":
+      description: "Single-cell analysis in Python. Scales to >100M cells."
+      homepage: "https://github.com/scverse/scanpy"
+      documentation: "https://scanpy.readthedocs.io/en/stable/generated/scanpy.external.pp.hashsolo.html"
+      tool_dev_url: "https://github.com/scverse/scanpy"
+      doi: "10.1186/s13059-017-1382-0"
+      licence: ["BSD-3"]
+      identifier: biotools:scanpy
+
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. `[ id:'sample1' ]`
+    - input_h5ad:
+        type: file
+        description: The (annotated) data matrix of shape n_obs x n_vars. Rows
+          correspond to cells and columns to genes.
+        pattern: "*.h5ad"
+        ontologies: []
+    - cell_hashing_columns:
+        type: list
+        description: |
+          List of cell hashing columns to use for demultiplexing.
+          Column names must not contain whitespace.
+          e.g. `['hash_1', 'hash_2']`
+
+output:
+  - h5ad:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'test' ]`
+      - "*.h5ad":
+          type: file
+          description: |
+            AnnData file with the hashsolo demultiplexing results added to `.obs`
+            (e.g. `most_likely_hypothesis`, `cluster_feature`, `Classification`).
+          pattern: "*.h5ad"
+          ontologies: []
+  - versions:
+      - "versions.yml":
+          type: file
+          description: File containing software versions
+          pattern: "versions.yml"
+          ontologies:
+            - edam: http://edamontology.org/format_3750 # YAML
+authors:
+  - "@seohyonkim"
+maintainers:
+  - "@seohyonkim"
diff --git a/modules/nf-core/scanpy/hashsolo/templates/hashsolo.py b/modules/nf-core/scanpy/hashsolo/templates/hashsolo.py
new file mode 100644
index 00000000000..65760dd4625
--- /dev/null
+++ b/modules/nf-core/scanpy/hashsolo/templates/hashsolo.py
@@ -0,0 +1,43 @@
+#!/usr/bin/env python3
+"""Nextflow template: demultiplex cell hashing data with scanpy's hashsolo."""
+
+import os
+import platform
+
+import yaml
+
+# Keep matplotlib/numba config and cache files under the current working
+# directory. Set before scanpy is imported so both libraries pick them up.
+os.environ["MPLCONFIGDIR"] = "./tmp/mpl"
+os.environ["NUMBA_CACHE_DIR"] = "./tmp/numba"
+
+import scanpy as sc
+import scanpy.external as sce
+
+adata = sc.read_h5ad("${input_h5ad}")
+
+# The Groovy list is rendered space-separated and split again here, so the
+# column names must not contain whitespace.
+cell_hashing_columns = "${cell_hashing_columns.join(' ')}".split()
+if not cell_hashing_columns:
+    raise ValueError("No cell hashing columns were provided.")
+
+# hashsolo takes three prior probabilities (negative, singlet, doublet).
+priors = [float(prior) for prior in "${priors}".split(",")]
+if len(priors) != 3:
+    raise ValueError(f"Expected 3 comma-separated priors, got {len(priors)}.")
+
+sce.pp.hashsolo(adata, cell_hashing_columns, priors=priors)
+
+adata.write("${prefix}.h5ad")
+
+# Record the tool versions keyed by the process name.
+versions = {
+    "${task.process}": {
+        "python": platform.python_version(),
+        "scanpy": sc.__version__,
+    }
+}
+
+with open("versions.yml", "w") as f:
+    yaml.dump(versions, f)
diff --git a/modules/nf-core/scanpy/hashsolo/tests/main.nf.test b/modules/nf-core/scanpy/hashsolo/tests/main.nf.test
new file mode 100644
index 00000000000..fffb31e9d37
--- /dev/null
+++ b/modules/nf-core/scanpy/hashsolo/tests/main.nf.test
@@ -0,0 +1,64 @@
+nextflow_process {
+
+    name "Test Process SCANPY_HASHSOLO"
+    script "../main.nf"
+    process "SCANPY_HASHSOLO"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "hashsolo"
+    tag "scanpy/hashsolo"
+    tag "scanpy"
+
+    test("generated h5ad") {
+
+        config "./nextflow.config"
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id: 'test' ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/10xgenomics/cellranger/hashing_demultiplexing/hashsolo_anndata.h5ad', checkIfExists: true),
+                    ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+    test("generated h5ad - stub") {
+
+        options "-stub"
+        config "./nextflow.config"
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id: 'test' ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/10xgenomics/cellranger/hashing_demultiplexing/hashsolo_anndata.h5ad', checkIfExists: true),
+                    ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+}
diff --git a/modules/nf-core/scanpy/hashsolo/tests/main.nf.test.snap b/modules/nf-core/scanpy/hashsolo/tests/main.nf.test.snap
new file mode 100644
index 00000000000..574995d20bf
--- /dev/null
+++ b/modules/nf-core/scanpy/hashsolo/tests/main.nf.test.snap
@@ -0,0 +1,68 @@
+{
+    "generated h5ad": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test_hashsolo.h5ad:md5,e1f129270b67e8575dd060d44e1c0c4b"
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,d4b55c68ad8effa4580cf202d802b007"
+                ],
+                "h5ad": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test_hashsolo.h5ad:md5,e1f129270b67e8575dd060d44e1c0c4b"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,d4b55c68ad8effa4580cf202d802b007"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "25.04.3"
+        },
+        "timestamp": "2025-06-15T16:12:56.470744429"
+    },
+    "generated h5ad - stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test_hashsolo.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,59133af84f960e142d89d672f0c32cff"
+                ],
+                "h5ad": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test_hashsolo.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,59133af84f960e142d89d672f0c32cff"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "25.04.3"
+        },
+        "timestamp": "2025-06-15T16:21:51.26475278"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/scanpy/hashsolo/tests/nextflow.config b/modules/nf-core/scanpy/hashsolo/tests/nextflow.config
new file mode 100644
index 00000000000..b01e960a541
--- /dev/null
+++ b/modules/nf-core/scanpy/hashsolo/tests/nextflow.config
@@ -0,0 +1,5 @@
+process {
+    withName: SCANPY_HASHSOLO {
+        ext.prefix = { "${meta.id}_hashsolo"}
+    }
+}