diff --git a/src/methods/harmony/config.vsh.yaml b/src/methods/harmony/config.vsh.yaml
new file mode 100644
index 0000000..c94bc25
--- /dev/null
+++ b/src/methods/harmony/config.vsh.yaml
@@ -0,0 +1,33 @@
+__merge__: /src/api/comp_method.yaml
+name: harmony
+label: Harmony
+summary: Fast, sensitive and accurate integration of single-cell data with Harmony
+description: |
+  Harmony is a general-purpose R package with an efficient algorithm for integrating multiple data sets.
+  It is especially useful for large single-cell datasets such as single-cell RNA-seq.
+references:
+  # Korsunsky, I., Millard, N., Fan, J. et al.
+  # Fast, sensitive and accurate integration of single-cell data with Harmony.
+  # Nat Methods 16, 1289–1296 (2019). https://doi.org/10.1038/s41592-019-0619-0
+  doi: 10.1038/s41592-019-0619-0
+links:
+  repository: https://github.com/immunogenomics/harmony
+  documentation: https://portals.broadinstitute.org/harmony
+info:
+  method_types: [embedding]
+  preferred_normalization: log_cp10k
+resources:
+  - type: r_script
+    path: script.R
+engines:
+  - type: docker
+    image: openproblems/base_r:1.0.0
+    setup:
+      - type: r
+        cran:
+          - harmony
+runners:
+  - type: executable
+  - type: nextflow
+    directives:
+      label: [lowcpu, highmem, midtime]
diff --git a/src/methods/harmony/script.R b/src/methods/harmony/script.R
new file mode 100644
index 0000000..acbe4ef
--- /dev/null
+++ b/src/methods/harmony/script.R
@@ -0,0 +1,40 @@
+cat("Loading dependencies\n")
+requireNamespace("anndata", quietly = TRUE)
+requireNamespace("Matrix", quietly = TRUE)
+requireNamespace("harmony", quietly = TRUE)
+
+## VIASH START
+par <- list(
+  input = 'resources_test/task_batch_integration/cxg_mouse_pancreas_atlas/dataset.h5ad',
+  output = 'output.h5ad'
+)
+meta <- list(
+  name = "harmony"
+)
+## VIASH END
+
+cat("Read input\n")
+adata <- anndata::read_h5ad(par$input)
+
+cat("Run harmony\n")
+out <- harmony::RunHarmony(
+  data_mat = adata$obsm[["X_pca"]],  # integrate on the precomputed PCA embedding
+  meta_data = adata$obs[["batch"]]  # batch labels passed as a plain vector
+)
+
+cat("Store outputs\n")
+output <- anndata::AnnData(
+  obs = adata$obs[, c()],  # keep the row index only, drop every column
+  var = adata$var[, c()],
+  obsm = list(
+    X_emb = out  # no trailing comma: R's list() errors on an empty argument
+  ),
+  uns = list(
+    dataset_id = adata$uns[["dataset_id"]],
+    normalization_id = adata$uns[["normalization_id"]],
+    method_id = meta$name
+  )
+)
+
+cat("Write output to file\n")
+zzz <- output$write_h5ad(par$output, compression = "gzip")  # return value is unused
diff --git a/src/methods/harmonypy/config.vsh.yaml b/src/methods/harmonypy/config.vsh.yaml
new file mode 100644
index 0000000..511229d
--- /dev/null
+++ b/src/methods/harmonypy/config.vsh.yaml
@@ -0,0 +1,34 @@
+__merge__: /src/api/comp_method.yaml
+name: harmonypy
+label: Harmonypy
+summary: harmonypy is a port of the harmony R package by Ilya Korsunsky.
+description: |
+  Harmony is a general-purpose R package with an efficient algorithm for integrating multiple data sets.
+  It is especially useful for large single-cell datasets such as single-cell RNA-seq.
+references:
+  # Korsunsky, I., Millard, N., Fan, J. et al.
+  # Fast, sensitive and accurate integration of single-cell data with Harmony.
+  # Nat Methods 16, 1289–1296 (2019). https://doi.org/10.1038/s41592-019-0619-0
+  doi: 10.1038/s41592-019-0619-0
+links:
+  repository: https://github.com/slowkow/harmonypy
+  documentation: https://portals.broadinstitute.org/harmony
+info:
+  method_types: [embedding]
+  preferred_normalization: log_cp10k
+resources:
+  - type: python_script
+    path: script.py
+  - path: /src/utils/read_anndata_partial.py
+engines:
+  - type: docker
+    image: openproblems/base_python:1.0.0
+    setup:
+      - type: python
+        pypi:
+          - harmonypy
+runners:
+  - type: executable
+  - type: nextflow
+    directives:
+      label: [lowcpu, highmem, midtime]
diff --git a/src/methods/harmonypy/script.py b/src/methods/harmonypy/script.py
new file mode 100644
index 0000000..79b3253
--- /dev/null
+++ b/src/methods/harmonypy/script.py
@@ -0,0 +1,52 @@
+import sys
+import anndata as ad
+import numpy as np
+import harmonypy as hm
+
+## VIASH START
+par = {
+    "input": "resources_test/task_batch_integration/cxg_mouse_pancreas_atlas/dataset.h5ad",
+    "output": "output.h5ad"
+}
+meta = {
+    "name": "harmonypy",
+    "resources_dir": "src/utils"
+}
+## VIASH END
+
+sys.path.append(meta["resources_dir"])  # makes read_anndata_partial importable
+from read_anndata_partial import read_anndata
+
+print(">> Read input", flush=True)
+adata = read_anndata(
+    par["input"],
+    obs="obs",
+    obsm="obsm",
+    var="var",
+    uns="uns"
+)
+
+print(">> Run harmonypy", flush=True)
+out = hm.run_harmony(
+    adata.obsm["X_pca"],
+    adata.obs,
+    "batch"
+)
+
+print("Store output", flush=True)
+output = ad.AnnData(
+    obs=adata.obs[[]],  # keep the index only, drop every column
+    var=adata.var[[]],
+    obsm={
+        "X_emb": out.Z_corr.transpose()  # presumably (components, cells) -> (cells, components); confirm
+    },
+    shape=adata.shape,
+    uns={
+        "dataset_id": adata.uns["dataset_id"],
+        "normalization_id": adata.uns["normalization_id"],
+        "method_id": meta["name"],
+    }
+)
+
+print("Write output to file", flush=True)
+output.write_h5ad(par["output"], compression="gzip")