diff --git a/.Rbuildignore b/.Rbuildignore new file mode 100644 index 0000000..c503c4f --- /dev/null +++ b/.Rbuildignore @@ -0,0 +1 @@ +^\.github$ diff --git a/DESCRIPTION b/DESCRIPTION index 08266e3..7e3d6d7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: scviR -Date: 2023-03-07 +Date: 2024-01-27 Title: experimental inferface from R to scvi-tools -Version: 1.1.1 +Version: 1.3.3 Authors@R: c(person( "Vincent", "Carey", role = c("aut", "cre"), @@ -25,6 +25,6 @@ Imports: reticulate, BiocFileCache, utils, pheatmap, Suggests: knitr, testthat, reshape2, ggplot2, rhdf5, BiocStyle VignetteBuilder: knitr biocViews: Infrastructure, SingleCell, DataImport -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.1 URL: https://github.com/vjcitn/scviR BugReports: https://github.com/vjcitn/scviR/issues diff --git a/NAMESPACE b/NAMESPACE index 7d92622..e0c44df 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,6 @@ # Generated by roxygen2: do not edit by hand +export(MuDataR) export(adtProfiles) export(anndataR) export(cacheCiteseq5k10kPbmcs) diff --git a/R/basilisk.R b/R/basilisk.R index 3445799..310549f 100644 --- a/R/basilisk.R +++ b/R/basilisk.R @@ -4,10 +4,28 @@ bsklenv <- basilisk::BasiliskEnvironment( envname = "bsklenv", pkgname = "scviR", - packages = c("numpy==1.24.4", "pandas==1.5.3"), + packages = c("python=3.10", "numpy=1.24.4", "pandas=1.5.3"), pip = c( - "scvi-tools==1.0.3", "scanpy==1.9.5", - "matplotlib==3.7.3" + "anndata==0.10.1", + "docrep==0.3.2", + "flax==0.8.1", + "jax==0.4.23", + "jaxlib==0.4.23", + "optax==0.1.9", + "scipy==1.12.0", + "scikit-learn==1.4.0", + "rich==13.7.1", + "h5py==3.9.0", + "torch==1.13.1", + "lightning==2.1.4", + "torchmetrics==0.11.0", +# "pyro-ppl==1.6.0", + "tqdm==4.66.2", +# "numpyro==0.12.1", + "ml-collections==0.1.1", + "mudata==0.2.3", + "scvi-tools==1.1.2", "scanpy==1.9.5", + "matplotlib==3.7.3", "scikit-misc==0.3.1" ) ) diff --git a/R/cache_citeseq_pbmcs.R b/R/cache_citeseq_pbmcs.R index c4b4069..a03baca 100644 --- a/R/cache_citeseq_pbmcs.R +++ b/R/cache_citeseq_pbmcs.R @@ -48,7 +48,7 @@ cacheCiteseq5k10kPbmcs <- function() { #' adm <- anndataR() #' hpath <- cacheCiteseq5k10kPbmcs() #' adata <- adm$read(hpath) -#' mod <- scvi$model$`_totalvi`$TOTALVI$load(vaedir, adata, use_gpu = FALSE) +#' mod <- scvi$model$`_totalvi`$TOTALVI$load(vaedir, adata) #, use_gpu = FALSE) #' mod #' @export cacheCiteseq5k10kTutvae <- function() { @@ -79,6 +79,7 @@ cacheCiteseq5k10kTutvae <- function() { #' helper to get the tutorial VAE for PBMCs from scvi-tools tutorial #' @param use_gpu logical(1), defaulting to FALSE, passed to TOTALVI.load #' @return python reference to anndata +#' @note March 2024 use_gpu ignored #' @examples #' getCiteseqTutvae() #' @export @@ -91,7 +92,7 @@ getCiteseqTutvae <- function(use_gpu = FALSE) { adm <- anndataR() hpath <- cacheCiteseq5k10kPbmcs() adata <- adm$read(hpath) - mod <- scvi$model$`_totalvi`$TOTALVI$load(vaedir, adata, use_gpu = use_gpu) + mod <- scvi$model$`_totalvi`$TOTALVI$load(vaedir, adata) #, use_gpu = use_gpu) mod } diff --git a/R/scanpy.R b/R/scanpy.R index 69551b8..ee716f8 100644 --- a/R/scanpy.R +++ b/R/scanpy.R @@ -29,3 +29,18 @@ anndataR <- function() { reticulate::import("anndata") }) } + +#' basic interface to MuData +#' @return basiliskRun result with import from reticulate, typically a Module +#' @examples +#' md <- MuDataR() +#' md +#' head(names(md)) +#' @export +MuDataR <- function() { + proc <- basilisk::basiliskStart(bsklenv) + on.exit(basilisk::basiliskStop(proc)) + basilisk::basiliskRun(proc, function() { + reticulate::import("mudata") + }) +} diff --git a/inst/scripts/unpack.R b/inst/scripts/unpack.R new file mode 100644 index 0000000..4273120 --- /dev/null +++ b/inst/scripts/unpack.R @@ -0,0 +1,17 @@ +# this function retrieves key components from +# trained totalVI model for serialization; the totalVI +# references are invalid when session ends +unpack = function(mod, txbatch=c("PBMC10k", "PBMC5k")) { + elbo_train = unname(unlist(mod$history_["elbo_train"])) + elbo_val = unname(unlist(mod$history_["elbo_validation"])) +# + latrep = mod$get_latent_representation() + denoised = mod$get_normalized_expression( + n_samples=25L, return_mean=TRUE, transform_batch=txbatch) + names(denoised) = c("rna_denoised", "protein_denoised") + protein_foreground = 100*model$get_protein_foreground_probability( + n_samples=25L, return_mean=TRUE, transform_batch=txbatch) + + list(elbo_train=elbo_train, elbo_val=elbo_val, latrep=latrep, + denoised=denoised, protein_foreground=protein_foreground) +} diff --git a/inst/scripts/usetotal.R b/inst/scripts/usetotal.R new file mode 100644 index 0000000..75596be --- /dev/null +++ b/inst/scripts/usetotal.R @@ -0,0 +1,76 @@ +# script to use scviR from Bioconductor to +# run most of the computations of https://colab.research.google.com/github/scverse/scvi-tutorials/blob/1.1.2/multimodal/totalVI.ipynb +# +# anndata will be acquired and cached by scvi.data +# +save_dir.name = "./total2024" +if (!dir.exists(save_dir.name)) { + dir.create(save_dir.name) +} +library(scviR) +scvi = scviR() +adata = scvi$data$pbmcs_10x_cite_seq(save_path=save_dir.name) +adata + +# these functions are defined in scviR ... this is not +# the way basilisk clients should work, because they +# pass python references around, but it will take time +# to properly encapsulate all the relevant tasks + +ad = anndataR() +sc = scanpyR() + +# use scanpy utilities to normalize + +adata$layers["counts"] = adata$X # $copy() +sc$pp$normalize_total(adata) +sc$pp$log1p(adata) +adata$obs_names_make_unique() + +# build up the protein component +protein_adata = ad$AnnData(adata$obsm["protein_expression"]) +protein_adata$obs_names = adata$obs_names +adata$obsm["protein_expression"] = NULL +#del adata.obsm["protein_expression"] + +# use mudata for multiple modalities +md = MuDataR() + +mdata = md$MuData(dict("rna"= adata, "protein"= protein_adata)) +mdata + +# filter genes +sc$pp$highly_variable_genes( + mdata$mod[["rna"]], #["rna"], + n_top_genes=4000, + flavor="seurat_v3", + batch_key="batch", + layer="counts", +) + +# works: mdata$mod$rna[ ,mdata$mod$rna$var$highly_variable] + +# add into mudata +mdata$mod$rna_subset = mdata$mod$rna[ ,mdata$mod$rna$var$highly_variable]$copy() + +# crucial +mdata$update() + +# perform 'setup' -- excluding protein_layer spec seems essential +scvi$model$TOTALVI$setup_mudata( + mdata, + rna_layer="counts", +# protein_layer=NA, + batch_key="batch", + modalities=dict( + "rna_layer"= "rna_subset", + "protein_layer"= "protein", + "batch_key"= "rna_subset" + ), +) + +# configure and train +model = scvi$model$TOTALVI(mdata) +model + +model$train(max_epochs=50L) diff --git a/man/MuDataR.Rd b/man/MuDataR.Rd new file mode 100644 index 0000000..d8f2224 --- /dev/null +++ b/man/MuDataR.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/scanpy.R +\name{MuDataR} +\alias{MuDataR} +\title{basic interface to MuData} +\usage{ +MuDataR() +} +\value{ +basiliskRun result with import from reticulate, typically a Module +} +\description{ +basic interface to MuData +} +\examples{ +md <- MuDataR() +md +head(names(md)) +} diff --git a/man/cacheCiteseq5k10kTutvae.Rd b/man/cacheCiteseq5k10kTutvae.Rd index 3d31973..0931037 100644 --- a/man/cacheCiteseq5k10kTutvae.Rd +++ b/man/cacheCiteseq5k10kTutvae.Rd @@ -28,6 +28,6 @@ scvi <- scviR() adm <- anndataR() hpath <- cacheCiteseq5k10kPbmcs() adata <- adm$read(hpath) -mod <- scvi$model$`_totalvi`$TOTALVI$load(vaedir, adata, use_gpu = FALSE) +mod <- scvi$model$`_totalvi`$TOTALVI$load(vaedir, adata) #, use_gpu = FALSE) mod } diff --git a/man/getCiteseqTutvae.Rd b/man/getCiteseqTutvae.Rd index 4bc349f..254c565 100644 --- a/man/getCiteseqTutvae.Rd +++ b/man/getCiteseqTutvae.Rd @@ -15,6 +15,9 @@ python reference to anndata \description{ helper to get the tutorial VAE for PBMCs from scvi-tools tutorial } +\note{ +March 2024 use_gpu ignored +} \examples{ getCiteseqTutvae() } diff --git a/vignettes/compch12.Rmd b/vignettes/compch12.Rmd index 47ea721..703dee5 100644 --- a/vignettes/compch12.Rmd +++ b/vignettes/compch12.Rmd @@ -47,6 +47,7 @@ ch12sce ``` ```{r getdat2, message=FALSE} +options(timeout=3600) fullvi = getTotalVI5k10kAdata() fullvi ```