vjcitn · vjcitn · Oct 24, 2023 · Oct 24, 2023 · Jan 26, 2024 · Jan 26, 2024
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -0,0 +1 @@
+^\.github$
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: scviR
-Date: 2023-03-07
+Date: 2024-01-27
 Title: experimental inferface from R to scvi-tools
-Version: 1.1.1
+Version: 1.3.3
 Authors@R: 
     c(person(
         "Vincent", "Carey", role = c("aut", "cre"),
@@ -25,6 +25,6 @@ Imports: reticulate, BiocFileCache, utils, pheatmap,
 Suggests: knitr, testthat, reshape2, ggplot2, rhdf5, BiocStyle
 VignetteBuilder: knitr
 biocViews: Infrastructure, SingleCell, DataImport
-RoxygenNote: 7.2.3
+RoxygenNote: 7.3.1
 URL: https://github.com/vjcitn/scviR
 BugReports: https://github.com/vjcitn/scviR/issues
diff --git a/NAMESPACE b/NAMESPACE
@@ -1,5 +1,6 @@
 # Generated by roxygen2: do not edit by hand
 
+export(MuDataR)
 export(adtProfiles)
 export(anndataR)
 export(cacheCiteseq5k10kPbmcs)

diff --git a/R/basilisk.R b/R/basilisk.R
@@ -4,10 +4,28 @@
 bsklenv <- basilisk::BasiliskEnvironment(
   envname = "bsklenv",
   pkgname = "scviR",
-  packages = c("numpy==1.24.4", "pandas==1.5.3"),
+  packages = c("python=3.10", "numpy=1.24.4", "pandas=1.5.3"),
   pip = c(
-    "scvi-tools==1.0.3", "scanpy==1.9.5",
-    "matplotlib==3.7.3"
+    "anndata==0.10.1",
+    "docrep==0.3.2",
+    "flax==0.8.1",
+    "jax==0.4.23",
+    "jaxlib==0.4.23",
+    "optax==0.1.9",
+    "scipy==1.12.0",
+    "scikit-learn==1.4.0",
+    "rich==13.7.1",
+    "h5py==3.9.0",
+    "torch==1.13.1",
+    "lightning==2.1.4",
+    "torchmetrics==0.11.0",
+#    "pyro-ppl==1.6.0",
+    "tqdm==4.66.2",
+#    "numpyro==0.12.1",
+    "ml-collections==0.1.1",
+    "mudata==0.2.3",
+    "scvi-tools==1.1.2", "scanpy==1.9.5",
+    "matplotlib==3.7.3", "scikit-misc==0.3.1"
   )
 )
 

diff --git a/R/cache_citeseq_pbmcs.R b/R/cache_citeseq_pbmcs.R
@@ -48,7 +48,7 @@ cacheCiteseq5k10kPbmcs <- function() {
 #' adm <- anndataR()
 #' hpath <- cacheCiteseq5k10kPbmcs()
 #' adata <- adm$read(hpath)
-#' mod <- scvi$model$`_totalvi`$TOTALVI$load(vaedir, adata, use_gpu = FALSE)
+#' mod <- scvi$model$`_totalvi`$TOTALVI$load(vaedir, adata) #, use_gpu = FALSE)
 #' mod
 #' @export
 cacheCiteseq5k10kTutvae <- function() {
@@ -79,6 +79,7 @@ cacheCiteseq5k10kTutvae <- function() {
 #' helper to get the tutorial VAE for PBMCs from scvi-tools tutorial
 #' @param use_gpu logical(1), defaulting to FALSE, passed to TOTALVI.load
 #' @return python reference to anndata
+#' @note March 2024 use_gpu ignored
 #' @examples
 #' getCiteseqTutvae()
 #' @export
@@ -91,7 +92,7 @@ getCiteseqTutvae <- function(use_gpu = FALSE) {
   adm <- anndataR()
   hpath <- cacheCiteseq5k10kPbmcs()
   adata <- adm$read(hpath)
-  mod <- scvi$model$`_totalvi`$TOTALVI$load(vaedir, adata, use_gpu = use_gpu)
+  mod <- scvi$model$`_totalvi`$TOTALVI$load(vaedir, adata) #, use_gpu = use_gpu)
   mod
 }
 

diff --git a/R/scanpy.R b/R/scanpy.R
@@ -29,3 +29,18 @@ anndataR <- function() {
     reticulate::import("anndata")
   })
 }
+
+#' basic interface to MuData
+#' @return basiliskRun result with import from reticulate, typically a Module
+#' @examples
+#' md <- MuDataR()
+#' md
+#' head(names(md))
+#' @export
+MuDataR <- function() {
+  proc <- basilisk::basiliskStart(bsklenv)
+  on.exit(basilisk::basiliskStop(proc))
+  basilisk::basiliskRun(proc, function() {
+    reticulate::import("mudata")
+  })
+}
diff --git a/inst/scripts/unpack.R b/inst/scripts/unpack.R
@@ -0,0 +1,17 @@
+# this function retrieves key components from
+# trained totalVI model for serialization; the totalVI
+# references are invalid when session ends
+unpack = function(mod, txbatch=c("PBMC10k", "PBMC5k")) {
+ elbo_train = unname(unlist(mod$history_["elbo_train"]))
+ elbo_val = unname(unlist(mod$history_["elbo_validation"]))
+#
+ latrep = mod$get_latent_representation()
+ denoised = mod$get_normalized_expression(
+    n_samples=25L, return_mean=TRUE, transform_batch=txbatch)
+ names(denoised) = c("rna_denoised", "protein_denoised")
+ protein_foreground = 100*model$get_protein_foreground_probability(
+    n_samples=25L, return_mean=TRUE, transform_batch=txbatch)
+
+ list(elbo_train=elbo_train, elbo_val=elbo_val, latrep=latrep,
+     denoised=denoised, protein_foreground=protein_foreground)
+}
diff --git a/inst/scripts/usetotal.R b/inst/scripts/usetotal.R
@@ -0,0 +1,76 @@
+# script to use scviR from Bioconductor to
+# run most of the computations of https://colab.research.google.com/github/scverse/scvi-tutorials/blob/1.1.2/multimodal/totalVI.ipynb
+#
+# anndata will be acquired and cached by scvi.data
+#
+save_dir.name = "./total2024"
+if (!dir.exists(save_dir.name)) {
+  dir.create(save_dir.name)
+}
+library(scviR)
+scvi = scviR()
+adata = scvi$data$pbmcs_10x_cite_seq(save_path=save_dir.name)
+adata
+
+# these functions are defined in scviR ... this is not
+# the way basilisk clients should work, because they
+# pass python references around, but it will take time
+# to properly encapsulate all the relevant tasks
+
+ad = anndataR()
+sc = scanpyR()
+
+# use scanpy utilities to normalize
+
+adata$layers["counts"] = adata$X # $copy()
+sc$pp$normalize_total(adata)
+sc$pp$log1p(adata)
+adata$obs_names_make_unique()
+
+# build up the protein component
+protein_adata = ad$AnnData(adata$obsm["protein_expression"])
+protein_adata$obs_names = adata$obs_names
+adata$obsm["protein_expression"] = NULL
+#del adata.obsm["protein_expression"]
+
+# use mudata for multiple modalities
+md = MuDataR()
+
+mdata = md$MuData(dict("rna"= adata, "protein"= protein_adata))
+mdata
+
+# filter genes
+sc$pp$highly_variable_genes(
+    mdata$mod[["rna"]], #["rna"],
+    n_top_genes=4000,
+    flavor="seurat_v3",
+    batch_key="batch",
+    layer="counts",
+)
+
+# works: mdata$mod$rna[ ,mdata$mod$rna$var$highly_variable]
+
+# add into mudata
+mdata$mod$rna_subset = mdata$mod$rna[ ,mdata$mod$rna$var$highly_variable]$copy()
+
+# crucial
+mdata$update()
+
+# perform 'setup' -- excluding protein_layer spec seems essential
+scvi$model$TOTALVI$setup_mudata(
+    mdata,
+    rna_layer="counts",
+#    protein_layer=NA,
+    batch_key="batch",
+    modalities=dict(
+        "rna_layer"= "rna_subset",
+        "protein_layer"= "protein",
+        "batch_key"= "rna_subset"
+    ),
+)
+
+# configure and train
+model = scvi$model$TOTALVI(mdata)
+model
+
+model$train(max_epochs=50L)
diff --git a/man/MuDataR.Rd b/man/MuDataR.Rd
diff --git a/man/cacheCiteseq5k10kTutvae.Rd b/man/cacheCiteseq5k10kTutvae.Rd
diff --git a/man/getCiteseqTutvae.Rd b/man/getCiteseqTutvae.Rd
diff --git a/vignettes/compch12.Rmd b/vignettes/compch12.Rmd
@@ -47,6 +47,7 @@ ch12sce
 ```
 
 ```{r getdat2, message=FALSE}
+options(timeout=3600)
 fullvi = getTotalVI5k10kAdata()
 fullvi
 ```
-Original file line number
+Diff line change
@@ Expand Up / @@ -47,6 +47,7 @@ ch12sce @@
     ```
     ```{r getdat2, message=FALSE}
+    options(timeout=3600)
     fullvi = getTotalVI5k10kAdata()
     fullvi
     ```
@@ Expand Down @@