Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

many changes to produce totalVI run example #3

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
^\.github$
6 changes: 3 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: scviR
Date: 2023-03-07
Date: 2024-01-27
Title: experimental inferface from R to scvi-tools
Version: 1.1.1
Version: 1.3.3
Authors@R:
c(person(
"Vincent", "Carey", role = c("aut", "cre"),
Expand All @@ -25,6 +25,6 @@ Imports: reticulate, BiocFileCache, utils, pheatmap,
Suggests: knitr, testthat, reshape2, ggplot2, rhdf5, BiocStyle
VignetteBuilder: knitr
biocViews: Infrastructure, SingleCell, DataImport
RoxygenNote: 7.2.3
RoxygenNote: 7.3.1
URL: https://github.com/vjcitn/scviR
BugReports: https://github.com/vjcitn/scviR/issues
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Generated by roxygen2: do not edit by hand

export(MuDataR)
export(adtProfiles)
export(anndataR)
export(cacheCiteseq5k10kPbmcs)
Expand Down
24 changes: 21 additions & 3 deletions R/basilisk.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,28 @@
bsklenv <- basilisk::BasiliskEnvironment(
envname = "bsklenv",
pkgname = "scviR",
packages = c("numpy==1.24.4", "pandas==1.5.3"),
packages = c("python=3.10", "numpy=1.24.4", "pandas=1.5.3"),
pip = c(
"scvi-tools==1.0.3", "scanpy==1.9.5",
"matplotlib==3.7.3"
"anndata==0.10.1",
"docrep==0.3.2",
"flax==0.8.1",
"jax==0.4.23",
"jaxlib==0.4.23",
"optax==0.1.9",
"scipy==1.12.0",
"scikit-learn==1.4.0",
"rich==13.7.1",
"h5py==3.9.0",
"torch==1.13.1",
"lightning==2.1.4",
"torchmetrics==0.11.0",
# "pyro-ppl==1.6.0",
"tqdm==4.66.2",
# "numpyro==0.12.1",
"ml-collections==0.1.1",
"mudata==0.2.3",
"scvi-tools==1.1.2", "scanpy==1.9.5",
"matplotlib==3.7.3", "scikit-misc==0.3.1"
)
)

Expand Down
5 changes: 3 additions & 2 deletions R/cache_citeseq_pbmcs.R
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ cacheCiteseq5k10kPbmcs <- function() {
#' adm <- anndataR()
#' hpath <- cacheCiteseq5k10kPbmcs()
#' adata <- adm$read(hpath)
#' mod <- scvi$model$`_totalvi`$TOTALVI$load(vaedir, adata, use_gpu = FALSE)
#' mod <- scvi$model$`_totalvi`$TOTALVI$load(vaedir, adata) #, use_gpu = FALSE)
#' mod
#' @export
cacheCiteseq5k10kTutvae <- function() {
Expand Down Expand Up @@ -79,6 +79,7 @@ cacheCiteseq5k10kTutvae <- function() {
#' helper to get the tutorial VAE for PBMCs from scvi-tools tutorial
#' @param use_gpu logical(1), defaulting to FALSE, passed to TOTALVI.load
#' @return python reference to anndata
#' @note March 2024 use_gpu ignored
#' @examples
#' getCiteseqTutvae()
#' @export
Expand All @@ -91,7 +92,7 @@ getCiteseqTutvae <- function(use_gpu = FALSE) {
adm <- anndataR()
hpath <- cacheCiteseq5k10kPbmcs()
adata <- adm$read(hpath)
mod <- scvi$model$`_totalvi`$TOTALVI$load(vaedir, adata, use_gpu = use_gpu)
mod <- scvi$model$`_totalvi`$TOTALVI$load(vaedir, adata) #, use_gpu = use_gpu)
mod
}

Expand Down
15 changes: 15 additions & 0 deletions R/scanpy.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,18 @@ anndataR <- function() {
reticulate::import("anndata")
})
}

#' basic interface to MuData
#' @return basiliskRun result with import from reticulate, typically a Module
#' @examples
#' md <- MuDataR()
#' md
#' head(names(md))
#' @export
MuDataR <- function() {
proc <- basilisk::basiliskStart(bsklenv)
on.exit(basilisk::basiliskStop(proc))
basilisk::basiliskRun(proc, function() {
reticulate::import("mudata")
})
}
17 changes: 17 additions & 0 deletions inst/scripts/unpack.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# this function retrieves key components from
# trained totalVI model for serialization; the totalVI
# references are invalid when session ends
unpack = function(mod, txbatch=c("PBMC10k", "PBMC5k")) {
elbo_train = unname(unlist(mod$history_["elbo_train"]))
elbo_val = unname(unlist(mod$history_["elbo_validation"]))
#
latrep = mod$get_latent_representation()
denoised = mod$get_normalized_expression(
n_samples=25L, return_mean=TRUE, transform_batch=txbatch)
names(denoised) = c("rna_denoised", "protein_denoised")
protein_foreground = 100*model$get_protein_foreground_probability(
n_samples=25L, return_mean=TRUE, transform_batch=txbatch)

list(elbo_train=elbo_train, elbo_val=elbo_val, latrep=latrep,
denoised=denoised, protein_foreground=protein_foreground)
}
76 changes: 76 additions & 0 deletions inst/scripts/usetotal.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# script to use scviR from Bioconductor to
# run most of the computations of https://colab.research.google.com/github/scverse/scvi-tutorials/blob/1.1.2/multimodal/totalVI.ipynb
#
# anndata will be acquired and cached by scvi.data
#
save_dir.name = "./total2024"
if (!dir.exists(save_dir.name)) {
dir.create(save_dir.name)
}
library(scviR)
scvi = scviR()
adata = scvi$data$pbmcs_10x_cite_seq(save_path=save_dir.name)
adata

# these functions are defined in scviR ... this is not
# the way basilisk clients should work, because they
# pass python references around, but it will take time
# to properly encapsulate all the relevant tasks

ad = anndataR()
sc = scanpyR()

# use scanpy utilities to normalize

adata$layers["counts"] = adata$X # $copy()
sc$pp$normalize_total(adata)
sc$pp$log1p(adata)
adata$obs_names_make_unique()

# build up the protein component
protein_adata = ad$AnnData(adata$obsm["protein_expression"])
protein_adata$obs_names = adata$obs_names
adata$obsm["protein_expression"] = NULL
#del adata.obsm["protein_expression"]

# use mudata for multiple modalities
md = MuDataR()

mdata = md$MuData(dict("rna"= adata, "protein"= protein_adata))
mdata

# filter genes
sc$pp$highly_variable_genes(
mdata$mod[["rna"]], #["rna"],
n_top_genes=4000,
flavor="seurat_v3",
batch_key="batch",
layer="counts",
)

# works: mdata$mod$rna[ ,mdata$mod$rna$var$highly_variable]

# add into mudata
mdata$mod$rna_subset = mdata$mod$rna[ ,mdata$mod$rna$var$highly_variable]$copy()

# crucial
mdata$update()

# perform 'setup' -- excluding protein_layer spec seems essential
scvi$model$TOTALVI$setup_mudata(
mdata,
rna_layer="counts",
# protein_layer=NA,
batch_key="batch",
modalities=dict(
"rna_layer"= "rna_subset",
"protein_layer"= "protein",
"batch_key"= "rna_subset"
),
)

# configure and train
model = scvi$model$TOTALVI(mdata)
model

model$train(max_epochs=50L)
19 changes: 19 additions & 0 deletions man/MuDataR.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/cacheCiteseq5k10kTutvae.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions man/getCiteseqTutvae.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions vignettes/compch12.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ ch12sce
```

```{r getdat2, message=FALSE}
options(timeout=3600)
fullvi = getTotalVI5k10kAdata()
fullvi
```
Expand Down