-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
f593eeb
commit d073069
Showing
5 changed files
with
11 additions
and
212 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,3 +3,4 @@ | |
^somegraphs\.R$ | ||
^\.github$ | ||
^old$ | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,227 +4,23 @@ author: | |
- name: Daniela Corbetta | ||
affiliation: Department of Statistical Sciences, University of Padova | ||
email: [email protected] | ||
package: ConfCell | ||
output: | ||
BiocStyle::html_document: | ||
toc: true | ||
toc_float: true | ||
vignette: > | ||
%\VignetteIndexEntry{ConfCell} | ||
%\VignetteIndexEntry{my-vignette} | ||
%\VignetteEngine{knitr::rmarkdown} | ||
%\VignetteEncoding{UTF-8} | ||
--- | ||
|
||
```{r setup, include=FALSE} | ||
knitr::opts_chunk$set(echo = TRUE) | ||
```{r, include = FALSE} | ||
knitr::opts_chunk$set( | ||
collapse = TRUE, | ||
comment = "#>" | ||
) | ||
``` | ||
|
||
# Load useful packages | ||
|
||
```{r, message=FALSE} | ||
```{r setup} | ||
library(ConfCell) | ||
library(SingleCellExperiment) | ||
library(scater) | ||
library(scran) | ||
library(VGAM) | ||
library(foreach) | ||
library(ontoProc) | ||
library(MerfishData) | ||
library(doParallel) | ||
library(igraph) | ||
`%notin%` = Negate(`%in%`) | ||
num_cores <- 4 | ||
cl1 <- makeCluster(num_cores) | ||
registerDoParallel(cl1) | ||
``` | ||
|
||
# Preliminaries | ||
|
||
## Load data and cell ontology | ||
|
||
As an example, we'll load the mouse ileum Merfish data from the MerfishData Bioconductor package. Load also the cell ontology trough the ontoProc Bioc package | ||
```{r} | ||
spe.baysor <- MouseIleumPetukhov2021(segmentation = "baysor") | ||
cl <- getOnto("cellOnto", "2023") | ||
``` | ||
|
||
## Build the ontology | ||
|
||
To restrict the cell ontology to the interesting part that is related to the cell types | ||
that are present in our data, we need to find the corresponding tags: | ||
```{r} | ||
tags <- c("CL:0009022", | ||
"CL:0000236", | ||
"CL:0009080", | ||
"CL:1000411", | ||
"CL:1000335", | ||
"CL:1000326", | ||
"CL:0002088", | ||
"CL:0009007", | ||
"CL:1000343", | ||
"CL:0000669", | ||
"CL:1000278", | ||
"CL:0009017", | ||
"CL:0000492", | ||
"CL:0000625", | ||
"CL:0017004") | ||
opi <- graph_from_graphnel(onto_plot2(cl, tags)) | ||
``` | ||
|
||
In the \texttt{opi} object, there are also instances from other ontologies (CARO and BFO). | ||
Remove them and rename the vertex to have them match the annotations. | ||
|
||
```{r} | ||
sel_ver <- V(opi)$name[c(grep("CARO", V(opi)$name), grep("BFO", V(opi)$name))] | ||
opi1 <- opi - sel_ver | ||
## Rename vertex to match annotations | ||
V(opi1)$name[V(opi1)$name=="B\ncell\nCL:0000236"] <- "B cell" | ||
V(opi1)$name[V(opi1)$name=="endothelial\ncell of Peyer's patch\nCL:1000411"] <- "Endothelial" | ||
V(opi1)$name[V(opi1)$name=="enterocyte\nof epithelium of intestinal villus\nCL:1000335"] <- "Enterocyte" | ||
V(opi1)$name[V(opi1)$name=="ileal\ngoblet cell\nCL:1000326"] <- "Goblet" | ||
V(opi1)$name[V(opi1)$name=="interstitial\ncell of Cajal\nCL:0002088"] <- "ICC" | ||
V(opi1)$name[V(opi1)$name=="gastrointestinal\ntract (lamina propria) macrophage of small intestine\nCL:0009007"] <- "Macrophage + DC" | ||
V(opi1)$name[V(opi1)$name=="paneth\ncell of epithelium of small intestine\nCL:1000343"] <- "Paneth" | ||
V(opi1)$name[V(opi1)$name=="pericyte\nCL:0000669"] <- "Pericyte" | ||
V(opi1)$name[V(opi1)$name=="smooth\nmuscle fiber of ileum\nCL:1000278"] <- "Smooth Muscle" | ||
V(opi1)$name[V(opi1)$name=="intestinal\ncrypt stem cell of small intestine\nCL:0009017"] <- "Stem + TA" | ||
V(opi1)$name[V(opi1)$name=="stromal\ncell of lamina propria of small intestine\nCL:0009022"] <- "Stromal" | ||
V(opi1)$name[V(opi1)$name=="CD4-positive\nhelper T cell\nCL:0000492"] <- "T (CD4+)" | ||
V(opi1)$name[V(opi1)$name=="CD8-positive,\nalpha-beta T cell\nCL:0000625"] <- "T (CD8+)" | ||
V(opi1)$name[V(opi1)$name=="telocyte\nCL:0017004"] <- "Telocyte" | ||
V(opi1)$name[V(opi1)$name=="tuft\ncell of small intestine\nCL:0009080"] <- "Tuft" | ||
## Add the edge from connective tissue cell and telocyte and delete useless nodes | ||
opi1 <- add_edges(opi1, c("connective\ntissue cell\nCL:0002320", "Telocyte")) | ||
gr <- as_graphnel(opi1) | ||
opi2 <- opi1 - c("somatic\ncell\nCL:0002371", "contractile\ncell\nCL:0000183", | ||
"native\ncell\nCL:0000003") | ||
gr1 <- as_graphnel(opi2) | ||
plot(gr1, attrs=list(node=list(fontsize=27))) | ||
``` | ||
|
||
## Preprocess data | ||
|
||
Modify the colData variable "leiden_final" to unify B cells and enterocytes | ||
|
||
```{r} | ||
spe.baysor$cell_type <- spe.baysor$leiden_final | ||
spe.baysor$cell_type[spe.baysor$cell_type %in% c("B (Follicular, Circulating)", "B (Plasma)")] <- "B cell" | ||
spe.baysor$cell_type[grep("Enterocyte", spe.baysor$cell_type)] <- "Enterocyte" | ||
spe.baysor <- spe.baysor[,spe.baysor$cell_type %notin% c("Removed", "Myenteric Plexus")] | ||
spe.baysor | ||
``` | ||
|
||
|
||
## Fit model | ||
Fit a multinomial model with a random sample of 500 cells using the 50 HVGs. | ||
```{r, warning=FALSE} | ||
# get. HVGs | ||
spe.baysor <- logNormCounts(spe.baysor) | ||
v <- modelGeneVar(spe.baysor) | ||
hvg <- getTopHVGs(v, n=50) | ||
# Extract counts and construct df | ||
df <- as.data.frame(t(as.matrix(counts(spe.baysor[hvg,])))) | ||
df$Y <- spe.baysor$cell_type | ||
set.seed(1703) | ||
train <- sample(1:nrow(df), 500) | ||
df.train <- df[train,] | ||
table(df.train$Y) | ||
# Fit model | ||
fit <- vglm(Y ~ ., family = multinomial(refLevel = "B cell"), | ||
data = df.train) | ||
df.other <- df[-train,] | ||
spe.other <- spe.baysor[,-train] | ||
``` | ||
|
||
# Prediction sets | ||
|
||
Split data randomly in calibration and query data | ||
```{r} | ||
# split data | ||
set.seed(1237) | ||
cal <- sample(1:nrow(df.other), 1000) | ||
df.cal <- df.other[cal,] | ||
df.test <- df.other[-cal,] | ||
``` | ||
|
||
|
||
## Obtain prediction matrices | ||
|
||
The first step to build conformal prediction sets is to obtain prediction matrices | ||
for data in the calibration data and in the query data. Each row of the matrix | ||
correspons to a particular cell, while each row to a different cell type. The entry | ||
$p_{i,j}$ of the matrix indicates the estimated probability that the cell $i$ is | ||
of type $j$. | ||
```{r} | ||
# Prediction matrix for calibration data | ||
p.cal <- predict(fit, newdata=df.cal, type="response") | ||
# Prediction matrix for query data | ||
p.test <- predict(fit, newdata=df.test, type="response") | ||
``` | ||
|
||
|
||
## Obtain conformal prediction sets | ||
|
||
We can now directly call the \texttt{getPredictionSet} function by using as input the | ||
prediction matrices for the calibration and the query dataset. In this case, the | ||
output of the function will be a list whose elements are the prediction sets for each query cell. | ||
By setting \texttt{follow_ontology=FALSE}, we are asking the function to return | ||
prediction sets obtained via standard conformal inference. | ||
|
||
```{r} | ||
sets <- getPredictionSets(x.query=p.test, x.cal=p.cal, y.cal = df.cal$Y, | ||
onto = opi2, alpha = 0.1, | ||
follow_ontology = FALSE) | ||
``` | ||
|
||
As an alternative, we can provide as input a SingleCellExperiment object. In this case, | ||
it needs to have in the colData the estimated probabilities for each cell type. | ||
The names of these colData have to correspond to the names of the leaf nodes in the | ||
ontology. Let's create the two separate SingleCellExperiment objects and add the predictions to the colData. | ||
|
||
```{r} | ||
# Create the SingleCellExperiment objects | ||
spe.cal <- spe.other[,cal] | ||
spe.test <- spe.other[,-cal] | ||
# Retrieve labels as leaf nodes of the ontology | ||
labels <- V(opi2)$name[degree(opi2, mode="out")==0] | ||
# Create corresponding colData | ||
for(i in labels){ | ||
colData(spe.cal)[[i]] <- p.cal[,i] | ||
colData(spe.test)[[i]] <- p.test[,i] | ||
} | ||
# Create prediction sets | ||
sets.bis <- getPredictionSets(x.query=spe.test, x.cal=spe.cal, y.cal = df.cal$Y, | ||
onto = opi2, alpha = 0.1, | ||
follow_ontology = FALSE) | ||
l <- sapply(sets, length) | ||
l.bis <- sapply(sets.bis, length) | ||
mean(l) | ||
barplot(table(l.bis)) | ||
# l.onto <- sapply(sets.onto, length) | ||
# mean(l.onto) | ||
``` | ||
|
||
|
||
|
||
|
||
```{r} | ||
sets.onto <- getPredictionSets(x.query=p.test, x.cal=p.cal, y.cal = df.cal$Y, | ||
onto = opi2, alpha = 0.1, | ||
lambdas = seq(0.001,0.999,length.out=100), | ||
follow_ontology = TRUE) | ||
``` | ||
|
||
|
||
|
||
|