-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 76c7f01
Showing
32 changed files
with
22,005 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
.DS_Store | ||
data/ | ||
GEO/ | ||
notebooks/.Rhistory | ||
reports/figures/ | ||
*.smbdelete* | ||
*temp* | ||
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
# Wong et al 2021 | ||
|
||
This is a MARS-seq experiment on samples: | ||
|
||
* VFG53_-BMP+BMP_1109 | ||
* VFG83_-BMP+BMP_1109 | ||
* VFG53_1025 | ||
* VFG83_1025 | ||
* VFG53_-BMP_1101 | ||
* VFG83_-BMP_1101 | ||
* VFG53_-BMP+FGF_1109 | ||
* VFG83_-BMP+FGF_1109 | ||
* ADE_111_1213 |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
--- | ||
title: "00 - preparation" | ||
author: "Martin Proks" | ||
date: '`r Sys.Date()`' | ||
knit: (function(inputFile, encoding) { | ||
rmarkdown::render(inputFile, | ||
encoding=encoding, | ||
output_format='all', | ||
output_dir='../reports/')}) | ||
output: | ||
# To create PDF report, uncomment below | ||
# pdf_document: | ||
# toc: yes | ||
html_document: | ||
number_sections: yes | ||
theme: yeti | ||
toc: yes | ||
toc_float: yes | ||
df_print: paged | ||
dev: png | ||
--- | ||
|
||
```{r knitr, include = FALSE} | ||
DOCNAME = knitr::current_input() | ||
knitr::opts_chunk$set(autodep = TRUE, | ||
cache = FALSE, | ||
cache.path = paste0("cache/", DOCNAME, "/"), | ||
cache.comments = TRUE, | ||
echo = TRUE, | ||
error = FALSE, | ||
fig.align = "center", | ||
fig.path = paste0("../reports/figures/", DOCNAME, "/"), | ||
fig.width = 10, | ||
fig.height = 8, | ||
message = FALSE, | ||
warning = FALSE) | ||
``` | ||
|
||
# Find count matrix | ||
|
||
```{r} | ||
files <- stringr::str_sort(list.files(path = '../GEO/', pattern = "^AB", full.names = T), numeric = T) | ||
``` | ||
|
||
# Concatenate counts | ||
|
||
```{r} | ||
counts <- data.frame(read.table(files[1], sep = "\t")) | ||
for (i in 2:length(files)) { | ||
counts <- cbind(counts, read.table(files[i], sep = "\t")) | ||
} | ||
rownames(counts) <- unlist(lapply(rownames(counts), FUN = function(x) { return(strsplit(x, ';')[[1]][1]) })) | ||
``` | ||
|
||
# Save counts | ||
|
||
```{r} | ||
data <- NULL | ||
data$counts <- counts | ||
data$metadata <- as.data.frame(readxl::read_excel("../data/corrected_fung_design_file.xlsx", sheet = "Sheet1", col_names = T)[-1]) | ||
data$annot <- read.table("../data/gene-annotations.tsv", sep = "\t", header = T) | ||
rownames(data$metadata) <- data$metadata[,1] | ||
saveRDS(data, file = "../data/processed/MARS-Fung.RDS") | ||
``` | ||
|
||
# Session info | ||
|
||
```{r} | ||
sessionInfo() | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,252 @@ | ||
--- | ||
title: "Single Cell Analysis - human sample" | ||
author: "Martin Proks" | ||
date: '`r Sys.Date()`' | ||
knit: (function(inputFile, encoding) { | ||
rmarkdown::render(inputFile, | ||
encoding=encoding, | ||
output_format='all', | ||
output_dir='../reports/')}) | ||
output: | ||
# To create PDF report, uncomment below | ||
pdf_document: | ||
toc: yes | ||
html_document: | ||
number_sections: yes | ||
theme: yeti | ||
toc: yes | ||
toc_float: yes | ||
df_print: paged | ||
dev: png | ||
--- | ||
|
||
```{r knitr, include = FALSE} | ||
DOCNAME = knitr::current_input() | ||
knitr::opts_chunk$set(autodep = TRUE, | ||
cache = FALSE, | ||
cache.path = paste0("cache/", DOCNAME, "/"), | ||
cache.comments = TRUE, | ||
echo = TRUE, | ||
error = FALSE, | ||
fig.align = "center", | ||
fig.path = paste0("../reports/figures/", DOCNAME, "/"), | ||
fig.width = 10, | ||
fig.height = 8, | ||
message = FALSE, | ||
warning = FALSE) | ||
``` | ||
|
||
# Introduction | ||
|
||
Single-cell analysis of human dataset. | ||
|
||
# Load dataset | ||
|
||
```{r message=FALSE} | ||
library(dplyr) | ||
library(Seurat) | ||
library(ggplot2) | ||
library(patchwork) | ||
random_seed <- 12345 | ||
data <- readRDS('../data/raw/MARS-Fung.RDS') | ||
raw_ann <- CreateSeuratObject(data$counts, meta.data = data$metadata) | ||
``` | ||
|
||
## Quality plots | ||
|
||
```{r} | ||
raw_ann[['percent.mito']] <- PercentageFeatureSet(raw_ann, pattern = "^MT-") | ||
raw_ann[['percent.ercc']] <- PercentageFeatureSet(raw_ann, pattern = "^ERCC-") | ||
raw_ann[['percent.ribo']] <- PercentageFeatureSet(raw_ann, pattern = "^RP[LS]") | ||
``` | ||
|
||
```{r} | ||
VlnPlot(raw_ann, | ||
features = c("nFeature_RNA", "nCount_RNA", "percent.mito", "percent.ribo"), | ||
ncol = 4) | ||
``` | ||
|
||
```{r} | ||
FeatureScatter(raw_ann, feature1 = "nCount_RNA", feature2 = "nFeature_RNA", group.by = "Stage") | ||
``` | ||
|
||
## Filtering | ||
|
||
```{r} | ||
print(paste0("Before filtering: ", dim(raw_ann)[2], " cells ", dim(raw_ann)[1], " genes")) | ||
``` | ||
|
||
```{r} | ||
# Remove ERCC | ||
raw_ann <- raw_ann[rownames(raw_ann)[!grepl('ERCC-', rownames(raw_ann))], ] | ||
``` | ||
|
||
```{r} | ||
# Remove Zero stage | ||
raw_ann <- raw_ann[, [email protected]$Stage != 'Zero'] | ||
``` | ||
|
||
```{r} | ||
nbins <- 100 | ||
min_cells <- 2000 | ||
max_cells <- 35e3 | ||
min_genes <- 550 | ||
max_genes <- 4950 | ||
ggplot([email protected], aes(x=nCount_RNA)) + | ||
geom_histogram(bins = nbins) + | ||
geom_vline(aes(xintercept=min_cells), linetype="dashed", color='red') + | ||
geom_vline(aes(xintercept=max_cells), linetype="dashed", color='red') | ||
ggplot([email protected], aes(x=nFeature_RNA)) + | ||
geom_histogram(bins = nbins) + | ||
geom_vline(aes(xintercept=min_genes), linetype="dashed", color='red') + | ||
geom_vline(aes(xintercept=max_genes), linetype="dashed", color='red') | ||
ggplot([email protected], aes(x=nCount_RNA, y=nFeature_RNA, color=percent.mito)) + | ||
geom_point() + | ||
scale_color_continuous(type = "viridis") + | ||
geom_vline(aes(xintercept=min_cells), linetype="dashed", color='red') + | ||
geom_vline(aes(xintercept=max_cells), linetype="dashed", color='red') + | ||
geom_hline(aes(yintercept=min_genes), linetype="dashed", color='red') + | ||
geom_hline(aes(yintercept=max_genes), linetype="dashed", color='red') | ||
``` | ||
|
||
```{r} | ||
adata <- subset(raw_ann, subset = | ||
nFeature_RNA > min_genes & nFeature_RNA < max_genes & | ||
nCount_RNA > min_cells & nCount_RNA < max_cells & | ||
percent.mito < 20) | ||
``` | ||
|
||
```{r} | ||
adata <- CreateSeuratObject(adata@assays$RNA@counts, min.cells = 3, meta.data = [email protected]) | ||
``` | ||
|
||
## After filtering | ||
|
||
```{r} | ||
print(paste0("After filtering: ", dim(adata)[2], " cells ", dim(adata)[1], " genes")) | ||
``` | ||
|
||
```{r} | ||
VlnPlot(adata, | ||
features = c("nFeature_RNA", "nCount_RNA", "percent.mito", "percent.ribo"), | ||
ncol = 4) | ||
``` | ||
|
||
## Save dataset | ||
|
||
```{r} | ||
saveRDS(adata, file = "../data/processed/01_fung.filtered.RDS") | ||
``` | ||
|
||
# Subset | ||
|
||
```{r} | ||
adata <- adata[, adata$Condition %in% c("VFG53_1025", "VFG53_-BMP_1101", "VFG53_-BMP+FGF_1109")] | ||
dim(adata) | ||
``` | ||
|
||
# Normalization | ||
|
||
```{r message=FALSE, warning=FALSE} | ||
adata <- NormalizeData(adata) | ||
``` | ||
|
||
# HVG | ||
|
||
```{r} | ||
adata <- FindVariableFeatures(adata, selection.method = "vst", nfeatures = 2000) | ||
top10 <- head(VariableFeatures(adata), 10) | ||
# plot variable features with and without labels | ||
plot1 <- VariableFeaturePlot(adata) | ||
plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE) | ||
plot1 + plot2 | ||
``` | ||
|
||
# Scale data | ||
|
||
```{r, message=FALSE} | ||
adata <- ScaleData(adata) | ||
``` | ||
|
||
# Cell Cycle | ||
|
||
```{r} | ||
adata <- CellCycleScoring(adata, | ||
s.features = cc.genes.updated.2019$s.genes, | ||
g2m.features = cc.genes.updated.2019$g2m.genes, | ||
set.ident = TRUE) | ||
``` | ||
|
||
# PCA | ||
|
||
```{r} | ||
adata <- RunPCA(adata, npcs = 50) | ||
ElbowPlot(adata) | ||
DimPlot(adata, reduction = 'pca', group.by = 'Stage') | ||
DimPlot(adata, reduction = 'pca', group.by = 'Phase') | ||
``` | ||
|
||
```{r} | ||
Idents(adata) <- 'Stage' | ||
commonR::plot_proportion_bar(adata, group.by = "Phase") + ggtitle("Cell cycle proportion per Stage") | ||
``` | ||
|
||
# Clustering & UMAP (resolution 0.5) | ||
|
||
```{r, message=FALSE} | ||
adata <- FindNeighbors(adata, dims = 1:20) | ||
adata <- FindClusters(adata, random.seed = random_seed, resolution = 0.5) | ||
adata <- RunUMAP(adata, dims = 1:20, seed.use = random_seed) | ||
``` | ||
|
||
```{r} | ||
DimPlot(adata, reduction = "umap") | ||
DimPlot(adata, reduction = "umap", group.by = 'Stage') | ||
DimPlot(adata, reduction = "umap", group.by = 'Phase') | ||
``` | ||
|
||
```{r} | ||
markers <- FindAllMarkers(adata, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.5) | ||
markers %>% | ||
group_by(cluster) %>% | ||
slice_max(n = 10, order_by = avg_log2FC) | ||
``` | ||
|
||
# Clustering & UMAP (resolution 0.6) | ||
|
||
```{r, message=FALSE} | ||
adata <- FindNeighbors(adata, dims = 1:20) | ||
adata <- FindClusters(adata, random.seed = random_seed, resolution = 0.6) | ||
adata <- RunUMAP(adata, dims = 1:20, seed.use = random_seed) | ||
``` | ||
|
||
```{r} | ||
DimPlot(adata, reduction = "umap") | ||
DimPlot(adata, reduction = "umap", group.by = 'Stage') | ||
DimPlot(adata, reduction = "umap", group.by = 'Phase') | ||
``` | ||
|
||
```{r} | ||
markers <- FindAllMarkers(adata, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.5) | ||
markers %>% | ||
group_by(cluster) %>% | ||
slice_max(n = 10, order_by = avg_log2FC) | ||
``` | ||
|
||
# Save analysis | ||
|
||
```{r} | ||
saveRDS(adata, file = '../data/processed/01_fung.RDS') | ||
``` | ||
|
||
# Session info | ||
|
||
```{r} | ||
sessionInfo() | ||
``` |
Oops, something went wrong.