-
Notifications
You must be signed in to change notification settings - Fork 2
/
sample_level_diagnostic_plot.R
70 lines (67 loc) · 3.16 KB
/
sample_level_diagnostic_plot.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
## ------------------------------------
##
## Script name: single-cell data sample level diagnostic plots
##
## Purpose of script: Generate sample-level diagnostic plots, including violin plots of QC metrics, scatter plots of QC metrics correlations, and principal components elbow plots
##
## Author: Jiekun (Jackie) Yang
##
## Date Created: 2022-11-02
##
## Email: [email protected]
##
##--------------------------------------
### cellranger_input_file is a tab delimited file with three columns: folder_path, library_ID and sample_ID
### species takes in mouse or human
### norm_mode takes in regular (normalize to the median number of genes) or sctransform
### pc_num takes in the number of PCs to use for Seurat FindNeighbors and DoubletFinder paramSweep_v3
### target_folder is where you would like to store output files generated by this function
## library setup
library(Seurat)
options(future.globals.maxSize = 128000 * 1024^2)
library(future)
library(future.apply)
plan("multiprocess", workers = 16)
## function
sample_level_diagnostic_plot <- function(cellranger_input_file = NULL, species = NULL, norm_mode = c("regular", "sctransform"), pc_num = 20, target_folder = NULL) {
temp.pwd <- getwd()
cellranger_input <- read.table(cellranger_input_file, header = T, stringsAsFactors = F)
setwd(target_folder)
seurat_plot_folder <- paste0(target_folder, "/seurat_diagnostic_plots")
if (!dir.exists("seurat_diagnostic_plots")) {
dir.create("seurat_diagnostic_plots")
}
for (temp.row in 1:nrow(cellranger_input)) {
temp.path <- cellranger_input[temp.row, 1]
temp.lib <- cellranger_input[temp.row, 2]
print(temp.lib)
temp.df.out <- Read10X(data.dir = paste0(temp.path, "/filtered_feature_bc_matrix/"))
colnames(temp.df.out) <- paste0(temp.lib, "_", colnames(temp.df.out))
temp.data <- CreateSeuratObject(counts = temp.df.out)
if (species == "mouse") {
temp.data[["percent.mt"]] <- PercentageFeatureSet(temp.data, pattern = "^mt-")
} else if (species == "human") {
temp.data[["percent.mt"]] <- PercentageFeatureSet(temp.data, pattern = "^MT-")
}
png(paste0(seurat_plot_folder, "/", temp.lib, "_violin.png"), width = 600, height = 300)
print(VlnPlot(temp.data, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3, pt.size = 0))
dev.off()
png(paste0(seurat_plot_folder, "/", temp.lib, "_scatter.png"), width = 600, height = 300)
plot1 <- FeatureScatter(temp.data, feature1 = "nCount_RNA", feature2 = "percent.mt")
plot2 <- FeatureScatter(temp.data, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
print(plot1 + plot2)
dev.off()
if (norm_mode == "regular") {
temp.data <- NormalizeData(temp.data, scale.factor = median(temp.data$nCount_RNA))
temp.data <- ScaleData(temp.data, verbose = FALSE)
} else if (norm_mode == "sctransform") {
temp.data <- SCTransform(temp.data, verbose = FALSE)
}
temp.data <- FindVariableFeatures(temp.data)
temp.data <- RunPCA(temp.data, features = VariableFeatures(temp.data)) # default 50 pcs
png(paste0(seurat_plot_folder, "/", temp.lib, "_elbow.png"), width = 300, height = 300)
print(ElbowPlot(temp.data))
dev.off()
}
setwd(temp.pwd)
}