-
Notifications
You must be signed in to change notification settings - Fork 0
/
TAD-rmbatch-DESeq.R
89 lines (76 loc) · 2.91 KB
/
TAD-rmbatch-DESeq.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
######remove batch#####
####DE remove batch effect (DEseq)####
library(limma)
library(DESeq2)
library(sva)
library(dplyr)
library("RColorBrewer")
#####¼ÙÉèH1ÓëMS0Ïàͬ######
batch <- rep(c(rep("batch1",3),rep("batch2",3)),4)
#treat <- factor(rep(c(rep("WT",3),rep("MS",3)),4))
treat <- factor(rep(c(rep("WT",3),rep("MS",3)),4))
treat[4:6]<-"WT"
group<-factor(c(rep("Day0",6),rep("Day2",6),rep("Day5",6),rep("Day10",6)))
count<-read.table("D:/project/TAD/H1_MS_genesymbol_rawcount.txt",header=T)
colData <- data.frame(row.names=colnames(count),group=group,treat=treat,batch=batch)
colData
countData <- count[apply(count,1,sum) > 1 , ]
head(countData)
dds<-DESeqDataSetFromMatrix(countData,colData, formula(~group+treat+batch))
dds <- estimateSizeFactors(dds)
normalized_counts <- counts(dds, normalized=TRUE)
normalized_counts_mad <- apply(normalized_counts, 1, mad)
normalized_counts <- normalized_counts[order(normalized_counts_mad, decreasing=T), ]
# log trans results######
rld <- rlog(dds, blind=FALSE)
rlogMat <- assay(rld)
rlogMat <- rlogMat[order(normalized_counts_mad, decreasing=T), ]
#pearson_cor <- as.matrix(cor(rlogMat, method="pearson"))
#hc <- hclust(t(rlogMat) )
?hclust,method="pearson"
library(ggplot2)
vsd <- vst(dds)
head(vsd)
pca_data <- plotPCA(vsd, intgroup=c("group","treat","batch"), returnData=T, ntop=5000)
percentVar <- round(100 * attr(pca_data, "percentVar"))
ggplot(pca_data, aes(PC1, PC2, shape =treat,color=group)) +
geom_point(size=3) +
#xlim(-12, 12) +
#ylim(-10, 10) +
xlab(paste0("PC1: ",percentVar[1],"% variance")) +
ylab(paste0("PC2: ",percentVar[2],"% variance"))
####remove batch####
library(limma)
assay(vsd) <- limma::removeBatchEffect(assay(vsd), c(colData$batch))
pca <- plotPCA(vsd, intgroup=c("batch","group","treat"), returnData=T, ntop=5000)
percentVar <- round(100 * attr(pca, "percentVar"))
p<-ggplot(pca, aes(PC1, PC2, shape =treat,color=group)) +
geom_point(size=3) +
#xlim(-12, 12) +
#ylim(-10, 10) +
xlab(paste0("PC1: ",percentVar[1],"% variance")) +
ylab(paste0("PC2: ",percentVar[2],"% variance"))+
theme_bw()+
theme(plot.title = element_text(hjust = 0.5),
legend.position="right",
legend.title = element_blank())
p+theme(panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank(),
panel.grid.major.y = element_blank(),
panel.grid.minor.y = element_blank())
adjusted_counts <- sva::ComBat_seq(as.matrix(last), batch = batch, group = type)
head(assay(vsd))
pcacount<-assay(vsd)
head(pcacount)
write.table(pcacount,"RNAseq-PCAcount.csv")
####SVA################
library(sva)
batch <- rep(c(rep("1",3),rep("2",3)),4)
batch
group<-c(rep("Day0",6),rep("Day2",6),rep("Day5",6),rep("Day10",6))
mod = model.matrix(~as.factor(batch)) #group为分组信æ¯ã€‚æ¤æ¥æ“作为建模ã€?
mod
modcombat = model.matrix(~1, data = count)
modcombat
exp2 = ComBat(dat=count, batch=batch, mod=modcombat, par.prior=TRUE, ref.batch="1")
head(exp2)