-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtuxedo-ch_code4Github_09052019.r
executable file
·113 lines (91 loc) · 3.28 KB
/
tuxedo-ch_code4Github_09052019.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
########################################
## This data support the publication in PLoS Bioinformatics 2019, entitled
## "Chromatin-enriched RNAs mark active and repressive cis-regulation: an analysis of nuclear RNA-seq"
##
## This R code checks the dimensionof the raw data generated by the Tuxedo-ch pipeline
## in three cell types, respectively
##
## last updated on 9/19/2019 by Xiangying Sun and Holly Yang
########################################
# setwd('H:\\projects\\Ivan\\doc\\cheRNA\\data_4_revision\\github')
# set your working directory here !!!
################# K562 #################
## Tuxedo-ch ##
load("K562_tuxedo_limma_merged_fulltable_antiSense_GitHub.rData")
dim(K562_tuxedo_limma_merged)
#[1] 81628 30
expressed <- subset(K562_tuxedo_limma_merged, !is.na(AveExpr))
dim(expressed)
#[1] 14703 30
## The identified icheRNAs
dim(subset(expressed, overlap_coding=="no" & decision==1))
# [1] 3298 30
## The width of icheRNAs
median(subset(expressed, overlap_coding=="no" & decision==1)$width)
# [1] 4401.5
## The identified all cheRNAs
dim(subset(expressed, decision==1))
# [1] 5680
## The width of all cheRNAs
median(subset(expressed, decision==1)$width)
# [1] 5579
## The identified isneRNAs
dim(subset(expressed, overlap_coding=="no" & decision==-1))
# [1] 459 30
## The width of isneRNAs
median(subset(expressed, overlap_coding=="no" & decision==-1)$width)
# [1] 1297
## The identified all sneRNAs
dim(subset(expressed, decision==-1))
# [1] 5672 30
## The width of all sneRNAs
median(subset(expressed, decision==-1)$width)
# [1] 25978
################# HEK293 #################
## Tuxedo-ch ##
load("HEK293_tuxedo_limma_merged_adjcentCoding_GitHub.rData")
dim(HEK293_tuxedo_merged_adjcentCoding)
#[1] 17235 19
expressed <- subset(HEK293_tuxedo_merged_adjcentCoding, !is.na(AveExpr))
dim(expressed)
#[1] 17235 19
dim(subset(expressed, overlap_coding=="no" & decision==1))
# [1] 3319 19
median(subset(expressed, overlap_coding=="no" & decision==1)$width)
# [1] 892
dim(subset(expressed, decision==1))
# [1] 6189 19
median(subset(expressed, decision==1)$width)
# [1] 1343
dim(subset(expressed, overlap_coding=="no" & decision==-1))
# [1] 2792 19
median(subset(expressed, overlap_coding=="no" & decision==-1)$width)
# [1] 926
dim(subset(expressed, decision==-1))
# [1] 5254 19
median(subset(expressed, decision==-1)$width)
# [1] 1407.5
################# H1 #################
## Tuxedo-ch ##
load("H1_tuxedo_limma_merged_adjcentCoding_GitHub.rData")
dim(H1_tuxedo_merged_adjcentCoding)
#[1] 13978 20
expressed <- subset(H1_tuxedo_merged_adjcentCoding, !is.na(AveExpr))
dim(expressed)
#[1] 13978 20
dim(subset(expressed, overlap_coding=="no" & decision==1))
# [1] 1684 20
median(subset(expressed, overlap_coding=="no" & decision==1)$width)
# [1] 5068
dim(subset(expressed, decision==1))
# [1] 4331 20
median(subset(expressed, decision==1)$width)
# [1] 7026
dim(subset(expressed, overlap_coding=="no" & decision==-1))
# [1] 376 20
median(subset(expressed, overlap_coding=="no" & decision==-1)$width)
# [1] 1080
dim(subset(expressed, decision==-1))
# [1] 4650 20
median(subset(expressed, decision==-1)$width)
# [1] 24117.5