forked from Shoombuatong/PAAP
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathRF_internal and external tests.txt
99 lines (90 loc) · 3.86 KB
/
RF_internal and external tests.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#######set directory
setwd('D:\\Peptide prediction\\Antihypertensive peptides\\CLassification\\AHTP')
#######Load package
library(caret)
library(randomForest)
library(Interpol)
library(protr)
library(seqinr)
###########################
m =20
ACCtr <- matrix(nrow = m, ncol = 1)
SENStr <- matrix(nrow = m, ncol = 1)
SPECtr <- matrix(nrow = m, ncol = 1)
MCCtr <- matrix(nrow = m, ncol = 1)
ACCts <- matrix(nrow = m, ncol = 1)
SENSts <- matrix(nrow = m, ncol = 1)
SPECts <- matrix(nrow = m, ncol = 1)
MCCts <- matrix(nrow = m, ncol = 1)
para1 = 3
para2 = 0.1
#######Read data
x <- read.fasta('AHTP2132.fasta', seqtype="AA", as.string = TRUE)
D = read.csv("Label all.csv", header = TRUE)
m = length(x)
aac <- t(sapply(x, extractAAC))
dpc <- t(sapply(x, extractDC))
paac <- matrix(nrow = m, ncol = 20 + para1)
for(i in 1:m){
paac[i, ]= extractPAAC(x[[i]][1],lambda = para1 , w = para2 , props = c("Hydrophobicity", "Hydrophilicity", "SideChainMass"))
}
data = data.frame(aac,paac,Class = D[,ncol(D)])
Pos = subset(data, Class == 'AHTP')
Neg = subset(data, Class == 'nonAHTP')
cross = 10
for (i in 1:m){
####### Dividing Training and Testing sets on positive and negative classes
sample1 <- c(sample(1:1066 ,800))
sample2 <- c(sample(1:1066,800))
train1 <- Pos[sample1,] ####Positive set for training
train2 <- Neg[sample2,] ####Negative set for training
test1 <- Pos[-sample1,] ####Positive set for testing
test2 <- Neg[-sample2,] ####Negative set for testing
internal <- rbind(train1,train2) ####combining for internal set
external <- rbind(test1,test2) ####combining for external set
######### Optimized parameter
control <- trainControl(method="repeatedcv", number=10, repeats=3)
tunegrid <- expand.grid(.mtry=c(1:10), .ntree=c(100,200,300,400,500,600,700,800,900,1000))
custom <- train(Class~., data=internal , method=customRF, metric=c("Accuracy"), tuneGrid=tunegrid, trControl=control)
######Loop for 10-fold CV
k <- cross;
Resultcv <- 0;
folds <- cvsegments(nrow(internal), k);
for (fold in 1:k){
currentFold <- folds[fold][[1]];
RF = randomForest(Class ~ ., internal[-currentFold,], ntree= as.numeric(custom$ bestTune[2]),mtry = as.numeric(custom$ bestTune[1]),orm.votes=TRUE,keep.forest=TRUE, importance=TRUE)
pred = predict(RF, internal[currentFold,])
Resultcv <- Resultcv + table(true=internal[currentFold,]$Class, pred=pred);
}
################### External validation
RF = randomForest(Class ~ ., internal, ntree= as.numeric(custom$ bestTune[2]),mtry = as.numeric(custom$ bestTune[1]),orm.votes=TRUE,keep.forest=TRUE, importance=TRUE)
predcv = table(external$Class, predict(RF, external)) ###### Prediction on external set
Resultext <- rbind(predcv[1], predcv[3],predcv[2], predcv[4]) ###### Reporting TN,FP,FN,TP
################### Performance report
data = Resultcv
ACCtr[i,] = (data[1]+data[4])/(data[1]+data[2]+data[3]+data[4])*100
SENStr[i,] = (data[1]/(data[1]+data[2]))*100
SPECtr[i,] = (data[4])/(data[3]+data[4])*100
MCC1 = (data[1]*data[4]) - (data[2]*data[3])
MCC2 = (data[4]+data[2])*(data[4]+data[3])
MCC3 = (data[1]+data[2])*(data[1]+data[3])
MCC4 = sqrt(MCC2)*sqrt(MCC3)
MCCtr[i,] = MCC1/MCC4
data = Resultext
ACCts[i,] = (data[1]+data[4])/(data[1]+data[2]+data[3]+data[4])*100
SENSts[i,] = (data[1]/(data[1]+data[2]))*100
SPECts[i,] = (data[4])/(data[3]+data[4])*100
MCC1 = (data[1]*data[4]) - (data[2]*data[3])
MCC2 = (data[4]+data[2])*(data[4]+data[3])
MCC3 = (data[1]+data[2])*(data[1]+data[3])
MCC4 = sqrt(MCC2)*sqrt(MCC3)
MCCts[i,] = MCC1/MCC4
}
result = data.frame (ACCtr,SENStr,SPECtr,MCCtr,ACCts,SENSts,SPECts,MCCts)
average <- matrix(nrow = 1, ncol = 8)
std <- matrix(nrow = 1, ncol = 8)
for (i in 1:8){
average[,i] = mean(result[,i])
std[,i] = sd(result[,i])
}
finalRE = t(rbind(average,std))