-
Notifications
You must be signed in to change notification settings - Fork 2
/
test_models.Rmd
82 lines (60 loc) · 2.86 KB
/
test_models.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
---
output: github_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)
```
```{r, echo = FALSE}
library(tidyverse)
library(precrec)
library(caret)
```
# Test ampir v1.1 models
These models were trained in April 2021 with updated AMP sequences from AMP databases and SwissProt (see [01_collate_databases.md](01_collate_databases.md)).
Read in the trained models and their corresponding held-out test sets.
```{r}
ampir_mature <- readRDS("ampir_v1.1.0_data/tuned_mature.rds")
ampir_precursor <- readRDS("ampir_v1.1.0_data/tuned_precursor_imbal.rds")
mature_test <- readRDS("ampir_v1.1.0_data/v1.1_featuresTest_mature.rds")
precursor_test <- readRDS("ampir_v1.1.0_data/v1.1_featuresTest_precursor_imbal.rds")
```
Define a helper function that calculates classification performance metrics from predicted probabilities and true labels.
```{r}
calculate_model_metrics <- function(df) {
  # Compute classification performance metrics for a binary classifier.
  #
  # df: data frame with a `Tg` column (predicted probability of the
  #     positive/target class) and a `Label` column ("Tg" = positive,
  #     "Bg" = background/negative).
  # Returns a one-row data frame of threshold metrics (at a 0.5 cutoff)
  # plus threshold-free AUROC and AUPRC.
  #
  # Confusion-matrix counts at a 0.5 probability cutoff.
  # FIX: the original piped into n_distinct(), which counts *distinct*
  # rows and silently undercounts whenever identical prediction rows
  # occur; nrow() counts every row.
  # as.numeric() is necessary for the MCC calculation, as integer counts
  # would otherwise trigger "NAs produced by integer overflow".
  TP <- df %>% filter(Label == "Tg", Tg >= 0.5) %>% nrow() %>% as.numeric()
  FP <- df %>% filter(Label == "Bg", Tg >= 0.5) %>% nrow() %>% as.numeric()
  TN <- df %>% filter(Label == "Bg", Tg < 0.5) %>% nrow() %>% as.numeric()
  FN <- df %>% filter(Label == "Tg", Tg < 0.5) %>% nrow() %>% as.numeric()
  Specificity <- round(TN / (TN + FP), digits = 3) # aka TNR
  Accuracy <- round((TP + TN) / (TP + TN + FP + FN), digits = 3) # misclassification = 1 - accuracy
  Recall <- round(TP / (TP + FN), digits = 3) # aka sensitivity, TPR
  Precision <- round(TP / (TP + FP), digits = 3) # positive predictive value
  FPR <- round(FP / (TN + FP), digits = 3) # false positive rate
  F1 <- round((2 * Precision * Recall) / (Precision + Recall), digits = 3) # F1 score
  MCC <- round(((TP * TN) - (FP * FN)) /
    sqrt((TP + FP) * (TP + FN) * (TN + FP) * (TN + FN)), digits = 3) # Matthews correlation coefficient
  df1 <- data.frame(FPR, Accuracy, Specificity, Recall, Precision, F1, MCC)
  # Threshold-free metrics: area under the ROC and precision-recall curves.
  df2 <- evalmod(scores = df[["Tg"]], labels = df[["Label"]], mode = "rocprc") %>%
    precrec::auc() %>%
    select(curvetypes, aucs) %>%
    pivot_wider(names_from = curvetypes, values_from = aucs) %>%
    rename(AUROC = "ROC", AUPRC = "PRC") %>%
    round(digits = 3)
  cbind(df1, df2)
}
```
```{r}
# Score each held-out test set with its matching model, keeping the true
# label alongside the predicted class probabilities.
ampir_mature_predict_and_actual <- ampir_mature %>%
  predict(mature_test, type = "prob") %>%
  add_column(Label = mature_test$Label)
ampir_precursor_predict_and_actual <- ampir_precursor %>%
  predict(precursor_test, type = "prob") %>%
  add_column(Label = precursor_test$Label)

# Summarise performance on each test set.
mature_test_results <- calculate_model_metrics(ampir_mature_predict_and_actual)
precursor_test_results <- calculate_model_metrics(ampir_precursor_predict_and_actual)
```
Mature test results
```{r, echo=FALSE}
knitr::kable(mature_test_results)
```
Precursor test results
```{r, echo=FALSE}
knitr::kable(precursor_test_results)
```