-
Notifications
You must be signed in to change notification settings - Fork 0
/
bibliography.bib
128 lines (122 loc) · 6.97 KB
/
bibliography.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
% Davis and Goadrich (ICML 2006): relationship between Precision-Recall and
% ROC curves. `address` holds the publisher's city; the conference city is
% kept in `venue` so it cannot clash with `address` (biblatex treats
% `address` as an alias of `location`). Classic BibTeX styles ignore `venue`.
@INPROCEEDINGS{Davis2006-di,
  title     = "The relationship between {Precision-Recall} and {ROC} curves",
  booktitle = "Proceedings of the 23rd International Conference on Machine
               Learning",
  author    = "Davis, Jesse and Goadrich, Mark",
  abstract  = "Receiver Operator Characteristic (ROC) curves are commonly used
               to present results for binary decision problems in machine
               learning. However, when dealing with highly skewed datasets,
               Precision-Recall (PR) curves give a more informative picture of
               an algorithm's performance. We show that a deep connection
               exists between ROC space and PR space, such that a curve
               dominates in ROC space if and only if it dominates in PR space.
               A corollary is the notion of an achievable PR curve, which has
               properties much like the convex hull in ROC space; we show an
               efficient algorithm for computing this curve. Finally, we also
               note differences in the two types of curves are significant for
               algorithm design. For example, in PR space it is incorrect to
               linearly interpolate between points. Furthermore, algorithms
               that optimize the area under the ROC curve are not guaranteed to
               optimize the area under the PR curve.",
  publisher = "Association for Computing Machinery",
  pages     = "233--240",
  series    = "ICML '06",
  month     =  jun,
  year      =  2006,
  address   = "New York, NY, USA",
  venue     = "Pittsburgh, Pennsylvania, USA",
  doi       = "10.1145/1143844.1143874"
}
% Xu et al. (2021): benchmark of machine-learning predictors for
% antimicrobial peptides. The abstract holds only the opening sentence of
% the paper's abstract. NOTE(review): volume/issue/pages are not recorded
% here; add them from the publisher record if the style needs them.
@ARTICLE{Xu2021-ku,
  title     = "Comprehensive assessment of machine learning-based methods for
               predicting antimicrobial peptides",
  author    = "Xu, Jing and Li, Fuyi and Leier, Andr{\'e} and Xiang, Dongxu and
               Shen, Hsin-Hui and Marquez Lago, Tatiana T and Li, Jian and Yu,
               Dong-Jun and Song, Jiangning",
  abstract  = "Antimicrobial peptides (AMPs) are a unique and diverse group of
               molecules that play a crucial role in a myriad of biological
               processes and cellular functions.",
  journal   = "Briefings in Bioinformatics",
  publisher = "Oxford University Press",
  month     =  mar,
  year      =  2021,
  doi       = "10.1093/bib/bbab083",
  language  = "en"
}
% Gabere and Noble (2017): empirical comparison of web-based antimicrobial
% peptide prediction tools. Em-dashes in the abstract are written as `---`.
% NOTE(review): the two contact addresses in the abstract appear to have
% been masked at export time; restore them from the publisher page if needed.
@ARTICLE{Gabere2017-ne,
  title    = "Empirical comparison of web-based antimicrobial peptide
              prediction tools",
  author   = "Gabere, Musa Nur and Noble, William Stafford",
  abstract = "Motivation: Antimicrobial peptides (AMPs) are innate immune
              molecules that exhibit activities against a range of microbes,
              including bacteria, fungi, viruses and protozoa. Recent increases
              in microbial resistance against current drugs has led to a
              concomitant increase in the need for novel antimicrobial agents.
              Over the last decade, a number of AMP prediction tools have been
              designed and made freely available online. These AMP prediction
              tools show potential to discriminate AMPs from non-AMPs, but the
              relative quality of the predictions produced by the various tools
              is difficult to quantify. Results: We compiled two sets of AMP
              and non-AMP peptides, separated into three
              categories---antimicrobial, antibacterial and bacteriocins. Using
              these benchmark data sets, we carried out a systematic evaluation
              of ten publicly available AMP prediction methods. Among the six
              general AMP prediction tools---ADAM, CAMPR3(RF), CAMPR3(SVM),
              MLAMP, DBAASP and MLAMP---we find that CAMPR3(RF) provides a
              statistically significant improvement in performance, as measured
              by the area under the receiver operating characteristic (ROC)
              curve, relative to the other five methods. Surprisingly, for
              antibacterial prediction, the original AntiBP method
              significantly outperforms its successor, AntiBP2 based on one
              benchmark dataset. The two bacteriocin prediction tools, BAGEL3
              and BACTIBASE, both provide very good performance and BAGEL3
              outperforms its predecessor, BACTIBASE, on the larger of the two
              benchmarks. Contact: [email protected] or
              [email protected]. Supplementary information: Supplementary
              data are available at Bioinformatics online.",
  journal  = "Bioinformatics",
  volume   =  33,
  number   =  13,
  pages    = "1921--1929",
  month    =  jul,
  year     =  2017,
  doi      = "10.1093/bioinformatics/btx081",
  language = "en"
}
% Chicco and Jurman (2020): argues for the Matthews correlation coefficient
% over F1 score and accuracy in binary classification. `pages` carries the
% single value 6 as recorded by the export. The proper noun in the title is
% brace-protected so sentence-casing styles keep its capital.
@ARTICLE{Chicco2020-re,
  title     = "The advantages of the {Matthews} correlation coefficient
               ({MCC}) over {F1} score and accuracy in binary classification
               evaluation",
  author    = "Chicco, Davide and Jurman, Giuseppe",
  abstract  = "BACKGROUND: To evaluate binary classifications and their
               confusion matrices, scientific researchers can employ several
               statistical rates, accordingly to the goal of the experiment
               they are investigating. Despite being a crucial issue in machine
               learning, no widespread consensus has been reached on a unified
               elective chosen measure yet. Accuracy and F1 score computed on
               confusion matrices have been (and still are) among the most
               popular adopted metrics in binary classification tasks. However,
               these statistical measures can dangerously show overoptimistic
               inflated results, especially on imbalanced datasets. RESULTS:
               The Matthews correlation coefficient (MCC), instead, is a more
               reliable statistical rate which produces a high score only if
               the prediction obtained good results in all of the four
               confusion matrix categories (true positives, false negatives,
               true negatives, and false positives), proportionally both to the
               size of positive elements and the size of negative elements in
               the dataset. CONCLUSIONS: In this article, we show how MCC
               produces a more informative and truthful score in evaluating
               binary classifications than accuracy and F1 score, by first
               explaining the mathematical properties, and then the asset of
               MCC in six synthetic use cases and in a real genomics scenario.
               We believe that the Matthews correlation coefficient should be
               preferred to accuracy and F1 score in evaluating binary
               classification tasks by all scientific communities.",
  journal   = "BMC Genomics",
  publisher = "Springer",
  volume    =  21,
  number    =  1,
  pages     = "6",
  month     =  jan,
  year      =  2020,
  doi       = "10.1186/s12864-019-6413-7",
  keywords  = "Accuracy; Binary classification; Biostatistics; Confusion
               matrices; Dataset imbalance; F1 score; Genomics; Machine
               learning; Matthews correlation coefficient",
  language  = "en"
}