% references.bib -- bibliography database (entries follow).
% Huang, Sherman and Lempicki (2009): survey of gene-annotation enrichment tools, Nucleic Acids Research 37(1).
@article{huang_bioinformatics_2009,
  author     = {Huang, Da Wei and Sherman, Brad T. and Lempicki, Richard A.},
  title      = {Bioinformatics enrichment tools: paths toward the comprehensive functional analysis of large gene lists},
  shorttitle = {Bioinformatics enrichment tools},
  journal    = {Nucleic Acids Research},
  volume     = {37},
  number     = {1},
  pages      = {1--13},
  month      = jan,
  year       = {2009},
  issn       = {0305-1048},
  doi        = {10.1093/nar/gkn923},
  pmid       = {19033363},
  pmcid      = {PMC2615629},
  url        = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2615629/},
  urldate    = {2021-09-07},
  abstract   = {Functional analysis of large gene lists, derived in most cases from emerging high-throughput genomic, proteomic and bioinformatics scanning approaches, is still a challenging and daunting task. The gene-annotation enrichment analysis is a promising high-throughput strategy that increases the likelihood for investigators to identify biological processes most pertinent to their study. Approximately 68 bioinformatics enrichment tools that are currently available in the community are collected in this survey. Tools are uniquely categorized into three major classes, according to their underlying enrichment algorithms. The comprehensive collections, unique tool classifications and associated questions/issues will provide a more comprehensive and up-to-date view regarding the advantages, pitfalls and recent trends in a simpler tool-class level rather than by a tool-by-tool approach. Thus, the survey will help tool designers/developers and experienced end users understand the underlying algorithms and pertinent details of particular tool categories/tools, enabling them to make the best choices for their particular research interests.},
  file       = {PubMed Central Full Text PDF:C\:\\Users\\sage980\\Zotero\\storage\\66LGL2SM\\Huang et al. - 2009 - Bioinformatics enrichment tools paths toward the .pdf:application/pdf},
}
% Subramanian et al. (2005): the GSEA method paper, PNAS 102(43).
@article{subramanian_gene_2005,
  author     = {Subramanian, Aravind and Tamayo, Pablo and Mootha, Vamsi K. and Mukherjee, Sayan and Ebert, Benjamin L. and Gillette, Michael A. and Paulovich, Amanda and Pomeroy, Scott L. and Golub, Todd R. and Lander, Eric S. and Mesirov, Jill P.},
  title      = {Gene set enrichment analysis: {A} knowledge-based approach for interpreting genome-wide expression profiles},
  shorttitle = {Gene set enrichment analysis},
  journal    = {Proceedings of the National Academy of Sciences of the United States of America},
  volume     = {102},
  number     = {43},
  pages      = {15545--15550},
  month      = oct,
  year       = {2005},
  issn       = {0027-8424},
  doi        = {10.1073/pnas.0506580102},
  pmid       = {16199517},
  pmcid      = {PMC1239896},
  url        = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC1239896/},
  urldate    = {2021-09-07},
  abstract   = {Although genomewide RNA expression analysis has become a routine tool in biomedical research, extracting biological insight from such information remains a major challenge. Here, we describe a powerful analytical method called Gene Set Enrichment Analysis (GSEA) for interpreting gene expression data. The method derives its power by focusing on gene sets, that is, groups of genes that share common biological function, chromosomal location, or regulation. We demonstrate how GSEA yields insights into several cancer-related data sets, including leukemia and lung cancer. Notably, where single-gene analysis finds little similarity between two independent studies of patient survival in lung cancer, GSEA reveals many biological pathways in common. The GSEA method is embodied in a freely available software package, together with an initial database of 1,325 biologically defined gene sets.},
  file       = {PubMed Central Full Text PDF:C\:\\Users\\sage980\\Zotero\\storage\\8589J3C3\\Subramanian et al. - 2005 - Gene set enrichment analysis A knowledge-based ap.pdf:application/pdf},
}
% Maleki et al. (2020): review classifying gene set analysis methods, Frontiers in Genetics 11.
@article{maleki_gene_2020,
  author     = {Maleki, Farhad and Ovens, Katie and Hogan, Daniel J. and Kusalik, Anthony J.},
  title      = {Gene {Set} {Analysis}: {Challenges}, {Opportunities}, and {Future} {Research}},
  shorttitle = {Gene {Set} {Analysis}},
  journal    = {Frontiers in Genetics},
  volume     = {11},
  pages      = {654},
  year       = {2020},
  issn       = {1664-8021},
  doi        = {10.3389/fgene.2020.00654},
  url        = {https://www.frontiersin.org/article/10.3389/fgene.2020.00654},
  urldate    = {2021-09-07},
  abstract   = {Gene set analysis methods are widely used to provide insight into high-throughput gene expression data. There are many gene set analysis methods available. These methods rely on various assumptions and have different requirements, strengths and weaknesses. In this paper, we classify gene set analysis methods based on their components, describe the underlying requirements and assumptions for each class, and provide directions for future research in developing and evaluating gene set analysis methods.},
  file       = {Full Text PDF:C\:\\Users\\sage980\\Zotero\\storage\\FWPCT3RH\\Maleki et al. - 2020 - Gene Set Analysis Challenges, Opportunities, and .pdf:application/pdf},
}
% Zyla et al. (2017): effect of the gene ranking metric on GSEA results, BMC Bioinformatics 18(1).
@article{zyla_ranking_2017,
  author     = {Zyla, Joanna and Marczyk, Michal and Weiner, January and Polanska, Joanna},
  title      = {Ranking metrics in gene set enrichment analysis: do they matter?},
  shorttitle = {Ranking metrics in gene set enrichment analysis},
  journal    = {BMC Bioinformatics},
  volume     = {18},
  number     = {1},
  pages      = {256},
  month      = may,
  year       = {2017},
  issn       = {1471-2105},
  doi        = {10.1186/s12859-017-1674-0},
  url        = {https://doi.org/10.1186/s12859-017-1674-0},
  urldate    = {2021-09-19},
  keywords   = {enrichment, GSEA, Functional genomics, Pathway analysis, Ranking metrics, ranking metric, nocite},
  abstract   = {There exist many methods for describing the complex relation between changes of gene expression in molecular pathways or gene ontologies under different experimental conditions. Among them, Gene Set Enrichment Analysis seems to be one of the most commonly used (over 10,000 citations). An important parameter, which could affect the final result, is the choice of a metric for the ranking of genes. Applying a default ranking metric may lead to poor results.},
  file       = {Full Text PDF:C\:\\Users\\sage980\\Zotero\\storage\\SVJSYUJI\\Zyla et al. - 2017 - Ranking metrics in gene set enrichment analysis d.pdf:application/pdf;Snapshot:C\:\\Users\\sage980\\Zotero\\storage\\9WL9KWBF\\s12859-017-1674-0.html:text/html},
}
% Mootha et al. (2003): first description of Gene Set Enrichment Analysis, Nature Genetics 34(3).
% The web-scraper metadata formerly stored in the note field was dropped because
% styles print note verbatim; the publisher it named is kept in its own field.
@article{mootha_pgc-1-responsive_2003,
  title     = {{PGC}-1α-responsive genes involved in oxidative phosphorylation are coordinately downregulated in human diabetes},
  volume    = {34},
  copyright = {2003 Nature Publishing Group},
  issn      = {1546-1718},
  url       = {https://www.nature.com/articles/ng1180},
  doi       = {10.1038/ng1180},
  abstract  = {DNA microarrays can be used to identify gene expression changes characteristic of human disease. This is challenging, however, when relevant differences are subtle at the level of individual genes. We introduce an analytical strategy, Gene Set Enrichment Analysis, designed to detect modest but coordinate changes in the expression of groups of functionally related genes. Using this approach, we identify a set of genes involved in oxidative phosphorylation whose expression is coordinately decreased in human diabetic muscle. Expression of these genes is high at sites of insulin-mediated glucose disposal, activated by PGC-1α and correlated with total-body aerobic capacity. Our results associate this gene set with clinically important variation in human metabolism and illustrate the value of pathway relationships in the analysis of genomic profiling experiments.},
  language  = {en},
  number    = {3},
  urldate   = {2021-09-19},
  journal   = {Nature Genetics},
  author    = {Mootha, Vamsi K. and Lindgren, Cecilia M. and Eriksson, Karl-Fredrik and Subramanian, Aravind and Sihag, Smita and Lehar, Joseph and Puigserver, Pere and Carlsson, Emma and Ridderstråle, Martin and Laurila, Esa and Houstis, Nicholas and Daly, Mark J. and Patterson, Nick and Mesirov, Jill P. and Golub, Todd R. and Tamayo, Pablo and Spiegelman, Bruce and Lander, Eric S. and Hirschhorn, Joel N. and Altshuler, David and Groop, Leif C.},
  month     = jul,
  year      = {2003},
  publisher = {Nature Publishing Group},
  keywords  = {GSEA, enrichment analysis},
  pages     = {267--273},
  file      = {Full Text PDF:C\:\\Users\\sage980\\Zotero\\storage\\DEGY62F8\\Mootha et al. - 2003 - PGC-1α-responsive genes involved in oxidative phos.pdf:application/pdf;Snapshot:C\:\\Users\\sage980\\Zotero\\storage\\J3FLWM88\\ng1180.html:text/html},
}
% Pesquita (2017): chapter on GO-based semantic similarity in The Gene Ontology Handbook.
% Abstract paragraphs that had been fused without a space after the period are separated.
@incollection{pesquita_semantic_2017,
  address   = {New York, NY},
  series    = {Methods in {Molecular} {Biology}},
  title     = {Semantic {Similarity} in the {Gene} {Ontology}},
  isbn      = {978-1-4939-3743-1},
  url       = {https://doi.org/10.1007/978-1-4939-3743-1_12},
  abstract  = {Gene Ontology-based semantic similarity (SS) allows the comparison of GO terms or entities annotated with GO terms, by leveraging on the ontology structure and properties and on annotation corpora. In the last decade the number and diversity of SS measures based on GO has grown considerably, and their application ranges from functional coherence evaluation, protein interaction prediction, and disease gene prioritization. Understanding how SS measures work, what issues can affect their performance and how they compare to each other in different evaluation settings is crucial to gain a comprehensive view of this area and choose the most appropriate approaches for a given application. In this chapter, we provide a guide to understanding and selecting SS measures for biomedical researchers. We present a straightforward categorization of SS measures and describe the main strategies they employ. We discuss the intrinsic and external issues that affect their performance, and how these can be addressed. We summarize comparative assessment studies, highlighting the top measures in different settings, and compare different implementation strategies and their use. Finally, we discuss some of the extant challenges and opportunities, namely the increased semantic complexity of GO and the need for fast and efficient computation, pointing the way towards the future generation of SS measures.},
  language  = {en},
  urldate   = {2021-10-07},
  booktitle = {The {Gene} {Ontology} {Handbook}},
  publisher = {Springer},
  author    = {Pesquita, Catia},
  editor    = {Dessimoz, Christophe and Škunca, Nives},
  year      = {2017},
  doi       = {10.1007/978-1-4939-3743-1_12},
  keywords  = {GSEA, pathway analysis, Functional similarity, Gene ontology, Protein similarity, Semantic similarity, ORA, semantic similarity},
  pages     = {161--173},
  file      = {Springer Full Text PDF:C\:\\Users\\sage980\\Zotero\\storage\\6N2HGG7T\\Pesquita - 2017 - Semantic Similarity in the Gene Ontology.pdf:application/pdf},
}
% Goeman and Mansmann (2008): focus level multiple-testing procedure on the GO graph, Bioinformatics 24(4).
% Abstract section headings are separated again; the contact address was already
% obfuscated in the exported text and could not be recovered, so its placeholder is kept.
@article{goeman_multiple_2008,
  title    = {Multiple testing on the directed acyclic graph of gene ontology},
  volume   = {24},
  issn     = {1367-4803},
  url      = {https://doi.org/10.1093/bioinformatics/btm628},
  doi      = {10.1093/bioinformatics/btm628},
  abstract = {Motivation: Current methods for multiplicity adjustment do not make use of the graph structure of Gene Ontology (GO) when testing for association of expression profiles of GO terms with a response variable. Results: We propose a multiple testing method, called the focus level procedure, that preserves the graph structure of Gene Ontology (GO). The procedure is constructed as a combination of a Closed Testing procedure with Holm's method. It requires a user to choose a ‘focus level’ in the GO graph, which reflects the level of specificity of terms in which the user is most interested. This choice also determines the level in the GO graph at which the procedure has most power. We prove that the procedure strongly controls the family-wise error rate without any additional assumptions on the joint distribution of the test statistics used. We also present an algorithm to calculate multiplicity-adjusted P-values. Because the focus level procedure preserves the structure of the GO graph, it does not generally preserve the ordering of the raw P-values in the adjusted P-values. Availability: The focus level procedure has been implemented in the globaltest and GlobalAncova packages, both of which are available on www.bioconductor.org. Contact: [email protected] Supplementary information: Supplementary data are available at Bioinformatics online.},
  number   = {4},
  urldate  = {2021-10-11},
  journal  = {Bioinformatics},
  author   = {Goeman, Jelle J. and Mansmann, Ulrich},
  month    = feb,
  year     = {2008},
  keywords = {gene ontology, DAG, GO},
  pages    = {537--544},
  file     = {Full Text PDF:C\:\\Users\\sage980\\Zotero\\storage\\ZQZCZKA9\\Goeman and Mansmann - 2008 - Multiple testing on the directed acyclic graph of .pdf:application/pdf;Snapshot:C\:\\Users\\sage980\\Zotero\\storage\\CEPA4G46\\207121.html:text/html},
}
% Gene Ontology Resource web page on ontology relations (no listed author).
% The site name is stored in howpublished: the misc type does not print a journal field.
@misc{noauthor_relations_2021,
  title        = {Relations in the {Gene} {Ontology}},
  howpublished = {Gene Ontology Resource},
  url          = {http://geneontology.org/docs/ontology-relations/},
  abstract     = {Relations in the Gene Ontology},
  urldate      = {2021-10-11},
  month        = oct,
  year         = {2021},
  keywords     = {gene ontology, GO},
  file         = {Snapshot:C\:\\Users\\sage980\\Zotero\\storage\\6694KFSA\\ontology-relations.html:text/html},
}
% Berglund and Wieser (2011): IUPAC table of isotopic compositions of the elements, Pure and Applied Chemistry 83(2).
% language corrected to en (title and abstract are English), publisher moved out of
% the printed note field, and the atomic-weight symbols in the abstract restored as math.
@article{berglund_isotopic_2011,
  title     = {Isotopic compositions of the elements 2009 ({IUPAC} {Technical} {Report})},
  volume    = {83},
  issn      = {1365-3075},
  url       = {https://www.degruyter.com/document/doi/10.1351/PAC-REP-10-06-02/html},
  doi       = {10.1351/PAC-REP-10-06-02},
  abstract  = {The Commission on Isotopic Abundances and Atomic Weights (CIAAW) of the International Union of Pure and Applied Chemistry (IUPAC) completed its last update of the isotopic compositions of the elements as determined by isotope-ratio mass spectrometry in 2009. That update involved a critical evaluation of the published literature and forms the basis of the table of the isotopic compositions of the elements (TICE) presented here. For each element, TICE includes evaluated data from the “best measurement” of the isotope abundances in a single sample, along with a set of representative isotope abundances and uncertainties that accommodate known variations in normal terrestrial materials. The representative isotope abundances and uncertainties generally are consistent with the standard atomic weight of the element $A_{\mathrm{r}}(\mathrm{E})$ and its uncertainty $U[A_{\mathrm{r}}(\mathrm{E})]$ recommended by CIAAW in 2007.},
  language  = {en},
  number    = {2},
  urldate   = {2022-02-12},
  journal   = {Pure and Applied Chemistry},
  author    = {Berglund, Michael and Wieser, Michael E.},
  month     = jan,
  year      = {2011},
  publisher = {De Gruyter},
  keywords  = {atomic weights, critical evaluation, elements, isotope abundance, isotopic composition, IUPAC Inorganic Chemistry Division, uncertainty, isotope},
  pages     = {397--410},
  file      = {Full Text PDF:C\:\\Users\\sage980\\Zotero\\storage\\WPRI8E2H\\Berglund and Wieser - 2011 - Isotopic compositions of the elements 2009 (IUPAC .pdf:application/pdf},
}
% Beausoleil et al. (2006): the Ascore for phosphorylation site localization, Nature Biotechnology 24(10).
% The note field only repeated the issue number and publisher and would be printed
% by styles; the publisher is kept in its own field and the duplicate number dropped.
@article{beausoleil_probability-based_2006,
  title     = {A probability-based approach for high-throughput protein phosphorylation analysis and site localization},
  volume    = {24},
  copyright = {2006 Nature Publishing Group},
  issn      = {1546-1696},
  url       = {https://www.nature.com/articles/nbt1240},
  doi       = {10.1038/nbt1240},
  abstract  = {Data analysis and interpretation remain major logistical challenges when attempting to identify large numbers of protein phosphorylation sites by nanoscale reverse-phase liquid chromatography/tandem mass spectrometry (LC-MS/MS) (Supplementary Figure 1 online). In this report we address challenges that are often only addressable by laborious manual validation, including data set error, data set sensitivity and phosphorylation site localization. We provide a large-scale phosphorylation data set with a measured error rate as determined by the target-decoy approach, we demonstrate an approach to maximize data set sensitivity by efficiently distracting incorrect peptide spectral matches (PSMs), and we present a probability-based score, the Ascore, that measures the probability of correct phosphorylation site localization based on the presence and intensity of site-determining ions in MS/MS spectra. We applied our methods in a fully automated fashion to nocodazole-arrested HeLa cell lysate where we identified 1,761 nonredundant phosphorylation sites from 491 proteins with a peptide false-positive rate of 1.3\%.},
  language  = {en},
  number    = {10},
  urldate   = {2022-02-12},
  journal   = {Nature Biotechnology},
  author    = {Beausoleil, Sean A. and Villén, Judit and Gerber, Scott A. and Rush, John and Gygi, Steven P.},
  month     = oct,
  year      = {2006},
  publisher = {Nature Publishing Group},
  keywords  = {Agriculture, Bioinformatics, Biomedical Engineering/Biotechnology, Biomedicine, Biotechnology, general, Life Sciences, ascore},
  pages     = {1285--1292},
  file      = {Full Text PDF:C\:\\Users\\sage980\\Zotero\\storage\\YTQDF8T5\\Beausoleil et al. - 2006 - A probability-based approach for high-throughput p.pdf:application/pdf;Snapshot:C\:\\Users\\sage980\\Zotero\\storage\\2L7RGPFN\\nbt1240.html:text/html},
}
% Smyth (2004): moderated t-statistics / empirical Bayes linear models (limma), SAGMB 3.
% The proper noun Bayes is capitalised and brace-protected so sentence-casing styles
% keep it; the language code is normalised to the en form used by other entries.
@article{smyth_linear_2004,
  title    = {Linear models and empirical {Bayes} methods for assessing differential expression in microarray experiments},
  volume   = {3},
  issn     = {1544-6115},
  doi      = {10.2202/1544-6115.1027},
  abstract = {The problem of identifying differentially expressed genes in designed microarray experiments is considered. Lonnstedt and Speed (2002) derived an expression for the posterior odds of differential expression in a replicated two-color experiment using a simple hierarchical parametric model. The purpose of this paper is to develop the hierarchical model of Lonnstedt and Speed (2002) into a practical approach for general microarray experiments with arbitrary numbers of treatments and RNA samples. The model is reset in the context of general linear models with arbitrary coefficients and contrasts of interest. The approach applies equally well to both single channel and two color microarray experiments. Consistent, closed form estimators are derived for the hyperparameters in the model. The estimators proposed have robust behavior even for small numbers of arrays and allow for incomplete data arising from spot filtering or spot quality weights. The posterior odds statistic is reformulated in terms of a moderated t-statistic in which posterior residual standard deviations are used in place of ordinary standard deviations. The empirical Bayes approach is equivalent to shrinkage of the estimated sample variances towards a pooled estimate, resulting in far more stable inference when the number of arrays is small. The use of moderated t-statistics has the advantage over the posterior odds that the number of hyperparameters which need to estimated is reduced; in particular, knowledge of the non-null prior for the fold changes are not required. The moderated t-statistic is shown to follow a t-distribution with augmented degrees of freedom. The moderated t inferential approach extends to accommodate tests of composite null hypotheses through the use of moderated F-statistics. The performance of the methods is demonstrated in a simulation study. Results are presented for two publicly available data sets.},
  language = {en},
  journal  = {Statistical Applications in Genetics and Molecular Biology},
  author   = {Smyth, Gordon K.},
  year     = {2004},
  pmid     = {16646809},
  keywords = {limma},
  pages    = {Article3},
  file     = {Full Text:C\:\\Users\\sage980\\Zotero\\storage\\GVNJREDQ\\Smyth - 2004 - Linear models and empirical bayes methods for asse.pdf:application/pdf},
}
% Phipson and Smyth (2010): exact permutation p-values under random permutation sampling, SAGMB 9(1).
% The second author's middle initial carries its period so the name matches the
% spelling used for the same author in smyth_linear_2004.
@article{phipson_permutation_2010,
  title      = {Permutation {P}-values {Should} {Never} {Be} {Zero}: {Calculating} {Exact} {P}-values {When} {Permutations} {Are} {Randomly} {Drawn}},
  volume     = {9},
  issn       = {1544-6115},
  shorttitle = {Permutation {P}-values {Should} {Never} {Be} {Zero}},
  url        = {https://www.degruyter.com/document/doi/10.2202/1544-6115.1585/html},
  doi        = {10.2202/1544-6115.1585},
  number     = {1},
  urldate    = {2022-04-04},
  journal    = {Statistical Applications in Genetics and Molecular Biology},
  author     = {Phipson, Belinda and Smyth, Gordon K.},
  month      = jan,
  year       = {2010},
  file       = {Submitted Version:C\:\\Users\\sage980\\Zotero\\storage\\DZJWNL6Y\\Phipson and Smyth - 2010 - Permutation P-values Should Never Be Zero Calcula.pdf:application/pdf},
}
% Mootha et al. (2004): authors' reply on statistical concerns about GSEA, Nature Genetics 36(7).
% Title quotes use TeX quote ligatures (straight double quotes typeset as two closing
% quotes), the one-page range is collapsed to a single page, and author initials
% carry periods to match the same names in mootha_pgc-1-responsive_2003.
@article{mootha_reply_2004,
  title    = {Reply to ``{Statistical} concerns about the {GSEA} procedure''},
  volume   = {36},
  issn     = {1061-4036, 1546-1718},
  url      = {http://www.nature.com/articles/ng0704-663b},
  doi      = {10.1038/ng0704-663b},
  language = {en},
  number   = {7},
  urldate  = {2022-04-04},
  journal  = {Nature Genetics},
  author   = {Mootha, Vamsi K. and Daly, Mark J. and Patterson, Nick and Hirschhorn, Joel N. and Groop, Leif C. and Altshuler, David},
  month    = jul,
  year     = {2004},
  pages    = {663},
  file     = {Full Text:C\:\\Users\\sage980\\Zotero\\storage\\H87DK9B8\\Mootha et al. - 2004 - Reply to Statistical concerns about the GSEA proc.pdf:application/pdf},
}
% Väremo, Nielsen and Nookaew (2013): directional gene set analysis combining several methods, Nucleic Acids Research 41(8).
@article{varemo_enriching_2013,
  author   = {Väremo, Leif and Nielsen, Jens and Nookaew, Intawat},
  title    = {Enriching the gene set analysis of genome-wide data by incorporating directionality of gene expression and combining statistical hypotheses and methods},
  journal  = {Nucleic Acids Research},
  volume   = {41},
  number   = {8},
  pages    = {4378--4391},
  month    = apr,
  year     = {2013},
  language = {en},
  issn     = {1362-4962, 0305-1048},
  doi      = {10.1093/nar/gkt111},
  url      = {https://academic.oup.com/nar/article/41/8/4378/2408999},
  urldate  = {2022-04-04},
  file     = {Full Text:C\:\\Users\\sage980\\Zotero\\storage\\7BBEYGBJ\\Väremo et al. - 2013 - Enriching the gene set analysis of genome-wide dat.pdf:application/pdf},
}
% Korotkevich et al. (2016): FGSEA preprint (DOI prefix 10.1101, hosted on biorxiv.org).
% institution now names the preprint server rather than its subject category, and
% the abstract is rejoined: stray heading removed, exponent restored as math,
% URLs put back inside their parentheses.
@techreport{korotkevich_fast_2016,
  type        = {preprint},
  title       = {Fast gene set enrichment analysis},
  url         = {http://biorxiv.org/lookup/doi/10.1101/060012},
  abstract    = {Gene set enrichment analysis (GSEA) is an ubiquitously used tool for evaluating pathway enrichment in transcriptional data. Typical experimental design consists in comparing two conditions with several replicates using a differential gene expression test followed by preranked GSEA performed against a collection of hundreds and thousands of pathways. However, the reference implementation of this method cannot accurately estimate small P-values, which significantly limits its sensitivity due to multiple hypotheses correction procedure.
Here we present FGSEA (Fast Gene Set Enrichment Analysis) method that is able to estimate arbitrarily low GSEA P-values with a high accuracy in a matter of minutes or even seconds. To confirm the accuracy of the method, we also developed an exact algorithm for GSEA P-values calculation for integer gene-level statistics. Using the exact algorithm as a reference we show that FGSEA is able to routinely estimate P-values up to $10^{-100}$ with a small and predictable estimation error. We systematically evaluate FGSEA on a collection of 605 datasets and show that FGSEA recovers much more statistically significant pathways compared to other implementations.
FGSEA is open source and available as an R package in Bioconductor (http://bioconductor.org/packages/fgsea/) and on GitHub (https://github.com/ctlab/fgsea/).},
  language    = {en},
  urldate     = {2022-04-04},
  institution = {bioRxiv},
  author      = {Korotkevich, Gennady and Sukhov, Vladimir and Budin, Nikolay and Shpak, Boris and Artyomov, Maxim N. and Sergushichev, Alexey},
  month       = jun,
  year        = {2016},
  doi         = {10.1101/060012},
}
% Xiao et al. (2014): the pi-value gene significance score, Bioinformatics 30(6).
% Journal name is normalised to the spelling used by the other Bioinformatics entries
% in this file, and the language code to the en form used elsewhere. The contact
% addresses in the abstract were already obfuscated in the exported text and are left as found.
@article{xiao_novel_2014,
  title    = {A novel significance score for gene selection and ranking},
  volume   = {30},
  issn     = {1367-4811},
  doi      = {10.1093/bioinformatics/btr671},
  abstract = {MOTIVATION: When identifying differentially expressed (DE) genes from high-throughput gene expression measurements, we would like to take both statistical significance (such as P-value) and biological relevance (such as fold change) into consideration. In gene set enrichment analysis (GSEA), a score that can combine fold change and P-value together is needed for better gene ranking.
RESULTS: We defined a gene significance score π-value by combining expression fold change and statistical significance (P-value), and explored its statistical properties. When compared to various existing methods, π-value based approach is more robust in selecting DE genes, with the largest area under curve in its receiver operating characteristic curve. We applied π-value to GSEA and found it comparable to P-value and t-statistic based methods, with added protection against false discovery in certain situations. Finally, in a gene functional study of breast cancer profiles, we showed that using π-value helps elucidating otherwise overlooked important biological functions.
AVAILABILITY: http://gccri.uthscsa.edu/Pi\_Value\_Supplementary.asp
CONTACT: [email protected], [email protected]
SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.},
  language = {en},
  number   = {6},
  journal  = {Bioinformatics},
  author   = {Xiao, Yufei and Hsiao, Tzu-Hung and Suresh, Uthra and Chen, Hung-I. Harry and Wu, Xiaowu and Wolf, Steven E. and Chen, Yidong},
  month    = mar,
  year     = {2014},
  pmid     = {22321699},
  pmcid    = {PMC3957066},
  keywords = {Breast Neoplasms, Databases, Genetic, Gene Expression, Gene Expression Profiling, Humans, Oligonucleotide Array Sequence Analysis, Receptors, Estrogen, ROC Curve},
  pages    = {801--807},
  file     = {Full Text:C\:\\Users\\sage980\\Zotero\\storage\\X3A7YNJ3\\Xiao et al. - 2014 - A novel significance score for gene selection and .pdf:application/pdf},
}
% Gillespie et al. (2022): Reactome knowledgebase update, Nucleic Acids Research 50(D1).
% The resource name Reactome is capitalised and brace-protected in the title, and the
% stray heading line that preceded the abstract text is removed.
@article{gillespie_reactome_2022,
  title    = {The {Reactome} pathway knowledgebase 2022},
  volume   = {50},
  issn     = {0305-1048, 1362-4962},
  url      = {https://academic.oup.com/nar/article/50/D1/D687/6426058},
  doi      = {10.1093/nar/gkab1028},
  abstract = {The Reactome Knowledgebase (https://reactome.org), an Elixir core resource, provides manually curated molecular details across a broad range of physiological and pathological biological processes in humans, including both hereditary and acquired disease processes. The processes are annotated as an ordered network of molecular transformations in a single consistent data model. Reactome thus functions both as a digital archive of manually curated human biological processes and as a tool for discovering functional relationships in data such as gene expression profiles or somatic mutation catalogs from tumor cells. Recent curation work has expanded our annotations of normal and disease-associated signaling processes and of the drugs that target them, in particular infections caused by the SARS-CoV-1 and SARS-CoV-2 coronaviruses and the host response to infection. New tools support better simultaneous analysis of high-throughput data from multiple sources and the placement of understudied (‘dark’) proteins from analyzed datasets in the context of Reactome’s manually curated pathways.},
  language = {en},
  number   = {D1},
  urldate  = {2022-05-02},
  journal  = {Nucleic Acids Research},
  author   = {Gillespie, Marc and Jassal, Bijay and Stephan, Ralf and Milacic, Marija and Rothfels, Karen and Senff-Ribeiro, Andrea and Griss, Johannes and Sevilla, Cristoffer and Matthews, Lisa and Gong, Chuqiao and Deng, Chuan and Varusai, Thawfeek and Ragueneau, Eliot and Haider, Yusra and May, Bruce and Shamovsky, Veronica and Weiser, Joel and Brunson, Timothy and Sanati, Nasim and Beckman, Liam and Shao, Xiang and Fabregat, Antonio and Sidiropoulos, Konstantinos and Murillo, Julieth and Viteri, Guilherme and Cook, Justin and Shorser, Solomon and Bader, Gary and Demir, Emek and Sander, Chris and Haw, Robin and Wu, Guanming and Stein, Lincoln and Hermjakob, Henning and D’Eustachio, Peter},
  month    = jan,
  year     = {2022},
  pages    = {D687--D692},
  file     = {Full Text:C\:\\Users\\sage980\\Zotero\\storage\\GDEKKF6Q\\Gillespie et al. - 2022 - The reactome pathway knowledgebase 2022.pdf:application/pdf},
}
% Falcon and Gentleman (2007): the GOstats package application note, Bioinformatics 23(2).
% Given names are stored in full so the style decides whether to abbreviate.
% NOTE(review): the expanded given names were not present in the export -- confirm against the DOI record.
@article{falcon_using_2007,
  title    = {Using {GOstats} to test gene lists for {GO} term association},
  volume   = {23},
  issn     = {1367-4803, 1460-2059},
  url      = {https://academic.oup.com/bioinformatics/article-lookup/doi/10.1093/bioinformatics/btl567},
  doi      = {10.1093/bioinformatics/btl567},
  language = {en},
  number   = {2},
  urldate  = {2022-05-05},
  journal  = {Bioinformatics},
  author   = {Falcon, Seth and Gentleman, Robert},
  month    = jan,
  year     = {2007},
  pages    = {257--258},
  file     = {Full Text:C\:\\Users\\sage980\\Zotero\\storage\\HRNGPIJ3\\Falcon and Gentleman - 2007 - Using GOstats to test gene lists for GO term assoc.pdf:application/pdf},
}
% Liberzon et al. (2015): the MSigDB Hallmark gene set collection, Cell Systems 1(6).
% ISSN is written in the standard hyphenated NNNN-NNNN form used by the other entries.
@article{liberzon_molecular_2015,
  title    = {The {Molecular} {Signatures} {Database} {Hallmark} {Gene} {Set} {Collection}},
  volume   = {1},
  issn     = {2405-4712},
  url      = {https://linkinghub.elsevier.com/retrieve/pii/S2405471215002185},
  doi      = {10.1016/j.cels.2015.12.004},
  language = {en},
  number   = {6},
  urldate  = {2022-05-06},
  journal  = {Cell Systems},
  author   = {Liberzon, Arthur and Birger, Chet and Thorvaldsdóttir, Helga and Ghandi, Mahmoud and Mesirov, Jill P. and Tamayo, Pablo},
  month    = dec,
  year     = {2015},
  pages    = {417--425},
  file     = {Full Text:C\:\\Users\\sage980\\Zotero\\storage\\YE5E25Z5\\Liberzon et al. - 2015 - The Molecular Signatures Database Hallmark Gene Se.pdf:application/pdf},
}