forked from compgenomr/book
-
Notifications
You must be signed in to change notification settings - Fork 0
/
book.bib
executable file
·5419 lines (5171 loc) · 352 KB
/
book.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
% Evaluation of ChIP-seq fidelity; the assay name is braced so sentence-casing styles keep its capitals.
@article{chen2012systematic,
  author    = {Chen, Yiwen and Negre, Nicolas and Li, Qunhua and Mieczkowska, Joanna O and Slattery, Matthew and Liu, Tao and Zhang, Yong and Kim, Tae-Kyung and He, Housheng Hansen and Zieba, Jennifer and others},
  title     = {Systematic evaluation of factors influencing {ChIP-seq} fidelity},
  journal   = {Nature Methods},
  volume    = {9},
  number    = {6},
  pages     = {609--614},
  year      = {2012},
  publisher = {Nature Publishing Group},
}
% GISTIC2.0 method paper; the tool name is written without the stray space and braced against recasing.
@article{mermel2011gistic2,
  author    = {Mermel, Craig H and Schumacher, Steven E and Hill, Barbara and Meyerson, Matthew L and Beroukhim, Rameen and Getz, Gad},
  title     = {{GISTIC2.0} facilitates sensitive and confident localization of the targets of focal somatic copy-number alteration in human cancers},
  journal   = {Genome Biology},
  volume    = {12},
  number    = {4},
  pages     = {R41},
  year      = {2011},
  publisher = {Springer},
}
% Review of regulatory RNA; the acronym is braced so sentence-casing styles do not lowercase it.
@article{morris2014rise,
  author    = {Morris, Kevin V and Mattick, John S},
  title     = {The rise of regulatory {RNA}},
  journal   = {Nature Reviews Genetics},
  volume    = {15},
  number    = {6},
  pages     = {423--437},
  year      = {2014},
  publisher = {Nature Publishing Group},
}
% MicroRNA review in Cell; journal name capitalised to match the other Cell entries in this file.
@article{bartel2004micrornas,
  author    = {Bartel, David P},
  title     = {{MicroRNAs}: genomics, biogenesis, mechanism, and function},
  journal   = {Cell},
  volume    = {116},
  number    = {2},
  pages     = {281--297},
  year      = {2004},
  publisher = {Elsevier},
}
% Splicing-code review; the journal is the acronym RNA and is written in upper case.
@article{wang2008splicing,
  author    = {Wang, Zefeng and Burge, Christopher B},
  title     = {Splicing regulation: from a parts list of regulatory elements to an integrated splicing code},
  journal   = {RNA},
  volume    = {14},
  number    = {5},
  pages     = {802--813},
  year      = {2008},
  publisher = {Cold Spring Harbor Lab},
}
@book{kutner2003applied,
  author    = {Kutner, M.H. and Nachtsheim, C.J. and Neter, J.},
  title     = {Applied Linear Regression Models},
  series    = {The McGraw-Hill/Irwin Series Operations and Decision Sciences},
  publisher = {McGraw-Hill Higher Education},
  year      = {2003},
  isbn      = {9780072955675},
  lccn      = {2003044224},
  url       = {https://books.google.de/books?id=0nAMAAAACAAJ},
}
@book{james2013introduction,
  author    = {James, G. and Witten, D. and Hastie, T. and Tibshirani, R.},
  title     = {An Introduction to Statistical Learning: with Applications in R},
  series    = {Springer Texts in Statistics},
  publisher = {Springer New York},
  year      = {2013},
  isbn      = {9781461471387},
  lccn      = {13936251},
  url       = {https://books.google.de/books?id=qcI\_AAAAQBAJ},
}
@book{crawley2012r,
  author    = {Crawley, M.J.},
  title     = {The R Book},
  publisher = {Wiley},
  year      = {2012},
  isbn      = {9781118448960},
  lccn      = {2012027339},
  url       = {https://books.google.de/books?id=XYDl0mlH-moC},
}
% OpenIntro Statistics textbook; only the three human authors are listed (the retailer name is not an author).
@book{diez2015openintro,
  author    = {Diez, D.M. and Barr, C.D. and {\c{C}}etinkaya-Rundel, M.},
  title     = {OpenIntro Statistics},
  publisher = {OpenIntro, Incorporated},
  year      = {2015},
  isbn      = {9781943450046},
  url       = {https://books.google.de/books?id=wfcPswEACAAJ},
}
@book{gonick2005cartoon,
  author    = {Gonick, L. and Smith, W.},
  title     = {The Cartoon Guide to Statistics},
  publisher = {Collins Reference},
  year      = {2005},
  isbn      = {9781435242715},
  url       = {https://books.google.de/books?id=-U7vygAACAAJ},
}
% limma methodology paper. The author is Gordon K. Smyth: surname "Smyth", given names "Gordon K".
@article{smyth2004linear,
  author  = {Smyth, Gordon K},
  title   = {Linear models and empirical {Bayes} methods for assessing differential expression in microarray experiments},
  journal = {Statistical Applications in Genetics and Molecular Biology},
  volume  = {3},
  number  = {1},
  pages   = {1--25},
  year    = {2004},
}
@article{de2010benchmark,
  author    = {De Hertogh, Beno{\^\i}t and De Meulder, Bertrand and Berger, Fabrice and Pierre, Michael and Bareke, Eric and Gaigneaux, Anthoula and Depiereux, Eric},
  title     = {A benchmark for statistical microarray data analysis that preserves actual biological and technical variance},
  journal   = {BMC bioinformatics},
  volume    = {11},
  number    = {1},
  pages     = {17},
  year      = {2010},
  publisher = {BioMed Central},
}
% t-SNE paper. The "van der" particle belongs before the surname so BibTeX parses it as the von part;
% the issue label "Nov" is a month and is stored in the month field.
@article{maaten2008visualizing,
  author  = {van der Maaten, Laurens and Hinton, Geoffrey},
  title   = {Visualizing data using {t-SNE}},
  journal = {Journal of Machine Learning Research},
  volume  = {9},
  pages   = {2579--2605},
  month   = nov,
  year    = {2008},
}
@book{cox2000multidimensional,
  author    = {Cox, T.F. and Cox, M.A.A.},
  title     = {Multidimensional Scaling, Second Edition},
  series    = {Chapman \& Hall/CRC Monographs on Statistics \& Applied Probability},
  publisher = {CRC Press},
  year      = {2000},
  isbn      = {9781420036121},
  lccn      = {00060180},
}
@article{mardia1978cmds,
  author    = {Mardia, Kanti V},
  title     = {Some properties of clasical multi-dimesional scaling},
  journal   = {Communications in Statistics-Theory and Methods},
  volume    = {7},
  number    = {13},
  pages     = {1233--1241},
  year      = {1978},
  publisher = {Taylor \& Francis},
}
@inproceedings{lee2001algorithms,
  author    = {Lee, Daniel D and Seung, H Sebastian},
  title     = {Algorithms for non-negative matrix factorization},
  booktitle = {Advances in neural information processing systems},
  pages     = {556--562},
  year      = {2001},
}
@article{hyvarinen2013independent,
  author    = {Hyv{\"a}rinen, Aapo},
  title     = {Independent component analysis: recent advances},
  journal   = {Philosophical Transactions of the Royal Society A: Mathematical, Physical and Engineering Sciences},
  volume    = {371},
  number    = {1984},
  pages     = {20110534},
  year      = {2013},
  publisher = {The Royal Society Publishing},
}
@article{rousseeuw1987silhouettes,
  author    = {Rousseeuw, Peter J},
  title     = {Silhouettes: a graphical aid to the interpretation and validation of cluster analysis},
  journal   = {Journal of computational and applied mathematics},
  volume    = {20},
  pages     = {53--65},
  year      = {1987},
  publisher = {North-Holland},
}
@article{reynolds2006clustering,
  author    = {Reynolds, Alan P and Richards, Graeme and de la Iglesia, Beatriz and Rayward-Smith, Victor J},
  title     = {Clustering rules: a comparison of partitioning and hierarchical clustering algorithms},
  journal   = {Journal of Mathematical Modelling and Algorithms},
  volume    = {5},
  number    = {4},
  pages     = {475--504},
  year      = {2006},
  publisher = {Springer},
}
% Hartigan-Wong k-means; the algorithm series label "AS" is braced so it is not lowercased to "as".
@article{hartigan1979algorithm,
  author    = {Hartigan, John A and Wong, Manchek A},
  title     = {Algorithm {AS} 136: A k-means clustering algorithm},
  journal   = {Journal of the Royal Statistical Society. Series C (Applied Statistics)},
  volume    = {28},
  number    = {1},
  pages     = {100--108},
  year      = {1979},
  publisher = {JSTOR},
}
@article{tibshirani2001estimating,
  author    = {Tibshirani, Robert and Walther, Guenther and Hastie, Trevor},
  title     = {Estimating the number of clusters in a data set via the gap statistic},
  journal   = {Journal of the Royal Statistical Society: Series B (Statistical Methodology)},
  volume    = {63},
  number    = {2},
  pages     = {411--423},
  year      = {2001},
  publisher = {Wiley Online Library},
}
@book{Albe_2002_book,
  title     = {{Molecular Biology of the Cell}},
  author    = {Alberts, B. and Bray, D. and Lewis, J. and Raff, M. and Roberts, K. and Watson, J.D.},
  publisher = {Garland},
  edition   = {4th},
  year      = {2002},
}
% Sequence Ontology paper; the ontology's proper name is braced so styles keep its capitals.
@article{eilbeck2005sequence,
  author    = {Eilbeck, Karen and Lewis, Suzanna E and Mungall, Christopher J and Yandell, Mark and Stein, Lincoln and Durbin, Richard and Ashburner, Michael},
  title     = {The {Sequence Ontology}: a tool for the unification of genome annotations},
  journal   = {Genome Biology},
  volume    = {6},
  number    = {5},
  pages     = {R44},
  year      = {2005},
  publisher = {Springer},
}
@article{hager2009transcription,
  author    = {Hager, Gordon L and McNally, James G and Misteli, Tom},
  title     = {Transcription dynamics},
  journal   = {Molecular cell},
  volume    = {35},
  number    = {6},
  pages     = {741--753},
  year      = {2009},
  publisher = {Elsevier},
}
% DNAmod database paper; the database name and the acronym DNA are braced against recasing.
@article{sood2019dnamod,
  author    = {Sood, Ankur Jai and Viner, Coby and Hoffman, Michael M},
  title     = {{DNAmod}: the {DNA} modification database},
  journal   = {Journal of Cheminformatics},
  volume    = {11},
  number    = {1},
  pages     = {30},
  year      = {2019},
  publisher = {BioMed Central},
}
@article{strahl2000language,
  author    = {Strahl, Brian D and Allis, C David},
  title     = {The language of covalent histone modifications},
  journal   = {Nature},
  volume    = {403},
  number    = {6765},
  pages     = {41--45},
  year      = {2000},
  publisher = {Nature Publishing Group},
}
% CTCF review; the protein name is braced so sentence-casing styles do not render it as "Ctcf".
@article{phillips2009ctcf,
  author    = {Phillips, Jennifer E and Corces, Victor G},
  title     = {{CTCF}: master weaver of the genome},
  journal   = {Cell},
  volume    = {137},
  number    = {7},
  pages     = {1194--1211},
  year      = {2009},
  publisher = {Elsevier},
}
@article{schwartz2007polycomb,
  author    = {Schwartz, Yuri B and Pirrotta, Vincenzo},
  title     = {Polycomb silencing mechanisms and the management of genomic programmes},
  journal   = {Nature Reviews Genetics},
  volume    = {8},
  number    = {1},
  pages     = {9--22},
  year      = {2007},
  publisher = {Nature Publishing Group},
}
@article{henikoff2008nucleosome,
  author    = {Henikoff, Steven},
  title     = {Nucleosome destabilization in the epigenetic regulation of gene expression},
  journal   = {Nature Reviews Genetics},
  volume    = {9},
  number    = {1},
  pages     = {15--26},
  year      = {2008},
  publisher = {Nature Publishing Group},
}
% ESL textbook. Publisher, series and city are stored in their own fields instead of one fused
% publisher string; the book has no volume number within the series, so none is given.
@book{friedman2001elements,
  author    = {Friedman, Jerome and Hastie, Trevor and Tibshirani, Robert},
  title     = {The Elements of Statistical Learning},
  series    = {Springer Series in Statistics},
  publisher = {Springer},
  address   = {New York},
  year      = {2001},
}
% LIBSVM practical guide. This is a technical report, not a journal article, so it uses the
% techreport type with an institution; the three listed names are the complete author list.
@techreport{hsu2003practical,
  author      = {Hsu, Chih-Wei and Chang, Chih-Chung and Lin, Chih-Jen},
  title       = {A practical guide to support vector classification},
  institution = {Department of Computer Science, National Taiwan University},
  address     = {Taipei},
  year        = {2003},
}
% Deep learning review in Nature; full page range given rather than only the first page.
@article{lecun2015deep,
  author    = {LeCun, Yann and Bengio, Yoshua and Hinton, Geoffrey},
  title     = {Deep learning},
  journal   = {Nature},
  volume    = {521},
  number    = {7553},
  pages     = {436--444},
  year      = {2015},
  publisher = {Nature Publishing Group},
}
@inproceedings{boser1992svm,
  author       = {Boser, Bernhard E and Guyon, Isabelle M and Vapnik, Vladimir N},
  title        = {A training algorithm for optimal margin classifiers},
  booktitle    = {Proceedings of the fifth annual workshop on Computational learning theory},
  pages        = {144--152},
  year         = {1992},
  organization = {ACM},
}
@article{friedman2003mart,
  author    = {Friedman, Jerome H and Meulman, Jacqueline J},
  title     = {Multiple additive regression trees with application in epidemiology},
  journal   = {Statistics in medicine},
  volume    = {22},
  number    = {9},
  pages     = {1365--1381},
  year      = {2003},
  publisher = {Wiley Online Library},
}
@article{elith2008brt,
  author    = {Elith, Jane and Leathwick, John R and Hastie, Trevor},
  title     = {A working guide to boosted regression trees},
  journal   = {Journal of Animal Ecology},
  volume    = {77},
  number    = {4},
  pages     = {802--813},
  year      = {2008},
  publisher = {Wiley Online Library},
}
% Gradient boosting machine paper; volume and issue supplied so the reference is complete.
@article{friedman2001gbm,
  author    = {Friedman, Jerome H},
  title     = {Greedy function approximation: a gradient boosting machine},
  journal   = {Annals of Statistics},
  volume    = {29},
  number    = {5},
  pages     = {1189--1232},
  year      = {2001},
  publisher = {JSTOR},
}
% XGBoost paper (KDD 2016); the tool name and the ACM SIGKDD acronyms carry their proper capitalisation.
@inproceedings{chen2016xgboost,
  author       = {Chen, Tianqi and Guestrin, Carlos},
  title        = {{XGBoost}: A scalable tree boosting system},
  booktitle    = {Proceedings of the 22nd {ACM} {SIGKDD} International Conference on Knowledge Discovery and Data Mining},
  pages        = {785--794},
  year         = {2016},
  organization = {ACM},
}
% Horvath epigenetic clock paper; DNA is braced so a sentence-casing style does not print "Dna".
@article{horvath2013dna,
  author    = {Horvath, Steve},
  title     = {{DNA} methylation age of human tissues and cell types},
  journal   = {Genome Biology},
  volume    = {14},
  number    = {10},
  pages     = {3156},
  year      = {2013},
  publisher = {BioMed Central},
}
% Prefrontal cortex methylation study; DNA is braced so a sentence-casing style does not print "Dna".
@article{numata2012dna,
  author    = {Numata, Shusuke and Ye, Tianzhang and Hyde, Thomas M and Guitart-Navarro, Xavier and Tao, Ran and Wininger, Michael and Colantuoni, Carlo and Weinberger, Daniel R and Kleinman, Joel E and Lipska, Barbara K},
  title     = {{DNA} methylation signatures in development and aging of the human prefrontal cortex},
  journal   = {The American Journal of Human Genetics},
  volume    = {90},
  number    = {2},
  pages     = {260--272},
  year      = {2012},
  publisher = {Elsevier},
}
% Elastic net paper; journal name spelled exactly as in the tibshirani2001estimating entry.
@article{zou2005regularization,
  author    = {Zou, Hui and Hastie, Trevor},
  title     = {Regularization and variable selection via the elastic net},
  journal   = {Journal of the Royal Statistical Society: Series B (Statistical Methodology)},
  volume    = {67},
  number    = {2},
  pages     = {301--320},
  year      = {2005},
  publisher = {Wiley Online Library},
}
% glmnet paper; the article spans pages 1--22. The manuscript-archive name is not the publisher,
% so no publisher field is given.
@article{friedman2010regularization,
  author  = {Friedman, Jerome and Hastie, Trevor and Tibshirani, Rob},
  title   = {Regularization paths for generalized linear models via coordinate descent},
  journal = {Journal of Statistical Software},
  volume  = {33},
  number  = {1},
  pages   = {1--22},
  year    = {2010},
}
@article{tibshirani1996regression,
  author    = {Tibshirani, Robert},
  title     = {Regression shrinkage and selection via the lasso},
  journal   = {Journal of the Royal Statistical Society: Series B (Methodological)},
  volume    = {58},
  number    = {1},
  pages     = {267--288},
  year      = {1996},
  publisher = {Wiley Online Library},
}
@article{hoerl1970ridge,
  author    = {Hoerl, Arthur E and Kennard, Robert W},
  title     = {Ridge regression: Biased estimation for nonorthogonal problems},
  journal   = {Technometrics},
  volume    = {12},
  number    = {1},
  pages     = {55--67},
  year      = {1970},
  publisher = {Taylor \& Francis Group},
}
@article{breiman2001random,
  author    = {Breiman, Leo},
  title     = {Random forests},
  journal   = {Machine learning},
  volume    = {45},
  number    = {1},
  pages     = {5--32},
  year      = {2001},
  publisher = {Springer},
}
% SMOTE over-sampling paper; the acronym is braced so sentence-casing styles do not print "Smote".
@article{smote,
  author  = {Chawla, Nitesh V and Bowyer, Kevin W and Hall, Lawrence O and Kegelmeyer, W Philip},
  title   = {{SMOTE}: synthetic minority over-sampling technique},
  journal = {Journal of Artificial Intelligence Research},
  volume  = {16},
  pages   = {321--357},
  year    = {2002},
}
% Machine learning in genomics review. The title is not wrapped in an outer brace pair, so the
% bibliography style controls its casing; month uses the standard macro.
@article{enhancerImbalance,
  author  = {Libbrecht, M. W. and Noble, W. S.},
  title   = {Machine learning applications in genetics and genomics},
  journal = {Nat. Rev. Genet.},
  volume  = {16},
  number  = {6},
  pages   = {321--332},
  month   = jun,
  year    = {2015},
}
@article{mcr,
  author  = {Fisher, Aaron and Rudin, Cynthia and Dominici, Francesca},
  title   = {All Models are Wrong but many are Useful: Variable Importance for Black-Box, Proprietary, or Misspecified Prediction Models, using Model Class Reliance},
  journal = {arXiv preprint arXiv:1801.01489},
  year    = {2018},
}
% DALEX package paper. Author is in "Last, First" form, the page range uses an en-dash (--),
% and the package and language names are braced against recasing.
@article{dalex,
  author  = {Biecek, Przemyslaw},
  title   = {{DALEX}: Explainers for Complex Predictive Models in {R}},
  journal = {Journal of Machine Learning Research},
  volume  = {19},
  number  = {84},
  pages   = {1--5},
  year    = {2018},
  url     = {http://jmlr.org/papers/v19/18-416.html},
}
% Glioma CpG island methylator phenotype paper. Only "CpG" is brace-protected (as a whole word);
% the rest of the title is left for the style to case. Month uses the standard macro.
@article{pmid20399149,
  author  = {Noushmehr, H. and Weisenberger, D. J. and Diefes, K. and Phillips, H. S. and Pujara, K. and Berman, B. P. and Pan, F. and Pelloski, C. E. and Sulman, E. P. and Bhat, K. P. and Verhaak, R. G. and Hoadley, K. A. and Hayes, D. N. and Perou, C. M. and Schmidt, H. K. and Ding, L. and Wilson, R. K. and Van Den Berg, D. and Shen, H. and Bengtsson, H. and Neuvial, P. and Cope, L. M. and Buckley, J. and Herman, J. G. and Baylin, S. B. and Laird, P. W. and Aldape, K.},
  title   = {Identification of a {CpG} island methylator phenotype that defines a distinct subgroup of glioma},
  journal = {Cancer Cell},
  volume  = {17},
  number  = {5},
  pages   = {510--522},
  month   = may,
  year    = {2010},
}
% Review of machine learning in cancer prognosis. The title is not wrapped in an outer brace
% pair, so the bibliography style controls its casing.
@article{pmid25750696,
  author  = {Kourou, K. and Exarchos, T. P. and Exarchos, K. P. and Karamouzis, M. V. and Fotiadis, D. I.},
  title   = {Machine learning applications in cancer prognosis and prediction},
  journal = {Comput Struct Biotechnol J},
  volume  = {13},
  pages   = {8--17},
  year    = {2015},
}
% DeepVariant paper. Month is the standard macro for month 11 rather than a bare number string;
% "SNP" is brace-protected as one word.
@article{pmid30247488,
  author  = {Poplin, R. and Chang, P. C. and Alexander, D. and Schwartz, S. and Colthurst, T. and Ku, A. and Newburger, D. and Dijamco, J. and Nguyen, N. and Afshar, P. T. and Gross, S. S. and Dorfman, L. and McLean, C. Y. and DePristo, M. A.},
  title   = {A universal {SNP} and small-indel variant caller using deep neural networks},
  journal = {Nat. Biotechnol.},
  volume  = {36},
  number  = {10},
  pages   = {983--987},
  month   = nov,
  year    = {2018},
}
% 26301843
% Deep-learning noncoding variant effect paper. The title is not wrapped in an outer brace
% pair, so the style controls its casing; month uses the standard macro.
@article{pmid26301843,
  author  = {Zhou, J. and Troyanskaya, O. G.},
  title   = {Predicting effects of noncoding variants with deep learning-based sequence model},
  journal = {Nat. Methods},
  volume  = {12},
  number  = {10},
  pages   = {931--934},
  month   = oct,
  year    = {2015},
}
% Pharmacogenomics review. The title is not wrapped in an outer brace pair, so the style
% controls its casing; month uses the standard macro.
@article{pmid21428770,
  author  = {Wang, L. and McLeod, H. L. and Weinshilboum, R. M.},
  title   = {Genomics and drug response},
  journal = {N. Engl. J. Med.},
  volume  = {364},
  number  = {12},
  pages   = {1144--1153},
  month   = mar,
  year    = {2011},
}
% SVM-based enhancer prediction paper. The title is not wrapped in an outer brace pair, so the
% style controls its casing; month uses the standard macro.
@article{pmid22328731,
  author  = {Fernandez, M. and Miranda-Saavedra, D.},
  title   = {Genome-wide enhancer prediction from epigenetic signatures using genetic algorithm-optimized support vector machines},
  journal = {Nucleic Acids Res.},
  volume  = {40},
  number  = {10},
  pages   = {e77},
  month   = may,
  year    = {2012},
}
% Gene expression modelling from chromatin features. The title is not wrapped in an outer brace
% pair, so the style controls its casing; month uses the standard macro.
@article{pmid22950368,
  author  = {Dong, X. and Greven, M. C. and Kundaje, A. and Djebali, S. and Brown, J. B. and Cheng, C. and Gingeras, T. R. and Gerstein, M. and Guigo, R. and Birney, E. and Weng, Z.},
  title   = {Modeling gene expression using chromatin features in various cellular contexts},
  journal = {Genome Biol.},
  volume  = {13},
  number  = {9},
  pages   = {R53},
  month   = jun,
  year    = {2012},
}
% Gene prediction methods review. The title is not wrapped in an outer brace pair, so the style
% controls its casing; month uses the standard macro.
@article{pmid12364589,
  author  = {Mathe, C. and Sagot, M. F. and Schiex, T. and Rouze, P.},
  title   = {Current methods of gene prediction, their strengths and weaknesses},
  journal = {Nucleic Acids Res.},
  volume  = {30},
  number  = {19},
  pages   = {4103--4117},
  month   = oct,
  year    = {2002},
}
@article{Park2014-sr,
  author   = {Park, Yongseok and Figueroa, Maria E and Rozek, Laura S and
              Sartor, Maureen A},
  title    = {{MethylSig}: a whole genome {DNA} methylation analysis pipeline},
  journal  = {Bioinformatics},
  volume   = 30,
  number   = 17,
  pages    = {2414--2422},
  month    = sep,
  year     = 2014,
  language = {en},
  abstract = {MOTIVATION: DNA methylation plays critical roles in gene
              regulation and cellular specification without altering DNA
              sequences. The wide application of reduced representation
              bisulfite sequencing (RRBS) and whole genome bisulfite sequencing
              (bis-seq) opens the door to study DNA methylation at single CpG
              site resolution. One challenging question is how best to test for
              significant methylation differences between groups of biological
              samples in order to minimize false positive findings. RESULTS: We
              present a statistical analysis package, methylSig, to analyse
              genome-wide methylation differences between samples from
              different treatments or disease groups. MethylSig takes into
              account both read coverage and biological variation by utilizing
              a beta-binomial approach across biological samples for a CpG site
              or region, and identifies relevant differences in CpG
              methylation. It can also incorporate local information to improve
              group methylation level and/or variance estimation for
              experiments with small sample size. A permutation study based on
              data from enhanced RRBS samples shows that methylSig maintains a
              well-calibrated type-I error when the number of samples is three
              or more per group. Our simulations show that methylSig has higher
              sensitivity compared with several alternative methods. The use of
              methylSig is illustrated with a comparison of different subtypes
              of acute leukemia and normal bone marrow samples. AVAILABILITY:
              methylSig is available as an R package at
              http://sartorlab.ccmb.med.umich.edu/software. SUPPLEMENTARY
              INFORMATION: Supplementary data are available at Bioinformatics
              online.},
}
@article{Xie2013-cf,
  author   = {Xie, Wei and Schultz, Matthew D and Lister, Ryan and Hou,
              Zhonggang and Rajagopal, Nisha and Ray, Pradipta and Whitaker,
              John W and Tian, Shulan and Hawkins, R David and Leung, Danny and
              Yang, Hongbo and Wang, Tao and Lee, Ah Young and Swanson, Scott A
              and Zhang, Jiuchun and Zhu, Yun and Kim, Audrey and Nery, Joseph
              R and Urich, Mark A and Kuan, Samantha and Yen, Chia-An and
              Klugman, Sarit and Yu, Pengzhi and Suknuntha, Kran and Propson,
              Nicholas E and Chen, Huaming and Edsall, Lee E and Wagner, Ulrich
              and Li, Yan and Ye, Zhen and Kulkarni, Ashwinikumar and Xuan,
              Zhenyu and Chung, Wen-Yu and Chi, Neil C and Antosiewicz-Bourget,
              Jessica E and Slukvin, Igor and Stewart, Ron and Zhang, Michael Q
              and Wang, Wei and Thomson, James A and Ecker, Joseph R and Ren,
              Bing},
  title    = {Epigenomic analysis of multilineage differentiation of human
              embryonic stem cells},
  journal  = {Cell},
  volume   = 153,
  number   = 5,
  pages    = {1134--1148},
  month    = may,
  year     = 2013,
  language = {en},
  abstract = {Epigenetic mechanisms have been proposed to play crucial roles in
              mammalian development, but their precise functions are only
              partially understood. To investigate epigenetic regulation of
              embryonic development, we differentiated human embryonic stem
              cells into mesendoderm, neural progenitor cells, trophoblast-like
              cells, and mesenchymal stem cells and systematically
              characterized DNA methylation, chromatin modifications, and the
              transcriptome in each lineage. We found that promoters that are
              active in early developmental stages tend to be CG rich and
              mainly engage H3K27me3 upon silencing in nonexpressing lineages.
              By contrast, promoters for genes expressed preferentially at
              later stages are often CG poor and primarily employ DNA
              methylation upon repression. Interestingly, the early
              developmental regulatory genes are often located in large genomic
              domains that are generally devoid of DNA methylation in most
              lineages, which we termed DNA methylation valleys (DMVs). Our
              results suggest that distinct epigenetic mechanisms regulate
              early and late stages of ES cell differentiation.},
}
@article{Xie2013-ol,
  author   = {Xie, Dan and Boyle, Alan P and Wu, Linfeng and Zhai, Jie and
              Kawli, Trupti and Snyder, Michael},
  title    = {Dynamic trans-acting factor colocalization in human cells},
  journal  = {Cell},
  volume   = 155,
  number   = 3,
  pages    = {713--724},
  month    = oct,
  year     = 2013,
  language = {en},
  abstract = {Different trans-acting factors (TFs) collaborate and act in
              concert at distinct loci to perform accurate regulation of their
              target genes. To date, the cobinding of TF pairs has been
              investigated in a limited context both in terms of the number of
              factors within a cell type and across cell types and the extent
              of combinatorial colocalizations. Here, we use an approach to
              analyze TF colocalization within a cell type and across multiple
              cell lines at an unprecedented level. We extend this approach
              with large-scale mass spectrometry analysis of
              immunoprecipitations of 50 TFs. Our combined approach reveals
              large numbers of interesting TF-TF associations. We observe
              extensive change in TF colocalizations both within a cell type
              exposed to different conditions and across multiple cell types.
              We show distinct functional annotations and properties of
              different TF cobinding patterns and provide insights into the
              complex regulatory landscape of the cell.},
}
@article{Landan2012-id,
  author  = {Landan, Gilad and Cohen, Netta Mendelson and Mukamel, Zohar and
             Bar, Amir and Molchadsky, Alina and Brosh, Ran and Horn-Saban,
             Shirley and Zalcenstein, Daniela Amann and Goldfinger, Naomi and
             Zundelevich, Adi and Gal-Yam, Einav Nili and Rotter, Varda and
             Tanay, Amos},
  title   = {Epigenetic polymorphism and the stochastic formation of
             differentially methylated regions in normal and cancerous tissues},
  journal = {Nat. Genet.},
  volume  = 44,
  number  = 11,
  pages   = {1207--1214},
  year    = 2012,
}
@article{Bock2012-zm,
  author   = {Bock, Christoph},
  title    = {Analysing and interpreting {DNA} methylation data},
  journal  = {Nat. Rev. Genet.},
  volume   = 13,
  number   = 10,
  pages    = {705--719},
  month    = oct,
  year     = 2012,
  language = {en},
  abstract = {DNA methylation is an epigenetic mark that has suspected
              regulatory roles in a broad range of biological processes and
              diseases. The technology is now available for studying DNA
              methylation genome-wide, at a high resolution and in a large
              number of samples. This Review discusses relevant concepts,
              computational methods and software tools for analysing and
              interpreting DNA methylation data. It focuses not only on the
              bioinformatic challenges of large epigenome-mapping projects and
              epigenome-wide association studies but also highlights software
              tools that make genome-wide DNA methylation mapping more
              accessible for laboratories with limited bioinformatics
              experience.},
}
% genomation package paper. The abstract value is a properly closed string ending at the
% availability statement; the trailing contact label had no content after it.
@article{Akalin2015-yk,
  author   = {Akalin, Altuna and Franke, Vedran and Vlahovi{\v c}ek, Kristian
              and Mason, Christopher E and Sch{\"u}beler, Dirk},
  title    = {genomation: a toolkit to summarize, annotate and visualize
              genomic intervals},
  journal  = {Bioinformatics},
  volume   = 31,
  number   = 7,
  pages    = {1127--1129},
  month    = apr,
  year     = 2015,
  abstract = {UNLABELLED: Biological insights can be obtained through
              computational integration of genomics data sets consisting of
              diverse types of information. The integration is often hampered
              by a large variety of existing file formats, often containing
              similar information, and the necessity to use complicated tools
              to achieve the desired results. We have built an R package,
              genomation, to expedite the extraction of biological information
              from high throughput data. The package works with a variety of
              genomic interval file types and enables easy summarization and
              annotation of high throughput data sets with given genomic
              annotations. AVAILABILITY AND IMPLEMENTATION: The software is
              currently distributed under MIT artistic license and freely
              available at http://bioinformatics.mdc-berlin.de/genomation, and
              through the Bioconductor framework.},
}
@misc{Wreczycka2017-yt,
  author = {Wreczycka, Katarzyna and Franke, Vedran and Uyar, Bora and Wurmus,
            Ricardo and Akalin, Altuna},
  title  = {{HOT} or not: Examining the basis of high-occupancy target regions},
  year   = 2017,
}
@article{Akalin2012-ve,
  author   = {Akalin, Altuna and Garrett-Bakelman, Francine E and Kormaksson,
              Matthias and Busuttil, Jennifer and Zhang, Lu and Khrebtukova,
              Irina and Milne, Thomas A and Huang, Yongsheng and Biswas,
              Debabrata and Hess, Jay L and Allis, C David and Roeder, Robert G
              and Valk, Peter J M and L{\"o}wenberg, Bob and Delwel, Ruud and
              Fernandez, Hugo F and Paietta, Elisabeth and Tallman, Martin S
              and Schroth, Gary P and Mason, Christopher E and Melnick, Ari and
              Figueroa, Maria E},
  title    = {Base-pair resolution {DNA} methylation sequencing reveals
              profoundly divergent epigenetic landscapes in acute myeloid
              leukemia},
  journal  = {PLoS Genet.},
  volume   = 8,
  number   = 6,
  pages    = {e1002781},
  month    = jun,
  year     = 2012,
  abstract = {We have developed an enhanced form of reduced representation
              bisulfite sequencing with extended genomic coverage, which
              resulted in greater capture of DNA methylation information of
              regions lying outside of traditional CpG islands. Applying this
              method to primary human bone marrow specimens from patients with
              Acute Myelogeneous Leukemia (AML), we demonstrated that
              genetically distinct AML subtypes display diametrically opposed
              DNA methylation patterns. As compared to normal controls, we
              observed widespread hypermethylation in IDH mutant AMLs,
              preferentially targeting promoter regions and CpG islands
              neighboring the transcription start sites of genes. In contrast,
              AMLs harboring translocations affecting the MLL gene displayed
              extensive loss of methylation of an almost mutually exclusive set
              of CpGs, which instead affected introns and distal intergenic CpG
              islands and shores. When analyzed in conjunction with gene
              expression profiles, it became apparent that these specific
              patterns of DNA methylation result in differing roles in gene
              expression regulation. However, despite this subtype-specific DNA
              methylation patterning, a much smaller set of CpG sites are
              consistently affected in both AML subtypes. Most CpG sites in
              this common core of aberrantly methylated CpGs were
              hypermethylated in both AML subtypes. Therefore, aberrant DNA
              methylation patterns in AML do not occur in a stereotypical
              manner but rather are highly specific and associated with
              specific driving genetic lesions.},
}
@article{Saito2014-ij,
  author   = {Saito, Yutaka and Tsuji, Junko and Mituyama, Toutai},
  title    = {Bisulfighter: accurate detection of methylated cytosines and
              differentially methylated regions},
  journal  = {Nucleic Acids Res.},
  volume   = 42,
  number   = 6,
  pages    = {e45},
  month    = apr,
  year     = 2014,
  language = {en},
  abstract = {Analysis of bisulfite sequencing data usually requires two tasks:
              to call methylated cytosines (mCs) in a sample, and to detect
              differentially methylated regions (DMRs) between paired samples.
              Although numerous tools have been proposed for mC calling,
              methods for DMR detection have been largely limited. Here, we
              present Bisulfighter, a new software package for detecting mCs
              and DMRs from bisulfite sequencing data. Bisulfighter combines
              the LAST alignment tool for mC calling, and a novel framework for
              DMR detection based on hidden Markov models (HMMs). Unlike
              previous attempts that depend on empirical parameters,
              Bisulfighter can use the expectation-maximization algorithm for
              HMMs to adjust parameters for each data set. We conduct extensive
              experiments in which accuracy of mC calling and DMR detection is
              evaluated on simulated data with various mC contexts, read
              qualities, sequencing depths and DMR lengths, as well as on real
              data from a wide range of biological processes. We demonstrate
              that Bisulfighter consistently achieves better accuracy than
              other published tools, providing greater sensitivity for mCs with
              fewer false positives, more precise estimates of mC levels, more
              exact locations of DMRs and better agreement of DMRs with gene
              expression and DNase I hypersensitivity. The source code is
              available at http://epigenome.cbrc.jp/bisulfighter.},
}
@article{Frith2012-ne,
  author   = {Frith, Martin C and Mori, Ryota and Asai, Kiyoshi},
  title    = {A mostly traditional approach improves alignment of
              bisulfite-converted {DNA}},
  journal  = {Nucleic Acids Res.},
  year     = 2012,
  month    = jul,
  volume   = 40,
  number   = 13,
  pages    = {e100},
  language = {en},
  abstract = {Cytosines in genomic DNA are sometimes methylated. This affects
              many biological processes and diseases. The standard way of
              measuring methylation is to use bisulfite, which converts
              unmethylated cytosines to thymines, then sequence the DNA and
              compare it to a reference genome sequence. We describe a method
              for the critical step of aligning the DNA reads to the correct
              genomic locations. Our method builds on classic alignment
              techniques, including likelihood-ratio scores and spaced seeds.
              In a realistic benchmark, our method has a better combination of
              sensitivity, specificity and speed than nine other
              high-throughput bisulfite aligners. This study enables more
              accurate and rational analysis of DNA methylation. It also
              illustrates how to adapt general-purpose alignment methods to a
              special case with distorted base patterns: this should be
              informative for other special cases such as ancient DNA and
              AT-rich genomes.}
}
@article{Hovestadt2014-kd,
  author   = {Hovestadt, Volker and Jones, David T W and Picelli, Simone and
              Wang, Wei and Kool, Marcel and Northcott, Paul A and Sultan, Marc
              and Stachurski, Katharina and Ryzhova, Marina and Warnatz,
              Hans-J{\"o}rg and Ralser, Meryem and Brun, Sonja and Bunt, Jens
              and J{\"a}ger, Natalie and Kleinheinz, Kortine and Erkek, Serap
              and Weber, Ursula D and Bartholomae, Cynthia C and von Kalle,
              Christof and Lawerenz, Chris and Eils, J{\"u}rgen and Koster, Jan
              and Versteeg, Rogier and Milde, Till and Witt, Olaf and Schmidt,
              Sabine and Wolf, Stephan and Pietsch, Torsten and Rutkowski,
              Stefan and Scheurlen, Wolfram and Taylor, Michael D and Brors,
              Benedikt and Felsberg, J{\"o}rg and Reifenberger, Guido and
              Borkhardt, Arndt and Lehrach, Hans and Wechsler-Reya, Robert J
              and Eils, Roland and Yaspo, Marie-Laure and Landgraf, Pablo and
              Korshunov, Andrey and Zapatka, Marc and Radlwimmer, Bernhard and
              Pfister, Stefan M and Lichter, Peter},
  title    = {Decoding the regulatory landscape of medulloblastoma using {DNA}
              methylation sequencing},
  journal  = {Nature},
  year     = 2014,
  month    = jun,
  volume   = 510,
  number   = 7506,
  pages    = {537--541},
  language = {en},
  abstract = {Epigenetic alterations, that is, disruption of DNA methylation
              and chromatin architecture, are now acknowledged as a universal
              feature of tumorigenesis. Medulloblastoma, a clinically
              challenging, malignant childhood brain tumour, is no exception.
              Despite much progress from recent genomics studies, with
              recurrent changes identified in each of the four distinct tumour
              subgroups (WNT-pathway-activated, SHH-pathway-activated, and the
              less-well-characterized Group 3 and Group 4), many cases still
              lack an obvious genetic driver. Here we present whole-genome
              bisulphite-sequencing data from thirty-four human and five murine
              tumours plus eight human and three murine normal controls,
              augmented with matched whole-genome, RNA and chromatin
              immunoprecipitation sequencing data. This comprehensive data set
              allowed us to decipher several features underlying the interplay
              between the genome, epigenome and transcriptome, and its effects
              on medulloblastoma pathophysiology. Most notable were highly
              prevalent regions of hypomethylation correlating with increased
              gene expression, extending tens of kilobases downstream of
              transcription start sites. Focal regions of low methylation
              linked to transcription-factor-binding sites shed light on
              differential transcriptional networks between subgroups, whereas
              increased methylation due to re-normalization of repressed
              chromatin in DNA methylation valleys was positively correlated
              with gene expression. Large, partially methylated domains
              affecting up to one-third of the genome showed increased mutation
              rates and gene silencing in a subgroup-specific fashion.
              Epigenetic alterations also affected novel medulloblastoma
              candidate genes (for example, LIN28B), resulting in alternative
              promoter usage and/or differential messenger RNA/microRNA
              expression. Analysis of mouse medulloblastoma and precursor-cell
              methylation demonstrated a somatic origin for many alterations.
              Our data provide insights into the epigenetic regulation of
              transcription and genome organization in medulloblastoma
              pathogenesis, which are probably also of importance in a wider
              developmental and disease context.}
}
@article{Stirzaker2014-ao,
  author   = {Stirzaker, Clare and Taberlay, Phillippa C and Statham, Aaron L
              and Clark, Susan J},
  title    = {Mining cancer methylomes: prospects and challenges},
  journal  = {Trends Genet.},
  year     = 2014,
  month    = feb,
  volume   = 30,
  number   = 2,
  pages    = {75--84},
  keywords = {DNA methylation; cancer methylome; epigenetics},
  language = {en},
  abstract = {There are over 28 million CpG sites in the human genome.
              Assessing the methylation status of each of these sites will be
              required to understand fully the role of DNA methylation in
              health and disease. Genome-wide analysis, using arrays and
              high-throughput sequencing, has enabled assessment of large
              fractions of the methylome, but each protocol comes with unique
              advantages and disadvantages. Notably, except for whole-genome
              bisulfite sequencing, most commonly used genome-wide methods
              detect <5\% of all CpG sites. Here, we discuss approaches for
              methylome studies and compare genome coverage of promoters,
              genes, and intergenic regions, and capacity to quantitate
              individual CpG methylation states. Finally, we examine the extent
              of published cancer methylomes that have been generated using
              genome-wide approaches.}
}
@INCOLLECTION{Baubec2016-pt,
title = "Genome-Wide Analysis of {DNA} Methylation Patterns by
High-Throughput Sequencing",
booktitle = "Field Guidelines for Genetic Experimental Designs in
High-Throughput Sequencing",
author = "Baubec, Tuncay and Akalin, Altuna",
editor = "Aransay, Ana M and Lav{\'\i}n Trueba, Jos{\'e} Luis",
publisher = "Springer International Publishing",
address = "Cham",
pages = "197--221",
year = 2016
}
@ARTICLE{Wang2015-of,
title = "{swDMR}: A Sliding Window Approach to Identify Differentially
Methylated Regions Based on Whole Genome Bisulfite Sequencing",
author = "Wang, Zhen and Li, Xianfeng and Jiang, Yi and Shao, Qianzhi and
Liu, Qi and Chen, Bingyu and Huang, Dongsheng",
abstract = "DNA methylation is a widespread epigenetic modification that
plays an essential role in gene expression through
transcriptional regulation and chromatin remodeling. The
emergence of whole genome bisulfite sequencing (WGBS) represents
an important milestone in the detection of DNA methylation.
Characterization of differential methylated regions (DMRs) is
fundamental as well for further functional analysis. In this
study, we present swDMR (http://sourceforge.net/projects/swDMR/)
for the comprehensive analysis of DMRs from whole genome
methylation profiles by a sliding window approach. It is an
integrated tool designed for WGBS data, which not only implements
accessible statistical methods to perform hypothesis test adapted
to two or more samples without replicates, but false discovery
rate was also controlled by multiple test correction. Downstream
analysis tools were also provided, including cluster, annotation
and visualization modules. In summary, based on WGBS data, swDMR
can produce abundant information of differential methylated