-
Notifications
You must be signed in to change notification settings - Fork 0
/
last_paper_bibliography.bib
executable file
·472 lines (418 loc) · 17.7 KB
/
last_paper_bibliography.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
@misc{beau_johnston_2017_1134175,
  title        = {{BeauJoh/Oclgrind: Adding AIWC -- An Architecture Independent Workload Characterisation Plugin}},
  author       = {Beau Johnston and James Price and Moritz Pflanzer and Petros Kalos and Tom Deakin and Nido Media and Daniel Saier},
  year         = 2017,
  month        = dec,
  doi          = {10.5281/zenodo.1134175},
  howpublished = {https://doi.org/10.5281/zenodo.1134175},
}
% The accented letter is written as {\'o} so that BibTeX treats it as a single
% special character when sorting and building labels.
@inproceedings{CaparrosCabezas:2011:PDM:1989493.1989506,
  author    = {Caparr{\'o}s Cabezas, Victoria and Stanley-Marbell, Phillip},
  title     = {Parallelism and Data Movement Characterization of Contemporary Application Classes},
  booktitle = {Proceedings of the Twenty-third Annual {ACM} Symposium on Parallelism in Algorithms and Architectures},
  series    = {SPAA '11},
  year      = {2011},
  isbn      = {978-1-4503-0743-7},
  location  = {San Jose, California, USA},
  pages     = {95--104},
  numpages  = {10},
  url       = {http://doi.acm.org/10.1145/1989493.1989506},
  doi       = {10.1145/1989493.1989506},
  acmid     = {1989506},
  publisher = {ACM},
  address   = {New York, NY, USA},
  keywords  = {basic-block-level parallelism, berkeley computational motifs, data movement, instruction-level parallelism},
}
% Euro-Par is a conference proceedings volume, so the venue is carried in
% booktitle of an inproceedings entry rather than in a journal field.
@inproceedings{spafford2010maestro,
  author    = {Spafford, Kyle and Meredith, Jeremy and Vetter, Jeffrey},
  title     = {Maestro: data orchestration and tuning for {OpenCL} devices},
  booktitle = {Euro-Par 2010 - Parallel Processing},
  pages     = {275--286},
  year      = {2010},
  publisher = {Springer},
}
@inproceedings{price2017analyzing,
  author       = {Price, James and McIntosh-Smith, Simon},
  title        = {Analyzing and improving performance portability of {OpenCL} applications via auto-tuning},
  booktitle    = {Proceedings of the 5th International Workshop on {OpenCL}},
  year         = {2017},
  pages        = {14},
  organization = {ACM},
}
% The month uses the predefined macro so the style controls its rendering;
% the tool name is braced to survive sentence-casing of the title.
@inproceedings{ansel:pact:2014,
  author    = {Jason Ansel and Shoaib Kamil and Kalyan Veeramachaneni and Jonathan Ragan-Kelley and Jeffrey Bosboom and Una-May O'Reilly and Saman Amarasinghe},
  title     = {{OpenTuner}: An Extensible Framework for Program Autotuning},
  booktitle = {International Conference on Parallel Architectures and Compilation Techniques ({PACT})},
  address   = {Edmonton, Canada},
  month     = aug,
  year      = {2014},
  url       = {http://groups.csail.mit.edu/commit/papers/2014/ansel-pact14-opentuner.pdf},
  slides    = {http://groups.csail.mit.edu/commit/papers/2014/ansel-pact14-opentuner-slides.pdf},
  keywords  = {OpenTuner},
}
@inproceedings{chaimov2014toward,
  author       = {Chaimov, Nick and Norris, Boyana and Malony, Allen},
  title        = {Toward multi-target autotuning for accelerators},
  booktitle    = {IEEE International Conference on Parallel and Distributed Systems ({ICPADS})},
  year         = {2014},
  pages        = {534--541},
  organization = {IEEE},
}
% The tool name is braced so sentence-casing styles do not turn it into "Cltune".
@inproceedings{nugteren2015cltune,
  author       = {Nugteren, Cedric and Codreanu, Valeriu},
  title        = {{CLTune}: A generic auto-tuner for {OpenCL} kernels},
  booktitle    = {IEEE International Symposium on Embedded Multicore/Many-core Systems-on-Chip ({MCSoC})},
  pages        = {195--202},
  year         = {2015},
  organization = {IEEE},
}
@inproceedings{meajil1997architecture,
  author       = {Meajil, Abdullah I and El-Ghazawi, Tarek and Sterling, Thomas},
  title        = {An architecture-independent workload characterization model for parallel computer architectures},
  booktitle    = {Second Aizu International Symposium on Parallel Algorithms/Architecture Synthesis},
  year         = {1997},
  pages        = {143--150},
  organization = {IEEE},
}
@article{chen1996analysis,
  author    = {Chen, I-Cheng K and Coffey, John T and Mudge, Trevor N},
  title     = {Analysis of branch prediction via data compression},
  journal   = {ACM SIGPLAN Notices},
  year      = {1996},
  volume    = {31},
  number    = {9},
  pages     = {128--137},
  publisher = {ACM},
}
@inproceedings{yokota2007introducing,
  author       = {Yokota, Takashi and Ootsu, Kanemitsu and Baba, Takanobu},
  title        = {Introducing entropies for representing program behavior and branch predictor performance},
  booktitle    = {Proceedings of the 2007 workshop on Experimental computer science},
  year         = {2007},
  pages        = {17},
  organization = {ACM},
}
% The ordinal is plain text (as in the other proceedings titles in this file)
% instead of a math-mode superscript, leaving typography to the style.
@inproceedings{Hammerstrom:1977:ICC:800255.810669,
  author    = {Hammerstrom, D. W. and Davidson, E. S.},
  title     = {Information Content of {CPU} Memory Referencing Behavior},
  booktitle = {Proceedings of the 4th Annual Symposium on Computer Architecture},
  series    = {ISCA '77},
  year      = {1977},
  pages     = {184--192},
  numpages  = {9},
  url       = {http://doi.acm.org/10.1145/800255.810669},
  doi       = {10.1145/800255.810669},
  acmid     = {810669},
  publisher = {ACM},
  address   = {New York, NY, USA},
}
% The acronym is braced so sentence-casing styles do not render it as "Isa".
@inproceedings{shao2013isa,
  author       = {Shao, Yakun Sophia and Brooks, David},
  title        = {{ISA}-independent workload characterization and its implications for specialized architectures},
  booktitle    = {IEEE International Symposium on Performance Analysis of Systems and Software ({ISPASS})},
  pages        = {245--255},
  year         = {2013},
  organization = {IEEE},
}
% The ordinal is plain text (as in the other proceedings titles in this file)
% instead of a math-mode superscript, leaving typography to the style.
@inproceedings{johnston18opendwarfs,
  author    = {Johnston, Beau and Milthorpe, Josh},
  title     = {{Dwarfs} on Accelerators: Enhancing {OpenCL} Benchmarking for Heterogeneous Computing Architectures},
  booktitle = {Proceedings of the 47th International Conference on Parallel Processing Companion},
  series    = {ICPP '18},
  year      = {2018},
  isbn      = {978-1-4503-6523-9},
  location  = {Eugene, OR, USA},
  pages     = {4:1--4:10},
  articleno = {4},
  numpages  = {10},
  url       = {http://doi.acm.org/10.1145/3229710.3229729},
  doi       = {10.1145/3229710.3229729},
  acmid     = {3229729},
  publisher = {ACM},
  address   = {New York, NY, USA},
}
% Standard styles ignore a journal field on misc entries, so the venue and the
% in-press status are carried in note, where they are actually printed.
@misc{johnston18predicting,
  author       = {Johnston, Beau and Falzon, Greg and Milthorpe, Josh},
  title        = {{OpenCL} Performance Prediction using Architecture-Independent Features},
  howpublished = {http://www.milthorpe.org/pubs/aiwc-perf-prediction},
  note         = {In press, International Workshop on High Performance and Dynamic Reconfigurable Systems and Networks ({DRSN-2018})},
  year         = {2018},
}
% Prabhat is a single-name author; the name is braced so BibTeX keeps it as one
% indivisible surname instead of fusing it with the following author.
@article{declerck2016cori,
  author  = {Declerck, Tina and Antypas, Katie and Bard, Deborah and Bhimji, Wahid and Canon, Shane and Cholia, Shreyas and He, Helen Yun and Jacobsen, Douglas and {Prabhat} and Wright, Nicholas J},
  title   = {{Cori} - A system to support data-intensive computing},
  journal = {Proceedings of the Cray User Group},
  pages   = {8},
  year    = {2016},
}
@misc{bainville2010fft,
  author = {Bainville, Eric},
  title  = {{OpenCL} Fast {Fourier} Transform},
  url    = {http://www.bealto.com/gpu-fft.html},
  year   = {2010},
}
% The venue is given in natural word order (not the inverted export form) and
% the organization acronym is spelled in capitals like the rest of this file.
@inproceedings{che2009rodinia,
  author       = {Che, Shuai and Boyer, Michael and Meng, Jiayuan and Tarjan, David and Sheaffer, Jeremy W and Lee, Sang-Ha and Skadron, Kevin},
  title        = {{Rodinia}: A benchmark suite for heterogeneous computing},
  booktitle    = {IEEE International Symposium on Workload Characterization ({IISWC})},
  pages        = {44--54},
  year         = {2009},
  organization = {IEEE},
}
@inproceedings{ganesan2008performance,
  author       = {Ganesan, Karthik and John, Lizy and Salapura, Valentina and Sexton, James},
  title        = {A performance counter based workload characterization on {Blue Gene/P}},
  booktitle    = {International Conference on Parallel Processing ({ICPP})},
  year         = {2008},
  pages        = {330--337},
  organization = {IEEE},
}
@article{depestel2017linear,
  title      = {Linear Branch Entropy: Characterizing and Optimizing Branch Behavior in a Micro-Architecture Independent Way},
  author     = {De Pestel, Sander and Eyerman, Stijn and Eeckhout, Lieven},
  journal    = {IEEE Transactions on Computers},
  volume     = {66},
  number     = {3},
  pages      = {458--472},
  numpages   = {15},
  year       = {2017},
  month      = mar,
  issue_date = {March 2017},
  issn       = {0018-9340},
  doi        = {10.1109/TC.2016.2601323},
  url        = {https://doi.org/10.1109/TC.2016.2601323},
  acmid      = {3057908},
  publisher  = {IEEE Computer Society},
  address    = {Washington, DC, USA},
}
@article{prakash2008performance,
  author  = {Prakash, Tribuvan Kumar and Peng, Lu},
  title   = {Performance characterization of {SPEC CPU2006} benchmarks on {Intel Core 2 Duo} processor},
  journal = {ISAST Trans. Comput. Softw. Eng},
  year    = {2008},
  volume  = {2},
  number  = {1},
  pages   = {36--41},
}
@inproceedings{price:15,
  author       = {Price, James and McIntosh-Smith, Simon},
  title        = {Oclgrind: An extensible {OpenCL} device simulator},
  booktitle    = {Proceedings of the 3rd International Workshop on {OpenCL}},
  year         = {2015},
  pages        = {12},
  organization = {ACM},
}
@misc{kessenich2015,
  author = {Kessenich, John},
  title  = {{A Khronos-Defined Intermediate Language for Native Representation of Graphical Shaders and Compute Kernels}},
  url    = {https://www.khronos.org/registry/spir-v/papers/WhitePaper.html},
  year   = {2015},
}
@article{hoste2007microarchitecture,
  author    = {Hoste, Kenneth and Eeckhout, Lieven},
  title     = {Microarchitecture-independent workload characterization},
  journal   = {IEEE Micro},
  year      = {2007},
  volume    = {27},
  number    = {3},
  publisher = {IEEE},
}
% SIGPLAN Notices is a periodical with volume and issue numbers, so this is an
% article entry (as for the other SIGPLAN Notices citation in this file);
% standard styles warn when inproceedings carries both volume and number.
@article{luk2005pin,
  author    = {Luk, Chi-Keung and Cohn, Robert and Muth, Robert and Patil, Harish and Klauser, Artur and Lowney, Geoff and Wallace, Steven and Reddi, Vijay Janapa and Hazelwood, Kim},
  title     = {Pin: building customized program analysis tools with dynamic instrumentation},
  journal   = {{ACM SIGPLAN} Notices},
  volume    = {40},
  number    = {6},
  pages     = {190--200},
  year      = {2005},
  publisher = {ACM},
}
@inproceedings{hoste2006performance,
  author       = {Hoste, Kenneth and Phansalkar, Aashish and Eeckhout, Lieven and Georges, Andy and John, Lizy K and De Bosschere, Koen},
  title        = {Performance prediction based on inherent program similarity},
  booktitle    = {International Conference on Parallel Architectures and Compilation Techniques ({PACT})},
  year         = {2006},
  pages        = {114--122},
  organization = {IEEE},
}
@article{lively2011energy,
  author    = {Lively, Charles and Wu, Xingfu and Taylor, Valerie and Moore, Shirley and Chang, Hung-Ching and Cameron, Kirk},
  title     = {Energy and performance characteristics of different parallel implementations of scientific applications on multicore systems},
  journal   = {International Journal of High Performance Computing Applications},
  year      = {2011},
  volume    = {25},
  number    = {3},
  pages     = {342--350},
  publisher = {SAGE Publications Sage UK: London, England},
}
% The suite name is spelled and braced as in the other entries of this file so
% its internal capital survives sentence-casing.
@article{krommydas2016opendwarfs,
  author    = {Krommydas, Konstantinos and Feng, Wu-chun and Antonopoulos, Christos D and Bellas, Nikolaos},
  title     = {{OpenDwarfs}: Characterization of dwarf-based benchmarks on fixed and reconfigurable architectures},
  journal   = {Journal of Signal Processing Systems},
  volume    = {85},
  number    = {3},
  pages     = {373--392},
  year      = {2016},
  publisher = {Springer},
}
% The report identifier lives in number and the issuing body in institution;
% techreport styles add the "Technical Report" label themselves.
@techreport{asanovic2006landscape,
  author      = {Asanovic, Krste and Bodik, Ras and Catanzaro, Bryan Christopher and Gebis, Joseph James and Husbands, Parry and Keutzer, Kurt and Patterson, David A and Plishker, William Lester and Shalf, John and Williams, Samuel Webb and others},
  title       = {The landscape of parallel computing research: A view from {Berkeley}},
  institution = {EECS Department, University of California, Berkeley},
  number      = {UCB/EECS-2006-183},
  year        = {2006},
}
@inproceedings{johnston2017embedded,
  author       = {Johnston, Beau and Lee, Brian and Angove, Luke and Rendell, Alistair},
  title        = {Embedded Accelerators for Scientific High-Performance Computing: An Energy Study of {OpenCL} {Gaussian} Elimination Workloads},
  booktitle    = {International Conference on Parallel Processing Workshops ({ICPPW})},
  year         = {2017},
  pages        = {59--68},
  organization = {IEEE},
}
@inproceedings{mitra2014implementation,
  author       = {Mitra, Gaurav and Stotzer, Eric and Jayaraj, Ajay and Rendell, Alistair P},
  title        = {Implementation and optimization of the {OpenMP} accelerator model for the {TI Keystone II} architecture},
  booktitle    = {International Workshop on OpenMP},
  year         = {2014},
  pages        = {202--214},
  organization = {Springer},
}
@article{bailey1991parallel,
  author    = {Bailey, David H and Barszcz, Eric and Barton, John T and Browning, David S and Carter, Robert L and Dagum, Leonardo and Fatoohi, Rod A and Frederickson, Paul O and Lasinski, Thomas A and Schreiber, Rob S and others},
  title     = {The {NAS} parallel benchmarks},
  journal   = {International Journal of Supercomputing Applications},
  year      = {1991},
  volume    = {5},
  number    = {3},
  pages     = {63--73},
  publisher = {Sage Publications Sage CA: Thousand Oaks, CA},
}
@inproceedings{barnes2016evaluating,
  author       = {Barnes, Taylor and Cook, Brandon and Deslippe, Jack and Doerfler, Douglas and Friesen, Brian and He, Yun and Kurth, Thorsten and Koskela, Tuomas and Lobet, Mathieu and Malas, Tareq and others},
  title        = {Evaluating and optimizing the {NERSC} workload on {Knights Landing}},
  booktitle    = {International Workshop on Performance Modeling, Benchmarking and Simulation of High Performance Computer Systems (PMBS)},
  pages        = {43--53},
  year         = {2016},
  organization = {IEEE},
}
@inproceedings{martineau2016performance,
  author       = {Martineau, Matt and McIntosh-Smith, Simon and Bertolli, Carlo and Jacob, Arpith C and Antao, Samuel F and Eichenberger, Alexandre and Bercea, Gheorghe-Teodor and Chen, Tong and Jin, Tian and O'Brien, Kevin and others},
  title        = {Performance analysis and optimization of {Clang}'s {OpenMP} 4.5 {GPU} support},
  booktitle    = {International Workshop on Performance Modeling, Benchmarking and Simulation of High Performance Computer Systems (PMBS)},
  year         = {2016},
  pages        = {54--64},
  organization = {IEEE},
}
@inproceedings{marjanovic2016hpc,
  author       = {Marjanovi{\'c}, Vladimir and Gracia, Jos{\'e} and Glass, Colin W},
  title        = {{HPC} benchmarking: problem size matters},
  booktitle    = {International Workshop on Performance Modeling, Benchmarking and Simulation of High Performance Computer Systems (PMBS)},
  year         = {2016},
  pages        = {1--10},
  organization = {IEEE},
}
% The month uses the predefined macro so the style controls its rendering;
% the site name is braced to keep its capitals under sentence-casing.
@article{feldman_2017,
  author    = {Feldman, Michael},
  title     = {{TOP500} Meanderings: Supercomputers Take Big Green Leap in 2017},
  journal   = {TOP500 Supercomputer Sites},
  publisher = {Top500.org},
  url       = {https://www.top500.org/news/top500-meanderings-supercomputers-take-big-green-leap-in-2017/},
  year      = {2017},
  month     = sep,
}
% The month uses the predefined macro so the style controls its rendering.
@article{feldman_2017_isambard,
  author    = {Feldman, Michael},
  title     = {Cray to Deliver {ARM}-Powered Supercomputer to {UK} Consortium},
  journal   = {TOP500 Supercomputer Sites},
  publisher = {Top500.org},
  url       = {https://www.top500.org/news/cray-to-deliver-arm-powered-supercomputer-to-uk-consortium/},
  year      = {2017},
  month     = jan,
}
% The month uses the predefined macro so the style controls its rendering.
@article{morgan_2016,
  author    = {Morgan, Timothy},
  title     = {{NVLink} Takes {GPU} Acceleration To The Next Level},
  journal   = {The Next Platform},
  publisher = {Stackhouse Publishing Inc.},
  url       = {https://www.nextplatform.com/2016/05/04/nvlink-takes-gpu-acceleration-next-level/},
  year      = {2016},
  month     = may,
}
% The month uses the predefined macro so the style controls its rendering.
@article{morgan_2017,
  author    = {Morgan, Timothy},
  title     = {The {Power9} Rollout Begins With {Summit} And {Sierra} Supercomputers},
  journal   = {The Next Platform},
  publisher = {Stackhouse Publishing Inc.},
  url       = {https://www.nextplatform.com/2017/09/19/power9-rollout-begins-summit-sierra/},
  year      = {2017},
  month     = sep,
}
% Typed as an article like the other news items from the same outlet in this
% file, so the journal field is actually printed; month uses the macro form.
@article{morgan_2016_postk,
  author    = {Morgan, Timothy},
  title     = {Inside {Japan}'s Future Exascale {ARM} Supercomputer},
  journal   = {The Next Platform},
  publisher = {Stackhouse Publishing Inc.},
  url       = {https://www.nextplatform.com/2016/06/23/inside-japans-future-exaflops-arm-supercomputer/},
  year      = {2016},
  month     = jun,
}
@inproceedings{lopez2015examining,
  author       = {Lopez, M Graham and Young, Jeffrey and Meredith, Jeremy S and Roth, Philip C and Horton, Mitchel and Vetter, Jeffrey S},
  title        = {Examining recent many-core architectures and programming models using {SHOC}},
  booktitle    = {International Workshop on Performance Modeling, Benchmarking and Simulation of High Performance Computer Systems (PMBS)},
  pages        = {3},
  year         = {2015},
  organization = {ACM},
}
@inproceedings{hoefler2015scientific,
  author       = {Hoefler, Torsten and Belli, Roberto},
  title        = {Scientific benchmarking of parallel computing systems: Twelve ways to tell the masses when reporting performance results},
  booktitle    = {Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis},
  year         = {2015},
  pages        = {73},
  organization = {ACM},
}
@inproceedings{mucci1999papi,
  author    = {Mucci, Philip J and Browne, Shirley and Deane, Christine and Ho, George},
  title     = {{PAPI}: A portable interface to hardware performance counters},
  booktitle = {Proceedings of the Department of Defense HPCMP users group conference},
  year      = {1999},
  volume    = {710},
}
@misc{johnston2017,
  title        = {{OpenDwarfs}},
  author       = {Johnston, B.},
  howpublished = {\url{https://github.com/BeauJoh/OpenDwarfs}},
  journal      = {GitHub repository},
  publisher    = {GitHub},
  year         = {2017},
}
% The month uses the predefined macro so the style controls its rendering.
@misc{opendwarfs2017base,
  title        = {{OpenDwarfs} (base version)},
  howpublished = {\url{https://github.com/vtsynergy/OpenDwarfs/commit/31c099aff5343e93ba9e8c3cd42bee5ec536aa93}},
  day          = {26},
  month        = feb,
  year         = {2017},
}
@article{hager2013exploring,
  author    = {Hager, Georg and Treibig, Jan and Habich, Johannes and Wellein, Gerhard},
  title     = {Exploring performance and power properties of modern multi-core chips via simple machine models},
  journal   = {Concurrency and Computation: Practice and Experience},
  year      = {2013},
  volume    = {28},
  number    = {2},
  pages     = {189--210},
  publisher = {Wiley Online Library},
}
@article{williams2009roofline,
  author  = {Williams, Samuel and Waterman, Andrew and Patterson, David},
  title   = {Roofline: An insightful visual performance model for floating-point programs and multicore architectures},
  journal = {Communications of the Association for Computing Machinery},
  year    = {2009},
}