% easc2016.tex --- forked from nicooff/EASC2016
% This is "sig-alternate.tex" V2.1 April 2013
% This file should be compiled with V2.5 of "sig-alternate.cls" May 2012
%
% This example file demonstrates the use of the 'sig-alternate.cls'
% V2.5 LaTeX2e document class file. It is for those submitting
% articles to ACM Conference Proceedings WHO DO NOT WISH TO
% STRICTLY ADHERE TO THE SIGS (PUBS-BOARD-ENDORSED) STYLE.
% The 'sig-alternate.cls' file will produce a similar-looking,
% albeit, 'tighter' paper resulting in, invariably, fewer pages.
%
% ----------------------------------------------------------------------------------------------------------------
% This .tex file (and associated .cls V2.5) produces:
% 1) The Permission Statement
% 2) The Conference (location) Info information
% 3) The Copyright Line with ACM data
% 4) NO page numbers
%
% as against the acm_proc_article-sp.cls file which
% DOES NOT produce 1) thru' 3) above.
%
% Using 'sig-alternate.cls' you have control, however, from within
% the source .tex file, over both the CopyrightYear
% (defaulted to 200X) and the ACM Copyright Data
% (defaulted to X-XXXXX-XX-X/XX/XX).
% e.g.
% \CopyrightYear{2007} will cause 2007 to appear in the copyright line.
% \crdata{0-12345-67-8/90/12} will cause 0-12345-67-8/90/12 to appear in the copyright line.
%
% ---------------------------------------------------------------------------------------------------------------
% This .tex source is an example which *does* use
% the .bib file (from which the .bbl file % is produced).
% REMEMBER HOWEVER: After having produced the .bbl file,
% and prior to final submission, you *NEED* to 'insert'
% your .bbl file into your source .tex file so as to provide
% ONE 'self-contained' source file.
%
% ================= IF YOU HAVE QUESTIONS =======================
% Questions regarding the SIGS styles, SIGS policies and
% procedures, Conferences etc. should be sent to
% Adrienne Griscti ([email protected])
%
% Technical questions _only_ to
% Gerald Murray ([email protected])
% ===============================================================
%
% For tracking purposes - this is V2.0 - May 2012
\documentclass{sig-alternate}
\begin{document}
% Copyright
\setcopyright{acmcopyright}
%\setcopyright{acmlicensed}
%\setcopyright{rightsretained}
%\setcopyright{usgov}
%\setcopyright{usgovmixed}
%\setcopyright{cagov}
%\setcopyright{cagovmixed}
% DOI
%\doi{000000} ??????
% ISBN
%\isbn{000000} ???????
%Conference
\conferenceinfo{EASC '16}{April 26--29, 2016, Stockholm, Sweden}
%\acmPrice{\$15.00}
%
% --- Author Metadata here ---
%\conferenceinfo{EASC2016}{'16, Stockholm, Sweden} % disabled: template leftover that overrode the \conferenceinfo{EASC '16}{...} set above
%\CopyrightYear{2007} % Allows default copyright year (20XX) to be over-ridden - IF NEED BE.
%\crdata{0-12345-67-8/90/01} % Allows default copyright data (0-89791-88-6/97/05) to be over-ridden - IF NEED BE.
% --- End of Author Metadata ---
\title{Benchmarking and scaling of Nek5000}
\subtitle{Subtitle}
%
% You need the command \numberofauthors to handle the 'placement
% and alignment' of the authors beneath the title.
%
% For aesthetic reasons, we recommend 'three authors at a time'
% i.e. three 'name/affiliation blocks' be placed beneath the title.
%
% NOTE: You are NOT restricted in how many 'rows' of
% "name/affiliations" may appear. We just ask that you restrict
% the number of 'columns' to three.
%
% Because of the available 'opening page real-estate'
% we ask you to refrain from putting more than six authors
% (two rows with three columns) beneath the article title.
% More than six makes the first-page appear very cluttered indeed.
%
% Use the \alignauthor commands to handle the names
% and affiliations for an 'aesthetic maximum' of six authors.
% Add names, affiliations, addresses for
% the seventh etc. author(s) as the argument for the
% \additionalauthors command.
% These 'additional authors' will be output/set for you
% without further effort on your part as the last section in
% the body of your article BEFORE References or any Appendices.
\numberofauthors{6} % this paper has a *total* of SIX authors, all of whom
% appear on the 'first-page' (two rows of three); there is no
% \additionalauthors section.
%
\author{
% You can go ahead and credit any number of authors here,
% e.g. one 'row of three' or two rows (consisting of one row of three
% and a second row of one, two or three).
%
% The command \alignauthor (no curly braces needed) should
% precede each author name, affiliation/snail-mail address and
% e-mail address. Additionally, tag each line of
% affiliation/address with \affaddr, and tag the
% e-mail address with \email.
%
% 1st. author
% 1st. author
\alignauthor
Oana Marin\\
\affaddr{*}\\
\affaddr{*}\\
\affaddr{*}\\
\email{*}
% 2nd. author
\alignauthor
Nicolas Offermans\\
\affaddr{Linn\'{e} Flow Center}\\
\affaddr{KTH Mechanics, Royal Institute of Technology}\\
\affaddr{10044 Stockholm, Sweden}\\
\email{[email protected]} % TODO: restore the real e-mail address (obfuscated placeholder in this copy of the file)
% 3rd. author
\alignauthor
Adam Peplinski\\
\affaddr{*}\\
\affaddr{*}\\
\affaddr{*}\\
\email{*}
\and % use '\and' if you need 'another row' of author names
% 4th. author
\alignauthor
Michel Schanen\\
\affaddr{*}\\
\affaddr{*}\\
\affaddr{*}\\
\email{*}
% 5th. author
\alignauthor
Philipp Schlatter\\
\affaddr{*}\\
\affaddr{*}\\
\affaddr{*}\\
\email{*}
% 6th. author
\alignauthor
Jing Gong\\
\affaddr{PDC-HPC}\\
\affaddr{KTH, Royal Institute of Technology}\\
\affaddr{10044 Stockholm, Sweden}\\
\email{[email protected]} % TODO: restore the real e-mail address (obfuscated placeholder in this copy of the file)
}
% There's nothing stopping you putting the seventh, eighth, etc.
% author on the opening page (as the 'third row') but we ask,
% for aesthetic reasons that you place these 'additional authors'
% in the \additional authors block, viz.
%\additionalauthors{Additional authors: John Smith (The Th{\o}rv{\"a}ld Group,
%email: {\texttt{[email protected]}}) and Julius P.~Kumquat
%(The Kumquat Consortium, email: {\texttt{[email protected]}}).}
\date{21 March 2016}
% Just remember to make sure that the TOTAL number of authors
% is the number that will appear on the first page PLUS the
% number that will appear in the \additionalauthors section.
\maketitle
\begin{abstract}
A strong scaling study of the high-order spectral element solver Nek5000 is performed. The test cases correspond to a turbulent flow in a straight pipe at four different friction Reynolds numbers $Re_{\tau} = 180$, $360$, $550$ and $1000$. Different architectures are studied, namely IBM Blue Gene/Q, Cray XC40 and Cray XK7 supercomputers. A theoretical model for parallel performance is introduced and compared to the numerical results. We also study the effect of the two coarse grid solvers, XXT and AMG, on the computational time.
\end{abstract}
% Code generated by the tool at
% http://dl.acm.org/ccs.cfm
%
% I generated what follows quite arbitrarily... Do not hesitate to modify.
% Is it even necessary?
\begin{CCSXML}
<ccs2012>
<concept>
<concept_id>10010147.10010169.10010170</concept_id>
<concept_desc>Computing methodologies~Parallel algorithms</concept_desc>
<concept_significance>500</concept_significance>
</concept>
<concept>
<concept_id>10010147.10010169.10010170.10010174</concept_id>
<concept_desc>Computing methodologies~Massively parallel algorithms</concept_desc>
<concept_significance>300</concept_significance>
</concept>
<concept>
<concept_id>10003752.10003777.10003780</concept_id>
<concept_desc>Theory of computation~Communication complexity</concept_desc>
<concept_significance>300</concept_significance>
</concept>
<concept>
<concept_id>10010405.10010432</concept_id>
<concept_desc>Applied computing~Physical sciences and engineering</concept_desc>
<concept_significance>300</concept_significance>
</concept>
</ccs2012>
\end{CCSXML}
\ccsdesc[500]{Computing methodologies~Parallel algorithms}
\ccsdesc[300]{Computing methodologies~Massively parallel algorithms}
\ccsdesc[300]{Theory of computation~Communication complexity}
\ccsdesc[300]{Applied computing~Physical sciences and engineering}
%
% End generated code
%
%
% Use this command to print the description
%
\printccsdesc
% We no longer use \terms command
%\terms{Theory}
\keywords{Nek5000; Scaling; Benchmarking}
\section{Introduction}
The numerical solution of partial differential equations (PDEs) is common practice nowadays in the scientific community. However, developing a code that scales linearly on a large number of cores remains a challenging task. In this paper, we study the behavior of Nek5000, a solver for fluid mechanics and heat transfer problems, on a large number of processors and assess its ability to run efficiently on the next generation of exascale supercomputers. Cite~\cite{fischer:scaling} and~\cite{tufo:terascale}\dots
\section{Body}
\subsection{Nek5000}
% Present system of equations
% Describe briefly the algorithm behind nek
% Mention and discuss PN-PN
% Discuss the two coarse grid solver XXT and AMG
\subsection{Test cases}
\subsubsection{Pipe}
The first test case considered is the turbulent flow in a straight pipe. A thorough description of the flow configuration as well as a detailed analysis of the physical results can be found in~\cite{Khoury2013}. The flow was run at four different friction Reynolds numbers $Re_{\tau} = 180$, $360$, $550$ and $1000$. A summary of the different simulations and the associated numbers of elements and grid points is presented in Table~\ref{tab:pipe_conf}. The friction Reynolds number is defined as $Re_{\tau} = u_{\tau} R / \nu$, where $u_{\tau}$ is the friction velocity, $R$ is the radius of the pipe and $\nu$ is the kinematic viscosity. The bulk Reynolds number is defined as $Re_{b} = 2 U_b R / \nu$, where $U_b$ is the mean bulk velocity.
\begin{table}
\centering
\caption{Summary of the different pipe flow configurations.}
\begin{tabular}{llrr}
\hline
$Re_{\tau}$&$Re_{b}$&\# of elements & \# of grid points\\
\hline
$180$ & $5300$ & $36{,}480$ & $18.67 \times 10^6$\\
$360$ & $11{,}700$ & $237{,}120$ & $121.4 \times 10^6$\\
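% NOTE(review): element count changed from 823,632 to 853,632 so that it is consistent with 437.0e6 grid points at 8^3 points per element (the ratio found in the two rows above) -- confirm against Khoury2013.
$550$ & $19{,}000$ & $853{,}632$ & $437.0 \times 10^6$\\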
$1000$ & $37{,}700$ & $1{,}264{,}032$ & $2.184 \times 10^9$\\
\hline
\end{tabular}
\label{tab:pipe_conf}
\end{table}
\subsubsection{Rod bundle}
\subsection{Model for parallel performance}
\subsection{Supercomputers}
The test cases were run on three different supercomputers, namely Mira from the Argonne National Laboratory, USA, Titan from the Oak Ridge National Laboratory, USA, and Beskow from the PDC Center for High Performance Computing, KTH, Sweden. An overview of the characteristics of each computer is given in Table~\ref{tab:computer_charac}.
% Add other fields to the table?
% Put latency and bandwidth in another table?
\begin{table*}
\centering
\caption{Overview of the characteristics of the different supercomputers.}
\begin{tabular}{l|ccccc}
\hline
& Architecture & \# of cores & cores/node & $\alpha$ & $\beta$\\
\hline
Mira & IBM BG/Q & $786{,}432$ & $16$ & $*$ & $*$\\
Titan & Cray XK7 & $299{,}008$ & $16$ & $2.7981 \times 10^{-6}$ & $1.4332 \times 10^{-9}$\\
Beskow & Cray XC40 & $53{,}632$ & $32$ & $*$ & $*$\\
\hline
\end{tabular}
\label{tab:computer_charac}
\end{table*}
The values for the latency $\alpha$ and the inverse bandwidth $\beta$ have been computed following a ``ping-pong'' test as described in~\cite{fischer:scaling}. During this test, the time required to send and receive messages of various sizes between different processors is measured and values of the latency and bandwidth are deduced.
% Show graph with results of the ping pong test?
% Resized figure
\begin{figure}
\centering
\includegraphics[width=0.45\textwidth]{./figures/alpha_beta_titan.eps}
\caption{The ping-pong test on Titan.}
\label{fig:alpha_beta_titan}
\end{figure}
% Explain that we took minimum values
% Discuss noise and "randomness" of the results on Cray machines
\subsection{Results}
\subsubsection{Timers}
The total time and the time spent in the different parts of the code were computed by measuring the CPU time at appropriate locations in the code. This method is easy to implement, reliable and adds little overhead to the simulations. Measuring the communication time is a trickier task. This is done on Mira using the HPM-sampling tool. % TODO(?): confirm the tool name and add some more info
On the Cray machines, we use the CrayPat performance analysis framework. This tool samples the code during execution at a frequency of $100\,\mathrm{Hz}$ and reports in which function the sample was found. Then, we assume that the proportion of the total time spent in a given function is equal to the proportion of samples within this function. If the number of samples is large enough, the statistics are reliable. Furthermore, this approach induces a low overhead. A profiling procedure, where all function calls are tracked, could also be used, but the overhead in time is about $50\%$ and the method is therefore rejected.
In practice, each simulation is restarted from a previously computed turbulent solution and is run for $50$ time steps. The projections for velocity and pressure are turned on after $5$ time steps, the number of previous pressure solutions saved is $20$, and the timers are turned on during the last $20$ time steps. Therefore, heavy input/output is not included and the projections are fully active during the measurement period.
% Talk about synchronization?
\subsubsection{Projections}
% Show improvement due to the new projection method
\subsubsection{Strong scaling}
% Regular figure
%\begin{figure}
%\centering
%\includegraphics{}
%\caption{}
%\end{figure}
% Resized figure
%\begin{figure}
%\centering
%\includegraphics[height=1in, width=1in]{}
%\caption{}
%\end{figure}
% Figure on 2 columns
%\begin{figure*}
%\centering
%\includegraphics{}
%\caption{}
%\end{figure*}
\section{Conclusions}
%\end{document} % This is where a 'short' article might terminate
%ACKNOWLEDGMENTS are optional
\section{Acknowledgments}
%
% The following two commands are all you need in the
% initial runs of your .tex file to
% produce the bibliography for the citations in your paper.
\bibliographystyle{abbrv}
\bibliography{easc2016} % easc2016.bib is the name of the Bibliography in this case
% You must have a proper ".bib" file
% and remember to run:
% latex bibtex latex latex
% to resolve all references
%
% ACM needs 'a single self-contained file'!
%
%APPENDICES are optional
%\balancecolumns
\appendix
%Appendix A
\section{Appendix A}
% This next section command marks the start of
% Appendix B, and does not continue the present hierarchy
%\balancecolumns % GM June 2007
% That's all folks!
\end{document}