Skip to content

Commit

Permalink
Merge pull request #663 from siserte/siserte-patch-3
Browse files Browse the repository at this point in the history
Siserte patch 3
  • Loading branch information
pancetta authored Apr 5, 2024
2 parents 247678d + fa0a0bf commit d1b904a
Show file tree
Hide file tree
Showing 3 changed files with 208 additions and 74 deletions.
34 changes: 34 additions & 0 deletions _bibliography/external/serghei-dmr.bib
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
% DMRlib reference paper (journal article, IEEE Transactions on Computers, 2021).
% Reference-manager-only fields (a spurious "Conference Name" note and a local
% attachment path) are intentionally not part of this entry.
@article{iserte_dmrlib_2021,
  title      = {{DMRlib}: {Easy}-{Coding} and {Efficient} {Resource} {Management} for {Job} {Malleability}},
  volume     = {70},
  copyright  = {All rights reserved},
  issn       = {1557-9956},
  shorttitle = {{DMRlib}},
  url        = {https://ieeexplore.ieee.org/document/9190024},
  doi        = {10.1109/TC.2020.3022933},
  abstract   = {Process malleability has proved to have a highly positive impact on the resource utilization and global productivity in data centers compared with the conventional static resource allocation policy. However, the non-negligible additional development effort this solution imposes has constrained its adoption by the scientific programming community. In this work, we present DMRlib, a library designed to offer the global advantages of process malleability while providing a minimalist MPI-like syntax. The library includes a series of predefined communication patterns that greatly ease the development of malleable applications. In addition, we deploy several scenarios to demonstrate the positive impact of process malleability featuring different scalability patterns. Concretely, we study two job submission modes (rigid and moldable) in order to identify the best-case scenarios for malleability using metrics such as resource allocation rate, completed jobs per second, and energy consumption. The experiments prove that our elastic approach may improve global throughput by a factor higher than 3x compared to the traditional workloads of non-malleable jobs.},
  number     = {9},
  urldate    = {2024-01-23},
  journal    = {IEEE Transactions on Computers},
  author     = {Iserte, Sergio and Mayo, Rafael and Quintana-Ortí, Enrique S. and Peña, Antonio J.},
  month      = sep,
  year       = {2021},
  pages      = {1443--1457},
}

% SERGHEI-SWE v1.0 model description paper (Geoscientific Model Development, 2023).
% Author names use the unambiguous "Last, First" form, and each accent is a
% brace-grouped BibTeX special character so sorting and labels treat it as one letter.
@Article{Caviedes2023,
  author    = {Caviedes-Voulli{\`e}me, Daniel and Morales-Hern{\'a}ndez, Mario and Norman, Matthew R. and {\"O}zgen-Xian, Ilhan},
  journal   = {Geoscientific Model Development},
  title     = {{SERGHEI} ({SERGHEI}-{SWE}) v1.0: a performance-portable high-performance parallel-computing shallow-water solver for hydrology and environmental hydraulics},
  year      = {2023},
  month     = feb,
  number    = {3},
  pages     = {977--1008},
  volume    = {16},
  doi       = {10.5194/gmd-16-977-2023},
  publisher = {Copernicus {GmbH}},
}
202 changes: 128 additions & 74 deletions _data/people.yml
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,16 @@
#
#


aach_m:
sur_name: Aach
given_name: Marcel
affiliation: jsc
position:
topics:
email: [email protected]
homepage:

acosta_j:
sur_name: Acosta
given_name: Juan
Expand Down Expand Up @@ -494,6 +504,15 @@ cavelan_a:
email: [email protected]
homepage: http://perso.ens-lyon.fr/aurelien.cavelan/

# Daniel Caviedes-Voullième (JSC). The record key follows the <surname>_<given initial> scheme.
caviedes-voullieme_d:
    sur_name: Caviedes-Voullième
    given_name: Daniel
    affiliation: jsc
    position:
    topics:
    email:
    homepage:

cela_j:
sur_name: Cela Espin
given_name: Jose Maria
Expand Down Expand Up @@ -816,6 +835,15 @@ echevarria_p:
email: [email protected]
homepage:

egele_r:
sur_name: Egele
given_name: Romain
affiliation: anl
position:
topics:
email: [email protected]
homepage:

ejarque_j:
sur_name: Ejarque
given_name: Jorge
Expand Down Expand Up @@ -1246,6 +1274,15 @@ insley_j:
email: [email protected]
homepage:

iserte_s:
sur_name: Iserte
given_name: Sergio
affiliation: bsc
position: post_doc
topics:
email: [email protected]
homepage: http://bit.ly/siserte

ishikawa_y:
sur_name: Ishikawa
given_name: Yutaka
Expand Down Expand Up @@ -1597,6 +1634,15 @@ lehmkuhl_o:
email: [email protected]
homepage: https://www.bsc.es/lehmkuhl-oriol

lindquist_n:
sur_name: Lindquist
given_name: Neil
affiliation: utk
position:
topics:
email: [email protected]
homepage:

lintermann_a:
sur_name: Lintermann
given_name: Andreas
Expand All @@ -1615,6 +1661,15 @@ lippert_t:
email: [email protected]
homepage: http://www.fz-juelich.de/SharedDocs/Personen/IAS/JSC/EN/staff/lippert_th.html

liu_x:
sur_name: Liu
given_name: Xin
affiliation: jsc
position:
topics:
email: [email protected]
homepage:

llort_g:
sur_name: Llort
given_name: German
Expand Down Expand Up @@ -1678,6 +1733,15 @@ luettgau_j:
email: [email protected]
homepage:

luszczek_p:
sur_name: Luszczek
given_name: Piotr
affiliation: utk
position: permanent
topics: Numerical methods
email: [email protected]
homepage: https://scholar.google.com/citations?user=a9df4xQAAAAJ&hl=en

martorell_x:
sur_name: Martorell
given_name: Xavier
Expand Down Expand Up @@ -1714,6 +1778,15 @@ maruyama_y:
email: [email protected]
homepage:

miyashita_o:
sur_name: Miyashita
given_name: Osamu
affiliation: riken
position: permanent
topics:
email: [email protected]
homepage: http://www2.riken.jp/TMS2012/cbp/en/member/profile/osamu_miyashita.html

mateevitsi_v:
sur_name: Mateevitsi
given_name: Victor A.
Expand Down Expand Up @@ -1858,6 +1931,15 @@ monniot_j:
email: [email protected]
homepage:

morales-hernandez_m:
sur_name: Morales-Hernández
given_name: Mario
affiliation: external
position:
topics:
email:
homepage:

moreno-chamarro_e:
sur_name: Moreno-Chamarro
given_name: Eduardo
Expand Down Expand Up @@ -2587,6 +2669,15 @@ schoebel_r:
email: [email protected]
homepage:

segovia_j:
sur_name: Segovia
given_name: José
affiliation: external
position:
topics:
email:
homepage:

seidel_ed:
sur_name: Seidel
given_name: Ed
Expand Down Expand Up @@ -2839,6 +2930,15 @@ takizawa_s:
email: [email protected]
homepage:

tama_f:
sur_name: Tama
given_name: Florence
affiliation: riken
position: permanent
topics: biology
email: [email protected]
homepage: https://www.riken.jp/en/research/labs/r-ccs/comput_struct_biol/index.html

tan_n:
sur_name: Tan
given_name: Nigel
Expand All @@ -2857,6 +2957,15 @@ tanaka_m:
email: [email protected]
homepage:

taufer_m:
sur_name: Taufer
given_name: Michela
affiliation: utk
position: permanent
topics:
email: [email protected]
homepage: https://www.eecs.utk.edu/people/faculty/dr-michela-taufer

taylor_v:
sur_name: Taylor
given_name: Valerie
Expand Down Expand Up @@ -3037,6 +3146,15 @@ valero_m:
email: [email protected]
homepage: https://www.bsc.es/cv-mateo/1-summary

valles_p:
sur_name: Vallés
given_name: Pablo
affiliation: external
position:
topics:
email:
homepage:

vazquez_m:
sur_name: Vázquez
given_name: Mariano
Expand Down Expand Up @@ -3154,6 +3272,15 @@ wozniak_j:
email: [email protected]
homepage: http://www.mcs.anl.gov/~wozniak/

wyatt_m:
sur_name: Wyatt
given_name: Michael
affiliation: utk
position:
topics:
email: [email protected]
homepage:

wylie_b:
sur_name: Wylie
given_name: Brian J.N.
Expand Down Expand Up @@ -3217,83 +3344,10 @@ zhou_a:
email: [email protected]
homepage: https://team.inria.fr/kerdata/amelie-chi-zhou/

taufer_m:
sur_name: Taufer
given_name: Michela
affiliation: utk
position: permanent
topics:
email: [email protected]
homepage: https://www.eecs.utk.edu/people/faculty/dr-michela-taufer/

luszczek_p:
sur_name: Luszczek
given_name: Piotr
affiliation: utk
position: permanent
topics: Numerical methods
email: [email protected]
homepage: https://scholar.google.com/citations?user=a9df4xQAAAAJ&hl=en

lindquist_n:
sur_name: Lindquist
given_name: Neil
affiliation: utk
position:
topics:
email: [email protected]
homepage:

wyatt_m:
sur_name: Wyatt
given_name: Michael
affiliation: utk
position:
topics:
email: [email protected]
homepage:

tama_f:
sur_name: Tama
given_name: Florence
affiliation: riken
position: permanent
topics: biology
email: [email protected]
homepage: https://www.riken.jp/en/research/labs/r-ccs/comput_struct_biol/index.html

miyashita_o:
sur_name: Miyashita
given_name: Osamu
affiliation: riken
position: permanent
topics:
email: [email protected]
homepage: http://www2.riken.jp/TMS2012/cbp/en/member/profile/osamu_miyashita.html

liu_x:
sur_name: Liu
given_name: Xin
affiliation: jsc
position:
topics:
email: [email protected]
homepage:

aach_m:
sur_name: Aach
given_name: Marcel
affiliation: jsc
position:
topics:
email: [email protected]
homepage:

egele_r:
sur_name: Egele
given_name: Romain
affiliation: anl
position:
topics:
email: [email protected]
homepage:

46 changes: 46 additions & 0 deletions _projects/serghei-dmr.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
---
layout: page_project
title: Malleable implementation of SERGHEI-SWE with DMR
date: 2024-04-24
updated: 2024-04-04
navbar: Research
subnavbar: Projects
project_url:
status: starting
topics:
- apps
- prog_lang
keywords:
- Dynamic Resources
- Malleability
- Shallow-water Solver
- Computer Simulation
- Performance-aware Computing
head: iserte_s
members:
- morales-hernandez_m
- segovia_j
- valles_p
- caviedes-voullieme_d
- pena_a
---

## Research topic and goals
SERGHEI-SWE is a performance-portable high-performance parallel-computing shallow-water solver for hydrology and environmental studies. A key application is flood simulation, in which the wet computational domain can dramatically change size. Wet areas are much more computationally expensive to simulate than dry ones, so having additional data processing resources available during high-water stages is critical for providing results in a reasonable time frame. However, underutilizing the resources during dry stages harms the productivity and resource-use efficiency of the HPC facility, and is unfair to the other users, whose experiments are delayed. The variability of the flood size cannot be determined a priori, and therefore optimal resource selection for the job is difficult, especially for long simulations.

This project aims to design, develop, and implement a malleable version of SERGHEI-SWE with the Dynamic Management of Resources (DMR) technology, providing a flexible framework for enabling malleability in HPC applications.

Specifically, the project aims to:
* Increase cluster productivity, in terms of completed simulations per unit of time, when running several instances of the malleable SERGHEI-SWE in an HPC cluster.
* Extend DMR with performance-aware reconfiguration techniques.
* Design efficient data redistribution strategies for dynamic resources in SERGHEI-SWE.

## Impact and publications

None yet.

## Future plans

Study new reconfiguration strategies and evaluate them on a large workload.

## References

0 comments on commit d1b904a

Please sign in to comment.