-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
doc: RTD user guide: document using HAMR with OpenMP
- Loading branch information
Showing
5 changed files
with
114 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
|
||
# Builds the hello_openmp example against a HAMR build configured for OpenMP.

# HAMR_SOURCE and HAMR_BUILD are both passed to the compiler as include
# directories; HAMR_BUILD/lib is additionally searched for libhamr.
HAMR_SOURCE=../../../../
HAMR_BUILD=../../../../build_omp

# NVIDIA HPC Compiler
# (uncomment these two lines and comment out the AMD settings below to use it)
#CXX=`which nvc++`
#CXX_FLAGS=-mp=gpu -Minfo

# AMD ROCm compiler
# -march=gfx1030 is forwarded to the amdgcn offload target via -Xopenmp-target;
# change it to match the GPU architecture being targeted.
CXX=/opt/rocm/llvm/bin/amdclang++
CXX_FLAGS=-target x86_64-pc-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx1030

# `all` names an action rather than a file, so declare it phony: otherwise a
# file called `all` in this directory would make the target look up to date.
.PHONY: all

# No -o is given, so the executable gets the compiler's default output name.
all:
	${CXX} ${CXX_FLAGS} hello_openmp.cpp -I${HAMR_SOURCE} -I${HAMR_BUILD} -std=c++14 -L${HAMR_BUILD}/lib/ -lhamr
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
template <typename T, typename U> | ||
hamr::buffer<T> add(const hamr::buffer<T> &a1, const hamr::buffer<U> &a2) | ||
{ | ||
// get pointers to the input arrays that are safe to use on the GPU | ||
auto spa1 = a1.get_openmp_accessible(); | ||
const T *pa1 = spa1.get(); | ||
|
||
auto spa2 = a2.get_openmp_accessible(); | ||
const U *pa2 = spa2.get(); | ||
|
||
// allocate the memory for the result on the GPU, and get a pointer to it | ||
size_t n_vals = a1.size(); | ||
hamr::buffer<T> ao(hamr::buffer_allocator::openmp, n_vals, T(0)); | ||
T *pao = ao.data(); | ||
|
||
// launch the kernel to add the arrays | ||
#pragma omp target teams distribute parallel for is_device_ptr(pao, pa1, pa2) | ||
for (size_t i = 0; i < n_vals; ++i) | ||
{ | ||
pao[i] = pa1[i] + pa2[i]; | ||
} | ||
|
||
return ao; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
#include <hamr_buffer.h> | ||
|
||
#include <iostream> | ||
#include <memory> | ||
|
||
#include "add.h" | ||
#include "write.h" | ||
|
||
int main(int, char **) | ||
{ | ||
size_t n_vals = 400; | ||
|
||
// allocate and initialize to 1 on the GPU | ||
hamr::buffer<float> a0(hamr::buffer_allocator::openmp, n_vals, 1.0f); | ||
|
||
// allocate and initialize to 1 on the CPU | ||
hamr::buffer<float> a1(hamr::buffer_allocator::malloc, n_vals, 1.0f); | ||
|
||
// add the two arrays | ||
hamr::buffer<float> a2 = add(a0, a1); | ||
|
||
// write the result | ||
write(std::cerr, a2); | ||
|
||
return 0; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
template <typename T> | ||
void write(std::ostream &os, const hamr::buffer<T> &ai) | ||
{ | ||
// get pointer to the input array that is safe to use on the CPU | ||
auto spai = ai.get_cpu_accessible(); | ||
const T *pai = spai.get(); | ||
|
||
// write the elements of the array to the stream | ||
for (size_t i = 0; i < ai.size(); ++i) | ||
{ | ||
os << pai[i] << " "; | ||
} | ||
|
||
os << std::endl; | ||
} |