Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add libthapi-ctl to start/stop collecting #233

Open
wants to merge 30 commits into
base: master
Choose a base branch
from
Open
Changes from 1 commit
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
cb3a044
add libthapi-ctl
bd4 Jun 7, 2024
94d4659
xprof: separate bookkeeping events for cuda
bd4 Jun 7, 2024
45f92f2
WIP gtensor int test
bd4 Jun 9, 2024
21cffca
libthapi-ctl: logging with THAPI_CTL_LOG_LEVEL
bd4 Jun 10, 2024
c387ae0
cuda: fix bookkeeping events
bd4 Jun 13, 2024
beb1e54
doxygen comments
bd4 Jun 13, 2024
32b12dc
WIP cuda event based enable/disable
bd4 Jun 14, 2024
20a70f5
clean up WIP cuda event based start/stop
bd4 Jun 20, 2024
a7adec5
log message clean up
bd4 Jun 20, 2024
4dbbe2a
fix
bd4 Jun 20, 2024
b3ddb0c
fix
bd4 Jun 24, 2024
32bddbe
cuda exclusions, cleanup
bd4 Jun 24, 2024
41ef9ff
fix exclude print
bd4 Jun 24, 2024
f567f4c
rename env vars, fix cuda, TRACE_FROM_START env var
bd4 Jun 25, 2024
bf35bc5
working opencl start/stop
bd4 Jun 25, 2024
fefd4b8
make test names match new arg name
bd4 Jun 25, 2024
9b3fd78
WIP better thapi-ctl int tests
bd4 Jun 26, 2024
aafc959
fix cl int test
bd4 Jun 27, 2024
23f280f
missing header
bd4 Jun 27, 2024
803e64f
add opencl fn to header
bd4 Jun 27, 2024
2fd0a71
libthapictl: use autotools dep for lttng-ctl
bd4 Jun 27, 2024
aea464b
WIP omp start/stop, int tests
bd4 Jun 27, 2024
fb6e46f
libthapictl: add design readme
bd4 Jun 27, 2024
72b57fb
ci: use lttng ppa to get latest stable
bd4 Jun 27, 2024
743757c
fix libthapictl header missing in dist
bd4 Jun 28, 2024
33ae513
xprof: make thapi-ctl work w/o PATH mod
bd4 Jun 28, 2024
4ea5771
ci: fix thapictl empty sync daemon
bd4 Jun 28, 2024
018682e
working gtensor int test for cuda
bd4 Jun 28, 2024
edf028b
use thapi-ctl for hip
bd4 Jun 28, 2024
3abe9a4
improve gtensor int tests, add readme
bd4 Jun 29, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
WIP gtensor int test
  • Loading branch information
bd4 committed Jul 22, 2024
commit 45f92f28f3f0112761b03436a9de7bdd9a880e28
30 changes: 30 additions & 0 deletions integration_tests/gtensor/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
cmake_minimum_required(VERSION 3.23 FATAL_ERROR)

project(thapi-integration-test)

# The CUDA toolchain is only required when testing the cuda gtensor backend.
if(GTENSOR_DEVICE STREQUAL "cuda")
  enable_language(CUDA)
  find_package(CUDAToolkit REQUIRED)
endif()

# Location of the THAPI install to test against (headers + libthapi-ctl.so).
set(THAPI_PATH "/opt/thapi" CACHE STRING "Path to thapi installation")

# Fix: message previously printed the misspelled name "THAIP_PATH".
message(STATUS "${PROJECT_NAME}: THAPI_PATH=${THAPI_PATH}")

# add dependencies
include(cmake/CPM.cmake)
CPMFindPackage(NAME gtensor
  GITHUB_REPOSITORY wdmapp/gtensor
  GIT_TAG "main"
  OPTIONS "GTENSOR_ENABLE_BLAS ON"
          "GTENSOR_ENABLE_SOLVER ON")

# Imported interface target wrapping the installed libthapi-ctl; consumers
# get the THAPI include dir plus the shared library and its lttng-ctl
# runtime dependency.
add_library(thapi_ctl INTERFACE IMPORTED)
target_include_directories(thapi_ctl INTERFACE "${THAPI_PATH}/include")
target_link_libraries(thapi_ctl INTERFACE "${THAPI_PATH}/lib/libthapi-ctl.so")
target_link_libraries(thapi_ctl INTERFACE "lttng-ctl")

add_executable(axpy_start_stop)
# target_gtensor_sources is provided by the gtensor package; it marks the
# source for the selected device backend (e.g. compiles as CUDA).
target_gtensor_sources(axpy_start_stop PRIVATE axpy_start_stop.cxx)
target_link_libraries(axpy_start_stop gtensor::gtensor)
target_link_libraries(axpy_start_stop thapi_ctl)
94 changes: 94 additions & 0 deletions integration_tests/gtensor/axpy_start_stop.cxx
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@

#include <iostream>

#include <gtensor/gtensor.h>

#include "thapi-ctl.h"

// provides convenient shortcuts for common gtensor functions, for example
// underscore ('_') to represent open slice ends.
using namespace gt::placeholders;

/**
 * Generic daxpy: computes a * x + y for host or device gtensor expressions.
 *
 * The result is returned by value; guaranteed copy elision / move semantics
 * move it into the LHS it is assigned to rather than copying the data
 * (important for large arrays, since gtensor — like std::vector — has copy
 * semantics by default). Inputs are taken by const reference for the same
 * reason.
 */
template <typename XExpr, typename YExpr>
auto daxpy(double a, const XExpr& x, const YExpr& y)
{
  // 'a * x + y' builds an un-evaluated gfunction expression template.
  // gt::eval forces evaluation in the return statement so a concrete
  // gtensor is created. Omitting gt::eval would instead return the lazy
  // expression, which could be composed with further operations before
  // being evaluated or assigned to a gtensor.
  return gt::eval(a * x + y);
}

/// Integration-test driver: repeatedly runs a device daxpy kernel while
/// toggling THAPI tracing via thapi_start_tracing()/thapi_stop_tracing(),
/// so the resulting trace should contain only the launches from the
/// odd-numbered iterations. Prints a spot-check slice of the result.
int main(int argc, char** argv)
{
int n = 1024 * 1024;
int nprint = 32;

double a = 0.5;

// Define and allocate two 1d vectors of size n on the host. Declare
// but don't allocate a third 1d host vector for storing the result.
gt::gtensor<double, 1, gt::space::host> h_x(gt::shape(n));
gt::gtensor<double, 1, gt::space::host> h_y = gt::empty_like(h_x);
gt::gtensor<double, 1, gt::space::host> h_axpy;

// Initialize the vectors: x is twice its index value and y is equal
// to its index value. We will perform .5*x + y, so the result should be
// axpy(i) = 2i.
for (int i = 0; i < n; i++) {
h_x(i) = 2.0 * static_cast<double>(i);
h_y(i) = static_cast<double>(i);
}

// This test exercises device tracing, so a host-only gtensor build is a
// configuration error.
#ifndef GTENSOR_HAVE_DEVICE
#error "device required"
#endif

// Define and allocate device versions of h_x and h_y, and declare
// a variable for the result on gpu.
gt::gtensor<double, 1, gt::space::device> d_x(gt::shape(n));
gt::gtensor<double, 1, gt::space::device> d_y = gt::empty_like(d_x);
gt::gtensor<double, 1, gt::space::device> d_axpy;

// Explicit copies of input from host to device. Note that this is an
// overload of the copy function for gtensor and gtensor_span types, not
// std::copy which has a different signature. The source is the first
// argument and destination the second argument. Currently thrust::copy is
// used under the hood in the implementation.
gt::copy(h_x, d_x);
gt::copy(h_y, d_y);

// 'expr' is an un-evaluated expression template; each gt::eval below
// generates and launches a computation kernel on the device.
auto expr = a * d_x + d_y;

// Enable tracing only on odd iterations, so exactly half of the 10
// kernel launches (plus the associated D2H copies) should appear in
// the trace — this is what the integration test checks for.
for (int i = 0; i < 10; i++) {
if (i % 2 == 1)
thapi_start_tracing();

d_axpy = gt::eval(expr);
h_axpy = gt::empty_like(h_x);
gt::copy(d_axpy, h_axpy);

if (i % 2 == 1)
thapi_stop_tracing();
}

// Define a slice to print a subset of elements (every n/nprint-th) for
// spot checking the result.
auto print_slice = gt::gslice(_, _, n / nprint);
std::cout << "a = " << a << std::endl;
std::cout << "x = " << h_x.view(print_slice) << std::endl;
std::cout << "y = " << h_y.view(print_slice) << std::endl;
std::cout << "a*x + y = " << h_axpy.view(print_slice) << std::endl;
}
24 changes: 24 additions & 0 deletions integration_tests/gtensor/cmake/CPM.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# SPDX-License-Identifier: MIT
#
# SPDX-FileCopyrightText: Copyright (c) 2019-2023 Lars Melchior and contributors

# NOTE(review): this is the standard vendored CPM.cmake bootstrap ("get_cpm")
# script; keep it byte-identical to upstream when bumping the version below.
set(CPM_DOWNLOAD_VERSION 0.39.0)
# SHA256 of the pinned CPM.cmake release, verified after download below.
set(CPM_HASH_SUM "66639bcac9dd2907b2918de466783554c1334446b9874e90d38e3778d404c2ef")

# Prefer a shared on-disk cache (CMake variable, then environment variable)
# so repeated configures across projects reuse one download; otherwise fall
# back to this project's build directory.
if(CPM_SOURCE_CACHE)
set(CPM_DOWNLOAD_LOCATION "${CPM_SOURCE_CACHE}/cpm/CPM_${CPM_DOWNLOAD_VERSION}.cmake")
elseif(DEFINED ENV{CPM_SOURCE_CACHE})
set(CPM_DOWNLOAD_LOCATION "$ENV{CPM_SOURCE_CACHE}/cpm/CPM_${CPM_DOWNLOAD_VERSION}.cmake")
else()
set(CPM_DOWNLOAD_LOCATION "${CMAKE_BINARY_DIR}/cmake/CPM_${CPM_DOWNLOAD_VERSION}.cmake")
endif()

# Expand relative path. This is important if the provided path contains a tilde (~)
get_filename_component(CPM_DOWNLOAD_LOCATION ${CPM_DOWNLOAD_LOCATION} ABSOLUTE)

# file(DOWNLOAD ...) is a no-op if the file already exists with a matching
# hash, and fails the configure if the hash does not match.
file(DOWNLOAD
https://github.com/cpm-cmake/CPM.cmake/releases/download/v${CPM_DOWNLOAD_VERSION}/CPM.cmake
${CPM_DOWNLOAD_LOCATION} EXPECTED_HASH SHA256=${CPM_HASH_SUM}
)

include(${CPM_DOWNLOAD_LOCATION})