diff --git a/packages/tpetra/core/src/CMakeLists.txt b/packages/tpetra/core/src/CMakeLists.txt index 5a9e36c27495..d7a26fa72032 100644 --- a/packages/tpetra/core/src/CMakeLists.txt +++ b/packages/tpetra/core/src/CMakeLists.txt @@ -937,5 +937,5 @@ SET_PROPERTY( # / from this directory, or to / from the 'impl' subdirectory. That ensures # that running "make" will also rerun CMake in order to regenerate Makefiles. # -# Here's another change +# Here's another change. Again. diff --git a/packages/tpetra/core/src/Tpetra_Details_DeepCopyCounter.cpp b/packages/tpetra/core/src/Tpetra_Details_DeepCopyCounter.cpp deleted file mode 100644 index e63324a34261..000000000000 --- a/packages/tpetra/core/src/Tpetra_Details_DeepCopyCounter.cpp +++ /dev/null @@ -1,91 +0,0 @@ -/* -// @HEADER -// *********************************************************************** -// -// Tpetra: Templated Linear Algebra Services Package -// Copyright (2008) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// ************************************************************************ -// @HEADER -*/ -#include "Tpetra_Details_DeepCopyCounter.hpp" -#include "TpetraCore_config.h" -#include "Kokkos_Core.hpp" -#include - -namespace Tpetra { -namespace Details { - - namespace DeepCopyCounterDetails { - void kokkosp_begin_deep_copy(Kokkos::Tools::SpaceHandle dst_handle, const char* dst_name, const void* dst_ptr, - Kokkos::Tools::SpaceHandle src_handle, const char* src_name, const void* src_ptr, - uint64_t size) { - - if(DeepCopyCounter::count_active) { - if(strcmp(dst_handle.name,src_handle.name)) { - DeepCopyCounter::count++; - } - } - } - - - }// end DeepCopyCounterDetails - - - // Initialize - bool DeepCopyCounter::count_active=false; - size_t DeepCopyCounter::count=0; - - - void DeepCopyCounter::start() { - count_active=true; - Kokkos::Tools::Experimental::set_begin_deep_copy_callback(DeepCopyCounterDetails::kokkosp_begin_deep_copy); - } - - void DeepCopyCounter::reset() { - count=0; - } - - size_t DeepCopyCounter::stop() { - count_active=false; - return count; - } - - size_t DeepCopyCounter::get_count() { - return count; - } - - -} // namespace Details -} // namespace Tpetra - diff --git 
a/packages/tpetra/core/src/Tpetra_Details_KokkosCounter.cpp b/packages/tpetra/core/src/Tpetra_Details_KokkosCounter.cpp new file mode 100644 index 000000000000..7dd95c747adb --- /dev/null +++ b/packages/tpetra/core/src/Tpetra_Details_KokkosCounter.cpp @@ -0,0 +1,208 @@ +/* +// @HEADER +// *********************************************************************** +// +// Tpetra: Templated Linear Algebra Services Package +// Copyright (2008) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// ************************************************************************ +// @HEADER +*/ +#include "Tpetra_Details_KokkosCounter.hpp" +#include "TpetraCore_config.h" +#include "Kokkos_Core.hpp" +#include "Teuchos_TestForException.hpp" +#include + +namespace Tpetra { +namespace Details { + + + /***************************** Deep Copy *****************************/ + namespace DeepCopyCounterDetails { + // Static variables + bool is_initialized=true; + size_t count_same=0; + size_t count_different=0; + bool count_active=false; + + void kokkosp_begin_deep_copy(Kokkos::Tools::SpaceHandle dst_handle, const char* dst_name, const void* dst_ptr, + Kokkos::Tools::SpaceHandle src_handle, const char* src_name, const void* src_ptr, + uint64_t size) { + + if(count_active) { + if(strcmp(dst_handle.name,src_handle.name)) + count_different++; + else + count_same++; + } + } + + }// end DeepCopyCounterDetails + + + void DeepCopyCounter::start() { + DeepCopyCounterDetails::count_active=true; + Kokkos::Tools::Experimental::set_begin_deep_copy_callback(DeepCopyCounterDetails::kokkosp_begin_deep_copy); + } + + void DeepCopyCounter::reset() { + DeepCopyCounterDetails::count_same=0; + DeepCopyCounterDetails::count_different=0; + } + + void DeepCopyCounter::stop() { + DeepCopyCounterDetails::count_active=false; + } + + size_t DeepCopyCounter::get_count_same_space() { + return DeepCopyCounterDetails::count_same; + } + + size_t 
DeepCopyCounter::get_count_different_space() {
+    // Deep copies tallied while counting was active whose destination and
+    // source space handles carried different names.
+    return DeepCopyCounterDetails::count_different;
+  }
+
+
+
+  /***************************** Fence *****************************/
+
+
+  namespace FenceCounterDetails {
+
+    // Static variables
+    // Set by initialize() once the count arrays have been sized.
+    bool is_initialized=false;
+    // Fences are only tallied while this flag is true.
+    bool count_active=false;
+    // Tallies indexed by the integer value of Kokkos' DeviceType:
+    // count_global is bumped when the fence's instance id is the
+    // GlobalDeviceSynchronization marker, count_instance otherwise.
+    std::vector<size_t> count_instance;
+    std::vector<size_t> count_global;
+    // Number of bins in each count array; 0 until initialize() runs.
+    int num_devices=0;
+
+
+    /// \brief Begin-fence callback that tallies a fence in the bin of its device type.
+    ///
+    /// \param name [in] Fence label supplied by the caller (unused).
+    /// \param deviceId [in] Packed device id, decoded with identifier_from_devid().
+    /// \param handle [in] Handle pointer supplied by the caller (unused).
+    ///
+    /// Does nothing unless count_active is set.
+    void kokkosp_begin_fence(const char* name, const uint32_t deviceId,
+                             uint64_t* handle) {
+
+      if(count_active) {
+        using namespace Kokkos::Tools::Experimental;
+        ExecutionSpaceIdentifier eid = identifier_from_devid(deviceId);
+
+        // Figure out what count bin to stick this in
+        int idx = (int) eid.type;
+
+        // The arrays hold num_devices == (int) DeviceType::Unknown bins, so a
+        // type that decodes to Unknown (or anything past it) has no bin.
+        // Skip it rather than write past the end of the vectors.
+        if(idx < 0 || idx >= num_devices)
+          return;
+
+        if(eid.instance_id == Impl::int_for_synchronization_reason(SpecialSynchronizationCases::GlobalDeviceSynchronization))
+          count_global[idx]++;
+        else
+          count_instance[idx]++;
+      }
+    }
+
+
+    /// \brief Text label for the device type whose integer value is \c i.
+    ///
+    /// \return The label, or an empty string if \c i matches none of the
+    ///   device types listed below.
+    ///
+    /// NOTE(review): callers presumably compare this label against an
+    /// execution space's name(); confirm against the FenceCounter getters.
+    std::string get_label(int i) {
+      using namespace Kokkos::Tools::Experimental;
+      DeviceType i_type = devicetype_from_uint32t(i);
+      std::string device_label;
+      if      (i_type == DeviceType::Serial)       device_label="Serial";
+      else if (i_type == DeviceType::OpenMP)       device_label="OpenMP";
+      else if (i_type == DeviceType::Cuda)         device_label="Cuda";
+      else if (i_type == DeviceType::HIP)          device_label="HIP";
+      else if (i_type == DeviceType::OpenMPTarget) device_label="OpenMPTarget";
+      else if (i_type == DeviceType::HPX)          device_label="HPX";
+      // Was misspelled "Threats", which no device-name lookup could match.
+      else if (i_type == DeviceType::Threads)      device_label="Threads";
+      else if (i_type == DeviceType::SYCL)         device_label="SYCL";
+      else if (i_type == DeviceType::OpenACC)      device_label="OpenACC";
+      else if (i_type == DeviceType::Unknown)      device_label="Unknown";
+
+      return device_label;
+    }
+
+    /// \brief Size both count arrays to one zeroed bin per device type below
+    ///   DeviceType::Unknown and mark the counter as initialized.
+    void initialize() {
+      using namespace Kokkos::Tools::Experimental;
+      num_devices = (int) DeviceType::Unknown;
+      // assign() both sizes and zero-fills, so no separate resize() is needed.
+      count_instance.assign(num_devices,0);
+      count_global.assign(num_devices,0);
+      is_initialized=true;
+    }
+
+  }// end
FenceCounterDetails + + + + + void FenceCounter::start() { + if(!FenceCounterDetails::is_initialized) + FenceCounterDetails::initialize(); + FenceCounterDetails::count_active=true; + Kokkos::Tools::Experimental::set_begin_fence_callback(FenceCounterDetails::kokkosp_begin_fence); + } + + void FenceCounter::reset() { + FenceCounterDetails::count_instance.assign(FenceCounterDetails::num_devices,0); + FenceCounterDetails::count_global.assign(FenceCounterDetails::num_devices,0); + } + + void FenceCounter::stop() { + FenceCounterDetails::count_active=false; + } + + size_t FenceCounter::get_count_global(const std::string & device) { + using namespace Kokkos::Tools::Experimental; + for(int i=0;i +#include namespace Tpetra { namespace Details { -/// \brief Counter for Kokkos::deep_copy's between memory spaces. -class DeepCopyCounter { -public: +/// \brief Counter for Kokkos::deep_copy calls +namespace DeepCopyCounter { /// \brief Start the deep_copy counter - static void start(); + void start(); /// \brief Reset the deep_copy counter - static void reset(); + void reset(); /// \brief Stop the deep_copy counter - static size_t stop(); + void stop(); - /// \brief Query the deep_copy counter - static size_t get_count(); + /// \brief Query the deep_copy counter for copies in the same space + size_t get_count_same_space(); + /// \brief Query the deep_copy counter for copies between different spaces + size_t get_count_different_space(); - static size_t count; - static bool count_active; +} + +/// \brief Counter for Kokkos::fence calls +namespace FenceCounter { + /// \brief Start the fence counter + void start(); + + /// \brief Reset the fence counter + void reset(); + + /// \brief Stop the fence counter + void stop(); + + /// \brief Query the fence counter for given device, for an exec_space_instance.fence() + size_t get_count_instance(const std::string & device); + + /// \brief Query the fence counter for given device, for an Kokkos::fence() + size_t get_count_global(const 
std::string & device); +} -}; } // namespace Details } // namespace Tpetra -#endif // TPETRA_DETAILS_DEEP_COPY_COUNTER_HPP +#endif // TPETRA_DETAILS_KOKKOS_COUNTER_HPP diff --git a/packages/tpetra/core/test/MultiVector/MultiVector_UnitTests.cpp b/packages/tpetra/core/test/MultiVector/MultiVector_UnitTests.cpp index 89af7536af02..ffceaeaa2d15 100644 --- a/packages/tpetra/core/test/MultiVector/MultiVector_UnitTests.cpp +++ b/packages/tpetra/core/test/MultiVector/MultiVector_UnitTests.cpp @@ -42,7 +42,7 @@ #include "Tpetra_TestingUtilities.hpp" #include "Tpetra_MultiVector.hpp" #include "Tpetra_Vector.hpp" -#include "Tpetra_Details_DeepCopyCounter.hpp" +#include "Tpetra_Details_KokkosCounter.hpp" #include "Kokkos_ArithTraits.hpp" #include "Teuchos_CommHelpers.hpp" #include "Teuchos_DefaultSerialComm.hpp" @@ -5228,42 +5228,147 @@ namespace { } - // Stop / Start (reset first to clear counts from previous unit test calls) + // Stop / Start (reset first to clear counts from previous unit test calls) Tpetra::Details::DeepCopyCounter::reset(); Tpetra::Details::DeepCopyCounter::start(); Kokkos::deep_copy(y_h,x_d); - size_t count = Tpetra::Details::DeepCopyCounter::stop(); + Tpetra::Details::DeepCopyCounter::stop(); + size_t count = Tpetra::Details::DeepCopyCounter::get_count_different_space(); TEST_EQUALITY(count,correct_count); // Reset / get_count (should be zero now) Tpetra::Details::DeepCopyCounter::reset(); - count = Tpetra::Details::DeepCopyCounter::get_count(); + count = Tpetra::Details::DeepCopyCounter::get_count_different_space(); TEST_EQUALITY(count,0); // Second Stop / Start (should have the original count) Tpetra::Details::DeepCopyCounter::start(); Kokkos::deep_copy(y_h,x_d); - count = Tpetra::Details::DeepCopyCounter::stop(); + Tpetra::Details::DeepCopyCounter::stop(); + count = Tpetra::Details::DeepCopyCounter::get_count_different_space(); TEST_EQUALITY(count,correct_count); // This guy should not get counted, since the counter is stopped 
Kokkos::deep_copy(y_h,x_d); - count = Tpetra::Details::DeepCopyCounter::get_count(); + count = Tpetra::Details::DeepCopyCounter::get_count_different_space(); TEST_EQUALITY(count,correct_count); // Third Second Stop / Start (should have double the original count) Tpetra::Details::DeepCopyCounter::start(); Kokkos::deep_copy(y_h,x_d); - count = Tpetra::Details::DeepCopyCounter::stop(); + Tpetra::Details::DeepCopyCounter::stop(); + count = Tpetra::Details::DeepCopyCounter::get_count_different_space(); TEST_EQUALITY(count,2*correct_count); } + TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL( MultiVector, FenceCounterCheck, LO , GO , Scalar , Node ) { + RCP > comm = Tpetra::getDefaultComm (); + auto exec_space = typename Node::execution_space(); + const std::string space = exec_space.name(); + + /***********************************************************************/ + // Global fences + size_t global_correct_count=1; + + // Stop / Start (reset first to clear counts from previous unit test calls) + Tpetra::Details::FenceCounter::reset(); + Tpetra::Details::FenceCounter::start(); + Kokkos::fence(); + Tpetra::Details::FenceCounter::stop(); + size_t global_count = Tpetra::Details::FenceCounter::get_count_global(space); + size_t instance_count = Tpetra::Details::FenceCounter::get_count_instance(space); + TEST_EQUALITY(global_count,global_correct_count); + TEST_EQUALITY(instance_count,0); + + // Reset / get_count (should be zero now) + Tpetra::Details::FenceCounter::reset(); + global_count =Tpetra::Details::FenceCounter::get_count_global(space); + instance_count = Tpetra::Details::FenceCounter::get_count_instance(space); + TEST_EQUALITY(global_count,0); + TEST_EQUALITY(instance_count,0); + + // Second Stop / Start (should have the original count) + Tpetra::Details::FenceCounter::start(); + Kokkos::fence(); + Tpetra::Details::FenceCounter::stop(); + global_count =Tpetra::Details::FenceCounter::get_count_global(space); + instance_count = 
Tpetra::Details::FenceCounter::get_count_instance(space); + TEST_EQUALITY(global_count,global_correct_count); + TEST_EQUALITY(instance_count,0); + + // This guy should not get counted, since the counter is stopped + Kokkos::fence(); + global_count =Tpetra::Details::FenceCounter::get_count_global(space); + instance_count = Tpetra::Details::FenceCounter::get_count_instance(space); + TEST_EQUALITY(global_count,global_correct_count); + TEST_EQUALITY(instance_count,0); + + // Third Second Stop / Start (should have double the original count) + Tpetra::Details::FenceCounter::start(); + Kokkos::fence(); + Tpetra::Details::FenceCounter::stop(); + global_count =Tpetra::Details::FenceCounter::get_count_global(space); + instance_count = Tpetra::Details::FenceCounter::get_count_instance(space); + TEST_EQUALITY(global_count,2*global_correct_count); + TEST_EQUALITY(instance_count,0); + + /***********************************************************************/ + // Instance Fences + size_t instance_correct_count = 1; + + // Stop / Start (reset first to clear counts from previous unit test calls) + Tpetra::Details::FenceCounter::reset(); + Tpetra::Details::FenceCounter::start(); + exec_space.fence(); + Tpetra::Details::FenceCounter::stop(); + global_count =Tpetra::Details::FenceCounter::get_count_global(space); + instance_count = Tpetra::Details::FenceCounter::get_count_instance(space); + TEST_EQUALITY(global_count,0); + TEST_EQUALITY(instance_count,instance_correct_count); + + // Reset / get_count (should be zero now) + Tpetra::Details::FenceCounter::reset(); + global_count =Tpetra::Details::FenceCounter::get_count_global(space); + instance_count = Tpetra::Details::FenceCounter::get_count_instance(space); + TEST_EQUALITY(global_count,0); + TEST_EQUALITY(instance_count,0); + + // Second Stop / Start (should have the original count) + Tpetra::Details::FenceCounter::start(); + exec_space.fence(); + Tpetra::Details::FenceCounter::stop(); + global_count 
=Tpetra::Details::FenceCounter::get_count_global(space); + instance_count = Tpetra::Details::FenceCounter::get_count_instance(space); + TEST_EQUALITY(global_count,0); + TEST_EQUALITY(instance_count,instance_correct_count); + + // This guy should not get counted, since the counter is stopped + exec_space.fence(); + global_count =Tpetra::Details::FenceCounter::get_count_global(space); + instance_count = Tpetra::Details::FenceCounter::get_count_instance(space); + TEST_EQUALITY(global_count,0); + TEST_EQUALITY(instance_count,instance_correct_count); + + // Third Second Stop / Start (should have double the original count) + Tpetra::Details::FenceCounter::start(); + exec_space.fence(); + Tpetra::Details::FenceCounter::stop(); + global_count =Tpetra::Details::FenceCounter::get_count_global(space); + instance_count = Tpetra::Details::FenceCounter::get_count_instance(space); + TEST_EQUALITY(global_count,0); + TEST_EQUALITY(instance_count,2*instance_correct_count); + } + + + + + #ifdef KOKKOS_ENABLE_OPENMP TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL( MultiVector, OpenMP_ThreadedSum, LO , GO , Scalar , Node ) { @@ -5337,7 +5442,8 @@ namespace { TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT( MultiVector, DimsWithAllZeroRows, LO, GO, SCALAR, NODE ) \ TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT( MultiVector, Swap, LO, GO, SCALAR, NODE ) \ TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT( MultiVector, DualViewRefcountCheck, LO, GO, SCALAR, NODE ) \ - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT( MultiVector, CopyCounterCheck, LO, GO, SCALAR, NODE ) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT( MultiVector, CopyCounterCheck, LO, GO, SCALAR, NODE ) \ + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT( MultiVector, FenceCounterCheck, LO, GO, SCALAR, NODE ) #ifdef KOKKOS_ENABLE_OPENMP // Add special test for OpenMP