diff --git a/examples/collection/CMakeLists.txt b/examples/collection/CMakeLists.txt index 5e959292e0..69d1a2326d 100644 --- a/examples/collection/CMakeLists.txt +++ b/examples/collection/CMakeLists.txt @@ -10,6 +10,7 @@ set( insertable_collection reduce_integral transpose + do_flops ) foreach(EXAMPLE_NAME ${COLLECTION_EXAMPLES}) diff --git a/examples/collection/do_flops.cc b/examples/collection/do_flops.cc new file mode 100644 index 0000000000..8f8098200f --- /dev/null +++ b/examples/collection/do_flops.cc @@ -0,0 +1,302 @@ +/* +//@HEADER +// ***************************************************************************** +// +// do_flops.cc +// DARMA/vt => Virtual Transport +// +// Copyright 2019-2021 National Technology & Engineering Solutions of Sandia, LLC +// (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the U.S. +// Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * Neither the name of the copyright holder nor the names of its +// contributors may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact darma@sandia.gov +// +// ***************************************************************************** +//@HEADER +*/ + +#include + +#include +#include +#include + +/// [Do Flops example] + +#include +#include + +#include +#include +#include +/* NOTE(review): default_nrow_object and default_tol are not referenced anywhere else in this file. */ +static constexpr std::size_t const default_nrow_object = 8; +static constexpr std::size_t const default_num_objs = 4; +static constexpr double const default_tol = 1.0e-02; +static constexpr std::size_t const default_flops_per_iter = 100000; +/* Operands of the multiply in do_flops(); presumably volatile so the reads cannot be folded away -- TODO confirm. */ +volatile double a = 0.5, b = 2.2; +/* No-op sink: accepts a pointer and ignores it. */ +void +dummy( void *array ) +{ +/* Confuse the compiler so as not to optimize + away the flops in the calling routine */ +/* Cast the array as a void to eliminate unused argument warning */ + ( void ) array; +} +/* Runs n iterations of c += a * b starting from c = 0.11, then hands &c to dummy(). */ +void +do_flops( int n ) +{ + int i; + double c = 0.11; + + for ( i = 0; i < n; i++ ) { + c += a * b; + } + dummy( ( void * ) &c ); +} +/* Holds a finished flag: workFinishedHandler() sets it, isWorkFinished() reads it. */ +struct NodeObj { + bool is_finished_ = false; + void workFinishedHandler() { is_finished_ = true; } + bool isWorkFinished() { return is_finished_; } +}; +using NodeObjProxy = vt::objgroup::proxy::Proxy; +/* Collection element: each doIteration() increments iter_, calls do_flops(flopsPerIter_), then reduces onto element 0's checkCompleteCB. */ +struct GenericWork : vt::Collection { + +private: + size_t iter_ = 0; + size_t msgReceived_ = 0, totalReceive_ = 0; + size_t numObjs_ = 1; + size_t flopsPerIter_ = default_flops_per_iter; + size_t maxIter_ = 8; + NodeObjProxy objProxy_; + +public: + explicit GenericWork() : + iter_(0), msgReceived_(0), totalReceive_(0), + numObjs_(1), 
flopsPerIter_(default_flops_per_iter), maxIter_(8) + { } + + using BlankMsg = vt::CollectionMessage; + /* Message carrying the object count, flops per iteration, iteration cap and objgroup proxy that init(WorkMsg*) copies into the element. */ + struct WorkMsg : vt::CollectionMessage { + size_t numObjects = 0; + size_t flopsPerIter = 0; + size_t iterMax = 0; + NodeObjProxy objProxy; + + WorkMsg() = default; + + WorkMsg(const size_t nobjs, const size_t flops, const size_t itMax, NodeObjProxy proxy) : + numObjects(nobjs), flopsPerIter(flops), iterMax(itMax), objProxy(proxy) + { } + }; + /* Reduction target: once iter_ > maxIter_ it broadcasts NodeObj::workFinishedHandler through objProxy_; otherwise it prints the iteration count. normRes is not read. */ + void checkCompleteCB(double normRes) { + auto const iter_max_reached = iter_ > maxIter_; + + if (iter_max_reached) { + fmt::print("\n Maximum Number of Iterations Reached. \n\n"); + objProxy_.broadcast<&NodeObj::workFinishedHandler>(); + } else { + fmt::print(" ## ITER {} completed. \n", iter_); + } + } + /* One iteration: increments iter_, runs do_flops(flopsPerIter_), then reduces the value 0.0 with MaxOp onto proxy[0]. */ + void doIteration() { + iter_ += 1; + + // vt::theContext()->getTask()->startPAPIMetrics(); + + do_flops(flopsPerIter_); + + // vt::theContext()->getTask()->stopPAPIMetrics(); + // auto res = vt::theContext()->getTask()->getPAPIMetrics(); + // for (auto [name, value] : res) { + // fmt::print(" {}: {}\n", name, value); + // } + + auto proxy = this->getCollectionProxy(); + proxy.reduce<&GenericWork::checkCompleteCB, vt::collective::MaxOp>( + proxy[0], 0.0 + ); + } + /* Message carrying the sender's index in from_index; serialize() forwards to the parent type and then packs from_index. */ + struct VecMsg : vt::CollectionMessage { + using MessageParentType = vt::CollectionMessage; + vt_msg_serialize_if_needed_by_parent_or_type1(vt::IdxBase); + + VecMsg() = default; + + VecMsg(vt::IdxBase const& in_index) : + vt::CollectionMessage(), + from_index(in_index) + { } + + template + void serialize(Serializer& s) { + MessageParentType::serialize(s); + s | from_index; + } + + vt::IdxBase from_index = 0; + }; + /* Counts one received message; when the count reaches totalReceive_ it resets the counter and runs doIteration(). msg itself is not read. */ + void exchange(VecMsg *msg) { + msgReceived_ += 1; + + if (msgReceived_ == totalReceive_) { + msgReceived_ = 0; + doIteration(); + } + } + /* Per-phase entry point: with a single object it runs doIteration() directly; otherwise it sends this element's index to the left and right neighbours (presumably handled by exchange() -- TODO confirm) and prints the PAPI metrics. */ + void doIter([[maybe_unused]] BlankMsg *msg) { + if (numObjs_ == 1) { + doIteration(); + return; + } + + vt::theContext()->getTask()->startPAPIMetrics(); + + vt::IdxBase const myIdx = getIndex().x(); + auto 
proxy = this->getCollectionProxy(); + + + if (myIdx > 0) { + proxy[myIdx - 1].send( + myIdx + ); + } + + if (size_t(myIdx) < numObjs_ - 1) { + proxy[myIdx + 1].send( + myIdx + ); + } + /* NOTE(review): the start/stop pair in this function brackets only the neighbour sends; the PAPI calls around do_flops() in doIteration() are commented out. */ + vt::theContext()->getTask()->stopPAPIMetrics(); + auto res = vt::theContext()->getTask()->getPAPIMetrics(); + for (auto [name, value] : res) { + fmt::print(" {}: {}\n", name, value); + } + } + /* Sets totalReceive_ to 2, minus one when this element's index is 0 and minus one when it is numObjs_ - 1. */ + void init() { + totalReceive_ = 2; + + if (getIndex().x() == 0) { + totalReceive_ -= 1; + } + + if (getIndex().x() == numObjs_ - 1) { + totalReceive_ -= 1; + } + } + /* Copies the run parameters and the objgroup proxy out of msg, then calls init() to set totalReceive_. */ + void init(WorkMsg* msg) { + numObjs_ = msg->numObjects; + flopsPerIter_ = msg->flopsPerIter; + maxIter_ = msg->iterMax; + objProxy_ = msg->objProxy; + + init(); + } +}; +/* Invokes NodeObj::isWorkFinished on this node's element of proxy and returns the result. */ +bool isWorkDone(vt::objgroup::proxy::Proxy const& proxy) { + auto const this_node = vt::theContext()->getNode(); + return proxy[this_node].invoke<&NodeObj::isWorkFinished>(); +} +/* Reads up to three positional arguments (object count, flops per iteration, iteration cap), builds the objgroup and the collection, then repeats doIter + nextPhaseCollective until isWorkDone() is true. */ +int main(int argc, char** argv) { + size_t num_objs = default_num_objs; + size_t flopsPerIter = default_flops_per_iter; + size_t maxIter = 8; + + std::string name(argv[0]); + + vt::initialize(argc, argv); + + vt::NodeType this_node = vt::theContext()->getNode(); + vt::NodeType num_nodes = vt::theContext()->getNumNodes(); + + if (argc == 1) { + if (this_node == 0) { + fmt::print(stderr, "{}: using default arguments since none provided\n", name); + } + num_objs = default_num_objs * num_nodes; + } else if (argc == 2) { + num_objs = static_cast(strtol(argv[1], nullptr, 10)); + } else if (argc == 3) { + num_objs = static_cast(strtol(argv[1], nullptr, 10)); + flopsPerIter = static_cast(strtol(argv[2], nullptr, 10)); + } else if (argc == 4) { + num_objs = static_cast(strtol(argv[1], nullptr, 10)); + flopsPerIter = static_cast(strtol(argv[2], nullptr, 10)); + maxIter = static_cast(strtol(argv[3], nullptr, 10)); + } else { + fmt::print(stderr, "usage: {} \n", name); + return 1; + } + /* NOTE(review): the usage branch above returns after vt::initialize() without reaching vt::finalize(). */ + auto grp_proxy = vt::theObjGroup()->makeCollective("examples_generic_work"); + using BaseIndexType = 
typename vt::Index1D::DenseIndexType; + auto range = vt::Index1D(static_cast(num_objs)); + + auto col_proxy = vt::makeCollection("examples_generic_work") + .bounds(range) + .bulkInsert() + .wait(); + /* Broadcast the run parameters and the objgroup proxy to every element (presumably delivered to init(WorkMsg*) -- TODO confirm). */ + vt::runInEpochCollective([col_proxy, grp_proxy, num_objs, flopsPerIter, maxIter]{ + col_proxy.broadcastCollective( + num_objs, flopsPerIter, maxIter, grp_proxy + ); + }); + /* Each pass broadcasts doIter inside a collective epoch and then advances the phase, until this node's NodeObj reports finished. */ + while(!isWorkDone(grp_proxy)) { + vt::runInEpochCollective([col_proxy]{ + col_proxy.broadcastCollective< + GenericWork::BlankMsg, &GenericWork::doIter + >(); + }); + + vt::thePhase()->nextPhaseCollective(); + } + + vt::finalize(); + + return 0; +} +/// [Do Flops example] diff --git a/src/vt/context/runnable_context/lb_data.cc b/src/vt/context/runnable_context/lb_data.cc index 4020aa5b5a..7d8d510617 100644 --- a/src/vt/context/runnable_context/lb_data.cc +++ b/src/vt/context/runnable_context/lb_data.cc @@ -110,12 +110,8 @@ typename LBData::ElementIDStruct const& LBData::getCurrentElementID() const { std::unordered_map LBData::getPAPIMetrics() { std::unordered_map papi_metrics = {}; - char event_code_str[PAPI_MAX_STR_LEN]; - for (size_t i = 0; i < events_.size(); i++) { - papi_retval_ = PAPI_event_code_to_name(events_[i], event_code_str); - if (papi_retval_ != PAPI_OK) - handle_papi_error(papi_retval_, "LBData getPAPIMetrics: couldn't get name from event code: "); - papi_metrics[std::string(event_code_str)] = papi_values_[i]; + for (size_t i = 0; i < native_events_.size(); i++) { /* key each counter by its event-name string; papi_values_[i] is paired with native_events_[i] */ + papi_metrics[native_events_[i]] = papi_values_[i]; } papi_metrics[std::string("real_time")] = end_real_usec_ - start_real_usec_; papi_metrics[std::string("real_cycles")] = end_real_cycles_ - start_real_cycles_; diff --git a/src/vt/context/runnable_context/lb_data.h b/src/vt/context/runnable_context/lb_data.h index 6e6021132f..0746e2fca5 100644 --- a/src/vt/context/runnable_context/lb_data.h +++ b/src/vt/context/runnable_context/lb_data.h @@ -95,12 +95,16 @@ struct LBData { exit(1); } - for (const auto& event : events_) 
{ - papi_retval_ = PAPI_add_event(EventSet_, event); - char event_code_str[PAPI_MAX_STR_LEN]; + for (const auto& event_name : native_events_) { + int native = 0x0; /* receives the event code looked up by name on the next line, then is added to EventSet_. NOTE(review): the removed message at this site was labelled Constructor 2 while the added ones say Constructor 1 -- confirm the label. */ + papi_retval_ = PAPI_event_name_to_code(event_name.c_str(), &native); if (papi_retval_ != PAPI_OK) { - PAPI_event_code_to_name(event, event_code_str); - printf("LBData Constructor 2: Couldn't add %s: PAPI error %d: %s\n", event_code_str, papi_retval_, PAPI_strerror(papi_retval_)); + printf("LBData Constructor 1: Couldn't event_name_to_code for %s: PAPI error %d: %s\n",event_name.c_str(), papi_retval_, PAPI_strerror(papi_retval_)); + exit(1); + } + papi_retval_ = PAPI_add_event(EventSet_, native); + if (papi_retval_ != PAPI_OK) { + printf("LBData Constructor 1: Couldn't add %s to the PAPI Event Set: PAPI error %d: %s\n",event_name.c_str(), papi_retval_, PAPI_strerror(papi_retval_)); exit(1); } } @@ -168,8 +172,8 @@ struct LBData { int papi_retval_; long long start_real_cycles_, end_real_cycles_, start_real_usec_, end_real_usec_; long long start_virt_cycles_, end_virt_cycles_, start_virt_usec_, end_virt_usec_; - std::vector events_ = {PAPI_L1_DCM, PAPI_TOT_INS}; - long_long papi_values_[5]; + std::vector native_events_ = {"instructions", "cache-misses", "fp_arith_inst_retired.scalar_double"}; + long_long papi_values_[3]; /* NOTE(review): the 3 must stay equal to the number of names in native_events_, since getPAPIMetrics indexes both with the same i */ }; }} /* end namespace vt::ctx */ diff --git a/src/vt/context/runnable_context/lb_data.impl.h b/src/vt/context/runnable_context/lb_data.impl.h index 61bc76141d..5ceb03e6e7 100644 --- a/src/vt/context/runnable_context/lb_data.impl.h +++ b/src/vt/context/runnable_context/lb_data.impl.h @@ -70,12 +70,16 @@ LBData::LBData(ElmT* in_elm, MsgT* msg) exit(1); } - for (const auto& event : events_) { - papi_retval_ = PAPI_add_event(EventSet_, event); - char event_code_str[PAPI_MAX_STR_LEN]; + for (const auto& event_name : native_events_) { + int native = 0x0; /* receives the event code looked up by name on the next line, then is added to EventSet_. NOTE(review): the removed message at this site was labelled Constructor 1 while the added ones say Constructor 2 -- confirm the label. */ + papi_retval_ = PAPI_event_name_to_code(event_name.c_str(), &native); if (papi_retval_ != PAPI_OK) { - PAPI_event_code_to_name(event, 
event_code_str); - printf("LBData Constructor 1: Couldn't add %s: PAPI error %d: %s\n", event_code_str, papi_retval_, PAPI_strerror(papi_retval_)); + printf("LBData Constructor 2: Couldn't event_name_to_code for %s: PAPI error %d: %s\n",event_name.c_str(), papi_retval_, PAPI_strerror(papi_retval_)); + exit(1); + } /* name lookup succeeded; next the code is added to the event set */ + papi_retval_ = PAPI_add_event(EventSet_, native); + if (papi_retval_ != PAPI_OK) { + printf("LBData Constructor 2: Couldn't add %s to the PAPI Event Set: PAPI error %d: %s\n",event_name.c_str(), papi_retval_, PAPI_strerror(papi_retval_)); exit(1); } }