From 59bf2ee356a6529b63d7230d3172a9ae3aea1d27 Mon Sep 17 00:00:00 2001
From: Daniel Claudino <6d3@ornl.gov>
Date: Mon, 18 Dec 2023 21:16:36 +0000
Subject: [PATCH] Enabled shots in HPC virtualization

Signed-off-by: Daniel Claudino <6d3@ornl.gov>
---
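Usage sketch of the new shot-based path (a reviewer note placed after the "---"
separator; not applied by git am). The backend name "qpp", the 1024-shot count,
and the two-QPU split are placeholders, and xacc::getAcceleratorDecorator plus
the xasm compiler are the standard XACC entry points assumed here. In practice
this is launched under mpirun with a batch of composites (e.g., the measured
terms of an observable) so that each of the n-virtual-qpus sub-communicators
executes a share of the batch.

  #include "xacc.hpp"

  int main(int argc, char **argv) {
    xacc::Initialize(argc, argv);

    // Any shot-capable backend works; "qpp" and 1024 shots are placeholders.
    auto qpu = xacc::getAccelerator("qpp", {std::make_pair("shots", 1024)});
    auto virtQpu = xacc::getAcceleratorDecorator(
        "hpc-virtualization", qpu,
        {std::make_pair("n-virtual-qpus", 2), std::make_pair("shots", 1024)});

    auto buffer = xacc::qalloc(2);
    auto ir = xacc::getCompiler("xasm")->compile(R"(__qpu__ void bell(qbit q) {
      H(q[0]);
      CX(q[0], q[1]);
      Measure(q[0]);
      Measure(q[1]);
    })", virtQpu);

    // The decorator splits the composites across the virtual QPUs and merges
    // the per-child measurement counts back into children of the parent buffer.
    virtQpu->execute(buffer, ir->getComposites());
    buffer->print();

    xacc::Finalize();
    return 0;
  }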
 .../hpc-virtualization/MPIProxy.cpp           |  18 +-
 .../hpc-virtualization/MPIProxy.hpp           |  53 +++-
 .../hpc-virtualization/hpc_virt_decorator.cpp | 243 +++++++++++++++---
 .../hpc-virtualization/hpc_virt_decorator.hpp |  38 +--
 xacc/xacc.cpp                                 |  29 +--
 5 files changed, 295 insertions(+), 86 deletions(-)

diff --git a/quantum/plugins/decorators/hpc-virtualization/MPIProxy.cpp b/quantum/plugins/decorators/hpc-virtualization/MPIProxy.cpp
index 604ec2e17..3c54d7a33 100644
--- a/quantum/plugins/decorators/hpc-virtualization/MPIProxy.cpp
+++ b/quantum/plugins/decorators/hpc-virtualization/MPIProxy.cpp
@@ -5,13 +5,27 @@ Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle)
 **/
 
 #include "MPIProxy.hpp"
-#include "mpi.h"
 
 #include
 #include
 #include
 
+template <>
+MPI_Datatype MPIDataTypeResolver<int>::getMPIDatatype() {
+  return MPI_INT;
+}
+
+template <>
+MPI_Datatype MPIDataTypeResolver<double>::getMPIDatatype() {
+  return MPI_DOUBLE;
+}
+
+template <>
+MPI_Datatype MPIDataTypeResolver<char>::getMPIDatatype() {
+  return MPI_CHAR;
+}
+
 namespace xacc {
 
 //Temporary buffers:
@@ -129,4 +143,4 @@ std::shared_ptr<ProcessGroup> ProcessGroup::split(int my_subgroup) const
   return subgroup;
 }
 
-} //namespace xacc
\ No newline at end of file
+} //namespace xacc
diff --git a/quantum/plugins/decorators/hpc-virtualization/MPIProxy.hpp b/quantum/plugins/decorators/hpc-virtualization/MPIProxy.hpp
index eaffa06ff..a36321c38 100644
--- a/quantum/plugins/decorators/hpc-virtualization/MPIProxy.hpp
+++ b/quantum/plugins/decorators/hpc-virtualization/MPIProxy.hpp
@@ -9,6 +9,13 @@ Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle)
 **/
 
 #include
 #include
 #include
+#include "mpi.h"
+
+template <typename T>
+class MPIDataTypeResolver {
+public:
+  MPI_Datatype getMPIDatatype();
+};
 
 namespace xacc {
 
@@ -142,13 +149,57 @@ class ProcessGroup {
     different MPI processes, thus putting them into disjoint subgroups.
 **/
 std::shared_ptr<ProcessGroup> split(int my_subgroup) const;
+
+  // some useful wrappers
+
+  // I could move this to a single function, but don't
+  // want to abuse template specialization here
+  // this broadcasts a single element (int/char/double)
+  template <typename T>
+  void broadcast(T element) {
+
+    MPIDataTypeResolver<T> resolver;
+    MPI_Datatype mpiType = resolver.getMPIDatatype();
+    MPI_Bcast(&element, 1, mpiType, 0,
+              this->getMPICommProxy().getRef<MPI_Comm>());
+  }
+
+  // this broadcasts a vector
+  template <typename T>
+  void broadcast(std::vector<T> &vec) {
+
+    MPIDataTypeResolver<T> resolver;
+    MPI_Datatype mpiType = resolver.getMPIDatatype();
+    MPI_Bcast(vec.data(), vec.size(), mpiType, 0,
+              this->getMPICommProxy().getRef<MPI_Comm>());
+  };
+
+
+  // this Allgatherv's the content of local vectors
+  // into a global vector
+  template <typename T>
+  void allGatherv(std::vector<T> &local,
+                  std::vector<T> &global,
+                  std::vector<int> &nLocalData,
+                  std::vector<int> &shift) {
+
+    MPIDataTypeResolver<T> resolver;
+    MPI_Datatype mpiType = resolver.getMPIDatatype();
+    MPI_Allgatherv(local.data(), local.size(), mpiType,
+                   global.data(), nLocalData.data(),
+                   shift.data(), mpiType,
+                   this->getMPICommProxy().getRef<MPI_Comm>());
+
+  }
+
 protected:
 
 std::vector<unsigned int> process_ranks_; //global ranks of the MPI processes forming the process group
 MPICommProxy intra_comm_;                 //associated MPI intra-communicator
 std::size_t mem_per_process_;             //dynamic memory limit per process (bytes)
+
 };
 
 } //namespace xacc
 
-#endif //XACC_MPI_COMM_PROXY_HPP_
\ No newline at end of file
+#endif //XACC_MPI_COMM_PROXY_HPP_
diff --git a/quantum/plugins/decorators/hpc-virtualization/hpc_virt_decorator.cpp b/quantum/plugins/decorators/hpc-virtualization/hpc_virt_decorator.cpp
index 92c896e13..33a12a09a 100644
--- a/quantum/plugins/decorators/hpc-virtualization/hpc_virt_decorator.cpp
+++ b/quantum/plugins/decorators/hpc-virtualization/hpc_virt_decorator.cpp
@@ -14,13 +14,32 @@
 #include "hpc_virt_decorator.hpp"
 #include "InstructionIterator.hpp"
 #include "Utils.hpp"
-#include "xacc.hpp"
+#include "xacc_service.hpp"
+#include "TearDown.hpp"
 #include
 
+namespace {
+static bool hpcVirtDecoratorInitializedMpi = false;
+}
+
 namespace xacc {
 namespace quantum {
 
 void HPCVirtDecorator::initialize(const HeterogeneousMap &params) {
+
+  if (!qpuComm) {
+    // Initializing MPI here
+    int provided, isMPIInitialized;
+    MPI_Initialized(&isMPIInitialized);
+    if (!isMPIInitialized) {
+      MPI_Init_thread(0, NULL, MPI_THREAD_MULTIPLE, &provided);
+      hpcVirtDecoratorInitializedMpi = true;
+      if (provided != MPI_THREAD_MULTIPLE) {
+        xacc::warning("MPI_THREAD_MULTIPLE not provided.");
+      }
+    }
+  }
+
   decoratedAccelerator->initialize(params);
 
   if (params.keyExists("n-virtual-qpus")) {
@@ -34,6 +53,21 @@ void HPCVirtDecorator::initialize(const HeterogeneousMap &params) {
     }
     n_virtual_qpus = params.get<int>("n-virtual-qpus");
   }
+
+  if (params.keyExists("shots")) {
+    shots = params.get<int>("shots");
+    if (shots < 1) {
+      xacc::error("Invalid 'shots' parameter.");
+    }
+  }
+
+  isVqeMode = (shots < 1);
+  if (params.keyExists("vqe-mode")) {
+    isVqeMode = params.get<bool>("vqe-mode");
+    if (isVqeMode) {
+      xacc::info("Enable VQE Mode.");
+    }
+  }
 }
 
 void HPCVirtDecorator::updateConfiguration(const HeterogeneousMap &config) {
@@ -64,6 +98,7 @@ void HPCVirtDecorator::execute(
   // their quantum execution across the node sub-groups.
 
   // Get the rank and size in the original communicator
+  auto start = std::chrono::high_resolution_clock::now();
   int world_size, world_rank;
   MPI_Comm_size(MPI_COMM_WORLD, &world_size);
   MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
@@ -109,8 +144,14 @@ void HPCVirtDecorator::execute(
   // Give that sub communicator to the accelerator
   void *qpu_comm_ptr = reinterpret_cast<void *>(
       qpuComm->getMPICommProxy().getRef<MPI_Comm>());
-  decoratedAccelerator->updateConfiguration(
-      {{"mpi-communicator", qpu_comm_ptr}});
+
+  // this enables shot-based simulation
+  HeterogeneousMap properties;
+  properties.insert("mpi-communicator", qpu_comm_ptr);
+  if (!isVqeMode) {
+    properties.insert("shots", shots);
+  }
+  decoratedAccelerator->updateConfiguration(properties);
 
   // get the number of sub-communicators
   // Everybody split the CompositeInstructions vector into n_virtual_qpu
@@ -146,16 +187,21 @@ void HPCVirtDecorator::execute(
   }
 
   // broadcast the total number of children
-  MPI_Bcast(&nGlobalChildren, 1, MPI_INT, 0,
-            qpuComm->getMPICommProxy().getRef<MPI_Comm>());
+  qpuComm->broadcast(nGlobalChildren);
 
   // broadcast the number of children in each communicator
-  MPI_Bcast(nLocalChildren.data(), nLocalChildren.size(), MPI_INT, 0,
-            qpuComm->getMPICommProxy().getRef<MPI_Comm>());
+  qpuComm->broadcast(nLocalChildren);
 
-  // get expectation values and the size of the key of each child buffer
-  std::vector<double> globalExpVals(nGlobalChildren);
+  // get expectation values/bitstrings and the size of the key of each child buffer
   std::vector<int> globalKeySizes(nGlobalChildren);
+  std::vector<int> globalNumberBitStrings;
+  std::vector<double> globalExpVals;
+  if (isVqeMode) {
+    globalExpVals.resize(nGlobalChildren);
+  } else {
+    globalNumberBitStrings.resize(nGlobalChildren);
+  }
+
   if (world_rank == qpuComm->getProcessRanks()[0]) {
 
     // get displacements for the keys in each comm
@@ -167,47 +213,82 @@ void HPCVirtDecorator::execute(
 
     // get size of each key in the communicator
     std::vector<double> localExpVals;
-    std::vector<int> localKeySizes;
+    std::vector<int> localKeySizes, localNumberBitStrings;
     for (auto child : my_buffer->getChildren()) {
       localKeySizes.push_back(child->name().size());
-      localExpVals.push_back(child->getExpectationValueZ());
-    }
-
-    // gather all expectation values
-    MPI_Allgatherv(localExpVals.data(), localExpVals.size(), MPI_DOUBLE,
-                   globalExpVals.data(), nLocalChildren.data(),
-                   nKeyShift.data(), MPI_DOUBLE,
-                   zeroRanksComm->getMPICommProxy().getRef<MPI_Comm>());
+
+      if (isVqeMode) {
+        localExpVals.push_back(child->getExpectationValueZ());
+      } else {
+        localNumberBitStrings.push_back(child->getMeasurementCounts().size());
+      }
+    }
 
     // gather the size of each child key
-    MPI_Allgatherv(localKeySizes.data(), localKeySizes.size(), MPI_INT,
-                   globalKeySizes.data(), nLocalChildren.data(),
-                   nKeyShift.data(), MPI_INT,
-                   zeroRanksComm->getMPICommProxy().getRef<MPI_Comm>());
-  }
+    zeroRanksComm->allGatherv(localKeySizes, globalKeySizes, nLocalChildren, nKeyShift);
+
+    if (isVqeMode) {
+      // gather all expectation values
+      zeroRanksComm->allGatherv(localExpVals, globalExpVals, nLocalChildren, nKeyShift);
+    } else {
+      // gather all bitstrings
+      zeroRanksComm->allGatherv(localNumberBitStrings, globalNumberBitStrings, nLocalChildren, nKeyShift);
+    }
 
-  // broadcast expectation values
-  MPI_Bcast(globalExpVals.data(), globalExpVals.size(), MPI_DOUBLE, 0,
-            qpuComm->getMPICommProxy().getRef<MPI_Comm>());
+  }
 
   // broadcast size of each key
-  MPI_Bcast(globalKeySizes.data(), globalKeySizes.size(), MPI_INT, 0,
-            qpuComm->getMPICommProxy().getRef<MPI_Comm>());
+  qpuComm->broadcast(globalKeySizes);
+
+  // broadcast results
+  if (isVqeMode) {
+    // broadcast expectation values
+    qpuComm->broadcast(globalExpVals);
+  } else {
+    // broadcast number of bit strings
+    qpuComm->broadcast(globalNumberBitStrings);
+  }
 
   // get the size of all keys
   auto nGlobalKeyChars =
       std::accumulate(globalKeySizes.begin(), globalKeySizes.end(), 0);
 
+  // get total number of measured bitstrings
+  auto nGlobalBitStrings =
+      std::accumulate(globalNumberBitStrings.begin(), globalNumberBitStrings.end(), 0);
+
   // gather all keys chars
   std::vector<char> globalKeyChars(nGlobalKeyChars);
+  std::vector<int> globalBitStrings, globalCounts;
+  if (!isVqeMode) {
+    globalBitStrings.resize(nGlobalBitStrings);
+    globalCounts.resize(nGlobalBitStrings);
+  }
 
   if (world_rank == qpuComm->getProcessRanks()[0]) {
 
     // get local key char arrays
+    // and local bitstrings and counts
     std::vector<char> localKeys;
+    std::vector<int> localBitStringIndices, localCounts;
     for (auto child : my_buffer->getChildren()) {
+
       for (auto c : child->name()) {
         localKeys.push_back(c);
       }
+
+      // get bitstring decimals and counts
+      if (!isVqeMode) {
+        for (auto &count : child->getMeasurementCounts()) {
+          auto bitString = count.first;
+          // stoi is MSB
+          if (decoratedAccelerator->getBitOrder() == Accelerator::BitOrder::LSB) {
+            std::reverse(bitString.begin(), bitString.end());
+          }
+          auto index = std::stoi(bitString, nullptr, 2);
+          localBitStringIndices.push_back(index);
+          localCounts.push_back(count.second);
+        }
+      }
     }
 
     // get the size of keys in the communicator
@@ -227,18 +308,57 @@ void HPCVirtDecorator::execute(
     }
 
     // gather all key chars
-    MPI_Allgatherv(localKeys.data(), localKeys.size(), MPI_CHAR,
-                   globalKeyChars.data(), commKeySize.data(),
-                   keySizeShift.data(), MPI_CHAR,
-                   zeroRanksComm->getMPICommProxy().getRef<MPI_Comm>());
+    zeroRanksComm->allGatherv(localKeys, globalKeyChars, commKeySize, keySizeShift);
+
+    if (!isVqeMode) {
+
+      // get number of bit strings in the communicator
+      std::vector<int> commNumberBitStrings(n_virtual_qpus);
+      shift = 0;
+      for (int i = 0; i < n_virtual_qpus; i++) {
+        auto it = globalNumberBitStrings.begin() + shift;
+        commNumberBitStrings[i] = std::accumulate(it, it + nLocalChildren[i], 0);
+        shift += nLocalChildren[i];
+      }
+
+      // shifts for bit strings
+      std::vector<int> bitStringShift(n_virtual_qpus);
+      for (int i = 1; i < n_virtual_qpus; i++) {
+        bitStringShift[i] =
+            std::accumulate(commNumberBitStrings.begin(), commNumberBitStrings.begin() + i, 0);
+      }
+
+      // gather all bit strings
+      zeroRanksComm->allGatherv(localBitStringIndices, globalBitStrings, commNumberBitStrings, bitStringShift);
+      // gather all counts
+      zeroRanksComm->allGatherv(localCounts, globalCounts, commNumberBitStrings, bitStringShift);
+    }
+
   }
 
   // broadcast all keys
-  MPI_Bcast(globalKeyChars.data(), globalKeyChars.size(), MPI_CHAR, 0,
-            qpuComm->getMPICommProxy().getRef<MPI_Comm>());
+  qpuComm->broadcast(globalKeyChars);
+
+  if (!isVqeMode) {
+    // broadcast indices
+    qpuComm->broadcast(globalBitStrings);
+    qpuComm->broadcast(globalCounts);
+  }
+
+  // get binary from decimal
+  const auto getBinary = [=](int n){
+    std::string s;
+    while (n != 0) {
"0" : "1" ); + n /= 2; + } + //s += std::string(buffer->size() - s.size(), '0'); + std::reverse(s.begin(), s.end()); + return s; + }; // now every process has everything to rebuild the buffer - int shift = 0; + int shift = 0, countShift = 0; for (int i = 0; i < nGlobalChildren; i++) { // get child name @@ -248,7 +368,26 @@ void HPCVirtDecorator::execute( // create child buffer and append it to buffer auto child = xacc::qalloc(buffer->size()); child->setName(name); - child->addExtraInfo("exp-val-z", globalExpVals[i]); + + if (isVqeMode) { + child->addExtraInfo("exp-val-z", globalExpVals[i]); + } else { + + auto nChildBitStrings = globalNumberBitStrings[i]; + for (int b = 0; b < nChildBitStrings; b++) { + + auto counts = globalCounts[b + countShift]; + if (counts == 0) std::cout << "GOTCHA\n"; + auto bitStringDecimal = globalBitStrings[b + countShift]; + auto nBits = name.length() / 2; + auto bitString = getBinary(bitStringDecimal); + //auto bitString = getBinary(bitStringDecimal, nBits); + child->appendMeasurement(bitString, counts); + + } + countShift += nChildBitStrings; + } + buffer->appendChild(name, child); shift += globalKeySizes[i]; } @@ -261,6 +400,37 @@ void HPCVirtDecorator::execute( return; } +void HPCVirtDecorator::finalize() { + if (qpuComm) { + // Make sure we explicitly release this so that MPICommProxy is destroyed + // before framework shutdown (MPI_Finalize if needed) + qpuComm.reset(); + } +} + +class HPCVirtTearDown : public xacc::TearDown { +public: + virtual void tearDown() override { + auto c = xacc::getService("hpc-virtualization", false); + if (c) { + auto casted = std::dynamic_pointer_cast(c); + assert(casted); + casted->finalize(); + } + + int finalized, initialized; + MPI_Initialized(&initialized); + if (initialized) { + MPI_Finalized(&finalized); + if (!finalized && hpcVirtDecoratorInitializedMpi) { + MPI_Finalize(); + } + } + } + virtual std::string name() const override { return "xacc-hpc-virt"; } +}; + + } // namespace quantum } // namespace xacc @@ -282,6 +452,7 @@ class US_ABI_LOCAL HPCVirtActivator : public BundleActivator { context.RegisterService(c); context.RegisterService(c); + context.RegisterService(std::make_shared()); } /** @@ -291,4 +462,4 @@ class US_ABI_LOCAL HPCVirtActivator : public BundleActivator { } // namespace -CPPMICROSERVICES_EXPORT_BUNDLE_ACTIVATOR(HPCVirtActivator) \ No newline at end of file +CPPMICROSERVICES_EXPORT_BUNDLE_ACTIVATOR(HPCVirtActivator) diff --git a/quantum/plugins/decorators/hpc-virtualization/hpc_virt_decorator.hpp b/quantum/plugins/decorators/hpc-virtualization/hpc_virt_decorator.hpp index 691597b90..64d044c22 100644 --- a/quantum/plugins/decorators/hpc-virtualization/hpc_virt_decorator.hpp +++ b/quantum/plugins/decorators/hpc-virtualization/hpc_virt_decorator.hpp @@ -14,7 +14,7 @@ #ifndef XACC_HPC_VIRT_DECORATOR_HPP_ #define XACC_HPC_VIRT_DECORATOR_HPP_ -#include "mpi.h" +//#include "mpi.h" #include "xacc.hpp" #include "MPIProxy.hpp" #include "AcceleratorDecorator.hpp" @@ -26,7 +26,8 @@ namespace quantum { class HPCVirtDecorator : public AcceleratorDecorator { protected: - int n_virtual_qpus = 1; + bool isVqeMode; + int n_virtual_qpus = 1, shots = -1; // The MPI communicator for each QPU std::shared_ptr qpuComm; @@ -45,33 +46,32 @@ class HPCVirtDecorator : public AcceleratorDecorator { const std::string name() const override { return "hpc-virtualization"; } const std::string description() const override { return ""; } + void finalize(); - ~HPCVirtDecorator() override { } + ~HPCVirtDecorator() override { }; private: 
-  template <typename T>
-  std::vector<std::vector<T>> split_vector(const std::vector<T> &vec,
-                                           size_t n) {
-    std::vector<std::vector<T>> outVec;
-    size_t length = vec.size() / n;
-    size_t remain = vec.size() % n;
+template <typename T>
+std::vector<std::vector<T>> split_vector(const std::vector<T>& inputVector, size_t numSegments) {
+  std::vector<std::vector<T>> result;
 
-    size_t begin = 0;
-    size_t end = 0;
+  size_t inputSize = inputVector.size();
+  size_t segmentSize = (inputSize + numSegments - 1) / numSegments; // Ceiling division
 
-    for (size_t i = 0; i < std::min(n, vec.size()); ++i) {
-      end += (remain > 0) ? (length + !!(remain--)) : length;
+  auto begin = inputVector.begin();
+  auto end = inputVector.end();
 
-      outVec.push_back(std::vector<T>(vec.begin() + begin, vec.begin() + end));
-
-      begin = end;
+  for (size_t i = 0; i < numSegments; ++i) {
+    auto segmentEnd = std::next(begin, std::min(segmentSize, static_cast<size_t>(std::distance(begin, end))));
+    result.emplace_back(begin, segmentEnd);
+    begin = segmentEnd;
   }
 
-    return outVec;
-  }
+  return result;
+}
 };
 
 } // namespace quantum
 } // namespace xacc
 
-#endif
\ No newline at end of file
+#endif
diff --git a/xacc/xacc.cpp b/xacc/xacc.cpp
index 89a9cabce..e4ed05a20 100644
--- a/xacc/xacc.cpp
+++ b/xacc/xacc.cpp
@@ -30,10 +30,6 @@
 #include
 #include "TearDown.hpp"
 
-#ifdef MPI_ENABLED
-#include "mpi.h"
-#endif
-
 using namespace cxxopts;
 
 namespace xacc {
@@ -51,11 +47,6 @@ std::map<std::string, std::shared_ptr<CompositeInstruction>>
 std::map<std::string, std::shared_ptr<AcceleratorBuffer>> allocated_buffers{};
 std::string rootPathString = "";
-
-#ifdef MPI_ENABLED
-int isMPIInitialized;
-#endif
-
 void set_verbose(bool v) { verbose = v; }
 
 int getArgc() { return argc; }
@@ -116,18 +107,6 @@ void Initialize(int arc, char **arv) {
     XACCLogger::instance()->dumpQueue();
   }
 
-  // Initializing MPI here
-#ifdef MPI_ENABLED
-  int provided;
-  MPI_Initialized(&isMPIInitialized);
-  if (!isMPIInitialized) {
-    MPI_Init_thread(0, NULL, MPI_THREAD_MULTIPLE, &provided);
-    if (provided != MPI_THREAD_MULTIPLE) {
-      xacc::warning("MPI_THREAD_MULTIPLE not provided.");
-    }
-    isMPIInitialized = 1;
-  }
-#endif
 }
 
 void setIsPyApi() { isPyApi = true; }
@@ -869,12 +848,6 @@ void Finalize() {
     compilation_database.clear();
     allocated_buffers.clear();
     xacc::ServiceAPI_Finalize();
-    // This replaces the HPC virtualization TearDown
-#ifdef MPI_ENABLED
-    if (isMPIInitialized) {
-      MPI_Finalize();
-    }
-#endif
   }
 }
 
@@ -911,4 +884,4 @@ getGradient(const std::string name, const xacc::HeterogeneousMap &&params) {
   return getGradient(name, params);
 }
 
-} // namespace xacc
\ No newline at end of file
+} // namespace xacc
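
For reference, a standalone sketch of the bitstring/decimal round trip the
decorator performs when gathering measurement counts: LSB-ordered strings are
reversed before std::stoi, and the string is rebuilt with a getBinary-style
loop. The helper names and the explicit register-width padding parameter are
assumptions of this sketch, not part of the patch.

  #include <algorithm>
  #include <cassert>
  #include <string>

  // Encode a measured bitstring as the decimal index shipped through
  // MPI_Allgatherv, then decode it back, padding to the register width.
  int toIndex(std::string bits, bool lsbOrdered) {
    if (lsbOrdered)                          // std::stoi expects MSB first
      std::reverse(bits.begin(), bits.end());
    return std::stoi(bits, nullptr, 2);
  }

  std::string toBitString(int n, std::size_t width) {
    std::string s;
    while (n != 0) {
      s += (n % 2 == 0 ? '0' : '1');         // least significant bit first
      n /= 2;
    }
    s += std::string(width - s.size(), '0'); // pad, then restore MSB order
    std::reverse(s.begin(), s.end());
    return s;
  }

  int main() {
    assert(toIndex("0110", /*lsbOrdered=*/false) == 6);
    assert(toBitString(6, 4) == "0110");
    return 0;
  }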