Skip to content

Commit

Permalink
Reduce GPU memory usage when using reset(q) in user programs (#942)
Browse files Browse the repository at this point in the history
  • Loading branch information
bmhowe23 authored Nov 17, 2023
1 parent b6a7ce2 commit f5d107c
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 9 deletions.
11 changes: 2 additions & 9 deletions runtime/nvqir/custatevec/CuStateVecCircuitSimulator.cu
Original file line number Diff line number Diff line change
Expand Up @@ -113,9 +113,6 @@ protected:
/// @brief The size of the extra workspace
size_t extraWorkspaceSizeInBytes = 0;

/// @brief Count the number of resets.
int nResets = 0;

custatevecComputeType_t cuStateVecComputeType = CUSTATEVEC_COMPUTE_64F;
cudaDataType_t cuStateVecCudaDataType = CUDA_C_64F;
std::random_device randomDevice;
Expand Down Expand Up @@ -163,10 +160,8 @@ protected:
if (extraWorkspaceSizeInBytes > 0)
HANDLE_CUDA_ERROR(cudaMalloc(&extraWorkspace, extraWorkspaceSizeInBytes));

// When we perform a deallocation we apply a
// qubit reset, and the state does not shrink (trying to minimize device
// memory manipulations), but nQubitsAllocated decrements.
auto localNQubitsAllocated = nQubitsAllocated + nResets;
auto localNQubitsAllocated =
stateDimension > 0 ? std::log2(stateDimension) : 0;

// apply gate
HANDLE_ERROR(custatevecApplyMatrix(
Expand Down Expand Up @@ -266,7 +261,6 @@ protected:
HANDLE_CUDA_ERROR(cudaFree(extraWorkspace));
deviceStateVector = nullptr;
extraWorkspaceSizeInBytes = 0;
nResets = 0;
}

/// @brief Apply the given GateApplicationTask
Expand Down Expand Up @@ -350,7 +344,6 @@ public:
/// @param qubitIdx
void resetQubit(const std::size_t qubitIdx) override {
flushGateQueue();
nResets++;
const int basisBits[] = {(int)qubitIdx};
int parity;
double rand = randomValues(1, 1.0)[0];
Expand Down
20 changes: 20 additions & 0 deletions unittests/qir/NVQIRTester.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,26 @@ CUDAQ_TEST(NVQIRTester, checkQuantumIntrinsics) {
}
#endif

/// Repeatedly reset two qubits and check that the simulator keeps producing
/// the expected measurement. Each iteration resets both qubits, flips q1 with
/// X, swaps q0 and q1, and then expects q0 to measure as 1.
CUDAQ_TEST(NVQIRTester, checkReset) {
  __quantum__rt__initialize(0, nullptr);
  auto qubits = __quantum__rt__qubit_allocate_array(2);
  Qubit *q0 = *reinterpret_cast<Qubit **>(
      __quantum__rt__array_get_element_ptr_1d(qubits, 0));
  Qubit *q1 = *reinterpret_cast<Qubit **>(
      __quantum__rt__array_get_element_ptr_1d(qubits, 1));

  // Make sure that the state vector doesn't grow with each additional reset
  for (int i = 0; i < 100; i++) {
    __quantum__qis__reset(q0);
    __quantum__qis__reset(q1);
    __quantum__qis__x(q1);
    __quantum__qis__swap(q0, q1);
    // Perform the measurement outside of assert(): when NDEBUG is defined the
    // assert expression is discarded entirely, which would otherwise skip the
    // measurement call in release builds.
    const bool q0MeasuredOne = (*__quantum__qis__mz(q0) == 1);
    assert(q0MeasuredOne);
    // Keeps the variable "used" when assert() compiles to nothing.
    (void)q0MeasuredOne;
  }
  __quantum__rt__qubit_release_array(qubits);
  __quantum__rt__finalize();
}

#ifndef CUDAQ_BACKEND_TENSORNET_MPS
// MPS doesn't support gates on more than 2 qubits (controlled swap)
// SWAP with a single ctrl qubit in 0 state.
Expand Down

0 comments on commit f5d107c

Please sign in to comment.