diff --git a/runtime/cudaq/spin/spin_op.cpp b/runtime/cudaq/spin/spin_op.cpp index f1acaf1d70..a898efd0f1 100644 --- a/runtime/cudaq/spin/spin_op.cpp +++ b/runtime/cudaq/spin/spin_op.cpp @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include @@ -62,48 +61,52 @@ actionOnBra(spin_op &term, const std::string &bitConfiguration) { return std::make_pair(newConfiguration, coeff); } -std::pair, std::vector> -mult(std::vector row, std::vector other_row, - std::complex &rowCoeff, std::complex &otherCoeff) { - // This is term_i * otherTerm_j - std::vector tmp(row.size()), tmp2(row.size()); - std::size_t numQubits = row.size() / 2; +std::pair, std::complex> +mult(const std::vector &p1, const std::vector &p2, + const std::complex &p1Coeff, const std::complex &p2Coeff) { + auto [minSize, maxSize] = std::minmax({p1.size(), p2.size()}); + std::size_t minNumQubits = minSize / 2; + std::size_t maxNumQubits = maxSize / 2; + std::vector result(maxSize, false); + int yCount = 0; + int cPhase = 0; + + for (std::size_t i = 0; i < minNumQubits; ++i) { + bool p1_x = p1[i]; + bool p1_z = p1[i + (p1.size() / 2)]; + bool p2_x = p2[i]; + bool p2_z = p2[i + (p2.size() / 2)]; + + // Compute the resulting Pauli operator + result[i] = p1_x ^ p2_x; + result[i + maxNumQubits] = p1_z ^ p2_z; + + yCount += + (p1_x & p1_z) + (p2_x & p2_z) - (result[i] & result[i + maxNumQubits]); + cPhase += p1_x & p2_z; + } - for (std::size_t i = 0; i < 2 * numQubits; i++) - tmp[i] = row[i] ^ other_row[i]; + const std::vector &big = p1.size() < p2.size() ? p2 : p1; + for (std::size_t i = minNumQubits; i < maxNumQubits; ++i) { + result[i] = big[i]; + result[i + maxNumQubits] = big[i + maxNumQubits]; + } - for (std::size_t i = 0; i < numQubits; i++) - tmp2[i] = (row[i] && other_row[numQubits + i]) || - (row[i + numQubits] && other_row[i]); + // Normalize the phase to a value in the range [0, 3] + int phaseFactor = (2 * cPhase + yCount) % 4; + if (phaseFactor < 0) + phaseFactor += 4; - int orig_phase = 0, other_phase = 0; - for (std::size_t i = 0; i < numQubits; i++) { - if (row[i] && row[i + numQubits]) - orig_phase++; + // Phase correction factors based on the total phase + using namespace std::complex_literals; + std::array, 4> phase{1.0, -1i, -1.0, 1i}; + std::complex resultCoeff = p1Coeff * phase[phaseFactor] * p2Coeff; - if (other_row[i] && other_row[i + numQubits]) - other_phase++; - } + // Handle the "-0" issue + if (std::abs(resultCoeff.real()) < 1e-12) + resultCoeff.real(0); - int sum = 0; - for (auto a : tmp2) - if (a) - sum++; - - auto _phase = orig_phase + other_phase + 2 * sum; - // Based on the phase, figure out an extra coeff to apply - for (std::size_t i = 0; i < numQubits; i++) - if (tmp[i] && tmp[i + numQubits]) - _phase -= 1; - - _phase %= 4; - std::complex imaginary(0, 1); - std::array, 4> phaseCoeffArr{1.0, -1. * imaginary, -1.0, - imaginary}; - auto phase_coeff = phaseCoeffArr[_phase]; - auto coeff = rowCoeff; - coeff *= phase_coeff * otherCoeff; - return std::make_pair(coeff, tmp); + return {result, resultCoeff}; } } // namespace details @@ -425,56 +428,47 @@ spin_op &spin_op::operator-=(const spin_op &v) noexcept { } spin_op &spin_op::operator*=(const spin_op &v) noexcept { - spin_op copy = v; - if (v.num_qubits() > num_qubits()) - expandToNQubits(copy.num_qubits()); - else if (v.num_qubits() < num_qubits()) - copy.expandToNQubits(num_qubits()); - - std::unordered_map, std::complex> newTerms; - std::size_t ourRow = 0, theirRow = 0; - std::vector> composedCoeffs(num_terms() * - copy.num_terms()); - std::vector> composition(num_terms() * copy.num_terms()); - std::map> indexMap; - auto nElements = composition.size(); - for (std::size_t i = 0; i < nElements; i++) { - auto pair = std::make_pair(ourRow, theirRow); - indexMap.emplace(i, pair); - if (theirRow == copy.num_terms() - 1) { - theirRow = 0; - ourRow++; - } else - theirRow++; - } + using term_and_coeff = std::pair, std::complex>; + std::size_t numTerms = num_terms() * v.num_terms(); + std::vector result(numTerms); + std::size_t min = std::min(num_terms(), v.num_terms()); + + // Put the `unordered_map` iterators into vectors to minimize pointer chasing + // when doing the cartesian product of the spin operators' terms. + using Iter = + std::unordered_map>::const_iterator; + std::vector thisTermIt; + std::vector otherTermIt; + thisTermIt.reserve(terms.size()); + otherTermIt.reserve(v.terms.size()); + for (auto it = terms.begin(); it != terms.end(); ++it) + thisTermIt.push_back(it); + for (auto it = v.terms.begin(); it != v.terms.end(); ++it) + otherTermIt.push_back(it); + #if defined(_OPENMP) // Threshold to start OpenMP parallelization. // 16 ~ 4-term * 4-term constexpr std::size_t spin_op_omp_threshold = 16; -#pragma omp parallel for shared(composition) if (nElements > \ - spin_op_omp_threshold) +#pragma omp parallel for shared(result) if (numTerms > spin_op_omp_threshold) #endif - for (std::size_t i = 0; i < nElements; i++) { - auto [j, k] = indexMap[i]; - auto s = terms.begin(); - auto t = copy.terms.begin(); - std::advance(s, j); - std::advance(t, k); - auto res = details::mult(s->first, t->first, s->second, t->second); - composition[i] = res.second; - composedCoeffs[i] = res.first; + for (std::size_t i = 0; i < numTerms; ++i) { + Iter s = thisTermIt[i % min]; + Iter t = otherTermIt[i / min]; + if (terms.size() > v.terms.size()) { + s = thisTermIt[i / min]; + t = otherTermIt[i % min]; + } + result[i] = details::mult(s->first, t->first, s->second, t->second); } - for (std::size_t i = 0; i < nElements; i++) { - auto iter = newTerms.find(composition[i]); - if (iter == newTerms.end()) - newTerms.emplace(composition[i], composedCoeffs[i]); - else - iter->second += composedCoeffs[i]; + terms.clear(); + terms.reserve(numTerms); + for (auto &&[term, coeff] : result) { + auto [it, created] = terms.emplace(term, coeff); + if (!created) + it->second += coeff; } - - terms = newTerms; - return *this; } diff --git a/unittests/spin_op/SpinOpTester.cpp b/unittests/spin_op/SpinOpTester.cpp index d8176c89b3..51b24e3942 100644 --- a/unittests/spin_op/SpinOpTester.cpp +++ b/unittests/spin_op/SpinOpTester.cpp @@ -10,6 +10,89 @@ #include "cudaq/spin_op.h" +enum Pauli : int8_t { I = 0, X, Y, Z }; +constexpr Pauli paulis[4] = {Pauli::I, Pauli::X, Pauli::Y, Pauli::Z}; + +// Function to multiply two single-qubit Pauli operators +static std::pair, Pauli> multiply_paulis(Pauli a, + Pauli b) { + using namespace std::complex_literals; + // I X Y Z + constexpr std::complex table[4][4] = { + {1., 1., 1, 1}, // I + {1., 1., 1i, -1i}, // X + {1., -1i, 1, 1i}, // Y + {1., 1i, -1i, 1} // Z + }; + if (a == b) + return {1.0, Pauli::I}; + if (a == Pauli::I) + return {1.0, b}; + if (b == Pauli::I) + return {1.0, a}; + return {table[a][b], paulis[a ^ b]}; +} + +// Function to multiply two multi-qubit Pauli words +static std::pair, std::vector> +multiply_pauli_words(const std::vector &a, const std::vector &b, + bool verbose = false) { + std::complex phase = 1.0; + std::string info; + std::vector result(a.size(), Pauli::I); + for (size_t i = 0; i < a.size(); ++i) { + auto [p, r] = multiply_paulis(a[i], b[i]); + phase *= p; + result[i] = r; + } + return {phase, result}; +} + +// Generates a pauli word out of a binary representation of it. +static std::vector generate_pauli_word(int64_t id, int64_t num_qubits) { + constexpr int64_t mask = 0x3; + std::vector word(num_qubits, Pauli::I); + for (int64_t i = 0; i < num_qubits; ++i) { + assert((id & mask) < 4); + word[i] = paulis[id & mask]; + id >>= 2; + } + return word; +} + +static std::string generate_pauli_string(const std::vector &word) { + constexpr char paulis_name[4] = {'I', 'X', 'Y', 'Z'}; + std::string result(word.size(), 'I'); + for (int64_t i = 0; i < word.size(); ++i) + result[i] = paulis_name[word[i]]; + return result; +} + +static cudaq::spin_op generate_cudaq_spin(int64_t id, int64_t num_qubits, + bool addI = true) { + constexpr int64_t mask = 0x3; + cudaq::spin_op result; + for (int64_t i = 0; i < num_qubits; ++i) { + switch (paulis[id & mask]) { + case Pauli::I: + if (addI) + result *= cudaq::spin::i(i); + break; + case Pauli::X: + result *= cudaq::spin::x(i); + break; + case Pauli::Y: + result *= cudaq::spin::y(i); + break; + case Pauli::Z: + result *= cudaq::spin::z(i); + break; + } + id >>= 2; + } + return result; +} + using namespace cudaq::spin; TEST(SpinOpTester, checkConstruction) { @@ -82,61 +165,26 @@ TEST(SpinOpTester, checkBug178) { } TEST(SpinOpTester, checkMultiplication) { - auto mult = x(0) * y(1); - mult.dump(); - auto mult3 = y(0) * y(1); - mult3.dump(); - - auto tmp = 2 * y(1); - tmp.dump(); - - auto mult2 = x(3) * tmp; - mult2.dump(); - - std::cout << "X * Z: -iY\n"; - (x(3) * z(3)).dump(); - EXPECT_EQ(y(3), x(3) * z(3)); - EXPECT_EQ((x(3) * z(3)).get_coefficient(), std::complex(0, -1)); - - std::cout << "X * X: I\n"; - (x(2) * x(2)).dump(); - EXPECT_EQ(cudaq::spin_op(), x(2) * x(2)); - EXPECT_EQ((x(2) * x(2)).get_coefficient(), std::complex(1, 0)); - - std::cout << "Y * Y: I\n"; - (y(14) * y(14)).dump(); - EXPECT_EQ(cudaq::spin_op(), y(14) * y(14)); - EXPECT_EQ((y(14) * y(14)).get_coefficient(), std::complex(1, 0)); - - std::cout << "Z * Z: I\n"; - (z(0) * z(0)).dump(); - EXPECT_EQ(cudaq::spin_op(), z(0) * z(0)); - EXPECT_EQ((z(0) * z(0)).get_coefficient(), std::complex(1, 0)); - - std::cout << "X * Y: iZ\n"; - (x(3) * y(3)).dump(); - EXPECT_EQ(z(3), x(3) * y(3)); - EXPECT_EQ((x(3) * y(3)).get_coefficient(), std::complex(0, 1)); - - std::cout << "I * I: I\n"; - (i(2) * i(2)).dump(); - EXPECT_EQ(i(2), i(2) * i(2)); - EXPECT_EQ((i(2) * i(2)).get_coefficient(), std::complex(1, 0)); - - std::cout << "I * Z: Z\n"; - (i(3) * i(3)).dump(); - EXPECT_EQ(z(3), i(3) * z(3)); - EXPECT_EQ((i(3) * z(3)).get_coefficient(), std::complex(1, 0)); - - auto tmp2 = 2 * x(0) * x(1) * y(2) * y(3) + 3 * y(0) * y(1) * x(2) * x(3); - std::cout << "START\n"; - tmp2 = tmp2 * tmp2; - tmp2.dump(); - - EXPECT_EQ(2, tmp2.num_terms()); - auto expected = - 13 * i(0) * i(1) * i(2) * i(3) + 12 * z(0) * z(1) * z(2) * z(3); - EXPECT_EQ(expected, tmp2); + for (int num_qubits = 1; num_qubits <= 4; ++num_qubits) { + int64_t num_words = std::pow(4, num_qubits); + for (int64_t i = 0; i < num_words; ++i) { + for (int64_t j = 0; j < num_words; ++j) { + // Expected result: + std::vector a_word = generate_pauli_word(i, num_qubits); + std::vector b_word = generate_pauli_word(j, num_qubits); + auto [phase, result] = multiply_pauli_words(a_word, b_word); + + // Result: + cudaq::spin_op a_spin = generate_cudaq_spin(i, num_qubits); + cudaq::spin_op b_spin = generate_cudaq_spin(j, num_qubits, false); + cudaq::spin_op result_spin = a_spin * b_spin; + + // Check result + EXPECT_EQ(generate_pauli_string(result), result_spin.to_string(false)); + EXPECT_EQ(phase, result_spin.get_coefficient()); + } + } + } } TEST(SpinOpTester, canBuildDeuteron) {