Skip to content

Commit

Permalink
Enable one to progressively build a Kernel with the kernel_builder (#558
Browse files Browse the repository at this point in the history
)

* Implement progressive kernel_builder construction

---------

Signed-off-by: Alex McCaskey <[email protected]>
  • Loading branch information
amccaskey authored Aug 17, 2023
1 parent 0981a00 commit d47440e
Show file tree
Hide file tree
Showing 5 changed files with 119 additions and 38 deletions.
2 changes: 1 addition & 1 deletion python/tests/parallel/test_mpi_mqpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
from cudaq import spin
import numpy as np

cudaq.mpi.initialize()
skipIfUnsupported = pytest.mark.skipif(
not (cudaq.num_available_gpus() > 0 and cudaq.mpi.is_initialized() and cudaq.has_target('nvidia-mqpu')),
reason="nvidia-mqpu backend not available or mpi not found"
Expand All @@ -20,6 +19,7 @@
@skipIfUnsupported
def testMPI():
cudaq.set_target('nvidia-mqpu')
cudaq.mpi.initialize()

target = cudaq.get_target()
numQpus = target.num_qpus()
Expand Down
33 changes: 33 additions & 0 deletions python/tests/unittests/test_kernel_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,3 +213,36 @@ def test_tdg_1_state_negate():

# Qubit should remain in 1-state.
assert counts["1"] == 1000

def test_can_progressively_build():
    """Tests that a kernel can be built progressively.

    The kernel is executed once after a Hadamard on the first qubit, then
    extended with a controlled-X and executed again; the state vector and
    the sampled bitstrings are checked after each stage.
    """
    cudaq.reset_target()
    builder = cudaq.make_kernel()
    qubits = builder.qalloc(2)
    inv_sqrt2 = 1. / np.sqrt(2.)

    # Stage 1: Hadamard on the first qubit only.
    builder.h(qubits[0])
    print(builder)
    amplitudes = cudaq.get_state(builder)
    for index, expected in enumerate((inv_sqrt2, 0., inv_sqrt2, 0.)):
        assert np.isclose(expected, amplitudes[index].real)

    result = cudaq.sample(builder)
    print(result)
    for bits in ('10', '00'):
        assert bits in result

    # Stage 2: continue building the same kernel with a controlled-X.
    builder.cx(qubits[0], qubits[1])
    print(builder)
    amplitudes = cudaq.get_state(builder)
    for index, expected in enumerate((inv_sqrt2, 0., 0., inv_sqrt2)):
        assert np.isclose(expected, amplitudes[index].real)

    result = cudaq.sample(builder)
    print(result)
    for bits in ('11', '00'):
        assert bits in result

69 changes: 37 additions & 32 deletions runtime/cudaq/builder/kernel_builder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -149,8 +149,12 @@ initializeBuilder(MLIRContext *context,

cudaq::info("kernel_builder has {} arguments", arguments.size());

// Every Kernel should have a ReturnOp terminator,
// then we'll set the insertion point to right
// before it.
opBuilder->setInsertionPointToStart(entryBlock);

auto terminator = opBuilder->create<func::ReturnOp>();
opBuilder->setInsertionPoint(terminator);
return opBuilder;
}
/// @brief Destroy the given `ImplicitLocOpBuilder` by calling `delete` on it.
/// @param builder Pointer to the builder to delete.
void deleteBuilder(ImplicitLocOpBuilder *builder) { delete builder; }
Expand Down Expand Up @@ -694,23 +698,40 @@ void tagEntryPoint(ImplicitLocOpBuilder &builder, ModuleOp &module,
});
}

ExecutionEngine *jitCode(ImplicitLocOpBuilder &builder, ExecutionEngine *jit,
std::string kernelName,
std::vector<std::string> extraLibPaths) {
if (jit)
return jit;

cudaq::info("kernel_builder running jitCode.");
std::tuple<bool, ExecutionEngine *>
jitCode(ImplicitLocOpBuilder &builder, ExecutionEngine *jit,
std::unordered_map<ExecutionEngine *, std::size_t> &jitHash,
std::string kernelName, std::vector<std::string> extraLibPaths) {

// Start off by getting the current ModuleOp
auto block = builder.getBlock();
if ((block->getOperations().empty() ||
!block->getOperations().back().hasTrait<OpTrait::IsTerminator>())) {
builder.create<func::ReturnOp>(builder.getUnknownLoc());
}

auto *context = builder.getContext();
auto function = block->getParentOp();
auto currentModule = function->getParentOfType<ModuleOp>();

// Create a unique hash from that ModuleOp
std::string modulePrintOut;
{
llvm::raw_string_ostream os(modulePrintOut);
currentModule.print(os);
}
auto moduleHash = std::hash<std::string>{}(modulePrintOut);

if (jit) {
// Have we added more instructions
// since the last time we jit the code?
// If so, we need to delete this JIT engine
// and create a new one.
if (moduleHash == jitHash[jit])
return std::make_tuple(false, jit);
else {
// need to redo the jit, remove the old one
jitHash.erase(jit);
}
}

cudaq::info("kernel_builder running jitCode.");

auto module = currentModule.clone();
auto ctx = module.getContext();
SmallVector<mlir::NamedAttribute> names;
Expand Down Expand Up @@ -815,7 +836,9 @@ ExecutionEngine *jitCode(ImplicitLocOpBuilder &builder, ExecutionEngine *jit,
auto kernelReg = reinterpret_cast<void (*)()>(*regFuncPtr);
kernelReg();

return jit;
// Map this JIT Engine to its unique hash integer.
jitHash.insert({jit, moduleHash});
return std::make_tuple(true, jit);
}

void invokeCode(ImplicitLocOpBuilder &builder, ExecutionEngine *jit,
Expand Down Expand Up @@ -869,24 +892,6 @@ std::string to_quake(ImplicitLocOpBuilder &builder) {
// to or the print out string will be invalid (verifier failed)).
auto clonedModule = module.clone();

// Look for the main block in the functions we have
// add a return if it does not have one.
clonedModule.walk([](func::FuncOp func) {
Block &block = *func.getBlocks().begin();

auto tmpBuilder = OpBuilder::atBlockEnd(&block);

if (block.getOperations().empty()) {
// If no ops, add a Return
tmpBuilder.create<func::ReturnOp>(tmpBuilder.getUnknownLoc());
} else if (!block.getOperations()
.back()
.hasTrait<OpTrait::IsTerminator>()) {
// if last op is not the terminator, add the return.
tmpBuilder.create<func::ReturnOp>(tmpBuilder.getUnknownLoc());
}
});

func::FuncOp unwrappedParentFunc = llvm::cast<func::FuncOp>(parentFunc);
llvm::StringRef symName = unwrappedParentFunc.getSymName();
tagEntryPoint(builder, clonedModule, symName);
Expand Down
21 changes: 16 additions & 5 deletions runtime/cudaq/builder/kernel_builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -208,9 +208,10 @@ void applyPasses(PassManager &);

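/// @brief Create the `ExecutionEngine` for the builder's current module, or
/// reuse the one passed in if the module hash is unchanged since it was
/// created. Returns a tuple of (true if a new engine was created, raw
/// engine pointer); the caller wraps the pointer in a `unique_ptr`.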
ExecutionEngine *jitCode(ImplicitLocOpBuilder &builder, ExecutionEngine *jit,
std::string kernelName,
std::vector<std::string> extraLibPaths);
std::tuple<bool, ExecutionEngine *>
jitCode(ImplicitLocOpBuilder &, ExecutionEngine *,
std::unordered_map<ExecutionEngine *, std::size_t> &, std::string,
std::vector<std::string>);

/// @brief Invoke the function with the given kernel name.
void invokeCode(ImplicitLocOpBuilder &builder, ExecutionEngine *jit,
Expand Down Expand Up @@ -350,6 +351,10 @@ class kernel_builder : public details::kernel_builder_base {
/// out of CUDA Quantum code
std::unique_ptr<ExecutionEngine, void (*)(ExecutionEngine *)> jitEngine;

/// @brief Map created ExecutionEngines to a unique hash of the
/// ModuleOp they derive from.
std::unordered_map<ExecutionEngine *, std::size_t> jitEngineToModuleHash;

/// @brief Name of the CUDA Quantum kernel Quake function
std::string kernelName = "__nvqpp__mlirgen____nvqppBuilderKernel";

Expand Down Expand Up @@ -636,8 +641,14 @@ class kernel_builder : public details::kernel_builder_base {
/// @brief Lower the Quake code to the LLVM Dialect, call
/// `PassManager`.
void jitCode(std::vector<std::string> extraLibPaths = {}) override {
auto *ptr = details::jitCode(*opBuilder, jitEngine.get(), kernelName,
extraLibPaths);
auto [wasChanged, ptr] =
details::jitCode(*opBuilder, jitEngine.get(), jitEngineToModuleHash,
kernelName, extraLibPaths);
// If we had a jitEngine, but the code changed,
// delete the one we had.
if (jitEngine && wasChanged)
details::deleteJitEngine(jitEngine.release());

// Store for the next time if we haven't already
if (!jitEngine)
jitEngine = std::unique_ptr<ExecutionEngine, void (*)(ExecutionEngine *)>(
Expand Down
32 changes: 32 additions & 0 deletions unittests/integration/builder_tester.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -652,3 +652,35 @@ CUDAQ_TEST(BuilderTester, checkEntryPointAttribute) {
R"(func\.func @__nvqpp__mlirgen\w+\(\) attributes \{"cudaq-entrypoint"\})");
EXPECT_TRUE(std::regex_search(quake, functionDecleration));
}

#ifndef CUDAQ_BACKEND_DM

/// Checks that a builder kernel can be executed, extended with more
/// operations, and executed again with the new operations taking effect.
CUDAQ_TEST(BuilderTester, checkCanProgressivelyBuild) {
  constexpr double tolerance = 1e-3;

  auto builder = cudaq::make_kernel();
  auto qubits = builder.qalloc(2);

  // Stage 1: Hadamard on the first qubit only.
  builder.h(qubits[0]);
  auto amplitudes = cudaq::get_state(builder);

  // True when the real part of the amplitude at `index` is close to 1/sqrt(2).
  auto isHalfAmplitude = [&](std::size_t index) {
    return std::fabs(M_SQRT1_2 - amplitudes[index].real()) < tolerance;
  };

  EXPECT_NEAR(M_SQRT1_2, amplitudes[0].real(), tolerance);
  // Handle sims with different endianness
  EXPECT_TRUE(isHalfAmplitude(1) || isHalfAmplitude(2));
  EXPECT_NEAR(0.0, amplitudes[3].real(), tolerance);

  auto results = cudaq::sample(builder);
  EXPECT_TRUE(results.count("00") != 0);
  EXPECT_TRUE(results.count("10") != 0);

  // Stage 2: continue building the same kernel with a controlled-X.
  builder.x<cudaq::ctrl>(qubits[0], qubits[1]);
  amplitudes = cudaq::get_state(builder);
  EXPECT_NEAR(M_SQRT1_2, amplitudes[0].real(), tolerance);
  EXPECT_NEAR(0.0, amplitudes[1].real(), tolerance);
  EXPECT_NEAR(0.0, amplitudes[2].real(), tolerance);
  EXPECT_NEAR(M_SQRT1_2, amplitudes[3].real(), tolerance);

  results = cudaq::sample(builder);
  EXPECT_TRUE(results.count("00") != 0);
  EXPECT_TRUE(results.count("11") != 0);
}

#endif

0 comments on commit d47440e

Please sign in to comment.