Enable one to progressively build a Kernel with the kernel_builder (#558)

* Implement progressive kernel_builder construction

---------

Signed-off-by: Alex McCaskey <[email protected]>
NVIDIA · Aug 17, 2023 · d47440e · d47440e
1 parent 0981a00
commit d47440e
Show file tree

Hide file tree

Showing 5 changed files with 119 additions and 38 deletions.
diff --git a/python/tests/parallel/test_mpi_mqpu.py b/python/tests/parallel/test_mpi_mqpu.py
@@ -10,7 +10,6 @@
 from cudaq import spin
 import numpy as np
 
-cudaq.mpi.initialize()
 skipIfUnsupported = pytest.mark.skipif(
     not (cudaq.num_available_gpus() > 0 and cudaq.mpi.is_initialized() and cudaq.has_target('nvidia-mqpu')),
     reason="nvidia-mqpu backend not available or mpi not found"
@@ -20,6 +19,7 @@
 @skipIfUnsupported
 def testMPI():
     cudaq.set_target('nvidia-mqpu')
+    cudaq.mpi.initialize()
 
     target = cudaq.get_target()
     numQpus = target.num_qpus()

diff --git a/python/tests/unittests/test_kernel_builder.py b/python/tests/unittests/test_kernel_builder.py
@@ -213,3 +213,36 @@ def test_tdg_1_state_negate():
 
     # Qubit should remain in 1-state.
     assert counts["1"] == 1000
+
+def test_can_progressively_build():
+    """Tests that a kernel can be built progressively."""
+    cudaq.reset_target()
+    kernel = cudaq.make_kernel()
+    q = kernel.qalloc(2)
+    kernel.h(q[0])
+    print(kernel)
+    state = cudaq.get_state(kernel)
+    assert np.isclose(1. / np.sqrt(2.), state[0].real)
+    assert np.isclose(0., state[1].real)
+    assert np.isclose(1. / np.sqrt(2.), state[2].real)
+    assert np.isclose(0., state[3].real)
+
+    counts = cudaq.sample(kernel)
+    print(counts)
+    assert '10' in counts 
+    assert '00' in counts 
+
+    # Continue building the kernel
+    kernel.cx(q[0],q[1])
+    print(kernel)
+    state = cudaq.get_state(kernel)
+    assert np.isclose(1. / np.sqrt(2.), state[0].real)
+    assert np.isclose(0., state[1].real)
+    assert np.isclose(0., state[2].real)
+    assert np.isclose(1. / np.sqrt(2.), state[3].real)
+
+    counts = cudaq.sample(kernel)
+    print(counts)
+    assert '11' in counts 
+    assert '00' in counts 
+
diff --git a/runtime/cudaq/builder/kernel_builder.cpp b/runtime/cudaq/builder/kernel_builder.cpp
@@ -149,8 +149,12 @@ initializeBuilder(MLIRContext *context,
 
   cudaq::info("kernel_builder has {} arguments", arguments.size());
 
+  // Every Kernel should have a ReturnOp terminator,
+  // then we'll set the insertion point to right
+  // before it.
   opBuilder->setInsertionPointToStart(entryBlock);
-
+  auto terminator = opBuilder->create<func::ReturnOp>();
+  opBuilder->setInsertionPoint(terminator);
   return opBuilder;
 }
 void deleteBuilder(ImplicitLocOpBuilder *builder) { delete builder; }
@@ -694,23 +698,40 @@ void tagEntryPoint(ImplicitLocOpBuilder &builder, ModuleOp &module,
   });
 }
 
-ExecutionEngine *jitCode(ImplicitLocOpBuilder &builder, ExecutionEngine *jit,
-                         std::string kernelName,
-                         std::vector<std::string> extraLibPaths) {
-  if (jit)
-    return jit;
-
-  cudaq::info("kernel_builder running jitCode.");
+std::tuple<bool, ExecutionEngine *>
+jitCode(ImplicitLocOpBuilder &builder, ExecutionEngine *jit,
+        std::unordered_map<ExecutionEngine *, std::size_t> &jitHash,
+        std::string kernelName, std::vector<std::string> extraLibPaths) {
 
+  // Start off by getting the current ModuleOp
   auto block = builder.getBlock();
-  if ((block->getOperations().empty() ||
-       !block->getOperations().back().hasTrait<OpTrait::IsTerminator>())) {
-    builder.create<func::ReturnOp>(builder.getUnknownLoc());
-  }
-
   auto *context = builder.getContext();
   auto function = block->getParentOp();
   auto currentModule = function->getParentOfType<ModuleOp>();
+
+  // Create a unique hash from that ModuleOp
+  std::string modulePrintOut;
+  {
+    llvm::raw_string_ostream os(modulePrintOut);
+    currentModule.print(os);
+  }
+  auto moduleHash = std::hash<std::string>{}(modulePrintOut);
+
+  if (jit) {
+    // Have we added more instructions
+    // since the last time we jit the code?
+    // If so, we need to delete this JIT engine
+    // and create a new one.
+    if (moduleHash == jitHash[jit])
+      return std::make_tuple(false, jit);
+    else {
+      // need to redo the jit, remove the old one
+      jitHash.erase(jit);
+    }
+  }
+
+  cudaq::info("kernel_builder running jitCode.");
+
   auto module = currentModule.clone();
   auto ctx = module.getContext();
   SmallVector<mlir::NamedAttribute> names;
@@ -815,7 +836,9 @@ ExecutionEngine *jitCode(ImplicitLocOpBuilder &builder, ExecutionEngine *jit,
   auto kernelReg = reinterpret_cast<void (*)()>(*regFuncPtr);
   kernelReg();
 
-  return jit;
+  // Map this JIT Engine to its unique hash integer.
+  jitHash.insert({jit, moduleHash});
+  return std::make_tuple(true, jit);
 }
 
 void invokeCode(ImplicitLocOpBuilder &builder, ExecutionEngine *jit,
@@ -869,24 +892,6 @@ std::string to_quake(ImplicitLocOpBuilder &builder) {
   // to or the print out string will be invalid (verifier failed)).
   auto clonedModule = module.clone();
 
-  // Look for the main block in the functions we have
-  // add a return if it does not have one.
-  clonedModule.walk([](func::FuncOp func) {
-    Block &block = *func.getBlocks().begin();
-
-    auto tmpBuilder = OpBuilder::atBlockEnd(&block);
-
-    if (block.getOperations().empty()) {
-      // If no ops, add a Return
-      tmpBuilder.create<func::ReturnOp>(tmpBuilder.getUnknownLoc());
-    } else if (!block.getOperations()
-                    .back()
-                    .hasTrait<OpTrait::IsTerminator>()) {
-      // if last op is not the terminator, add the return.
-      tmpBuilder.create<func::ReturnOp>(tmpBuilder.getUnknownLoc());
-    }
-  });
-
   func::FuncOp unwrappedParentFunc = llvm::cast<func::FuncOp>(parentFunc);
   llvm::StringRef symName = unwrappedParentFunc.getSymName();
   tagEntryPoint(builder, clonedModule, symName);

diff --git a/runtime/cudaq/builder/kernel_builder.h b/runtime/cudaq/builder/kernel_builder.h
@@ -208,9 +208,10 @@ void applyPasses(PassManager &);
 
 /// @brief Create the `ExecutionEngine` and return a raw
 /// pointer, which we will wrap in a `unique_ptr`
-ExecutionEngine *jitCode(ImplicitLocOpBuilder &builder, ExecutionEngine *jit,
-                         std::string kernelName,
-                         std::vector<std::string> extraLibPaths);
+std::tuple<bool, ExecutionEngine *>
+jitCode(ImplicitLocOpBuilder &, ExecutionEngine *,
+        std::unordered_map<ExecutionEngine *, std::size_t> &, std::string,
+        std::vector<std::string>);
 
 /// @brief Invoke the function with the given kernel name.
 void invokeCode(ImplicitLocOpBuilder &builder, ExecutionEngine *jit,
@@ -350,6 +351,10 @@ class kernel_builder : public details::kernel_builder_base {
   /// out of CUDA Quantum code
   std::unique_ptr<ExecutionEngine, void (*)(ExecutionEngine *)> jitEngine;
 
+  /// @brief Map created ExecutionEngines to a unique hash of the
+  /// ModuleOp they derive from.
+  std::unordered_map<ExecutionEngine *, std::size_t> jitEngineToModuleHash;
+
   /// @brief Name of the CUDA Quantum kernel Quake function
   std::string kernelName = "__nvqpp__mlirgen____nvqppBuilderKernel";
 
@@ -636,8 +641,14 @@ class kernel_builder : public details::kernel_builder_base {
   /// @brief Lower the Quake code to the LLVM Dialect, call
   /// `PassManager`.
   void jitCode(std::vector<std::string> extraLibPaths = {}) override {
-    auto *ptr = details::jitCode(*opBuilder, jitEngine.get(), kernelName,
-                                 extraLibPaths);
+    auto [wasChanged, ptr] =
+        details::jitCode(*opBuilder, jitEngine.get(), jitEngineToModuleHash,
+                         kernelName, extraLibPaths);
+    // If we had a jitEngine, but the code changed,
+    // delete the one we had.
+    if (jitEngine && wasChanged)
+      details::deleteJitEngine(jitEngine.release());
+
     // Store for the next time if we haven't already
     if (!jitEngine)
       jitEngine = std::unique_ptr<ExecutionEngine, void (*)(ExecutionEngine *)>(

diff --git a/unittests/integration/builder_tester.cpp b/unittests/integration/builder_tester.cpp
@@ -652,3 +652,35 @@ CUDAQ_TEST(BuilderTester, checkEntryPointAttribute) {
       R"(func\.func @__nvqpp__mlirgen\w+\(\) attributes \{"cudaq-entrypoint"\})");
   EXPECT_TRUE(std::regex_search(quake, functionDecleration));
 }
+
+#ifndef CUDAQ_BACKEND_DM
+
+CUDAQ_TEST(BuilderTester, checkCanProgressivelyBuild) {
+  auto kernel = cudaq::make_kernel();
+  auto q = kernel.qalloc(2);
+  kernel.h(q[0]);
+  auto state = cudaq::get_state(kernel);
+  EXPECT_NEAR(M_SQRT1_2, state[0].real(), 1e-3);
+  // Handle sims with different endianness
+  EXPECT_TRUE(std::fabs(M_SQRT1_2 - state[1].real()) < 1e-3 ||
+              std::fabs(M_SQRT1_2 - state[2].real()) < 1e-3);
+  EXPECT_NEAR(0.0, state[3].real(), 1e-3);
+
+  auto counts = cudaq::sample(kernel);
+  EXPECT_TRUE(counts.count("00") != 0);
+  EXPECT_TRUE(counts.count("10") != 0);
+
+  // Continue building the kernel
+  kernel.x<cudaq::ctrl>(q[0], q[1]);
+  state = cudaq::get_state(kernel);
+  EXPECT_NEAR(M_SQRT1_2, state[0].real(), 1e-3);
+  EXPECT_NEAR(0.0, state[1].real(), 1e-3);
+  EXPECT_NEAR(0.0, state[2].real(), 1e-3);
+  EXPECT_NEAR(M_SQRT1_2, state[3].real(), 1e-3);
+
+  counts = cudaq::sample(kernel);
+  EXPECT_TRUE(counts.count("00") != 0);
+  EXPECT_TRUE(counts.count("11") != 0);
+}
+
+#endif