From 0130b81655b1fa04b433c4d22f9288df723cefd2 Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Fri, 11 Oct 2024 15:58:16 +0100 Subject: [PATCH] Adding AOCL files --- .idea/workspace.xml | 23 ++++++++----- ArmPL/sp_gemm.hh | 84 +++++++++++++++++++++++++++++++++++++++++++++ Makefile | 2 +- include/doGemm.hh | 26 +++++++------- include/doGemv.hh | 12 +++---- include/helpers.hh | 12 ++++--- 6 files changed, 127 insertions(+), 32 deletions(-) diff --git a/.idea/workspace.xml b/.idea/workspace.xml index a5afad2..2bb35d8 100644 --- a/.idea/workspace.xml +++ b/.idea/workspace.xml @@ -15,14 +15,13 @@ - + + + - - - - + @@ -538,7 +545,6 @@ - @@ -563,6 +569,7 @@ - \ No newline at end of file diff --git a/ArmPL/sp_gemm.hh b/ArmPL/sp_gemm.hh index 28a2ca3..612f4f1 100644 --- a/ArmPL/sp_gemm.hh +++ b/ArmPL/sp_gemm.hh @@ -89,6 +89,90 @@ class sp_gemm_cpu : public sp_gemm { void preLoopRequirements() override { // Need to put A_ and B_ into A_armpl_ and B_armpl_ toCSR_armpl(); + + /** providing hints to ARMPL and optimizing the matrix datastructures */ + // TODO -- is noallocs best here? 
+ status_ = armpl_spmat_hint(A_armpl_, ARMPL_SPARSE_HINT_MEMORY, + ARMPL_SPARSE_MEMORY_NOALLOCS); + if (status_ != ARMPL_STATUS_SUCCESS) { + std::cout << "ERROR " << status_ << std::endl; + exit(1); + } + status_ = armpl_spmat_hint(B_armpl_, ARMPL_SPARSE_HINT_MEMORY, + ARMPL_SPARSE_MEMORY_NOALLOCS); + if (status_ != ARMPL_STATUS_SUCCESS) { + std::cout << "ERROR " << status_ << std::endl; + exit(1); + } + + status_ = armpl_spmat_hint(A_armpl_, ARMPL_SPARSE_HINT_STRUCTURE, + ARMPL_SPARSE_STRUCTURE_UNSTRUCTURED); + if (status_ != ARMPL_STATUS_SUCCESS) { + std::cout << "ERROR " << status_ << std::endl; + exit(1); + } + status_ = armpl_spmat_hint(B_armpl_, ARMPL_SPARSE_HINT_STRUCTURE, + ARMPL_SPARSE_STRUCTURE_UNSTRUCTURED); + if (status_ != ARMPL_STATUS_SUCCESS) { + std::cout << "ERROR " << status_ << std::endl; + exit(1); + } + + // TODO -- will this be FEW? + status_ = armpl_spmat_hint(A_armpl_, ARMPL_SPARSE_HINT_SPMM_INVOCATIONS, + ARMPL_SPARSE_INVOCATIONS_MANY); + if (status_ != ARMPL_STATUS_SUCCESS) { + std::cout << "ERROR " << status_ << std::endl; + exit(1); + } + status_ = armpl_spmat_hint(B_armpl_, ARMPL_SPARSE_HINT_SPMM_INVOCATIONS, + ARMPL_SPARSE_INVOCATIONS_MANY); + if (status_ != ARMPL_STATUS_SUCCESS) { + std::cout << "ERROR " << status_ << std::endl; + exit(1); + } + + status_ = armpl_spmat_hint(A_armpl_, ARMPL_SPARSE_HINT_SPMM_OPERATION, + ARMPL_SPARSE_OPERATION_NOTRANS); + if (status_ != ARMPL_STATUS_SUCCESS) { + std::cout << "ERROR " << status_ << std::endl; + exit(1); + } + status_ = armpl_spmat_hint(B_armpl_, ARMPL_SPARSE_HINT_SPMM_OPERATION, + ARMPL_SPARSE_OPERATION_NOTRANS); + if (status_ != ARMPL_STATUS_SUCCESS) { + std::cout << "ERROR " << status_ << std::endl; + exit(1); + } + + // TODO -- investigate which is better here + status_ = armpl_spmat_hint(A_armpl_, ARMPL_SPARSE_HINT_SPMM_STRATEGY, + ARMPL_SPARSE_SPMM_STRAT_OPT_PART_STRUCT); + if (status_ != ARMPL_STATUS_SUCCESS) { + std::cout << "ERROR " << status_ << std::endl; + exit(1); + } + 
status_ = armpl_spmat_hint(B_armpl_, ARMPL_SPARSE_HINT_SPMM_STRATEGY, + ARMPL_SPARSE_SPMM_STRAT_OPT_PART_STRUCT); + if (status_ != ARMPL_STATUS_SUCCESS) { + std::cout << "ERROR " << status_ << std::endl; + exit(1); + } + +// TODO -- this is throwing an error -- couldn't immediately fix so come +// back to + +// /** provide hints for the optimisation of the spmm execution */ +// status_ = armpl_spmm_optimize(ARMPL_SPARSE_OPERATION_NOTRANS, +// ARMPL_SPARSE_OPERATION_NOTRANS, +// ARMPL_SPARSE_SCALAR_ONE, +// A_armpl_, B_armpl_, +// ARMPL_SPARSE_SCALAR_ZERO, +// C_armpl_); +// if (status_ != ARMPL_STATUS_SUCCESS) { +// std::cout << "ERROR " << status_ << std::endl; +// exit(1); +// } } /** Perform any required steps after calling the GEMM kernel that should diff --git a/Makefile b/Makefile index e5091e0..22d080c 100644 --- a/Makefile +++ b/Makefile @@ -51,7 +51,7 @@ CXX = $(CXX_$(COMPILER)) CXXFLAGS_ARM = -std=c++17 -Wall -Ofast -$(ARCHFLAG)=native CXXFLAGS_CLANG = -std=c++17 -Wall -Ofast -$(ARCHFLAG)=native -CXXFLAGS_GNU = -std=c++17 -Wall -Ofast -$(ARCHFLAG)=native +CXXFLAGS_GNU = -std=c++17 -Wall -Wno-deprecated-declarations -Ofast -$(ARCHFLAG)=native CXXFLAGS_INTEL = -std=c++17 -Wall -Ofast -$(ARCHFLAG)=native -Wno-tautological-constant-compare CXXFLAGS_NVIDIA = -std=c++17 -Wall -O3 -fast -$(ARCHFLAG)=native CXXFLAGS_HIP = -std=c++17 -Wall -Ofast -$(ARCHFLAG)=native diff --git a/include/doGemm.hh b/include/doGemm.hh index a3e5e77..93cc058 100644 --- a/include/doGemm.hh +++ b/include/doGemm.hh @@ -392,8 +392,8 @@ class doGemm { cpuResult = gemmCpu_.compute(); cpuResult.gflops = calcGflops(flops, iterations_, cpuResult.runtime); // Write result to CSV file - writeLineToCsv(csvFile, "cpu", kernelName, M, N, K, probSize, iterations_, - cpuResult.runtime, cpuResult.gflops); + writeLineToCsv(csvFile, "cpu", kernelName, M, N, K, probSize, + 0.0, iterations_, cpuResult.runtime, cpuResult.gflops); } #endif @@ -422,13 +422,13 @@ class doGemm { // Write results to CSV file 
writeLineToCsv(csvFile, "gpu_offloadOnce", kernelName, M, N, K, probSize, - iterations_, gpuResult_once.runtime, + 0.0, iterations_, gpuResult_once.runtime, gpuResult_once.gflops); writeLineToCsv(csvFile, "gpu_offloadAlways", kernelName, M, N, K, - probSize, iterations_, gpuResult_always.runtime, + probSize, 0.0, iterations_, gpuResult_always.runtime, gpuResult_always.gflops); writeLineToCsv(csvFile, "gpu_unified", kernelName, M, N, K, probSize, - iterations_, gpuResult_unified.runtime, + 0.0, iterations_, gpuResult_unified.runtime, gpuResult_unified.gflops); } #endif @@ -578,8 +578,9 @@ class doGemm { spGemmCpu_.initialise(N, sparsity); time_checksum_gflop cpuResult = spGemmCpu_.compute(); cpuResult.gflops = calcGflops(flops, iterations_, cpuResult.runtime); - writeLineToCsv(csvFile, "cpu", kernelName, N, N, N, probSize, iterations_, - cpuResult.runtime, cpuResult.gflops); + writeLineToCsv(csvFile, "cpu", kernelName, N, N, N, probSize, + sparsity, iterations_, cpuResult.runtime, + cpuResult.gflops); } #endif #if GPU_ENABLED @@ -607,13 +608,14 @@ class doGemm { // Write lines to CSV file writeLineToCsv(csvFile, "gpu_offloadOnce", kernelName, N, N, N, probSize, - iterations_, gpuResult_once.runtime, gpuResult_once.gflops); + sparsity, iterations_, gpuResult_once.runtime, + gpuResult_once.gflops); writeLineToCsv(csvFile, "gpu_offloadAlways", kernelName, N, N, N, probSize, - iterations_, gpuResult_always.runtime, - gpuResult_always.gflops); + sparsity, iterations_, gpuResult_always.runtime, + gpuResult_always.gflops); writeLineToCsv(csvFile, "gpu_unified", kernelName, N, N, N, probSize, - iterations_, gpuResult_unified.runtime, - gpuResult_unified.gflops); + sparsity, iterations_, gpuResult_unified.runtime, + gpuResult_unified.gflops); } #endif diff --git a/include/doGemv.hh b/include/doGemv.hh index 12cd097..2ab5fb1 100644 --- a/include/doGemv.hh +++ b/include/doGemv.hh @@ -207,8 +207,8 @@ class doGemv { cpuResult = gemvCpu_.compute(); cpuResult.gflops = 
calcGflops(flops, iterations_, cpuResult.runtime); // Write result to CSV file - writeLineToCsv(csvFile, "cpu", kernelName, M, N, 0, probSize, iterations_, - cpuResult.runtime, cpuResult.gflops); + writeLineToCsv(csvFile, "cpu", kernelName, M, N, 0, probSize, 0.0, + iterations_, cpuResult.runtime, cpuResult.gflops); } #endif @@ -237,13 +237,13 @@ class doGemv { // Write results to CSV file writeLineToCsv(csvFile, "gpu_offloadOnce", kernelName, M, N, 0, probSize, - iterations_, gpuResult_once.runtime, + 0.0, iterations_, gpuResult_once.runtime, gpuResult_once.gflops); writeLineToCsv(csvFile, "gpu_offloadAlways", kernelName, M, N, 0, - probSize, iterations_, gpuResult_always.runtime, + probSize, 0.0, iterations_, gpuResult_always.runtime, gpuResult_always.gflops); writeLineToCsv(csvFile, "gpu_unified", kernelName, M, N, 0, probSize, - iterations_, gpuResult_unified.runtime, + 0.0, iterations_, gpuResult_unified.runtime, gpuResult_unified.gflops); } #endif @@ -500,8 +500,8 @@ class doGemv { const bool doGPU_ = true; /** Whether sparse and or dense kernels should be run. */ - const bool doSparse_; const bool doDense_; + const bool doSparse_; #if CPU_ENABLED /** The GEMV CPU kernel. */ diff --git a/include/helpers.hh b/include/helpers.hh index 5618557..d760cd7 100644 --- a/include/helpers.hh +++ b/include/helpers.hh @@ -17,8 +17,8 @@ std::ofstream initCSVFile(const std::string filename) { std::ofstream newFile(filename); - newFile << "Device,Kernel,M,N,K,Total Problem Size (KiB),Iterations,Total " - "Seconds,GFLOP/s" + newFile << "Device,Kernel,M,N,K,Total Problem Size (KiB),sparsity,Iterations," + "Total Seconds,GFLOP/s" << std::endl; return newFile; @@ -28,15 +28,17 @@ std::ofstream initCSVFile(const std::string filename) { * Function does not close the file. 
*/ void writeLineToCsv(std::ofstream& file, const std::string device, const std::string kernel, const int M, const int N, - const int K, const double totalProbSize, const int iters, - const double totalTime, const double gflops) { + const int K, const double totalProbSize, const float + sparsity, const int iters, const double totalTime, + const double gflops) { if (!file.is_open()) { std::cout << "ERROR - Attempted to write line to a closed CSV file." << std::endl; exit(1); } file << device << "," << kernel << "," << M << "," << N << "," << K << "," - << std::fixed << std::setprecision(3) << totalProbSize << "," << iters + << std::fixed << std::setprecision(3) << totalProbSize << "," + << std::fixed << std::setprecision(8) << sparsity << "," << iters << "," << std::fixed << std::setprecision(5) << totalTime << "," << std::fixed << std::setprecision(3) << gflops << std::endl; }