From 0130b81655b1fa04b433c4d22f9288df723cefd2 Mon Sep 17 00:00:00 2001
From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com>
Date: Fri, 11 Oct 2024 15:58:16 +0100
Subject: [PATCH] Add ArmPL sparse GEMM hints and a sparsity column to CSV output
---
.idea/workspace.xml | 23 ++++++++-----
ArmPL/sp_gemm.hh | 84 +++++++++++++++++++++++++++++++++++++++++++++
Makefile | 2 +-
include/doGemm.hh | 26 +++++++-------
include/doGemv.hh | 12 +++----
include/helpers.hh | 12 ++++---
6 files changed, 127 insertions(+), 32 deletions(-)
diff --git a/.idea/workspace.xml b/.idea/workspace.xml
index a5afad2..2bb35d8 100644
--- a/.idea/workspace.xml
+++ b/.idea/workspace.xml
@@ -15,14 +15,13 @@
-
+
+
+
-
-
-
-
+
@@ -520,7 +519,15 @@
1727949079616
-
+
+
+ 1728650780575
+
+
+
+ 1728650780575
+
+
@@ -538,7 +545,6 @@
-
@@ -563,6 +569,7 @@
-
+
+
\ No newline at end of file
diff --git a/ArmPL/sp_gemm.hh b/ArmPL/sp_gemm.hh
index 28a2ca3..612f4f1 100644
--- a/ArmPL/sp_gemm.hh
+++ b/ArmPL/sp_gemm.hh
@@ -89,6 +89,90 @@ class sp_gemm_cpu : public sp_gemm {
void preLoopRequirements() override {
// Need to put A_ and B_ into A_armpl_ and B_armpl_
toCSR_armpl();
+
+ /** providing hints to ARMPL and optimizing the matrix data structures */
+ // TODO -- is noallocs best here?
+ status_ = armpl_spmat_hint(A_armpl_, ARMPL_SPARSE_HINT_MEMORY,
+ ARMPL_SPARSE_MEMORY_NOALLOCS);
+ if (status_ != ARMPL_STATUS_SUCCESS) {
+ std::cout << "ERROR " << status_ << std::endl;
+ exit(1);
+ }
+ status_ = armpl_spmat_hint(B_armpl_, ARMPL_SPARSE_HINT_MEMORY,
+ ARMPL_SPARSE_MEMORY_NOALLOCS);
+ if (status_ != ARMPL_STATUS_SUCCESS) {
+ std::cout << "ERROR " << status_ << std::endl;
+ exit(1);
+ }
+
+ status_ = armpl_spmat_hint(A_armpl_, ARMPL_SPARSE_HINT_STRUCTURE,
+ ARMPL_SPARSE_STRUCTURE_UNSTRUCTURED);
+ if (status_ != ARMPL_STATUS_SUCCESS) {
+ std::cout << "ERROR " << status_ << std::endl;
+ exit(1);
+ }
+ status_ = armpl_spmat_hint(B_armpl_, ARMPL_SPARSE_HINT_STRUCTURE,
+ ARMPL_SPARSE_STRUCTURE_UNSTRUCTURED);
+ if (status_ != ARMPL_STATUS_SUCCESS) {
+ std::cout << "ERROR " << status_ << std::endl;
+ exit(1);
+ }
+
+ // TODO -- will this be FEW?
+ status_ = armpl_spmat_hint(A_armpl_, ARMPL_SPARSE_HINT_SPMM_INVOCATIONS,
+ ARMPL_SPARSE_INVOCATIONS_MANY);
+ if (status_ != ARMPL_STATUS_SUCCESS) {
+ std::cout << "ERROR " << status_ << std::endl;
+ exit(1);
+ }
+ status_ = armpl_spmat_hint(B_armpl_, ARMPL_SPARSE_HINT_SPMM_INVOCATIONS,
+ ARMPL_SPARSE_INVOCATIONS_MANY);
+ if (status_ != ARMPL_STATUS_SUCCESS) {
+ std::cout << "ERROR " << status_ << std::endl;
+ exit(1);
+ }
+
+ status_ = armpl_spmat_hint(A_armpl_, ARMPL_SPARSE_HINT_SPMM_OPERATION,
+ ARMPL_SPARSE_OPERATION_NOTRANS);
+ if (status_ != ARMPL_STATUS_SUCCESS) {
+ std::cout << "ERROR " << status_ << std::endl;
+ exit(1);
+ }
+ status_ = armpl_spmat_hint(B_armpl_, ARMPL_SPARSE_HINT_SPMM_OPERATION,
+ ARMPL_SPARSE_OPERATION_NOTRANS);
+ if (status_ != ARMPL_STATUS_SUCCESS) {
+ std::cout << "ERROR " << status_ << std::endl;
+ exit(1);
+ }
+
+ // TODO -- investigate which strategy is better here
+ status_ = armpl_spmat_hint(A_armpl_, ARMPL_SPARSE_HINT_SPMM_STRATEGY,
+ ARMPL_SPARSE_SPMM_STRAT_OPT_PART_STRUCT);
+ if (status_ != ARMPL_STATUS_SUCCESS) {
+ std::cout << "ERROR " << status_ << std::endl;
+ exit(1);
+ }
+ status_ = armpl_spmat_hint(B_armpl_, ARMPL_SPARSE_HINT_SPMM_STRATEGY,
+ ARMPL_SPARSE_SPMM_STRAT_OPT_PART_STRUCT);
+ if (status_ != ARMPL_STATUS_SUCCESS) {
+ std::cout << "ERROR " << status_ << std::endl;
+ exit(1);
+ }
+
+// TODO -- this call is throwing an error; couldn't fix it immediately, so
+// revisit later
+
+// /** provide hints for the optimisation of the spmm execution */
+// status_ = armpl_spmm_optimize(ARMPL_SPARSE_OPERATION_NOTRANS,
+// ARMPL_SPARSE_OPERATION_NOTRANS,
+// ARMPL_SPARSE_SCALAR_ONE,
+// A_armpl_, B_armpl_,
+// ARMPL_SPARSE_SCALAR_ZERO,
+// C_armpl_);
+// if (status_ != ARMPL_STATUS_SUCCESS) {
+// std::cout << "ERROR " << status_ << std::endl;
+// exit(1);
+// }
}
/** Perform any required steps after calling the GEMM kernel that should
diff --git a/Makefile b/Makefile
index e5091e0..22d080c 100644
--- a/Makefile
+++ b/Makefile
@@ -51,7 +51,7 @@ CXX = $(CXX_$(COMPILER))
CXXFLAGS_ARM = -std=c++17 -Wall -Ofast -$(ARCHFLAG)=native
CXXFLAGS_CLANG = -std=c++17 -Wall -Ofast -$(ARCHFLAG)=native
-CXXFLAGS_GNU = -std=c++17 -Wall -Ofast -$(ARCHFLAG)=native
+CXXFLAGS_GNU = -std=c++17 -Wall -Wno-deprecated-declarations -Ofast -$(ARCHFLAG)=native
CXXFLAGS_INTEL = -std=c++17 -Wall -Ofast -$(ARCHFLAG)=native -Wno-tautological-constant-compare
CXXFLAGS_NVIDIA = -std=c++17 -Wall -O3 -fast -$(ARCHFLAG)=native
CXXFLAGS_HIP = -std=c++17 -Wall -Ofast -$(ARCHFLAG)=native
diff --git a/include/doGemm.hh b/include/doGemm.hh
index a3e5e77..93cc058 100644
--- a/include/doGemm.hh
+++ b/include/doGemm.hh
@@ -392,8 +392,8 @@ class doGemm {
cpuResult = gemmCpu_.compute();
cpuResult.gflops = calcGflops(flops, iterations_, cpuResult.runtime);
// Write result to CSV file
- writeLineToCsv(csvFile, "cpu", kernelName, M, N, K, probSize, iterations_,
- cpuResult.runtime, cpuResult.gflops);
+ writeLineToCsv(csvFile, "cpu", kernelName, M, N, K, probSize,
+ 0.0, iterations_, cpuResult.runtime, cpuResult.gflops);
}
#endif
@@ -422,13 +422,13 @@ class doGemm {
// Write results to CSV file
writeLineToCsv(csvFile, "gpu_offloadOnce", kernelName, M, N, K, probSize,
- iterations_, gpuResult_once.runtime,
+ 0.0, iterations_, gpuResult_once.runtime,
gpuResult_once.gflops);
writeLineToCsv(csvFile, "gpu_offloadAlways", kernelName, M, N, K,
- probSize, iterations_, gpuResult_always.runtime,
+ probSize, 0.0, iterations_, gpuResult_always.runtime,
gpuResult_always.gflops);
writeLineToCsv(csvFile, "gpu_unified", kernelName, M, N, K, probSize,
- iterations_, gpuResult_unified.runtime,
+ 0.0, iterations_, gpuResult_unified.runtime,
gpuResult_unified.gflops);
}
#endif
@@ -578,8 +578,9 @@ class doGemm {
spGemmCpu_.initialise(N, sparsity);
time_checksum_gflop cpuResult = spGemmCpu_.compute();
cpuResult.gflops = calcGflops(flops, iterations_, cpuResult.runtime);
- writeLineToCsv(csvFile, "cpu", kernelName, N, N, N, probSize, iterations_,
- cpuResult.runtime, cpuResult.gflops);
+ writeLineToCsv(csvFile, "cpu", kernelName, N, N, N, probSize,
+ sparsity, iterations_, cpuResult.runtime,
+ cpuResult.gflops);
}
#endif
#if GPU_ENABLED
@@ -607,13 +608,14 @@ class doGemm {
// Write lines to CSV file
writeLineToCsv(csvFile, "gpu_offloadOnce", kernelName, N, N, N, probSize,
- iterations_, gpuResult_once.runtime, gpuResult_once.gflops);
+ sparsity, iterations_, gpuResult_once.runtime,
+ gpuResult_once.gflops);
writeLineToCsv(csvFile, "gpu_offloadAlways", kernelName, N, N, N, probSize,
- iterations_, gpuResult_always.runtime,
- gpuResult_always.gflops);
+ sparsity, iterations_, gpuResult_always.runtime,
+ gpuResult_always.gflops);
writeLineToCsv(csvFile, "gpu_unified", kernelName, N, N, N, probSize,
- iterations_, gpuResult_unified.runtime,
- gpuResult_unified.gflops);
+ sparsity, iterations_, gpuResult_unified.runtime,
+ gpuResult_unified.gflops);
}
#endif
diff --git a/include/doGemv.hh b/include/doGemv.hh
index 12cd097..2ab5fb1 100644
--- a/include/doGemv.hh
+++ b/include/doGemv.hh
@@ -207,8 +207,8 @@ class doGemv {
cpuResult = gemvCpu_.compute();
cpuResult.gflops = calcGflops(flops, iterations_, cpuResult.runtime);
// Write result to CSV file
- writeLineToCsv(csvFile, "cpu", kernelName, M, N, 0, probSize, iterations_,
- cpuResult.runtime, cpuResult.gflops);
+ writeLineToCsv(csvFile, "cpu", kernelName, M, N, 0, probSize, 0.0,
+ iterations_, cpuResult.runtime, cpuResult.gflops);
}
#endif
@@ -237,13 +237,13 @@ class doGemv {
// Write results to CSV file
writeLineToCsv(csvFile, "gpu_offloadOnce", kernelName, M, N, 0, probSize,
- iterations_, gpuResult_once.runtime,
+ 0.0, iterations_, gpuResult_once.runtime,
gpuResult_once.gflops);
writeLineToCsv(csvFile, "gpu_offloadAlways", kernelName, M, N, 0,
- probSize, iterations_, gpuResult_always.runtime,
+ probSize, 0.0, iterations_, gpuResult_always.runtime,
gpuResult_always.gflops);
writeLineToCsv(csvFile, "gpu_unified", kernelName, M, N, 0, probSize,
- iterations_, gpuResult_unified.runtime,
+ 0.0, iterations_, gpuResult_unified.runtime,
gpuResult_unified.gflops);
}
#endif
@@ -500,8 +500,8 @@ class doGemv {
const bool doGPU_ = true;
/** Whether sparse and or dense kernels should be run. */
- const bool doSparse_;
const bool doDense_;
+ const bool doSparse_;
#if CPU_ENABLED
/** The GEMV CPU kernel. */
diff --git a/include/helpers.hh b/include/helpers.hh
index 5618557..d760cd7 100644
--- a/include/helpers.hh
+++ b/include/helpers.hh
@@ -17,8 +17,8 @@ std::ofstream initCSVFile(const std::string filename) {
std::ofstream newFile(filename);
- newFile << "Device,Kernel,M,N,K,Total Problem Size (KiB),Iterations,Total "
- "Seconds,GFLOP/s"
+ newFile << "Device,Kernel,M,N,K,Total Problem Size (KiB),sparsity,Iterations,"
+ "Total Seconds,GFLOP/s"
<< std::endl;
return newFile;
@@ -28,15 +28,17 @@ std::ofstream initCSVFile(const std::string filename) {
* Function does not close the file. */
void writeLineToCsv(std::ofstream& file, const std::string device,
const std::string kernel, const int M, const int N,
- const int K, const double totalProbSize, const int iters,
- const double totalTime, const double gflops) {
+ const int K, const double totalProbSize, const float
+ sparsity, const int iters, const double totalTime,
+ const double gflops) {
if (!file.is_open()) {
std::cout << "ERROR - Attempted to write line to a closed CSV file."
<< std::endl;
exit(1);
}
file << device << "," << kernel << "," << M << "," << N << "," << K << ","
- << std::fixed << std::setprecision(3) << totalProbSize << "," << iters
+ << std::fixed << std::setprecision(3) << totalProbSize << ","
+ << std::fixed << std::setprecision(8) << sparsity << "," << iters
<< "," << std::fixed << std::setprecision(5) << totalTime << ","
<< std::fixed << std::setprecision(3) << gflops << std::endl;
}