From 0130b81655b1fa04b433c4d22f9288df723cefd2 Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Fri, 11 Oct 2024 15:58:16 +0100 Subject: [PATCH] Adding AOCL files --- .idea/workspace.xml | 23 ++++++++----- ArmPL/sp_gemm.hh | 84 +++++++++++++++++++++++++++++++++++++++++++++ Makefile | 2 +- include/doGemm.hh | 26 +++++++------- include/doGemv.hh | 12 +++---- include/helpers.hh | 12 ++++--- 6 files changed, 127 insertions(+), 32 deletions(-) diff --git a/.idea/workspace.xml b/.idea/workspace.xml index a5afad2..2bb35d8 100644 --- a/.idea/workspace.xml +++ b/.idea/workspace.xml @@ -15,14 +15,13 @@ - + + + - - - - + @@ -538,7 +545,6 @@ - @@ -563,6 +569,7 @@ - \ No newline at end of file diff --git a/ArmPL/sp_gemm.hh b/ArmPL/sp_gemm.hh index 28a2ca3..612f4f1 100644 --- a/ArmPL/sp_gemm.hh +++ b/ArmPL/sp_gemm.hh @@ -89,6 +89,90 @@ class sp_gemm_cpu : public sp_gemm { void preLoopRequirements() override { // Need to put A_ and B_ into A_armpl_ and B_armpl_ toCSR_armpl(); + + /** providing hints to ARMPL and optimizing the matrix datastructures */ + // TODO -- is noallocs best here? 
+ status_ = armpl_spmat_hint(A_armpl_, ARMPL_SPARSE_HINT_MEMORY, + ARMPL_SPARSE_MEMORY_NOALLOCS); + if (status_ != ARMPL_STATUS_SUCCESS) { + std::cout << "ERROR " << status_ << std::endl; + exit(1); + } + status_ = armpl_spmat_hint(B_armpl_, ARMPL_SPARSE_HINT_MEMORY, + ARMPL_SPARSE_MEMORY_NOALLOCS); + if (status_ != ARMPL_STATUS_SUCCESS) { + std::cout << "ERROR " << status_ << std::endl; + exit(1); + } + + status_ = armpl_spmat_hint(A_armpl_, ARMPL_SPARSE_HINT_STRUCTURE, + ARMPL_SPARSE_STRUCTURE_UNSTRUCTURED); + if (status_ != ARMPL_STATUS_SUCCESS) { + std::cout << "ERROR " << status_ << std::endl; + exit(1); + } + status_ = armpl_spmat_hint(B_armpl_, ARMPL_SPARSE_HINT_STRUCTURE, + ARMPL_SPARSE_STRUCTURE_UNSTRUCTURED); + if (status_ != ARMPL_STATUS_SUCCESS) { + std::cout << "ERROR " << status_ << std::endl; + exit(1); + } + + // TODO -- will this be FEW? + status_ = armpl_spmat_hint(A_armpl_, ARMPL_SPARSE_HINT_SPMM_INVOCATIONS, + ARMPL_SPARSE_INVOCATIONS_MANY); + if (status_ != ARMPL_STATUS_SUCCESS) { + std::cout << "ERROR " << status_ << std::endl; + exit(1); + } + status_ = armpl_spmat_hint(B_armpl_, ARMPL_SPARSE_HINT_SPMM_INVOCATIONS, + ARMPL_SPARSE_INVOCATIONS_MANY); + if (status_ != ARMPL_STATUS_SUCCESS) { + std::cout << "ERROR " << status_ << std::endl; + exit(1); + } + + status_ = armpl_spmat_hint(A_armpl_, ARMPL_SPARSE_HINT_SPMM_OPERATION, + ARMPL_SPARSE_OPERATION_NOTRANS); + if (status_ != ARMPL_STATUS_SUCCESS) { + std::cout << "ERROR " << status_ << std::endl; + exit(1); + } + status_ = armpl_spmat_hint(B_armpl_, ARMPL_SPARSE_HINT_SPMM_OPERATION, + ARMPL_SPARSE_OPERATION_NOTRANS); + if (status_ != ARMPL_STATUS_SUCCESS) { + std::cout << "ERROR " << status_ << std::endl; + exit(1); + } + + // TODO -- investigate which is better here + status_ = armpl_spmat_hint(A_armpl_, ARMPL_SPARSE_HINT_SPMM_STRATEGY, + ARMPL_SPARSE_SPMM_STRAT_OPT_PART_STRUCT); + if (status_ != ARMPL_STATUS_SUCCESS) { + std::cout << "ERROR " << status_ << std::endl; + exit(1); + } + 
status_ = armpl_spmat_hint(B_armpl_, ARMPL_SPARSE_HINT_SPMM_STRATEGY, + ARMPL_SPARSE_SPMM_STRAT_OPT_PART_STRUCT); + if (status_ != ARMPL_STATUS_SUCCESS) { + std::cout << "ERROR " << status_ << std::endl; + exit(1); + } + +// TODO -- this is throwing an error -- couldn't immediately fix so come +// back to + +// /** provide hints for the optimisation of the spmm execution */ +// status_ = armpl_spmm_optimize(ARMPL_SPARSE_OPERATION_NOTRANS, +// ARMPL_SPARSE_OPERATION_NOTRANS, +// ARMPL_SPARSE_SCALAR_ONE, +// A_armpl_, B_armpl_, +// ARMPL_SPARSE_SCALAR_ZERO, +// C_armpl_); +// if (status_ != ARMPL_STATUS_SUCCESS) { +// std::cout << "ERROR " << status_ << std::endl; +// exit(1); +// } } /** Perform any required steps after calling the GEMM kernel that should diff --git a/Makefile b/Makefile index e5091e0..22d080c 100644 --- a/Makefile +++ b/Makefile @@ -51,7 +51,7 @@ CXX = $(CXX_$(COMPILER)) CXXFLAGS_ARM = -std=c++17 -Wall -Ofast -$(ARCHFLAG)=native CXXFLAGS_CLANG = -std=c++17 -Wall -Ofast -$(ARCHFLAG)=native -CXXFLAGS_GNU = -std=c++17 -Wall -Ofast -$(ARCHFLAG)=native +CXXFLAGS_GNU = -std=c++17 -Wall -Wno-deprecated-declarations -Ofast -$(ARCHFLAG)=native CXXFLAGS_INTEL = -std=c++17 -Wall -Ofast -$(ARCHFLAG)=native -Wno-tautological-constant-compare CXXFLAGS_NVIDIA = -std=c++17 -Wall -O3 -fast -$(ARCHFLAG)=native CXXFLAGS_HIP = -std=c++17 -Wall -Ofast -$(ARCHFLAG)=native diff --git a/include/doGemm.hh b/include/doGemm.hh index a3e5e77..93cc058 100644 --- a/include/doGemm.hh +++ b/include/doGemm.hh @@ -392,8 +392,8 @@ class doGemm { cpuResult = gemmCpu_.compute(); cpuResult.gflops = calcGflops(flops, iterations_, cpuResult.runtime); // Write result to CSV file - writeLineToCsv(csvFile, "cpu", kernelName, M, N, K, probSize, iterations_, - cpuResult.runtime, cpuResult.gflops); + writeLineToCsv(csvFile, "cpu", kernelName, M, N, K, probSize, + 0.0, iterations_, cpuResult.runtime, cpuResult.gflops); } #endif @@ -422,13 +422,13 @@ class doGemm { // Write results to CSV file 
writeLineToCsv(csvFile, "gpu_offloadOnce", kernelName, M, N, K, probSize, - iterations_, gpuResult_once.runtime, + 0.0, iterations_, gpuResult_once.runtime, gpuResult_once.gflops); writeLineToCsv(csvFile, "gpu_offloadAlways", kernelName, M, N, K, - probSize, iterations_, gpuResult_always.runtime, + probSize, 0.0, iterations_, gpuResult_always.runtime, gpuResult_always.gflops); writeLineToCsv(csvFile, "gpu_unified", kernelName, M, N, K, probSize, - iterations_, gpuResult_unified.runtime, + 0.0, iterations_, gpuResult_unified.runtime, gpuResult_unified.gflops); } #endif @@ -578,8 +578,9 @@ class doGemm { spGemmCpu_.initialise(N, sparsity); time_checksum_gflop cpuResult = spGemmCpu_.compute(); cpuResult.gflops = calcGflops(flops, iterations_, cpuResult.runtime); - writeLineToCsv(csvFile, "cpu", kernelName, N, N, N, probSize, iterations_, - cpuResult.runtime, cpuResult.gflops); + writeLineToCsv(csvFile, "cpu", kernelName, N, N, N, probSize, + sparsity, iterations_, cpuResult.runtime, + cpuResult.gflops); } #endif #if GPU_ENABLED @@ -607,13 +608,14 @@ class doGemm { // Write lines to CSV file writeLineToCsv(csvFile, "gpu_offloadOnce", kernelName, N, N, N, probSize, - iterations_, gpuResult_once.runtime, gpuResult_once.gflops); + sparsity, iterations_, gpuResult_once.runtime, + gpuResult_once.gflops); writeLineToCsv(csvFile, "gpu_offloadAlways", kernelName, N, N, N, probSize, - iterations_, gpuResult_always.runtime, - gpuResult_always.gflops); + sparsity, iterations_, gpuResult_always.runtime, + gpuResult_always.gflops); writeLineToCsv(csvFile, "gpu_unified", kernelName, N, N, N, probSize, - iterations_, gpuResult_unified.runtime, - gpuResult_unified.gflops); + sparsity, iterations_, gpuResult_unified.runtime, + gpuResult_unified.gflops); } #endif diff --git a/include/doGemv.hh b/include/doGemv.hh index 12cd097..2ab5fb1 100644 --- a/include/doGemv.hh +++ b/include/doGemv.hh @@ -207,8 +207,8 @@ class doGemv { cpuResult = gemvCpu_.compute(); cpuResult.gflops = 
calcGflops(flops, iterations_, cpuResult.runtime); // Write result to CSV file - writeLineToCsv(csvFile, "cpu", kernelName, M, N, 0, probSize, iterations_, - cpuResult.runtime, cpuResult.gflops); + writeLineToCsv(csvFile, "cpu", kernelName, M, N, 0, probSize, 0.0, + iterations_, cpuResult.runtime, cpuResult.gflops); } #endif @@ -237,13 +237,13 @@ class doGemv { // Write results to CSV file writeLineToCsv(csvFile, "gpu_offloadOnce", kernelName, M, N, 0, probSize, - iterations_, gpuResult_once.runtime, + 0.0, iterations_, gpuResult_once.runtime, gpuResult_once.gflops); writeLineToCsv(csvFile, "gpu_offloadAlways", kernelName, M, N, 0, - probSize, iterations_, gpuResult_always.runtime, + probSize, 0.0, iterations_, gpuResult_always.runtime, gpuResult_always.gflops); writeLineToCsv(csvFile, "gpu_unified", kernelName, M, N, 0, probSize, - iterations_, gpuResult_unified.runtime, + 0.0, iterations_, gpuResult_unified.runtime, gpuResult_unified.gflops); } #endif @@ -500,8 +500,8 @@ class doGemv { const bool doGPU_ = true; /** Whether sparse and or dense kernels should be run. */ - const bool doSparse_; const bool doDense_; + const bool doSparse_; #if CPU_ENABLED /** The GEMV CPU kernel. */ diff --git a/include/helpers.hh b/include/helpers.hh index 5618557..d760cd7 100644 --- a/include/helpers.hh +++ b/include/helpers.hh @@ -17,8 +17,8 @@ std::ofstream initCSVFile(const std::string filename) { std::ofstream newFile(filename); - newFile << "Device,Kernel,M,N,K,Total Problem Size (KiB),Iterations,Total " - "Seconds,GFLOP/s" + newFile << "Device,Kernel,M,N,K,Total Problem Size (KiB),sparsity,Iterations," + "Total Seconds,GFLOP/s" << std::endl; return newFile; @@ -28,15 +28,17 @@ std::ofstream initCSVFile(const std::string filename) { * Function does not close the file. 
*/ void writeLineToCsv(std::ofstream& file, const std::string device, const std::string kernel, const int M, const int N, - const int K, const double totalProbSize, const int iters, - const double totalTime, const double gflops) { + const int K, const double totalProbSize, const float + sparsity, const int iters, const double totalTime, + const double gflops) { if (!file.is_open()) { std::cout << "ERROR - Attempted to write line to a closed CSV file." << std::endl; exit(1); } file << device << "," << kernel << "," << M << "," << N << "," << K << "," - << std::fixed << std::setprecision(3) << totalProbSize << "," << iters + << std::fixed << std::setprecision(3) << totalProbSize << "," + << std::fixed << std::setprecision(8) << sparsity << "," << iters << "," << std::fixed << std::setprecision(5) << totalTime << "," << std::fixed << std::setprecision(3) << gflops << std::endl; }