Skip to content

Commit

Permalink
Merge branch 'benchmarks' of https://github.com/lab-cosmo/mops into b…
Browse files Browse the repository at this point in the history
…enchmarks
  • Loading branch information
frostedoyster committed Nov 11, 2023
2 parents 50bcd7f + 37e2ea3 commit 31f84db
Show file tree
Hide file tree
Showing 10 changed files with 176 additions and 13 deletions.
41 changes: 39 additions & 2 deletions mops/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ else()
endif()

# Standard CMake switch selecting the default library type (shared vs static)
option(BUILD_SHARED_LIBS "Build shared libraries instead of static ones" OFF)

# Optional build features. Both default to ON and are worded as "try to use":
# they express a preference, not a hard requirement on the toolchain.
option(MOPS_OPENMP "Try to use OpenMP when compiling mops" ON)
option(MOPS_ARCH_NATIVE "Try to use -march=native when compiling mops" ON)

# Installation layout, every path being relative to CMAKE_INSTALL_PREFIX
set(LIB_INSTALL_DIR "lib" CACHE PATH "Path relative to CMAKE_INSTALL_PREFIX where to install libraries")
set(BIN_INSTALL_DIR "bin" CACHE PATH "Path relative to CMAKE_INSTALL_PREFIX where to install DLL/binaries")
set(INCLUDE_INSTALL_DIR "include" CACHE PATH "Path relative to CMAKE_INSTALL_PREFIX where to install headers")
Expand All @@ -39,9 +43,9 @@ option(MOPS_TESTS "build and run mops unit tests" ${MOPS_MAIN_PROJECT})
# Set a default build type if none was specified
if (${MOPS_MAIN_PROJECT})
if("${CMAKE_BUILD_TYPE}" STREQUAL "" AND "${CMAKE_CONFIGURATION_TYPES}" STREQUAL "")
message(STATUS "Setting build type to 'relwithdebinfo' as none was specified.")
message(STATUS "Setting build type to 'release' as none was specified.")
set(
CMAKE_BUILD_TYPE "relwithdebinfo"
CMAKE_BUILD_TYPE "release"
CACHE STRING
"Choose the type of build, options are: none(CMAKE_CXX_FLAGS or CMAKE_C_FLAGS used) debug release relwithdebinfo minsizerel."
FORCE
Expand Down Expand Up @@ -109,6 +113,39 @@ target_include_directories(mops PUBLIC
)


# Handle optimization and OpenMP flags
#
# Every flag below is attached to the `mops` target instead of being appended
# to the directory-wide CMAKE_CXX_FLAGS, so it cannot leak into other targets
# configured from this directory or its subdirectories.
include(CheckCXXCompilerFlag)
check_cxx_compiler_flag("-Wunknown-pragmas" COMPILER_SUPPORTS_WPRAGMAS)

set(mops_openmp_in_use OFF)
if (MOPS_OPENMP)
    # Not REQUIRED on purpose: a missing OpenMP is reported with a warning
    # and the configuration continues without it.
    find_package(OpenMP)
    if (OpenMP_CXX_FOUND)
        message(STATUS "OpenMP is enabled")
        target_link_libraries(mops PUBLIC OpenMP::OpenMP_CXX)
        set(mops_openmp_in_use ON)
    else()
        message(WARNING "Could not find OpenMP")
    endif()
endif()

# Whenever OpenMP ends up unused (disabled by the user or not found), silence
# the unknown-pragma warning on compilers which know about it.
# NOTE(review): presumably the sources contain `#pragma omp` lines that would
# otherwise be reported as unknown -- confirm.
if (NOT mops_openmp_in_use AND COMPILER_SUPPORTS_WPRAGMAS)
    target_compile_options(mops PRIVATE -Wno-unknown-pragmas)
endif()

if (MOPS_ARCH_NATIVE)
    check_cxx_compiler_flag("-march=native" COMPILER_SUPPORTS_MARCH_NATIVE)
    # for some reason COMPILER_SUPPORTS_MARCH_NATIVE is true with Apple clang,
    # but then fails with `the clang compiler does not support '-march=native'`
    #
    # MATCHES "Clang" is true for both the "Clang" and "AppleClang" compiler
    # IDs, so the flag is skipped for every Clang flavor.
    if (COMPILER_SUPPORTS_MARCH_NATIVE AND NOT CMAKE_CXX_COMPILER_ID MATCHES "Clang")
        message(STATUS "march=native is enabled")
        # PRIVATE: machine-specific code generation only concerns the
        # compilation of mops itself, not the code linking against it
        target_compile_options(mops PRIVATE -march=native)
    else()
        message(STATUS "march=native is not supported by this compiler")
    endif()
endif()


if (MOPS_TESTS)
enable_testing()
add_subdirectory(tests)
Expand Down
1 change: 0 additions & 1 deletion mops/src/opsa/cpu.tpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@ void mops::outer_product_scatter_add(

for (size_t i=0; i<tensor_a.shape[0]; i++) {
auto i_output = indexes.data[i];
assert(i_output < output.shape[0]);
for (size_t a_j=0; a_j<tensor_a.shape[1]; a_j++) {
for (size_t b_j=0; b_j<tensor_b.shape[1]; b_j++) {
auto output_index = tensor_b.shape[1] * (tensor_a.shape[1] * i_output + a_j) + b_j;
Expand Down
33 changes: 29 additions & 4 deletions python/mops/benchmarks/benchmark.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,49 @@
import numpy as np
import math
import time

import matplotlib.pyplot as plt
import numpy as np


def benchmark(function, repeats=1000, warmup=10, plot=True):
    """Time repeated calls of ``function`` and summarize the timings.

    :param function: callable taking no arguments; its return value is ignored
    :param repeats: number of timed calls
    :param warmup: number of untimed calls executed before the timed ones
    :param plot: if true, plot the individual timings against the call index
        and save the figure to ``benchmark_plot.pdf``
    :return: tuple ``(mean, std)`` of the per-call timings, in seconds
    """
    for _ in range(warmup):
        function()

    timings = []
    for _ in range(repeats):
        # perf_counter() is monotonic and uses the highest resolution clock
        # available, unlike time.time() which follows the (adjustable and
        # possibly coarse) system wall clock
        start = time.perf_counter()
        function()
        end = time.perf_counter()
        timings.append(end - start)

    times_array = np.array(timings)
    mean = np.mean(times_array)
    std = np.std(times_array)
    if std > 0.1 * mean:
        print("warning: inconsistent timings")

    if plot:
        plt.plot(np.arange(times_array.shape[0]), times_array, ".")
        plt.savefig("benchmark_plot.pdf")

    return mean, std


def format_mean_std(mean, std_dev, decimals=2):
    """Format a mean and its standard deviation with a shared power of ten.

    The result looks like ``(1.50±0.20)e-3``: both values are divided by the
    same power of ten, chosen so that the mean has one digit before the
    decimal point.

    :param mean: mean value; ``0`` and non-finite values use an exponent of 0
    :param std_dev: standard deviation, scaled by the same exponent as ``mean``
    :param decimals: number of digits printed after the decimal point
    :return: string of the form ``(<mean>±<std_dev>)e<exponent>``
    """
    format_string = f"{{:.{decimals}f}}"

    # find the exponent; log10 is undefined for 0 and floor() raises on
    # inf/nan, so these cases fall back to no scaling at all
    if mean != 0 and math.isfinite(mean):
        exponent = math.floor(math.log10(abs(mean)))
    else:
        exponent = 0

    # scale and format the mean
    formatted_mean = format_string.format(mean / (10**exponent))

    # rounding to `decimals` digits can push the mantissa up to 10 (e.g.
    # 9.999 -> "10.00"); move to the next power of ten in that case
    if abs(float(formatted_mean)) >= 10:
        exponent += 1
        formatted_mean = format_string.format(mean / (10**exponent))

    # the standard deviation uses the exponent of the mean
    formatted_std_dev = format_string.format(std_dev / (10**exponent))

    return f"({formatted_mean}±{formatted_std_dev})e{exponent}"
10 changes: 5 additions & 5 deletions python/mops/benchmarks/hpe.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import numpy as np
from benchmark import benchmark

from benchmark import benchmark, format_mean_std
from mops.reference_implementations import homogeneous_polynomial_evaluation as ref_hpe

from mops import homogeneous_polynomial_evaluation as hpe

np.random.seed(0xDEADBEEF)
Expand All @@ -13,7 +13,7 @@
ref_mean, ref_std = benchmark(lambda: ref_hpe(A, C, P))
mean, std = benchmark(lambda: hpe(A, C, P))

print(f"{ref_mean:.2e}", f"{ref_std:.2e}")
print(f"{mean:.2e}", f"{std:.2e}")
print("Reference implementation:", format_mean_std(ref_mean, ref_std))
print("Optimized implementation:", format_mean_std(mean, std))

print("Speed-up:", ref_mean/mean)
print("Speed-up:", ref_mean / mean)
20 changes: 20 additions & 0 deletions python/mops/benchmarks/opsa.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import numpy as np
from benchmark import benchmark, format_mean_std
from mops.reference_implementations import outer_product_scatter_add as ref_opsa

from mops import outer_product_scatter_add as opsa

# Fixed seed, so that every run works on the same random inputs
np.random.seed(0xDEADBEEF)

A = np.random.rand(1000, 20)
B = np.random.rand(1000, 5)

indices = np.sort(np.random.randint(10, size=(1000,)))
# Computed once, outside of the timed callables: evaluating np.max() inside
# the lambdas would add the same overhead to both measurements and distort
# the reported speed-up.
# NOTE(review): presumably the size of the output's first dimension -- confirm
# against the signature of outer_product_scatter_add.
output_size = np.max(indices) + 1

ref_mean, ref_std = benchmark(lambda: ref_opsa(A, B, indices, output_size))
mean, std = benchmark(lambda: opsa(A, B, indices, output_size))

print("Reference implementation:", format_mean_std(ref_mean, ref_std))
print("Optimized implementation:", format_mean_std(mean, std))

print("Speed-up:", ref_mean / mean)
26 changes: 26 additions & 0 deletions python/mops/benchmarks/opsax.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import numpy as np
from benchmark import benchmark, format_mean_std
from mops.reference_implementations import (
outer_product_scatter_add_with_weights as ref_opsax,
)

from mops import outer_product_scatter_add_with_weights as opsax

# Fixed seed, so that every run works on the same random inputs
np.random.seed(0xDEADBEEF)


A = np.random.rand(100, 10)
R = np.random.rand(100, 5)
# Single source of truth for the value 20, which was previously repeated as a
# literal in the index bounds and in the last argument of both calls.
# NOTE(review): assumes all of these occurrences are meant to be the same
# quantity -- confirm against outer_product_scatter_add_with_weights.
n_O = 20
X = np.random.rand(n_O, 5)

I = np.random.randint(n_O, size=(100,))
J = np.random.randint(n_O, size=(100,))

ref_mean, ref_std = benchmark(lambda: ref_opsax(A, R, X, I, J, n_O))
mean, std = benchmark(lambda: opsax(A, R, X, I, J, n_O))

print("Reference implementation:", format_mean_std(ref_mean, ref_std))
print("Optimized implementation:", format_mean_std(mean, std))

print("Speed-up:", ref_mean / mean)
24 changes: 24 additions & 0 deletions python/mops/benchmarks/sap.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import numpy as np
from benchmark import benchmark, format_mean_std
from mops.reference_implementations import sparse_accumulation_of_products as ref_sap

from mops import sparse_accumulation_of_products as sap

# Seed numpy's global generator: the inputs below are identical on every run
np.random.seed(0xDEADBEEF)

n_O = 50

# Random inputs. The order of the np.random calls determines the generated
# values, so it must not be changed.
A = np.random.rand(1000, 20)
B = np.random.rand(1000, 6)
C = np.random.rand(100)
P_A = np.random.randint(20, size=(100,))
P_B = np.random.randint(6, size=(100,))
P_O = np.random.randint(n_O, size=(100,))


def run_reference():
    """Single call to the reference implementation on the inputs above."""
    return ref_sap(A, B, C, P_A, P_B, P_O, n_O)


def run_optimized():
    """Single call to the mops implementation on the inputs above."""
    return sap(A, B, C, P_A, P_B, P_O, n_O)


# Time the reference first, then the optimized implementation
ref_mean, ref_std = benchmark(run_reference)
mean, std = benchmark(run_optimized)

print("Reference implementation:", format_mean_std(ref_mean, ref_std))
print("Optimized implementation:", format_mean_std(mean, std))

speed_up = ref_mean / mean
print("Speed-up:", speed_up)
31 changes: 31 additions & 0 deletions python/mops/benchmarks/sasax.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import numpy as np
from benchmark import benchmark, format_mean_std
from mops.reference_implementations import (
sparse_accumulation_scatter_add_with_weights as ref_sasax,
)

from mops import sparse_accumulation_scatter_add_with_weights as sasax

# Seed numpy's global generator: the inputs below are identical on every run
np.random.seed(0xDEADBEEF)

n_O1 = 25
n_O2 = 15

# Random inputs. The order of the np.random calls determines the generated
# values, so it must not be changed.
A = np.random.rand(100, 20)
R = np.random.rand(100, 200)
X = np.random.rand(25, 13, 200)
C = np.random.rand(50)
I = np.random.randint(25, size=(100,))
J = np.random.randint(25, size=(100,))
M_1 = np.random.randint(20, size=(50,))
M_2 = np.random.randint(13, size=(50,))
M_3 = np.random.randint(n_O2, size=(50,))


def run_reference():
    """Single call to the reference implementation on the inputs above."""
    return ref_sasax(A, R, X, C, I, J, M_1, M_2, M_3, n_O1, n_O2)


def run_optimized():
    """Single call to the mops implementation on the inputs above."""
    return sasax(A, R, X, C, I, J, M_1, M_2, M_3, n_O1, n_O2)


# Time the reference first, then the optimized implementation
ref_mean, ref_std = benchmark(run_reference)
mean, std = benchmark(run_optimized)

print("Reference implementation:", format_mean_std(ref_mean, ref_std))
print("Optimized implementation:", format_mean_std(mean, std))

speed_up = ref_mean / mean
print("Speed-up:", speed_up)
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def run(self):
os.makedirs(build_dir, exist_ok=True)

cmake_options = [
"-DCMAKE_BUILD_TYPE=Debug",
"-DCMAKE_BUILD_TYPE=release",
"-DBUILD_SHARED_LIBS=ON",
f"-DCMAKE_INSTALL_PREFIX={install_dir}",
]
Expand Down
1 change: 1 addition & 0 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ deps =
black
blackdoc
isort
clang-format
commands =
isort {[testenv]lint_folders}
black {[testenv]lint_folders}
Expand Down

0 comments on commit 31f84db

Please sign in to comment.