Skip to content

Commit

Permalink
remove openmp
Browse files Browse the repository at this point in the history
  • Loading branch information
akaszynski committed Sep 25, 2024
1 parent edf7665 commit 12c0050
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 44 deletions.
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ ci:
autoupdate_schedule: quarterly
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
- rev: v0.4.8
+ rev: v0.6.7
hooks:
- id: ruff
args: [--fix, --exit-non-zero-on-fix]
Expand Down Expand Up @@ -36,6 +36,6 @@ repos:

# this validates our github workflow files
- repo: https://github.com/python-jsonschema/check-jsonschema
- rev: 0.28.4
+ rev: 0.29.2
hooks:
- id: check-github-workflows
20 changes: 3 additions & 17 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,5 @@
cmake_minimum_required(VERSION 3.15...3.26)

project(nanobind_project LANGUAGES CXX)
- if(NOT APPLE)
- find_package(OpenMP REQUIRED)
- endif()

# Try to import all Python components potentially needed by nanobind
find_package(Python 3.9
REQUIRED COMPONENTS Interpreter Development.Module
Expand All @@ -28,23 +23,14 @@ nanobind_add_module(
src/clustering.cpp
)

- # Link OpenMP
- if(OpenMP_CXX_FOUND)
- target_link_libraries(_clustering PRIVATE OpenMP::OpenMP_CXX)
- endif()

# Compiler-specific options
if(MSVC)
- # Use MSVC optimization levels and OpenMP setup
- target_compile_options(_clustering PRIVATE /O2 /std:c++17 /openmp:llvm)
- # /openmp:llvm
+ # Use MSVC optimization levels
+ target_compile_options(_clustering PRIVATE /O2 /std:c++17)
else()
# Assuming GCC or Clang
- if (APPLE)
- target_compile_options(_clustering PRIVATE -O3)
- else()
- target_compile_options(_clustering PRIVATE -O3 -fopenmp)
- endif()
+ target_compile_options(_clustering PRIVATE -O3)

endif()

Expand Down
26 changes: 1 addition & 25 deletions src/clustering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,21 +7,9 @@

#include <nanobind/nanobind.h>
#include <nanobind/ndarray.h>
- // #include <nanobind/stl/set.h> // needed to convert c++ sets to python sets
- // #include <nanobind/stl/vector.h> // needed to convert c++ vectors to python lists

#include "array_support.h"

- #if !defined(__APPLE__)
- #include <omp.h>
- #endif

- #if defined(__linux__) || defined(__APPLE__)
- typedef int64_t vtk_int;
- #else
- typedef int32_t vtk_int;
- #endif

#ifdef _MSC_VER
#define restrict __restrict
#elif defined(__GNUC__) || defined(__clang__)
Expand Down Expand Up @@ -99,7 +87,6 @@ PointNormals(NDArray<const T, 2> points_arr, NDArray<const int64_t, 2> faces_arr
const T *v = points_arr.data();
const int64_t *f = faces_arr.data();

- #pragma omp parallel for
for (size_t i = 0; i < n_faces; i++) {
int64_t point0 = f[i * 3 + 0];
int64_t point1 = f[i * 3 + 1];
Expand Down Expand Up @@ -163,7 +150,6 @@ PointNormals(NDArray<const T, 2> points_arr, NDArray<const int64_t, 2> faces_arr
}

// Normalize point normals
- #pragma omp parallel for
for (size_t i = 0; i < n_points; i++) {
T plen = sqrt(
pnorm[i * 3 + 0] * pnorm[i * 3 + 0] + pnorm[i * 3 + 1] * pnorm[i * 3 + 1] +
Expand Down Expand Up @@ -192,7 +178,6 @@ FaceCentroid(const NDArray<const T, 2> points, const NDArray<const int64_t, 2> f
auto fmean_arr = MakeNDArray<T, 2>({n_faces, 3});
T *fmean = fmean_arr.data();

- #pragma omp parallel for
for (size_t i = 0; i < n_faces; i++) {
const int64_t point0 = f[i * 3 + 0];
const int64_t point1 = f[i * 3 + 1];
Expand Down Expand Up @@ -221,7 +206,6 @@ FaceNormals(const NDArray<const T, 2> points, const NDArray<const int64_t, 2> fa
const T *v = points.data();
const int64_t *f = faces.data();

- #pragma omp parallel for
for (size_t i = 0; i < n_faces; i++) {
int64_t point0 = f[i * 3 + 0];
int64_t point1 = f[i * 3 + 1];
Expand Down Expand Up @@ -299,7 +283,6 @@ nb::tuple RayTrace(
int *near_ind = near_ind_arr.data();

// Loop through each face and determine intersections
- #pragma omp parallel for num_threads(num_threads)
for (size_t i = 0; i < npoints; i++) {
T prev_dist = std::numeric_limits<T>::infinity();
int near_idx = -1;
Expand Down Expand Up @@ -516,11 +499,8 @@ nb::tuple PointWeights(
const T *v = points_arr.data();
const int64_t *f = faces_arr.data();

- #pragma omp parallel num_threads(n_threads)
- {
- T *local_pweight = AllocateArray<T>(n_points, true);
+ T *local_pweight = AllocateArray<T>(n_points, true);

- #pragma omp for
for (size_t i = 0; i < n_faces; i++) {
int64_t point0 = f[i * 3 + 0];
int64_t point1 = f[i * 3 + 1];
Expand Down Expand Up @@ -550,21 +530,18 @@ nb::tuple PointWeights(
}

delete[] local_pweight;
- }

// ensure this actually helps
const T *pweight_const = pweight;

if (n_add_weights) {
- #pragma omp parallel num_threads(n_threads)
for (size_t i = 0; i < n_points; i++) {
const T wgt = aweights[i] * pweight_const[i];
wvertex[i * 3 + 0] = wgt * v[i * 3 + 0];
wvertex[i * 3 + 1] = wgt * v[i * 3 + 1];
wvertex[i * 3 + 2] = wgt * v[i * 3 + 2];
}
} else {
- #pragma omp parallel num_threads(n_threads)
for (size_t i = 0; i < n_points; i++) {
const T wgt = pweight[i];
wvertex[i * 3 + 0] = wgt * v[i * 3 + 0];
Expand Down Expand Up @@ -1315,7 +1292,6 @@ template <typename T> NDArray<T, 1> TriArea(NDArray<T, 2> points, NDArray<int64_
auto v = points.view();
auto f = faces.view();

- #pragma omp parallel for firstprivate(v, f, tria_view)
for (size_t i = 0; i < n_faces; i++) {
int64_t point0 = f(i, 0);
int64_t point1 = f(i, 1);
Expand Down

0 comments on commit 12c0050

Please sign in to comment.