diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 33713b6..cbbd95a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,7 +3,7 @@ ci: autoupdate_schedule: quarterly repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.4.8 + rev: v0.6.7 hooks: - id: ruff args: [--fix, --exit-non-zero-on-fix] @@ -36,6 +36,6 @@ repos: # this validates our github workflow files - repo: https://github.com/python-jsonschema/check-jsonschema - rev: 0.28.4 + rev: 0.29.2 hooks: - id: check-github-workflows diff --git a/CMakeLists.txt b/CMakeLists.txt index 353a206..cd46b64 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,10 +1,5 @@ cmake_minimum_required(VERSION 3.15...3.26) -project(nanobind_project LANGUAGES CXX) -if(NOT APPLE) - find_package(OpenMP REQUIRED) -endif() - # Try to import all Python components potentially needed by nanobind find_package(Python 3.9 REQUIRED COMPONENTS Interpreter Development.Module @@ -28,23 +23,14 @@ nanobind_add_module( src/clustering.cpp ) -# Link OpenMP -if(OpenMP_CXX_FOUND) - target_link_libraries(_clustering PRIVATE OpenMP::OpenMP_CXX) -endif() # Compiler-specific options if(MSVC) - # Use MSVC optimization levels and OpenMP setup - target_compile_options(_clustering PRIVATE /O2 /std:c++17 /openmp:llvm) - # /openmp:llvm + # Use MSVC optimization levels + target_compile_options(_clustering PRIVATE /O2 /std:c++17) else() # Assuming GCC or Clang - if (APPLE) - target_compile_options(_clustering PRIVATE -O3) - else() - target_compile_options(_clustering PRIVATE -O3 -fopenmp) - endif() + target_compile_options(_clustering PRIVATE -O3) endif() diff --git a/src/clustering.cpp b/src/clustering.cpp index 87147e6..7b07da6 100644 --- a/src/clustering.cpp +++ b/src/clustering.cpp @@ -7,21 +7,9 @@ #include #include -// #include // needed to convert c++ sets to python sets -// #include // needed to convert c++ vectors to python lists #include "array_support.h" -#if !defined(__APPLE__) -#include -#endif - -#if defined(__linux__) || defined(__APPLE__) -typedef int64_t vtk_int; -#else -typedef int32_t vtk_int; -#endif - #ifdef _MSC_VER #define restrict __restrict #elif defined(__GNUC__) || defined(__clang__) @@ -99,7 +87,6 @@ PointNormals(NDArray points_arr, NDArray faces_arr const T *v = points_arr.data(); const int64_t *f = faces_arr.data(); -#pragma omp parallel for for (size_t i = 0; i < n_faces; i++) { int64_t point0 = f[i * 3 + 0]; int64_t point1 = f[i * 3 + 1]; @@ -163,7 +150,6 @@ PointNormals(NDArray points_arr, NDArray faces_arr } // Normalize point normals -#pragma omp parallel for for (size_t i = 0; i < n_points; i++) { T plen = sqrt( pnorm[i * 3 + 0] * pnorm[i * 3 + 0] + pnorm[i * 3 + 1] * pnorm[i * 3 + 1] + @@ -192,7 +178,6 @@ FaceCentroid(const NDArray points, const NDArray f auto fmean_arr = MakeNDArray({n_faces, 3}); T *fmean = fmean_arr.data(); -#pragma omp parallel for for (size_t i = 0; i < n_faces; i++) { const int64_t point0 = f[i * 3 + 0]; const int64_t point1 = f[i * 3 + 1]; @@ -221,7 +206,6 @@ FaceNormals(const NDArray points, const NDArray fa const T *v = points.data(); const int64_t *f = faces.data(); -#pragma omp parallel for for (size_t i = 0; i < n_faces; i++) { int64_t point0 = f[i * 3 + 0]; int64_t point1 = f[i * 3 + 1]; @@ -299,7 +283,6 @@ nb::tuple RayTrace( int *near_ind = near_ind_arr.data(); // Loop through each face and determine intersections -#pragma omp parallel for num_threads(num_threads) for (size_t i = 0; i < npoints; i++) { T prev_dist = std::numeric_limits::infinity(); int near_idx = -1; @@ -516,11 +499,8 @@ nb::tuple PointWeights( const T *v = points_arr.data(); const int64_t *f = faces_arr.data(); -#pragma omp parallel num_threads(n_threads) - { - T *local_pweight = AllocateArray(n_points, true); + T *local_pweight = AllocateArray(n_points, true); -#pragma omp for for (size_t i = 0; i < n_faces; i++) { int64_t point0 = f[i * 3 + 0]; int64_t point1 = f[i * 3 + 1]; @@ -550,13 +530,11 @@ nb::tuple PointWeights( } delete[] local_pweight; - } // ensure this actually helps const T *pweight_const = pweight; if (n_add_weights) { -#pragma omp parallel num_threads(n_threads) for (size_t i = 0; i < n_points; i++) { const T wgt = aweights[i] * pweight_const[i]; wvertex[i * 3 + 0] = wgt * v[i * 3 + 0]; @@ -564,7 +542,6 @@ nb::tuple PointWeights( wvertex[i * 3 + 2] = wgt * v[i * 3 + 2]; } } else { -#pragma omp parallel num_threads(n_threads) for (size_t i = 0; i < n_points; i++) { const T wgt = pweight[i]; wvertex[i * 3 + 0] = wgt * v[i * 3 + 0]; @@ -1315,7 +1292,6 @@ template NDArray TriArea(NDArray points, NDArray