Skip to content

Commit

Permalink
WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
ssheorey committed Dec 27, 2024
1 parent 8e43455 commit c325213
Show file tree
Hide file tree
Showing 75 changed files with 2,164 additions and 279 deletions.
10 changes: 8 additions & 2 deletions 3rdparty/find_dependencies.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -1532,12 +1532,14 @@ open3d_import_3rdparty_library(3rdparty_uvatlas
list(APPEND Open3D_3RDPARTY_PRIVATE_TARGETS_FROM_CUSTOM Open3D::3rdparty_uvatlas)


# SYCL link options are specified here. Compile options are only applied to SYCL source files and are specified in cmake/Open3DSYCLTargetSources.cmake
if(BUILD_SYCL_MODULE)
add_library(3rdparty_sycl INTERFACE)
target_link_libraries(3rdparty_sycl INTERFACE
$<$<AND:$<CXX_COMPILER_ID:IntelLLVM>,$<NOT:$<LINK_LANGUAGE:ISPC>>>:sycl>)
target_link_options(3rdparty_sycl INTERFACE
$<$<AND:$<CXX_COMPILER_ID:IntelLLVM>,$<NOT:$<LINK_LANGUAGE:ISPC>>>:-fsycl -fsycl-targets=spir64_x86_64>)
$<$<AND:$<CXX_COMPILER_ID:IntelLLVM>,$<NOT:$<LINK_LANGUAGE:ISPC>>>:-fsycl -fsycl-targets=intel_gpu_acm_g10>)
# $<$<AND:$<CXX_COMPILER_ID:IntelLLVM>,$<NOT:$<LINK_LANGUAGE:ISPC>>>:-fsycl -fsycl-targets=spir64,spir64_gen>)
if(NOT BUILD_SHARED_LIBS OR arg_PUBLIC)
install(TARGETS 3rdparty_sycl EXPORT Open3DTargets)
endif()
Expand All @@ -1563,8 +1565,12 @@ if(OPEN3D_USE_ONEAPI_PACKAGES)
GROUPED
INCLUDE_DIRS ${MKL_INCLUDE}/
LIB_DIR ${MKL_ROOT}/lib/intel64
LIBRARIES mkl_intel_ilp64 mkl_tbb_thread mkl_core
LIBRARIES $<$<BOOL:${BUILD_SYCL_MODULE}>:mkl_sycl> mkl_intel_ilp64 mkl_tbb_thread mkl_core
)
if (BUILD_SYCL_MODULE)
# target_link_options(3rdparty_mkl INTERFACE "-Wl,-export-dynamic")
target_link_libraries(3rdparty_mkl INTERFACE OpenCL)
endif()
# MKL definitions
target_compile_options(3rdparty_mkl INTERFACE "$<$<PLATFORM_ID:Linux,Darwin>:$<$<COMPILE_LANGUAGE:CXX>:-m64>>")
target_compile_definitions(3rdparty_mkl INTERFACE "$<$<COMPILE_LANGUAGE:CXX>:MKL_ILP64>")
Expand Down
16 changes: 10 additions & 6 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -282,15 +282,19 @@ endif()
if(BUILD_SYCL_MODULE AND NOT GLIBCXX_USE_CXX11_ABI)
message(FATAL_ERROR "BUILD_SYCL_MODULE=ON requires GLIBCXX_USE_CXX11_ABI=ON")
endif()
if(BUILD_SYCL_MODULE AND BUILD_TENSORFLOW_OPS)
message(FATAL_ERROR "BUILD_SYCL_MODULE=ON requires BUILD_TENSORFLOW_OPS=OFF")
endif()
if(BUILD_SYCL_MODULE AND BUILD_PYTORCH_OPS)
message(FATAL_ERROR "BUILD_SYCL_MODULE=ON requires BUILD_PYTORCH_OPS=OFF")
endif()
# if(BUILD_SYCL_MODULE AND BUILD_TENSORFLOW_OPS)
# message(FATAL_ERROR "BUILD_SYCL_MODULE=ON requires BUILD_TENSORFLOW_OPS=OFF")
# endif()
# if(BUILD_SYCL_MODULE AND BUILD_PYTORCH_OPS)
# message(FATAL_ERROR "BUILD_SYCL_MODULE=ON requires BUILD_PYTORCH_OPS=OFF")
# endif()
if(BUILD_SYCL_MODULE AND BUILD_CUDA_MODULE)
message(FATAL_ERROR "BUILD_SYCL_MODULE and BUILD_SYCL_MODULE cannot be on at the same time for now.")
endif()
# Use LLD with icpx for faster linking
if (CMAKE_CXX_COMPILER_ID MATCHES "IntelLLVM")
add_link_options("-fuse-ld=lld")
endif()

# Global flag to set CXX standard.
# This does not affect 3rd party libraries.
Expand Down
6 changes: 4 additions & 2 deletions cmake/Open3DSYCLTargetSources.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
#
# When BUILD_SYCL_MODULE=ON, set SYCL-specific compile flags for the listed
# source files and call target_sources(). If BUILD_SYCL_MODULE=OFF, this
# function directly calls target_sources().
# function directly calls target_sources(). For SYCL link options, see
# 3rdparty/find_dependencies.cmake
#
# Note: this is not a perfect forwarding to target_sources(), as it only support
# limited set of arguments. See the example usage below.
Expand Down Expand Up @@ -31,7 +32,8 @@ function(open3d_sycl_target_sources target)
if(BUILD_SYCL_MODULE)
foreach(sycl_file IN LISTS arg_UNPARSED_ARGUMENTS)
set_source_files_properties(${sycl_file} PROPERTIES
COMPILE_OPTIONS -fsycl -fsycl-unnamed-lambda -fsycl-targets=spir64_x86_64)
COMPILE_OPTIONS "-fsycl;-fsycl-targets=intel_gpu_acm_g10")
#COMPILE_OPTIONS "-fsycl;-fsycl-targets=spir64,spir64_gen")
if(arg_VERBOSE)
message(STATUS "open3d_sycl_target_sources(${target}): marked ${sycl_file} as SYCL code")
endif()
Expand Down
49 changes: 34 additions & 15 deletions cpp/open3d/core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ target_sources(core PRIVATE
CUDAUtils.cpp
Device.cpp
Dtype.cpp
EigenConverter.cpp
Indexer.cpp
MemoryManager.cpp
MemoryManagerCached.cpp
Expand All @@ -23,6 +22,7 @@ target_sources(core PRIVATE

# Compile regardless BUILD_SYCL_MODULE == ON or OFF.
open3d_sycl_target_sources(core PRIVATE
EigenConverter.cpp
SYCLUtils.cpp
)

Expand All @@ -43,22 +43,14 @@ target_sources(core PRIVATE
hashmap/HashSet.cpp
kernel/Kernel.cpp
linalg/AddMM.cpp
linalg/AddMMCPU.cpp
linalg/Det.cpp
linalg/Inverse.cpp
linalg/InverseCPU.cpp
linalg/LeastSquares.cpp
linalg/LeastSquaresCPU.cpp
linalg/LU.cpp
linalg/LUCPU.cpp
linalg/Matmul.cpp
linalg/MatmulCPU.cpp
linalg/Solve.cpp
linalg/SolveCPU.cpp
linalg/SVD.cpp
linalg/SVDCPU.cpp
linalg/Tri.cpp
linalg/TriCPU.cpp
nns/FixedRadiusIndex.cpp
nns/FixedRadiusSearchOps.cpp
nns/KnnIndex.cpp
Expand All @@ -73,21 +65,48 @@ set_target_properties(core_impl PROPERTIES CXX_VISIBILITY_PRESET "hidden")

target_sources(core_impl PRIVATE
kernel/Arange.cpp
kernel/ArangeCPU.cpp
kernel/BinaryEW.cpp
kernel/BinaryEWCPU.cpp
kernel/IndexGetSet.cpp
kernel/IndexGetSetCPU.cpp
kernel/IndexReduction.cpp
kernel/IndexReductionCPU.cpp
kernel/NonZero.cpp
kernel/NonZeroCPU.cpp
kernel/Reduction.cpp
kernel/ReductionCPU.cpp
kernel/UnaryEW.cpp
kernel/ArangeCPU.cpp
kernel/BinaryEWCPU.cpp
kernel/IndexGetSetCPU.cpp
kernel/IndexReductionCPU.cpp
kernel/NonZeroCPU.cpp
kernel/ReductionCPU.cpp
kernel/UnaryEWCPU.cpp
linalg/AddMMCPU.cpp
linalg/InverseCPU.cpp
linalg/LeastSquaresCPU.cpp
linalg/LUCPU.cpp
linalg/MatmulCPU.cpp
linalg/SolveCPU.cpp
linalg/SVDCPU.cpp
linalg/TriCPU.cpp
)

if (BUILD_SYCL_MODULE)
open3d_sycl_target_sources(core_impl PRIVATE
kernel/UnaryEWSYCL.cpp
kernel/BinaryEWSYCL.cpp
kernel/ArangeSYCL.cpp
kernel/IndexGetSetSYCL.cpp
kernel/NonZeroSYCL.cpp
kernel/IndexReductionSYCL.cpp
kernel/ReductionSYCL.cpp
linalg/AddMMSYCL.cpp
linalg/InverseSYCL.cpp
linalg/LeastSquaresSYCL.cpp
linalg/LUSYCL.cpp
linalg/MatmulSYCL.cpp
linalg/SolveSYCL.cpp
linalg/SVDSYCL.cpp
linalg/TriSYCL.cpp
)
endif()

if (BUILD_CUDA_MODULE)
target_sources(core PRIVATE
Expand Down
8 changes: 8 additions & 0 deletions cpp/open3d/core/Device.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,4 +115,12 @@ struct hash<open3d::core::Device> {
return std::hash<std::string>{}(device.ToString());
}
};

template <>
struct less<open3d::core::Device> {
bool operator()(const open3d::core::Device& lhs,
const open3d::core::Device& rhs) const {
return lhs.ToString() < rhs.ToString();
}
};
} // namespace std
8 changes: 4 additions & 4 deletions cpp/open3d/core/Indexer.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,11 @@ class Indexer;
class IndexerIterator;

// Maximum number of dimensions of TensorRef.
static constexpr int64_t MAX_DIMS = 10;
static constexpr int64_t MAX_DIMS = 4;

// Maximum number of inputs of an op.
// MAX_INPUTS shall be >= MAX_DIMS to support advanced indexing.
static constexpr int64_t MAX_INPUTS = 10;
static constexpr int64_t MAX_INPUTS = 4;

// Maximum number of outputs of an op. This number can be increased when
// necessary.
Expand Down Expand Up @@ -110,7 +110,7 @@ struct TensorRef {

TensorRef(const Tensor& t) {
if (t.NumDims() > MAX_DIMS) {
utility::LogError("Tenor has too many dimensions {} > {}.",
utility::LogError("Tensor has too many dimensions {} > {}.",
t.NumDims(), MAX_DIMS);
}
data_ptr_ = const_cast<void*>(t.GetDataPtr());
Expand Down Expand Up @@ -638,7 +638,7 @@ class Indexer {
class IndexerIterator {
public:
struct Iterator {
Iterator(){};
Iterator() {};
Iterator(const Indexer& indexer);
Iterator(Iterator&& other) = default;

Expand Down
4 changes: 2 additions & 2 deletions cpp/open3d/core/Indexer.isph
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@
#include "open3d/utility/Helper.isph"

// Maximum number of dimensions of TensorRef.
enum { MAX_DIMS = 10 };
enum { MAX_DIMS = 4 };

// Maximum number of inputs of an op.
// MAX_INPUTS shall be >= MAX_DIMS to support advanced indexing.
enum { MAX_INPUTS = 10 };
enum { MAX_INPUTS = 4 };

// Maximum number of outputs of an op. This number can be increased when
// necessary.
Expand Down
5 changes: 5 additions & 0 deletions cpp/open3d/core/ParallelFor.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,11 @@ void ParallelForCPU_(const Device& device, int64_t n, const func_t& func) {
/// \note If you use a lambda function, capture only the required variables
/// instead of all to prevent accidental race conditions. If you want the
/// kernel to be used on both CPU and CUDA, capture the variables by value.
/// \note This does not dispatch to SYCL, since SYCL has extra constraints:
/// - Lambdas may capture by value only.
/// - No function pointers / virtual functions.
/// Auto dispatch to SYCL will enforce these conditions even on CPU devices. Use
/// ParallelForSYCL instead.
template <typename func_t>
void ParallelFor(const Device& device, int64_t n, const func_t& func) {
#ifdef __CUDACC__
Expand Down
63 changes: 63 additions & 0 deletions cpp/open3d/core/ParallelForSYCL.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
// ----------------------------------------------------------------------------
// - Open3D: www.open3d.org -
// ----------------------------------------------------------------------------
// Copyright (c) 2018-2024 www.open3d.org
// SPDX-License-Identifier: MIT
// ----------------------------------------------------------------------------

#pragma once

#include <cstdint>
#include <type_traits>

#include "open3d/core/Device.h"
#include "open3d/core/Indexer.h"
#include "open3d/core/SYCLContext.h"
#include "open3d/utility/Logging.h"

namespace open3d {
namespace core {

/// Run a function in parallel with SYCL.
template <typename Functor, typename... FuncArgs>
void ParallelForSYCL(const Device& device,
Indexer indexer,
FuncArgs... func_args) {
if (!device.IsSYCL()) {
utility::LogError("ParallelFor for SYCL cannot run on device {}.",
device.ToString());
}
int64_t n = indexer.NumWorkloads();
if (n == 0) {
return;
}
auto queue = sy::SYCLContext::GetInstance().GetDefaultQueue(device);
/// TODO: Specify grid size based on device properties
queue.parallel_for<Functor>(n, [indexer, func_args...](int64_t i) {
Functor ef(indexer, func_args...);
ef(i);
}).wait_and_throw();
}

/// Run a function in parallel with SYCL.
template <typename Functor, typename... FuncArgs>
void ParallelForSYCL(const Device& device,
int64_t num_workloads,
FuncArgs... func_args) {
if (!device.IsSYCL()) {
utility::LogError("ParallelFor for SYCL cannot run on device {}.",
device.ToString());
}
if (num_workloads == 0) {
return;
}
auto queue = sy::SYCLContext::GetInstance().GetDefaultQueue(device);
/// TODO: Specify grid size based on device properties
queue.parallel_for<Functor>(num_workloads, [func_args...](int64_t i) {
Functor ef(func_args...);
ef(i);
}).wait_and_throw();
}

} // namespace core
} // namespace open3d
51 changes: 36 additions & 15 deletions cpp/open3d/core/SYCLContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,19 +27,44 @@ SYCLContext &SYCLContext::GetInstance() {
bool SYCLContext::IsAvailable() { return devices_.size() > 0; }

bool SYCLContext::IsDeviceAvailable(const Device &device) {
bool rc = false;
for (const Device &device_ : devices_) {
if (device == device_) {
rc = true;
break;
}
return devices_.find(device) != devices_.end();
}
std::vector<Device> SYCLContext::GetAvailableSYCLDevices() {
std::vector<Device> device_vec;
for (const auto &device : devices_) {
device_vec.push_back(device.first);
}
return rc;
return device_vec;
}
std::vector<Device> SYCLContext::GetAvailableSYCLDevices() { return devices_; }

sycl::queue SYCLContext::GetDefaultQueue(const Device &device) {
return device_to_default_queue_.at(device);
return devices_.at(device).queue;
}

SYCLDevice::SYCLDevice(const sycl::device &sycl_device) {
namespace sid = sycl::info::device;
device = sycl_device;
queue = sycl::queue(device);
name = device.get_info<sid::name>();
max_work_group_size = device.get_info<sid::max_work_group_size>();
auto aspects = device.get_info<sid::aspects>();
fp64 = std::find(aspects.begin(), aspects.end(), sycl::aspect::fp64) !=
aspects.end();
if (!fp64) {
utility::LogWarning(
"SYCL device {} does not support double precision. Using "
"emulation.",
name);
}
usm_device_allocations =
std::find(aspects.begin(), aspects.end(),
sycl::aspect::usm_device_allocations) != aspects.end();
if (!usm_device_allocations) {
utility::LogWarning(
"SYCL device {} does not support USM device allocations. "
"Open3D SYCL support may not work.",
name);
}
}

SYCLContext::SYCLContext() {
Expand All @@ -48,9 +73,7 @@ SYCLContext::SYCLContext() {
try {
const sycl::device &sycl_device = sycl::device(sycl::gpu_selector_v);
const Device open3d_device = Device("SYCL:0");
devices_.push_back(open3d_device);
device_to_sycl_device_[open3d_device] = sycl_device;
device_to_default_queue_[open3d_device] = sycl::queue(sycl_device);
devices_.emplace(open3d_device, sycl_device);
} catch (const sycl::exception &e) {
}

Expand All @@ -66,9 +89,7 @@ SYCLContext::SYCLContext() {
const sycl::device &sycl_device = sycl::device(sycl::cpu_selector_v);
const Device open3d_device =
Device("SYCL:" + std::to_string(devices_.size()));
devices_.push_back(open3d_device);
device_to_sycl_device_[open3d_device] = sycl_device;
device_to_default_queue_[open3d_device] = sycl::queue(sycl_device);
devices_.emplace(open3d_device, sycl_device);
} catch (const sycl::exception &e) {
}

Expand Down
Loading

0 comments on commit c325213

Please sign in to comment.