From 445f9e9d9b3379ebf6d252eeb2c91cdacc44fdc2 Mon Sep 17 00:00:00 2001 From: Gilles Daviet Date: Sun, 26 May 2024 17:22:28 -0700 Subject: [PATCH] NanoVDB Index grid support --- docs/modules/functions.rst | 55 + docs/modules/runtime.rst | 41 +- warp/builtins.py | 181 + warp/context.py | 88 +- warp/native/exports.h | 1 + warp/native/mat.h | 12 + warp/native/nanovdb/GridHandle.h | 366 + warp/native/nanovdb/HostBuffer.h | 590 ++ warp/native/nanovdb/NanoVDB.h | 6156 +++++++++++------ warp/native/nanovdb/PNanoVDB.h | 1035 ++- warp/native/nanovdb/PNanoVDBWrite.h | 295 - warp/native/nanovdb/cuda/DeviceBuffer.h | 231 + warp/native/nanovdb/cuda/GridHandle.cuh | 76 + warp/native/nanovdb/math/Math.h | 1448 ++++ .../nanovdb/tools/cuda/PointsToGrid.cuh | 1291 ++++ warp/native/nanovdb/util/Util.h | 657 ++ warp/native/nanovdb/util/cuda/Timer.h | 116 + warp/native/nanovdb/util/cuda/Util.h | 193 + warp/native/volume.cpp | 438 +- warp/native/volume.cu | 52 +- warp/native/volume.h | 1028 ++- warp/native/volume_builder.cu | 741 +- warp/native/volume_builder.h | 17 +- warp/native/volume_impl.h | 61 + warp/native/warp.h | 22 +- warp/stubs.py | 11 + warp/tests/assets/test_index_grid.nvdb | 3 + warp/tests/test_volume.py | 248 +- warp/tests/test_volume_write.py | 86 + warp/types.py | 480 +- 30 files changed, 12606 insertions(+), 3413 deletions(-) create mode 100644 warp/native/nanovdb/GridHandle.h create mode 100644 warp/native/nanovdb/HostBuffer.h delete mode 100644 warp/native/nanovdb/PNanoVDBWrite.h create mode 100644 warp/native/nanovdb/cuda/DeviceBuffer.h create mode 100644 warp/native/nanovdb/cuda/GridHandle.cuh create mode 100644 warp/native/nanovdb/math/Math.h create mode 100644 warp/native/nanovdb/tools/cuda/PointsToGrid.cuh create mode 100644 warp/native/nanovdb/util/Util.h create mode 100644 warp/native/nanovdb/util/cuda/Timer.h create mode 100644 warp/native/nanovdb/util/cuda/Util.h create mode 100644 warp/native/volume_impl.h create mode 100644 warp/tests/assets/test_index_grid.nvdb diff --git a/docs/modules/functions.rst b/docs/modules/functions.rst index e112444a..d010ccf9 100644 --- a/docs/modules/functions.rst +++ b/docs/modules/functions.rst @@ -1563,6 +1563,32 @@ Geometry Volumes --------------- +.. py:function:: volume_sample(id: uint64, uvw: vec3f, sampling_mode: int32, dtype: Any) + + Sample the volume of type `dtype` given by ``id`` at the volume local-space point ``uvw``. + + Interpolation should be :attr:`warp.Volume.CLOSEST` or :attr:`wp.Volume.LINEAR.` + + +.. py:function:: volume_sample_grad(id: uint64, uvw: vec3f, sampling_mode: int32, grad: Any, dtype: Any) + + Sample the volume given by ``id`` and its gradient at the volume local-space point ``uvw``. + + Interpolation should be :attr:`warp.Volume.CLOSEST` or :attr:`wp.Volume.LINEAR.` + + +.. py:function:: volume_lookup(id: uint64, i: int32, j: int32, k: int32, dtype: Any) + + Returns the value of voxel with coordinates ``i``, ``j``, ``k`` for a volume of type type `dtype`. + + If the voxel at this index does not exist, this function returns the background value. + + +.. py:function:: volume_store(id: uint64, i: int32, j: int32, k: int32, value: Any) + + Store ``value`` at the voxel with coordinates ``i``, ``j``, ``k``. + + .. py:function:: volume_sample_f(id: uint64, uvw: vec3f, sampling_mode: int32) -> float Sample the volume given by ``id`` at the volume local-space point ``uvw``. @@ -1625,6 +1651,35 @@ Volumes Store ``value`` at the voxel with coordinates ``i``, ``j``, ``k``. +.. 
py:function:: volume_sample_index(id: uint64, uvw: vec3f, sampling_mode: int32, voxel_data: Array[Any], background: Any) + + Sample the volume given by ``id`` at the volume local-space point ``uvw``. + + Values for allocated voxels are read from the ``voxel_data`` array, and `background` is used as the value of non-existing voxels. + Interpolation should be :attr:`warp.Volume.CLOSEST` or :attr:`wp.Volume.LINEAR`. + This function is available for both index grids and classical volumes. + + + +.. py:function:: volume_sample_grad_index(id: uint64, uvw: vec3f, sampling_mode: int32, voxel_data: Array[Any], background: Any, grad: Any) + + Sample the volume given by ``id`` and its gradient at the volume local-space point ``uvw``. + + Values for allocated voxels are read from the ``voxel_data`` array, and `background` is used as the value of non-existing voxels. + Interpolation should be :attr:`warp.Volume.CLOSEST` or :attr:`wp.Volume.LINEAR`. + This function is available for both index grids and classical volumes. + + + +.. py:function:: volume_lookup_index(id: uint64, i: int32, j: int32, k: int32) -> int32 + + Returns the index associated to the voxel with coordinates ``i``, ``j``, ``k``. + + If the voxel at this index does not exist, this function returns -1. + This function is available for both index grids and classical volumes. + + + .. py:function:: volume_index_to_world(id: uint64, uvw: vec3f) -> vec3f Transform a point ``uvw`` defined in volume index space to world space given the volume's intrinsic affine transformation. diff --git a/docs/modules/runtime.rst b/docs/modules/runtime.rst index 85400bb9..c4db7348 100644 --- a/docs/modules/runtime.rst +++ b/docs/modules/runtime.rst @@ -979,12 +979,12 @@ or use built-in closest-point or trilinear interpolation to sample grid data fro Volume objects can be created directly from Warp arrays containing a NanoVDB grid, from the contents of a standard ``.nvdb`` file using :func:`load_from_nvdb() `, +from an uncompressed in-memory buffer using :func:`load_from_address() `, or from a dense 3D NumPy array using :func:`load_from_numpy() `. -Volumes can also be created using :func:`allocate() ` or -:func:`allocate_by_tiles() `. The values for a Volume object can be modified in a Warp -kernel using :func:`wp.volume_store_f() `, :func:`wp.volume_store_v() `, and -:func:`wp.volume_store_i() `. +Volumes can also be created using :func:`allocate() `, +:func:`allocate_by_tiles() ` or :func:`allocate_by_voxels() `. +The values for a Volume object can be modified in a Warp kernel using :func:`wp.volume_store() `. .. note:: Warp does not currently support modifying the topology of sparse volumes at runtime. @@ -999,8 +999,11 @@ Below we give an example of creating a Volume object from an existing NanoVDB fi .. note:: Files written by the NanoVDB library, commonly marked by the ``.nvdb`` extension, can contain multiple grids with - various compression methods, but a :class:`Volume` object represents a single NanoVDB grid therefore only files with - a single grid are supported. NanoVDB's uncompressed and zip-compressed file formats are supported. + various compression methods, but a :class:`Volume` object represents a single NanoVDB grid. + The first grid is loaded by default, then Warp volumes corresponding to the other grids in the file can be created + using repeated calls to :func:`load_next_grid() `. 
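A minimal loading sketch, assuming a multi-grid file named ``multi_grid.nvdb`` exists and assuming :func:`load_next_grid()` returns the next :class:`Volume` in the file and ``None`` once every grid has been read::

    import warp as wp

    wp.init()

    volumes = []
    volume = wp.Volume.load_from_nvdb(open("multi_grid.nvdb", "rb"), device="cuda:0")
    while volume is not None:
        volumes.append(volume)
        volume = volume.load_next_grid()  # assumed to return None after the last grid
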
+ NanoVDB's uncompressed and zip-compressed file formats are supported out-of-the-box, blosc compressed files require + the `blosc` Python package to be installed. To sample the volume inside a kernel we pass a reference to it by ID, and use the built-in sampling modes:: @@ -1018,11 +1021,35 @@ To sample the volume inside a kernel we pass a reference to it by ID, and use th q = wp.volume_world_to_index(volume, p) # sample volume with trilinear interpolation - f = wp.volume_sample_f(volume, q, wp.Volume.LINEAR) + f = wp.volume_sample(volume, q, wp.Volume.LINEAR, dtype=float) # write result samples[tid] = f +Warp also supports NanoVDB index grids, which provide a memory-efficient linearization of voxel indices that can refer +to values in arbitrarily shaped arrays:: + + @wp.kernel + def sample_index_grid(volume: wp.uint64, + points: wp.array(dtype=wp.vec3), + voxel_values: wp.array(dtype=Any)): + + tid = wp.tid() + + # load sample point in world-space + p = points[tid] + + # transform position to the volume's local-space + q = wp.volume_world_to_index(volume, p) + + # sample volume with trilinear interpolation + background_value = voxel_values.dtype(0.0) + f = wp.volume_sample_index(volume, q, wp.Volume.LINEAR, voxel_values, background_value) + +The coordinates of all indexable voxels can be recovered using :func:`get_voxels() `. +NanoVDB grids may also contains embedded *blind* data arrays; those can be accessed with the +:func:`feature_array() ` function. + .. autoclass:: Volume :members: :undoc-members: diff --git a/warp/builtins.py b/warp/builtins.py index cd66410b..e6a45249 100644 --- a/warp/builtins.py +++ b/warp/builtins.py @@ -2103,6 +2103,112 @@ def spatial_vector_constructor_value_func(arg_types, kwds, templates): # --------------------------------- # Volumes +_volume_supported_value_types = { + int32, + int64, + uint32, + float32, + float64, + vec3f, + vec3d, + vec4f, + vec4d, +} + + +def volume_value_func(arg_types, kwds, templates): + try: + dtype = kwds["dtype"] + except KeyError as err: + raise RuntimeError( + "'dtype' keyword argument must be specified when calling generic volume lookup or sampling functions" + ) from err + + if dtype not in _volume_supported_value_types: + raise RuntimeError(f"Unsupported volume type '{type_repr(dtype)}'") + + templates.append(dtype) + + return dtype + + +add_builtin( + "volume_sample", + input_types={"id": uint64, "uvw": vec3, "sampling_mode": int, "dtype": Any}, + value_func=volume_value_func, + export=False, + group="Volumes", + doc="""Sample the volume of type `dtype` given by ``id`` at the volume local-space point ``uvw``. 
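For illustration, a hedged kernel sketch using the generic sampler registered above — the velocity-volume and array names are placeholders, and ``dtype`` must be one of the types listed in ``_volume_supported_value_types``::

    import warp as wp

    @wp.kernel
    def sample_velocity(volume: wp.uint64,
                        points: wp.array(dtype=wp.vec3),
                        out: wp.array(dtype=wp.vec3)):
        tid = wp.tid()
        # transform the query point to the volume's index space
        q = wp.volume_world_to_index(volume, points[tid])
        # the value type of the generic sampler is selected with the dtype keyword
        out[tid] = wp.volume_sample(volume, q, wp.Volume.LINEAR, dtype=wp.vec3)
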
+ + Interpolation should be :attr:`warp.Volume.CLOSEST` or :attr:`wp.Volume.LINEAR.`""", +) + + +def check_volume_value_grad_compatibility(dtype, grad_dtype): + if type_is_vector(dtype): + expected = matrix(shape=(type_length(dtype), 3), dtype=type_scalar_type(dtype)) + else: + expected = vector(length=3, dtype=dtype) + + if not types_equal(grad_dtype, expected): + raise RuntimeError(f"Incompatible gradient type, expected {type_repr(expected)}, got {type_repr(grad_dtype)}") + + +def volume_sample_grad_value_func(arg_types, kwds, templates): + dtype = volume_value_func(arg_types, kwds, templates) + + if len(arg_types) < 4: + raise RuntimeError("'volume_sample_grad' requires 4 positional arguments") + + grad_type = arg_types[3] + check_volume_value_grad_compatibility(dtype, grad_type) + return dtype + + +add_builtin( + "volume_sample_grad", + input_types={"id": uint64, "uvw": vec3, "sampling_mode": int, "grad": Any, "dtype": Any}, + value_func=volume_sample_grad_value_func, + export=False, + group="Volumes", + doc="""Sample the volume given by ``id`` and its gradient at the volume local-space point ``uvw``. + + Interpolation should be :attr:`warp.Volume.CLOSEST` or :attr:`wp.Volume.LINEAR.`""", +) + +add_builtin( + "volume_lookup", + input_types={"id": uint64, "i": int, "j": int, "k": int, "dtype": Any}, + value_type=int, + value_func=volume_value_func, + export=False, + group="Volumes", + doc="""Returns the value of voxel with coordinates ``i``, ``j``, ``k`` for a volume of type type `dtype`. + + If the voxel at this index does not exist, this function returns the background value.""", +) + + +def volume_store_value_func(arg_types, kwds, templates): + if len(arg_types) < 4: + raise RuntimeError("'volume_store' requires 5 positional arguments") + + dtype = arg_types[4] + if dtype not in _volume_supported_value_types: + raise RuntimeError(f"Unsupported volume type '{type_repr(dtype)}'") + + return None + + +add_builtin( + "volume_store", + value_func=volume_store_value_func, + input_types={"id": uint64, "i": int, "j": int, "k": int, "value": Any}, + export=False, + group="Volumes", + doc="""Store ``value`` at the voxel with coordinates ``i``, ``j``, ``k``.""", +) + add_builtin( "volume_sample_f", input_types={"id": uint64, "uvw": vec3, "sampling_mode": int}, @@ -2192,6 +2298,81 @@ def spatial_vector_constructor_value_func(arg_types, kwds, templates): doc="""Store ``value`` at the voxel with coordinates ``i``, ``j``, ``k``.""", ) + +def volume_sample_index_value_func(arg_types, kwds, templates): + if len(arg_types) != 5: + raise RuntimeError("'volume_sample_index' requires 5 positional arguments") + + dtype = arg_types[3].dtype + + if not types_equal(dtype, arg_types[4]): + raise RuntimeError("The 'voxel_data' array and the 'background' value must have the same dtype") + + return dtype + + +add_builtin( + "volume_sample_index", + input_types={"id": uint64, "uvw": vec3, "sampling_mode": int, "voxel_data": array(dtype=Any), "background": Any}, + value_func=volume_sample_index_value_func, + export=False, + group="Volumes", + doc="""Sample the volume given by ``id`` at the volume local-space point ``uvw``. + + Values for allocated voxels are read from the ``voxel_data`` array, and `background` is used as the value of non-existing voxels. + Interpolation should be :attr:`warp.Volume.CLOSEST` or :attr:`wp.Volume.LINEAR`. + This function is available for both index grids and classical volumes. 
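As a hedged end-to-end sketch of the index-grid path described above — the ``volume`` and ``points`` objects are assumed to exist already, and the ordering of ``voxel_values`` is assumed to match the grid's internal voxel indexing::

    import warp as wp

    @wp.kernel
    def sample_index_volume(volume: wp.uint64,
                            voxel_values: wp.array(dtype=float),
                            points: wp.array(dtype=wp.vec3),
                            out: wp.array(dtype=float)):
        tid = wp.tid()
        q = wp.volume_world_to_index(volume, points[tid])
        # voxels not allocated in the grid fall back to the background value (0.0 here)
        out[tid] = wp.volume_sample_index(volume, q, wp.Volume.LINEAR, voxel_values, 0.0)

    # host side: one value per allocated voxel (placeholder values for the sketch)
    voxel_count = volume.get_voxels().shape[0]
    voxel_values = wp.zeros(voxel_count, dtype=float, device=volume.device)
    out = wp.empty(points.shape[0], dtype=float, device=volume.device)
    wp.launch(sample_index_volume, dim=points.shape[0],
              inputs=[volume.id, voxel_values, points, out])
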
+ """, +) + + +def volume_sample_grad_index_value_func(arg_types, kwds, templates): + if len(arg_types) != 6: + raise RuntimeError("'volume_sample_grad_index' requires 6 positional arguments") + + dtype = arg_types[3].dtype + + if not types_equal(dtype, arg_types[4]): + raise RuntimeError("The 'voxel_data' array and the 'background' value must have the same dtype") + + grad_type = arg_types[5] + check_volume_value_grad_compatibility(dtype, grad_type) + return dtype + + +add_builtin( + "volume_sample_grad_index", + input_types={ + "id": uint64, + "uvw": vec3, + "sampling_mode": int, + "voxel_data": array(dtype=Any), + "background": Any, + "grad": Any, + }, + value_func=volume_sample_grad_index_value_func, + export=False, + group="Volumes", + doc="""Sample the volume given by ``id`` and its gradient at the volume local-space point ``uvw``. + + Values for allocated voxels are read from the ``voxel_data`` array, and `background` is used as the value of non-existing voxels. + Interpolation should be :attr:`warp.Volume.CLOSEST` or :attr:`wp.Volume.LINEAR`. + This function is available for both index grids and classical volumes. + """, +) + +add_builtin( + "volume_lookup_index", + input_types={"id": uint64, "i": int, "j": int, "k": int}, + value_type=int32, + group="Volumes", + doc="""Returns the index associated to the voxel with coordinates ``i``, ``j``, ``k``. + + If the voxel at this index does not exist, this function returns -1. + This function is available for both index grids and classical volumes. + """, +) + add_builtin( "volume_index_to_world", input_types={"id": uint64, "uvw": vec3}, diff --git a/warp/context.py b/warp/context.py index 98053f7f..49a3ac4e 100644 --- a/warp/context.py +++ b/warp/context.py @@ -2622,22 +2622,36 @@ def __init__(self): ] self.core.cutlass_gemm.restype = ctypes.c_bool - self.core.volume_create_host.argtypes = [ctypes.c_void_p, ctypes.c_uint64] + self.core.volume_create_host.argtypes = [ctypes.c_void_p, ctypes.c_uint64, ctypes.c_bool, ctypes.c_bool] self.core.volume_create_host.restype = ctypes.c_uint64 - self.core.volume_get_buffer_info_host.argtypes = [ + self.core.volume_get_tiles_host.argtypes = [ ctypes.c_uint64, - ctypes.POINTER(ctypes.c_void_p), - ctypes.POINTER(ctypes.c_uint64), + ctypes.c_void_p, ] - self.core.volume_get_tiles_host.argtypes = [ + self.core.volume_get_voxels_host.argtypes = [ ctypes.c_uint64, - ctypes.POINTER(ctypes.c_void_p), - ctypes.POINTER(ctypes.c_uint64), + ctypes.c_void_p, ] self.core.volume_destroy_host.argtypes = [ctypes.c_uint64] - self.core.volume_create_device.argtypes = [ctypes.c_void_p, ctypes.c_void_p, ctypes.c_uint64] + self.core.volume_create_device.argtypes = [ + ctypes.c_void_p, + ctypes.c_void_p, + ctypes.c_uint64, + ctypes.c_bool, + ctypes.c_bool, + ] self.core.volume_create_device.restype = ctypes.c_uint64 + self.core.volume_get_tiles_device.argtypes = [ + ctypes.c_uint64, + ctypes.c_void_p, + ] + self.core.volume_get_voxels_device.argtypes = [ + ctypes.c_uint64, + ctypes.c_void_p, + ] + self.core.volume_destroy_device.argtypes = [ctypes.c_uint64] + self.core.volume_f_from_tiles_device.argtypes = [ ctypes.c_void_p, ctypes.c_void_p, @@ -2676,24 +2690,68 @@ def __init__(self): ctypes.c_bool, ] self.core.volume_i_from_tiles_device.restype = ctypes.c_uint64 - self.core.volume_get_buffer_info_device.argtypes = [ - ctypes.c_uint64, - ctypes.POINTER(ctypes.c_void_p), - ctypes.POINTER(ctypes.c_uint64), + self.core.volume_index_from_tiles_device.argtypes = [ + ctypes.c_void_p, + ctypes.c_void_p, + ctypes.c_int, + 
ctypes.c_float, + ctypes.c_float, + ctypes.c_float, + ctypes.c_float, + ctypes.c_bool, ] - self.core.volume_get_tiles_device.argtypes = [ + self.core.volume_index_from_tiles_device.restype = ctypes.c_uint64 + self.core.volume_from_active_voxels_device.argtypes = [ + ctypes.c_void_p, + ctypes.c_void_p, + ctypes.c_int, + ctypes.c_float, + ctypes.c_float, + ctypes.c_float, + ctypes.c_float, + ctypes.c_bool, + ] + self.core.volume_from_active_voxels_device.restype = ctypes.c_uint64 + + self.core.volume_get_buffer_info.argtypes = [ ctypes.c_uint64, ctypes.POINTER(ctypes.c_void_p), ctypes.POINTER(ctypes.c_uint64), ] - self.core.volume_destroy_device.argtypes = [ctypes.c_uint64] - self.core.volume_get_voxel_size.argtypes = [ ctypes.c_uint64, ctypes.POINTER(ctypes.c_float), ctypes.POINTER(ctypes.c_float), ctypes.POINTER(ctypes.c_float), ] + self.core.volume_get_tile_and_voxel_count.argtypes = [ + ctypes.c_uint64, + ctypes.POINTER(ctypes.c_uint32), + ctypes.POINTER(ctypes.c_uint64), + ] + self.core.volume_get_grid_info.argtypes = [ + ctypes.c_uint64, + ctypes.POINTER(ctypes.c_uint64), + ctypes.POINTER(ctypes.c_uint32), + ctypes.POINTER(ctypes.c_uint32), + ctypes.c_float * 3, + ctypes.c_float * 9, + ctypes.c_char * 16, + ] + self.core.volume_get_grid_info.restype = ctypes.c_char_p + self.core.volume_get_blind_data_count.argtypes = [ + ctypes.c_uint64, + ] + self.core.volume_get_blind_data_count.restype = ctypes.c_uint64 + self.core.volume_get_blind_data_info.argtypes = [ + ctypes.c_uint64, + ctypes.c_uint32, + ctypes.POINTER(ctypes.c_void_p), + ctypes.POINTER(ctypes.c_uint64), + ctypes.POINTER(ctypes.c_uint32), + ctypes.c_char * 16, + ] + self.core.volume_get_blind_data_info.restype = ctypes.c_char_p bsr_matrix_from_triplets_argtypes = [ ctypes.c_int, diff --git a/warp/native/exports.h b/warp/native/exports.h index 636c7da8..f6f085a4 100644 --- a/warp/native/exports.h +++ b/warp/native/exports.h @@ -968,6 +968,7 @@ WP_API void builtin_volume_sample_v_uint64_vec3f_int32(uint64 id, vec3f& uvw, in WP_API void builtin_volume_lookup_v_uint64_int32_int32_int32(uint64 id, int32 i, int32 j, int32 k, vec3f* ret) { *ret = wp::volume_lookup_v(id, i, j, k); } WP_API void builtin_volume_sample_i_uint64_vec3f(uint64 id, vec3f& uvw, int* ret) { *ret = wp::volume_sample_i(id, uvw); } WP_API void builtin_volume_lookup_i_uint64_int32_int32_int32(uint64 id, int32 i, int32 j, int32 k, int* ret) { *ret = wp::volume_lookup_i(id, i, j, k); } +WP_API void builtin_volume_lookup_index_uint64_int32_int32_int32(uint64 id, int32 i, int32 j, int32 k, int32* ret) { *ret = wp::volume_lookup_index(id, i, j, k); } WP_API void builtin_volume_index_to_world_uint64_vec3f(uint64 id, vec3f& uvw, vec3f* ret) { *ret = wp::volume_index_to_world(id, uvw); } WP_API void builtin_volume_world_to_index_uint64_vec3f(uint64 id, vec3f& xyz, vec3f* ret) { *ret = wp::volume_world_to_index(id, xyz); } WP_API void builtin_volume_index_to_world_dir_uint64_vec3f(uint64 id, vec3f& uvw, vec3f* ret) { *ret = wp::volume_index_to_world_dir(id, uvw); } diff --git a/warp/native/mat.h b/warp/native/mat.h index 5e569d89..6a2c099f 100644 --- a/warp/native/mat.h +++ b/warp/native/mat.h @@ -877,6 +877,18 @@ inline CUDA_CALLABLE mat_t outer(const vec_t& a, cons return ret; } +template +inline CUDA_CALLABLE vec_t outer(Type a, const vec_t& b) +{ + return mul(a, b); +} + +template +inline CUDA_CALLABLE vec_t outer(const vec_t& a, Type b) +{ + return mul(a, b); +} + template inline CUDA_CALLABLE mat_t<3,3,Type> skew(const vec_t<3,Type>& a) { diff --git 
a/warp/native/nanovdb/GridHandle.h b/warp/native/nanovdb/GridHandle.h new file mode 100644 index 00000000..9aa3ea66 --- /dev/null +++ b/warp/native/nanovdb/GridHandle.h @@ -0,0 +1,366 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: MPL-2.0 + +/*! + \file nanovdb/GridHandle.h + + \author Ken Museth + + \date January 8, 2020 + + \brief Defines GridHandle, which manages a host, and possibly a device, + memory buffer containing one or more NanoVDB grids. +*/ + +#ifndef NANOVDB_GRID_HANDLE_H_HAS_BEEN_INCLUDED +#define NANOVDB_GRID_HANDLE_H_HAS_BEEN_INCLUDED + +#include // for std::ifstream +#include // for std::cerr/cout +#include +#include + +#include // for toGridType +#include + +namespace nanovdb { + +// --------------------------> GridHandle <------------------------------------ + +struct GridHandleMetaData {uint64_t offset, size; GridType gridType;}; + +/// @brief This class serves to manage a buffer containing one or more NanoVDB Grids. +/// +/// @note It is important to note that this class does NOT depend on OpenVDB. +template +class GridHandle +{ + std::vector mMetaData; + BufferT mBuffer; + + template + static T* no_const(const T* ptr) { return const_cast(ptr); } + +public: + using BufferType = BufferT; + + /// @brief Move constructor from a host buffer + /// @param buffer buffer containing one or more NanoGrids that will be moved into this GridHandle + /// @throw Will throw and error with the buffer does not contain a valid NanoGrid! + template::hasDeviceDual, int>::type = 0> + GridHandle(T&& buffer); + + /// @brief Move constructor from a dual host-device buffer + /// @param buffer buffer containing one or more NanoGrids that will be moved into this GridHandle + /// @throw Will throw and error with the buffer does not contain a valid NanoGrid! + template::hasDeviceDual, int>::type = 0> + GridHandle(T&& buffer); + + /// @brief Constructs an empty GridHandle + GridHandle() = default; + + /// @brief Disallow copy-construction + GridHandle(const GridHandle&) = delete; + + /// @brief Move copy-constructor + GridHandle(GridHandle&& other) noexcept { + mBuffer = std::move(other.mBuffer); + mMetaData = std::move(other.mMetaData); + } + + /// @brief clear this GridHandle to an empty handle + void reset() { + mBuffer.clear(); + mMetaData.clear(); + } + + /// @brief Disallow copy assignment operation + GridHandle& operator=(const GridHandle&) = delete; + + /// @brief Move copy assignment operation + GridHandle& operator=(GridHandle&& other) noexcept { + mBuffer = std::move(other.mBuffer); + mMetaData = std::move(other.mMetaData); + return *this; + } + + /// @brief Performs a deep copy of the GridHandle, possibly templated on a different buffer type + /// @tparam OtherBufferT Buffer type of the deep copy + /// @param buffer optional buffer used for allocation + /// @return A new handle of the specified buffer type that contains a deep copy of the current handle + template + GridHandle copy(const OtherBufferT& buffer = OtherBufferT()) const; + + /// @brief Return a reference to the buffer + BufferT& buffer() { return mBuffer; } + + /// @brief Return a const reference to the buffer + const BufferT& buffer() const { return mBuffer; } + + /// @brief Returns a non-const pointer to the data. + /// @warning Note that the return pointer can be NULL if the GridHandle was not initialized + void* data() { return mBuffer.data(); } + + /// @brief Returns a const pointer to the data. 
+ /// @warning Note that the return pointer can be NULL if the GridHandle was not initialized + const void* data() const { return mBuffer.data(); } + + template + typename util::enable_if::hasDeviceDual, const void*>::type + deviceData() const { return mBuffer.deviceData(); } + template + typename util::enable_if::hasDeviceDual, void*>::type + deviceData() { return mBuffer.deviceData(); } + + /// @brief Returns the size in bytes of the raw memory buffer managed by this GridHandle. + uint64_t size() const { return mBuffer.size(); } + + //@{ + /// @brief Return true if this handle is empty, i.e. has no allocated memory + bool empty() const { return this->size() == 0; } + bool isEmpty() const { return this->size() == 0; } + //@} + + /// @brief Return true if this handle contains any grids + operator bool() const { return !this->empty(); } + + /// @brief Returns a const host pointer to the @a n'th NanoVDB grid encoded in this GridHandle. + /// @tparam ValueT Value type of the grid point to be returned + /// @param n Index of the (host) grid pointer to be returned + /// @warning Note that the return pointer can be NULL if the GridHandle no host grid, @a n is invalid + /// or if the template parameter does not match the specified grid! + template + const NanoGrid* grid(uint32_t n = 0) const; + + /// @brief Returns a host pointer to the @a n'th NanoVDB grid encoded in this GridHandle. + /// @tparam ValueT Value type of the grid point to be returned + /// @param n Index of the (host) grid pointer to be returned + /// @warning Note that the return pointer can be NULL if the GridHandle no host grid, @a n is invalid + /// or if the template parameter does not match the specified grid! + template + NanoGrid* grid(uint32_t n = 0) {return const_cast*>(static_cast(this)->template grid(n));} + + /// @brief Return a const pointer to the @a n'th grid encoded in this GridHandle on the device, e.g. GPU + /// @tparam ValueT Value type of the grid point to be returned + /// @param n Index of the (device) grid pointer to be returned + /// @warning Note that the return pointer can be NULL if the GridHandle has no device grid, @a n is invalid, + /// or if the template parameter does not match the specified grid. + template + typename util::enable_if::hasDeviceDual, const NanoGrid*>::type + deviceGrid(uint32_t n=0) const; + + /// @brief Return a const pointer to the @a n'th grid encoded in this GridHandle on the device, e.g. GPU + /// @tparam ValueT Value type of the grid point to be returned + /// @param n Index if of the grid pointer to be returned + /// @param verbose if non-zero error messages will be printed in case something failed + /// @warning Note that the return pointer can be NULL if the GridHandle was not initialized, @a n is invalid, + /// or if the template parameter does not match the specified grid. + template + typename util::enable_if::hasDeviceDual, NanoGrid*>::type + deviceGrid(uint32_t n=0){return const_cast*>(static_cast(this)->template deviceGrid(n));} + + /// @brief Upload the grid to the device, e.g. from CPU to GPU + /// @note This method is only available if the buffer supports devices + template + typename util::enable_if::hasDeviceDual, void>::type + deviceUpload(void* stream = nullptr, bool sync = true) { mBuffer.deviceUpload(stream, sync); } + + /// @brief Download the grid to from the device, e.g. 
from GPU to CPU + /// @note This method is only available if the buffer supports devices + template + typename util::enable_if::hasDeviceDual, void>::type + deviceDownload(void* stream = nullptr, bool sync = true) { mBuffer.deviceDownload(stream, sync); } + + /// @brief Check if the buffer is this handle has any padding, i.e. if the buffer is larger than the combined size of all its grids + /// @return true is the combined size of all grid is smaller than the buffer size + bool isPadded() const {return mMetaData.empty() ? false : mMetaData.back().offset + mMetaData.back().size != mBuffer.size();} + + /// @brief Return the total number of grids contained in this buffer + uint32_t gridCount() const {return static_cast(mMetaData.size());} + + /// @brief Return the grid size of the @a n'th grid in this GridHandle + /// @param n index of the grid (assumed to be less than gridCount()) + /// @return Return the byte size of the specified grid + uint64_t gridSize(uint32_t n = 0) const {return mMetaData[n].size; } + + /// @brief Return the GridType of the @a n'th grid in this GridHandle + /// @param n index of the grid (assumed to be less than gridCount()) + /// @return Return the GridType of the specified grid + GridType gridType(uint32_t n = 0) const {return mMetaData[n].gridType; } + + /// @brief Access to the GridData of the n'th grid in the current handle + /// @param n zero-based ID of the grid + /// @return Const pointer to the n'th GridData in the current handle + const GridData* gridData(uint32_t n = 0) const; + + /// @brief Returns a const point to the @a n'th grid meta data + /// @param n zero-based ID of the grid + /// @warning Note that the return pointer can be NULL if the GridHandle was not initialized + const GridMetaData* gridMetaData(uint32_t n = 0) const; + + /// @brief Write a specific grid in this buffer to an output stream + /// @param os output stream that the buffer will be written to + /// @param n zero-based index of the grid to be written to stream + void write(std::ostream& os, uint32_t n) const { + if (const GridData* data = this->gridData(n)) { + os.write((const char*)data, data->mGridSize); + } else { + throw std::runtime_error("GridHandle does not contain a #" + std::to_string(n) + " grid"); + } + } + + /// @brief Write the entire grid buffer to an output stream + /// @param os output stream that the buffer will be written to + void write(std::ostream& os) const { + for (uint32_t n=0; ngridCount(); ++n) this->write(os, n); + } + + /// @brief Write this entire grid buffer to a file + /// @param fileName string name of the output file + void write(const std::string &fileName) const { + std::ofstream os(fileName, std::ios::out | std::ios::binary | std::ios::trunc); + if (!os.is_open()) throw std::ios_base::failure("Unable to open file named \"" + fileName + "\" for output"); + this->write(os); + } + + /// @brief Write a specific grid to file + /// @param fileName string name of the output file + /// @param n zero-based index of the grid to be written to file + void write(const std::string &fileName, uint32_t n) const { + std::ofstream os(fileName, std::ios::out | std::ios::binary | std::ios::trunc); + if (!os.is_open()) throw std::ios_base::failure("Unable to open file named \"" + fileName + "\" for output"); + this->write(os, n); + } + + /// @brief Read an entire raw grid buffer from an input stream + /// @param is input stream containing a raw grid buffer + /// @param pool optional pool from which to allocate the new grid buffer + /// @throw Will throw a 
std::logic_error if the stream does not contain a valid raw grid + void read(std::istream& is, const BufferT& pool = BufferT()); + + /// @brief Read a specific grid from an input stream containing a raw grid buffer + /// @param is input stream containing a raw grid buffer + /// @param n zero-based index of the grid to be read + /// @param pool optional pool from which to allocate the new grid buffer + /// @throw Will throw a std::logic_error if the stream does not contain a valid raw grid + void read(std::istream& is, uint32_t n, const BufferT& pool = BufferT()); + + /// @brief Read a specific grid from an input stream containing a raw grid buffer + /// @param is input stream containing a raw grid buffer + /// @param gridName string name of the grid to be read + /// @param pool optional pool from which to allocate the new grid buffer + /// @throw Will throw a std::logic_error if the stream does not contain a valid raw grid with the speficied name + void read(std::istream& is, const std::string &gridName, const BufferT& pool = BufferT()); + + /// @brief Read a raw grid buffer from a file + /// @param filename string name of the input file containing a raw grid buffer + /// @param pool optional pool from which to allocate the new grid buffe + void read(const std::string &fileName, const BufferT& pool = BufferT()) { + std::ifstream is(fileName, std::ios::in | std::ios::binary); + if (!is.is_open()) throw std::ios_base::failure("Unable to open file named \"" + fileName + "\" for input"); + this->read(is, pool); + } + + /// @brief Read a specific grid from a file containing a raw grid buffer + /// @param filename string name of the input file containing a raw grid buffer + /// @param n zero-based index of the grid to be read + /// @param pool optional pool from which to allocate the new grid buffer + /// @throw Will throw a std::ios_base::failure if the file does not exist and a + /// std::logic_error if the files does not contain a valid raw grid + void read(const std::string &fileName, uint32_t n, const BufferT& pool = BufferT()) { + std::ifstream is(fileName, std::ios::in | std::ios::binary); + if (!is.is_open()) throw std::ios_base::failure("Unable to open file named \"" + fileName + "\" for input"); + this->read(is, n, pool); + } + + /// @brief Read a specific grid from a file containing a raw grid buffer + /// @param filename string name of the input file containing a raw grid buffer + /// @param gridName string name of the grid to be read + /// @param pool optional pool from which to allocate the new grid buffer + /// @throw Will throw a std::ios_base::failure if the file does not exist and a + /// std::logic_error if the files does not contain a valid raw grid withe the specified name + void read(const std::string &fileName, const std::string &gridName, const BufferT& pool = BufferT()) { + std::ifstream is(fileName, std::ios::in | std::ios::binary); + if (!is.is_open()) throw std::ios_base::failure("Unable to open file named \"" + fileName + "\" for input"); + this->read(is, gridName, pool); + } +}; // GridHandle + +// --------------------------> Implementation of private methods in GridHandle <------------------------------------ + +template +inline const GridData* GridHandle::gridData(uint32_t n) const +{ + const void *data = this->data(); + if (data == nullptr || n >= mMetaData.size()) return nullptr; + return util::PtrAdd(data, mMetaData[n].offset); +}// const GridData* GridHandle::gridData(uint32_t n) const + +template +inline const GridMetaData* GridHandle::gridMetaData(uint32_t 
n) const +{ + const auto *data = this->data(); + if (data == nullptr || n >= mMetaData.size()) return nullptr; + return util::PtrAdd(data, mMetaData[n].offset); +}// const GridMetaData* GridHandle::gridMetaData(uint32_t n) const + +inline __hostdev__ void cpyGridHandleMeta(const GridData *data, GridHandleMetaData *meta) +{ + uint64_t offset = 0; + for (auto *p=meta, *q=p+data->mGridCount; p!=q; ++p) { + *p = {offset, data->mGridSize, data->mGridType}; + offset += p->size; + data = util::PtrAdd(data, p->size); + } +}// void cpyGridHandleMeta(const GridData *data, GridHandleMetaData *meta) + +template +template::hasDeviceDual, int>::type> +GridHandle::GridHandle(T&& buffer) +{ + static_assert(util::is_same::value, "Expected U==BufferT"); + mBuffer = std::move(buffer); + if (auto *data = reinterpret_cast(mBuffer.data())) { + if (!data->isValid()) throw std::runtime_error("GridHandle was constructed with an invalid host buffer"); + mMetaData.resize(data->mGridCount); + cpyGridHandleMeta(data, mMetaData.data()); + } +}// GridHandle::GridHandle(T&& buffer) + +template +template +inline GridHandle GridHandle::copy(const OtherBufferT& other) const +{ + if (mBuffer.isEmpty()) return GridHandle();// return an empty handle + auto buffer = OtherBufferT::create(mBuffer.size(), &other); + std::memcpy(buffer.data(), mBuffer.data(), mBuffer.size());// deep copy of buffer + return GridHandle(std::move(buffer)); +}// GridHandle GridHandle::copy(const OtherBufferT& other) const + +template +template +inline const NanoGrid* GridHandle::grid(uint32_t n) const +{ + const void *data = mBuffer.data(); + if (data == nullptr || n >= mMetaData.size() || mMetaData[n].gridType != toGridType()) return nullptr; + return util::PtrAdd>(data, mMetaData[n].offset); +}// const NanoGrid* GridHandle::grid(uint32_t n) const + +template +template +inline typename util::enable_if::hasDeviceDual, const NanoGrid*>::type +GridHandle::deviceGrid(uint32_t n) const +{ + const void *data = mBuffer.deviceData(); + if (data == nullptr || n >= mMetaData.size() || mMetaData[n].gridType != toGridType()) return nullptr; + return util::PtrAdd>(data, mMetaData[n].offset); +}// GridHandle::deviceGrid(uint32_t n) cons + + +} // namespace nanovdb + +#if defined(__CUDACC__) +#include +#endif// defined(__CUDACC__) + +#endif // NANOVDB_GRID_HANDLE_H_HAS_BEEN_INCLUDED diff --git a/warp/native/nanovdb/HostBuffer.h b/warp/native/nanovdb/HostBuffer.h new file mode 100644 index 00000000..c664856a --- /dev/null +++ b/warp/native/nanovdb/HostBuffer.h @@ -0,0 +1,590 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: MPL-2.0 + +/*! + @file nanovdb/HostBuffer.h + + @date April 20, 2021 + + @brief HostBuffer - a buffer that contains a shared or private bump + pool to either externally or internally managed host memory. + + @details This HostBuffer can be used in multiple ways, most of which are + demonstrated in the examples below. Memory in the pool can + be managed or unmanged (e.g. internal or external) and can + be shared between multiple buffers or belong to a single buffer. + + Example that uses HostBuffer::create inside io::readGrids to create a + full self-managed buffer, i.e. not shared and without padding, per grid in the file. + @code + auto handles = nanovdb::io::readGrids("file.nvdb"); + @endcode + + Example that uses HostBuffer::createFull. 
Assuming you have a raw pointer + to a NanoVDB grid of unknown type, this examples shows how to create its + GridHandle which can be used to enquire about the grid type and meta data. + @code + void *data;// pointer to a NanoVDB grid of unknown type + uint64_t size;// byte size of NanoVDB grid of unknown type + auto buffer = nanovdb::HostBuffer::createFull(size, data); + nanovdb::GridHandle<> gridHandle(std::move(buffer)); + @endcode + + Example that uses HostBuffer::createPool for internally managed host memory. + Suppose you want to read multiple grids in multiple files, but reuse the same + fixed sized memory buffer to both avoid memory fragmentation as well as + exceeding the fixed memory ceiling! + @code + auto pool = nanovdb::HostBuffer::createPool(1 << 30);// 1 GB memory pool + std::vector> frames;// vector of grid names + for (int i=0; i array(new char[size + NANOVDB_DATA_ALIGNMENT]);// scoped pool of 1 GB with padding + void *buffer = nanovdb::alignPtr(array.get());// 32B aligned buffer + auto pool = nanovdb::HostBuffer::createPool(poolSize, buffer); + auto handles = nanovdb::io::readGrids("file.nvdb", 0, pool); + @endcode +*/ + +#ifndef NANOVDB_HOSTBUFFER_H_HAS_BEEN_INCLUDED +#define NANOVDB_HOSTBUFFER_H_HAS_BEEN_INCLUDED + +#include // for NANOVDB_DATA_ALIGNMENT; +#include // for types like int32_t etc +#include // for fprintf +#include // for std::malloc/std::realloc/std::free +#include // for std::make_shared +#include // for std::mutex +#include // for std::unordered_set +#include // for assert +#include // for std::stringstream +#include // for memcpy + +#define checkPtr(ptr, msg) \ + { \ + ptrAssert((ptr), (msg), __FILE__, __LINE__); \ + } + +namespace nanovdb { + +template +struct BufferTraits +{ + static constexpr bool hasDeviceDual = false; +}; + +// ----------------------------> HostBuffer <-------------------------------------- + +/// @brief This is a buffer that contains a shared or private pool +/// to either externally or internally managed host memory. +/// +/// @note Terminology: +/// Pool: 0 = buffer.size() < buffer.poolSize() +/// Buffer: 0 < buffer.size() < buffer.poolSize() +/// Full: 0 < buffer.size() = buffer.poolSize() +/// Empty: 0 = buffer.size() = buffer.poolSize() +class HostBuffer +{ + struct Pool;// forward declaration of private pool struct + std::shared_ptr mPool; + uint64_t mSize; // total number of bytes for the NanoVDB grid. + void* mData; // raw buffer for the NanoVDB grid. 
+ +#if defined(DEBUG) || defined(_DEBUG) + static inline void ptrAssert(void* ptr, const char* msg, const char* file, int line, bool abort = true) + { + if (ptr == nullptr) { + fprintf(stderr, "NULL pointer error: %s %s %d\n", msg, file, line); + if (abort) + exit(1); + } + if (uint64_t(ptr) % NANOVDB_DATA_ALIGNMENT) { + fprintf(stderr, "Alignment pointer error: %s %s %d\n", msg, file, line); + if (abort) + exit(1); + } + } +#else + static inline void ptrAssert(void*, const char*, const char*, int, bool = true) + { + } +#endif + +public: + /// @brief Return a full buffer or an empty buffer + HostBuffer(uint64_t bufferSize = 0); + + /// @brief Move copy-constructor + HostBuffer(HostBuffer&& other); + + /// @brief Custom descructor + ~HostBuffer() { this->clear(); } + + /// @brief Move copy assignment operation + HostBuffer& operator=(HostBuffer&& other); + + /// @brief Disallow copy-construction + HostBuffer(const HostBuffer&) = delete; + + /// @brief Disallow copy assignment operation + HostBuffer& operator=(const HostBuffer&) = delete; + + /// @brief Return a pool buffer which satisfies: buffer.size == 0, + /// buffer.poolSize() == poolSize, and buffer.data() == nullptr. + /// If data==nullptr, memory for the pool will be allocated. + /// + /// @throw If poolSize is zero. + static HostBuffer createPool(uint64_t poolSize, void *data = nullptr); + + /// @brief Return a full buffer which satisfies: buffer.size == bufferSize, + /// buffer.poolSize() == bufferSize, and buffer.data() == data. + /// If data==nullptr, memory for the pool will be allocated. + /// + /// @throw If bufferSize is zero. + static HostBuffer createFull(uint64_t bufferSize, void *data = nullptr); + + /// @brief Return a buffer with @c bufferSize bytes managed by + /// the specified memory @c pool. If none is provided, i.e. + /// @c pool == nullptr or @c pool->poolSize() == 0, one is + /// created with size @c bufferSize, i.e. a full buffer is returned. + /// + /// @throw If the specified @c pool has insufficient memory for + /// the requested buffer size. + static HostBuffer create(uint64_t bufferSize, const HostBuffer* pool = nullptr); + + /// @brief Initialize as a full buffer with the specified size. If data is NULL + /// the memory is internally allocated. + void init(uint64_t bufferSize, void *data = nullptr); + + //@{ + /// @brief Retuns a pointer to the raw memory buffer managed by this allocator. + /// + /// @warning Note that the pointer can be NULL if the allocator was not initialized! + const void* data() const { return mData; } + void* data() { return mData; } + //@} + + //@{ + /// @brief Returns the size in bytes associated with this buffer. + uint64_t bufferSize() const { return mSize; } + uint64_t size() const { return this->bufferSize(); } + //@} + + /// @brief Returns the size in bytes of the memory pool shared with this instance. + uint64_t poolSize() const; + + /// @brief Return true if memory is managed (using std::malloc and std:free) by the + /// shared pool in this buffer. Else memory is assumed to be managed externally. + bool isManaged() const; + + //@{ + /// @brief Returns true if this buffer has no memory associated with it + bool isEmpty() const { return !mPool || mSize == 0 || mData == nullptr; } + bool empty() const { return this->isEmpty(); } + //@} + + /// @brief Return true if this is a pool, i.e. an empty buffer with a nonempty + /// internal pool, i.e. 
this->size() == 0 and this->poolSize() != 0 + bool isPool() const { return mSize == 0 && this->poolSize() > 0; } + + /// @brief Return true if the pool exists, is nonempty but has no more available memory + bool isFull() const; + + /// @brief Clear this buffer so it is empty. + void clear(); + + /// @brief Clears all existing buffers that are registered against the memory pool + /// and resets the pool so it can be reused to create new buffers. + /// + /// @throw If this instance is not empty or contains no pool. + /// + /// @warning This method is not thread-safe! + void reset(); + + /// @brief Total number of bytes from the pool currently in use by buffers + uint64_t poolUsage() const; + + /// @brief resize the pool size. It will attempt to resize the existing + /// memory block, but if that fails a deep copy is performed. + /// If @c data is not NULL it will be used as new externally + /// managed memory for the pool. All registered buffers are + /// updated so GridHandle::grid might return a new address (if + /// deep copy was performed). + /// + /// @note This method can be use to resize the memory pool and even + /// change it from internally to externally managed memory or vice versa. + /// + /// @throw if @c poolSize is less than this->poolUsage() the used memory + /// or allocations fail. + void resizePool(uint64_t poolSize, void *data = nullptr); + +}; // HostBuffer class + +// --------------------------> Implementation of HostBuffer::Pool <------------------------------------ + +// This is private struct of HostBuffer so you can safely ignore the API +struct HostBuffer::Pool +{ + using HashTableT = std::unordered_set; + std::mutex mMutex; // mutex for updating mRegister and mFree + HashTableT mRegister; + void *mData, *mFree; + uint64_t mSize, mPadding; + bool mManaged; + + /// @brief External memory ctor + Pool(uint64_t size = 0, void* data = nullptr) + : mData(data) + , mFree(mData) + , mSize(size) + , mPadding(0) + , mManaged(data == nullptr) + { + if (mManaged) { + mData = Pool::alloc(mSize); + if (mData == nullptr) throw std::runtime_error("Pool::Pool malloc failed"); + } + mPadding = alignmentPadding(mData); + if (!mManaged && mPadding != 0) { + throw std::runtime_error("Pool::Pool: external memory buffer is not aligned to " + + std::to_string(NANOVDB_DATA_ALIGNMENT) + + " bytes.\nHint: use nanovdb::alignPtr or std::aligned_alloc (C++17 only)"); + } + mFree = util::PtrAdd(mData, mPadding); + } + + /// @brief Custom destructor + ~Pool() + { + assert(mRegister.empty()); + if (mManaged) std::free(mData); + } + + /// @brief Disallow copy-construction + Pool(const Pool&) = delete; + + /// @brief Disallow move-construction + Pool(const Pool&&) = delete; + + /// @brief Disallow copy assignment operation + Pool& operator=(const Pool&) = delete; + + /// @brief Disallow move assignment operation + Pool& operator=(const Pool&&) = delete; + + /// @brief Return the total number of bytes used from this Pool by buffers + uint64_t usage() const { return util::PtrDiff(mFree, mData) - mPadding; } + + /// @brief Allocate a buffer of the specified size and add it to the register + void add(HostBuffer* buffer, uint64_t size) + { + void *alignedFree = util::PtrAdd(mFree, alignmentPadding(mFree)); + + if (util::PtrAdd(alignedFree, size) > util::PtrAdd(mData, mPadding + mSize)) { + std::stringstream ss; + ss << "HostBuffer::Pool: insufficient memory\n" + << "\tA buffer requested " << size << " bytes with " << NANOVDB_DATA_ALIGNMENT + << "-bytes alignment from a pool with " + << mSize << " bytes 
of which\n\t" << (util::PtrDiff(alignedFree, mData) - mPadding) + << " bytes are used by " << mRegister.size() << " other buffer(s). " + << "Pool is " << (mManaged ? "internally" : "externally") << " managed.\n"; + //std::cerr << ss.str(); + throw std::runtime_error(ss.str()); + } + buffer->mSize = size; + const std::lock_guard lock(mMutex); + mRegister.insert(buffer); + buffer->mData = alignedFree; + mFree = util::PtrAdd(alignedFree, size); + } + + /// @brief Remove the specified buffer from the register + void remove(HostBuffer *buffer) + { + const std::lock_guard lock(mMutex); + mRegister.erase(buffer); + } + + /// @brief Replaces buffer1 with buffer2 in the register + void replace(HostBuffer *buffer1, HostBuffer *buffer2) + { + const std::lock_guard lock(mMutex); + mRegister.erase( buffer1); + mRegister.insert(buffer2); + } + + /// @brief Reset the register and all its buffers + void reset() + { + for (HostBuffer *buffer : mRegister) { + buffer->mPool.reset(); + buffer->mSize = 0; + buffer->mData = nullptr; + } + mRegister.clear(); + mFree = util::PtrAdd(mData, mPadding); + } + + /// @brief Resize this Pool and update registered buffers as needed. If data is no NULL + /// it is used as externally managed memory. + void resize(uint64_t size, void *data = nullptr) + { + const uint64_t memUsage = this->usage(); + + const bool managed = (data == nullptr); + + if (!managed && alignmentPadding(data) != 0) { + throw std::runtime_error("Pool::resize: external memory buffer is not aligned to " + + std::to_string(NANOVDB_DATA_ALIGNMENT) + " bytes"); + } + + if (memUsage > size) { + throw std::runtime_error("Pool::resize: insufficient memory"); + } + + uint64_t padding = 0; + if (mManaged && managed && size != mSize) { // managed -> managed + padding = mPadding; + data = Pool::realloc(mData, memUsage, size, padding); // performs both copy and free of mData + } else if (!mManaged && managed) { // un-managed -> managed + data = Pool::alloc(size); + padding = alignmentPadding(data); + } + + if (data == nullptr) { + throw std::runtime_error("Pool::resize: allocation failed"); + } else if (data != mData) { + void* paddedData = util::PtrAdd(data, padding); + + if (!(mManaged && managed)) { // no need to copy if managed -> managed + memcpy(paddedData, util::PtrAdd(mData, mPadding), memUsage); + } + + for (HostBuffer* buffer : mRegister) { // update registered buffers + //buffer->mData = paddedData + ptrdiff_t(buffer->mData - (mData + mPadding)); + buffer->mData = util::PtrAdd(paddedData, util::PtrDiff(buffer->mData, util::PtrAdd(mData, mPadding))); + } + mFree = util::PtrAdd(paddedData, memUsage); // update the free pointer + if (mManaged && !managed) {// only free if managed -> un-managed + std::free(mData); + } + + mData = data; + mPadding = padding; + } + mSize = size; + mManaged = managed; + } + /// @brief Return true is all the memory in this pool is in use. + bool isFull() const + { + assert(mFree <= util::PtrAdd(mData, mPadding + mSize)); + return mSize > 0 ? 
mFree == util::PtrAdd(mData, mPadding + mSize) : false; + } + +private: + + static void* alloc(uint64_t size) + { +//#if (__cplusplus >= 201703L) +// return std::aligned_alloc(NANOVDB_DATA_ALIGNMENT, size);//C++17 or newer +//#else + // make sure we alloc enough space to align the result + return std::malloc(size + NANOVDB_DATA_ALIGNMENT); +//#endif + } + + static void* realloc(void* const origData, + uint64_t origSize, + uint64_t desiredSize, + uint64_t& padding) + { + // make sure we alloc enough space to align the result + void* data = std::realloc(origData, desiredSize + NANOVDB_DATA_ALIGNMENT); + + if (data != nullptr && data != origData) { + uint64_t newPadding = alignmentPadding(data); + // Number of padding bytes may have changed -- move data if that's the case + if (newPadding != padding) { + // Realloc should not happen when shrinking down buffer, but let's be safe + std::memmove(util::PtrAdd(data, newPadding), + util::PtrAdd(data, padding), + math::Min(origSize, desiredSize)); + padding = newPadding; + } + } + + return data; + } + +};// struct HostBuffer::Pool + +// --------------------------> Implementation of HostBuffer <------------------------------------ + +inline HostBuffer::HostBuffer(uint64_t size) : mPool(nullptr), mSize(size), mData(nullptr) +{ + if (size>0) { + mPool = std::make_shared(size); + mData = mPool->mFree; + mPool->mRegister.insert(this); + mPool->mFree = util::PtrAdd(mPool->mFree, size); + } +} + +inline HostBuffer::HostBuffer(HostBuffer&& other) : mPool(other.mPool), mSize(other.mSize), mData(other.mData) +{ + if (mPool && mSize != 0) { + mPool->replace(&other, this); + } + other.mPool.reset(); + other.mSize = 0; + other.mData = nullptr; +} + +inline void HostBuffer::init(uint64_t bufferSize, void *data) +{ + if (bufferSize == 0) { + throw std::runtime_error("HostBuffer: invalid buffer size"); + } + if (mPool) { + mPool.reset(); + } + if (!mPool || mPool->mSize != bufferSize) { + mPool = std::make_shared(bufferSize, data); + } + mPool->add(this, bufferSize); +} + +inline HostBuffer& HostBuffer::operator=(HostBuffer&& other) +{ + if (mPool) { + mPool->remove(this); + } + mPool = other.mPool; + mSize = other.mSize; + mData = other.mData; + if (mPool && mSize != 0) { + mPool->replace(&other, this); + } + other.mPool.reset(); + other.mSize = 0; + other.mData = nullptr; + return *this; +} + +inline uint64_t HostBuffer::poolSize() const +{ + return mPool ? mPool->mSize : 0u; +} + +inline uint64_t HostBuffer::poolUsage() const +{ + return mPool ? mPool->usage(): 0u; +} + +inline bool HostBuffer::isManaged() const +{ + return mPool ? mPool->mManaged : false; +} + +inline bool HostBuffer::isFull() const +{ + return mPool ? 
mPool->isFull() : false; +} + +inline HostBuffer HostBuffer::createPool(uint64_t poolSize, void *data) +{ + if (poolSize == 0) { + throw std::runtime_error("HostBuffer: invalid pool size"); + } + HostBuffer buffer; + buffer.mPool = std::make_shared(poolSize, data); + // note the buffer is NOT registered by its pool since it is not using its memory + buffer.mSize = 0; + buffer.mData = nullptr; + return buffer; +} + +inline HostBuffer HostBuffer::createFull(uint64_t bufferSize, void *data) +{ + if (bufferSize == 0) { + throw std::runtime_error("HostBuffer: invalid buffer size"); + } + HostBuffer buffer; + buffer.mPool = std::make_shared(bufferSize, data); + buffer.mPool->add(&buffer, bufferSize); + return buffer; +} + +inline HostBuffer HostBuffer::create(uint64_t bufferSize, const HostBuffer* pool) +{ + HostBuffer buffer; + if (pool == nullptr || !pool->mPool) { + buffer.mPool = std::make_shared(bufferSize); + } else { + buffer.mPool = pool->mPool; + } + buffer.mPool->add(&buffer, bufferSize); + return buffer; +} + +inline void HostBuffer::clear() +{ + if (mPool) {// remove self from the buffer register in the pool + mPool->remove(this); + } + mPool.reset(); + mSize = 0; + mData = nullptr; +} + +inline void HostBuffer::reset() +{ + if (this->size()>0) { + throw std::runtime_error("HostBuffer: only empty buffers can call reset"); + } + if (!mPool) { + throw std::runtime_error("HostBuffer: this buffer contains no pool to reset"); + } + mPool->reset(); +} + +inline void HostBuffer::resizePool(uint64_t size, void *data) +{ + if (!mPool) { + throw std::runtime_error("HostBuffer: this buffer contains no pool to resize"); + } + mPool->resize(size, data); +} + +} // namespace nanovdb + +#endif // end of NANOVDB_HOSTBUFFER_H_HAS_BEEN_INCLUDED diff --git a/warp/native/nanovdb/NanoVDB.h b/warp/native/nanovdb/NanoVDB.h index 2cbf6564..53265ca9 100644 --- a/warp/native/nanovdb/NanoVDB.h +++ b/warp/native/nanovdb/NanoVDB.h @@ -2,7 +2,7 @@ // SPDX-License-Identifier: MPL-2.0 /*! - \file NanoVDB.h + \file nanovdb/NanoVDB.h \author Ken Museth @@ -29,7 +29,7 @@ structure can safely be ignored by most client codes)! - \warning NanoVDB grids can only be constructed via tools like openToNanoVDB + \warning NanoVDB grids can only be constructed via tools like createNanoGrid or the GridBuilder. This explains why none of the grid nodes defined below have public constructors or destructors. @@ -38,6 +38,8 @@ ACM Transactions on Graphics 32(3), 2013, which can be found here: http://www.museth.org/Ken/Publications_files/Museth_TOG13.pdf + NanoVDB was first published there: https://dl.acm.org/doi/fullHtml/10.1145/3450623.3464653 + Overview: This file implements the following fundamental class that when combined forms the backbone of the VDB tree data structure: @@ -64,8 +66,15 @@ Memory layout: + It's important to emphasize that all the grid data (defined below) are explicitly 32 byte + aligned, which implies that any memory buffer that contains a NanoVDB grid must also be at + 32 byte aligned. That is, the memory address of the beginning of a buffer (see ascii diagram below) + must be divisible by 32, i.e. uintptr_t(&buffer)%32 == 0! If this is not the case, the C++ standard + says the behaviour is undefined! Normally this is not a concerns on GPUs, because they use 256 byte + aligned allocations, but the same cannot be said about the CPU. + GridData is always at the very beginning of the buffer immediately followed by TreeData! 
- The remaining nodes and blind-data are allowed to be scattered thoughout the buffer, + The remaining nodes and blind-data are allowed to be scattered throughout the buffer, though in practice they are arranged as: GridData: 672 bytes (e.g. magic, checksum, major, flags, index, count, size, name, map, world bbox, voxel size, class, type, offset, count) @@ -91,214 +100,315 @@ Array of: LeafNodes of size 8^3: bbox, bit masks, 512 voxel values, and min/max/avg/standard deviation values - Example layout: ("---" implies it has a custom offset, "..." implies zero or more) - [GridData(672B)][TreeData(64B)]---[RootData][N x Root::Tile]---[NodeData<5>]---[ModeData<4>]---[LeafData<3>]---[BLINDMETA...]---[BLIND0]---[BLIND1]---etc. + Notation: "]---[" implies it has optional padding, and "][" implies zero padding + + [GridData(672B)][TreeData(64B)]---[RootData][N x Root::Tile]---[InternalData<5>]---[InternalData<4>]---[LeafData<3>]---[BLINDMETA...]---[BLIND0]---[BLIND1]---etc. + ^ ^ ^ ^ ^ ^ + | | | | | | + +-- Start of 32B aligned buffer | | | | +-- Node0::DataType* leafData + GridType::DataType* gridData | | | | + | | | +-- Node1::DataType* lowerData + RootType::DataType* rootData --+ | | + | +-- Node2::DataType* upperData + | + +-- RootType::DataType::Tile* tile */ #ifndef NANOVDB_NANOVDB_H_HAS_BEEN_INCLUDED #define NANOVDB_NANOVDB_H_HAS_BEEN_INCLUDED -#define NANOVDB_MAGIC_NUMBER 0x304244566f6e614eUL // "NanoVDB0" in hex - little endian (uint64_t) +// The following two header files are the only mandatory dependencies +#include // for __hostdev__ and lots of other utility functions +#include // for Coord, BBox, Vec3, Vec4 etc + +// Do not change this value! 32 byte alignment is fixed in NanoVDB +#define NANOVDB_DATA_ALIGNMENT 32 + +// NANOVDB_MAGIC_NUMB is currently used for both grids and files (starting with v32.6.0) +// NANOVDB_MAGIC_GRID will soon be used exclusively for grids (serialized to a single buffer) +// NANOVDB_MAGIC_FILE will soon be used exclusively for files +// NANOVDB_MAGIC_NODE will soon be used exclusively for NodeManager +// NANOVDB_MAGIC_FRAG will soon be used exclusively for a fragmented grid, i.e. 
a grid that is not serialized +// | : 0 in 30 corresponds to 0 in NanoVDB0 +#define NANOVDB_MAGIC_NUMB 0x304244566f6e614eUL // "NanoVDB0" in hex - little endian (uint64_t) +#define NANOVDB_MAGIC_GRID 0x314244566f6e614eUL // "NanoVDB1" in hex - little endian (uint64_t) +#define NANOVDB_MAGIC_FILE 0x324244566f6e614eUL // "NanoVDB2" in hex - little endian (uint64_t) +#define NANOVDB_MAGIC_NODE 0x334244566f6e614eUL // "NanoVDB3" in hex - little endian (uint64_t) +#define NANOVDB_MAGIC_FRAG 0x344244566f6e614eUL // "NanoVDB4" in hex - little endian (uint64_t) +#define NANOVDB_MAGIC_MASK 0x00FFFFFFFFFFFFFFUL // use this mask to remove the number + +//#define NANOVDB_MAGIC_NUMBER 0x304244566f6e614eUL +//#define NANOVDB_USE_NEW_MAGIC_NUMBERS// used to enable use of the new magic numbers described above #define NANOVDB_MAJOR_VERSION_NUMBER 32 // reflects changes to the ABI and hence also the file format -#define NANOVDB_MINOR_VERSION_NUMBER 3 // reflects changes to the API but not ABI -#define NANOVDB_PATCH_VERSION_NUMBER 3 // reflects changes that does not affect the ABI or API +#define NANOVDB_MINOR_VERSION_NUMBER 7 // reflects changes to the API but not ABI +#define NANOVDB_PATCH_VERSION_NUMBER 0 // reflects changes that does not affect the ABI or API + +#define TBB_SUPPRESS_DEPRECATED_MESSAGES 1 // This replaces a Coord key at the root level with a single uint64_t -#define USE_SINGLE_ROOT_KEY +#define NANOVDB_USE_SINGLE_ROOT_KEY // This replaces three levels of Coord keys in the ReadAccessor with one Coord -//#define USE_SINGLE_ACCESSOR_KEY +//#define NANOVDB_USE_SINGLE_ACCESSOR_KEY -#define NANOVDB_FPN_BRANCHLESS +// Use this to switch between std::ofstream or FILE implementations +//#define NANOVDB_USE_IOSTREAMS -#define NANOVDB_DATA_ALIGNMENT 32 +// Use this to switch between old and new accessor methods +#define NANOVDB_NEW_ACCESSOR_METHODS + +#define NANOVDB_FPN_BRANCHLESS #if !defined(NANOVDB_ALIGN) #define NANOVDB_ALIGN(n) alignas(n) #endif // !defined(NANOVDB_ALIGN) -#ifdef __CUDACC_RTC__ - -typedef signed char int8_t; -typedef short int16_t; -typedef int int32_t; -typedef long long int64_t; -typedef unsigned char uint8_t; -typedef unsigned int uint32_t; -typedef unsigned short uint16_t; -typedef unsigned long long uint64_t; +namespace nanovdb {// ================================================================= -#define NANOVDB_ASSERT(x) +// --------------------------> Build types <------------------------------------ -#define UINT64_C(x) (x ## ULL) +/// @brief Dummy type for a voxel whose value equals an offset into an external value array +class ValueIndex{}; -#else // __CUDACC_RTC__ +/// @brief Dummy type for a voxel whose value equals an offset into an external value array of active values +class ValueOnIndex{}; -#include // for abs in clang7 -#include // for types like int32_t etc -#include // for size_t type -#include // for assert -#include // for sprinf -#include // for sqrt and fma -#include // for numeric_limits +/// @brief Like @c ValueIndex but with a mutable mask +class ValueIndexMask{}; -// All asserts can be disabled here, even for debug builds -#if 1 -#define NANOVDB_ASSERT(x) assert(x) -#else -#define NANOVDB_ASSERT(x) -#endif +/// @brief Like @c ValueOnIndex but with a mutable mask +class ValueOnIndexMask{}; -#if defined(NANOVDB_USE_INTRINSICS) && defined(_MSC_VER) -#include -#pragma intrinsic(_BitScanReverse) -#pragma intrinsic(_BitScanForward) -#pragma intrinsic(_BitScanReverse64) -#pragma intrinsic(_BitScanForward64) -#endif +/// @brief Dummy type for a voxel whose 
value equals its binary active state +class ValueMask{}; -#endif // __CUDACC_RTC__ - -#if defined(__CUDACC__) || defined(__HIP__) -// Only define __hostdev__ when using NVIDIA CUDA or HIP compiler -#define __hostdev__ __host__ __device__ -#else -#define __hostdev__ -#endif - -// The following macro will suppress annoying warnings when nvcc -// compiles functions that call (host) intrinsics (which is perfectly valid) -#if defined(_MSC_VER) && defined(__CUDACC__) -#define NANOVDB_HOSTDEV_DISABLE_WARNING __pragma("hd_warning_disable") -#elif defined(__GNUC__) && defined(__CUDACC__) -#define NANOVDB_HOSTDEV_DISABLE_WARNING _Pragma("hd_warning_disable") -#else -#define NANOVDB_HOSTDEV_DISABLE_WARNING -#endif - -// A portable implementation of offsetof - unfortunately it doesn't work with static_assert -#define NANOVDB_OFFSETOF(CLASS, MEMBER) ((int)(size_t)((char*)&((CLASS*)0)->MEMBER - (char*)0)) - -namespace nanovdb { - -// --------------------------> Build types <------------------------------------ - -/// @brief Dummy type for a voxel with a binary mask value, e.g. the active state -class ValueMask {}; - -/// @brief Dummy type for a 16 bit floating point values -class Half {}; +/// @brief Dummy type for a 16 bit floating point values (placeholder for IEEE 754 Half) +class Half{}; /// @brief Dummy type for a 4bit quantization of float point values -class Fp4 {}; +class Fp4{}; /// @brief Dummy type for a 8bit quantization of float point values -class Fp8 {}; +class Fp8{}; /// @brief Dummy type for a 16bit quantization of float point values -class Fp16 {}; +class Fp16{}; /// @brief Dummy type for a variable bit quantization of floating point values -class FpN {}; +class FpN{}; + +/// @brief Dummy type for indexing points into voxels +class Point{}; // --------------------------> GridType <------------------------------------ +/// @brief return the number of characters (including null termination) required to convert enum type to a string +template +__hostdev__ inline constexpr uint32_t strlen(){return (uint32_t)EnumT::StrLen - (uint32_t)EnumT::End;} + /// @brief List of types that are currently supported by NanoVDB /// /// @note To expand on this list do: /// 1) Add the new type between Unknown and End in the enum below /// 2) Add the new type to OpenToNanoVDB::processGrid that maps OpenVDB types to GridType /// 3) Verify that the ConvertTrait in NanoToOpenVDB.h works correctly with the new type -/// 4) Add the new type to mapToGridType (defined below) that maps NanoVDB types to GridType +/// 4) Add the new type to toGridType (defined below) that maps NanoVDB types to GridType /// 5) Add the new type to toStr (defined below) -enum class GridType : uint32_t { Unknown = 0, - Float = 1, // single precision floating point value - Double = 2,// double precision floating point value - Int16 = 3,// half precision signed integer value - Int32 = 4,// single precision signed integer value - Int64 = 5,// double precision signed integer value - Vec3f = 6,// single precision floating 3D vector - Vec3d = 7,// double precision floating 3D vector - Mask = 8,// no value, just the active state - Half = 9,// half precision floating point value - UInt32 = 10,// single precision unsigned integer value - Boolean = 11,// boolean value, encoded in bit array - RGBA8 = 12,// RGBA packed into 32bit word in reverse-order. R in low bits. 
- Fp4 = 13,// 4bit quantization of float point value - Fp8 = 14,// 8bit quantization of float point value - Fp16 = 15,// 16bit quantization of float point value - FpN = 16,// variable bit quantization of floating point value - Vec4f = 17,// single precision floating 4D vector - Vec4d = 18,// double precision floating 4D vector - End = 19 }; - -#ifndef __CUDACC_RTC__ -/// @brief Retuns a c-string used to describe a GridType -inline const char* toStr(GridType gridType) -{ - static const char * LUT[] = { "?", "float", "double" , "int16", "int32", - "int64", "Vec3f", "Vec3d", "Mask", "Half", - "uint32", "bool", "RGBA8", "Float4", "Float8", - "Float16", "FloatN", "Vec4f", "Vec4d", "End" }; - static_assert( sizeof(LUT)/sizeof(char*) - 1 == int(GridType::End), "Unexpected size of LUT" ); - return LUT[static_cast(gridType)]; +enum class GridType : uint32_t { Unknown = 0, // unknown value type - should rarely be used + Float = 1, // single precision floating point value + Double = 2, // double precision floating point value + Int16 = 3, // half precision signed integer value + Int32 = 4, // single precision signed integer value + Int64 = 5, // double precision signed integer value + Vec3f = 6, // single precision floating 3D vector + Vec3d = 7, // double precision floating 3D vector + Mask = 8, // no value, just the active state + Half = 9, // half precision floating point value (placeholder for IEEE 754 Half) + UInt32 = 10, // single precision unsigned integer value + Boolean = 11, // boolean value, encoded in bit array + RGBA8 = 12, // RGBA packed into 32bit word in reverse-order, i.e. R is lowest byte. + Fp4 = 13, // 4bit quantization of floating point value + Fp8 = 14, // 8bit quantization of floating point value + Fp16 = 15, // 16bit quantization of floating point value + FpN = 16, // variable bit quantization of floating point value + Vec4f = 17, // single precision floating 4D vector + Vec4d = 18, // double precision floating 4D vector + Index = 19, // index into an external array of active and inactive values + OnIndex = 20, // index into an external array of active values + IndexMask = 21, // like Index but with a mutable mask + OnIndexMask = 22, // like OnIndex but with a mutable mask + PointIndex = 23, // voxels encode indices to co-located points + Vec3u8 = 24, // 8bit quantization of floating point 3D vector (only as blind data) + Vec3u16 = 25, // 16bit quantization of floating point 3D vector (only as blind data) + UInt8 = 26, // 8 bit unsigned integer values (eg 0 -> 255 gray scale) + End = 27,// total number of types in this enum (excluding StrLen since it's not a type) + StrLen = End + 12};// this entry is used to determine the minimum size of c-string + +/// @brief Maps a GridType to a c-string +/// @param dst destination string of size 12 or larger +/// @param gridType GridType enum to be mapped to a string +/// @return Retuns a c-string used to describe a GridType +__hostdev__ inline char* toStr(char *dst, GridType gridType) +{ + switch (gridType){ + case GridType::Unknown: return util::strcpy(dst, "?"); + case GridType::Float: return util::strcpy(dst, "float"); + case GridType::Double: return util::strcpy(dst, "double"); + case GridType::Int16: return util::strcpy(dst, "int16"); + case GridType::Int32: return util::strcpy(dst, "int32"); + case GridType::Int64: return util::strcpy(dst, "int64"); + case GridType::Vec3f: return util::strcpy(dst, "Vec3f"); + case GridType::Vec3d: return util::strcpy(dst, "Vec3d"); + case GridType::Mask: return util::strcpy(dst, "Mask"); + case 
GridType::Half: return util::strcpy(dst, "Half"); + case GridType::UInt32: return util::strcpy(dst, "uint32"); + case GridType::Boolean: return util::strcpy(dst, "bool"); + case GridType::RGBA8: return util::strcpy(dst, "RGBA8"); + case GridType::Fp4: return util::strcpy(dst, "Float4"); + case GridType::Fp8: return util::strcpy(dst, "Float8"); + case GridType::Fp16: return util::strcpy(dst, "Float16"); + case GridType::FpN: return util::strcpy(dst, "FloatN"); + case GridType::Vec4f: return util::strcpy(dst, "Vec4f"); + case GridType::Vec4d: return util::strcpy(dst, "Vec4d"); + case GridType::Index: return util::strcpy(dst, "Index"); + case GridType::OnIndex: return util::strcpy(dst, "OnIndex"); + case GridType::IndexMask: return util::strcpy(dst, "IndexMask"); + case GridType::OnIndexMask: return util::strcpy(dst, "OnIndexMask"); + case GridType::PointIndex: return util::strcpy(dst, "PointIndex"); + case GridType::Vec3u8: return util::strcpy(dst, "Vec3u8"); + case GridType::Vec3u16: return util::strcpy(dst, "Vec3u16"); + case GridType::UInt8: return util::strcpy(dst, "uint8"); + default: return util::strcpy(dst, "End"); + } } -#endif // --------------------------> GridClass <------------------------------------ -/// @brief Classes (defined in OpenVDB) that are currently supported by NanoVDB +/// @brief Classes (superset of OpenVDB) that are currently supported by NanoVDB enum class GridClass : uint32_t { Unknown = 0, - LevelSet = 1, // narrow band level set, e.g. SDF - FogVolume = 2, // fog volume, e.g. density - Staggered = 3, // staggered MAC grid, e.g. velocity + LevelSet = 1, // narrow band level set, e.g. SDF + FogVolume = 2, // fog volume, e.g. density + Staggered = 3, // staggered MAC grid, e.g. velocity PointIndex = 4, // point index grid - PointData = 5, // point data grid + PointData = 5, // point data grid Topology = 6, // grid with active states only (no values) - VoxelVolume = 7, // volume of geometric cubes, e.g. minecraft - End = 8 }; + VoxelVolume = 7, // volume of geometric cubes, e.g. colors cubes in Minecraft + IndexGrid = 8, // grid whose values are offsets, e.g. 
into an external array + TensorGrid = 9, // Index grid for indexing learnable tensor features + End = 10,// total number of types in this enum (excluding StrLen since it's not a type) + StrLen = End + 7};// this entry is used to determine the minimum size of c-string + -#ifndef __CUDACC_RTC__ /// @brief Retuns a c-string used to describe a GridClass -inline const char* toStr(GridClass gridClass) -{ - static const char * LUT[] = { "?", "SDF", "FOG" , "MAC", "PNTIDX", - "PNTDAT", "TOPO", "VOX", "END" }; - static_assert( sizeof(LUT)/sizeof(char*) - 1 == int(GridClass::End), "Unexpected size of LUT" ); - return LUT[static_cast(gridClass)]; +/// @param dst destination string of size 7 or larger +/// @param gridClass GridClass enum to be converted to a string +__hostdev__ inline char* toStr(char *dst, GridClass gridClass) +{ + switch (gridClass){ + case GridClass::Unknown: return util::strcpy(dst, "?"); + case GridClass::LevelSet: return util::strcpy(dst, "SDF"); + case GridClass::FogVolume: return util::strcpy(dst, "FOG"); + case GridClass::Staggered: return util::strcpy(dst, "MAC"); + case GridClass::PointIndex: return util::strcpy(dst, "PNTIDX"); + case GridClass::PointData: return util::strcpy(dst, "PNTDAT"); + case GridClass::Topology: return util::strcpy(dst, "TOPO"); + case GridClass::VoxelVolume: return util::strcpy(dst, "VOX"); + case GridClass::IndexGrid: return util::strcpy(dst, "INDEX"); + case GridClass::TensorGrid: return util::strcpy(dst, "TENSOR"); + default: return util::strcpy(dst, "END"); + } } -#endif // --------------------------> GridFlags <------------------------------------ /// @brief Grid flags which indicate what extra information is present in the grid buffer. enum class GridFlags : uint32_t { - HasLongGridName = 1 << 0,// grid name is longer than 256 characters - HasBBox = 1 << 1,// nodes contain bounding-boxes of active values - HasMinMax = 1 << 2,// nodes contain min/max of active values - HasAverage = 1 << 3,// nodes contain averages of active values - HasStdDeviation = 1 << 4,// nodes contain standard deviations of active values - IsBreadthFirst = 1 << 5,// nodes are arranged breadth-first in memory - End = 1 << 6, + HasLongGridName = 1 << 0, // grid name is longer than 256 characters + HasBBox = 1 << 1, // nodes contain bounding-boxes of active values + HasMinMax = 1 << 2, // nodes contain min/max of active values + HasAverage = 1 << 3, // nodes contain averages of active values + HasStdDeviation = 1 << 4, // nodes contain standard deviations of active values + IsBreadthFirst = 1 << 5, // nodes are typically arranged breadth-first in memory + End = 1 << 6, // use End - 1 as a mask for the 5 lower bit flags + StrLen = End + 23,// this entry is used to determine the minimum size of c-string }; -#ifndef __CUDACC_RTC__ /// @brief Retuns a c-string used to describe a GridFlags -inline const char* toStr(GridFlags gridFlags) -{ - static const char * LUT[] = { "has long grid name", - "has bbox", - "has min/max", - "has average", - "has standard deviation", - "is breadth-first", - "end" }; - static_assert( 1 << (sizeof(LUT)/sizeof(char*) - 1) == int(GridFlags::End), "Unexpected size of LUT" ); - return LUT[static_cast(gridFlags)]; +/// @param dst destination string of size 23 or larger +/// @param gridFlags GridFlags enum to be converted to a string +__hostdev__ inline const char* toStr(char *dst, GridFlags gridFlags) +{ + switch (gridFlags){ + case GridFlags::HasLongGridName: return util::strcpy(dst, "has long grid name"); + case GridFlags::HasBBox: return 
util::strcpy(dst, "has bbox"); + case GridFlags::HasMinMax: return util::strcpy(dst, "has min/max"); + case GridFlags::HasAverage: return util::strcpy(dst, "has average"); + case GridFlags::HasStdDeviation: return util::strcpy(dst, "has standard deviation"); + case GridFlags::IsBreadthFirst: return util::strcpy(dst, "is breadth-first"); + default: return util::strcpy(dst, "end"); + } } -#endif + +// --------------------------> MagicType <------------------------------------ + +/// @brief Enums used to identify magic numbers recognized by NanoVDB +enum class MagicType : uint32_t { Unknown = 0,// first 64 bits are neither of the cases below + OpenVDB = 1,// first 32 bits = 0x56444220UL + NanoVDB = 2,// first 64 bits = NANOVDB_MAGIC_NUMB + NanoGrid = 3,// first 64 bits = NANOVDB_MAGIC_GRID + NanoFile = 4,// first 64 bits = NANOVDB_MAGIC_FILE + NanoNode = 5,// first 64 bits = NANOVDB_MAGIC_NODE + NanoFrag = 6,// first 64 bits = NANOVDB_MAGIC_FRAG + End = 7, + StrLen = End + 25};// this entry is used to determine the minimum size of c-string + +/// @brief maps 64 bits of magic number to enum +__hostdev__ inline MagicType toMagic(uint64_t magic) +{ + switch (magic){ + case NANOVDB_MAGIC_NUMB: return MagicType::NanoVDB; + case NANOVDB_MAGIC_GRID: return MagicType::NanoGrid; + case NANOVDB_MAGIC_FILE: return MagicType::NanoFile; + case NANOVDB_MAGIC_NODE: return MagicType::NanoNode; + case NANOVDB_MAGIC_FRAG: return MagicType::NanoFrag; + default: return (magic & ~uint32_t(0)) == 0x56444220UL ? MagicType::OpenVDB : MagicType::Unknown; + } +} + +/// @brief print 64-bit magic number to string +/// @param dst destination string of size 25 or larger +/// @param magic 64 bit magic number to be printed +/// @return return destination string @c dst +__hostdev__ inline char* toStr(char *dst, MagicType magic) +{ + switch (magic){ + case MagicType::Unknown: return util::strcpy(dst, "unknown"); + case MagicType::NanoVDB: return util::strcpy(dst, "nanovdb"); + case MagicType::NanoGrid: return util::strcpy(dst, "nanovdb::Grid"); + case MagicType::NanoFile: return util::strcpy(dst, "nanovdb::File"); + case MagicType::NanoNode: return util::strcpy(dst, "nanovdb::NodeManager"); + case MagicType::NanoFrag: return util::strcpy(dst, "fragmented nanovdb::Grid"); + case MagicType::OpenVDB: return util::strcpy(dst, "openvdb"); + default: return util::strcpy(dst, "end"); + } +} + +// --------------------------> PointType enums <------------------------------------ + +// Define the type used when the points are encoded as blind data in the output grid +enum class PointType : uint32_t { Disable = 0,// no point information e.g. when BuildT != Point + PointID = 1,// linear index of type uint32_t to points + World64 = 2,// Vec3d in world space + World32 = 3,// Vec3f in world space + Grid64 = 4,// Vec3d in grid space + Grid32 = 5,// Vec3f in grid space + Voxel32 = 6,// Vec3f in voxel space + Voxel16 = 7,// Vec3u16 in voxel space + Voxel8 = 8,// Vec3u8 in voxel space + Default = 9,// output matches input, i.e. 
Vec3d or Vec3f in world space + End =10 }; // --------------------------> GridBlindData enums <------------------------------------ @@ -307,82 +417,79 @@ enum class GridBlindDataClass : uint32_t { Unknown = 0, IndexArray = 1, AttributeArray = 2, GridName = 3, - End = 4 }; + ChannelArray = 4, + End = 5 }; /// @brief Blind-data Semantics that are currently understood by NanoVDB enum class GridBlindDataSemantic : uint32_t { Unknown = 0, - PointPosition = 1, + PointPosition = 1, // 3D coordinates in an unknown space PointColor = 2, PointNormal = 3, PointRadius = 4, PointVelocity = 5, PointId = 6, - End = 7 }; + WorldCoords = 7, // 3D coordinates in world space, e.g. (0.056, 0.8, 1,8) + GridCoords = 8, // 3D coordinates in grid space, e.g. (1.2, 4.0, 5.7), aka index-space + VoxelCoords = 9, // 3D coordinates in voxel space, e.g. (0.2, 0.0, 0.7) + End = 10 }; -// --------------------------> is_same <------------------------------------ - -/// @brief C++11 implementation of std::is_same -template -struct is_same -{ - static constexpr bool value = false; -}; +// --------------------------> BuildTraits <------------------------------------ +/// @brief Define static boolean tests for template build types template -struct is_same -{ - static constexpr bool value = true; -}; +struct BuildTraits +{ + // check if T is an index type + static constexpr bool is_index = util::is_same::value; + static constexpr bool is_onindex = util::is_same::value; + static constexpr bool is_offindex = util::is_same::value; + static constexpr bool is_indexmask = util::is_same::value; + // check if T is a compressed float type with fixed bit precision + static constexpr bool is_FpX = util::is_same::value; + // check if T is a compressed float type with fixed or variable bit precision + static constexpr bool is_Fp = util::is_same::value; + // check if T is a POD float type, i.e float or double + static constexpr bool is_float = util::is_floating_point::value; + // check if T is a template specialization of LeafData, i.e. has T mValues[512] + static constexpr bool is_special = is_index || is_Fp || util::is_same::value; +}; // BuildTraits + +// --------------------------> BuildToValueMap <------------------------------------ -// --------------------------> enable_if <------------------------------------ - -/// @brief C++11 implementation of std::enable_if -template -struct enable_if -{ -}; - -template -struct enable_if +/// @brief Maps one type (e.g. the build types above) to other (actual) types +template +struct BuildToValueMap { + using Type = T; using type = T; }; -// --------------------------> is_floating_point <------------------------------------ - -/// @brief C++11 implementation of std::is_floating_point -template -struct is_floating_point +template<> +struct BuildToValueMap { - static const bool value = is_same::value || is_same::value; + using Type = uint64_t; + using type = uint64_t; }; -// --------------------------> is_specialization <------------------------------------ - -/// @brief Metafunction used to determine if the first template -/// parameter is a specialization of the class template -/// given in the second template parameter. 
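As a quick, illustrative check of the BuildTraits introduced above (a sketch based on the trait comments, not part of the patch; it assumes NanoVDB.h is included):

    using namespace nanovdb;
    // ValueOnIndex is one of the index build types used by index grids.
    static_assert(BuildTraits<ValueOnIndex>::is_index, "expected an index build type");
    // Plain float is a POD float, whereas Fp8 is only a compressed-float dummy type.
    static_assert(BuildTraits<float>::is_float && !BuildTraits<Fp8>::is_float,
                  "Fp8 is not a POD floating point type");
    // FpN is a compressed float type, but not one with a fixed bit width.
    static_assert(BuildTraits<FpN>::is_Fp && !BuildTraits<FpN>::is_FpX,
                  "FpN has variable bit precision");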
-/// -/// @details is_specialization, Vec3>::value == true; -template class TemplateType> -struct is_specialization +template<> +struct BuildToValueMap { - static const bool value = false; + using Type = uint64_t; + using type = uint64_t; }; -template class TemplateType> -struct is_specialization, TemplateType> + +template<> +struct BuildToValueMap { - static const bool value = true; + using Type = uint64_t; + using type = uint64_t; }; -// --------------------------> Value Map <------------------------------------ - -/// @brief Maps one type (e.g. the build types above) to other (actual) types -template -struct BuildToValueMap +template<> +struct BuildToValueMap { - using Type = T; - using type = T; + using Type = uint64_t; + using type = uint64_t; }; template<> @@ -427,888 +534,197 @@ struct BuildToValueMap using type = float; }; -// --------------------------> PtrDiff PtrAdd <------------------------------------ - -template -__hostdev__ inline static int64_t PtrDiff(const T1* p, const T2* q) +template<> +struct BuildToValueMap { - NANOVDB_ASSERT(p && q); - return reinterpret_cast(p) - reinterpret_cast(q); -} + using Type = uint64_t; + using type = uint64_t; +}; -template -__hostdev__ inline static DstT* PtrAdd(SrcT *p, int64_t offset) -{ - NANOVDB_ASSERT(p); - return reinterpret_cast(reinterpret_cast(p) + offset); -} +// --------------------------> utility functions related to alignment <------------------------------------ -template -__hostdev__ inline static const DstT* PtrAdd(const SrcT *p, int64_t offset) +/// @brief return true if the specified pointer is 32 byte aligned +__hostdev__ inline static bool isAligned(const void* p){return uint64_t(p) % NANOVDB_DATA_ALIGNMENT == 0;} + +/// @brief return the smallest number of bytes that when added to the specified pointer results in a 32 byte aligned pointer. +__hostdev__ inline static uint64_t alignmentPadding(const void* p) { NANOVDB_ASSERT(p); - return reinterpret_cast(reinterpret_cast(p) + offset); + return (NANOVDB_DATA_ALIGNMENT - (uint64_t(p) % NANOVDB_DATA_ALIGNMENT)) % NANOVDB_DATA_ALIGNMENT; } -// --------------------------> Rgba8 <------------------------------------ -/// @brief 8-bit red, green, blue, alpha packed into 32 bit unsigned int -class Rgba8 -{ - union { - uint8_t c[4];// 4 color channels of red, green, blue and alpha components. 
- uint32_t packed;// 32 bit packed representation - } mData; -public: - static const int SIZE = 4; - using ValueType = uint8_t; - - Rgba8(const Rgba8&) = default; - Rgba8(Rgba8&&) = default; - Rgba8& operator=(Rgba8&&) = default; - Rgba8& operator=(const Rgba8&) = default; - __hostdev__ Rgba8() : mData{0,0,0,0} {static_assert(sizeof(uint32_t) == sizeof(Rgba8),"Unexpected sizeof");} - __hostdev__ Rgba8(uint8_t r, uint8_t g, uint8_t b, uint8_t a = 255u) : mData{r, g, b, a} {} - explicit __hostdev__ Rgba8(uint8_t v) : Rgba8(v,v,v,v) {} - __hostdev__ Rgba8(float r, float g, float b, float a = 1.0f) - : mData{(uint8_t(0.5f + r * 255.0f)),// round to nearest - (uint8_t(0.5f + g * 255.0f)),// round to nearest - (uint8_t(0.5f + b * 255.0f)),// round to nearest - (uint8_t(0.5f + a * 255.0f))}// round to nearest - { - } - __hostdev__ bool operator<(const Rgba8& rhs) const { return mData.packed < rhs.mData.packed; } - __hostdev__ bool operator==(const Rgba8& rhs) const { return mData.packed == rhs.mData.packed; } - __hostdev__ float lengthSqr() const - { - return 0.0000153787005f*(float(mData.c[0])*mData.c[0] + - float(mData.c[1])*mData.c[1] + - float(mData.c[2])*mData.c[2]);//1/255^2 - } - __hostdev__ float length() const { return sqrtf(this->lengthSqr() ); } - __hostdev__ const uint8_t& operator[](int n) const { return mData.c[n]; } - __hostdev__ uint8_t& operator[](int n) { return mData.c[n]; } - __hostdev__ const uint32_t& packed() const { return mData.packed; } - __hostdev__ uint32_t& packed() { return mData.packed; } - __hostdev__ const uint8_t& r() const { return mData.c[0]; } - __hostdev__ const uint8_t& g() const { return mData.c[1]; } - __hostdev__ const uint8_t& b() const { return mData.c[2]; } - __hostdev__ const uint8_t& a() const { return mData.c[3]; } - __hostdev__ uint8_t& r() { return mData.c[0]; } - __hostdev__ uint8_t& g() { return mData.c[1]; } - __hostdev__ uint8_t& b() { return mData.c[2]; } - __hostdev__ uint8_t& a() { return mData.c[3]; } -};// Rgba8 - -using PackedRGBA8 = Rgba8;// for backwards compatibility +/// @brief offset the specified pointer so it is 32 byte aligned. Works with both const and non-const pointers. +template +__hostdev__ inline static T* alignPtr(T* p){return util::PtrAdd(p, alignmentPadding(p));} -// --------------------------> isValue(GridType, GridClass) <------------------------------------ +// --------------------------> isFloatingPoint(GridType) <------------------------------------ -/// @brief return true if the GridType maps to a floating point value. +/// @brief return true if the GridType maps to a floating point type __hostdev__ inline bool isFloatingPoint(GridType gridType) { - return gridType == GridType::Float || + return gridType == GridType::Float || gridType == GridType::Double || - gridType == GridType::Fp4 || - gridType == GridType::Fp8 || - gridType == GridType::Fp16 || + gridType == GridType::Half || + gridType == GridType::Fp4 || + gridType == GridType::Fp8 || + gridType == GridType::Fp16 || gridType == GridType::FpN; } -// --------------------------> isValue(GridType, GridClass) <------------------------------------ - -/// @brief return true if the combination of GridType and GridClass is valid. 
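For illustration (a sketch, not part of the patch, assuming NanoVDB.h and <cassert> are included), the alignment helpers above can place a grid at the next 32-byte boundary inside a larger, arbitrarily aligned staging buffer:

    // 'staging' points somewhere inside a larger host allocation and may be misaligned.
    inline char* placeGrid(char* staging)
    {
        const uint64_t pad = nanovdb::alignmentPadding(staging); // bytes to skip, in [0, 31]
        char* gridStart = nanovdb::alignPtr(staging);            // equals staging + pad
        assert(nanovdb::isAligned(gridStart) && pad < 32);
        return gridStart;
    }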
-__hostdev__ inline bool isValid(GridType gridType, GridClass gridClass) -{ - if (gridClass == GridClass::LevelSet || gridClass == GridClass::FogVolume) { - return isFloatingPoint(gridType); - } else if (gridClass == GridClass::Staggered) { - return gridType == GridType::Vec3f || gridType == GridType::Vec3d || - gridType == GridType::Vec4f || gridType == GridType::Vec4d; - } else if (gridClass == GridClass::PointIndex || gridClass == GridClass::PointData) { - return gridType == GridType::UInt32; - } else if (gridClass == GridClass::VoxelVolume) { - return gridType == GridType::RGBA8 || gridType == GridType::Float || gridType == GridType::Double || gridType == GridType::Vec3f || gridType == GridType::Vec3d || gridType == GridType::UInt32; - } - return gridClass < GridClass::End && gridType < GridType::End;// any valid combination -} - -// ----------------------------> Version class <------------------------------------- - -/// @brief Bit-compacted representation of all three version numbers -/// -/// @details major is the top 11 bits, minor is the 11 middle bits and patch is the lower 10 bits -class Version -{ - uint32_t mData;// 11 + 11 + 10 bit packing of major + minor + patch -public: - __hostdev__ Version() : mData( uint32_t(NANOVDB_MAJOR_VERSION_NUMBER) << 21 | - uint32_t(NANOVDB_MINOR_VERSION_NUMBER) << 10 | - uint32_t(NANOVDB_PATCH_VERSION_NUMBER) ) - { - } - __hostdev__ Version(uint32_t major, uint32_t minor, uint32_t patch) - : mData( major << 21 | minor << 10 | patch ) - { - NANOVDB_ASSERT(major < (1u << 11));// max value of major is 2047 - NANOVDB_ASSERT(minor < (1u << 11));// max value of minor is 2047 - NANOVDB_ASSERT(patch < (1u << 10));// max value of patch is 1023 - } - __hostdev__ bool operator==(const Version &rhs) const {return mData == rhs.mData;} - __hostdev__ bool operator< (const Version &rhs) const {return mData < rhs.mData;} - __hostdev__ bool operator<=(const Version &rhs) const {return mData <= rhs.mData;} - __hostdev__ bool operator> (const Version &rhs) const {return mData > rhs.mData;} - __hostdev__ bool operator>=(const Version &rhs) const {return mData >= rhs.mData;} - __hostdev__ uint32_t id() const { return mData; } - __hostdev__ uint32_t getMajor() const { return (mData >> 21) & ((1u << 11) - 1);} - __hostdev__ uint32_t getMinor() const { return (mData >> 10) & ((1u << 11) - 1);} - __hostdev__ uint32_t getPatch() const { return mData & ((1u << 10) - 1);} - -#ifndef __CUDACC_RTC__ - const char* c_str() const - { - char *buffer = (char*)malloc(4 + 1 + 4 + 1 + 4 + 1);// xxxx.xxxx.xxxx\n - sprintf(buffer, "%d.%d.%d", this->getMajor(), this->getMinor(), this->getPatch()); - return buffer; - } -#endif -};// Version - -// ----------------------------> Various math functions <------------------------------------- - -//@{ -/// Tolerance for floating-point comparison -template -struct Tolerance; -template<> -struct Tolerance -{ - __hostdev__ static float value() { return 1e-8f; } -}; -template<> -struct Tolerance -{ - __hostdev__ static double value() { return 1e-15; } -}; -//@} - -//@{ -/// Delta for small floating-point offsets -template -struct Delta; -template<> -struct Delta -{ - __hostdev__ static float value() { return 1e-5f; } -}; -template<> -struct Delta -{ - __hostdev__ static double value() { return 1e-9; } -}; -//@} - -//@{ -/// Maximum floating-point values -template -struct Maximum; -#if defined(__CUDA_ARCH__) || defined(__HIP__) -template<> -struct Maximum -{ - __hostdev__ static int value() { return 2147483647; } -}; -template<> -struct Maximum -{ - 
__hostdev__ static uint32_t value() { return 4294967295; } -}; -template<> -struct Maximum -{ - __hostdev__ static float value() { return 1e+38f; } -}; -template<> -struct Maximum -{ - __hostdev__ static double value() { return 1e+308; } -}; -#else -template -struct Maximum -{ - static T value() { return std::numeric_limits::max(); } -}; -#endif -//@} - -template -__hostdev__ inline bool isApproxZero(const Type& x) -{ - return !(x > Tolerance::value()) && !(x < -Tolerance::value()); -} - -template -__hostdev__ inline Type Min(Type a, Type b) -{ - return (a < b) ? a : b; -} -__hostdev__ inline int32_t Min(int32_t a, int32_t b) -{ - return int32_t(fminf(float(a), float(b))); -} -__hostdev__ inline uint32_t Min(uint32_t a, uint32_t b) -{ - return uint32_t(fminf(float(a), float(b))); -} -__hostdev__ inline float Min(float a, float b) -{ - return fminf(a, b); -} -__hostdev__ inline double Min(double a, double b) -{ - return fmin(a, b); -} -template -__hostdev__ inline Type Max(Type a, Type b) -{ - return (a > b) ? a : b; -} - -__hostdev__ inline int32_t Max(int32_t a, int32_t b) -{ - return int32_t(fmaxf(float(a), float(b))); -} -__hostdev__ inline uint32_t Max(uint32_t a, uint32_t b) -{ - return uint32_t(fmaxf(float(a), float(b))); -} -__hostdev__ inline float Max(float a, float b) -{ - return fmaxf(a, b); -} -__hostdev__ inline double Max(double a, double b) -{ - return fmax(a, b); -} -__hostdev__ inline float Clamp(float x, float a, float b) -{ - return Max(Min(x, b), a); -} -__hostdev__ inline double Clamp(double x, double a, double b) -{ - return Max(Min(x, b), a); -} - -__hostdev__ inline float Fract(float x) -{ - return x - floorf(x); -} -__hostdev__ inline double Fract(double x) -{ - return x - ::floor(x); -} - -__hostdev__ inline int32_t Floor(float x) -{ - return int32_t(floorf(x)); -} -__hostdev__ inline int32_t Floor(double x) -{ - return int32_t(::floor(x)); -} - -__hostdev__ inline int32_t Ceil(float x) -{ - return int32_t(ceilf(x)); -} -__hostdev__ inline int32_t Ceil(double x) -{ - return int32_t(::ceil(x)); -} - -template -__hostdev__ inline T Pow2(T x) -{ - return x * x; -} - -template -__hostdev__ inline T Pow3(T x) -{ - return x * x * x; -} - -template -__hostdev__ inline T Pow4(T x) -{ - return Pow2(x * x); -} -template -__hostdev__ inline T Abs(T x) -{ - return x < 0 ? -x : x; -} +// --------------------------> isFloatingPointVector(GridType) <------------------------------------ -template<> -__hostdev__ inline float Abs(float x) +/// @brief return true if the GridType maps to a floating point vec3. +__hostdev__ inline bool isFloatingPointVector(GridType gridType) { - return fabsf(x); + return gridType == GridType::Vec3f || + gridType == GridType::Vec3d || + gridType == GridType::Vec4f || + gridType == GridType::Vec4d; } -template<> -__hostdev__ inline double Abs(double x) -{ - return fabs(x); -} +// --------------------------> isInteger(GridType) <------------------------------------ -template<> -__hostdev__ inline int Abs(int x) +/// @brief Return true if the GridType maps to a POD integer type. 
+/// @details These types are used to associate a voxel with a POD integer type +__hostdev__ inline bool isInteger(GridType gridType) { - return ::abs(x); + return gridType == GridType::Int16 || + gridType == GridType::Int32 || + gridType == GridType::Int64 || + gridType == GridType::UInt32|| + gridType == GridType::UInt8; } -template class Vec3T> -__hostdev__ inline CoordT Round(const Vec3T& xyz); - -template class Vec3T> -__hostdev__ inline CoordT Round(const Vec3T& xyz) -{ - return CoordT(int32_t(rintf(xyz[0])), int32_t(rintf(xyz[1])), int32_t(rintf(xyz[2]))); - //return CoordT(int32_t(roundf(xyz[0])), int32_t(roundf(xyz[1])), int32_t(roundf(xyz[2])) ); - //return CoordT(int32_t(floorf(xyz[0] + 0.5f)), int32_t(floorf(xyz[1] + 0.5f)), int32_t(floorf(xyz[2] + 0.5f))); -} +// --------------------------> isIndex(GridType) <------------------------------------ -template class Vec3T> -__hostdev__ inline CoordT Round(const Vec3T& xyz) +/// @brief Return true if the GridType maps to a special index type (not a POD integer type). +/// @details These types are used to index from a voxel into an external array of values, e.g. sidecar or blind data. +__hostdev__ inline bool isIndex(GridType gridType) { - return CoordT(int32_t(floor(xyz[0] + 0.5)), int32_t(floor(xyz[1] + 0.5)), int32_t(floor(xyz[2] + 0.5))); + return gridType == GridType::Index ||// index both active and inactive values + gridType == GridType::OnIndex ||// index active values only + gridType == GridType::IndexMask ||// as Index, but with an additional mask + gridType == GridType::OnIndexMask;// as OnIndex, but with an additional mask } -template class Vec3T> -__hostdev__ inline CoordT RoundDown(const Vec3T& xyz) -{ - return CoordT(Floor(xyz[0]), Floor(xyz[1]), Floor(xyz[2])); -} +// --------------------------> isValue(GridType, GridClass) <------------------------------------ -//@{ -/// Return the square root of a floating-point value. -__hostdev__ inline float Sqrt(float x) -{ - return sqrtf(x); -} -__hostdev__ inline double Sqrt(double x) +/// @brief return true if the combination of GridType and GridClass is valid. +__hostdev__ inline bool isValid(GridType gridType, GridClass gridClass) { - return ::sqrt(x); + if (gridClass == GridClass::LevelSet || gridClass == GridClass::FogVolume) { + return isFloatingPoint(gridType); + } else if (gridClass == GridClass::Staggered) { + return isFloatingPointVector(gridType); + } else if (gridClass == GridClass::PointIndex || gridClass == GridClass::PointData) { + return gridType == GridType::PointIndex || gridType == GridType::UInt32; + } else if (gridClass == GridClass::Topology) { + return gridType == GridType::Mask; + } else if (gridClass == GridClass::IndexGrid) { + return isIndex(gridType); + } else if (gridClass == GridClass::VoxelVolume) { + return gridType == GridType::RGBA8 || gridType == GridType::Float || + gridType == GridType::Double || gridType == GridType::Vec3f || + gridType == GridType::Vec3d || gridType == GridType::UInt32 || + gridType == GridType::UInt8; + } + return gridClass < GridClass::End && gridType < GridType::End; // any valid combination } -//@} -/// Return the sign of the given value as an integer (either -1, 0 or 1). 
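The predicates above make it easy to reject a grid whose header does not describe an index grid before reading voxel offsets from it. A minimal sketch, illustrative only and not part of the patch, using only the enums and helpers defined in this header:

    // Returns true if (t, c) describes an index grid, i.e. a grid whose voxels
    // hold offsets into an external value array rather than values themselves.
    inline bool isUsableIndexGrid(nanovdb::GridType t, nanovdb::GridClass c)
    {
        // GridClass::IndexGrid is only valid when paired with one of the four
        // index GridTypes (Index, OnIndex, IndexMask, OnIndexMask).
        return c == nanovdb::GridClass::IndexGrid &&
               nanovdb::isIndex(t) &&
               nanovdb::isValid(t, c);
    }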
-template -__hostdev__ inline T Sign(const T &x) { return ((T(0) < x)?T(1):T(0)) - ((x < T(0))?T(1):T(0)); } - -template -__hostdev__ inline int MinIndex(const Vec3T& v) -{ -#if 0 - static const int hashTable[8] = {2, 1, 9, 1, 2, 9, 0, 0}; //9 are dummy values - const int hashKey = ((v[0] < v[1]) << 2) + ((v[0] < v[2]) << 1) + (v[1] < v[2]); // ?*4+?*2+?*1 - return hashTable[hashKey]; -#else - if (v[0] < v[1] && v[0] < v[2]) - return 0; - if (v[1] < v[2]) - return 1; - else - return 2; -#endif +// --------------------------> validation of blind data meta data <------------------------------------ + +/// @brief return true if the combination of GridBlindDataClass, GridBlindDataSemantic and GridType is valid. +__hostdev__ inline bool isValid(const GridBlindDataClass& blindClass, + const GridBlindDataSemantic& blindSemantics, + const GridType& blindType) +{ + bool test = false; + switch (blindClass) { + case GridBlindDataClass::IndexArray: + test = (blindSemantics == GridBlindDataSemantic::Unknown || + blindSemantics == GridBlindDataSemantic::PointId) && + isInteger(blindType); + break; + case GridBlindDataClass::AttributeArray: + if (blindSemantics == GridBlindDataSemantic::PointPosition || + blindSemantics == GridBlindDataSemantic::WorldCoords) { + test = blindType == GridType::Vec3f || blindType == GridType::Vec3d; + } else if (blindSemantics == GridBlindDataSemantic::GridCoords) { + test = blindType == GridType::Vec3f; + } else if (blindSemantics == GridBlindDataSemantic::VoxelCoords) { + test = blindType == GridType::Vec3f || blindType == GridType::Vec3u8 || blindType == GridType::Vec3u16; + } else { + test = blindSemantics != GridBlindDataSemantic::PointId; + } + break; + case GridBlindDataClass::GridName: + test = blindSemantics == GridBlindDataSemantic::Unknown && blindType == GridType::Unknown; + break; + default: // captures blindClass == Unknown and ChannelArray + test = blindClass < GridBlindDataClass::End && + blindSemantics < GridBlindDataSemantic::End && + blindType < GridType::End; // any valid combination + break; + } + //if (!test) printf("Invalid combination: GridBlindDataClass=%u, GridBlindDataSemantic=%u, GridType=%u\n",(uint32_t)blindClass, (uint32_t)blindSemantics, (uint32_t)blindType); + return test; } -template -__hostdev__ inline int MaxIndex(const Vec3T& v) -{ -#if 0 - static const int hashTable[8] = {2, 1, 9, 1, 2, 9, 0, 0}; //9 are dummy values - const int hashKey = ((v[0] > v[1]) << 2) + ((v[0] > v[2]) << 1) + (v[1] > v[2]); // ?*4+?*2+?*1 - return hashTable[hashKey]; -#else - if (v[0] > v[1] && v[0] > v[2]) - return 0; - if (v[1] > v[2]) - return 1; - else - return 2; -#endif -} +// ----------------------------> Version class <------------------------------------- -/// @brief round up byteSize to the nearest wordSize, e.g. to align to machine word: AlignUp -__hostdev__ inline uint64_t AlignUp(uint64_t byteCount) -{ - const uint64_t r = byteCount % wordSize; - return r ? byteCount - r + wordSize : byteCount; -} - -// ------------------------------> Coord <-------------------------------------- - -// forward decleration so we can define Coord::asVec3s and Coord::asVec3d -template class Vec3; - -/// @brief Signed (i, j, k) 32-bit integer coordinate class, similar to openvdb::math::Coord -class Coord -{ - int32_t mVec[3]; // private member data - three signed index coordinates -public: - using ValueType = int32_t; - using IndexType = uint32_t; - - /// @brief Initialize all coordinates to zero. 
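Two concrete cases of the blind-data validation rule defined above, as an illustrative sketch (not part of the patch; assumes NanoVDB.h and <cassert> are included):

    inline void checkBlindMetaData()
    {
        using namespace nanovdb;
        // Point positions quantized to 16-bit voxel-space coordinates are accepted,
        assert( isValid(GridBlindDataClass::AttributeArray,
                        GridBlindDataSemantic::VoxelCoords, GridType::Vec3u16));
        // but an index array must hold a POD integer type, so Vec3f is rejected.
        assert(!isValid(GridBlindDataClass::IndexArray,
                        GridBlindDataSemantic::PointId, GridType::Vec3f));
    }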
- __hostdev__ Coord() - : mVec{0, 0, 0} - { - } - - /// @brief Initializes all coordinates to the given signed integer. - __hostdev__ explicit Coord(ValueType n) - : mVec{n, n, n} - { - } - - /// @brief Initializes coordinate to the given signed integers. - __hostdev__ Coord(ValueType i, ValueType j, ValueType k) - : mVec{i, j, k} - { - } - - __hostdev__ Coord(ValueType *ptr) - : mVec{ptr[0], ptr[1], ptr[2]} - { - } - - __hostdev__ int32_t x() const { return mVec[0]; } - __hostdev__ int32_t y() const { return mVec[1]; } - __hostdev__ int32_t z() const { return mVec[2]; } - - __hostdev__ int32_t& x() { return mVec[0]; } - __hostdev__ int32_t& y() { return mVec[1]; } - __hostdev__ int32_t& z() { return mVec[2]; } - - __hostdev__ static Coord max() { return Coord(int32_t((1u << 31) - 1)); } - - __hostdev__ static Coord min() { return Coord(-int32_t((1u << 31) - 1) - 1); } - - __hostdev__ static size_t memUsage() { return sizeof(Coord); } - - /// @brief Return a const reference to the given Coord component. - /// @warning The argument is assumed to be 0, 1, or 2. - __hostdev__ const ValueType& operator[](IndexType i) const { return mVec[i]; } - - /// @brief Return a non-const reference to the given Coord component. - /// @warning The argument is assumed to be 0, 1, or 2. - __hostdev__ ValueType& operator[](IndexType i) { return mVec[i]; } - - /// @brief Assignment operator that works with openvdb::Coord - template - __hostdev__ Coord& operator=(const CoordT &other) - { - static_assert(sizeof(Coord) == sizeof(CoordT), "Mis-matched sizeof"); - mVec[0] = other[0]; - mVec[1] = other[1]; - mVec[2] = other[2]; - return *this; - } - - /// @brief Return a new instance with coordinates masked by the given unsigned integer. - __hostdev__ Coord operator&(IndexType n) const { return Coord(mVec[0] & n, mVec[1] & n, mVec[2] & n); } - - // @brief Return a new instance with coordinates left-shifted by the given unsigned integer. - __hostdev__ Coord operator<<(IndexType n) const { return Coord(mVec[0] << n, mVec[1] << n, mVec[2] << n); } - - // @brief Return a new instance with coordinates right-shifted by the given unsigned integer. - __hostdev__ Coord operator>>(IndexType n) const { return Coord(mVec[0] >> n, mVec[1] >> n, mVec[2] >> n); } - - /// @brief Return true if this Coord is lexicographically less than the given Coord. - __hostdev__ bool operator<(const Coord& rhs) const - { - return mVec[0] < rhs[0] ? true : mVec[0] > rhs[0] ? false : mVec[1] < rhs[1] ? true : mVec[1] > rhs[1] ? false : mVec[2] < rhs[2] ? true : false; - } - - // @brief Return true if the Coord components are identical. 
- __hostdev__ bool operator==(const Coord& rhs) const { return mVec[0] == rhs[0] && mVec[1] == rhs[1] && mVec[2] == rhs[2]; } - __hostdev__ bool operator!=(const Coord& rhs) const { return mVec[0] != rhs[0] || mVec[1] != rhs[1] || mVec[2] != rhs[2]; } - __hostdev__ Coord& operator&=(int n) - { - mVec[0] &= n; - mVec[1] &= n; - mVec[2] &= n; - return *this; - } - __hostdev__ Coord& operator<<=(uint32_t n) - { - mVec[0] <<= n; - mVec[1] <<= n; - mVec[2] <<= n; - return *this; - } - __hostdev__ Coord& operator+=(int n) - { - mVec[0] += n; - mVec[1] += n; - mVec[2] += n; - return *this; - } - __hostdev__ Coord operator+(const Coord& rhs) const { return Coord(mVec[0] + rhs[0], mVec[1] + rhs[1], mVec[2] + rhs[2]); } - __hostdev__ Coord operator-(const Coord& rhs) const { return Coord(mVec[0] - rhs[0], mVec[1] - rhs[1], mVec[2] - rhs[2]); } - __hostdev__ Coord& operator+=(const Coord& rhs) - { - mVec[0] += rhs[0]; - mVec[1] += rhs[1]; - mVec[2] += rhs[2]; - return *this; - } - __hostdev__ Coord& operator-=(const Coord& rhs) - { - mVec[0] -= rhs[0]; - mVec[1] -= rhs[1]; - mVec[2] -= rhs[2]; - return *this; - } - - /// @brief Perform a component-wise minimum with the other Coord. - __hostdev__ Coord& minComponent(const Coord& other) - { - if (other[0] < mVec[0]) - mVec[0] = other[0]; - if (other[1] < mVec[1]) - mVec[1] = other[1]; - if (other[2] < mVec[2]) - mVec[2] = other[2]; - return *this; - } - - /// @brief Perform a component-wise maximum with the other Coord. - __hostdev__ Coord& maxComponent(const Coord& other) - { - if (other[0] > mVec[0]) - mVec[0] = other[0]; - if (other[1] > mVec[1]) - mVec[1] = other[1]; - if (other[2] > mVec[2]) - mVec[2] = other[2]; - return *this; - } - - __hostdev__ Coord offsetBy(ValueType dx, ValueType dy, ValueType dz) const - { - return Coord(mVec[0] + dx, mVec[1] + dy, mVec[2] + dz); - } - - __hostdev__ Coord offsetBy(ValueType n) const { return this->offsetBy(n, n, n); } - - /// Return true if any of the components of @a a are smaller than the - /// corresponding components of @a b. - __hostdev__ static inline bool lessThan(const Coord& a, const Coord& b) - { - return (a[0] < b[0] || a[1] < b[1] || a[2] < b[2]); - } - - /// @brief Return the largest integer coordinates that are not greater - /// than @a xyz (node centered conversion). - template - __hostdev__ static Coord Floor(const Vec3T& xyz) { return Coord(nanovdb::Floor(xyz[0]), nanovdb::Floor(xyz[1]), nanovdb::Floor(xyz[2])); } - - /// @brief Return a hash key derived from the existing coordinates. - /// @details For details on this hash function please see the VDB paper. 
- template - __hostdev__ uint32_t hash() const { return ((1 << Log2N) - 1) & (mVec[0] * 73856093 ^ mVec[1] * 19349663 ^ mVec[2] * 83492791); } - - /// @brief Return the octant of this Coord - //__hostdev__ size_t octant() const { return (uint32_t(mVec[0])>>31) | ((uint32_t(mVec[1])>>31)<<1) | ((uint32_t(mVec[2])>>31)<<2); } - __hostdev__ uint8_t octant() const { return (uint8_t(bool(mVec[0] & (1u << 31)))) | - (uint8_t(bool(mVec[1] & (1u << 31))) << 1) | - (uint8_t(bool(mVec[2] & (1u << 31))) << 2); } - - /// @brief Return a single precision floating-point vector of this coordinate - __hostdev__ inline Vec3 asVec3s() const; - - /// @brief Return a double precision floating-point vector of this coordinate - __hostdev__ inline Vec3 asVec3d() const; -}; // Coord class - -// ----------------------------> Vec3 <-------------------------------------- - -/// @brief A simple vector class with three double components, similar to openvdb::math::Vec3 -template -class Vec3 -{ - T mVec[3]; - -public: - static const int SIZE = 3; - using ValueType = T; - Vec3() = default; - __hostdev__ explicit Vec3(T x) - : mVec{x, x, x} - { - } - __hostdev__ Vec3(T x, T y, T z) - : mVec{x, y, z} - { - } - template - __hostdev__ explicit Vec3(const Vec3& v) - : mVec{T(v[0]), T(v[1]), T(v[2])} - { - } - __hostdev__ explicit Vec3(const Coord& ijk) - : mVec{T(ijk[0]), T(ijk[1]), T(ijk[2])} - { - } - __hostdev__ bool operator==(const Vec3& rhs) const { return mVec[0] == rhs[0] && mVec[1] == rhs[1] && mVec[2] == rhs[2]; } - __hostdev__ bool operator!=(const Vec3& rhs) const { return mVec[0] != rhs[0] || mVec[1] != rhs[1] || mVec[2] != rhs[2]; } - template - __hostdev__ Vec3& operator=(const Vec3T& rhs) - { - mVec[0] = rhs[0]; - mVec[1] = rhs[1]; - mVec[2] = rhs[2]; - return *this; - } - __hostdev__ const T& operator[](int i) const { return mVec[i]; } - __hostdev__ T& operator[](int i) { return mVec[i]; } - template - __hostdev__ T dot(const Vec3T& v) const { return mVec[0] * v[0] + mVec[1] * v[1] + mVec[2] * v[2]; } - template - __hostdev__ Vec3 cross(const Vec3T& v) const - { - return Vec3(mVec[1] * v[2] - mVec[2] * v[1], - mVec[2] * v[0] - mVec[0] * v[2], - mVec[0] * v[1] - mVec[1] * v[0]); - } - __hostdev__ T lengthSqr() const - { - return mVec[0] * mVec[0] + mVec[1] * mVec[1] + mVec[2] * mVec[2]; // 5 flops - } - __hostdev__ T length() const { return Sqrt(this->lengthSqr()); } - __hostdev__ Vec3 operator-() const { return Vec3(-mVec[0], -mVec[1], -mVec[2]); } - __hostdev__ Vec3 operator*(const Vec3& v) const { return Vec3(mVec[0] * v[0], mVec[1] * v[1], mVec[2] * v[2]); } - __hostdev__ Vec3 operator/(const Vec3& v) const { return Vec3(mVec[0] / v[0], mVec[1] / v[1], mVec[2] / v[2]); } - __hostdev__ Vec3 operator+(const Vec3& v) const { return Vec3(mVec[0] + v[0], mVec[1] + v[1], mVec[2] + v[2]); } - __hostdev__ Vec3 operator-(const Vec3& v) const { return Vec3(mVec[0] - v[0], mVec[1] - v[1], mVec[2] - v[2]); } - __hostdev__ Vec3 operator*(const T& s) const { return Vec3(s * mVec[0], s * mVec[1], s * mVec[2]); } - __hostdev__ Vec3 operator/(const T& s) const { return (T(1) / s) * (*this); } - __hostdev__ Vec3& operator+=(const Vec3& v) - { - mVec[0] += v[0]; - mVec[1] += v[1]; - mVec[2] += v[2]; - return *this; - } - __hostdev__ Vec3& operator-=(const Vec3& v) - { - mVec[0] -= v[0]; - mVec[1] -= v[1]; - mVec[2] -= v[2]; - return *this; - } - __hostdev__ Vec3& operator*=(const T& s) - { - mVec[0] *= s; - mVec[1] *= s; - mVec[2] *= s; - return *this; - } - __hostdev__ Vec3& operator/=(const T& s) { return (*this) *= T(1) / 
s; } - __hostdev__ Vec3& normalize() { return (*this) /= this->length(); } - /// @brief Perform a component-wise minimum with the other Coord. - __hostdev__ Vec3& minComponent(const Vec3& other) - { - if (other[0] < mVec[0]) - mVec[0] = other[0]; - if (other[1] < mVec[1]) - mVec[1] = other[1]; - if (other[2] < mVec[2]) - mVec[2] = other[2]; - return *this; - } - - /// @brief Perform a component-wise maximum with the other Coord. - __hostdev__ Vec3& maxComponent(const Vec3& other) - { - if (other[0] > mVec[0]) - mVec[0] = other[0]; - if (other[1] > mVec[1]) - mVec[1] = other[1]; - if (other[2] > mVec[2]) - mVec[2] = other[2]; - return *this; - } - /// @brief Return the smallest vector component - __hostdev__ ValueType min() const - { - return mVec[0] < mVec[1] ? (mVec[0] < mVec[2] ? mVec[0] : mVec[2]) : (mVec[1] < mVec[2] ? mVec[1] : mVec[2]); - } - /// @brief Return the largest vector component - __hostdev__ ValueType max() const - { - return mVec[0] > mVec[1] ? (mVec[0] > mVec[2] ? mVec[0] : mVec[2]) : (mVec[1] > mVec[2] ? mVec[1] : mVec[2]); - } - __hostdev__ Coord floor() const { return Coord(Floor(mVec[0]), Floor(mVec[1]), Floor(mVec[2])); } - __hostdev__ Coord ceil() const { return Coord(Ceil(mVec[0]), Ceil(mVec[1]), Ceil(mVec[2])); } - __hostdev__ Coord round() const { return Coord(Floor(mVec[0] + 0.5), Floor(mVec[1] + 0.5), Floor(mVec[2] + 0.5)); } -}; // Vec3 - -template -__hostdev__ inline Vec3 operator*(T1 scalar, const Vec3& vec) -{ - return Vec3(scalar * vec[0], scalar * vec[1], scalar * vec[2]); -} -template -__hostdev__ inline Vec3 operator/(T1 scalar, const Vec3& vec) -{ - return Vec3(scalar / vec[0], scalar / vec[1], scalar / vec[2]); -} - -using Vec3R = Vec3; -using Vec3d = Vec3; -using Vec3f = Vec3; -using Vec3i = Vec3; - -/// @brief Return a single precision floating-point vector of this coordinate -__hostdev__ inline Vec3f Coord::asVec3s() const { return Vec3f(float(mVec[0]), float(mVec[1]), float(mVec[2])); } - -/// @brief Return a double precision floating-point vector of this coordinate -__hostdev__ inline Vec3d Coord::asVec3d() const { return Vec3d(double(mVec[0]), double(mVec[1]), double(mVec[2])); } - -// ----------------------------> Vec4 <-------------------------------------- - -/// @brief A simple vector class with three double components, similar to openvdb::math::Vec4 -template -class Vec4 +/// @details major is the top 11 bits, minor is the 11 middle bits and patch is the lower 10 bits +class Version { - T mVec[4]; - + uint32_t mData; // 11 + 11 + 10 bit packing of major + minor + patch public: - static const int SIZE = 4; - using ValueType = T; - Vec4() = default; - __hostdev__ explicit Vec4(T x) - : mVec{x, x, x, x} + static constexpr uint32_t End = 0, StrLen = 8;// for strlen() + /// @brief Default constructor + __hostdev__ Version() + : mData(uint32_t(NANOVDB_MAJOR_VERSION_NUMBER) << 21 | + uint32_t(NANOVDB_MINOR_VERSION_NUMBER) << 10 | + uint32_t(NANOVDB_PATCH_VERSION_NUMBER)) { - } - __hostdev__ Vec4(T x, T y, T z, T w) - : mVec{x, y, z, w} - { - } - template - __hostdev__ explicit Vec4(const Vec4& v) - : mVec{T(v[0]), T(v[1]), T(v[2]), T(v[3])} - { - } - __hostdev__ bool operator==(const Vec4& rhs) const { return mVec[0] == rhs[0] && mVec[1] == rhs[1] && mVec[2] == rhs[2] && mVec[3] == rhs[3]; } - __hostdev__ bool operator!=(const Vec4& rhs) const { return mVec[0] != rhs[0] || mVec[1] != rhs[1] || mVec[2] != rhs[2] || mVec[3] != rhs[3]; } - template - __hostdev__ Vec4& operator=(const Vec4T& rhs) - { - mVec[0] = rhs[0]; - mVec[1] = rhs[1]; - mVec[2] 
= rhs[2]; - mVec[3] = rhs[3]; - return *this; - } - __hostdev__ const T& operator[](int i) const { return mVec[i]; } - __hostdev__ T& operator[](int i) { return mVec[i]; } - template - __hostdev__ T dot(const Vec4T& v) const { return mVec[0] * v[0] + mVec[1] * v[1] + mVec[2] * v[2] + mVec[3] * v[3]; } - __hostdev__ T lengthSqr() const - { - return mVec[0] * mVec[0] + mVec[1] * mVec[1] + mVec[2] * mVec[2] + mVec[3] * mVec[3]; // 7 flops - } - __hostdev__ T length() const { return Sqrt(this->lengthSqr()); } - __hostdev__ Vec4 operator-() const { return Vec4(-mVec[0], -mVec[1], -mVec[2], -mVec[3]); } - __hostdev__ Vec4 operator*(const Vec4& v) const { return Vec4(mVec[0] * v[0], mVec[1] * v[1], mVec[2] * v[2], mVec[3] * v[3]); } - __hostdev__ Vec4 operator/(const Vec4& v) const { return Vec4(mVec[0] / v[0], mVec[1] / v[1], mVec[2] / v[2], mVec[3] / v[3]); } - __hostdev__ Vec4 operator+(const Vec4& v) const { return Vec4(mVec[0] + v[0], mVec[1] + v[1], mVec[2] + v[2], mVec[3] + v[3]); } - __hostdev__ Vec4 operator-(const Vec4& v) const { return Vec4(mVec[0] - v[0], mVec[1] - v[1], mVec[2] - v[2], mVec[3] - v[3]); } - __hostdev__ Vec4 operator*(const T& s) const { return Vec4(s * mVec[0], s * mVec[1], s * mVec[2], s * mVec[3]); } - __hostdev__ Vec4 operator/(const T& s) const { return (T(1) / s) * (*this); } - __hostdev__ Vec4& operator+=(const Vec4& v) - { - mVec[0] += v[0]; - mVec[1] += v[1]; - mVec[2] += v[2]; - mVec[3] += v[3]; - return *this; - } - __hostdev__ Vec4& operator-=(const Vec4& v) - { - mVec[0] -= v[0]; - mVec[1] -= v[1]; - mVec[2] -= v[2]; - mVec[3] -= v[3]; - return *this; - } - __hostdev__ Vec4& operator*=(const T& s) - { - mVec[0] *= s; - mVec[1] *= s; - mVec[2] *= s; - mVec[3] *= s; - return *this; - } - __hostdev__ Vec4& operator/=(const T& s) { return (*this) *= T(1) / s; } - __hostdev__ Vec4& normalize() { return (*this) /= this->length(); } - /// @brief Perform a component-wise minimum with the other Coord. - __hostdev__ Vec4& minComponent(const Vec4& other) - { - if (other[0] < mVec[0]) - mVec[0] = other[0]; - if (other[1] < mVec[1]) - mVec[1] = other[1]; - if (other[2] < mVec[2]) - mVec[2] = other[2]; - if (other[3] < mVec[3]) - mVec[3] = other[3]; - return *this; - } - - /// @brief Perform a component-wise maximum with the other Coord. 
- __hostdev__ Vec4& maxComponent(const Vec4& other) - { - if (other[0] > mVec[0]) - mVec[0] = other[0]; - if (other[1] > mVec[1]) - mVec[1] = other[1]; - if (other[2] > mVec[2]) - mVec[2] = other[2]; - if (other[3] > mVec[3]) - mVec[3] = other[3]; - return *this; - } -}; // Vec4 - -template -__hostdev__ inline Vec4 operator*(T1 scalar, const Vec4& vec) -{ - return Vec4(scalar * vec[0], scalar * vec[1], scalar * vec[2], scalar * vec[3]); -} -template -__hostdev__ inline Vec4 operator/(T1 scalar, const Vec3& vec) -{ - return Vec4(scalar / vec[0], scalar / vec[1], scalar / vec[2], scalar / vec[3]); + } + /// @brief Constructor from a raw uint32_t data representation + __hostdev__ Version(uint32_t data) : mData(data) {} + /// @brief Constructor from major.minor.patch version numbers + __hostdev__ Version(uint32_t major, uint32_t minor, uint32_t patch) + : mData(major << 21 | minor << 10 | patch) + { + NANOVDB_ASSERT(major < (1u << 11)); // max value of major is 2047 + NANOVDB_ASSERT(minor < (1u << 11)); // max value of minor is 2047 + NANOVDB_ASSERT(patch < (1u << 10)); // max value of patch is 1023 + } + __hostdev__ bool operator==(const Version& rhs) const { return mData == rhs.mData; } + __hostdev__ bool operator<( const Version& rhs) const { return mData < rhs.mData; } + __hostdev__ bool operator<=(const Version& rhs) const { return mData <= rhs.mData; } + __hostdev__ bool operator>( const Version& rhs) const { return mData > rhs.mData; } + __hostdev__ bool operator>=(const Version& rhs) const { return mData >= rhs.mData; } + __hostdev__ uint32_t id() const { return mData; } + __hostdev__ uint32_t getMajor() const { return (mData >> 21) & ((1u << 11) - 1); } + __hostdev__ uint32_t getMinor() const { return (mData >> 10) & ((1u << 11) - 1); } + __hostdev__ uint32_t getPatch() const { return mData & ((1u << 10) - 1); } + __hostdev__ bool isCompatible() const { return this->getMajor() == uint32_t(NANOVDB_MAJOR_VERSION_NUMBER); } + /// @brief Returns the difference between major version of this instance and NANOVDB_MAJOR_VERSION_NUMBER + /// @return return 0 if the major version equals NANOVDB_MAJOR_VERSION_NUMBER, else a negative age if this + /// instance has a smaller major verion (is older), and a positive age if it is newer, i.e. larger. + __hostdev__ int age() const {return int(this->getMajor()) - int(NANOVDB_MAJOR_VERSION_NUMBER);} +}; // Version + +/// @brief print the verion number to a c-string +/// @param dst destination string of size 8 or more +/// @param v version to be printed +/// @return returns destination string @c dst +__hostdev__ inline char* toStr(char *dst, const Version &v) +{ + return util::sprint(dst, v.getMajor(), ".",v.getMinor(), ".",v.getPatch()); } -using Vec4R = Vec4; -using Vec4d = Vec4; -using Vec4f = Vec4; -using Vec4i = Vec4; - // ----------------------------> TensorTraits <-------------------------------------- -template::value || - is_specialization::value || - is_same::value) ? 1 : 0> +template::value || util::is_specialization::value || util::is_same::value) ? 
1 : 0> struct TensorTraits; template @@ -1354,438 +770,261 @@ struct FloatTraits }; template<> -struct FloatTraits +struct FloatTraits // size of empty class in C++ is 1 byte and not 0 byte +{ + using FloatType = uint64_t; +}; + +template<> +struct FloatTraits // size of empty class in C++ is 1 byte and not 0 byte +{ + using FloatType = uint64_t; +}; + +template<> +struct FloatTraits // size of empty class in C++ is 1 byte and not 0 byte +{ + using FloatType = uint64_t; +}; + +template<> +struct FloatTraits // size of empty class in C++ is 1 byte and not 0 byte +{ + using FloatType = uint64_t; +}; + +template<> +struct FloatTraits // size of empty class in C++ is 1 byte and not 0 byte { using FloatType = bool; }; -// ----------------------------> mapping ValueType -> GridType <-------------------------------------- +template<> +struct FloatTraits // size of empty class in C++ is 1 byte and not 0 byte +{ + using FloatType = double; +}; + +// ----------------------------> mapping BuildType -> GridType <-------------------------------------- -/// @brief Maps from a templated value type to a GridType enum +/// @brief Maps from a templated build type to a GridType enum template -__hostdev__ inline GridType mapToGridType() +__hostdev__ inline GridType toGridType() { - if (is_same::value) { // resolved at compile-time + if (util::is_same::value) { // resolved at compile-time return GridType::Float; - } else if (is_same::value) { + } else if (util::is_same::value) { return GridType::Double; - } else if (is_same::value) { + } else if (util::is_same::value) { return GridType::Int16; - } else if (is_same::value) { + } else if (util::is_same::value) { return GridType::Int32; - } else if (is_same::value) { + } else if (util::is_same::value) { return GridType::Int64; - } else if (is_same::value) { + } else if (util::is_same::value) { return GridType::Vec3f; - } else if (is_same::value) { + } else if (util::is_same::value) { return GridType::Vec3d; - } else if (is_same::value) { + } else if (util::is_same::value) { return GridType::UInt32; - } else if (is_same::value) { + } else if (util::is_same::value) { return GridType::Mask; - } else if (is_same::value) { + } else if (util::is_same::value) { + return GridType::Half; + } else if (util::is_same::value) { + return GridType::Index; + } else if (util::is_same::value) { + return GridType::OnIndex; + } else if (util::is_same::value) { + return GridType::IndexMask; + } else if (util::is_same::value) { + return GridType::OnIndexMask; + } else if (util::is_same::value) { return GridType::Boolean; - } else if (is_same::value) { + } else if (util::is_same::value) { return GridType::RGBA8; - } else if (is_same::value) { + } else if (util::is_same::value) { return GridType::Fp4; - } else if (is_same::value) { + } else if (util::is_same::value) { return GridType::Fp8; - } else if (is_same::value) { + } else if (util::is_same::value) { return GridType::Fp16; - } else if (is_same::value) { + } else if (util::is_same::value) { return GridType::FpN; - } else if (is_same::value) { + } else if (util::is_same::value) { return GridType::Vec4f; - } else if (is_same::value) { + } else if (util::is_same::value) { return GridType::Vec4d; + } else if (util::is_same::value) { + return GridType::PointIndex; + } else if (util::is_same::value) { + return GridType::Vec3u8; + } else if (util::is_same::value) { + return GridType::Vec3u16; + } else if (util::is_same::value) { + return GridType::UInt8; } return GridType::Unknown; -} - -// ----------------------------> matMult 
<-------------------------------------- +}// toGridType -template -__hostdev__ inline Vec3T matMult(const float* mat, const Vec3T& xyz) -{ - return Vec3T(fmaf(xyz[0], mat[0], fmaf(xyz[1], mat[1], xyz[2] * mat[2])), - fmaf(xyz[0], mat[3], fmaf(xyz[1], mat[4], xyz[2] * mat[5])), - fmaf(xyz[0], mat[6], fmaf(xyz[1], mat[7], xyz[2] * mat[8]))); // 6 fmaf + 3 mult = 9 flops -} +template +[[deprecated("Use toGridType() instead.")]] +__hostdev__ inline GridType mapToGridType(){return toGridType();} -template -__hostdev__ inline Vec3T matMult(const double* mat, const Vec3T& xyz) -{ - return Vec3T(fma(static_cast(xyz[0]), mat[0], fma(static_cast(xyz[1]), mat[1], static_cast(xyz[2]) * mat[2])), - fma(static_cast(xyz[0]), mat[3], fma(static_cast(xyz[1]), mat[4], static_cast(xyz[2]) * mat[5])), - fma(static_cast(xyz[0]), mat[6], fma(static_cast(xyz[1]), mat[7], static_cast(xyz[2]) * mat[8]))); // 6 fmaf + 3 mult = 9 flops -} +// ----------------------------> mapping BuildType -> GridClass <-------------------------------------- -template -__hostdev__ inline Vec3T matMult(const float* mat, const float* vec, const Vec3T& xyz) -{ - return Vec3T(fmaf(xyz[0], mat[0], fmaf(xyz[1], mat[1], fmaf(xyz[2], mat[2], vec[0]))), - fmaf(xyz[0], mat[3], fmaf(xyz[1], mat[4], fmaf(xyz[2], mat[5], vec[1]))), - fmaf(xyz[0], mat[6], fmaf(xyz[1], mat[7], fmaf(xyz[2], mat[8], vec[2])))); // 9 fmaf = 9 flops +/// @brief Maps from a templated build type to a GridClass enum +template +__hostdev__ inline GridClass toGridClass(GridClass defaultClass = GridClass::Unknown) +{ + if (util::is_same::value) { + return GridClass::Topology; + } else if (BuildTraits::is_index) { + return GridClass::IndexGrid; + } else if (util::is_same::value) { + return GridClass::VoxelVolume; + } else if (util::is_same::value) { + return GridClass::PointIndex; + } + return defaultClass; } -template -__hostdev__ inline Vec3T matMult(const double* mat, const double* vec, const Vec3T& xyz) +template +[[deprecated("Use toGridClass() instead.")]] +__hostdev__ inline GridClass mapToGridClass(GridClass defaultClass = GridClass::Unknown) { - return Vec3T(fma(static_cast(xyz[0]), mat[0], fma(static_cast(xyz[1]), mat[1], fma(static_cast(xyz[2]), mat[2], vec[0]))), - fma(static_cast(xyz[0]), mat[3], fma(static_cast(xyz[1]), mat[4], fma(static_cast(xyz[2]), mat[5], vec[1]))), - fma(static_cast(xyz[0]), mat[6], fma(static_cast(xyz[1]), mat[7], fma(static_cast(xyz[2]), mat[8], vec[2])))); // 9 fma = 9 flops + return toGridClass(); } -// matMultT: Multiply with the transpose: +// ----------------------------> BitFlags <-------------------------------------- -template -__hostdev__ inline Vec3T matMultT(const float* mat, const Vec3T& xyz) +template +struct BitArray; +template<> +struct BitArray<8> { - return Vec3T(fmaf(xyz[0], mat[0], fmaf(xyz[1], mat[3], xyz[2] * mat[6])), - fmaf(xyz[0], mat[1], fmaf(xyz[1], mat[4], xyz[2] * mat[7])), - fmaf(xyz[0], mat[2], fmaf(xyz[1], mat[5], xyz[2] * mat[8]))); // 6 fmaf + 3 mult = 9 flops -} - -template -__hostdev__ inline Vec3T matMultT(const double* mat, const Vec3T& xyz) + uint8_t mFlags{0}; +}; +template<> +struct BitArray<16> { - return Vec3T(fma(static_cast(xyz[0]), mat[0], fma(static_cast(xyz[1]), mat[3], static_cast(xyz[2]) * mat[6])), - fma(static_cast(xyz[0]), mat[1], fma(static_cast(xyz[1]), mat[4], static_cast(xyz[2]) * mat[7])), - fma(static_cast(xyz[0]), mat[2], fma(static_cast(xyz[1]), mat[5], static_cast(xyz[2]) * mat[8]))); // 6 fmaf + 3 mult = 9 flops -} - -template -__hostdev__ inline Vec3T matMultT(const float* 
mat, const float* vec, const Vec3T& xyz) + uint16_t mFlags{0}; +}; +template<> +struct BitArray<32> { - return Vec3T(fmaf(xyz[0], mat[0], fmaf(xyz[1], mat[3], fmaf(xyz[2], mat[6], vec[0]))), - fmaf(xyz[0], mat[1], fmaf(xyz[1], mat[4], fmaf(xyz[2], mat[7], vec[1]))), - fmaf(xyz[0], mat[2], fmaf(xyz[1], mat[5], fmaf(xyz[2], mat[8], vec[2])))); // 9 fmaf = 9 flops -} - -template -__hostdev__ inline Vec3T matMultT(const double* mat, const double* vec, const Vec3T& xyz) + uint32_t mFlags{0}; +}; +template<> +struct BitArray<64> { - return Vec3T(fma(static_cast(xyz[0]), mat[0], fma(static_cast(xyz[1]), mat[3], fma(static_cast(xyz[2]), mat[6], vec[0]))), - fma(static_cast(xyz[0]), mat[1], fma(static_cast(xyz[1]), mat[4], fma(static_cast(xyz[2]), mat[7], vec[1]))), - fma(static_cast(xyz[0]), mat[2], fma(static_cast(xyz[1]), mat[5], fma(static_cast(xyz[2]), mat[8], vec[2])))); // 9 fma = 9 flops -} - -// ----------------------------> BBox <------------------------------------- + uint64_t mFlags{0}; +}; -// Base-class for static polymorphism (cannot be constructed directly) -template -struct BaseBBox +template +class BitFlags : public BitArray { - Vec3T mCoord[2]; - __hostdev__ bool operator==(const BaseBBox& rhs) const { return mCoord[0] == rhs.mCoord[0] && mCoord[1] == rhs.mCoord[1]; }; - __hostdev__ bool operator!=(const BaseBBox& rhs) const { return mCoord[0] != rhs.mCoord[0] || mCoord[1] != rhs.mCoord[1]; }; - __hostdev__ const Vec3T& operator[](int i) const { return mCoord[i]; } - __hostdev__ Vec3T& operator[](int i) { return mCoord[i]; } - __hostdev__ Vec3T& min() { return mCoord[0]; } - __hostdev__ Vec3T& max() { return mCoord[1]; } - __hostdev__ const Vec3T& min() const { return mCoord[0]; } - __hostdev__ const Vec3T& max() const { return mCoord[1]; } - __hostdev__ Coord& translate(const Vec3T& xyz) - { - mCoord[0] += xyz; - mCoord[1] += xyz; - return *this; +protected: + using BitArray::mFlags; + +public: + using Type = decltype(mFlags); + BitFlags() {} + BitFlags(Type mask) : BitArray{mask} {} + BitFlags(std::initializer_list list) + { + for (auto bit : list) mFlags |= static_cast(1 << bit); } - // @brief Expand this bounding box to enclose point (i, j, k). 
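// A small sketch of the compile-time build-type mapping provided by toGridType()/toGridClass()
// above; buildTypeMappingSketch is an illustrative name only and assumes the declarations above
// are visible. ValueOnIndex is the build type behind index grids: it maps to GridType::OnIndex
// and GridClass::IndexGrid, while a plain float build type maps to GridType::Float.
inline bool buildTypeMappingSketch()
{
    const bool scalarOk = toGridType<float>() == GridType::Float;
    const bool indexOk  = toGridType<ValueOnIndex>()  == GridType::OnIndex &&
                          toGridClass<ValueOnIndex>() == GridClass::IndexGrid;
    return scalarOk && indexOk;
}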
- __hostdev__ BaseBBox& expand(const Vec3T& xyz) + template + BitFlags(std::initializer_list list) { - mCoord[0].minComponent(xyz); - mCoord[1].maxComponent(xyz); - return *this; + for (auto mask : list) mFlags |= static_cast(mask); } - //__hostdev__ BaseBBox expandBy(typename Vec3T::ValueType padding) const - //{ - // return BaseBBox(mCoord[0].offsetBy(-padding),mCoord[1].offsetBy(padding)); - //} - __hostdev__ bool isInside(const Vec3T& xyz) + __hostdev__ Type data() const { return mFlags; } + __hostdev__ Type& data() { return mFlags; } + __hostdev__ void initBit(std::initializer_list list) { - if (xyz[0] < mCoord[0][0] || xyz[1] < mCoord[0][1] || xyz[2] < mCoord[0][2]) - return false; - if (xyz[0] > mCoord[1][0] || xyz[1] > mCoord[1][1] || xyz[2] > mCoord[1][2]) - return false; - return true; + mFlags = 0u; + for (auto bit : list) mFlags |= static_cast(1 << bit); } - -protected: - __hostdev__ BaseBBox() {} - __hostdev__ BaseBBox(const Vec3T& min, const Vec3T& max) - : mCoord{min, max} + template + __hostdev__ void initMask(std::initializer_list list) { + mFlags = 0u; + for (auto mask : list) mFlags |= static_cast(mask); } -}; // BaseBBox + //__hostdev__ Type& data() { return mFlags; } + //__hostdev__ Type data() const { return mFlags; } + __hostdev__ Type getFlags() const { return mFlags & (static_cast(GridFlags::End) - 1u); } // mask out everything except relevant bits -template::value> -struct BBox; + __hostdev__ void setOn() { mFlags = ~Type(0u); } + __hostdev__ void setOff() { mFlags = Type(0u); } -/// @brief Partial template specialization for floating point coordinate types. -/// -/// @note Min is inclusive and max is exclusive. If min = max the dimension of -/// the bounding box is zero and therefore it is also empty. -template -struct BBox : public BaseBBox -{ - using Vec3Type = Vec3T; - using ValueType = typename Vec3T::ValueType; - static_assert(is_floating_point::value, "Expected a floating point coordinate type"); - using BaseT = BaseBBox; - using BaseT::mCoord; - __hostdev__ BBox() - : BaseT(Vec3T( Maximum::value()), - Vec3T(-Maximum::value())) + __hostdev__ void setBitOn(uint8_t bit) { mFlags |= static_cast(1 << bit); } + __hostdev__ void setBitOff(uint8_t bit) { mFlags &= ~static_cast(1 << bit); } + + __hostdev__ void setBitOn(std::initializer_list list) { + for (auto bit : list) mFlags |= static_cast(1 << bit); } - __hostdev__ BBox(const Vec3T& min, const Vec3T& max) - : BaseT(min, max) + __hostdev__ void setBitOff(std::initializer_list list) { + for (auto bit : list) mFlags &= ~static_cast(1 << bit); } - __hostdev__ BBox(const Coord& min, const Coord& max) - : BaseT(Vec3T(ValueType(min[0]), ValueType(min[1]), ValueType(min[2])), - Vec3T(ValueType(max[0] + 1), ValueType(max[1] + 1), ValueType(max[2] + 1))) + + template + __hostdev__ void setMaskOn(MaskT mask) { mFlags |= static_cast(mask); } + template + __hostdev__ void setMaskOff(MaskT mask) { mFlags &= ~static_cast(mask); } + + template + __hostdev__ void setMaskOn(std::initializer_list list) { + for (auto mask : list) mFlags |= static_cast(mask); } - __hostdev__ BBox(const BaseBBox& bbox) : BBox(bbox[0], bbox[1]) {} - __hostdev__ bool empty() const { return mCoord[0][0] >= mCoord[1][0] || - mCoord[0][1] >= mCoord[1][1] || - mCoord[0][2] >= mCoord[1][2]; } - __hostdev__ Vec3T dim() const { return this->empty() ? 
Vec3T(0) : this->max() - this->min(); } - __hostdev__ bool isInside(const Vec3T& p) const + template + __hostdev__ void setMaskOff(std::initializer_list list) { - return p[0] > mCoord[0][0] && p[1] > mCoord[0][1] && p[2] > mCoord[0][2] && - p[0] < mCoord[1][0] && p[1] < mCoord[1][1] && p[2] < mCoord[1][2]; + for (auto mask : list) mFlags &= ~static_cast(mask); } -};// BBox -/// @brief Partial template specialization for integer coordinate types -/// -/// @note Both min and max are INCLUDED in the bbox so dim = max - min + 1. So, -/// if min = max the bounding box contains exactly one point and dim = 1! -template -struct BBox : public BaseBBox -{ - static_assert(is_same::value, "Expected \"int\" coordinate type"); - using BaseT = BaseBBox; - using BaseT::mCoord; - /// @brief Iterator over the domain covered by a BBox - /// @details z is the fastest-moving coordinate. - class Iterator + __hostdev__ void setBit(uint8_t bit, bool on) { on ? this->setBitOn(bit) : this->setBitOff(bit); } + template + __hostdev__ void setMask(MaskT mask, bool on) { on ? this->setMaskOn(mask) : this->setMaskOff(mask); } + + __hostdev__ bool isOn() const { return mFlags == ~Type(0u); } + __hostdev__ bool isOff() const { return mFlags == Type(0u); } + __hostdev__ bool isBitOn(uint8_t bit) const { return 0 != (mFlags & static_cast(1 << bit)); } + __hostdev__ bool isBitOff(uint8_t bit) const { return 0 == (mFlags & static_cast(1 << bit)); } + template + __hostdev__ bool isMaskOn(MaskT mask) const { return 0 != (mFlags & static_cast(mask)); } + template + __hostdev__ bool isMaskOff(MaskT mask) const { return 0 == (mFlags & static_cast(mask)); } + /// @brief return true if any of the masks in the list are on + template + __hostdev__ bool isMaskOn(std::initializer_list list) const { - const BBox& mBBox; - CoordT mPos; - public: - __hostdev__ Iterator(const BBox& b) - : mBBox(b) - , mPos(b.min()) - { - } - __hostdev__ Iterator& operator++() - { - if (mPos[2] < mBBox[1][2]) {// this is the most common case - ++mPos[2]; - } else if (mPos[1] < mBBox[1][1]) { - mPos[2] = mBBox[0][2]; - ++mPos[1]; - } else if (mPos[0] <= mBBox[1][0]) { - mPos[2] = mBBox[0][2]; - mPos[1] = mBBox[0][1]; - ++mPos[0]; - } - return *this; - } - __hostdev__ Iterator operator++(int) - { - auto tmp = *this; - ++(*this); - return tmp; + for (auto mask : list) { + if (0 != (mFlags & static_cast(mask))) return true; } - /// @brief Return @c true if the iterator still points to a valid coordinate. - __hostdev__ operator bool() const { return mPos[0] <= mBBox[1][0]; } - __hostdev__ const CoordT& operator*() const { return mPos; } - }; // Iterator - __hostdev__ Iterator begin() const { return Iterator{*this}; } - __hostdev__ BBox() - : BaseT(CoordT::max(), CoordT::min()) - { - } - __hostdev__ BBox(const CoordT& min, const CoordT& max) - : BaseT(min, max) - { - } - template - __hostdev__ BBox(BBox& other, const SplitT&) - : BaseT(other.mCoord[0], other.mCoord[1]) - { - NANOVDB_ASSERT(this->is_divisible()); - const int n = MaxIndex(this->dim()); - mCoord[1][n] = (mCoord[0][n] + mCoord[1][n]) >> 1; - other.mCoord[0][n] = mCoord[1][n] + 1; - } - __hostdev__ bool is_divisible() const { return mCoord[0][0] < mCoord[1][0] && - mCoord[0][1] < mCoord[1][1] && - mCoord[0][2] < mCoord[1][2]; } - /// @brief Return true if this bounding box is empty, i.e. uninitialized - __hostdev__ bool empty() const { return mCoord[0][0] > mCoord[1][0] || - mCoord[0][1] > mCoord[1][1] || - mCoord[0][2] > mCoord[1][2]; } - __hostdev__ CoordT dim() const { return this->empty() ? 
Coord(0) : this->max() - this->min() + Coord(1); } - __hostdev__ uint64_t volume() const { auto d = this->dim(); return uint64_t(d[0])*uint64_t(d[1])*uint64_t(d[2]); } - __hostdev__ bool isInside(const CoordT& p) const { return !(CoordT::lessThan(p, this->min()) || CoordT::lessThan(this->max(), p)); } - __hostdev__ bool isInside(const BBox& b) const - { - return !(CoordT::lessThan(b.min(), this->min()) || CoordT::lessThan(this->max(), b.max())); + return false; } - - /// @warning This converts a CoordBBox into a floating-point bounding box which implies that max += 1 ! - template - __hostdev__ BBox> asReal() const + /// @brief return true if any of the masks in the list are off + template + __hostdev__ bool isMaskOff(std::initializer_list list) const { - static_assert(is_floating_point::value, "CoordBBox::asReal: Expected a floating point coordinate"); - return BBox>(Vec3(RealT(mCoord[0][0]), RealT(mCoord[0][1]), RealT(mCoord[0][2])), - Vec3(RealT(mCoord[1][0] + 1), RealT(mCoord[1][1] + 1), RealT(mCoord[1][2] + 1))); + for (auto mask : list) { + if (0 == (mFlags & static_cast(mask))) return true; + } + return false; } - /// @brief Return a new instance that is expanded by the specified padding. - __hostdev__ BBox expandBy(typename CoordT::ValueType padding) const + /// @brief required for backwards compatibility + __hostdev__ BitFlags& operator=(Type n) { - return BBox(mCoord[0].offsetBy(-padding), mCoord[1].offsetBy(padding)); + mFlags = n; + return *this; } -};// BBox - -using CoordBBox = BBox; -using BBoxR = BBox; - -// -------------------> Find lowest and highest bit in a word <---------------------------- - -/// @brief Returns the index of the lowest, i.e. least significant, on bit in the specified 32 bit word -/// -/// @warning Assumes that at least one bit is set in the word, i.e. @a v != uint32_t(0)! -NANOVDB_HOSTDEV_DISABLE_WARNING -__hostdev__ static inline uint32_t FindLowestOn(uint32_t v) -{ - NANOVDB_ASSERT(v); -#if defined(_MSC_VER) && defined(NANOVDB_USE_INTRINSICS) - unsigned long index; - _BitScanForward(&index, v); - return static_cast(index); -#elif (defined(__GNUC__) || defined(__clang__)) && defined(NANOVDB_USE_INTRINSICS) - return static_cast(__builtin_ctzl(v)); -#else - static const unsigned char DeBruijn[32] = { - 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9}; -// disable unary minus on unsigned warning -#if defined(_MSC_VER) && !defined(__NVCC__) -#pragma warning(push) -#pragma warning(disable : 4146) -#endif - return DeBruijn[uint32_t((v & -v) * 0x077CB531U) >> 27]; -#if defined(_MSC_VER) && !defined(__NVCC__) -#pragma warning(pop) -#endif - -#endif -} - -/// @brief Returns the index of the highest, i.e. most significant, on bit in the specified 32 bit word -/// -/// @warning Assumes that at least one bit is set in the word, i.e. @a v != uint32_t(0)! 
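// A minimal sketch of BitFlags used with an enum-class mask, mirroring how GridData later stores
// its GridFlags in a BitFlags<32>; DemoFlags and bitFlagsSketch are illustrative names only.
enum class DemoFlags : uint32_t { HasMinMax = 1u << 0, HasBBox = 1u << 1 };
inline bool bitFlagsSketch()
{
    BitFlags<32> flags;                       // default-constructed: all bits off
    flags.setMask(DemoFlags::HasBBox, true);  // set a whole mask ...
    flags.setBitOn(0);                        // ... or address a single bit (here the HasMinMax bit)
    return flags.isMaskOn(DemoFlags::HasBBox) && flags.isMaskOn(DemoFlags::HasMinMax) && !flags.isOff();
}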
-NANOVDB_HOSTDEV_DISABLE_WARNING -__hostdev__ static inline uint32_t FindHighestOn(uint32_t v) -{ - NANOVDB_ASSERT(v); -#if defined(_MSC_VER) && defined(NANOVDB_USE_INTRINSICS) - unsigned long index; - _BitScanReverse(&index, v); - return static_cast(index); -#elif (defined(__GNUC__) || defined(__clang__)) && defined(NANOVDB_USE_INTRINSICS) - return sizeof(unsigned long) * 8 - 1 - __builtin_clzl(v); - -#else - static const unsigned char DeBruijn[32] = { - 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31}; - v |= v >> 1; // first round down to one less than a power of 2 - v |= v >> 2; - v |= v >> 4; - v |= v >> 8; - v |= v >> 16; - return DeBruijn[uint32_t(v * 0x07C4ACDDU) >> 27]; -#endif -} - -/// @brief Returns the index of the lowest, i.e. least significant, on bit in the specified 64 bit word -/// -/// @warning Assumes that at least one bit is set in the word, i.e. @a v != uint32_t(0)! -NANOVDB_HOSTDEV_DISABLE_WARNING -__hostdev__ static inline uint32_t FindLowestOn(uint64_t v) -{ - NANOVDB_ASSERT(v); -#if defined(_MSC_VER) && defined(NANOVDB_USE_INTRINSICS) - unsigned long index; - _BitScanForward64(&index, v); - return static_cast(index); -#elif (defined(__GNUC__) || defined(__clang__)) && defined(NANOVDB_USE_INTRINSICS) - return static_cast(__builtin_ctzll(v)); -#else - static const unsigned char DeBruijn[64] = { - 0, 1, 2, 53, 3, 7, 54, 27, 4, 38, 41, 8, 34, 55, 48, 28, - 62, 5, 39, 46, 44, 42, 22, 9, 24, 35, 59, 56, 49, 18, 29, 11, - 63, 52, 6, 26, 37, 40, 33, 47, 61, 45, 43, 21, 23, 58, 17, 10, - 51, 25, 36, 32, 60, 20, 57, 16, 50, 31, 19, 15, 30, 14, 13, 12, - }; -// disable unary minus on unsigned warning -#if defined(_MSC_VER) && !defined(__NVCC__) -#pragma warning(push) -#pragma warning(disable : 4146) -#endif - return DeBruijn[uint64_t((v & -v) * UINT64_C(0x022FDD63CC95386D)) >> 58]; -#if defined(_MSC_VER) && !defined(__NVCC__) -#pragma warning(pop) -#endif - -#endif -} - -/// @brief Returns the index of the highest, i.e. most significant, on bit in the specified 64 bit word -/// -/// @warning Assumes that at least one bit is set in the word, i.e. @a v != uint32_t(0)! -NANOVDB_HOSTDEV_DISABLE_WARNING -__hostdev__ static inline uint32_t FindHighestOn(uint64_t v) -{ - NANOVDB_ASSERT(v); -#if defined(_MSC_VER) && defined(NANOVDB_USE_INTRINSICS) - unsigned long index; - _BitScanReverse64(&index, v); - return static_cast(index); -#elif (defined(__GNUC__) || defined(__clang__)) && defined(NANOVDB_USE_INTRINSICS) - return sizeof(unsigned long) * 8 - 1 - __builtin_clzll(v); -#else - const uint32_t* p = reinterpret_cast(&v); - return p[1] ? 
32u + FindHighestOn(p[1]) : FindHighestOn(p[0]); -#endif -} - -// ----------------------------> CountOn <-------------------------------------- - -/// @return Number of bits that are on in the specified 64-bit word -NANOVDB_HOSTDEV_DISABLE_WARNING -__hostdev__ inline uint32_t CountOn(uint64_t v) -{ -// __popcnt* intrinsic support was added in VS 2019 16.8 -#if defined(_MSC_VER) && defined(_M_X64) && (_MSC_VER >= 1928) - v = __popcnt64(v); -#elif (defined(__GNUC__) || defined(__clang__)) - v = __builtin_popcountll(v); -#else - // Software Implementation - v = v - ((v >> 1) & uint64_t(0x5555555555555555)); - v = (v & uint64_t(0x3333333333333333)) + ((v >> 2) & uint64_t(0x3333333333333333)); - v = (((v + (v >> 4)) & uint64_t(0xF0F0F0F0F0F0F0F)) * uint64_t(0x101010101010101)) >> 56; -#endif - return static_cast(v); -} +}; // BitFlags // ----------------------------> Mask <-------------------------------------- @@ -1794,11 +1033,10 @@ __hostdev__ inline uint32_t CountOn(uint64_t v) template class Mask { +public: static constexpr uint32_t SIZE = 1U << (3 * LOG2DIM); // Number of bits in mask static constexpr uint32_t WORD_COUNT = SIZE >> 6; // Number of 64 bit words - uint64_t mWords[WORD_COUNT]; -public: /// @brief Return the memory footprint in bytes of this Mask __hostdev__ static size_t memUsage() { return sizeof(Mask); } @@ -1808,14 +1046,25 @@ class Mask /// @brief Return the number of machine words used by this Mask __hostdev__ static uint32_t wordCount() { return WORD_COUNT; } + /// @brief Return the total number of set bits in this Mask __hostdev__ uint32_t countOn() const { - uint32_t sum = 0, n = WORD_COUNT; + uint32_t sum = 0; + for (const uint64_t *w = mWords, *q = w + WORD_COUNT; w != q; ++w) + sum += util::countOn(*w); + return sum; + } + + /// @brief Return the number of lower set bits in mask up to but excluding the i'th bit + inline __hostdev__ uint32_t countOn(uint32_t i) const + { + uint32_t n = i >> 6, sum = util::countOn(mWords[n] & ((uint64_t(1) << (i & 63u)) - 1u)); for (const uint64_t* w = mWords; n--; ++w) - sum += CountOn(*w); + sum += util::countOn(*w); return sum; } + template class Iterator { public: @@ -1831,17 +1080,60 @@ class Mask } Iterator& operator=(const Iterator&) = default; __hostdev__ uint32_t operator*() const { return mPos; } - __hostdev__ operator bool() const { return mPos != Mask::SIZE; } + __hostdev__ uint32_t pos() const { return mPos; } + __hostdev__ operator bool() const { return mPos != Mask::SIZE; } __hostdev__ Iterator& operator++() { - mPos = mParent->findNextOn(mPos + 1); + mPos = mParent->findNext(mPos + 1); return *this; } + __hostdev__ Iterator operator++(int) + { + auto tmp = *this; + ++(*this); + return tmp; + } private: uint32_t mPos; const Mask* mParent; - }; // Member class MaskIterator + }; // Member class Iterator + + class DenseIterator + { + public: + __hostdev__ DenseIterator(uint32_t pos = Mask::SIZE) + : mPos(pos) + { + } + DenseIterator& operator=(const DenseIterator&) = default; + __hostdev__ uint32_t operator*() const { return mPos; } + __hostdev__ uint32_t pos() const { return mPos; } + __hostdev__ operator bool() const { return mPos != Mask::SIZE; } + __hostdev__ DenseIterator& operator++() + { + ++mPos; + return *this; + } + __hostdev__ DenseIterator operator++(int) + { + auto tmp = *this; + ++mPos; + return tmp; + } + + private: + uint32_t mPos; + }; // Member class DenseIterator + + using OnIterator = Iterator; + using OffIterator = Iterator; + + __hostdev__ OnIterator beginOn() const { return 
OnIterator(this->findFirst(), this); } + + __hostdev__ OffIterator beginOff() const { return OffIterator(this->findFirst(), this); } + + __hostdev__ DenseIterator beginAll() const { return DenseIterator(0); } /// @brief Initialize all bits to zero. __hostdev__ Mask() @@ -1863,44 +1155,44 @@ class Mask mWords[i] = other.mWords[i]; } - /// @brief Return the nth word of the bit mask, for a word of arbitrary size. - template - __hostdev__ WordT getWord(int n) const - { - NANOVDB_ASSERT(n * 8 * sizeof(WordT) < SIZE); - return reinterpret_cast(mWords)[n]; - } + /// @brief Return a pointer to the list of words of the bit mask + __hostdev__ uint64_t* words() { return mWords; } + __hostdev__ const uint64_t* words() const { return mWords; } /// @brief Assignment operator that works with openvdb::util::NodeMask - template - __hostdev__ Mask& operator=(const MaskT& other) + template + __hostdev__ typename util::enable_if::value, Mask&>::type operator=(const MaskT& other) { static_assert(sizeof(Mask) == sizeof(MaskT), "Mismatching sizeof"); static_assert(WORD_COUNT == MaskT::WORD_COUNT, "Mismatching word count"); static_assert(LOG2DIM == MaskT::LOG2DIM, "Mismatching LOG2DIM"); - auto *src = reinterpret_cast(&other); - uint64_t *dst = mWords; - for (uint32_t i = 0; i < WORD_COUNT; ++i) { - *dst++ = *src++; - } + auto* src = reinterpret_cast(&other); + for (uint64_t *dst = mWords, *end = dst + WORD_COUNT; dst != end; ++dst) + *dst = *src++; return *this; } + //__hostdev__ Mask& operator=(const Mask& other){return *util::memcpy(this, &other);} + Mask& operator=(const Mask&) = default; + __hostdev__ bool operator==(const Mask& other) const { for (uint32_t i = 0; i < WORD_COUNT; ++i) { - if (mWords[i] != other.mWords[i]) return false; + if (mWords[i] != other.mWords[i]) + return false; } return true; } __hostdev__ bool operator!=(const Mask& other) const { return !((*this) == other); } - __hostdev__ Iterator beginOn() const { return Iterator(this->findFirstOn(), this); } - /// @brief Return true if the given bit is set. __hostdev__ bool isOn(uint32_t n) const { return 0 != (mWords[n >> 6] & (uint64_t(1) << (n & 63))); } + /// @brief Return true if the given bit is NOT set. + __hostdev__ bool isOff(uint32_t n) const { return 0 == (mWords[n >> 6] & (uint64_t(1) << (n & 63))); } + + /// @brief Return true if all the bits are set in this Mask. __hostdev__ bool isOn() const { for (uint32_t i = 0; i < WORD_COUNT; ++i) @@ -1909,6 +1201,7 @@ class Mask return true; } + /// @brief Return true if none of the bits are set in this Mask. __hostdev__ bool isOff() const { for (uint32_t i = 0; i < WORD_COUNT; ++i) @@ -1917,86 +1210,144 @@ class Mask return true; } - /// @brief Set the given bit on. + /// @brief Set the specified bit on. __hostdev__ void setOn(uint32_t n) { mWords[n >> 6] |= uint64_t(1) << (n & 63); } + /// @brief Set the specified bit off. __hostdev__ void setOff(uint32_t n) { mWords[n >> 6] &= ~(uint64_t(1) << (n & 63)); } - __hostdev__ void set(uint32_t n, bool On) +#if defined(__CUDACC__) // the following functions only run on the GPU! + __device__ inline void setOnAtomic(uint32_t n) + { + atomicOr(reinterpret_cast(this) + (n >> 6), 1ull << (n & 63)); + } + __device__ inline void setOffAtomic(uint32_t n) + { + atomicAnd(reinterpret_cast(this) + (n >> 6), ~(1ull << (n & 63))); + } + __device__ inline void setAtomic(uint32_t n, bool on) + { + on ? this->setOnAtomic(n) : this->setOffAtomic(n); + } +#endif + /// @brief Set the specified bit on or off. 
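// A sketch of the two Mask features that matter most for index grids: countOn(i) is the prefix
// count (number of set bits strictly below bit i) that turns an active voxel's bit position into
// an offset into an external value array, and the __CUDACC__-guarded atomic setters let concurrent
// GPU threads activate bits in the same 512-bit leaf mask safely. maskPrefixCountSketch and
// activateBitsSketch are illustrative names only.
inline bool maskPrefixCountSketch()
{
    Mask<3> mask;            // 8^3 = 512 bits, zero-initialized, e.g. a leaf node's value mask
    mask.setOn(2);
    mask.setOn(5);
    mask.setOn(130);         // lives in word 2 (130 >> 6), bit 2 (130 & 63)
    return mask.countOn()    == 3u &&  // total population count
           mask.countOn(5)   == 1u &&  // only bit 2 lies below bit 5
           mask.countOn(131) == 3u;    // bits 2, 5 and 130 all lie below bit 131
}
#if defined(__CUDACC__)
__global__ void activateBitsSketch(Mask<3>* leafMask, const uint32_t* bits, int count)
{
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    if (tid < count) leafMask->setOnAtomic(bits[tid]); // atomicOr on the containing 64-bit word
}
#endif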
+ __hostdev__ void set(uint32_t n, bool on) { -#if 1 // switch between branchless - auto &word = mWords[n >> 6]; +#if 1 // switch between branchless + auto& word = mWords[n >> 6]; n &= 63; word &= ~(uint64_t(1) << n); - word |= uint64_t(On) << n; + word |= uint64_t(on) << n; #else - On ? this->setOn(n) : this->setOff(n); + on ? this->setOn(n) : this->setOff(n); #endif } /// @brief Set all bits on __hostdev__ void setOn() { - for (uint32_t i = 0; i < WORD_COUNT; ++i) - mWords[i] = ~uint64_t(0); + for (uint32_t i = 0; i < WORD_COUNT; ++i)mWords[i] = ~uint64_t(0); } /// @brief Set all bits off __hostdev__ void setOff() { - for (uint32_t i = 0; i < WORD_COUNT; ++i) - mWords[i] = uint64_t(0); + for (uint32_t i = 0; i < WORD_COUNT; ++i) mWords[i] = uint64_t(0); } /// @brief Set all bits off __hostdev__ void set(bool on) { const uint64_t v = on ? ~uint64_t(0) : uint64_t(0); - for (uint32_t i = 0; i < WORD_COUNT; ++i) - mWords[i] = v; + for (uint32_t i = 0; i < WORD_COUNT; ++i) mWords[i] = v; } /// brief Toggle the state of all bits in the mask __hostdev__ void toggle() { uint32_t n = WORD_COUNT; - for (auto* w = mWords; n--; ++w) - *w = ~*w; + for (auto* w = mWords; n--; ++w) *w = ~*w; } __hostdev__ void toggle(uint32_t n) { mWords[n >> 6] ^= uint64_t(1) << (n & 63); } -private: + /// @brief Bitwise intersection + __hostdev__ Mask& operator&=(const Mask& other) + { + uint64_t* w1 = mWords; + const uint64_t* w2 = other.mWords; + for (uint32_t n = WORD_COUNT; n--; ++w1, ++w2) *w1 &= *w2; + return *this; + } + /// @brief Bitwise union + __hostdev__ Mask& operator|=(const Mask& other) + { + uint64_t* w1 = mWords; + const uint64_t* w2 = other.mWords; + for (uint32_t n = WORD_COUNT; n--; ++w1, ++w2) *w1 |= *w2; + return *this; + } + /// @brief Bitwise difference + __hostdev__ Mask& operator-=(const Mask& other) + { + uint64_t* w1 = mWords; + const uint64_t* w2 = other.mWords; + for (uint32_t n = WORD_COUNT; n--; ++w1, ++w2) *w1 &= ~*w2; + return *this; + } + /// @brief Bitwise XOR + __hostdev__ Mask& operator^=(const Mask& other) + { + uint64_t* w1 = mWords; + const uint64_t* w2 = other.mWords; + for (uint32_t n = WORD_COUNT; n--; ++w1, ++w2) *w1 ^= *w2; + return *this; + } NANOVDB_HOSTDEV_DISABLE_WARNING - __hostdev__ uint32_t findFirstOn() const + template + __hostdev__ uint32_t findFirst() const { - uint32_t n = 0; + uint32_t n = 0u; const uint64_t* w = mWords; - for (; n < WORD_COUNT && !*w; ++w, ++n) - ; - return n == WORD_COUNT ? SIZE : (n << 6) + FindLowestOn(*w); + for (; n < WORD_COUNT && !(ON ? *w : ~*w); ++w, ++n); + return n < WORD_COUNT ? (n << 6) + util::findLowestOn(ON ? *w : ~*w) : SIZE; + } + + NANOVDB_HOSTDEV_DISABLE_WARNING + template + __hostdev__ uint32_t findNext(uint32_t start) const + { + uint32_t n = start >> 6; // initiate + if (n >= WORD_COUNT) return SIZE; // check for out of bounds + uint32_t m = start & 63u; + uint64_t b = ON ? mWords[n] : ~mWords[n]; + if (b & (uint64_t(1u) << m)) return start; // simple case: start is on/off + b &= ~uint64_t(0u) << m; // mask out lower bits + while (!b && ++n < WORD_COUNT) b = ON ? mWords[n] : ~mWords[n]; // find next non-zero word + return b ? 
(n << 6) + util::findLowestOn(b) : SIZE; // catch last word=0 } NANOVDB_HOSTDEV_DISABLE_WARNING - __hostdev__ uint32_t findNextOn(uint32_t start) const + template + __hostdev__ uint32_t findPrev(uint32_t start) const { uint32_t n = start >> 6; // initiate - if (n >= WORD_COUNT) - return SIZE; // check for out of bounds - uint32_t m = start & 63; - uint64_t b = mWords[n]; - if (b & (uint64_t(1) << m)) - return start; // simple case: start is on - b &= ~uint64_t(0) << m; // mask out lower bits - while (!b && ++n < WORD_COUNT) - b = mWords[n]; // find next non-zero word - return (!b ? SIZE : (n << 6) + FindLowestOn(b)); // catch last word=0 + if (n >= WORD_COUNT) return SIZE; // check for out of bounds + uint32_t m = start & 63u; + uint64_t b = ON ? mWords[n] : ~mWords[n]; + if (b & (uint64_t(1u) << m)) return start; // simple case: start is on/off + b &= (uint64_t(1u) << m) - 1u; // mask out higher bits + while (!b && n) b = ON ? mWords[--n] : ~mWords[--n]; // find previous non-zero word + return b ? (n << 6) + util::findHighestOn(b) : SIZE; // catch first word=0 } + +private: + uint64_t mWords[WORD_COUNT]; }; // Mask class // ----------------------------> Map <-------------------------------------- /// @brief Defines an affine transform and its inverse represented as a 3x3 matrix and a vec3 translation struct Map -{ +{ // 264B (not 32B aligned!) float mMatF[9]; // 9*4B <- 3x3 matrix float mInvMatF[9]; // 9*4B <- 3x3 matrix float mVecF[3]; // 3*4B <- translation @@ -2006,93 +1357,241 @@ struct Map double mVecD[3]; // 3*8B <- translation double mTaperD; // 8B, placeholder for taper value - // This method can only be called on the host to initialize the member data + /// @brief Default constructor for the identity map + __hostdev__ Map() + : mMatF{ 1.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 1.0f} + , mInvMatF{1.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 1.0f} + , mVecF{0.0f, 0.0f, 0.0f} + , mTaperF{1.0f} + , mMatD{ 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0} + , mInvMatD{1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0} + , mVecD{0.0, 0.0, 0.0} + , mTaperD{1.0} + { + } + __hostdev__ Map(double s, const Vec3d& t = Vec3d(0.0, 0.0, 0.0)) + : mMatF{float(s), 0.0f, 0.0f, 0.0f, float(s), 0.0f, 0.0f, 0.0f, float(s)} + , mInvMatF{1.0f / float(s), 0.0f, 0.0f, 0.0f, 1.0f / float(s), 0.0f, 0.0f, 0.0f, 1.0f / float(s)} + , mVecF{float(t[0]), float(t[1]), float(t[2])} + , mTaperF{1.0f} + , mMatD{s, 0.0, 0.0, 0.0, s, 0.0, 0.0, 0.0, s} + , mInvMatD{1.0 / s, 0.0, 0.0, 0.0, 1.0 / s, 0.0, 0.0, 0.0, 1.0 / s} + , mVecD{t[0], t[1], t[2]} + , mTaperD{1.0} + { + } + + /// @brief Initialize the member data from 3x3 or 4x4 matrices + /// @note This is not _hostdev__ since then MatT=openvdb::Mat4d will produce warnings + template + void set(const MatT& mat, const MatT& invMat, const Vec3T& translate, double taper = 1.0); + + /// @brief Initialize the member data from 4x4 matrices + /// @note The last (4th) row of invMat is actually ignored. + /// This is not _hostdev__ since then Mat4T=openvdb::Mat4d will produce warnings template - __hostdev__ void set(const Mat4T& mat, const Mat4T& invMat, double taper); + void set(const Mat4T& mat, const Mat4T& invMat, double taper = 1.0) { this->set(mat, invMat, mat[3], taper); } template - __hostdev__ Vec3T applyMap(const Vec3T& xyz) const { return matMult(mMatD, mVecD, xyz); } + void set(double scale, const Vec3T& translation, double taper = 1.0); + + /// @brief Apply the forward affine transformation to a vector using 64bit floating point arithmetics. 
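// A short usage sketch for the Mask iterators defined above: beginOn() walks exactly the bits
// reported by findFirst<true>()/findNext<true>(), so the loop below visits the same bits that
// countOn() tallies; onBitIterationSketch is an illustrative name only.
inline uint32_t onBitIterationSketch(const Mask<3>& mask)
{
    uint32_t n = 0;
    for (auto it = mask.beginOn(); it; ++it) ++n; // *it yields the bit index of each on-bit
    return n; // equals mask.countOn()
}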
+ /// @note Typically this operation is used for the scale, rotation and translation of index -> world mapping + /// @tparam Vec3T Template type of the 3D vector to be mapped + /// @param ijk 3D vector to be mapped - typically floating point index coordinates + /// @return Forward mapping for affine transformation, i.e. (mat x ijk) + translation template - __hostdev__ Vec3T applyMapF(const Vec3T& xyz) const { return matMult(mMatF, mVecF, xyz); } + __hostdev__ Vec3T applyMap(const Vec3T& ijk) const { return math::matMult(mMatD, mVecD, ijk); } + /// @brief Apply the forward affine transformation to a vector using 32bit floating point arithmetics. + /// @note Typically this operation is used for the scale, rotation and translation of index -> world mapping + /// @tparam Vec3T Template type of the 3D vector to be mapped + /// @param ijk 3D vector to be mapped - typically floating point index coordinates + /// @return Forward mapping for affine transformation, i.e. (mat x ijk) + translation template - __hostdev__ Vec3T applyJacobian(const Vec3T& xyz) const { return matMult(mMatD, xyz); } + __hostdev__ Vec3T applyMapF(const Vec3T& ijk) const { return math::matMult(mMatF, mVecF, ijk); } + + /// @brief Apply the linear forward 3x3 transformation to an input 3d vector using 64bit floating point arithmetics, + /// e.g. scale and rotation WITHOUT translation. + /// @note Typically this operation is used for scale and rotation from index -> world mapping + /// @tparam Vec3T Template type of the 3D vector to be mapped + /// @param ijk 3D vector to be mapped - typically floating point index coordinates + /// @return linear forward 3x3 mapping of the input vector template - __hostdev__ Vec3T applyJacobianF(const Vec3T& xyz) const { return matMult(mMatF, xyz); } + __hostdev__ Vec3T applyJacobian(const Vec3T& ijk) const { return math::matMult(mMatD, ijk); } + + /// @brief Apply the linear forward 3x3 transformation to an input 3d vector using 32bit floating point arithmetics, + /// e.g. scale and rotation WITHOUT translation. + /// @note Typically this operation is used for scale and rotation from index -> world mapping + /// @tparam Vec3T Template type of the 3D vector to be mapped + /// @param ijk 3D vector to be mapped - typically floating point index coordinates + /// @return linear forward 3x3 mapping of the input vector + template + __hostdev__ Vec3T applyJacobianF(const Vec3T& ijk) const { return math::matMult(mMatF, ijk); } + /// @brief Apply the inverse affine mapping to a vector using 64bit floating point arithmetics. + /// @note Typically this operation is used for the world -> index mapping + /// @tparam Vec3T Template type of the 3D vector to be mapped + /// @param xyz 3D vector to be mapped - typically floating point world coordinates + /// @return Inverse affine mapping of the input @c xyz i.e. (xyz - translation) x mat^-1 template __hostdev__ Vec3T applyInverseMap(const Vec3T& xyz) const { - return matMult(mInvMatD, Vec3T(xyz[0] - mVecD[0], xyz[1] - mVecD[1], xyz[2] - mVecD[2])); + return math::matMult(mInvMatD, Vec3T(xyz[0] - mVecD[0], xyz[1] - mVecD[1], xyz[2] - mVecD[2])); } + + /// @brief Apply the inverse affine mapping to a vector using 32bit floating point arithmetics. + /// @note Typically this operation is used for the world -> index mapping + /// @tparam Vec3T Template type of the 3D vector to be mapped + /// @param xyz 3D vector to be mapped - typically floating point world coordinates + /// @return Inverse affine mapping of the input @c xyz i.e. 
(xyz - translation) x mat^-1 template __hostdev__ Vec3T applyInverseMapF(const Vec3T& xyz) const { - return matMult(mInvMatF, Vec3T(xyz[0] - mVecF[0], xyz[1] - mVecF[1], xyz[2] - mVecF[2])); + return math::matMult(mInvMatF, Vec3T(xyz[0] - mVecF[0], xyz[1] - mVecF[1], xyz[2] - mVecF[2])); } + /// @brief Apply the linear inverse 3x3 transformation to an input 3d vector using 64bit floating point arithmetics, + /// e.g. inverse scale and inverse rotation WITHOUT translation. + /// @note Typically this operation is used for scale and rotation from world -> index mapping + /// @tparam Vec3T Template type of the 3D vector to be mapped + /// @param ijk 3D vector to be mapped - typically floating point index coordinates + /// @return linear inverse 3x3 mapping of the input vector i.e. xyz x mat^-1 template - __hostdev__ Vec3T applyInverseJacobian(const Vec3T& xyz) const { return matMult(mInvMatD, xyz); } + __hostdev__ Vec3T applyInverseJacobian(const Vec3T& xyz) const { return math::matMult(mInvMatD, xyz); } + + /// @brief Apply the linear inverse 3x3 transformation to an input 3d vector using 32bit floating point arithmetics, + /// e.g. inverse scale and inverse rotation WITHOUT translation. + /// @note Typically this operation is used for scale and rotation from world -> index mapping + /// @tparam Vec3T Template type of the 3D vector to be mapped + /// @param ijk 3D vector to be mapped - typically floating point index coordinates + /// @return linear inverse 3x3 mapping of the input vector i.e. xyz x mat^-1 template - __hostdev__ Vec3T applyInverseJacobianF(const Vec3T& xyz) const { return matMult(mInvMatF, xyz); } - + __hostdev__ Vec3T applyInverseJacobianF(const Vec3T& xyz) const { return math::matMult(mInvMatF, xyz); } + + /// @brief Apply the transposed inverse 3x3 transformation to an input 3d vector using 64bit floating point arithmetics, + /// e.g. inverse scale and inverse rotation WITHOUT translation. + /// @note Typically this operation is used for scale and rotation from world -> index mapping + /// @tparam Vec3T Template type of the 3D vector to be mapped + /// @param ijk 3D vector to be mapped - typically floating point index coordinates + /// @return linear inverse 3x3 mapping of the input vector i.e. 
xyz x mat^-1 template - __hostdev__ Vec3T applyIJT(const Vec3T& xyz) const { return matMultT(mInvMatD, xyz); } + __hostdev__ Vec3T applyIJT(const Vec3T& xyz) const { return math::matMultT(mInvMatD, xyz); } template - __hostdev__ Vec3T applyIJTF(const Vec3T& xyz) const { return matMultT(mInvMatF, xyz); } + __hostdev__ Vec3T applyIJTF(const Vec3T& xyz) const { return math::matMultT(mInvMatF, xyz); } + + /// @brief Return a voxels size in each coordinate direction, measured at the origin + __hostdev__ Vec3d getVoxelSize() const { return this->applyMap(Vec3d(1)) - this->applyMap(Vec3d(0)); } }; // Map -template -__hostdev__ void Map::set(const Mat4T& mat, const Mat4T& invMat, double taper) +template +inline void Map::set(const MatT& mat, const MatT& invMat, const Vec3T& translate, double taper) { - float * mf = mMatF, *vf = mVecF; - float* mif = mInvMatF; - double *md = mMatD, *vd = mVecD; - double* mid = mInvMatD; + float * mf = mMatF, *vf = mVecF, *mif = mInvMatF; + double *md = mMatD, *vd = mVecD, *mid = mInvMatD; mTaperF = static_cast(taper); mTaperD = taper; for (int i = 0; i < 3; ++i) { - *vd++ = mat[3][i]; //translation - *vf++ = static_cast(mat[3][i]); + *vd++ = translate[i]; //translation + *vf++ = static_cast(translate[i]); //translation for (int j = 0; j < 3; ++j) { *md++ = mat[j][i]; //transposed *mid++ = invMat[j][i]; - *mf++ = static_cast(mat[j][i]); + *mf++ = static_cast(mat[j][i]); //transposed *mif++ = static_cast(invMat[j][i]); } } } +template +inline void Map::set(double dx, const Vec3T& trans, double taper) +{ + NANOVDB_ASSERT(dx > 0.0); + const double mat[3][3] = { {dx, 0.0, 0.0}, // row 0 + {0.0, dx, 0.0}, // row 1 + {0.0, 0.0, dx} }; // row 2 + const double idx = 1.0 / dx; + const double invMat[3][3] = { {idx, 0.0, 0.0}, // row 0 + {0.0, idx, 0.0}, // row 1 + {0.0, 0.0, idx} }; // row 2 + this->set(mat, invMat, trans, taper); +} + // ----------------------------> GridBlindMetaData <-------------------------------------- struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) GridBlindMetaData -{ - static const int MaxNameSize = 256;// due to NULL termination the maximum length is one less! - int64_t mByteOffset; // byte offset to the blind data, relative to the GridData. - uint64_t mElementCount; // number of elements, e.g. point count - uint32_t mFlags; // flags +{ // 288 bytes + static const int MaxNameSize = 256; // due to NULL termination the maximum length is one less! + int64_t mDataOffset; // byte offset to the blind data, relative to this GridBlindMetaData. + uint64_t mValueCount; // number of blind values, e.g. point count + uint32_t mValueSize;// byte size of each value, e.g. 4 if mDataType=Float and 1 if mDataType=Unknown since that amounts to char GridBlindDataSemantic mSemantic; // semantic meaning of the data. GridBlindDataClass mDataClass; // 4 bytes GridType mDataType; // 4 bytes - char mName[MaxNameSize];// note this include the NULL termination - - /// @brief return memory usage in bytes for the class (note this computes for all blindMetaData structures.) - __hostdev__ static uint64_t memUsage(uint64_t blindDataCount = 0) - { - return blindDataCount * sizeof(GridBlindMetaData); + char mName[MaxNameSize]; // note this includes the NULL termination + // no padding required for 32 byte alignment + + // disallow copy-construction since methods like blindData and getBlindData uses the this pointer! + GridBlindMetaData(const GridBlindMetaData&) = delete; + + // disallow copy-assignment since methods like blindData and getBlindData uses the this pointer! 
+ const GridBlindMetaData& operator=(const GridBlindMetaData&) = delete; + + __hostdev__ void setBlindData(void* blindData) { mDataOffset = util::PtrDiff(blindData, this); } + + // unsafe + __hostdev__ const void* blindData() const {return util::PtrAdd(this, mDataOffset);} + + /// @brief Get a const pointer to the blind data represented by this meta data + /// @tparam BlindDataT Expected value type of the blind data. + /// @return Returns NULL if mGridType!=toGridType(), else a const point of type BlindDataT. + /// @note Use mDataType=Unknown if BlindDataT is a custom data type unknown to NanoVDB. + template + __hostdev__ const BlindDataT* getBlindData() const + { + //if (mDataType != toGridType()) printf("getBlindData mismatch\n"); + return mDataType == toGridType() ? util::PtrAdd(this, mDataOffset) : nullptr; + } + + /// @brief return true if this meta data has a valid combination of semantic, class and value tags + __hostdev__ bool isValid() const + { + auto check = [&]()->bool{ + switch (mDataType){ + case GridType::Unknown: return mValueSize==1u;// i.e. we encode data as mValueCount chars + case GridType::Float: return mValueSize==4u; + case GridType::Double: return mValueSize==8u; + case GridType::Int16: return mValueSize==2u; + case GridType::Int32: return mValueSize==4u; + case GridType::Int64: return mValueSize==8u; + case GridType::Vec3f: return mValueSize==12u; + case GridType::Vec3d: return mValueSize==24u; + case GridType::Half: return mValueSize==2u; + case GridType::RGBA8: return mValueSize==4u; + case GridType::Fp8: return mValueSize==1u; + case GridType::Fp16: return mValueSize==2u; + case GridType::Vec4f: return mValueSize==16u; + case GridType::Vec4d: return mValueSize==32u; + case GridType::Vec3u8: return mValueSize==3u; + case GridType::Vec3u16: return mValueSize==6u; + default: return true;}// all other combinations are valid + }; + return nanovdb::isValid(mDataClass, mSemantic, mDataType) && check(); + } + + /// @brief return size in bytes of the blind data represented by this blind meta data + /// @note This size includes possible padding for 32 byte alignment. 
The actual amount + /// of bind data is mValueCount * mValueSize + __hostdev__ uint64_t blindDataSize() const + { + return math::AlignUp(mValueCount * mValueSize); } - - __hostdev__ void setBlindData(void *ptr) { mByteOffset = PtrDiff(ptr, this); } - - template - __hostdev__ const T* getBlindData() const { return PtrAdd(this, mByteOffset); } - }; // GridBlindMetaData // ----------------------------> NodeTrait <-------------------------------------- /// @brief Struct to derive node type from its level in a given -/// grid, tree or root while perserving constness +/// grid, tree or root while preserving constness template struct NodeTrait; @@ -2100,14 +1599,14 @@ struct NodeTrait; template struct NodeTrait { - static_assert(GridOrTreeOrRootT::RootType::LEVEL == 3, "Tree depth is not supported"); + static_assert(GridOrTreeOrRootT::RootNodeType::LEVEL == 3, "Tree depth is not supported"); using Type = typename GridOrTreeOrRootT::LeafNodeType; using type = typename GridOrTreeOrRootT::LeafNodeType; }; template struct NodeTrait { - static_assert(GridOrTreeOrRootT::RootType::LEVEL == 3, "Tree depth is not supported"); + static_assert(GridOrTreeOrRootT::RootNodeType::LEVEL == 3, "Tree depth is not supported"); using Type = const typename GridOrTreeOrRootT::LeafNodeType; using type = const typename GridOrTreeOrRootT::LeafNodeType; }; @@ -2115,46 +1614,172 @@ struct NodeTrait template struct NodeTrait { - static_assert(GridOrTreeOrRootT::RootType::LEVEL == 3, "Tree depth is not supported"); - using Type = typename GridOrTreeOrRootT::RootType::ChildNodeType::ChildNodeType; - using type = typename GridOrTreeOrRootT::RootType::ChildNodeType::ChildNodeType; + static_assert(GridOrTreeOrRootT::RootNodeType::LEVEL == 3, "Tree depth is not supported"); + using Type = typename GridOrTreeOrRootT::RootNodeType::ChildNodeType::ChildNodeType; + using type = typename GridOrTreeOrRootT::RootNodeType::ChildNodeType::ChildNodeType; }; template struct NodeTrait { - static_assert(GridOrTreeOrRootT::RootType::LEVEL == 3, "Tree depth is not supported"); - using Type = const typename GridOrTreeOrRootT::RootType::ChildNodeType::ChildNodeType; - using type = const typename GridOrTreeOrRootT::RootType::ChildNodeType::ChildNodeType; + static_assert(GridOrTreeOrRootT::RootNodeType::LEVEL == 3, "Tree depth is not supported"); + using Type = const typename GridOrTreeOrRootT::RootNodeType::ChildNodeType::ChildNodeType; + using type = const typename GridOrTreeOrRootT::RootNodeType::ChildNodeType::ChildNodeType; }; template struct NodeTrait { - static_assert(GridOrTreeOrRootT::RootType::LEVEL == 3, "Tree depth is not supported"); - using Type = typename GridOrTreeOrRootT::RootType::ChildNodeType; - using type = typename GridOrTreeOrRootT::RootType::ChildNodeType; + static_assert(GridOrTreeOrRootT::RootNodeType::LEVEL == 3, "Tree depth is not supported"); + using Type = typename GridOrTreeOrRootT::RootNodeType::ChildNodeType; + using type = typename GridOrTreeOrRootT::RootNodeType::ChildNodeType; }; template struct NodeTrait { - static_assert(GridOrTreeOrRootT::RootType::LEVEL == 3, "Tree depth is not supported"); - using Type = const typename GridOrTreeOrRootT::RootType::ChildNodeType; - using type = const typename GridOrTreeOrRootT::RootType::ChildNodeType; + static_assert(GridOrTreeOrRootT::RootNodeType::LEVEL == 3, "Tree depth is not supported"); + using Type = const typename GridOrTreeOrRootT::RootNodeType::ChildNodeType; + using type = const typename GridOrTreeOrRootT::RootNodeType::ChildNodeType; }; template struct NodeTrait 
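// A sketch of how a blind-data channel is addressed through the meta data above: the payload is
// located via the byte offset stored relative to the GridBlindMetaData itself, getBlindData<T>()
// returns nullptr unless T matches the recorded mDataType, and blindDataSize() reports the payload
// size including its padding to 32-byte alignment; floatChannelSketch is an illustrative name only.
inline const float* floatChannelSketch(const GridBlindMetaData& meta, uint64_t* paddedBytes)
{
    if (paddedBytes) *paddedBytes = meta.blindDataSize(); // mValueCount * mValueSize, aligned up
    return meta.getBlindData<float>();                    // non-null only if mDataType == GridType::Float
}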
{ - static_assert(GridOrTreeOrRootT::RootType::LEVEL == 3, "Tree depth is not supported"); - using Type = typename GridOrTreeOrRootT::RootType; - using type = typename GridOrTreeOrRootT::RootType; + static_assert(GridOrTreeOrRootT::RootNodeType::LEVEL == 3, "Tree depth is not supported"); + using Type = typename GridOrTreeOrRootT::RootNodeType; + using type = typename GridOrTreeOrRootT::RootNodeType; +}; + +template +struct NodeTrait +{ + static_assert(GridOrTreeOrRootT::RootNodeType::LEVEL == 3, "Tree depth is not supported"); + using Type = const typename GridOrTreeOrRootT::RootNodeType; + using type = const typename GridOrTreeOrRootT::RootNodeType; }; -template -struct NodeTrait +// ----------------------------> Froward decelerations of random access methods <-------------------------------------- + +template +struct GetValue; +template +struct SetValue; +template +struct SetVoxel; +template +struct GetState; +template +struct GetDim; +template +struct GetLeaf; +template +struct ProbeValue; +template +struct GetNodeInfo; + +// ----------------------------> CheckMode <---------------------------------- + +/// @brief List of different modes for computing for a checksum +enum class CheckMode : uint32_t { Disable = 0, // no computation + Empty = 0, + Half = 1, + Partial = 1, // fast but approximate + Default = 1, // defaults to Partial + Full = 2, // slow but accurate + End = 3, // marks the end of the enum list + StrLen = 9 + End}; + +/// @brief Prints CheckMode enum to a c-string +/// @param dst Destination c-string +/// @param mode CheckMode enum to be converted to string +/// @return destinations string @c dst +__hostdev__ inline char* toStr(char *dst, CheckMode mode) +{ + switch (mode){ + case CheckMode::Half: return util::strcpy(dst, "half"); + case CheckMode::Full: return util::strcpy(dst, "full"); + default: return util::strcpy(dst, "disabled"); + } +} + +// ----------------------------> Checksum <---------------------------------- + +/// @brief Class that encapsulates two CRC32 checksums, one for the Grid, Tree and Root node meta data +/// and one for the remaining grid nodes. +class Checksum { - static_assert(GridOrTreeOrRootT::RootType::LEVEL == 3, "Tree depth is not supported"); - using Type = const typename GridOrTreeOrRootT::RootType; - using type = const typename GridOrTreeOrRootT::RootType; -}; + /// Three types of checksums: + /// 1) Empty: all 64 bits are on (used to signify a disabled or undefined checksum) + /// 2) Half: Upper 32 bits are on and not all of lower 32 bits are on (lower 32 bits checksum head of grid) + /// 3) Full: Not all of the 64 bits are one (lower 32 bits checksum head of grid and upper 32 bits checksum tail of grid) + union { uint32_t mCRC32[2]; uint64_t mCRC64; };// mCRC32[0] is checksum of Grid, Tree and Root, and mCRC32[1] is checksum of nodes + +public: + + static constexpr uint32_t EMPTY32 = ~uint32_t{0}; + static constexpr uint64_t EMPTY64 = ~uint64_t(0); + + /// @brief default constructor initiates checksum to EMPTY + __hostdev__ Checksum() : mCRC64{EMPTY64} {} + + /// @brief Constructor that allows the two 32bit checksums to be initiated explicitly + /// @param head Initial 32bit CRC checksum of grid, tree and root data + /// @param tail Initial 32bit CRC checksum of all the nodes and blind data + __hostdev__ Checksum(uint32_t head, uint32_t tail) : mCRC32{head, tail} {} + + /// @brief + /// @param checksum + /// @param mode + __hostdev__ Checksum(uint64_t checksum, CheckMode mode = CheckMode::Full) : mCRC64{mode == CheckMode::Disable ? 
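// A compile-time sketch of the NodeTrait helper above, which recovers the node type at a given
// tree level (0 = leaf, 1 = lower internal, 2 = upper internal, 3 = root) from any grid, tree or
// root type while preserving constness; TreeT and NodeTypesSketch are illustrative placeholders.
template<typename TreeT>
struct NodeTypesSketch
{
    using LeafT  = typename NodeTrait<TreeT, 0>::Type; // leaf node
    using LowerT = typename NodeTrait<TreeT, 1>::Type; // lower internal node
    using UpperT = typename NodeTrait<TreeT, 2>::Type; // upper internal node
    using RootT  = typename NodeTrait<TreeT, 3>::Type; // root node
};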
EMPTY64 : checksum} + { + if (mode == CheckMode::Partial) mCRC32[1] = EMPTY32; + } + + /// @brief return the 64 bit checksum of this instance + [[deprecated("Use Checksum::data instead.")]] + __hostdev__ uint64_t checksum() const { return mCRC64; } + [[deprecated("Use Checksum::head and Ckecksum::tail instead.")]] + __hostdev__ uint32_t& checksum(int i) {NANOVDB_ASSERT(i==0 || i==1); return mCRC32[i]; } + [[deprecated("Use Checksum::head and Ckecksum::tail instead.")]] + __hostdev__ uint32_t checksum(int i) const {NANOVDB_ASSERT(i==0 || i==1); return mCRC32[i]; } + + __hostdev__ uint64_t full() const { return mCRC64; } + __hostdev__ uint64_t& full() { return mCRC64; } + __hostdev__ uint32_t head() const { return mCRC32[0]; } + __hostdev__ uint32_t& head() { return mCRC32[0]; } + __hostdev__ uint32_t tail() const { return mCRC32[1]; } + __hostdev__ uint32_t& tail() { return mCRC32[1]; } + + /// @brief return true if the 64 bit checksum is partial, i.e. of head only + [[deprecated("Use Checksum::isHalf instead.")]] + __hostdev__ bool isPartial() const { return mCRC32[0] != EMPTY32 && mCRC32[1] == EMPTY32; } + __hostdev__ bool isHalf() const { return mCRC32[0] != EMPTY32 && mCRC32[1] == EMPTY32; } + + /// @brief return true if the 64 bit checksum is fill, i.e. of both had and nodes + __hostdev__ bool isFull() const { return mCRC64 != EMPTY64 && mCRC32[1] != EMPTY32; } + + /// @brief return true if the 64 bit checksum is disables (unset) + __hostdev__ bool isEmpty() const { return mCRC64 == EMPTY64; } + + __hostdev__ void disable() { mCRC64 = EMPTY64; } + + /// @brief return the mode of the 64 bit checksum + __hostdev__ CheckMode mode() const + { + return mCRC64 == EMPTY64 ? CheckMode::Disable : + mCRC32[1] == EMPTY32 ? CheckMode::Partial : CheckMode::Full; + } + + /// @brief return true if the checksums are identical + /// @param rhs other Checksum + __hostdev__ bool operator==(const Checksum &rhs) const {return mCRC64 == rhs.mCRC64;} + + /// @brief return true if the checksums are not identical + /// @param rhs other Checksum + __hostdev__ bool operator!=(const Checksum &rhs) const {return mCRC64 != rhs.mCRC64;} +};// Checksum + +/// @brief Maps 64 bit checksum to CheckMode enum +/// @param checksum 64 bit checksum with two CRC32 codes +/// @return CheckMode enum +__hostdev__ inline CheckMode toCheckMode(const Checksum &checksum){return checksum.mode();} // ----------------------------> Grid <-------------------------------------- @@ -2173,7 +1798,7 @@ struct NodeTrait N0 LeafNodes each with a bit mask, N0 ValueTypes and min/max Example layout: ("---" implies it has a custom offset, "..." implies zero or more) - [GridData][TreeData]---[RootData][ROOT TILES...]---[NodeData<5>]---[ModeData<4>]---[LeafData<3>]---[BLINDMETA...]---[BLIND0]---[BLIND1]---etc. + [GridData][TreeData]---[RootData][ROOT TILES...]---[InternalData<5>]---[InternalData<4>]---[LeafData<3>]---[BLINDMETA...]---[BLIND0]---[BLIND1]---etc. */ /// @brief Struct with all the member data of the Grid (useful during serialization of an openvdb grid) @@ -2183,76 +1808,83 @@ struct NodeTrait /// /// @note No client code should (or can) interface with this struct so it can safely be ignored! struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) GridData -{// sizeof(GridData) = 672B - static const int MaxNameSize = 256;// due to NULL termination the maximum length is one less - uint64_t mMagic; // 8B magic to validate it is valid grid data. - uint64_t mChecksum; // 8B. Checksum of grid buffer. 
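// A sketch of the three states encoded by the Checksum class above: default construction leaves it
// disabled (all 64 bits on), a valid head CRC32 with an empty tail makes it "half", and two valid
// CRC32s make it "full"; checksumModeSketch and the CRC values are illustrative only.
inline bool checksumModeSketch()
{
    const Checksum disabled;                              // EMPTY64, i.e. checksumming disabled
    const Checksum half(0x12345678u, Checksum::EMPTY32);  // head CRC32 only
    const Checksum full(0x12345678u, 0x9abcdef0u);        // head and tail CRC32s
    return disabled.mode() == CheckMode::Disable &&
           half.mode()     == CheckMode::Half    &&
           full.mode()     == CheckMode::Full;
}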
- Version mVersion;// 4B major, minor, and patch version numbers - uint32_t mFlags; // 4B. flags for grid. - uint32_t mGridIndex;// 4B. Index of this grid in the buffer - uint32_t mGridCount; // 4B. Total number of grids in the buffer - uint64_t mGridSize; // 8B. byte count of this entire grid occupied in the buffer. - char mGridName[MaxNameSize]; // 256B - Map mMap; // 264B. affine transformation between index and world space in both single and double precision - BBox mWorldBBox; // 48B. floating-point AABB of active values in WORLD SPACE (2 x 3 doubles) - Vec3R mVoxelSize; // 24B. size of a voxel in world units - GridClass mGridClass; // 4B. - GridType mGridType; // 4B. - int64_t mBlindMetadataOffset; // 8B. offset of GridBlindMetaData structures that follow this grid. - uint32_t mBlindMetadataCount; // 4B. count of GridBlindMetaData structures that follow this grid. - - - // Set and unset various bit flags - __hostdev__ void setFlagsOff() { mFlags = uint32_t(0); } - __hostdev__ void setMinMaxOn(bool on = true) - { - if (on) { - mFlags |= static_cast(GridFlags::HasMinMax); - } else { - mFlags &= ~static_cast(GridFlags::HasMinMax); - } - } - __hostdev__ void setBBoxOn(bool on = true) - { - if (on) { - mFlags |= static_cast(GridFlags::HasBBox); - } else { - mFlags &= ~static_cast(GridFlags::HasBBox); - } - } - __hostdev__ void setLongGridNameOn(bool on = true) - { - if (on) { - mFlags |= static_cast(GridFlags::HasLongGridName); - } else { - mFlags &= ~static_cast(GridFlags::HasLongGridName); - } - } - __hostdev__ void setAverageOn(bool on = true) - { - if (on) { - mFlags |= static_cast(GridFlags::HasAverage); - } else { - mFlags &= ~static_cast(GridFlags::HasAverage); - } - } - __hostdev__ void setStdDeviationOn(bool on = true) - { - if (on) { - mFlags |= static_cast(GridFlags::HasStdDeviation); - } else { - mFlags &= ~static_cast(GridFlags::HasStdDeviation); - } +{ // sizeof(GridData) = 672B + static const int MaxNameSize = 256; // due to NULL termination the maximum length is one less + uint64_t mMagic; // 8B (0) magic to validate it is valid grid data. + Checksum mChecksum; // 8B (8). Checksum of grid buffer. + Version mVersion; // 4B (16) major, minor, and patch version numbers + BitFlags<32> mFlags; // 4B (20). flags for grid. + uint32_t mGridIndex; // 4B (24). Index of this grid in the buffer + uint32_t mGridCount; // 4B (28). Total number of grids in the buffer + uint64_t mGridSize; // 8B (32). byte count of this entire grid occupied in the buffer. + char mGridName[MaxNameSize]; // 256B (40) + Map mMap; // 264B (296). affine transformation between index and world space in both single and double precision + Vec3dBBox mWorldBBox; // 48B (560). floating-point AABB of active values in WORLD SPACE (2 x 3 doubles) + Vec3d mVoxelSize; // 24B (608). size of a voxel in world units + GridClass mGridClass; // 4B (632). + GridType mGridType; // 4B (636). + int64_t mBlindMetadataOffset; // 8B (640). offset to beginning of GridBlindMetaData structures that follow this grid. + uint32_t mBlindMetadataCount; // 4B (648). count of GridBlindMetaData structures that follow this grid. 
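// The byte offsets annotated above continue with mData0/mData1/mData2 just below
// (652/656/664), padding the struct to the documented 672 bytes. A compile-time sanity
// check along these lines can catch accidental layout changes; it assumes the full GridData
// definition is visible and that NANOVDB_DATA_ALIGNMENT keeps its usual value of 32:
static_assert(sizeof(nanovdb::GridData) == 672, "GridData layout differs from the documented 672B");
static_assert(sizeof(nanovdb::GridData) % NANOVDB_DATA_ALIGNMENT == 0, "GridData is not 32B aligned");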
+ uint32_t mData0; // 4B (652) unused + uint64_t mData1; // 8B (656) is use for the total number of values indexed by an IndexGrid + uint64_t mData2; // 8B (664) padding to 32 B alignment + /// @brief Use this method to initiate most member data + GridData& operator=(const GridData&) = default; + //__hostdev__ GridData& operator=(const GridData& other){return *util::memcpy(this, &other);} + __hostdev__ void init(std::initializer_list list = {GridFlags::IsBreadthFirst}, + uint64_t gridSize = 0u, + const Map& map = Map(), + GridType gridType = GridType::Unknown, + GridClass gridClass = GridClass::Unknown) + { +#ifdef NANOVDB_USE_NEW_MAGIC_NUMBERS + mMagic = NANOVDB_MAGIC_GRID; +#else + mMagic = NANOVDB_MAGIC_NUMB; +#endif + mChecksum.disable();// all 64 bits ON means checksum is disabled + mVersion = Version(); + mFlags.initMask(list); + mGridIndex = 0u; + mGridCount = 1u; + mGridSize = gridSize; + mGridName[0] = '\0'; + mMap = map; + mWorldBBox = Vec3dBBox();// invalid bbox + mVoxelSize = map.getVoxelSize(); + mGridClass = gridClass; + mGridType = gridType; + mBlindMetadataOffset = mGridSize; // i.e. no blind data + mBlindMetadataCount = 0u; // i.e. no blind data + mData0 = 0u; // zero padding + mData1 = 0u; // only used for index and point grids + mData2 = NANOVDB_MAGIC_GRID; // since version 32.6.0 (will change in the future) + } + /// @brief return true if the magic number and the version are both valid + __hostdev__ bool isValid() const { + // Before v32.6.0: toMagic(mMagic) = MagicType::NanoVDB and mData2 was undefined + // For v32.6.0: toMagic(mMagic) = MagicType::NanoVDB and toMagic(mData2) = MagicType::NanoGrid + // After v32.7.X: toMagic(mMagic) = MagicType::NanoGrid and mData2 will again be undefined + const MagicType magic = toMagic(mMagic); + if (magic == MagicType::NanoGrid || toMagic(mData2) == MagicType::NanoGrid) return true; + bool test = magic == MagicType::NanoVDB;// could be GridData or io::FileHeader + if (test) test = mVersion.isCompatible(); + if (test) test = mGridCount > 0u && mGridIndex < mGridCount; + if (test) test = mGridClass < GridClass::End && mGridType < GridType::End; + return test; } - __hostdev__ void setBreadthFirstOn(bool on = true) + // Set and unset various bit flags + __hostdev__ void setMinMaxOn(bool on = true) { mFlags.setMask(GridFlags::HasMinMax, on); } + __hostdev__ void setBBoxOn(bool on = true) { mFlags.setMask(GridFlags::HasBBox, on); } + __hostdev__ void setLongGridNameOn(bool on = true) { mFlags.setMask(GridFlags::HasLongGridName, on); } + __hostdev__ void setAverageOn(bool on = true) { mFlags.setMask(GridFlags::HasAverage, on); } + __hostdev__ void setStdDeviationOn(bool on = true) { mFlags.setMask(GridFlags::HasStdDeviation, on); } + __hostdev__ bool setGridName(const char* src) { - if (on) { - mFlags |= static_cast(GridFlags::IsBreadthFirst); - } else { - mFlags &= ~static_cast(GridFlags::IsBreadthFirst); - } + const bool success = (util::strncpy(mGridName, src, MaxNameSize)[MaxNameSize-1] == '\0'); + if (!success) mGridName[MaxNameSize-1] = '\0'; + return success; // returns true if input grid name is NOT longer than MaxNameSize characters } - // Affine transformations based on double precision template __hostdev__ Vec3T applyMap(const Vec3T& xyz) const { return mMap.applyMap(xyz); } // Pos: index -> world @@ -2277,10 +1909,42 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) GridData __hostdev__ Vec3T applyIJTF(const Vec3T& xyz) const { return mMap.applyIJTF(xyz); } // @brief Return a non-const void pointer to the tree - __hostdev__ 
void* treePtr() { return this + 1; } + __hostdev__ void* treePtr() { return this + 1; }// TreeData is always right after GridData // @brief Return a const void pointer to the tree - __hostdev__ const void* treePtr() const { return this + 1; } + __hostdev__ const void* treePtr() const { return this + 1; }// TreeData is always right after GridData + + /// @brief Return a non-const void pointer to the first node at @c LEVEL + /// @tparam LEVEL Level of the node. LEVEL 0 means leaf node and LEVEL 3 means root node + template + __hostdev__ const void* nodePtr() const + { + static_assert(LEVEL >= 0 && LEVEL <= 3, "invalid LEVEL template parameter"); + const void *treeData = this + 1;// TreeData is always right after GridData + const uint64_t nodeOffset = *util::PtrAdd(treeData, 8*LEVEL);// skip LEVEL uint64_t + return nodeOffset ? util::PtrAdd(treeData, nodeOffset) : nullptr; + } + + /// @brief Return a non-const void pointer to the first node at @c LEVEL + /// @tparam LEVEL of the node. LEVEL 0 means leaf node and LEVEL 3 means root node + /// @warning If not nodes exist at @c LEVEL NULL is returned + template + __hostdev__ void* nodePtr() + { + static_assert(LEVEL >= 0 && LEVEL <= 3, "invalid LEVEL template parameter"); + void *treeData = this + 1;// TreeData is always right after GridData + const uint64_t nodeOffset = *util::PtrAdd(treeData, 8*LEVEL);// skip LEVEL uint64_t + return nodeOffset ? util::PtrAdd(treeData, nodeOffset) : nullptr; + } + + /// @brief Return number of nodes at @c LEVEL + /// @tparam Level of the node. LEVEL 0 means leaf node and LEVEL 2 means upper node + template + __hostdev__ uint32_t nodeCount() const + { + static_assert(LEVEL >= 0 && LEVEL < 3, "invalid LEVEL template parameter"); + return *util::PtrAdd(this + 1, 4*(8 + LEVEL));// TreeData is always right after GridData + } /// @brief Returns a const reference to the blindMetaData at the specified linear offset. /// @@ -2288,16 +1952,55 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) GridData __hostdev__ const GridBlindMetaData* blindMetaData(uint32_t n) const { NANOVDB_ASSERT(n < mBlindMetadataCount); - return PtrAdd(this, mBlindMetadataOffset) + n; + return util::PtrAdd(this, mBlindMetadataOffset) + n; + } + + __hostdev__ const char* gridName() const + { + if (mFlags.isMaskOn(GridFlags::HasLongGridName)) {// search for first blind meta data that contains a name + NANOVDB_ASSERT(mBlindMetadataCount > 0); + for (uint32_t i = 0; i < mBlindMetadataCount; ++i) { + const auto* metaData = this->blindMetaData(i);// EXTREMELY important to be a pointer + if (metaData->mDataClass == GridBlindDataClass::GridName) { + NANOVDB_ASSERT(metaData->mDataType == GridType::Unknown); + return metaData->template getBlindData(); + } + } + NANOVDB_ASSERT(false); // should never hit this! + } + return mGridName; + } + + /// @brief Return memory usage in bytes for this class only. + __hostdev__ static uint64_t memUsage() { return sizeof(GridData); } + + /// @brief return AABB of active values in world space + __hostdev__ const Vec3dBBox& worldBBox() const { return mWorldBBox; } + + /// @brief return AABB of active values in index space + __hostdev__ const CoordBBox& indexBBox() const {return *(const CoordBBox*)(this->nodePtr<3>());} + + /// @brief return the root table has size + __hostdev__ uint32_t rootTableSize() const + { + const void *root = this->nodePtr<3>(); + return root ? 
*util::PtrAdd(root, sizeof(CoordBBox)) : 0u; } + /// @brief test if the grid is empty, e.i the root table has size 0 + /// @return true if this grid contains not data whatsoever + __hostdev__ bool isEmpty() const {return this->rootTableSize() == 0u;} + + /// @brief return true if RootData follows TreeData in memory without any extra padding + /// @details TreeData is always following right after GridData, but the same might not be true for RootData + __hostdev__ bool isRootConnected() const { return *(const uint64_t*)((const char*)(this + 1) + 24) == 64u;} }; // GridData // Forward declaration of accelerated random access class -template +template class ReadAccessor; -template +template using DefaultReadAccessor = ReadAccessor; /// @brief Highest level of the data structure. Contains a tree and a world->index @@ -2305,14 +2008,18 @@ using DefaultReadAccessor = ReadAccessor; /// /// @note This the API of this class to interface with client code template -class Grid : private GridData +class Grid : public GridData { public: - using TreeType = TreeT; - using RootType = typename TreeT::RootType; - using DataType = GridData; + using TreeType = TreeT; + using RootType = typename TreeT::RootType; + using RootNodeType = RootType; + using UpperNodeType = typename RootNodeType::ChildNodeType; + using LowerNodeType = typename UpperNodeType::ChildNodeType; + using LeafNodeType = typename RootType::LeafNodeType; + using DataType = GridData; using ValueType = typename TreeT::ValueType; - using BuildType = typename TreeT::BuildType;// in rare cases BuildType != ValueType, e.g. then BuildType = ValueMask and ValueType = bool + using BuildType = typename TreeT::BuildType; // in rare cases BuildType != ValueType, e.g. then BuildType = ValueMask and ValueType = bool using CoordType = typename TreeT::CoordType; using AccessorType = DefaultReadAccessor; @@ -2330,7 +2037,7 @@ class Grid : private GridData __hostdev__ const DataType* data() const { return reinterpret_cast(this); } /// @brief Return memory usage in bytes for this class only. - __hostdev__ static uint64_t memUsage() { return sizeof(GridData); } + //__hostdev__ static uint64_t memUsage() { return sizeof(GridData); } /// @brief Return the memory footprint of the entire grid, i.e. 
including all nodes and blind data __hostdev__ uint64_t gridSize() const { return DataType::mGridSize; } @@ -2341,6 +2048,20 @@ class Grid : private GridData /// @brief Return total number of grids in the buffer __hostdev__ uint32_t gridCount() const { return DataType::mGridCount; } + /// @brief @brief Return the total number of values indexed by this IndexGrid + /// + /// @note This method is only defined for IndexGrid = NanoGrid + template + __hostdev__ typename util::enable_if::is_index, const uint64_t&>::type + valueCount() const { return DataType::mData1; } + + /// @brief @brief Return the total number of points indexed by this PointGrid + /// + /// @note This method is only defined for PointGrid = NanoGrid + template + __hostdev__ typename util::enable_if::value, const uint64_t&>::type + pointCount() const { return DataType::mData1; } + /// @brief Return a const reference to the tree __hostdev__ const TreeT& tree() const { return *reinterpret_cast(this->treePtr()); } @@ -2351,7 +2072,7 @@ class Grid : private GridData __hostdev__ AccessorType getAccessor() const { return AccessorType(this->tree().root()); } /// @brief Return a const reference to the size of a voxel in world units - __hostdev__ const Vec3R& voxelSize() const { return DataType::mVoxelSize; } + __hostdev__ const Vec3d& voxelSize() const { return DataType::mVoxelSize; } /// @brief Return a const reference to the Map for this grid __hostdev__ const Map& map() const { return DataType::mMap; } @@ -2403,84 +2124,97 @@ class Grid : private GridData __hostdev__ Vec3T indexToWorldGradF(const Vec3T& grad) const { return DataType::applyIJTF(grad); } /// @brief Computes a AABB of active values in world space - __hostdev__ const BBox& worldBBox() const { return DataType::mWorldBBox; } + //__hostdev__ const Vec3dBBox& worldBBox() const { return DataType::mWorldBBox; } /// @brief Computes a AABB of active values in index space /// /// @note This method is returning a floating point bounding box and not a CoordBBox. This makes /// it more useful for clipping rays. - __hostdev__ const BBox& indexBBox() const { return this->tree().bbox(); } + //__hostdev__ const BBox& indexBBox() const { return this->tree().bbox(); } /// @brief Return the total number of active voxels in this tree. 
__hostdev__ uint64_t activeVoxelCount() const { return this->tree().activeVoxelCount(); } /// @brief Methods related to the classification of this grid - __hostdev__ bool isValid() const { return DataType::mMagic == NANOVDB_MAGIC_NUMBER; } + __hostdev__ bool isValid() const { return DataType::isValid(); } __hostdev__ const GridType& gridType() const { return DataType::mGridType; } __hostdev__ const GridClass& gridClass() const { return DataType::mGridClass; } __hostdev__ bool isLevelSet() const { return DataType::mGridClass == GridClass::LevelSet; } __hostdev__ bool isFogVolume() const { return DataType::mGridClass == GridClass::FogVolume; } __hostdev__ bool isStaggered() const { return DataType::mGridClass == GridClass::Staggered; } __hostdev__ bool isPointIndex() const { return DataType::mGridClass == GridClass::PointIndex; } + __hostdev__ bool isGridIndex() const { return DataType::mGridClass == GridClass::IndexGrid; } __hostdev__ bool isPointData() const { return DataType::mGridClass == GridClass::PointData; } __hostdev__ bool isMask() const { return DataType::mGridClass == GridClass::Topology; } __hostdev__ bool isUnknown() const { return DataType::mGridClass == GridClass::Unknown; } - __hostdev__ bool hasMinMax() const { return DataType::mFlags & static_cast(GridFlags::HasMinMax); } - __hostdev__ bool hasBBox() const { return DataType::mFlags & static_cast(GridFlags::HasBBox); } - __hostdev__ bool hasLongGridName() const { return DataType::mFlags & static_cast(GridFlags::HasLongGridName); } - __hostdev__ bool hasAverage() const { return DataType::mFlags & static_cast(GridFlags::HasAverage); } - __hostdev__ bool hasStdDeviation() const { return DataType::mFlags & static_cast(GridFlags::HasStdDeviation); } - __hostdev__ bool isBreadthFirst() const { return DataType::mFlags & static_cast(GridFlags::IsBreadthFirst); } + __hostdev__ bool hasMinMax() const { return DataType::mFlags.isMaskOn(GridFlags::HasMinMax); } + __hostdev__ bool hasBBox() const { return DataType::mFlags.isMaskOn(GridFlags::HasBBox); } + __hostdev__ bool hasLongGridName() const { return DataType::mFlags.isMaskOn(GridFlags::HasLongGridName); } + __hostdev__ bool hasAverage() const { return DataType::mFlags.isMaskOn(GridFlags::HasAverage); } + __hostdev__ bool hasStdDeviation() const { return DataType::mFlags.isMaskOn(GridFlags::HasStdDeviation); } + __hostdev__ bool isBreadthFirst() const { return DataType::mFlags.isMaskOn(GridFlags::IsBreadthFirst); } /// @brief return true if the specified node type is layed out breadth-first in memory and has a fixed size. /// This allows for sequential access to the nodes. - template + template __hostdev__ bool isSequential() const { return NodeT::FIXED_SIZE && this->isBreadthFirst(); } /// @brief return true if the specified node level is layed out breadth-first in memory and has a fixed size. /// This allows for sequential access to the nodes. 
- template - __hostdev__ bool isSequential() const { return NodeTrait::type::FIXED_SIZE && this->isBreadthFirst(); } + template + __hostdev__ bool isSequential() const { return NodeTrait::type::FIXED_SIZE && this->isBreadthFirst(); } + + /// @brief return true if nodes at all levels can safely be accessed with simple linear offsets + __hostdev__ bool isSequential() const { return UpperNodeType::FIXED_SIZE && LowerNodeType::FIXED_SIZE && LeafNodeType::FIXED_SIZE && this->isBreadthFirst(); } /// @brief Return a c-string with the name of this grid - __hostdev__ const char* gridName() const - { - if (this->hasLongGridName()) { - const auto &metaData = this->blindMetaData(DataType::mBlindMetadataCount-1);// always the last - NANOVDB_ASSERT(metaData.mDataClass == GridBlindDataClass::GridName); - return metaData.template getBlindData(); - } - return DataType::mGridName; - } + __hostdev__ const char* gridName() const { return DataType::gridName(); } /// @brief Return a c-string with the name of this grid, truncated to 255 characters __hostdev__ const char* shortGridName() const { return DataType::mGridName; } /// @brief Return checksum of the grid buffer. - __hostdev__ uint64_t checksum() const { return DataType::mChecksum; } + __hostdev__ const Checksum& checksum() const { return DataType::mChecksum; } /// @brief Return true if this grid is empty, i.e. contains no values or nodes. - __hostdev__ bool isEmpty() const { return this->tree().isEmpty(); } + //__hostdev__ bool isEmpty() const { return this->tree().isEmpty(); } /// @brief Return the count of blind-data encoded in this grid - __hostdev__ int blindDataCount() const { return DataType::mBlindMetadataCount; } + __hostdev__ uint32_t blindDataCount() const { return DataType::mBlindMetadataCount; } + + /// @brief Return the index of the first blind data with specified name if found, otherwise -1. + __hostdev__ int findBlindData(const char* name) const; - /// @brief Return the index of the blind data with specified semantic if found, otherwise -1. + /// @brief Return the index of the first blind data with specified semantic if found, otherwise -1. __hostdev__ int findBlindDataForSemantic(GridBlindDataSemantic semantic) const; /// @brief Returns a const pointer to the blindData at the specified linear offset. /// - /// @warning Point might be NULL and the linear offset is assumed to be in the valid range + /// @warning Pointer might be NULL and the linear offset is assumed to be in the valid range + // this method is deprecated !!!! + [[deprecated("Use Grid::getBlindData() instead.")]] __hostdev__ const void* blindData(uint32_t n) const { - if (DataType::mBlindMetadataCount == 0) { - return nullptr; - } + printf("\nnanovdb::Grid::blindData is unsafe and hence deprecated! 
Please use nanovdb::Grid::getBlindData instead.\n\n"); NANOVDB_ASSERT(n < DataType::mBlindMetadataCount); - return this->blindMetaData(n).template getBlindData(); + return this->blindMetaData(n).blindData(); } - __hostdev__ const GridBlindMetaData& blindMetaData(int n) const { return *DataType::blindMetaData(n); } + template + __hostdev__ const BlindDataT* getBlindData(uint32_t n) const + { + if (n >= DataType::mBlindMetadataCount) return nullptr;// index is out of bounds + return this->blindMetaData(n).template getBlindData();// NULL if mismatching BlindDataT + } + + template + __hostdev__ BlindDataT* getBlindData(uint32_t n) + { + if (n >= DataType::mBlindMetadataCount) return nullptr;// index is out of bounds + return const_cast(this->blindMetaData(n).template getBlindData());// NULL if mismatching BlindDataT + } + + __hostdev__ const GridBlindMetaData& blindMetaData(uint32_t n) const { return *DataType::blindMetaData(n); } private: static_assert(sizeof(GridData) % NANOVDB_DATA_ALIGNMENT == 0, "sizeof(GridData) is misaligned"); @@ -2489,40 +2223,70 @@ class Grid : private GridData template __hostdev__ int Grid::findBlindDataForSemantic(GridBlindDataSemantic semantic) const { - for (uint32_t i = 0, n = this->blindDataCount(); i < n; ++i) + for (uint32_t i = 0, n = this->blindDataCount(); i < n; ++i) { if (this->blindMetaData(i).mSemantic == semantic) return int(i); + } + return -1; +} + +template +__hostdev__ int Grid::findBlindData(const char* name) const +{ + auto test = [&](int n) { + const char* str = this->blindMetaData(n).mName; + for (int i = 0; i < GridBlindMetaData::MaxNameSize; ++i) { + if (name[i] != str[i]) + return false; + if (name[i] == '\0' && str[i] == '\0') + return true; + } + return true; // all len characters matched + }; + for (int i = 0, n = this->blindDataCount(); i < n; ++i) + if (test(i)) + return i; return -1; } // ----------------------------> Tree <-------------------------------------- -template struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) TreeData -{// sizeof(TreeData<3>) == 64B - static_assert(ROOT_LEVEL == 3, "Root level is assumed to be three"); - uint64_t mNodeOffset[4];//32B, byte offset from this tree to first leaf, lower, upper and root node - uint32_t mNodeCount[3];// 12B, total number of nodes of type: leaf, lower internal, upper internal - uint32_t mTileCount[3];// 12B, total number of tiles of type: leaf, lower internal, upper internal (node, only active tiles!) - uint64_t mVoxelCount;// 8B, total number of active voxels in the root and all its child nodes. +{ // sizeof(TreeData) == 64B + int64_t mNodeOffset[4];// 32B, byte offset from this tree to first leaf, lower, upper and root node. If mNodeCount[N]=0 => mNodeOffset[N]==mNodeOffset[N+1] + uint32_t mNodeCount[3]; // 12B, total number of nodes of type: leaf, lower internal, upper internal + uint32_t mTileCount[3]; // 12B, total number of active tile values at the lower internal, upper internal and root node levels + uint64_t mVoxelCount; // 8B, total number of active voxels in the root and all its child nodes. 
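// The node offsets and counts stored above are what make linear, breadth-first traversal of
// the tree possible. A host-side sketch of visiting every leaf node through the sequential
// layout; nanovdb::FloatGrid and the exact nodeCount/getFirstLeaf spellings follow the API in
// this header but should be treated as assumptions:
inline void visitLeavesLinearly(const nanovdb::FloatGrid& grid)
{
    const auto& tree = grid.tree();
    if (!grid.isSequential()) return;                 // requires breadth-first, fixed-size nodes
    const auto* leaf = tree.getFirstLeaf();           // first of nodeCount(0) contiguous leaves
    for (uint32_t i = 0, n = tree.nodeCount(0); i < n; ++i, ++leaf) {
        // process leaf number i here, e.g. gather statistics or copy values out
        (void)leaf;
    }
}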
+ // No padding since it's always 32B aligned + //__hostdev__ TreeData& operator=(const TreeData& other){return *util::memcpy(this, &other);} + TreeData& operator=(const TreeData&) = default; + __hostdev__ void setRoot(const void* root) { + NANOVDB_ASSERT(root); + mNodeOffset[3] = util::PtrDiff(root, this); + } - template - __hostdev__ void setRoot(const RootT* root) { mNodeOffset[3] = PtrDiff(root, this); } - template - __hostdev__ RootT* getRoot() { return PtrAdd(this, mNodeOffset[3]); } - template - __hostdev__ const RootT* getRoot() const { return PtrAdd(this, mNodeOffset[3]); } + /// @brief Get a non-const void pointer to the root node (never NULL) + __hostdev__ void* getRoot() { return util::PtrAdd(this, mNodeOffset[3]); } - template - __hostdev__ void setFirstNode(const NodeT* node) - { - mNodeOffset[NodeT::LEVEL] = node ? PtrDiff(node, this) : 0; - } -}; + /// @brief Get a const void pointer to the root node (never NULL) + __hostdev__ const void* getRoot() const { return util::PtrAdd(this, mNodeOffset[3]); } + + template + __hostdev__ void setFirstNode(const NodeT* node) {mNodeOffset[NodeT::LEVEL] = (node ? util::PtrDiff(node, this) : 0);} + + /// @brief Return true if the root is empty, i.e. has not child nodes or constant tiles + __hostdev__ bool isEmpty() const {return mNodeOffset[3] ? *util::PtrAdd(this, mNodeOffset[3] + sizeof(CoordBBox)) == 0 : true;} + + /// @brief Return the index bounding box of all the active values in this tree, i.e. in all nodes of the tree + __hostdev__ CoordBBox bbox() const {return mNodeOffset[3] ? *util::PtrAdd(this, mNodeOffset[3]) : CoordBBox();} + + /// @brief return true if RootData is layout out immediately after TreeData in memory + __hostdev__ bool isRootNext() const {return mNodeOffset[3] ? mNodeOffset[3] == sizeof(TreeData) : false; } +};// TreeData // ----------------------------> GridTree <-------------------------------------- -/// @brief defines a tree type from a grid type while perserving constness +/// @brief defines a tree type from a grid type while preserving constness template struct GridTree { @@ -2540,7 +2304,7 @@ struct GridTree /// @brief VDB Tree, which is a thin wrapper around a RootNode. template -class Tree : private TreeData +class Tree : public TreeData { static_assert(RootT::LEVEL == 3, "Tree depth is not supported"); static_assert(RootT::ChildNodeType::LOG2DIM == 5, "Tree configuration is not supported"); @@ -2548,11 +2312,14 @@ class Tree : private TreeData static_assert(RootT::LeafNodeType::LOG2DIM == 3, "Tree configuration is not supported"); public: - using DataType = TreeData; + using DataType = TreeData; using RootType = RootT; - using LeafNodeType = typename RootT::LeafNodeType; + using RootNodeType = RootT; + using UpperNodeType = typename RootNodeType::ChildNodeType; + using LowerNodeType = typename UpperNodeType::ChildNodeType; + using LeafNodeType = typename RootType::LeafNodeType; using ValueType = typename RootT::ValueType; - using BuildType = typename RootT::BuildType;// in rare cases BuildType != ValueType, e.g. then BuildType = ValueMask and ValueType = bool + using BuildType = typename RootT::BuildType; // in rare cases BuildType != ValueType, e.g. 
then BuildType = ValueMask and ValueType = bool using CoordType = typename RootT::CoordType; using AccessorType = DefaultReadAccessor; @@ -2574,20 +2341,21 @@ class Tree : private TreeData /// @brief return memory usage in bytes for the class __hostdev__ static uint64_t memUsage() { return sizeof(DataType); } - __hostdev__ RootT& root() { return *DataType::template getRoot(); } + __hostdev__ RootT& root() {return *reinterpret_cast(DataType::getRoot());} - __hostdev__ const RootT& root() const { return *DataType::template getRoot(); } + __hostdev__ const RootT& root() const {return *reinterpret_cast(DataType::getRoot());} __hostdev__ AccessorType getAccessor() const { return AccessorType(this->root()); } /// @brief Return the value of the given voxel (regardless of state or location in the tree.) __hostdev__ ValueType getValue(const CoordType& ijk) const { return this->root().getValue(ijk); } + __hostdev__ ValueType getValue(int i, int j, int k) const { return this->root().getValue(CoordType(i, j, k)); } /// @brief Return the active state of the given voxel (regardless of state or location in the tree.) __hostdev__ bool isActive(const CoordType& ijk) const { return this->root().isActive(ijk); } /// @brief Return true if this tree is empty, i.e. contains no values or nodes - __hostdev__ bool isEmpty() const { return this->root().isEmpty(); } + //__hostdev__ bool isEmpty() const { return this->root().isEmpty(); } /// @brief Combines the previous two methods in a single call __hostdev__ bool probeValue(const CoordType& ijk, ValueType& v) const { return this->root().probeValue(ijk, v); } @@ -2599,18 +2367,20 @@ class Tree : private TreeData __hostdev__ void extrema(ValueType& min, ValueType& max) const; /// @brief Return a const reference to the index bounding box of all the active values in this tree, i.e. in all nodes of the tree - __hostdev__ const BBox& bbox() const { return this->root().bbox(); } + //__hostdev__ const BBox& bbox() const { return this->root().bbox(); } /// @brief Return the total number of active voxels in this tree. __hostdev__ uint64_t activeVoxelCount() const { return DataType::mVoxelCount; } /// @brief Return the total number of active tiles at the specified level of the tree. /// - /// @details n = 0 corresponds to leaf level tiles. - __hostdev__ const uint32_t& activeTileCount(uint32_t n) const + /// @details level = 1,2,3 corresponds to active tile count in lower internal nodes, upper + /// internal nodes, and the root level. Note active values at the leaf level are + /// referred to as active voxels (see activeVoxelCount defined above). + __hostdev__ const uint32_t& activeTileCount(uint32_t level) const { - NANOVDB_ASSERT(n < 3); - return DataType::mTileCount[n]; + NANOVDB_ASSERT(level > 0 && level <= 3); // 1, 2, or 3 + return DataType::mTileCount[level - 1]; } template @@ -2626,44 +2396,67 @@ class Tree : private TreeData return DataType::mNodeCount[level]; } + __hostdev__ uint32_t totalNodeCount() const + { + return DataType::mNodeCount[0] + DataType::mNodeCount[1] + DataType::mNodeCount[2]; + } + /// @brief return a pointer to the first node of the specified type /// /// @warning Note it may return NULL if no nodes exist - template + template __hostdev__ NodeT* getFirstNode() { - const uint64_t offset = DataType::mNodeOffset[NodeT::LEVEL]; - return offset>0 ? PtrAdd(this, offset) : nullptr; + const int64_t nodeOffset = DataType::mNodeOffset[NodeT::LEVEL]; + return nodeOffset ? 
util::PtrAdd(this, nodeOffset) : nullptr; } /// @brief return a const pointer to the first node of the specified type /// /// @warning Note it may return NULL if no nodes exist - template + template __hostdev__ const NodeT* getFirstNode() const { - const uint64_t offset = DataType::mNodeOffset[NodeT::LEVEL]; - return offset>0 ? PtrAdd(this, offset) : nullptr; + const int64_t nodeOffset = DataType::mNodeOffset[NodeT::LEVEL]; + return nodeOffset ? util::PtrAdd(this, nodeOffset) : nullptr; } /// @brief return a pointer to the first node at the specified level /// /// @warning Note it may return NULL if no nodes exist - template - __hostdev__ typename NodeTrait::type* - getFirstNode() + template + __hostdev__ typename NodeTrait::type* getFirstNode() { - return this->template getFirstNode::type>(); + return this->template getFirstNode::type>(); } /// @brief return a const pointer to the first node of the specified level /// /// @warning Note it may return NULL if no nodes exist - template - __hostdev__ const typename NodeTrait::type* - getFirstNode() const + template + __hostdev__ const typename NodeTrait::type* getFirstNode() const + { + return this->template getFirstNode::type>(); + } + + /// @brief Template specializations of getFirstNode + __hostdev__ LeafNodeType* getFirstLeaf() { return this->getFirstNode(); } + __hostdev__ const LeafNodeType* getFirstLeaf() const { return this->getFirstNode(); } + __hostdev__ typename NodeTrait::type* getFirstLower() { return this->getFirstNode<1>(); } + __hostdev__ const typename NodeTrait::type* getFirstLower() const { return this->getFirstNode<1>(); } + __hostdev__ typename NodeTrait::type* getFirstUpper() { return this->getFirstNode<2>(); } + __hostdev__ const typename NodeTrait::type* getFirstUpper() const { return this->getFirstNode<2>(); } + + template + __hostdev__ auto get(const CoordType& ijk, ArgsT&&... args) const + { + return this->root().template get(ijk, args...); + } + + template + __hostdev__ auto set(const CoordType& ijk, ArgsT&&... args) { - return this->template getFirstNode::type>(); + return this->root().template set(ijk, args...); } private: @@ -2678,7 +2471,7 @@ __hostdev__ void Tree::extrema(ValueType& min, ValueType& max) const max = this->root().maximum(); } -// --------------------------> RootNode <------------------------------------ +// --------------------------> RootData <------------------------------------ /// @brief Struct with all the member data of the RootNode (useful during serialization of an openvdb RootNode) /// @@ -2687,15 +2480,15 @@ template struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) RootData { using ValueT = typename ChildT::ValueType; - using BuildT = typename ChildT::BuildType;// in rare cases BuildType != ValueType, e.g. then BuildType = ValueMask and ValueType = bool + using BuildT = typename ChildT::BuildType; // in rare cases BuildType != ValueType, e.g. 
then BuildType = ValueMask and ValueType = bool using CoordT = typename ChildT::CoordType; using StatsT = typename ChildT::FloatType; static constexpr bool FIXED_SIZE = false; /// @brief Return a key based on the coordinates of a voxel -#ifdef USE_SINGLE_ROOT_KEY +#ifdef NANOVDB_USE_SINGLE_ROOT_KEY using KeyT = uint64_t; - template + template __hostdev__ static KeyT CoordToKey(const CoordType& ijk) { static_assert(sizeof(CoordT) == sizeof(CoordType), "Mismatching sizeof"); @@ -2706,44 +2499,55 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) RootData } __hostdev__ static CoordT KeyToCoord(const KeyT& key) { - static constexpr uint64_t MASK = (1u << 21) - 1; - return CoordT(((key >> 42) & MASK) << ChildT::TOTAL, - ((key >> 21) & MASK) << ChildT::TOTAL, - (key & MASK) << ChildT::TOTAL); + static constexpr uint64_t MASK = (1u << 21) - 1; // used to mask out 21 lower bits + return CoordT(((key >> 42) & MASK) << ChildT::TOTAL, // x are the upper 21 bits + ((key >> 21) & MASK) << ChildT::TOTAL, // y are the middle 21 bits + (key & MASK) << ChildT::TOTAL); // z are the lower 21 bits } #else using KeyT = CoordT; __hostdev__ static KeyT CoordToKey(const CoordT& ijk) { return ijk & ~ChildT::MASK; } __hostdev__ static CoordT KeyToCoord(const KeyT& key) { return key; } #endif - BBox mBBox; // 24B. AABB if active values in index space. - uint32_t mTableSize; // 4B. number of tiles and child pointers in the root node + math::BBox mBBox; // 24B. AABB of active values in index space. + uint32_t mTableSize; // 4B. number of tiles and child pointers in the root node ValueT mBackground; // background value, i.e. value of any unset voxel - ValueT mMinimum; // typically 4B, minmum of all the active values + ValueT mMinimum; // typically 4B, minimum of all the active values ValueT mMaximum; // typically 4B, maximum of all the active values StatsT mAverage; // typically 4B, average of all the active values in this node and its child nodes StatsT mStdDevi; // typically 4B, standard deviation of all the active values in this node and its child nodes + /// @brief Return padding of this class in bytes, due to aliasing and 32B alignment + /// + /// @note The extra bytes are not necessarily at the end, but can come from aliasing of individual data members. + __hostdev__ static constexpr uint32_t padding() + { + return sizeof(RootData) - (24 + 4 + 3 * sizeof(ValueT) + 2 * sizeof(StatsT)); + } + struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) Tile { - template - __hostdev__ void setChild(const CoordType& k, const ChildT *ptr, const RootData *data) + template + __hostdev__ void setChild(const CoordType& k, const void* ptr, const RootData* data) { key = CoordToKey(k); - child = PtrDiff(ptr, data); + state = false; + child = util::PtrDiff(ptr, data); } - template - __hostdev__ void setValue(const CoordType& k, bool s, const ValueType &v) + template + __hostdev__ void setValue(const CoordType& k, bool s, const ValueType& v) { key = CoordToKey(k); state = s; value = v; child = 0; } - __hostdev__ bool isChild() const { return child; } + __hostdev__ bool isChild() const { return child != 0; } + __hostdev__ bool isValue() const { return child == 0; } + __hostdev__ bool isActive() const { return child == 0 && state; } __hostdev__ CoordT origin() const { return KeyToCoord(key); } - KeyT key; // USE_SINGLE_ROOT_KEY ? 8B : 12B + KeyT key; // NANOVDB_USE_SINGLE_ROOT_KEY ? 8B : 12B int64_t child; // 8B. signed byte offset from this node to the child node. 0 means it is a constant tile, so use value. uint32_t state; // 4B. 
state of tile value ValueT value; // value of tile (i.e. no child node) @@ -2763,57 +2567,322 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) RootData return reinterpret_cast(this + 1) + n; } + __hostdev__ Tile* probeTile(const CoordT& ijk) + { +#if 1 // switch between linear and binary seach + const auto key = CoordToKey(ijk); + for (Tile *p = reinterpret_cast(this + 1), *q = p + mTableSize; p < q; ++p) + if (p->key == key) + return p; + return nullptr; +#else // do not enable binary search if tiles are not guaranteed to be sorted!!!!!! + int32_t low = 0, high = mTableSize; // low is inclusive and high is exclusive + while (low != high) { + int mid = low + ((high - low) >> 1); + const Tile* tile = &tiles[mid]; + if (tile->key == key) { + return tile; + } else if (tile->key < key) { + low = mid + 1; + } else { + high = mid; + } + } + return nullptr; +#endif + } + + __hostdev__ inline const Tile* probeTile(const CoordT& ijk) const + { + return const_cast(this)->probeTile(ijk); + } + /// @brief Returns a const reference to the child node in the specified tile. /// /// @warning A child node is assumed to exist in the specified tile __hostdev__ ChildT* getChild(const Tile* tile) { NANOVDB_ASSERT(tile->child); - return PtrAdd(this, tile->child); + return util::PtrAdd(this, tile->child); } __hostdev__ const ChildT* getChild(const Tile* tile) const { NANOVDB_ASSERT(tile->child); - return PtrAdd(this, tile->child); + return util::PtrAdd(this, tile->child); } - __hostdev__ const ValueT& getMin() const { return mMinimum; } - __hostdev__ const ValueT& getMax() const { return mMaximum; } - __hostdev__ const StatsT& average() const { return mAverage; } + __hostdev__ const ValueT& getMin() const { return mMinimum; } + __hostdev__ const ValueT& getMax() const { return mMaximum; } + __hostdev__ const StatsT& average() const { return mAverage; } __hostdev__ const StatsT& stdDeviation() const { return mStdDevi; } - __hostdev__ void setMin(const ValueT& v) { mMinimum = v; } - __hostdev__ void setMax(const ValueT& v) { mMaximum = v; } - __hostdev__ void setAvg(const StatsT& v) { mAverage = v; } - __hostdev__ void setDev(const StatsT& v) { mStdDevi = v; } + __hostdev__ void setMin(const ValueT& v) { mMinimum = v; } + __hostdev__ void setMax(const ValueT& v) { mMaximum = v; } + __hostdev__ void setAvg(const StatsT& v) { mAverage = v; } + __hostdev__ void setDev(const StatsT& v) { mStdDevi = v; } + + /// @brief This class cannot be constructed or deleted + RootData() = delete; + RootData(const RootData&) = delete; + RootData& operator=(const RootData&) = delete; + ~RootData() = delete; +}; // RootData + +// --------------------------> RootNode <------------------------------------ + +/// @brief Top-most node of the VDB tree structure. +template +class RootNode : public RootData +{ +public: + using DataType = RootData; + using ChildNodeType = ChildT; + using RootType = RootNode; // this allows RootNode to behave like a Tree + using RootNodeType = RootType; + using UpperNodeType = ChildT; + using LowerNodeType = typename UpperNodeType::ChildNodeType; + using LeafNodeType = typename ChildT::LeafNodeType; + using ValueType = typename DataType::ValueT; + using FloatType = typename DataType::StatsT; + using BuildType = typename DataType::BuildT; // in rare cases BuildType != ValueType, e.g. 
then BuildType = ValueMask and ValueType = bool + + using CoordType = typename ChildT::CoordType; + using BBoxType = math::BBox; + using AccessorType = DefaultReadAccessor; + using Tile = typename DataType::Tile; + static constexpr bool FIXED_SIZE = DataType::FIXED_SIZE; + + static constexpr uint32_t LEVEL = 1 + ChildT::LEVEL; // level 0 = leaf + + template + class BaseIter + { + protected: + using DataT = typename util::match_const::type; + using TileT = typename util::match_const::type; + DataT* mData; + uint32_t mPos, mSize; + __hostdev__ BaseIter(DataT* data = nullptr, uint32_t n = 0) + : mData(data) + , mPos(0) + , mSize(n) + { + } + + public: + __hostdev__ operator bool() const { return mPos < mSize; } + __hostdev__ uint32_t pos() const { return mPos; } + __hostdev__ void next() { ++mPos; } + __hostdev__ TileT* tile() const { return mData->tile(mPos); } + __hostdev__ CoordType getOrigin() const + { + NANOVDB_ASSERT(*this); + return this->tile()->origin(); + } + __hostdev__ CoordType getCoord() const + { + NANOVDB_ASSERT(*this); + return this->tile()->origin(); + } + }; // Member class BaseIter + + template + class ChildIter : public BaseIter + { + static_assert(util::is_same::type, RootNode>::value, "Invalid RootT"); + using BaseT = BaseIter; + using NodeT = typename util::match_const::type; + + public: + __hostdev__ ChildIter() + : BaseT() + { + } + __hostdev__ ChildIter(RootT* parent) + : BaseT(parent->data(), parent->tileCount()) + { + NANOVDB_ASSERT(BaseT::mData); + while (*this && !this->tile()->isChild()) + this->next(); + } + __hostdev__ NodeT& operator*() const + { + NANOVDB_ASSERT(*this); + return *BaseT::mData->getChild(this->tile()); + } + __hostdev__ NodeT* operator->() const + { + NANOVDB_ASSERT(*this); + return BaseT::mData->getChild(this->tile()); + } + __hostdev__ ChildIter& operator++() + { + NANOVDB_ASSERT(BaseT::mData); + this->next(); + while (*this && this->tile()->isValue()) + this->next(); + return *this; + } + __hostdev__ ChildIter operator++(int) + { + auto tmp = *this; + ++(*this); + return tmp; + } + }; // Member class ChildIter + + using ChildIterator = ChildIter; + using ConstChildIterator = ChildIter; + + __hostdev__ ChildIterator beginChild() { return ChildIterator(this); } + __hostdev__ ConstChildIterator cbeginChild() const { return ConstChildIterator(this); } + + template + class ValueIter : public BaseIter + { + using BaseT = BaseIter; + + public: + __hostdev__ ValueIter() + : BaseT() + { + } + __hostdev__ ValueIter(RootT* parent) + : BaseT(parent->data(), parent->tileCount()) + { + NANOVDB_ASSERT(BaseT::mData); + while (*this && this->tile()->isChild()) + this->next(); + } + __hostdev__ ValueType operator*() const + { + NANOVDB_ASSERT(*this); + return this->tile()->value; + } + __hostdev__ bool isActive() const + { + NANOVDB_ASSERT(*this); + return this->tile()->state; + } + __hostdev__ ValueIter& operator++() + { + NANOVDB_ASSERT(BaseT::mData); + this->next(); + while (*this && this->tile()->isChild()) + this->next(); + return *this; + } + __hostdev__ ValueIter operator++(int) + { + auto tmp = *this; + ++(*this); + return tmp; + } + }; // Member class ValueIter + + using ValueIterator = ValueIter; + using ConstValueIterator = ValueIter; + + __hostdev__ ValueIterator beginValue() { return ValueIterator(this); } + __hostdev__ ConstValueIterator cbeginValueAll() const { return ConstValueIterator(this); } + + template + class ValueOnIter : public BaseIter + { + using BaseT = BaseIter; + + public: + __hostdev__ ValueOnIter() + : BaseT() + { + } + 
__hostdev__ ValueOnIter(RootT* parent) + : BaseT(parent->data(), parent->tileCount()) + { + NANOVDB_ASSERT(BaseT::mData); + while (*this && !this->tile()->isActive()) + ++BaseT::mPos; + } + __hostdev__ ValueType operator*() const + { + NANOVDB_ASSERT(*this); + return this->tile()->value; + } + __hostdev__ ValueOnIter& operator++() + { + NANOVDB_ASSERT(BaseT::mData); + this->next(); + while (*this && !this->tile()->isActive()) + this->next(); + return *this; + } + __hostdev__ ValueOnIter operator++(int) + { + auto tmp = *this; + ++(*this); + return tmp; + } + }; // Member class ValueOnIter + + using ValueOnIterator = ValueOnIter; + using ConstValueOnIterator = ValueOnIter; - /// @brief This class cannot be constructed or deleted - RootData() = delete; - RootData(const RootData&) = delete; - RootData& operator=(const RootData&) = delete; - ~RootData() = delete; -}; // RootData + __hostdev__ ValueOnIterator beginValueOn() { return ValueOnIterator(this); } + __hostdev__ ConstValueOnIterator cbeginValueOn() const { return ConstValueOnIterator(this); } -/// @brief Top-most node of the VDB tree structure. -template -class RootNode : private RootData -{ -public: - using DataType = RootData; - using LeafNodeType = typename ChildT::LeafNodeType; - using ChildNodeType = ChildT; - using RootType = RootNode;// this allows RootNode to behave like a Tree + template + class DenseIter : public BaseIter + { + using BaseT = BaseIter; + using NodeT = typename util::match_const::type; - using ValueType = typename DataType::ValueT; - using FloatType = typename DataType::StatsT; - using BuildType = typename DataType::BuildT;// in rare cases BuildType != ValueType, e.g. then BuildType = ValueMask and ValueType = bool + public: + __hostdev__ DenseIter() + : BaseT() + { + } + __hostdev__ DenseIter(RootT* parent) + : BaseT(parent->data(), parent->tileCount()) + { + NANOVDB_ASSERT(BaseT::mData); + } + __hostdev__ NodeT* probeChild(ValueType& value) const + { + NANOVDB_ASSERT(*this); + NodeT* child = nullptr; + auto* t = this->tile(); + if (t->isChild()) { + child = BaseT::mData->getChild(t); + } else { + value = t->value; + } + return child; + } + __hostdev__ bool isValueOn() const + { + NANOVDB_ASSERT(*this); + return this->tile()->state; + } + __hostdev__ DenseIter& operator++() + { + NANOVDB_ASSERT(BaseT::mData); + this->next(); + return *this; + } + __hostdev__ DenseIter operator++(int) + { + auto tmp = *this; + ++(*this); + return tmp; + } + }; // Member class DenseIter - using CoordType = typename ChildT::CoordType; - using AccessorType = DefaultReadAccessor; - using Tile = typename DataType::Tile; - static constexpr bool FIXED_SIZE = DataType::FIXED_SIZE; + using DenseIterator = DenseIter; + using ConstDenseIterator = DenseIter; - static constexpr uint32_t LEVEL = 1 + ChildT::LEVEL; // level 0 = leaf + __hostdev__ DenseIterator beginDense() { return DenseIterator(this); } + __hostdev__ ConstDenseIterator cbeginDense() const { return ConstDenseIterator(this); } + __hostdev__ ConstDenseIterator cbeginChildAll() const { return ConstDenseIterator(this); } /// @brief This class cannot be constructed or deleted RootNode() = delete; @@ -2828,7 +2897,7 @@ class RootNode : private RootData __hostdev__ const DataType* data() const { return reinterpret_cast(this); } /// @brief Return a const reference to the index bounding box of all the active values in this tree, i.e. 
in all nodes of the tree - __hostdev__ const BBox& bbox() const { return DataType::mBBox; } + __hostdev__ const BBoxType& bbox() const { return DataType::mBBox; } /// @brief Return the total number of active voxels in the root and all its child nodes. @@ -2838,18 +2907,19 @@ class RootNode : private RootData /// @brief Return the number of tiles encoded in this root node __hostdev__ const uint32_t& tileCount() const { return DataType::mTableSize; } + __hostdev__ const uint32_t& getTableSize() const { return DataType::mTableSize; } /// @brief Return a const reference to the minimum active value encoded in this root node and any of its child nodes - __hostdev__ const ValueType& minimum() const { return this->getMin(); } + __hostdev__ const ValueType& minimum() const { return DataType::mMinimum; } /// @brief Return a const reference to the maximum active value encoded in this root node and any of its child nodes - __hostdev__ const ValueType& maximum() const { return this->getMax(); } + __hostdev__ const ValueType& maximum() const { return DataType::mMaximum; } /// @brief Return a const reference to the average of all the active values encoded in this root node and any of its child nodes __hostdev__ const FloatType& average() const { return DataType::mAverage; } /// @brief Return the variance of all the active values encoded in this root node and any of its child nodes - __hostdev__ FloatType variance() const { return DataType::mStdDevi * DataType::mStdDevi; } + __hostdev__ FloatType variance() const { return math::Pow2(DataType::mStdDevi); } /// @brief Return a const reference to the standard deviation of all the active values encoded in this root node and any of its child nodes __hostdev__ const FloatType& stdDeviation() const { return DataType::mStdDevi; } @@ -2860,31 +2930,42 @@ class RootNode : private RootData /// @brief Return the actual memory footprint of this root node __hostdev__ uint64_t memUsage() const { return sizeof(RootNode) + DataType::mTableSize * sizeof(Tile); } + /// @brief Return true if this RootNode is empty, i.e. contains no values or nodes + __hostdev__ bool isEmpty() const { return DataType::mTableSize == uint32_t(0); } + +#ifdef NANOVDB_NEW_ACCESSOR_METHODS + /// @brief Return the value of the given voxel + __hostdev__ ValueType getValue(const CoordType& ijk) const { return this->template get>(ijk); } + __hostdev__ ValueType getValue(int i, int j, int k) const { return this->template get>(CoordType(i, j, k)); } + __hostdev__ bool isActive(const CoordType& ijk) const { return this->template get>(ijk); } + /// @brief return the state and updates the value of the specified voxel + __hostdev__ bool probeValue(const CoordType& ijk, ValueType& v) const { return this->template get>(ijk, v); } + __hostdev__ const LeafNodeType* probeLeaf(const CoordType& ijk) const { return this->template get>(ijk); } +#else // NANOVDB_NEW_ACCESSOR_METHODS + /// @brief Return the value of the given voxel __hostdev__ ValueType getValue(const CoordType& ijk) const { - if (const Tile* tile = this->findTile(ijk)) { + if (const Tile* tile = DataType::probeTile(ijk)) { return tile->isChild() ? this->getChild(tile)->getValue(ijk) : tile->value; } return DataType::mBackground; } + __hostdev__ ValueType getValue(int i, int j, int k) const { return this->getValue(CoordType(i, j, k)); } __hostdev__ bool isActive(const CoordType& ijk) const { - if (const Tile* tile = this->findTile(ijk)) { + if (const Tile* tile = DataType::probeTile(ijk)) { return tile->isChild() ? 
this->getChild(tile)->isActive(ijk) : tile->state; } return false; } - /// @brief Return true if this RootNode is empty, i.e. contains no values or nodes - __hostdev__ bool isEmpty() const { return DataType::mTableSize == uint32_t(0); } - __hostdev__ bool probeValue(const CoordType& ijk, ValueType& v) const { - if (const Tile* tile = this->findTile(ijk)) { + if (const Tile* tile = DataType::probeTile(ijk)) { if (tile->isChild()) { - const auto *child = this->getChild(tile); + const auto* child = this->getChild(tile); return child->probeValue(ijk, v); } v = tile->value; @@ -2896,14 +2977,52 @@ class RootNode : private RootData __hostdev__ const LeafNodeType* probeLeaf(const CoordType& ijk) const { - const Tile* tile = this->findTile(ijk); + const Tile* tile = DataType::probeTile(ijk); if (tile && tile->isChild()) { - const auto *child = this->getChild(tile); + const auto* child = this->getChild(tile); return child->probeLeaf(ijk); } return nullptr; } +#endif // NANOVDB_NEW_ACCESSOR_METHODS + + __hostdev__ const ChildNodeType* probeChild(const CoordType& ijk) const + { + const Tile* tile = DataType::probeTile(ijk); + return tile && tile->isChild() ? this->getChild(tile) : nullptr; + } + + __hostdev__ ChildNodeType* probeChild(const CoordType& ijk) + { + const Tile* tile = DataType::probeTile(ijk); + return tile && tile->isChild() ? this->getChild(tile) : nullptr; + } + + template + __hostdev__ auto get(const CoordType& ijk, ArgsT&&... args) const + { + if (const Tile* tile = this->probeTile(ijk)) { + if (tile->isChild()) + return this->getChild(tile)->template get(ijk, args...); + return OpT::get(*tile, args...); + } + return OpT::get(*this, args...); + } + + template + // __hostdev__ auto // occasionally fails with NVCC + __hostdev__ decltype(OpT::set(util::declval(), util::declval()...)) + set(const CoordType& ijk, ArgsT&&... args) + { + if (Tile* tile = DataType::probeTile(ijk)) { + if (tile->isChild()) + return this->getChild(tile)->template set(ijk, args...); + return OpT::set(*tile, args...); + } + return OpT::set(*this, args...); + } + private: static_assert(sizeof(DataType) % NANOVDB_DATA_ALIGNMENT == 0, "sizeof(RootData) is misaligned"); static_assert(sizeof(typename DataType::Tile) % NANOVDB_DATA_ALIGNMENT == 0, "sizeof(RootData::Tile) is misaligned"); @@ -2913,62 +3032,30 @@ class RootNode : private RootData template friend class Tree; - - /// @brief Private method to find a Tile of this root node by means of binary-search. This is obviously - /// much slower then direct lookup into a linear array (as in the other nodes) which is exactly - /// why it is important to use the ReadAccessor which amortizes this overhead by node caching and - /// inverse tree traversal! - __hostdev__ const Tile* findTile(const CoordType& ijk) const - { - const Tile* tiles = reinterpret_cast(this + 1); - const auto key = DataType::CoordToKey(ijk); -#if 1 // switch between linear and binary seach - for (uint32_t i = 0; i < DataType::mTableSize; ++i) { - if (tiles[i].key == key) return &tiles[i]; - } -#else// do not enable binary search if tiles are not guaranteed to be sorted!!!!!! 
- // binary-search of pre-sorted elements - int32_t low = 0, high = DataType::mTableSize; // low is inclusive and high is exclusive - while (low != high) { - int mid = low + ((high - low) >> 1); - const Tile* tile = &tiles[mid]; - if (tile->key == key) { - return tile; - } else if (tile->key < key) { - low = mid + 1; - } else { - high = mid; - } - } -#endif - return nullptr; - } - +#ifndef NANOVDB_NEW_ACCESSOR_METHODS /// @brief Private method to return node information and update a ReadAccessor template __hostdev__ typename AccT::NodeInfo getNodeInfoAndCache(const CoordType& ijk, const AccT& acc) const { using NodeInfoT = typename AccT::NodeInfo; - if (const Tile* tile = this->findTile(ijk)) { + if (const Tile* tile = this->probeTile(ijk)) { if (tile->isChild()) { - const auto *child = this->getChild(tile); + const auto* child = this->getChild(tile); acc.insert(ijk, child); return child->getNodeInfoAndCache(ijk, acc); } - return NodeInfoT{LEVEL, ChildT::dim(), tile->value, tile->value, tile->value, - 0, tile->origin(), tile->origin() + CoordType(ChildT::DIM)}; + return NodeInfoT{LEVEL, ChildT::dim(), tile->value, tile->value, tile->value, 0, tile->origin(), tile->origin() + CoordType(ChildT::DIM)}; } - return NodeInfoT{LEVEL, ChildT::dim(), this->minimum(), this->maximum(), - this->average(), this->stdDeviation(), this->bbox()[0], this->bbox()[1]}; + return NodeInfoT{LEVEL, ChildT::dim(), this->minimum(), this->maximum(), this->average(), this->stdDeviation(), this->bbox()[0], this->bbox()[1]}; } /// @brief Private method to return a voxel value and update a ReadAccessor template __hostdev__ ValueType getValueAndCache(const CoordType& ijk, const AccT& acc) const { - if (const Tile* tile = this->findTile(ijk)) { + if (const Tile* tile = this->probeTile(ijk)) { if (tile->isChild()) { - const auto *child = this->getChild(tile); + const auto* child = this->getChild(tile); acc.insert(ijk, child); return child->getValueAndCache(ijk, acc); } @@ -2980,9 +3067,9 @@ class RootNode : private RootData template __hostdev__ bool isActiveAndCache(const CoordType& ijk, const AccT& acc) const { - const Tile* tile = this->findTile(ijk); + const Tile* tile = this->probeTile(ijk); if (tile && tile->isChild()) { - const auto *child = this->getChild(tile); + const auto* child = this->getChild(tile); acc.insert(ijk, child); return child->isActiveAndCache(ijk, acc); } @@ -2992,9 +3079,9 @@ class RootNode : private RootData template __hostdev__ bool probeValueAndCache(const CoordType& ijk, ValueType& v, const AccT& acc) const { - if (const Tile* tile = this->findTile(ijk)) { + if (const Tile* tile = this->probeTile(ijk)) { if (tile->isChild()) { - const auto *child = this->getChild(tile); + const auto* child = this->getChild(tile); acc.insert(ijk, child); return child->probeValueAndCache(ijk, v, acc); } @@ -3008,21 +3095,22 @@ class RootNode : private RootData template __hostdev__ const LeafNodeType* probeLeafAndCache(const CoordType& ijk, const AccT& acc) const { - const Tile* tile = this->findTile(ijk); + const Tile* tile = this->probeTile(ijk); if (tile && tile->isChild()) { - const auto *child = this->getChild(tile); + const auto* child = this->getChild(tile); acc.insert(ijk, child); return child->probeLeafAndCache(ijk, acc); } return nullptr; } +#endif // NANOVDB_NEW_ACCESSOR_METHODS template __hostdev__ uint32_t getDimAndCache(const CoordType& ijk, const RayT& ray, const AccT& acc) const { - if (const Tile* tile = this->findTile(ijk)) { + if (const Tile* tile = this->probeTile(ijk)) { if (tile->isChild()) { 
- const auto *child = this->getChild(tile); + const auto* child = this->getChild(tile); acc.insert(ijk, child); return child->getDimAndCache(ijk, ray, acc); } @@ -3030,6 +3118,39 @@ class RootNode : private RootData } return ChildNodeType::dim(); // background } + + template + //__hostdev__ decltype(OpT::get(util::declval(), util::declval()...)) + __hostdev__ auto + getAndCache(const CoordType& ijk, const AccT& acc, ArgsT&&... args) const + { + if (const Tile* tile = this->probeTile(ijk)) { + if (tile->isChild()) { + const ChildT* child = this->getChild(tile); + acc.insert(ijk, child); + return child->template getAndCache(ijk, acc, args...); + } + return OpT::get(*tile, args...); + } + return OpT::get(*this, args...); + } + + template + // __hostdev__ auto // occasionally fails with NVCC + __hostdev__ decltype(OpT::set(util::declval(), util::declval()...)) + setAndCache(const CoordType& ijk, const AccT& acc, ArgsT&&... args) + { + if (Tile* tile = DataType::probeTile(ijk)) { + if (tile->isChild()) { + ChildT* child = this->getChild(tile); + acc.insert(ijk, child); + return child->template setAndCache(ijk, acc, args...); + } + return OpT::set(*tile, args...); + } + return OpT::set(*this, args...); + } + }; // RootNode class // After the RootNode the memory layout is assumed to be the sorted Tiles @@ -3043,16 +3164,16 @@ template struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) InternalData { using ValueT = typename ChildT::ValueType; - using BuildT = typename ChildT::BuildType;// in rare cases BuildType != ValueType, e.g. then BuildType = ValueMask and ValueType = bool + using BuildT = typename ChildT::BuildType; // in rare cases BuildType != ValueType, e.g. then BuildType = ValueMask and ValueType = bool using StatsT = typename ChildT::FloatType; using CoordT = typename ChildT::CoordType; - using MaskT = typename ChildT::template MaskType; + using MaskT = typename ChildT::template MaskType; static constexpr bool FIXED_SIZE = true; union Tile { ValueT value; - int64_t child;//signed 64 bit byte offset relative to the InternalData!! + int64_t child; //signed 64 bit byte offset relative to this InternalData, i.e. child-pointer = Tile::child + this /// @brief This class cannot be constructed or deleted Tile() = delete; Tile(const Tile&) = delete; @@ -3060,7 +3181,7 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) InternalData ~Tile() = delete; }; - BBox mBBox; // 24B. node bounding box. | + math::BBox mBBox; // 24B. node bounding box. | uint64_t mFlags; // 8B. node flags. | 32B aligned MaskT mValueMask; // LOG2DIM(5): 4096B, LOG2DIM(4): 512B | 32B aligned MaskT mChildMask; // LOG2DIM(5): 4096B, LOG2DIM(4): 512B | 32B aligned @@ -3069,16 +3190,27 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) InternalData ValueT mMaximum; // typically 4B StatsT mAverage; // typically 4B, average of all the active values in this node and its child nodes StatsT mStdDevi; // typically 4B, standard deviation of all the active values in this node and its child nodes + // possible padding, e.g. 28 byte padding when ValueType = bool + + /// @brief Return padding of this class in bytes, due to aliasing and 32B alignment + /// + /// @note The extra bytes are not necessarily at the end, but can come from aliasing of individual data members. + __hostdev__ static constexpr uint32_t padding() + { + return sizeof(InternalData) - (24u + 8u + 2 * (sizeof(MaskT) + sizeof(ValueT) + sizeof(StatsT)) + (1u << (3 * LOG2DIM)) * (sizeof(ValueT) > 8u ? 
sizeof(ValueT) : 8u)); + } alignas(32) Tile mTable[1u << (3 * LOG2DIM)]; // sizeof(ValueT) x (16*16*16 or 32*32*32) - __hostdev__ void setChild(uint32_t n, const void *ptr) + __hostdev__ static uint64_t memUsage() { return sizeof(InternalData); } + + __hostdev__ void setChild(uint32_t n, const void* ptr) { NANOVDB_ASSERT(mChildMask.isOn(n)); - mTable[n].child = PtrDiff(ptr, this); + mTable[n].child = util::PtrDiff(ptr, this); } - template - __hostdev__ void setValue(uint32_t n, const ValueT &v) + template + __hostdev__ void setValue(uint32_t n, const ValueT& v) { NANOVDB_ASSERT(!mChildMask.isOn(n)); mTable[n].value = v; @@ -3088,26 +3220,47 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) InternalData __hostdev__ ChildT* getChild(uint32_t n) { NANOVDB_ASSERT(mChildMask.isOn(n)); - return PtrAdd(this, mTable[n].child); + return util::PtrAdd(this, mTable[n].child); } __hostdev__ const ChildT* getChild(uint32_t n) const { NANOVDB_ASSERT(mChildMask.isOn(n)); - return PtrAdd(this, mTable[n].child); + return util::PtrAdd(this, mTable[n].child); + } + + __hostdev__ ValueT getValue(uint32_t n) const + { + NANOVDB_ASSERT(mChildMask.isOff(n)); + return mTable[n].value; + } + + __hostdev__ bool isActive(uint32_t n) const + { + NANOVDB_ASSERT(mChildMask.isOff(n)); + return mValueMask.isOn(n); } - template + __hostdev__ bool isChild(uint32_t n) const { return mChildMask.isOn(n); } + + template __hostdev__ void setOrigin(const T& ijk) { mBBox[0] = ijk; } - __hostdev__ const ValueT& getMin() const { return mMinimum; } - __hostdev__ const ValueT& getMax() const { return mMaximum; } - __hostdev__ const StatsT& average() const { return mAverage; } + __hostdev__ const ValueT& getMin() const { return mMinimum; } + __hostdev__ const ValueT& getMax() const { return mMaximum; } + __hostdev__ const StatsT& average() const { return mAverage; } __hostdev__ const StatsT& stdDeviation() const { return mStdDevi; } +#if defined(__GNUC__) && !defined(__APPLE__) && !defined(__llvm__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wstringop-overflow" +#endif __hostdev__ void setMin(const ValueT& v) { mMinimum = v; } __hostdev__ void setMax(const ValueT& v) { mMaximum = v; } __hostdev__ void setAvg(const StatsT& v) { mAverage = v; } __hostdev__ void setDev(const StatsT& v) { mStdDevi = v; } +#if defined(__GNUC__) && !defined(__APPLE__) && !defined(__llvm__) +#pragma GCC diagnostic pop +#endif /// @brief This class cannot be constructed or deleted InternalData() = delete; @@ -3116,9 +3269,9 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) InternalData ~InternalData() = delete; }; // InternalData -/// @brief Internal nodes of a VDB treedim(), +/// @brief Internal nodes of a VDB tree template -class InternalNode : private InternalData +class InternalNode : public InternalData { public: using DataType = InternalData; @@ -3131,6 +3284,8 @@ class InternalNode : private InternalData static constexpr bool FIXED_SIZE = DataType::FIXED_SIZE; template using MaskType = typename ChildT::template MaskType; + template + using MaskIterT = typename Mask::template Iterator; static constexpr uint32_t LOG2DIM = Log2Dim; static constexpr uint32_t TOTAL = LOG2DIM + ChildT::TOTAL; // dimension in index space @@ -3140,6 +3295,169 @@ class InternalNode : private InternalData static constexpr uint32_t LEVEL = 1 + ChildT::LEVEL; // level 0 = leaf static constexpr uint64_t NUM_VALUES = uint64_t(1) << (3 * TOTAL); // total voxel count represented by this node + /// @brief Visits child nodes of this node only + template + class ChildIter 
: public MaskIterT + { + static_assert(util::is_same::type, InternalNode>::value, "Invalid ParentT"); + using BaseT = MaskIterT; + using NodeT = typename util::match_const::type; + ParentT* mParent; + + public: + __hostdev__ ChildIter() + : BaseT() + , mParent(nullptr) + { + } + __hostdev__ ChildIter(ParentT* parent) + : BaseT(parent->mChildMask.beginOn()) + , mParent(parent) + { + } + ChildIter& operator=(const ChildIter&) = default; + __hostdev__ NodeT& operator*() const + { + NANOVDB_ASSERT(*this); + return *mParent->getChild(BaseT::pos()); + } + __hostdev__ NodeT* operator->() const + { + NANOVDB_ASSERT(*this); + return mParent->getChild(BaseT::pos()); + } + __hostdev__ CoordType getOrigin() const + { + NANOVDB_ASSERT(*this); + return (*this)->origin(); + } + __hostdev__ CoordType getCoord() const {return this->getOrigin();} + }; // Member class ChildIter + + using ChildIterator = ChildIter; + using ConstChildIterator = ChildIter; + + __hostdev__ ChildIterator beginChild() { return ChildIterator(this); } + __hostdev__ ConstChildIterator cbeginChild() const { return ConstChildIterator(this); } + + /// @brief Visits all tile values in this node, i.e. both inactive and active tiles + class ValueIterator : public MaskIterT + { + using BaseT = MaskIterT; + const InternalNode* mParent; + + public: + __hostdev__ ValueIterator() + : BaseT() + , mParent(nullptr) + { + } + __hostdev__ ValueIterator(const InternalNode* parent) + : BaseT(parent->data()->mChildMask.beginOff()) + , mParent(parent) + { + } + ValueIterator& operator=(const ValueIterator&) = default; + __hostdev__ ValueType operator*() const + { + NANOVDB_ASSERT(*this); + return mParent->data()->getValue(BaseT::pos()); + } + __hostdev__ CoordType getOrigin() const + { + NANOVDB_ASSERT(*this); + return mParent->offsetToGlobalCoord(BaseT::pos()); + } + __hostdev__ CoordType getCoord() const {return this->getOrigin();} + __hostdev__ bool isActive() const + { + NANOVDB_ASSERT(*this); + return mParent->data()->isActive(BaseT::mPos); + } + }; // Member class ValueIterator + + __hostdev__ ValueIterator beginValue() const { return ValueIterator(this); } + __hostdev__ ValueIterator cbeginValueAll() const { return ValueIterator(this); } + + /// @brief Visits active tile values of this node only + class ValueOnIterator : public MaskIterT + { + using BaseT = MaskIterT; + const InternalNode* mParent; + + public: + __hostdev__ ValueOnIterator() + : BaseT() + , mParent(nullptr) + { + } + __hostdev__ ValueOnIterator(const InternalNode* parent) + : BaseT(parent->data()->mValueMask.beginOn()) + , mParent(parent) + { + } + ValueOnIterator& operator=(const ValueOnIterator&) = default; + __hostdev__ ValueType operator*() const + { + NANOVDB_ASSERT(*this); + return mParent->data()->getValue(BaseT::pos()); + } + __hostdev__ CoordType getOrigin() const + { + NANOVDB_ASSERT(*this); + return mParent->offsetToGlobalCoord(BaseT::pos()); + } + __hostdev__ CoordType getCoord() const {return this->getOrigin();} + }; // Member class ValueOnIterator + + __hostdev__ ValueOnIterator beginValueOn() const { return ValueOnIterator(this); } + __hostdev__ ValueOnIterator cbeginValueOn() const { return ValueOnIterator(this); } + + /// @brief Visits all tile values and child nodes of this node + class DenseIterator : public Mask::DenseIterator + { + using BaseT = typename Mask::DenseIterator; + const DataType* mParent; + + public: + __hostdev__ DenseIterator() + : BaseT() + , mParent(nullptr) + { + } + __hostdev__ DenseIterator(const InternalNode* parent) + : BaseT(0) + , 
mParent(parent->data()) + { + } + DenseIterator& operator=(const DenseIterator&) = default; + __hostdev__ const ChildT* probeChild(ValueType& value) const + { + NANOVDB_ASSERT(mParent && bool(*this)); + const ChildT* child = nullptr; + if (mParent->mChildMask.isOn(BaseT::pos())) { + child = mParent->getChild(BaseT::pos()); + } else { + value = mParent->getValue(BaseT::pos()); + } + return child; + } + __hostdev__ bool isValueOn() const + { + NANOVDB_ASSERT(mParent && bool(*this)); + return mParent->isActive(BaseT::pos()); + } + __hostdev__ CoordType getOrigin() const + { + NANOVDB_ASSERT(mParent && bool(*this)); + return mParent->offsetToGlobalCoord(BaseT::pos()); + } + __hostdev__ CoordType getCoord() const {return this->getOrigin();} + }; // Member class DenseIterator + + __hostdev__ DenseIterator beginDense() const { return DenseIterator(this); } + __hostdev__ DenseIterator cbeginChildAll() const { return DenseIterator(this); } // matches openvdb + /// @brief This class cannot be constructed or deleted InternalNode() = delete; InternalNode(const InternalNode&) = delete; @@ -3154,13 +3472,15 @@ class InternalNode : private InternalData __hostdev__ static uint32_t dim() { return 1u << TOTAL; } /// @brief Return memory usage in bytes for the class - __hostdev__ static size_t memUsage() { return sizeof(DataType); } + __hostdev__ static size_t memUsage() { return DataType::memUsage(); } /// @brief Return a const reference to the bit mask of active voxels in this internal node __hostdev__ const MaskType& valueMask() const { return DataType::mValueMask; } + __hostdev__ const MaskType& getValueMask() const { return DataType::mValueMask; } /// @brief Return a const reference to the bit mask of child nodes in this internal node __hostdev__ const MaskType& childMask() const { return DataType::mChildMask; } + __hostdev__ const MaskType& getChildMask() const { return DataType::mChildMask; } /// @brief Return the origin in index space of this leaf node __hostdev__ CoordType origin() const { return DataType::mBBox.min() & ~MASK; } @@ -3175,36 +3495,54 @@ class InternalNode : private InternalData __hostdev__ const FloatType& average() const { return DataType::mAverage; } /// @brief Return the variance of all the active values encoded in this internal node and any of its child nodes - __hostdev__ FloatType variance() const { return DataType::mStdDevi*DataType::mStdDevi; } + __hostdev__ FloatType variance() const { return DataType::mStdDevi * DataType::mStdDevi; } /// @brief Return a const reference to the standard deviation of all the active values encoded in this internal node and any of its child nodes __hostdev__ const FloatType& stdDeviation() const { return DataType::mStdDevi; } /// @brief Return a const reference to the bounding box in index space of active values in this internal node and any of its child nodes - __hostdev__ const BBox& bbox() const { return DataType::mBBox; } + __hostdev__ const math::BBox& bbox() const { return DataType::mBBox; } + + /// @brief If the first entry in this node's table is a tile, return the tile's value. + /// Otherwise, return the result of calling getFirstValue() on the child. + __hostdev__ ValueType getFirstValue() const + { + return DataType::mChildMask.isOn(0) ? this->getChild(0)->getFirstValue() : DataType::getValue(0); + } + /// @brief If the last entry in this node's table is a tile, return the tile's value. + /// Otherwise, return the result of calling getLastValue() on the child. 
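// Illustrative sketch (not part of the patch): the iterators added above let client code
// visit an internal node's children and tile values without touching mChildMask/mValueMask
// directly. For example, summing the active tile values of any instantiated InternalNode
// type (a minimal usage sketch, assuming the node type is already known):
template<typename NodeT>
__hostdev__ typename NodeT::ValueType sumActiveTiles(const NodeT& node)
{
    typename NodeT::ValueType sum{};
    for (auto it = node.beginValueOn(); it; ++it) // visits active tiles only, skips child entries
        sum += *it;
    return sum;
}
// beginChild()/cbeginChild() similarly iterate over the child nodes only, and
// cbeginChildAll() visits every table entry (matching the OpenVDB naming).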
+ __hostdev__ ValueType getLastValue() const + { + return DataType::mChildMask.isOn(SIZE - 1) ? this->getChild(SIZE - 1)->getLastValue() : DataType::getValue(SIZE - 1); + } + +#ifdef NANOVDB_NEW_ACCESSOR_METHODS /// @brief Return the value of the given voxel + __hostdev__ ValueType getValue(const CoordType& ijk) const { return this->template get>(ijk); } + __hostdev__ bool isActive(const CoordType& ijk) const { return this->template get>(ijk); } + /// @brief return the state and updates the value of the specified voxel + __hostdev__ bool probeValue(const CoordType& ijk, ValueType& v) const { return this->template get>(ijk, v); } + __hostdev__ const LeafNodeType* probeLeaf(const CoordType& ijk) const { return this->template get>(ijk); } +#else // NANOVDB_NEW_ACCESSOR_METHODS __hostdev__ ValueType getValue(const CoordType& ijk) const { const uint32_t n = CoordToOffset(ijk); - return DataType::mChildMask.isOn(n) ? this->getChild(n)->getValue(ijk) : DataType::mTable[n].value; + return DataType::mChildMask.isOn(n) ? this->getChild(n)->getValue(ijk) : DataType::getValue(n); } - __hostdev__ bool isActive(const CoordType& ijk) const { const uint32_t n = CoordToOffset(ijk); - return DataType::mChildMask.isOn(n) ? this->getChild(n)->isActive(ijk) : DataType::mValueMask.isOn(n); + return DataType::mChildMask.isOn(n) ? this->getChild(n)->isActive(ijk) : DataType::isActive(n); } - __hostdev__ bool probeValue(const CoordType& ijk, ValueType& v) const { const uint32_t n = CoordToOffset(ijk); if (DataType::mChildMask.isOn(n)) return this->getChild(n)->probeValue(ijk, v); - v = DataType::mTable[n].value; - return DataType::mValueMask.isOn(n); + v = DataType::getValue(n); + return DataType::isActive(n); } - __hostdev__ const LeafNodeType* probeLeaf(const CoordType& ijk) const { const uint32_t n = CoordToOffset(ijk); @@ -3213,18 +3551,25 @@ class InternalNode : private InternalData return nullptr; } +#endif // NANOVDB_NEW_ACCESSOR_METHODS + + __hostdev__ ChildNodeType* probeChild(const CoordType& ijk) + { + const uint32_t n = CoordToOffset(ijk); + return DataType::mChildMask.isOn(n) ? this->getChild(n) : nullptr; + } + __hostdev__ const ChildNodeType* probeChild(const CoordType& ijk) const + { + const uint32_t n = CoordToOffset(ijk); + return DataType::mChildMask.isOn(n) ? this->getChild(n) : nullptr; + } + /// @brief Return the linear offset corresponding to the given coordinate __hostdev__ static uint32_t CoordToOffset(const CoordType& ijk) { -#if 0 - return (((ijk[0] & MASK) >> ChildT::TOTAL) << (2 * LOG2DIM)) + - (((ijk[1] & MASK) >> ChildT::TOTAL) << (LOG2DIM)) + - ((ijk[2] & MASK) >> ChildT::TOTAL); -#else - return (((ijk[0] & MASK) >> ChildT::TOTAL) << (2 * LOG2DIM)) | + return (((ijk[0] & MASK) >> ChildT::TOTAL) << (2 * LOG2DIM)) | // note, we're using bitwise OR instead of + (((ijk[1] & MASK) >> ChildT::TOTAL) << (LOG2DIM)) | ((ijk[2] & MASK) >> ChildT::TOTAL); -#endif } /// @return the local coordinate of the n'th tile or child node @@ -3249,15 +3594,31 @@ class InternalNode : private InternalData return ijk; } - /// @brief Retrun true if this node or any of its child nodes contain active values - __hostdev__ bool isActive() const + /// @brief Return true if this node or any of its child nodes contain active values + __hostdev__ bool isActive() const { return DataType::mFlags & uint32_t(2); } + + template + __hostdev__ auto get(const CoordType& ijk, ArgsT&&... 
args) const + { + const uint32_t n = CoordToOffset(ijk); + if (this->isChild(n)) + return this->getChild(n)->template get(ijk, args...); + return OpT::get(*this, n, args...); + } + + template + //__hostdev__ auto // occasionally fails with NVCC + __hostdev__ decltype(OpT::set(util::declval(), util::declval(), util::declval()...)) + set(const CoordType& ijk, ArgsT&&... args) { - return DataType::mFlags & uint32_t(2); + const uint32_t n = CoordToOffset(ijk); + if (this->isChild(n)) + return this->getChild(n)->template set(ijk, args...); + return OpT::set(*this, n, args...); } private: static_assert(sizeof(DataType) % NANOVDB_DATA_ALIGNMENT == 0, "sizeof(InternalData) is misaligned"); - //static_assert(offsetof(DataType, mTable) % 32 == 0, "InternalData::mTable is misaligned"); template friend class ReadAccessor; @@ -3267,72 +3628,69 @@ class InternalNode : private InternalData template friend class InternalNode; +#ifndef NANOVDB_NEW_ACCESSOR_METHODS /// @brief Private read access method used by the ReadAccessor template __hostdev__ ValueType getValueAndCache(const CoordType& ijk, const AccT& acc) const { const uint32_t n = CoordToOffset(ijk); - if (!DataType::mChildMask.isOn(n)) - return DataType::mTable[n].value; + if (DataType::mChildMask.isOff(n)) + return DataType::getValue(n); const ChildT* child = this->getChild(n); acc.insert(ijk, child); return child->getValueAndCache(ijk, acc); } - - template - __hostdev__ typename AccT::NodeInfo getNodeInfoAndCache(const CoordType& ijk, const AccT& acc) const - { - using NodeInfoT = typename AccT::NodeInfo; - const uint32_t n = CoordToOffset(ijk); - if (!DataType::mChildMask.isOn(n)) { - return NodeInfoT{LEVEL, this->dim(), this->minimum(), this->maximum(), this->average(), - this->stdDeviation(), this->bbox()[0], this->bbox()[1]}; - } - const ChildT* child = this->getChild(n); - acc.insert(ijk, child); - return child->getNodeInfoAndCache(ijk, acc); - } - template __hostdev__ bool isActiveAndCache(const CoordType& ijk, const AccT& acc) const { const uint32_t n = CoordToOffset(ijk); - if (!DataType::mChildMask.isOn(n)) - return DataType::mValueMask.isOn(n); + if (DataType::mChildMask.isOff(n)) + return DataType::isActive(n); const ChildT* child = this->getChild(n); acc.insert(ijk, child); return child->isActiveAndCache(ijk, acc); } - template __hostdev__ bool probeValueAndCache(const CoordType& ijk, ValueType& v, const AccT& acc) const { const uint32_t n = CoordToOffset(ijk); - if (!DataType::mChildMask.isOn(n)) { - v = DataType::mTable[n].value; - return DataType::mValueMask.isOn(n); + if (DataType::mChildMask.isOff(n)) { + v = DataType::getValue(n); + return DataType::isActive(n); } const ChildT* child = this->getChild(n); acc.insert(ijk, child); return child->probeValueAndCache(ijk, v, acc); } - template __hostdev__ const LeafNodeType* probeLeafAndCache(const CoordType& ijk, const AccT& acc) const { const uint32_t n = CoordToOffset(ijk); - if (!DataType::mChildMask.isOn(n)) + if (DataType::mChildMask.isOff(n)) return nullptr; const ChildT* child = this->getChild(n); acc.insert(ijk, child); return child->probeLeafAndCache(ijk, acc); } + template + __hostdev__ typename AccT::NodeInfo getNodeInfoAndCache(const CoordType& ijk, const AccT& acc) const + { + using NodeInfoT = typename AccT::NodeInfo; + const uint32_t n = CoordToOffset(ijk); + if (DataType::mChildMask.isOff(n)) { + return NodeInfoT{LEVEL, this->dim(), this->minimum(), this->maximum(), this->average(), this->stdDeviation(), this->bbox()[0], this->bbox()[1]}; + } + const ChildT* child = 
this->getChild(n); + acc.insert(ijk, child); + return child->getNodeInfoAndCache(ijk, acc); + } +#endif // NANOVDB_NEW_ACCESSOR_METHODS template __hostdev__ uint32_t getDimAndCache(const CoordType& ijk, const RayT& ray, const AccT& acc) const { - if (DataType::mFlags & uint32_t(1)) - this->dim(); //ship this node if first bit is set + if (DataType::mFlags & uint32_t(1u)) + return this->dim(); // skip this node if the 1st bit is set //if (!ray.intersects( this->bbox() )) return 1< return ChildNodeType::dim(); // tile value } + template + __hostdev__ auto + //__hostdev__ decltype(OpT::get(util::declval(), util::declval(), util::declval()...)) + getAndCache(const CoordType& ijk, const AccT& acc, ArgsT&&... args) const + { + const uint32_t n = CoordToOffset(ijk); + if (DataType::mChildMask.isOff(n)) + return OpT::get(*this, n, args...); + const ChildT* child = this->getChild(n); + acc.insert(ijk, child); + return child->template getAndCache(ijk, acc, args...); + } + + template + //__hostdev__ auto // occasionally fails with NVCC + __hostdev__ decltype(OpT::set(util::declval(), util::declval(), util::declval()...)) + setAndCache(const CoordType& ijk, const AccT& acc, ArgsT&&... args) + { + const uint32_t n = CoordToOffset(ijk); + if (DataType::mChildMask.isOff(n)) + return OpT::set(*this, n, args...); + ChildT* child = this->getChild(n); + acc.insert(ijk, child); + return child->template setAndCache(ijk, acc, args...); + } + }; // InternalNode class -// --------------------------> LeafNode <------------------------------------ +// --------------------------> LeafData <------------------------------------ /// @brief Stuct with all the member data of the LeafNode (useful during serialization of an openvdb LeafNode) /// @@ -3359,12 +3743,12 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData using ValueType = ValueT; using BuildType = ValueT; using FloatType = typename FloatTraits::FloatType; - using ArrayType = ValueT;// type used for the internal mValue array + using ArrayType = ValueT; // type used for the internal mValue array static constexpr bool FIXED_SIZE = true; CoordT mBBoxMin; // 12B. uint8_t mBBoxDif[3]; // 3B. - uint8_t mFlags; // 1B. + uint8_t mFlags; // 1B. bit0: skip render?, bit1: has bbox?, bit3: unused, bit4: has stats, bits5,6,7: bit-width for FpN MaskT mValueMask; // LOG2DIM(3): 64B. ValueType mMinimum; // typically 4B @@ -3373,14 +3757,25 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData FloatType mStdDevi; // typically 4B, standard deviation of all the active values in this node and its child nodes alignas(32) ValueType mValues[1u << 3 * LOG2DIM]; - //__hostdev__ const ValueType* values() const { return mValues; } + /// @brief Return padding of this class in bytes, due to aliasing and 32B alignment + /// + /// @note The extra bytes are not necessarily at the end, but can come from aliasing of individual data members. 
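// Illustrative sketch (an assumption based on the bit-field comments above, not an
// official API): the single mFlags byte in the leaf structs encodes several independent
// properties, which the node methods further below test with plain bit masks.
struct LeafFlagsView
{
    uint8_t raw; // copy of LeafData::mFlags
    __hostdev__ bool skipRender() const { return raw & uint8_t(1); }              // bit 0
    __hostdev__ bool hasBBox() const { return raw & uint8_t(2); }                 // bit 1, see LeafNode::hasBBox()
    __hostdev__ bool hasStats() const { return raw & (uint8_t(1) << 4); }         // bit 4, see LeafIndexBase::hasStats()
    __hostdev__ uint8_t fpnBitWidth() const { return uint8_t(1u << (raw >> 5)); } // bits 5-7, FpN leaves only
};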
+ __hostdev__ static constexpr uint32_t padding() + { + return sizeof(LeafData) - (12 + 3 + 1 + sizeof(MaskT) + 2 * (sizeof(ValueT) + sizeof(FloatType)) + (1u << (3 * LOG2DIM)) * sizeof(ValueT)); + } + __hostdev__ static uint64_t memUsage() { return sizeof(LeafData); } + + __hostdev__ static bool hasStats() { return true; } + __hostdev__ ValueType getValue(uint32_t i) const { return mValues[i]; } - __hostdev__ void setValueOnly(uint32_t offset, const ValueType& value) { mValues[offset] = value; } - __hostdev__ void setValue(uint32_t offset, const ValueType& value) + __hostdev__ void setValueOnly(uint32_t offset, const ValueType& value) { mValues[offset] = value; } + __hostdev__ void setValue(uint32_t offset, const ValueType& value) { mValueMask.setOn(offset); mValues[offset] = value; } + __hostdev__ void setOn(uint32_t offset) { mValueMask.setOn(offset); } __hostdev__ ValueType getMin() const { return mMinimum; } __hostdev__ ValueType getMax() const { return mMaximum; } @@ -3392,9 +3787,15 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData __hostdev__ void setAvg(const FloatType& v) { mAverage = v; } __hostdev__ void setDev(const FloatType& v) { mStdDevi = v; } - template + template __hostdev__ void setOrigin(const T& ijk) { mBBoxMin = ijk; } + __hostdev__ void fill(const ValueType& v) + { + for (auto *p = mValues, *q = p + 512; p != q; ++p) + *p = v; + } + /// @brief This class cannot be constructed or deleted LeafData() = delete; LeafData(const LeafData&) = delete; @@ -3402,6 +3803,8 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData ~LeafData() = delete; }; // LeafData +// --------------------------> LeafFnBase <------------------------------------ + /// @brief Base-class for quantized float leaf nodes template class MaskT, uint32_t LOG2DIM> struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafFnBase @@ -3413,47 +3816,62 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafFnBase CoordT mBBoxMin; // 12B. uint8_t mBBoxDif[3]; // 3B. - uint8_t mFlags; // 1B. + uint8_t mFlags; // 1B. bit0: skip render?, bit1: has bbox?, bit3: unused, bit4: has stats, bits5,6,7: bit-width for FpN MaskT mValueMask; // LOG2DIM(3): 64B. - float mMinimum; // 4B - minimum of ALL values in this node - float mQuantum; // = (max - min)/15 4B - uint16_t mMin, mMax, mAvg, mDev;// quantized representations of statistics of active values + float mMinimum; // 4B - minimum of ALL values in this node + float mQuantum; // = (max - min)/15 4B + uint16_t mMin, mMax, mAvg, mDev; // quantized representations of statistics of active values + // no padding since it's always 32B aligned + __hostdev__ static uint64_t memUsage() { return sizeof(LeafFnBase); } - void init(float min, float max, uint8_t bitWidth) + __hostdev__ static bool hasStats() { return true; } + + /// @brief Return padding of this class in bytes, due to aliasing and 32B alignment + /// + /// @note The extra bytes are not necessarily at the end, but can come from aliasing of individual data members. 
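// Illustrative sketch (not part of the patch) of the fixed-point scheme used by
// LeafFnBase above and the Fp4/Fp8/Fp16 leaves below: a leaf stores a per-node
// (mMinimum, mQuantum) pair and each voxel value as an integer code, so
//   code  = round((v - min) / quantum),   value = code * quantum + min,
// with quantum = (max - min) / (2^bitWidth - 1). For Fp4, two codes share one byte.
#include <cstdint>
struct FpSketch
{
    float minimum, quantum;
    FpSketch(float mn, float mx, uint8_t bitWidth)
        : minimum(mn)
        , quantum((mx - mn) / float((1u << bitWidth) - 1u))
    {
    }
    uint16_t encode(float v) const { return uint16_t((v - minimum) / quantum + 0.5f); }
    float    decode(uint16_t code) const { return code * quantum + minimum; }
    // 4-bit variant: even voxel indices live in the low nibble, odd ones in the high nibble
    static uint8_t unpack4(const uint8_t* codes, uint32_t i) { return (codes[i >> 1] >> ((i & 1u) << 2)) & uint8_t(15); }
};
// The price of the compression is a quantization error of at most quantum/2 per voxel,
// which is why these leaf types are typically produced by quantizing an existing
// full-precision float grid rather than edited in place.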
+ __hostdev__ static constexpr uint32_t padding() + { + return sizeof(LeafFnBase) - (12 + 3 + 1 + sizeof(MaskT) + 2 * 4 + 4 * 2); + } + __hostdev__ void init(float min, float max, uint8_t bitWidth) { mMinimum = min; - mQuantum = (max - min)/float((1 << bitWidth)-1); + mQuantum = (max - min) / float((1 << bitWidth) - 1); } + __hostdev__ void setOn(uint32_t offset) { mValueMask.setOn(offset); } + /// @brief return the quantized minimum of the active values in this node - __hostdev__ float getMin() const { return mMin*mQuantum + mMinimum; } + __hostdev__ float getMin() const { return mMin * mQuantum + mMinimum; } /// @brief return the quantized maximum of the active values in this node - __hostdev__ float getMax() const { return mMax*mQuantum + mMinimum; } + __hostdev__ float getMax() const { return mMax * mQuantum + mMinimum; } /// @brief return the quantized average of the active values in this node - __hostdev__ float getAvg() const { return mAvg*mQuantum + mMinimum; } + __hostdev__ float getAvg() const { return mAvg * mQuantum + mMinimum; } /// @brief return the quantized standard deviation of the active values in this node /// @note 0 <= StdDev <= max-min or 0 <= StdDev/(max-min) <= 1 - __hostdev__ float getDev() const { return mDev*mQuantum; } + __hostdev__ float getDev() const { return mDev * mQuantum; } /// @note min <= X <= max or 0 <= (X-min)/(min-max) <= 1 - __hostdev__ void setMin(float min) { mMin = uint16_t((min - mMinimum)/mQuantum + 0.5f); } + __hostdev__ void setMin(float min) { mMin = uint16_t((min - mMinimum) / mQuantum + 0.5f); } /// @note min <= X <= max or 0 <= (X-min)/(min-max) <= 1 - __hostdev__ void setMax(float max) { mMax = uint16_t((max - mMinimum)/mQuantum + 0.5f); } + __hostdev__ void setMax(float max) { mMax = uint16_t((max - mMinimum) / mQuantum + 0.5f); } /// @note min <= avg <= max or 0 <= (avg-min)/(min-max) <= 1 - __hostdev__ void setAvg(float avg) { mAvg = uint16_t((avg - mMinimum)/mQuantum + 0.5f); } + __hostdev__ void setAvg(float avg) { mAvg = uint16_t((avg - mMinimum) / mQuantum + 0.5f); } /// @note 0 <= StdDev <= max-min or 0 <= StdDev/(max-min) <= 1 - __hostdev__ void setDev(float dev) { mDev = uint16_t(dev/mQuantum + 0.5f); } + __hostdev__ void setDev(float dev) { mDev = uint16_t(dev / mQuantum + 0.5f); } - template + template __hostdev__ void setOrigin(const T& ijk) { mBBoxMin = ijk; } -};// LeafFnBase +}; // LeafFnBase + +// --------------------------> LeafData <------------------------------------ /// @brief Stuct with all the member data of the LeafNode (useful during serialization of an openvdb LeafNode) /// @@ -3464,18 +3882,25 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData; using BuildType = Fp4; - using ArrayType = uint8_t;// type used for the internal mValue array + using ArrayType = uint8_t; // type used for the internal mValue array static constexpr bool FIXED_SIZE = true; - alignas(32) uint8_t mCode[1u << (3 * LOG2DIM - 1)]; + alignas(32) uint8_t mCode[1u << (3 * LOG2DIM - 1)]; // LeafFnBase is 32B aligned and so is mCode + + __hostdev__ static constexpr uint64_t memUsage() { return sizeof(LeafData); } + __hostdev__ static constexpr uint32_t padding() + { + static_assert(BaseT::padding() == 0, "expected no padding in LeafFnBase"); + return sizeof(LeafData) - sizeof(BaseT) - (1u << (3 * LOG2DIM - 1)); + } __hostdev__ static constexpr uint8_t bitWidth() { return 4u; } - __hostdev__ float getValue(uint32_t i) const + __hostdev__ float getValue(uint32_t i) const { #if 0 const uint8_t c = mCode[i>>1]; return ( (i&1) ? 
c >> 4 : c & uint8_t(15) )*BaseT::mQuantum + BaseT::mMinimum; #else - return ((mCode[i>>1] >> ((i&1)<<2)) & uint8_t(15))*BaseT::mQuantum + BaseT::mMinimum; + return ((mCode[i >> 1] >> ((i & 1) << 2)) & uint8_t(15)) * BaseT::mQuantum + BaseT::mMinimum; #endif } @@ -3486,20 +3911,28 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData +// --------------------------> LeafBase <------------------------------------ + template class MaskT, uint32_t LOG2DIM> struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData : public LeafFnBase { using BaseT = LeafFnBase; using BuildType = Fp8; - using ArrayType = uint8_t;// type used for the internal mValue array + using ArrayType = uint8_t; // type used for the internal mValue array static constexpr bool FIXED_SIZE = true; alignas(32) uint8_t mCode[1u << 3 * LOG2DIM]; + __hostdev__ static constexpr int64_t memUsage() { return sizeof(LeafData); } + __hostdev__ static constexpr uint32_t padding() + { + static_assert(BaseT::padding() == 0, "expected no padding in LeafFnBase"); + return sizeof(LeafData) - sizeof(BaseT) - (1u << 3 * LOG2DIM); + } __hostdev__ static constexpr uint8_t bitWidth() { return 8u; } - __hostdev__ float getValue(uint32_t i) const + __hostdev__ float getValue(uint32_t i) const { - return mCode[i]*BaseT::mQuantum + BaseT::mMinimum;// code * (max-min)/255 + min + return mCode[i] * BaseT::mQuantum + BaseT::mMinimum; // code * (max-min)/255 + min } /// @brief This class cannot be constructed or deleted LeafData() = delete; @@ -3508,20 +3941,29 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData +// --------------------------> LeafData <------------------------------------ + template class MaskT, uint32_t LOG2DIM> struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData : public LeafFnBase { using BaseT = LeafFnBase; using BuildType = Fp16; - using ArrayType = uint16_t;// type used for the internal mValue array + using ArrayType = uint16_t; // type used for the internal mValue array static constexpr bool FIXED_SIZE = true; alignas(32) uint16_t mCode[1u << 3 * LOG2DIM]; + __hostdev__ static constexpr uint64_t memUsage() { return sizeof(LeafData); } + __hostdev__ static constexpr uint32_t padding() + { + static_assert(BaseT::padding() == 0, "expected no padding in LeafFnBase"); + return sizeof(LeafData) - sizeof(BaseT) - 2 * (1u << 3 * LOG2DIM); + } + __hostdev__ static constexpr uint8_t bitWidth() { return 16u; } - __hostdev__ float getValue(uint32_t i) const + __hostdev__ float getValue(uint32_t i) const { - return mCode[i]*BaseT::mQuantum + BaseT::mMinimum;// code * (max-min)/65535 + min + return mCode[i] * BaseT::mQuantum + BaseT::mMinimum; // code * (max-min)/65535 + min } /// @brief This class cannot be constructed or deleted @@ -3531,54 +3973,61 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData +// --------------------------> LeafData <------------------------------------ + template class MaskT, uint32_t LOG2DIM> struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData : public LeafFnBase -{ +{ // this class has no additional data members, however every instance is immediately followed by + // bitWidth*64 bytes. 
Since its base class is 32B aligned so are the bitWidth*64 bytes using BaseT = LeafFnBase; using BuildType = FpN; static constexpr bool FIXED_SIZE = false; + __hostdev__ static constexpr uint32_t padding() + { + static_assert(BaseT::padding() == 0, "expected no padding in LeafFnBase"); + return 0; + } - __hostdev__ uint8_t bitWidth() const { return 1 << (BaseT::mFlags >> 5); }// 4,8,16,32 = 2^(2,3,4,5) - __hostdev__ size_t memUsage() const { return sizeof(*this) + this->bitWidth()*64; } - __hostdev__ static size_t memUsage(uint32_t bitWidth) { return 96u + bitWidth*64; } - __hostdev__ float getValue(uint32_t i) const + __hostdev__ uint8_t bitWidth() const { return 1 << (BaseT::mFlags >> 5); } // 4,8,16,32 = 2^(2,3,4,5) + __hostdev__ size_t memUsage() const { return sizeof(*this) + this->bitWidth() * 64; } + __hostdev__ static size_t memUsage(uint32_t bitWidth) { return 96u + bitWidth * 64; } + __hostdev__ float getValue(uint32_t i) const { -#ifdef NANOVDB_FPN_BRANCHLESS// faster - const int b = BaseT::mFlags >> 5;// b = 0, 1, 2, 3, 4 corresponding to 1, 2, 4, 8, 16 bits -#if 0// use LUT +#ifdef NANOVDB_FPN_BRANCHLESS // faster + const int b = BaseT::mFlags >> 5; // b = 0, 1, 2, 3, 4 corresponding to 1, 2, 4, 8, 16 bits +#if 0 // use LUT uint16_t code = reinterpret_cast(this + 1)[i >> (4 - b)]; const static uint8_t shift[5] = {15, 7, 3, 1, 0}; const static uint16_t mask[5] = {1, 3, 15, 255, 65535}; code >>= (i & shift[b]) << b; code &= mask[b]; -#else// no LUT +#else // no LUT uint32_t code = reinterpret_cast(this + 1)[i >> (5 - b)]; - //code >>= (i & ((16 >> b) - 1)) << b; code >>= (i & ((32 >> b) - 1)) << b; - code &= (1 << (1 << b)) - 1; + code &= (1 << (1 << b)) - 1; #endif -#else// use branched version (slow) +#else // use branched version (slow) float code; - auto *values = reinterpret_cast(this+1); + auto* values = reinterpret_cast(this + 1); switch (BaseT::mFlags >> 5) { - case 0u:// 1 bit float - code = float((values[i>>3] >> (i&7) ) & uint8_t(1)); - break; - case 1u:// 2 bits float - code = float((values[i>>2] >> ((i&3)<<1)) & uint8_t(3)); - break; - case 2u:// 4 bits float - code = float((values[i>>1] >> ((i&1)<<2)) & uint8_t(15)); - break; - case 3u:// 8 bits float - code = float(values[i]); - break; - default:// 16 bits float - code = float(reinterpret_cast(values)[i]); + case 0u: // 1 bit float + code = float((values[i >> 3] >> (i & 7)) & uint8_t(1)); + break; + case 1u: // 2 bits float + code = float((values[i >> 2] >> ((i & 3) << 1)) & uint8_t(3)); + break; + case 2u: // 4 bits float + code = float((values[i >> 1] >> ((i & 1) << 2)) & uint8_t(15)); + break; + case 3u: // 8 bits float + code = float(values[i]); + break; + default: // 16 bits float + code = float(reinterpret_cast(values)[i]); } #endif - return float(code) * BaseT::mQuantum + BaseT::mMinimum;// code * (max-min)/UNITS + min + return float(code) * BaseT::mQuantum + BaseT::mMinimum; // code * (max-min)/UNITS + min } /// @brief This class cannot be constructed or deleted @@ -3588,6 +4037,8 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData +// --------------------------> LeafData <------------------------------------ + // Partial template specialization of LeafData with bool template class MaskT, uint32_t LOG2DIM> struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData @@ -3596,34 +4047,37 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData) == sizeof(Mask), "Mismatching sizeof"); using ValueType = bool; using BuildType = bool; - using FloatType = bool;// dummy value type - using ArrayType = MaskT;// type 
used for the internal mValue array + using FloatType = bool; // dummy value type + using ArrayType = MaskT; // type used for the internal mValue array static constexpr bool FIXED_SIZE = true; CoordT mBBoxMin; // 12B. uint8_t mBBoxDif[3]; // 3B. - uint8_t mFlags; // 1B. + uint8_t mFlags; // 1B. bit0: skip render?, bit1: has bbox?, bit3: unused, bit4: has stats, bits5,6,7: bit-width for FpN MaskT mValueMask; // LOG2DIM(3): 64B. MaskT mValues; // LOG2DIM(3): 64B. + uint64_t mPadding[2]; // 16B padding to 32B alignment - //__hostdev__ const ValueType* values() const { return nullptr; } + __hostdev__ static constexpr uint32_t padding() { return sizeof(LeafData) - 12u - 3u - 1u - 2 * sizeof(MaskT) - 16u; } + __hostdev__ static uint64_t memUsage() { return sizeof(LeafData); } + __hostdev__ static bool hasStats() { return false; } __hostdev__ bool getValue(uint32_t i) const { return mValues.isOn(i); } - __hostdev__ bool getMin() const { return false; }// dummy - __hostdev__ bool getMax() const { return false; }// dummy - __hostdev__ bool getAvg() const { return false; }// dummy - __hostdev__ bool getDev() const { return false; }// dummy + __hostdev__ bool getMin() const { return false; } // dummy + __hostdev__ bool getMax() const { return false; } // dummy + __hostdev__ bool getAvg() const { return false; } // dummy + __hostdev__ bool getDev() const { return false; } // dummy __hostdev__ void setValue(uint32_t offset, bool v) { mValueMask.setOn(offset); mValues.set(offset, v); } + __hostdev__ void setOn(uint32_t offset) { mValueMask.setOn(offset); } + __hostdev__ void setMin(const bool&) {} // no-op + __hostdev__ void setMax(const bool&) {} // no-op + __hostdev__ void setAvg(const bool&) {} // no-op + __hostdev__ void setDev(const bool&) {} // no-op - __hostdev__ void setMin(const bool&) {}// no-op - __hostdev__ void setMax(const bool&) {}// no-op - __hostdev__ void setAvg(const bool&) {}// no-op - __hostdev__ void setDev(const bool&) {}// no-op - - template + template __hostdev__ void setOrigin(const T& ijk) { mBBoxMin = ijk; } /// @brief This class cannot be constructed or deleted @@ -3633,6 +4087,8 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData +// --------------------------> LeafData <------------------------------------ + // Partial template specialization of LeafData with ValueMask template class MaskT, uint32_t LOG2DIM> struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData @@ -3641,32 +4097,217 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData) == sizeof(Mask), "Mismatching sizeof"); using ValueType = bool; using BuildType = ValueMask; - using FloatType = bool;// dummy value type - using ArrayType = void;// type used for the internal mValue array - void means missing + using FloatType = bool; // dummy value type + using ArrayType = void; // type used for the internal mValue array - void means missing static constexpr bool FIXED_SIZE = true; CoordT mBBoxMin; // 12B. uint8_t mBBoxDif[3]; // 3B. - uint8_t mFlags; // 1B. + uint8_t mFlags; // 1B. bit0: skip render?, bit1: has bbox?, bit3: unused, bit4: has stats, bits5,6,7: bit-width for FpN MaskT mValueMask; // LOG2DIM(3): 64B. 
+ uint64_t mPadding[2]; // 16B padding to 32B alignment + + __hostdev__ static uint64_t memUsage() { return sizeof(LeafData); } + __hostdev__ static bool hasStats() { return false; } + __hostdev__ static constexpr uint32_t padding() + { + return sizeof(LeafData) - (12u + 3u + 1u + sizeof(MaskT) + 2 * 8u); + } - //__hostdev__ const ValueType* values() const { return nullptr; } __hostdev__ bool getValue(uint32_t i) const { return mValueMask.isOn(i); } - __hostdev__ bool getMin() const { return false; }// dummy - __hostdev__ bool getMax() const { return false; }// dummy - __hostdev__ bool getAvg() const { return false; }// dummy - __hostdev__ bool getDev() const { return false; }// dummy - __hostdev__ void setValue(uint32_t offset, bool) + __hostdev__ bool getMin() const { return false; } // dummy + __hostdev__ bool getMax() const { return false; } // dummy + __hostdev__ bool getAvg() const { return false; } // dummy + __hostdev__ bool getDev() const { return false; } // dummy + __hostdev__ void setValue(uint32_t offset, bool) { mValueMask.setOn(offset); } + __hostdev__ void setOn(uint32_t offset) { mValueMask.setOn(offset); } + __hostdev__ void setMin(const ValueType&) {} // no-op + __hostdev__ void setMax(const ValueType&) {} // no-op + __hostdev__ void setAvg(const FloatType&) {} // no-op + __hostdev__ void setDev(const FloatType&) {} // no-op + + template + __hostdev__ void setOrigin(const T& ijk) { mBBoxMin = ijk; } + + /// @brief This class cannot be constructed or deleted + LeafData() = delete; + LeafData(const LeafData&) = delete; + LeafData& operator=(const LeafData&) = delete; + ~LeafData() = delete; +}; // LeafData + +// --------------------------> LeafIndexBase <------------------------------------ + +// Partial template specialization of LeafData with ValueIndex +template class MaskT, uint32_t LOG2DIM> +struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafIndexBase +{ + static_assert(sizeof(CoordT) == sizeof(Coord), "Mismatching sizeof"); + static_assert(sizeof(MaskT) == sizeof(Mask), "Mismatching sizeof"); + using ValueType = uint64_t; + using FloatType = uint64_t; + using ArrayType = void; // type used for the internal mValue array - void means missing + static constexpr bool FIXED_SIZE = true; + + CoordT mBBoxMin; // 12B. + uint8_t mBBoxDif[3]; // 3B. + uint8_t mFlags; // 1B. bit0: skip render?, bit1: has bbox?, bit3: unused, bit4: has stats, bits5,6,7: bit-width for FpN + MaskT mValueMask; // LOG2DIM(3): 64B. 
+ uint64_t mOffset, mPrefixSum; // 8B offset to first value in this leaf node and 9-bit prefix sum + __hostdev__ static constexpr uint32_t padding() + { + return sizeof(LeafIndexBase) - (12u + 3u + 1u + sizeof(MaskT) + 2 * 8u); + } + __hostdev__ static uint64_t memUsage() { return sizeof(LeafIndexBase); } + __hostdev__ bool hasStats() const { return mFlags & (uint8_t(1) << 4); } + // return the offset to the first value indexed by this leaf node + __hostdev__ const uint64_t& firstOffset() const { return mOffset; } + __hostdev__ void setMin(const ValueType&) {} // no-op + __hostdev__ void setMax(const ValueType&) {} // no-op + __hostdev__ void setAvg(const FloatType&) {} // no-op + __hostdev__ void setDev(const FloatType&) {} // no-op + __hostdev__ void setOn(uint32_t offset) { mValueMask.setOn(offset); } + template + __hostdev__ void setOrigin(const T& ijk) { mBBoxMin = ijk; } + +protected: + /// @brief This class should be used as an abstract class and only constructed or deleted via child classes + LeafIndexBase() = default; + LeafIndexBase(const LeafIndexBase&) = default; + LeafIndexBase& operator=(const LeafIndexBase&) = default; + ~LeafIndexBase() = default; +}; // LeafIndexBase + +// --------------------------> LeafData <------------------------------------ + +// Partial template specialization of LeafData with ValueIndex +template class MaskT, uint32_t LOG2DIM> +struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData + : public LeafIndexBase +{ + using BaseT = LeafIndexBase; + using BuildType = ValueIndex; + // return the total number of values indexed by this leaf node, excluding the optional 4 stats + __hostdev__ static uint32_t valueCount() { return uint32_t(512); } // 8^3 = 2^9 + // return the offset to the last value indexed by this leaf node (disregarding optional stats) + __hostdev__ uint64_t lastOffset() const { return BaseT::mOffset + 511u; } // 2^9 - 1 + // if stats are available, they are always placed after the last voxel value in this leaf node + __hostdev__ uint64_t getMin() const { return this->hasStats() ? BaseT::mOffset + 512u : 0u; } + __hostdev__ uint64_t getMax() const { return this->hasStats() ? BaseT::mOffset + 513u : 0u; } + __hostdev__ uint64_t getAvg() const { return this->hasStats() ? BaseT::mOffset + 514u : 0u; } + __hostdev__ uint64_t getDev() const { return this->hasStats() ? BaseT::mOffset + 515u : 0u; } + __hostdev__ uint64_t getValue(uint32_t i) const { return BaseT::mOffset + i; } // dense leaf node with active and inactive voxels +}; // LeafData + +// --------------------------> LeafData <------------------------------------ + +template class MaskT, uint32_t LOG2DIM> +struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData + : public LeafIndexBase +{ + using BaseT = LeafIndexBase; + using BuildType = ValueOnIndex; + __hostdev__ uint32_t valueCount() const + { + return util::countOn(BaseT::mValueMask.words()[7]) + (BaseT::mPrefixSum >> 54u & 511u); // last 9 bits of mPrefixSum do not account for the last word in mValueMask + } + __hostdev__ uint64_t lastOffset() const { return BaseT::mOffset + this->valueCount() - 1u; } + __hostdev__ uint64_t getMin() const { return this->hasStats() ? this->lastOffset() + 1u : 0u; } + __hostdev__ uint64_t getMax() const { return this->hasStats() ? this->lastOffset() + 2u : 0u; } + __hostdev__ uint64_t getAvg() const { return this->hasStats() ? this->lastOffset() + 3u : 0u; } + __hostdev__ uint64_t getDev() const { return this->hasStats() ? 
this->lastOffset() + 4u : 0u; } + __hostdev__ uint64_t getValue(uint32_t i) const + { + //return mValueMask.isOn(i) ? mOffset + mValueMask.countOn(i) : 0u;// for debugging + uint32_t n = i >> 6; + const uint64_t w = BaseT::mValueMask.words()[n], mask = uint64_t(1) << (i & 63u); + if (!(w & mask)) return uint64_t(0); // if i'th value is inactive return offset to background value + uint64_t sum = BaseT::mOffset + util::countOn(w & (mask - 1u)); + if (n--) sum += BaseT::mPrefixSum >> (9u * n) & 511u; + return sum; + } +}; // LeafData + +// --------------------------> LeafData <------------------------------------ + +template class MaskT, uint32_t LOG2DIM> +struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData + : public LeafData +{ + using BuildType = ValueIndexMask; + MaskT mMask; + __hostdev__ static uint64_t memUsage() { return sizeof(LeafData); } + __hostdev__ bool isMaskOn(uint32_t offset) const { return mMask.isOn(offset); } + __hostdev__ void setMask(uint32_t offset, bool v) { mMask.set(offset, v); } +}; // LeafData + +template class MaskT, uint32_t LOG2DIM> +struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData + : public LeafData +{ + using BuildType = ValueOnIndexMask; + MaskT mMask; + __hostdev__ static uint64_t memUsage() { return sizeof(LeafData); } + __hostdev__ bool isMaskOn(uint32_t offset) const { return mMask.isOn(offset); } + __hostdev__ void setMask(uint32_t offset, bool v) { mMask.set(offset, v); } +}; // LeafData + +// --------------------------> LeafData <------------------------------------ + +template class MaskT, uint32_t LOG2DIM> +struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData +{ + static_assert(sizeof(CoordT) == sizeof(Coord), "Mismatching sizeof"); + static_assert(sizeof(MaskT) == sizeof(Mask), "Mismatching sizeof"); + using ValueType = uint64_t; + using BuildType = Point; + using FloatType = typename FloatTraits::FloatType; + using ArrayType = uint16_t; // type used for the internal mValue array + static constexpr bool FIXED_SIZE = true; + + CoordT mBBoxMin; // 12B. + uint8_t mBBoxDif[3]; // 3B. + uint8_t mFlags; // 1B. bit0: skip render?, bit1: has bbox?, bit3: unused, bit4: has stats, bits5,6,7: bit-width for FpN + MaskT mValueMask; // LOG2DIM(3): 64B. + + uint64_t mOffset; // 8B + uint64_t mPointCount; // 8B + alignas(32) uint16_t mValues[1u << 3 * LOG2DIM]; // 1KB + // no padding + + /// @brief Return padding of this class in bytes, due to aliasing and 32B alignment + /// + /// @note The extra bytes are not necessarily at the end, but can come from aliasing of individual data members. + __hostdev__ static constexpr uint32_t padding() + { + return sizeof(LeafData) - (12u + 3u + 1u + sizeof(MaskT) + 2 * 8u + (1u << 3 * LOG2DIM) * 2u); + } + __hostdev__ static uint64_t memUsage() { return sizeof(LeafData); } + + __hostdev__ uint64_t offset() const { return mOffset; } + __hostdev__ uint64_t pointCount() const { return mPointCount; } + __hostdev__ uint64_t first(uint32_t i) const { return i ? 
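// Illustrative sketch (not part of the patch) of the index-leaf addressing above. A
// ValueIndex leaf gives every one of its 512 voxels a slot, so getValue(i) is simply
// mOffset + i. A ValueOnIndex leaf only gives slots to active voxels: mPrefixSum packs,
// in seven 9-bit fields, the running population counts of mask words 0..6, and the slot
// of an active voxel i is the base offset plus the number of active voxels before it.
// A standalone version of that lookup (countOn standing in for util::countOn):
#include <cstdint>
inline uint32_t countOn(uint64_t w) { return uint32_t(__builtin_popcountll(w)); } // assumes GCC/Clang

inline uint64_t buildPrefixSum(const uint64_t words[8])
{
    uint64_t prefix = 0, sum = 0;
    for (int n = 0; n < 7; ++n) { // the last mask word never needs a partial sum
        sum += countOn(words[n]);
        prefix |= sum << (9 * n); // each partial sum is <= 448 and fits in 9 bits
    }
    return prefix;
}

inline uint64_t activeValueIndex(const uint64_t words[8], uint64_t prefixSum, uint64_t offset, uint32_t i)
{
    uint32_t n = i >> 6; // which 64-bit mask word voxel i falls into
    const uint64_t w = words[n], mask = uint64_t(1) << (i & 63u);
    if (!(w & mask)) return 0u; // inactive voxels all map to the shared background slot 0
    uint64_t sum = offset + countOn(w & (mask - 1u)); // active voxels before i in word n
    if (n--) sum += (prefixSum >> (9u * n)) & 511u;   // plus active voxels in words 0..n-1
    return sum;
}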
uint64_t(mValues[i - 1u]) + mOffset : mOffset; } + __hostdev__ uint64_t last(uint32_t i) const { return uint64_t(mValues[i]) + mOffset; } + __hostdev__ uint64_t getValue(uint32_t i) const { return uint64_t(mValues[i]); } + __hostdev__ void setValueOnly(uint32_t offset, uint16_t value) { mValues[offset] = value; } + __hostdev__ void setValue(uint32_t offset, uint16_t value) { mValueMask.setOn(offset); + mValues[offset] = value; } + __hostdev__ void setOn(uint32_t offset) { mValueMask.setOn(offset); } + + __hostdev__ ValueType getMin() const { return mOffset; } + __hostdev__ ValueType getMax() const { return mPointCount; } + __hostdev__ FloatType getAvg() const { return 0.0f; } + __hostdev__ FloatType getDev() const { return 0.0f; } - __hostdev__ void setMin(const ValueType&) {}// no-op - __hostdev__ void setMax(const ValueType&) {}// no-op - __hostdev__ void setAvg(const FloatType&) {}// no-op - __hostdev__ void setDev(const FloatType&) {}// no-op + __hostdev__ void setMin(const ValueType&) {} + __hostdev__ void setMax(const ValueType&) {} + __hostdev__ void setAvg(const FloatType&) {} + __hostdev__ void setDev(const FloatType&) {} - template + template __hostdev__ void setOrigin(const T& ijk) { mBBoxMin = ijk; } /// @brief This class cannot be constructed or deleted @@ -3674,18 +4315,22 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData +}; // LeafData + +// --------------------------> LeafNode <------------------------------------ /// @brief Leaf nodes of the VDB tree. (defaults to 8x8x8 = 512 voxels) template class MaskT = Mask, uint32_t Log2Dim = 3> -class LeafNode : private LeafData +class LeafNode : public LeafData { public: struct ChildNodeType { + static constexpr uint32_t TOTAL = 0; + static constexpr uint32_t DIM = 1; __hostdev__ static uint32_t dim() { return 1u; } }; // Voxel using LeafNodeType = LeafNode; @@ -3697,8 +4342,127 @@ class LeafNode : private LeafData static constexpr bool FIXED_SIZE = DataType::FIXED_SIZE; template using MaskType = MaskT; + template + using MaskIterT = typename Mask::template Iterator; + + /// @brief Visits all active values in a leaf node + class ValueOnIterator : public MaskIterT + { + using BaseT = MaskIterT; + const LeafNode* mParent; + + public: + __hostdev__ ValueOnIterator() + : BaseT() + , mParent(nullptr) + { + } + __hostdev__ ValueOnIterator(const LeafNode* parent) + : BaseT(parent->data()->mValueMask.beginOn()) + , mParent(parent) + { + } + ValueOnIterator& operator=(const ValueOnIterator&) = default; + __hostdev__ ValueType operator*() const + { + NANOVDB_ASSERT(*this); + return mParent->getValue(BaseT::pos()); + } + __hostdev__ CoordT getCoord() const + { + NANOVDB_ASSERT(*this); + return mParent->offsetToGlobalCoord(BaseT::pos()); + } + }; // Member class ValueOnIterator + + __hostdev__ ValueOnIterator beginValueOn() const { return ValueOnIterator(this); } + __hostdev__ ValueOnIterator cbeginValueOn() const { return ValueOnIterator(this); } + + /// @brief Visits all inactive values in a leaf node + class ValueOffIterator : public MaskIterT + { + using BaseT = MaskIterT; + const LeafNode* mParent; + + public: + __hostdev__ ValueOffIterator() + : BaseT() + , mParent(nullptr) + { + } + __hostdev__ ValueOffIterator(const LeafNode* parent) + : BaseT(parent->data()->mValueMask.beginOff()) + , mParent(parent) + { + } + ValueOffIterator& operator=(const ValueOffIterator&) = default; + __hostdev__ ValueType operator*() const + { + NANOVDB_ASSERT(*this); + return mParent->getValue(BaseT::pos()); + } + __hostdev__ CoordT getCoord() const + 
{ + NANOVDB_ASSERT(*this); + return mParent->offsetToGlobalCoord(BaseT::pos()); + } + }; // Member class ValueOffIterator + + __hostdev__ ValueOffIterator beginValueOff() const { return ValueOffIterator(this); } + __hostdev__ ValueOffIterator cbeginValueOff() const { return ValueOffIterator(this); } + + /// @brief Visits all values in a leaf node, i.e. both active and inactive values + class ValueIterator + { + const LeafNode* mParent; + uint32_t mPos; + + public: + __hostdev__ ValueIterator() + : mParent(nullptr) + , mPos(1u << 3 * Log2Dim) + { + } + __hostdev__ ValueIterator(const LeafNode* parent) + : mParent(parent) + , mPos(0) + { + NANOVDB_ASSERT(parent); + } + ValueIterator& operator=(const ValueIterator&) = default; + __hostdev__ ValueType operator*() const + { + NANOVDB_ASSERT(*this); + return mParent->getValue(mPos); + } + __hostdev__ CoordT getCoord() const + { + NANOVDB_ASSERT(*this); + return mParent->offsetToGlobalCoord(mPos); + } + __hostdev__ bool isActive() const + { + NANOVDB_ASSERT(*this); + return mParent->isActive(mPos); + } + __hostdev__ operator bool() const { return mPos < (1u << 3 * Log2Dim); } + __hostdev__ ValueIterator& operator++() + { + ++mPos; + return *this; + } + __hostdev__ ValueIterator operator++(int) + { + auto tmp = *this; + ++(*this); + return tmp; + } + }; // Member class ValueIterator - static_assert(is_same::Type>::value, "Mismatching BuildType"); + __hostdev__ ValueIterator beginValue() const { return ValueIterator(this); } + __hostdev__ ValueIterator cbeginValueAll() const { return ValueIterator(this); } + + static_assert(util::is_same::Type>::value, "Mismatching BuildType"); static constexpr uint32_t LOG2DIM = Log2Dim; static constexpr uint32_t TOTAL = LOG2DIM; // needed by parent nodes static constexpr uint32_t DIM = 1u << TOTAL; // number of voxels along each axis of this node @@ -3713,18 +4477,19 @@ class LeafNode : private LeafData /// @brief Return a const reference to the bit mask of active voxels in this leaf node __hostdev__ const MaskType& valueMask() const { return DataType::mValueMask; } + __hostdev__ const MaskType& getValueMask() const { return DataType::mValueMask; } /// @brief Return a const reference to the minimum active value encoded in this leaf node - __hostdev__ ValueType minimum() const { return this->getMin(); } + __hostdev__ ValueType minimum() const { return DataType::getMin(); } /// @brief Return a const reference to the maximum active value encoded in this leaf node - __hostdev__ ValueType maximum() const { return this->getMax(); } + __hostdev__ ValueType maximum() const { return DataType::getMax(); } /// @brief Return a const reference to the average of all the active values encoded in this leaf node __hostdev__ FloatType average() const { return DataType::getAvg(); } /// @brief Return the variance of all the active values encoded in this leaf node - __hostdev__ FloatType variance() const { return DataType::getDev()*DataType::getDev(); } + __hostdev__ FloatType variance() const { return Pow2(DataType::getDev()); } /// @brief Return a const reference to the standard deviation of all the active values encoded in this leaf node __hostdev__ FloatType stdDeviation() const { return DataType::getDev(); } @@ -3734,6 +4499,9 @@ class LeafNode : private LeafData /// @brief Return the origin in index space of this leaf node __hostdev__ CoordT origin() const { return DataType::mBBoxMin & ~MASK; } + /// @brief Compute the local coordinates from a linear offset + /// @param n Linear offset into this nodes dense table + /// @return 
Local (vs global) 3D coordinates __hostdev__ static CoordT OffsetToLocalCoord(uint32_t n) { NANOVDB_ASSERT(n < SIZE); @@ -3753,15 +4521,15 @@ class LeafNode : private LeafData __hostdev__ static uint32_t dim() { return 1u << LOG2DIM; } /// @brief Return the bounding box in index space of active values in this leaf node - __hostdev__ BBox bbox() const + __hostdev__ math::BBox bbox() const { - BBox bbox(DataType::mBBoxMin, DataType::mBBoxMin); - if ( this->isActive() ) { + math::BBox bbox(DataType::mBBoxMin, DataType::mBBoxMin); + if (this->hasBBox()) { bbox.max()[0] += DataType::mBBoxDif[0]; bbox.max()[1] += DataType::mBBoxDif[1]; bbox.max()[2] += DataType::mBBoxDif[2]; - } else {// very rare case - bbox = BBox();// invalid + } else { // very rare case + bbox = math::BBox(); // invalid } return bbox; } @@ -3769,8 +4537,10 @@ class LeafNode : private LeafData /// @brief Return the total number of voxels (e.g. values) encoded in this leaf node __hostdev__ static uint32_t voxelCount() { return 1u << (3 * LOG2DIM); } - /// @brief return memory usage in bytes for the class - __hostdev__ static uint64_t memUsage() { return sizeof(LeafNodeType); } + __hostdev__ static uint32_t padding() { return DataType::padding(); } + + /// @brief return memory usage in bytes for the leaf node + __hostdev__ uint64_t memUsage() const { return DataType::memUsage(); } /// @brief This class cannot be constructed or deleted LeafNode() = delete; @@ -3779,10 +4549,15 @@ class LeafNode : private LeafData ~LeafNode() = delete; /// @brief Return the voxel value at the given offset. - __hostdev__ ValueType getValue(uint32_t offset) const { return DataType::getValue(offset); } + __hostdev__ ValueType getValue(uint32_t offset) const { return DataType::getValue(offset); } /// @brief Return the voxel value at the given coordinate. - __hostdev__ ValueType getValue(const CoordT& ijk) const { return DataType::getValue(CoordToOffset(ijk)); } + __hostdev__ ValueType getValue(const CoordT& ijk) const { return DataType::getValue(CoordToOffset(ijk)); } + + /// @brief Return the first value in this leaf node. + __hostdev__ ValueType getFirstValue() const { return this->getValue(0); } + /// @brief Return the last value in this leaf node. + __hostdev__ ValueType getLastValue() const { return this->getValue(SIZE - 1); } /// @brief Sets the value at the specified location and activate its state. /// @@ -3802,16 +4577,18 @@ class LeafNode : private LeafData /// @brief Return @c true if any of the voxel value are active in this leaf node. __hostdev__ bool isActive() const { - NANOVDB_ASSERT( bool(DataType::mFlags & uint8_t(2)) != DataType::mValueMask.isOff() ); - return DataType::mFlags & uint8_t(2); + //NANOVDB_ASSERT( bool(DataType::mFlags & uint8_t(2)) != DataType::mValueMask.isOff() ); + //return DataType::mFlags & uint8_t(2); + return !DataType::mValueMask.isOff(); } + __hostdev__ bool hasBBox() const { return DataType::mFlags & uint8_t(2); } /// @brief Return @c true if the voxel value at the given coordinate is active and updates @c v with the value. 
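// Illustrative sketch (not part of the patch) of the coordinate packing used by
// CoordToOffset() below: with the default Log2Dim = 3 a leaf covers 8x8x8 voxels and the
// linear offset is just the three low 3-bit fields of the global coordinate, packed with
// shifts (bitwise OR and + are interchangeable here because the fields never overlap).
#include <cstdint>
inline uint32_t leafOffset(int i, int j, int k)
{
    return ((uint32_t(i) & 7u) << 6) | ((uint32_t(j) & 7u) << 3) | (uint32_t(k) & 7u);
}
// e.g. leafOffset(1, 2, 3) == 1*64 + 2*8 + 3 == 83; the internal nodes above apply the
// same packing to the next-higher bit fields of the coordinate.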
__hostdev__ bool probeValue(const CoordT& ijk, ValueType& v) const { const uint32_t n = CoordToOffset(ijk); - v = DataType::getValue(n); + v = DataType::getValue(n); return DataType::mValueMask.isOn(n); } @@ -3820,25 +4597,44 @@ class LeafNode : private LeafData /// @brief Return the linear offset corresponding to the given coordinate __hostdev__ static uint32_t CoordToOffset(const CoordT& ijk) { - #if 0 - return ((ijk[0] & MASK) << (2 * LOG2DIM)) + ((ijk[1] & MASK) << LOG2DIM) + (ijk[2] & MASK); - #else return ((ijk[0] & MASK) << (2 * LOG2DIM)) | ((ijk[1] & MASK) << LOG2DIM) | (ijk[2] & MASK); - #endif } - /// @brief Updates the local bounding box of active voxels in this node. + /// @brief Updates the local bounding box of active voxels in this node. Return true if bbox was updated. /// /// @warning It assumes that the origin and value mask have already been set. /// /// @details This method is based on few (intrinsic) bit operations and hence is relatively fast. - /// However, it should only only be called of either the value mask has changed or if the - /// active bounding box is still undefined. e.g. during constrution of this node. - __hostdev__ void updateBBox(); + /// However, it should only only be called if either the value mask has changed or if the + /// active bounding box is still undefined. e.g. during construction of this node. + __hostdev__ bool updateBBox(); + + template + __hostdev__ auto get(const CoordType& ijk, ArgsT&&... args) const + { + return OpT::get(*this, CoordToOffset(ijk), args...); + } + + template + __hostdev__ auto get(const uint32_t n, ArgsT&&... args) const + { + return OpT::get(*this, n, args...); + } + + template + __hostdev__ auto set(const CoordType& ijk, ArgsT&&... args) + { + return OpT::set(*this, CoordToOffset(ijk), args...); + } + + template + __hostdev__ auto set(const uint32_t n, ArgsT&&... args) + { + return OpT::set(*this, n, args...); + } private: static_assert(sizeof(DataType) % NANOVDB_DATA_ALIGNMENT == 0, "sizeof(LeafData) is misaligned"); - //static_assert(offsetof(DataType, mValues) % 32 == 0, "LeafData::mValues is misaligned"); template friend class ReadAccessor; @@ -3848,16 +4644,17 @@ class LeafNode : private LeafData template friend class InternalNode; +#ifndef NANOVDB_NEW_ACCESSOR_METHODS /// @brief Private method to return a voxel value and update a (dummy) ReadAccessor template __hostdev__ ValueType getValueAndCache(const CoordT& ijk, const AccT&) const { return this->getValue(ijk); } /// @brief Return the node information. 
template - __hostdev__ typename AccT::NodeInfo getNodeInfoAndCache(const CoordType& /*ijk*/, const AccT& /*acc*/) const { + __hostdev__ typename AccT::NodeInfo getNodeInfoAndCache(const CoordType& /*ijk*/, const AccT& /*acc*/) const + { using NodeInfoT = typename AccT::NodeInfo; - return NodeInfoT{LEVEL, this->dim(), this->minimum(), this->maximum(), - this->average(), this->stdDeviation(), this->bbox()[0], this->bbox()[1]}; + return NodeInfoT{LEVEL, this->dim(), this->minimum(), this->maximum(), this->average(), this->stdDeviation(), this->bbox()[0], this->bbox()[1]}; } template @@ -3868,48 +4665,71 @@ class LeafNode : private LeafData template __hostdev__ const LeafNode* probeLeafAndCache(const CoordT&, const AccT&) const { return this; } +#endif template __hostdev__ uint32_t getDimAndCache(const CoordT&, const RayT& /*ray*/, const AccT&) const { - if (DataType::mFlags & uint8_t(1)) - return this->dim(); // skip this node if first bit is set + if (DataType::mFlags & uint8_t(1u)) + return this->dim(); // skip this node if the 1st bit is set + //if (!ray.intersects( this->bbox() )) return 1 << LOG2DIM; return ChildNodeType::dim(); } + template + __hostdev__ auto + //__hostdev__ decltype(OpT::get(util::declval(), util::declval(), util::declval()...)) + getAndCache(const CoordType& ijk, const AccT&, ArgsT&&... args) const + { + return OpT::get(*this, CoordToOffset(ijk), args...); + } + + template + //__hostdev__ auto // occasionally fails with NVCC + __hostdev__ decltype(OpT::set(util::declval(), util::declval(), util::declval()...)) + setAndCache(const CoordType& ijk, const AccT&, ArgsT&&... args) + { + return OpT::set(*this, CoordToOffset(ijk), args...); + } + }; // LeafNode class +// --------------------------> LeafNode::updateBBox <------------------------------------ + template class MaskT, uint32_t LOG2DIM> -__hostdev__ inline void LeafNode::updateBBox() +__hostdev__ inline bool LeafNode::updateBBox() { static_assert(LOG2DIM == 3, "LeafNode::updateBBox: only supports LOGDIM = 3!"); - if (!this->isActive()) return; + if (DataType::mValueMask.isOff()) { + DataType::mFlags &= ~uint8_t(2); // set 2nd bit off, which indicates that this nodes has no bbox + return false; + } auto update = [&](uint32_t min, uint32_t max, int axis) { NANOVDB_ASSERT(min <= max && max < 8); DataType::mBBoxMin[axis] = (DataType::mBBoxMin[axis] & ~MASK) + int(min); DataType::mBBoxDif[axis] = uint8_t(max - min); }; - uint64_t word64 = DataType::mValueMask.template getWord(0); - uint32_t Xmin = word64 ? 0u : 8u; - uint32_t Xmax = Xmin; - for (int i = 1; i < 8; ++i) { // last loop over 8 64 words - if (uint64_t w = DataType::mValueMask.template getWord(i)) { // skip if word has no set bits - word64 |= w; // union 8 x 64 bits words into one 64 bit word - if (Xmin == 8) { + uint64_t *w = DataType::mValueMask.words(), word64 = *w; + uint32_t Xmin = word64 ? 
0u : 8u, Xmax = Xmin; + for (int i = 1; i < 8; ++i) { // last loop over 8 64 bit words + if (w[i]) { // skip if word has no set bits + word64 |= w[i]; // union 8 x 64 bits words into one 64 bit word + if (Xmin == 8) Xmin = i; // only set once - } Xmax = i; } } NANOVDB_ASSERT(word64); update(Xmin, Xmax, 0); - update(FindLowestOn(word64) >> 3, FindHighestOn(word64) >> 3, 1); + update(util::findLowestOn(word64) >> 3, util::findHighestOn(word64) >> 3, 1); const uint32_t *p = reinterpret_cast(&word64), word32 = p[0] | p[1]; const uint16_t *q = reinterpret_cast(&word32), word16 = q[0] | q[1]; - const uint8_t *b = reinterpret_cast(&word16), byte = b[0] | b[1]; + const uint8_t *b = reinterpret_cast(&word16), byte = b[0] | b[1]; NANOVDB_ASSERT(byte); - update(FindLowestOn(static_cast(byte)), FindHighestOn(static_cast(byte)), 2); + update(util::findLowestOn(static_cast(byte)), util::findHighestOn(static_cast(byte)), 2); + DataType::mFlags |= uint8_t(2); // set 2nd bit on, which indicates that this nodes has a bbox + return true; } // LeafNode::updateBBox // --------------------------> Template specializations and traits <------------------------------------ @@ -3959,31 +4779,131 @@ struct NanoNode using type = NanoRoot; }; -using FloatTree = NanoTree; +using FloatTree = NanoTree; +using Fp4Tree = NanoTree; +using Fp8Tree = NanoTree; +using Fp16Tree = NanoTree; +using FpNTree = NanoTree; using DoubleTree = NanoTree; -using Int32Tree = NanoTree; +using Int32Tree = NanoTree; using UInt32Tree = NanoTree; -using Int64Tree = NanoTree; -using Vec3fTree = NanoTree; -using Vec3dTree = NanoTree; -using Vec4fTree = NanoTree; -using Vec4dTree = NanoTree; -using Vec3ITree = NanoTree; -using MaskTree = NanoTree; -using BoolTree = NanoTree; - -using FloatGrid = Grid; +using Int64Tree = NanoTree; +using Vec3fTree = NanoTree; +using Vec3dTree = NanoTree; +using Vec4fTree = NanoTree; +using Vec4dTree = NanoTree; +using Vec3ITree = NanoTree; +using MaskTree = NanoTree; +using BoolTree = NanoTree; +using IndexTree = NanoTree; +using OnIndexTree = NanoTree; +using IndexMaskTree = NanoTree; +using OnIndexMaskTree = NanoTree; + +using FloatGrid = Grid; +using Fp4Grid = Grid; +using Fp8Grid = Grid; +using Fp16Grid = Grid; +using FpNGrid = Grid; using DoubleGrid = Grid; -using Int32Grid = Grid; +using Int32Grid = Grid; using UInt32Grid = Grid; -using Int64Grid = Grid; -using Vec3fGrid = Grid; -using Vec3dGrid = Grid; -using Vec4fGrid = Grid; -using Vec4dGrid = Grid; -using Vec3IGrid = Grid; -using MaskGrid = Grid; -using BoolGrid = Grid; +using Int64Grid = Grid; +using Vec3fGrid = Grid; +using Vec3dGrid = Grid; +using Vec4fGrid = Grid; +using Vec4dGrid = Grid; +using Vec3IGrid = Grid; +using MaskGrid = Grid; +using BoolGrid = Grid; +using PointGrid = Grid; +using IndexGrid = Grid; +using OnIndexGrid = Grid; +using IndexMaskGrid = Grid; +using OnIndexMaskGrid = Grid; + +// --------------------------> callNanoGrid <------------------------------------ + +/** +* @brief Below is an example of the struct used for generic programming with callNanoGrid +* @details For an example see "struct Crc32TailOld" in nanovdb/tools/GridChecksum.h or +* "struct IsNanoGridValid" in nanovdb/tools/GridValidator.h +* @code +* struct OpT { + // define these two static functions with non-const GridData +* template +* static auto known( GridData *gridData, args...); +* static auto unknown( GridData *gridData, args...); +* // or alternatively these two static functions with const GridData +* template +* static auto known(const GridData *gridData, 
args...); +* static auto unknown(const GridData *gridData, args...); +* }; +* @endcode +* +* @brief Here is an example of how to use callNanoGrid in client code +* @code +* return callNanoGrid(gridData, args...); +* @endcode +*/ + +/// @brief Use this function, which depends a pointer to GridData, to call +/// other functions that depend on a NanoGrid of a known ValueType. +/// @details This function allows for generic programming by converting GridData +/// to a NanoGrid of the type encoded in GridData::mGridType. +template +auto callNanoGrid(GridDataT *gridData, ArgsT&&... args) +{ + static_assert(util::is_same::value, "Expected gridData to be of type GridData* or const GridData*"); + switch (gridData->mGridType){ + case GridType::Float: + return OpT::template known(gridData, args...); + case GridType::Double: + return OpT::template known(gridData, args...); + case GridType::Int16: + return OpT::template known(gridData, args...); + case GridType::Int32: + return OpT::template known(gridData, args...); + case GridType::Int64: + return OpT::template known(gridData, args...); + case GridType::Vec3f: + return OpT::template known(gridData, args...); + case GridType::Vec3d: + return OpT::template known(gridData, args...); + case GridType::UInt32: + return OpT::template known(gridData, args...); + case GridType::Mask: + return OpT::template known(gridData, args...); + case GridType::Index: + return OpT::template known(gridData, args...); + case GridType::OnIndex: + return OpT::template known(gridData, args...); + case GridType::IndexMask: + return OpT::template known(gridData, args...); + case GridType::OnIndexMask: + return OpT::template known(gridData, args...); + case GridType::Boolean: + return OpT::template known(gridData, args...); + case GridType::RGBA8: + return OpT::template known(gridData, args...); + case GridType::Fp4: + return OpT::template known(gridData, args...); + case GridType::Fp8: + return OpT::template known(gridData, args...); + case GridType::Fp16: + return OpT::template known(gridData, args...); + case GridType::FpN: + return OpT::template known(gridData, args...); + case GridType::Vec4f: + return OpT::template known(gridData, args...); + case GridType::Vec4d: + return OpT::template known(gridData, args...); + case GridType::UInt8: + return OpT::template known(gridData, args...); + default: + return OpT::unknown(gridData, args...); + } +}// callNanoGrid // --------------------------> ReadAccessor <------------------------------------ @@ -3994,7 +4914,7 @@ using BoolGrid = Grid; /// @note By virtue of the fact that a value accessor accelerates random access operations /// by re-using cached access patterns, this access should be reused for multiple access /// operations. In other words, never create an instance of this accessor for a single -/// acccess only. In general avoid single access operations with this accessor, and +/// access only. In general avoid single access operations with this accessor, and /// if that is not possible call the corresponding method on the tree instead. /// /// @warning Since this ReadAccessor internally caches raw pointers to the nodes of the tree @@ -4006,22 +4926,26 @@ using BoolGrid = Grid; /// O(1) random access operations by means of inverse tree traversal, /// which amortizes the non-const time complexity of the root node. 
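// A minimal usage sketch of the accessor pattern described above, assuming `grid`
// is a const NanoGrid<float>& and `coords` is a range of nanovdb::Coord lookups;
// the accessor is created once (via createAccessor, defined further below) and
// reused so that its node cache can amortize repeated top-down tree traversals:
//
//   auto acc = nanovdb::createAccessor<0, 1, 2>(grid); // cache leaf, lower and upper nodes
//   float sum = 0.0f;
//   for (const nanovdb::Coord& ijk : coords) sum += acc.getValue(ijk);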
-template +template class ReadAccessor { - using RootT = NanoRoot; // root node - using LeafT = NanoLeaf; // Leaf node + using GridT = NanoGrid; // grid + using TreeT = NanoTree; // tree + using RootT = NanoRoot; // root node + using LeafT = NanoLeaf; // Leaf node using FloatType = typename RootT::FloatType; using CoordValueType = typename RootT::CoordType::ValueType; mutable const RootT* mRoot; // 8 bytes (mutable to allow for access methods to be const) public: + using BuildType = BuildT; using ValueType = typename RootT::ValueType; using CoordType = typename RootT::CoordType; static const int CacheLevels = 0; - - struct NodeInfo { +#ifndef NANOVDB_NEW_ACCESSOR_METHODS + struct NodeInfo + { uint32_t mLevel; // 4B uint32_t mDim; // 4B ValueType mMinimum; // typically 4B @@ -4031,9 +4955,28 @@ class ReadAccessor CoordType mBBoxMin; // 3*4B CoordType mBBoxMax; // 3*4B }; - +#endif /// @brief Constructor from a root node - __hostdev__ ReadAccessor(const RootT& root) : mRoot{&root} {} + __hostdev__ ReadAccessor(const RootT& root) + : mRoot{&root} + { + } + + /// @brief Constructor from a grid + __hostdev__ ReadAccessor(const GridT& grid) + : ReadAccessor(grid.tree().root()) + { + } + + /// @brief Constructor from a tree + __hostdev__ ReadAccessor(const TreeT& tree) + : ReadAccessor(tree.root()) + { + } + + /// @brief Reset this access to its initial state, i.e. with an empty cache + /// @node Noop since this template specialization has no cache + __hostdev__ void clear() {} __hostdev__ const RootT& root() const { return *mRoot; } @@ -4041,11 +4984,35 @@ class ReadAccessor ReadAccessor(const ReadAccessor&) = default; ~ReadAccessor() = default; ReadAccessor& operator=(const ReadAccessor&) = default; - +#ifdef NANOVDB_NEW_ACCESSOR_METHODS + __hostdev__ ValueType getValue(const CoordType& ijk) const + { + return this->template get>(ijk); + } + __hostdev__ ValueType getValue(int i, int j, int k) const { return this->template get>(CoordType(i, j, k)); } + __hostdev__ ValueType operator()(const CoordType& ijk) const { return this->template get>(ijk); } + __hostdev__ ValueType operator()(int i, int j, int k) const { return this->template get>(CoordType(i, j, k)); } + __hostdev__ auto getNodeInfo(const CoordType& ijk) const { return this->template get>(ijk); } + __hostdev__ bool isActive(const CoordType& ijk) const { return this->template get>(ijk); } + __hostdev__ bool probeValue(const CoordType& ijk, ValueType& v) const { return this->template get>(ijk, v); } + __hostdev__ const LeafT* probeLeaf(const CoordType& ijk) const { return this->template get>(ijk); } +#else // NANOVDB_NEW_ACCESSOR_METHODS __hostdev__ ValueType getValue(const CoordType& ijk) const { return mRoot->getValueAndCache(ijk, *this); } + __hostdev__ ValueType getValue(int i, int j, int k) const + { + return this->getValue(CoordType(i, j, k)); + } + __hostdev__ ValueType operator()(const CoordType& ijk) const + { + return this->getValue(ijk); + } + __hostdev__ ValueType operator()(int i, int j, int k) const + { + return this->getValue(CoordType(i, j, k)); + } __hostdev__ NodeInfo getNodeInfo(const CoordType& ijk) const { @@ -4066,12 +5033,23 @@ class ReadAccessor { return mRoot->probeLeafAndCache(ijk, *this); } - +#endif // NANOVDB_NEW_ACCESSOR_METHODS template __hostdev__ uint32_t getDim(const CoordType& ijk, const RayT& ray) const { return mRoot->getDimAndCache(ijk, ray, *this); } + template + __hostdev__ auto get(const CoordType& ijk, ArgsT&&... 
args) const + { + return mRoot->template get(ijk, args...); + } + + template + __hostdev__ auto set(const CoordType& ijk, ArgsT&&... args) const + { + return const_cast(mRoot)->template set(ijk, args...); + } private: /// @brief Allow nodes to insert themselves into the cache. @@ -4088,15 +5066,16 @@ class ReadAccessor }; // ReadAccessor class /// @brief Node caching at a single tree level -template -class ReadAccessor//e.g. 0, 1, 2 +template +class ReadAccessor //e.g. 0, 1, 2 { static_assert(LEVEL0 >= 0 && LEVEL0 <= 2, "LEVEL0 should be 0, 1, or 2"); - using TreeT = NanoTree; - using RootT = NanoRoot; // root node - using LeafT = NanoLeaf; // Leaf node - using NodeT = typename NodeTrait::type; + using GridT = NanoGrid; // grid + using TreeT = NanoTree; + using RootT = NanoRoot; // root node + using LeafT = NanoLeaf; // Leaf node + using NodeT = typename NodeTrait::type; using CoordT = typename RootT::CoordType; using ValueT = typename RootT::ValueType; @@ -4109,13 +5088,14 @@ class ReadAccessor//e.g. 0, 1, 2 mutable const NodeT* mNode; // 8 bytes public: + using BuildType = BuildT; using ValueType = ValueT; using CoordType = CoordT; static const int CacheLevels = 1; - +#ifndef NANOVDB_NEW_ACCESSOR_METHODS using NodeInfo = typename ReadAccessor::NodeInfo; - +#endif /// @brief Constructor from a root node __hostdev__ ReadAccessor(const RootT& root) : mKey(CoordType::max()) @@ -4124,6 +5104,25 @@ class ReadAccessor//e.g. 0, 1, 2 { } + /// @brief Constructor from a grid + __hostdev__ ReadAccessor(const GridT& grid) + : ReadAccessor(grid.tree().root()) + { + } + + /// @brief Constructor from a tree + __hostdev__ ReadAccessor(const TreeT& tree) + : ReadAccessor(tree.root()) + { + } + + /// @brief Reset this access to its initial state, i.e. with an empty cache + __hostdev__ void clear() + { + mKey = CoordType::max(); + mNode = nullptr; + } + __hostdev__ const RootT& root() const { return *mRoot; } /// @brief Defaults constructors @@ -4138,55 +5137,90 @@ class ReadAccessor//e.g. 
0, 1, 2 (ijk[2] & int32_t(~NodeT::MASK)) == mKey[2]; } +#ifdef NANOVDB_NEW_ACCESSOR_METHODS __hostdev__ ValueType getValue(const CoordType& ijk) const { - if (this->isCached(ijk)) { + return this->template get>(ijk); + } + __hostdev__ ValueType getValue(int i, int j, int k) const { return this->template get>(CoordType(i, j, k)); } + __hostdev__ ValueType operator()(const CoordType& ijk) const { return this->template get>(ijk); } + __hostdev__ ValueType operator()(int i, int j, int k) const { return this->template get>(CoordType(i, j, k)); } + __hostdev__ auto getNodeInfo(const CoordType& ijk) const { return this->template get>(ijk); } + __hostdev__ bool isActive(const CoordType& ijk) const { return this->template get>(ijk); } + __hostdev__ bool probeValue(const CoordType& ijk, ValueType& v) const { return this->template get>(ijk, v); } + __hostdev__ const LeafT* probeLeaf(const CoordType& ijk) const { return this->template get>(ijk); } +#else // NANOVDB_NEW_ACCESSOR_METHODS + __hostdev__ ValueType getValue(const CoordType& ijk) const + { + if (this->isCached(ijk)) return mNode->getValueAndCache(ijk, *this); - } return mRoot->getValueAndCache(ijk, *this); } + __hostdev__ ValueType getValue(int i, int j, int k) const + { + return this->getValue(CoordType(i, j, k)); + } + __hostdev__ ValueType operator()(const CoordType& ijk) const + { + return this->getValue(ijk); + } + __hostdev__ ValueType operator()(int i, int j, int k) const + { + return this->getValue(CoordType(i, j, k)); + } __hostdev__ NodeInfo getNodeInfo(const CoordType& ijk) const { - if (this->isCached(ijk)) { + if (this->isCached(ijk)) return mNode->getNodeInfoAndCache(ijk, *this); - } return mRoot->getNodeInfoAndCache(ijk, *this); } __hostdev__ bool isActive(const CoordType& ijk) const { - if (this->isCached(ijk)) { + if (this->isCached(ijk)) return mNode->isActiveAndCache(ijk, *this); - } return mRoot->isActiveAndCache(ijk, *this); } __hostdev__ bool probeValue(const CoordType& ijk, ValueType& v) const { - if (this->isCached(ijk)) { + if (this->isCached(ijk)) return mNode->probeValueAndCache(ijk, v, *this); - } return mRoot->probeValueAndCache(ijk, v, *this); } __hostdev__ const LeafT* probeLeaf(const CoordType& ijk) const { - if (this->isCached(ijk)) { + if (this->isCached(ijk)) return mNode->probeLeafAndCache(ijk, *this); - } return mRoot->probeLeafAndCache(ijk, *this); } - +#endif // NANOVDB_NEW_ACCESSOR_METHODS template __hostdev__ uint32_t getDim(const CoordType& ijk, const RayT& ray) const { - if (this->isCached(ijk)) { + if (this->isCached(ijk)) return mNode->getDimAndCache(ijk, ray, *this); - } return mRoot->getDimAndCache(ijk, ray, *this); } + template + __hostdev__ auto get(const CoordType& ijk, ArgsT&&... args) const + { + if (this->isCached(ijk)) + return mNode->template getAndCache(ijk, *this, args...); + return mRoot->template getAndCache(ijk, *this, args...); + } + + template + __hostdev__ auto set(const CoordType& ijk, ArgsT&&... args) const + { + if (this->isCached(ijk)) + return const_cast(mNode)->template setAndCache(ijk, *this, args...); + return const_cast(mRoot)->template setAndCache(ijk, *this, args...); + } + private: /// @brief Allow nodes to insert themselves into the cache. template @@ -4209,15 +5243,16 @@ class ReadAccessor//e.g. 0, 1, 2 }; // ReadAccessor -template -class ReadAccessor//e.g. (0,1), (1,2), (0,2) +template +class ReadAccessor //e.g. 
(0,1), (1,2), (0,2) { static_assert(LEVEL0 >= 0 && LEVEL0 <= 2, "LEVEL0 must be 0, 1, 2"); static_assert(LEVEL1 >= 0 && LEVEL1 <= 2, "LEVEL1 must be 0, 1, 2"); static_assert(LEVEL0 < LEVEL1, "Level 0 must be lower than level 1"); - using TreeT = NanoTree; - using RootT = NanoRoot; - using LeafT = NanoLeaf; + using GridT = NanoGrid; // grid + using TreeT = NanoTree; + using RootT = NanoRoot; + using LeafT = NanoLeaf; using Node1T = typename NodeTrait::type; using Node2T = typename NodeTrait::type; using CoordT = typename RootT::CoordType; @@ -4226,7 +5261,7 @@ class ReadAccessor//e.g. (0,1), (1,2), (0,2) using CoordValueType = typename RootT::CoordT::ValueType; // All member data are mutable to allow for access methods to be const -#ifdef USE_SINGLE_ACCESSOR_KEY // 44 bytes total +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY // 44 bytes total mutable CoordT mKey; // 3*4 = 12 bytes #else // 68 bytes total mutable CoordT mKeys[2]; // 2*3*4 = 24 bytes @@ -4236,24 +5271,49 @@ class ReadAccessor//e.g. (0,1), (1,2), (0,2) mutable const Node2T* mNode2; public: + using BuildType = BuildT; using ValueType = ValueT; using CoordType = CoordT; static const int CacheLevels = 2; +#ifndef NANOVDB_NEW_ACCESSOR_METHODS + using NodeInfo = typename ReadAccessor::NodeInfo; +#endif + /// @brief Constructor from a root node + __hostdev__ ReadAccessor(const RootT& root) +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY + : mKey(CoordType::max()) +#else + : mKeys{CoordType::max(), CoordType::max()} +#endif + , mRoot(&root) + , mNode1(nullptr) + , mNode2(nullptr) + { + } + + /// @brief Constructor from a grid + __hostdev__ ReadAccessor(const GridT& grid) + : ReadAccessor(grid.tree().root()) + { + } - using NodeInfo = typename ReadAccessor::NodeInfo; + /// @brief Constructor from a tree + __hostdev__ ReadAccessor(const TreeT& tree) + : ReadAccessor(tree.root()) + { + } - /// @brief Constructor from a root node - __hostdev__ ReadAccessor(const RootT& root) -#ifdef USE_SINGLE_ACCESSOR_KEY - : mKey(CoordType::max()) + /// @brief Reset this access to its initial state, i.e. with an empty cache + __hostdev__ void clear() + { +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY + mKey = CoordType::max(); #else - : mKeys{CoordType::max(), CoordType::max()} + mKeys[0] = mKeys[1] = CoordType::max(); #endif - , mRoot(&root) - , mNode1(nullptr) - , mNode2(nullptr) - { + mNode1 = nullptr; + mNode2 = nullptr; } __hostdev__ const RootT& root() const { return *mRoot; } @@ -4263,7 +5323,7 @@ class ReadAccessor//e.g. (0,1), (1,2), (0,2) ~ReadAccessor() = default; ReadAccessor& operator=(const ReadAccessor&) = default; -#ifdef USE_SINGLE_ACCESSOR_KEY +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY __hostdev__ bool isCached1(CoordValueType dirty) const { if (!mNode1) @@ -4303,9 +5363,23 @@ class ReadAccessor//e.g. 
(0,1), (1,2), (0,2) } #endif +#ifdef NANOVDB_NEW_ACCESSOR_METHODS + __hostdev__ ValueType getValue(const CoordType& ijk) const + { + return this->template get>(ijk); + } + __hostdev__ ValueType getValue(int i, int j, int k) const { return this->template get>(CoordType(i, j, k)); } + __hostdev__ ValueType operator()(const CoordType& ijk) const { return this->template get>(ijk); } + __hostdev__ ValueType operator()(int i, int j, int k) const { return this->template get>(CoordType(i, j, k)); } + __hostdev__ auto getNodeInfo(const CoordType& ijk) const { return this->template get>(ijk); } + __hostdev__ bool isActive(const CoordType& ijk) const { return this->template get>(ijk); } + __hostdev__ bool probeValue(const CoordType& ijk, ValueType& v) const { return this->template get>(ijk, v); } + __hostdev__ const LeafT* probeLeaf(const CoordType& ijk) const { return this->template get>(ijk); } +#else // NANOVDB_NEW_ACCESSOR_METHODS + __hostdev__ ValueType getValue(const CoordType& ijk) const { -#ifdef USE_SINGLE_ACCESSOR_KEY +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY const CoordValueType dirty = this->computeDirty(ijk); #else auto&& dirty = ijk; @@ -4317,10 +5391,21 @@ class ReadAccessor//e.g. (0,1), (1,2), (0,2) } return mRoot->getValueAndCache(ijk, *this); } - + __hostdev__ ValueType operator()(const CoordType& ijk) const + { + return this->getValue(ijk); + } + __hostdev__ ValueType operator()(int i, int j, int k) const + { + return this->getValue(CoordType(i, j, k)); + } + __hostdev__ ValueType getValue(int i, int j, int k) const + { + return this->getValue(CoordType(i, j, k)); + } __hostdev__ NodeInfo getNodeInfo(const CoordType& ijk) const { -#ifdef USE_SINGLE_ACCESSOR_KEY +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY const CoordValueType dirty = this->computeDirty(ijk); #else auto&& dirty = ijk; @@ -4335,7 +5420,7 @@ class ReadAccessor//e.g. (0,1), (1,2), (0,2) __hostdev__ bool isActive(const CoordType& ijk) const { -#ifdef USE_SINGLE_ACCESSOR_KEY +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY const CoordValueType dirty = this->computeDirty(ijk); #else auto&& dirty = ijk; @@ -4350,7 +5435,7 @@ class ReadAccessor//e.g. (0,1), (1,2), (0,2) __hostdev__ bool probeValue(const CoordType& ijk, ValueType& v) const { -#ifdef USE_SINGLE_ACCESSOR_KEY +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY const CoordValueType dirty = this->computeDirty(ijk); #else auto&& dirty = ijk; @@ -4365,7 +5450,7 @@ class ReadAccessor//e.g. (0,1), (1,2), (0,2) __hostdev__ const LeafT* probeLeaf(const CoordType& ijk) const { -#ifdef USE_SINGLE_ACCESSOR_KEY +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY const CoordValueType dirty = this->computeDirty(ijk); #else auto&& dirty = ijk; @@ -4377,11 +5462,12 @@ class ReadAccessor//e.g. (0,1), (1,2), (0,2) } return mRoot->probeLeafAndCache(ijk, *this); } +#endif // NANOVDB_NEW_ACCESSOR_METHODS template __hostdev__ uint32_t getDim(const CoordType& ijk, const RayT& ray) const { -#ifdef USE_SINGLE_ACCESSOR_KEY +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY const CoordValueType dirty = this->computeDirty(ijk); #else auto&& dirty = ijk; @@ -4394,6 +5480,38 @@ class ReadAccessor//e.g. (0,1), (1,2), (0,2) return mRoot->getDimAndCache(ijk, ray, *this); } + template + __hostdev__ auto get(const CoordType& ijk, ArgsT&&... 
args) const + { +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY + const CoordValueType dirty = this->computeDirty(ijk); +#else + auto&& dirty = ijk; +#endif + if (this->isCached1(dirty)) { + return mNode1->template getAndCache(ijk, *this, args...); + } else if (this->isCached2(dirty)) { + return mNode2->template getAndCache(ijk, *this, args...); + } + return mRoot->template getAndCache(ijk, *this, args...); + } + + template + __hostdev__ auto set(const CoordType& ijk, ArgsT&&... args) const + { +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY + const CoordValueType dirty = this->computeDirty(ijk); +#else + auto&& dirty = ijk; +#endif + if (this->isCached1(dirty)) { + return const_cast(mNode1)->template setAndCache(ijk, *this, args...); + } else if (this->isCached2(dirty)) { + return const_cast(mNode2)->template setAndCache(ijk, *this, args...); + } + return const_cast(mRoot)->template setAndCache(ijk, *this, args...); + } + private: /// @brief Allow nodes to insert themselves into the cache. template @@ -4406,7 +5524,7 @@ class ReadAccessor//e.g. (0,1), (1,2), (0,2) /// @brief Inserts a leaf node and key pair into this ReadAccessor __hostdev__ void insert(const CoordType& ijk, const Node1T* node) const { -#ifdef USE_SINGLE_ACCESSOR_KEY +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY mKey = ijk; #else mKeys[0] = ijk & ~Node1T::MASK; @@ -4415,27 +5533,27 @@ class ReadAccessor//e.g. (0,1), (1,2), (0,2) } __hostdev__ void insert(const CoordType& ijk, const Node2T* node) const { -#ifdef USE_SINGLE_ACCESSOR_KEY +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY mKey = ijk; #else mKeys[1] = ijk & ~Node2T::MASK; #endif mNode2 = node; } - template + template __hostdev__ void insert(const CoordType&, const OtherNodeT*) const {} }; // ReadAccessor - /// @brief Node caching at all (three) tree levels -template +template class ReadAccessor { - using TreeT = NanoTree; - using RootT = NanoRoot; // root node + using GridT = NanoGrid; // grid + using TreeT = NanoTree; + using RootT = NanoRoot; // root node using NodeT2 = NanoUpper; // upper internal node using NodeT1 = NanoLower; // lower internal node - using LeafT = NanoLeaf< BuildT>; // Leaf node + using LeafT = NanoLeaf; // Leaf node using CoordT = typename RootT::CoordType; using ValueT = typename RootT::ValueType; @@ -4443,25 +5561,26 @@ class ReadAccessor using CoordValueType = typename RootT::CoordT::ValueType; // All member data are mutable to allow for access methods to be const -#ifdef USE_SINGLE_ACCESSOR_KEY // 44 bytes total +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY // 44 bytes total mutable CoordT mKey; // 3*4 = 12 bytes #else // 68 bytes total mutable CoordT mKeys[3]; // 3*3*4 = 36 bytes #endif mutable const RootT* mRoot; - mutable const void* mNode[3]; // 4*8 = 32 bytes + mutable const void* mNode[3]; // 4*8 = 32 bytes public: + using BuildType = BuildT; using ValueType = ValueT; using CoordType = CoordT; static const int CacheLevels = 3; - +#ifndef NANOVDB_NEW_ACCESSOR_METHODS using NodeInfo = typename ReadAccessor::NodeInfo; - +#endif /// @brief Constructor from a root node __hostdev__ ReadAccessor(const RootT& root) -#ifdef USE_SINGLE_ACCESSOR_KEY +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY : mKey(CoordType::max()) #else : mKeys{CoordType::max(), CoordType::max(), CoordType::max()} @@ -4471,6 +5590,18 @@ class ReadAccessor { } + /// @brief Constructor from a grid + __hostdev__ ReadAccessor(const GridT& grid) + : ReadAccessor(grid.tree().root()) + { + } + + /// @brief Constructor from a tree + __hostdev__ ReadAccessor(const TreeT& tree) + : ReadAccessor(tree.root()) + { + } + 
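    // A minimal sketch of the grid constructor above and the clear() method below,
    // assuming `grid` is a NanoGrid<BuildT> that outlives the accessor:
    //
    //   ReadAccessor<BuildT, 0, 1, 2> acc(grid);      // same as ReadAccessor(grid.tree().root())
    //   auto v0 = acc.getValue(CoordType(0, 0, 0));
    //   acc.clear();                                  // drop all cached node pointers and keys
    //   auto v1 = acc.getValue(CoordType(8, 0, 0));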
__hostdev__ const RootT& root() const { return *mRoot; } /// @brief Defaults constructors @@ -4485,11 +5616,30 @@ class ReadAccessor __hostdev__ const NodeT* getNode() const { using T = typename NodeTrait::type; - static_assert(is_same::value, "ReadAccessor::getNode: Invalid node type"); + static_assert(util::is_same::value, "ReadAccessor::getNode: Invalid node type"); return reinterpret_cast(mNode[NodeT::LEVEL]); } -#ifdef USE_SINGLE_ACCESSOR_KEY + template + __hostdev__ const typename NodeTrait::type* getNode() const + { + using T = typename NodeTrait::type; + static_assert(LEVEL >= 0 && LEVEL <= 2, "ReadAccessor::getNode: Invalid node type"); + return reinterpret_cast(mNode[LEVEL]); + } + + /// @brief Reset this access to its initial state, i.e. with an empty cache + __hostdev__ void clear() + { +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY + mKey = CoordType::max(); +#else + mKeys[0] = mKeys[1] = mKeys[2] = CoordType::max(); +#endif + mNode[0] = mNode[1] = mNode[2] = nullptr; + } + +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY template __hostdev__ bool isCached(CoordValueType dirty) const { @@ -4510,13 +5660,29 @@ class ReadAccessor template __hostdev__ bool isCached(const CoordType& ijk) const { - return (ijk[0] & int32_t(~NodeT::MASK)) == mKeys[NodeT::LEVEL][0] && (ijk[1] & int32_t(~NodeT::MASK)) == mKeys[NodeT::LEVEL][1] && (ijk[2] & int32_t(~NodeT::MASK)) == mKeys[NodeT::LEVEL][2]; + return (ijk[0] & int32_t(~NodeT::MASK)) == mKeys[NodeT::LEVEL][0] && + (ijk[1] & int32_t(~NodeT::MASK)) == mKeys[NodeT::LEVEL][1] && + (ijk[2] & int32_t(~NodeT::MASK)) == mKeys[NodeT::LEVEL][2]; } #endif +#ifdef NANOVDB_NEW_ACCESSOR_METHODS + __hostdev__ ValueType getValue(const CoordType& ijk) const + { + return this->template get>(ijk); + } + __hostdev__ ValueType getValue(int i, int j, int k) const { return this->template get>(CoordType(i, j, k)); } + __hostdev__ ValueType operator()(const CoordType& ijk) const { return this->template get>(ijk); } + __hostdev__ ValueType operator()(int i, int j, int k) const { return this->template get>(CoordType(i, j, k)); } + __hostdev__ auto getNodeInfo(const CoordType& ijk) const { return this->template get>(ijk); } + __hostdev__ bool isActive(const CoordType& ijk) const { return this->template get>(ijk); } + __hostdev__ bool probeValue(const CoordType& ijk, ValueType& v) const { return this->template get>(ijk, v); } + __hostdev__ const LeafT* probeLeaf(const CoordType& ijk) const { return this->template get>(ijk); } +#else // NANOVDB_NEW_ACCESSOR_METHODS + __hostdev__ ValueType getValue(const CoordType& ijk) const { -#ifdef USE_SINGLE_ACCESSOR_KEY +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY const CoordValueType dirty = this->computeDirty(ijk); #else auto&& dirty = ijk; @@ -4530,10 +5696,22 @@ class ReadAccessor } return mRoot->getValueAndCache(ijk, *this); } + __hostdev__ ValueType operator()(const CoordType& ijk) const + { + return this->getValue(ijk); + } + __hostdev__ ValueType operator()(int i, int j, int k) const + { + return this->getValue(CoordType(i, j, k)); + } + __hostdev__ ValueType getValue(int i, int j, int k) const + { + return this->getValue(CoordType(i, j, k)); + } __hostdev__ NodeInfo getNodeInfo(const CoordType& ijk) const { -#ifdef USE_SINGLE_ACCESSOR_KEY +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY const CoordValueType dirty = this->computeDirty(ijk); #else auto&& dirty = ijk; @@ -4550,7 +5728,7 @@ class ReadAccessor __hostdev__ bool isActive(const CoordType& ijk) const { -#ifdef USE_SINGLE_ACCESSOR_KEY +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY const 
CoordValueType dirty = this->computeDirty(ijk); #else auto&& dirty = ijk; @@ -4567,7 +5745,7 @@ class ReadAccessor __hostdev__ bool probeValue(const CoordType& ijk, ValueType& v) const { -#ifdef USE_SINGLE_ACCESSOR_KEY +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY const CoordValueType dirty = this->computeDirty(ijk); #else auto&& dirty = ijk; @@ -4581,10 +5759,9 @@ class ReadAccessor } return mRoot->probeValueAndCache(ijk, v, *this); } - __hostdev__ const LeafT* probeLeaf(const CoordType& ijk) const { -#ifdef USE_SINGLE_ACCESSOR_KEY +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY const CoordValueType dirty = this->computeDirty(ijk); #else auto&& dirty = ijk; @@ -4598,11 +5775,48 @@ class ReadAccessor } return mRoot->probeLeafAndCache(ijk, *this); } +#endif // NANOVDB_NEW_ACCESSOR_METHODS + + template + __hostdev__ auto get(const CoordType& ijk, ArgsT&&... args) const + { +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY + const CoordValueType dirty = this->computeDirty(ijk); +#else + auto&& dirty = ijk; +#endif + if (this->isCached(dirty)) { + return ((const LeafT*)mNode[0])->template getAndCache(ijk, *this, args...); + } else if (this->isCached(dirty)) { + return ((const NodeT1*)mNode[1])->template getAndCache(ijk, *this, args...); + } else if (this->isCached(dirty)) { + return ((const NodeT2*)mNode[2])->template getAndCache(ijk, *this, args...); + } + return mRoot->template getAndCache(ijk, *this, args...); + } + + template + __hostdev__ auto set(const CoordType& ijk, ArgsT&&... args) const + { +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY + const CoordValueType dirty = this->computeDirty(ijk); +#else + auto&& dirty = ijk; +#endif + if (this->isCached(dirty)) { + return ((LeafT*)mNode[0])->template setAndCache(ijk, *this, args...); + } else if (this->isCached(dirty)) { + return ((NodeT1*)mNode[1])->template setAndCache(ijk, *this, args...); + } else if (this->isCached(dirty)) { + return ((NodeT2*)mNode[2])->template setAndCache(ijk, *this, args...); + } + return ((RootT*)mRoot)->template setAndCache(ijk, *this, args...); + } template __hostdev__ uint32_t getDim(const CoordType& ijk, const RayT& ray) const { -#ifdef USE_SINGLE_ACCESSOR_KEY +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY const CoordValueType dirty = this->computeDirty(ijk); #else auto&& dirty = ijk; @@ -4630,7 +5844,7 @@ class ReadAccessor template __hostdev__ void insert(const CoordType& ijk, const NodeT* node) const { -#ifdef USE_SINGLE_ACCESSOR_KEY +#ifdef NANOVDB_USE_SINGLE_ACCESSOR_KEY mKey = ijk; #else mKeys[NodeT::LEVEL] = ijk & ~NodeT::MASK; @@ -4653,20 +5867,20 @@ class ReadAccessor /// createAccessor<1,2>(grid): Caching of lower and upper internal nodes /// createAccessor<0,1,2>(grid): Caching of all nodes at all tree levels -template -ReadAccessor createAccessor(const NanoGrid &grid) +template +ReadAccessor createAccessor(const NanoGrid& grid) { - return ReadAccessor(grid.tree().root()); + return ReadAccessor(grid); } -template -ReadAccessor createAccessor(const NanoTree &tree) +template +ReadAccessor createAccessor(const NanoTree& tree) { - return ReadAccessor(tree().root()); + return ReadAccessor(tree); } -template -ReadAccessor createAccessor(const NanoRoot &root) +template +ReadAccessor createAccessor(const NanoRoot& root) { return ReadAccessor(root); } @@ -4678,70 +5892,115 @@ ReadAccessor createAccessor(const NanoRoot; - __hostdev__ const GridT& grid() const { return *reinterpret_cast(this); } +{ // 768 bytes (32 byte aligned) + GridData mGridData; // 672B + TreeData mTreeData; // 64B + CoordBBox mIndexBBox; // 24B. 
AABB of active values in index space. + uint32_t mRootTableSize, mPadding{0}; // 8B public: - __hostdev__ bool isValid() const { return this->grid().isValid(); } - __hostdev__ uint64_t gridSize() const { return this->grid().gridSize(); } - __hostdev__ uint32_t gridIndex() const { return this->grid().gridIndex(); } - __hostdev__ uint32_t gridCount() const { return this->grid().gridCount(); } - __hostdev__ const char* shortGridName() const { return this->grid().shortGridName(); } - __hostdev__ GridType gridType() const { return this->grid().gridType(); } - __hostdev__ GridClass gridClass() const { return this->grid().gridClass(); } - __hostdev__ bool isLevelSet() const { return this->grid().isLevelSet(); } - __hostdev__ bool isFogVolume() const { return this->grid().isFogVolume(); } - __hostdev__ bool isPointIndex() const { return this->grid().isPointIndex(); } - __hostdev__ bool isPointData() const { return this->grid().isPointData(); } - __hostdev__ bool isMask() const { return this->grid().isMask(); } - __hostdev__ bool isStaggered() const { return this->grid().isStaggered(); } - __hostdev__ bool isUnknown() const { return this->grid().isUnknown(); } - __hostdev__ const Map& map() const { return this->grid().map(); } - __hostdev__ const BBox& worldBBox() const { return this->grid().worldBBox(); } - __hostdev__ const BBox& indexBBox() const { return this->grid().indexBBox(); } - __hostdev__ Vec3R voxelSize() const { return this->grid().voxelSize(); } - __hostdev__ int blindDataCount() const { return this->grid().blindDataCount(); } - __hostdev__ const GridBlindMetaData& blindMetaData(int n) const { return this->grid().blindMetaData(n); } - __hostdev__ uint64_t activeVoxelCount() const { return this->grid().activeVoxelCount(); } - __hostdev__ uint32_t activeTileCount(uint32_t n) const { return this->grid().tree().activeTileCount(n); } - __hostdev__ uint32_t nodeCount(uint32_t level) const { return this->grid().tree().nodeCount(level); } - __hostdev__ uint64_t checksum() const { return this->grid().checksum(); } - __hostdev__ bool isEmpty() const { return this->grid().isEmpty(); } - __hostdev__ Version version() const { return this->grid().version(); } + template + GridMetaData(const NanoGrid& grid) + { + mGridData = *grid.data(); + mTreeData = *grid.tree().data(); + mIndexBBox = grid.indexBBox(); + mRootTableSize = grid.tree().root().getTableSize(); + } + GridMetaData(const GridData* gridData) + { + if (GridMetaData::safeCast(gridData)) { + *this = *reinterpret_cast(gridData); + //util::memcpy(this, (const GridMetaData*)gridData); + } else {// otherwise copy each member individually + mGridData = *gridData; + mTreeData = *reinterpret_cast(gridData->treePtr()); + mIndexBBox = gridData->indexBBox(); + mRootTableSize = gridData->rootTableSize(); + } + } + GridMetaData& operator=(const GridMetaData&) = default; + /// @brief return true if the RootData follows right after the TreeData. + /// If so, this implies that it's safe to cast the grid from which + /// this instance was constructed to a GridMetaData + __hostdev__ bool safeCast() const { return mTreeData.isRootNext(); } + + /// @brief return true if it is safe to cast the grid to a pointer + /// of type GridMetaData, i.e. construction can be avoided. + __hostdev__ static bool safeCast(const GridData *gridData){ + NANOVDB_ASSERT(gridData && gridData->isValid()); + return gridData->isRootConnected(); + } + /// @brief return true if it is safe to cast the grid to a pointer + /// of type GridMetaData, i.e. construction can be avoided. 
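    // A minimal usage sketch, assuming `data` points to the first byte of a valid
    // (possibly type-erased) grid buffer, e.g. what a GridHandle's data() returns:
    //
    //   const auto* gridData = reinterpret_cast<const nanovdb::GridData*>(data);
    //   nanovdb::GridMetaData meta(gridData);  // cheap when safeCast(gridData) is true
    //   const uint64_t voxelCount  = meta.activeVoxelCount();
    //   const bool     isIndexGrid = meta.isGridIndex();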
+ template + __hostdev__ static bool safeCast(const NanoGrid& grid){return grid.tree().isRootNext();} + __hostdev__ bool isValid() const { return mGridData.isValid(); } + __hostdev__ const GridType& gridType() const { return mGridData.mGridType; } + __hostdev__ const GridClass& gridClass() const { return mGridData.mGridClass; } + __hostdev__ bool isLevelSet() const { return mGridData.mGridClass == GridClass::LevelSet; } + __hostdev__ bool isFogVolume() const { return mGridData.mGridClass == GridClass::FogVolume; } + __hostdev__ bool isStaggered() const { return mGridData.mGridClass == GridClass::Staggered; } + __hostdev__ bool isPointIndex() const { return mGridData.mGridClass == GridClass::PointIndex; } + __hostdev__ bool isGridIndex() const { return mGridData.mGridClass == GridClass::IndexGrid; } + __hostdev__ bool isPointData() const { return mGridData.mGridClass == GridClass::PointData; } + __hostdev__ bool isMask() const { return mGridData.mGridClass == GridClass::Topology; } + __hostdev__ bool isUnknown() const { return mGridData.mGridClass == GridClass::Unknown; } + __hostdev__ bool hasMinMax() const { return mGridData.mFlags.isMaskOn(GridFlags::HasMinMax); } + __hostdev__ bool hasBBox() const { return mGridData.mFlags.isMaskOn(GridFlags::HasBBox); } + __hostdev__ bool hasLongGridName() const { return mGridData.mFlags.isMaskOn(GridFlags::HasLongGridName); } + __hostdev__ bool hasAverage() const { return mGridData.mFlags.isMaskOn(GridFlags::HasAverage); } + __hostdev__ bool hasStdDeviation() const { return mGridData.mFlags.isMaskOn(GridFlags::HasStdDeviation); } + __hostdev__ bool isBreadthFirst() const { return mGridData.mFlags.isMaskOn(GridFlags::IsBreadthFirst); } + __hostdev__ uint64_t gridSize() const { return mGridData.mGridSize; } + __hostdev__ uint32_t gridIndex() const { return mGridData.mGridIndex; } + __hostdev__ uint32_t gridCount() const { return mGridData.mGridCount; } + __hostdev__ const char* shortGridName() const { return mGridData.mGridName; } + __hostdev__ const Map& map() const { return mGridData.mMap; } + __hostdev__ const Vec3dBBox& worldBBox() const { return mGridData.mWorldBBox; } + __hostdev__ const CoordBBox& indexBBox() const { return mIndexBBox; } + __hostdev__ Vec3d voxelSize() const { return mGridData.mVoxelSize; } + __hostdev__ int blindDataCount() const { return mGridData.mBlindMetadataCount; } + __hostdev__ uint64_t activeVoxelCount() const { return mTreeData.mVoxelCount; } + __hostdev__ const uint32_t& activeTileCount(uint32_t level) const { return mTreeData.mTileCount[level - 1]; } + __hostdev__ uint32_t nodeCount(uint32_t level) const { return mTreeData.mNodeCount[level]; } + __hostdev__ const Checksum& checksum() const { return mGridData.mChecksum; } + __hostdev__ uint32_t rootTableSize() const { return mRootTableSize; } + __hostdev__ bool isEmpty() const { return mRootTableSize == 0; } + __hostdev__ Version version() const { return mGridData.mVersion; } }; // GridMetaData /// @brief Class to access points at a specific voxel location -template -class PointAccessor : public DefaultReadAccessor +/// +/// @note If GridClass::PointIndex AttT should be uint32_t and if GridClass::PointData Vec3f +template +class PointAccessor : public DefaultReadAccessor { - using AccT = DefaultReadAccessor; - const UInt32Grid* mGrid; - const AttT* mData; + using AccT = DefaultReadAccessor; + const NanoGrid& mGrid; + const AttT* mData; public: - using LeafNodeType = typename NanoRoot::LeafNodeType; - - PointAccessor(const UInt32Grid& grid) + PointAccessor(const 
NanoGrid& grid) : AccT(grid.tree().root()) - , mGrid(&grid) - , mData(reinterpret_cast(grid.blindData(0))) + , mGrid(grid) + , mData(grid.template getBlindData(0)) { - NANOVDB_ASSERT(grid.gridType() == GridType::UInt32); - NANOVDB_ASSERT((grid.gridClass() == GridClass::PointIndex && is_same::value) || - (grid.gridClass() == GridClass::PointData && is_same::value)); - NANOVDB_ASSERT(grid.blindDataCount() >= 1); + NANOVDB_ASSERT(grid.gridType() == toGridType()); + NANOVDB_ASSERT((grid.gridClass() == GridClass::PointIndex && util::is_same::value) || + (grid.gridClass() == GridClass::PointData && util::is_same::value)); } + + /// @brief return true if this access was initialized correctly + __hostdev__ operator bool() const { return mData != nullptr; } + + __hostdev__ const NanoGrid& grid() const { return mGrid; } + /// @brief Return the total number of point in the grid and set the /// iterators to the complete range of points. __hostdev__ uint64_t gridPoints(const AttT*& begin, const AttT*& end) const { - const uint64_t count = mGrid->blindMetaData(0).mElementCount; + const uint64_t count = mGrid.blindMetaData(0u).mValueCount; begin = mData; end = begin + count; return count; @@ -4760,23 +6019,606 @@ class PointAccessor : public DefaultReadAccessor return leaf->maximum(); } - /// @brief get iterators over offsets to points at a specific voxel location + /// @brief get iterators over attributes to points at a specific voxel location __hostdev__ uint64_t voxelPoints(const Coord& ijk, const AttT*& begin, const AttT*& end) const + { + begin = end = nullptr; + if (auto* leaf = this->probeLeaf(ijk)) { + const uint32_t offset = NanoLeaf::CoordToOffset(ijk); + if (leaf->isActive(offset)) { + begin = mData + leaf->minimum(); + end = begin + leaf->getValue(offset); + if (offset > 0u) + begin += leaf->getValue(offset - 1); + } + } + return end - begin; + } +}; // PointAccessor + +template +class PointAccessor : public DefaultReadAccessor +{ + using AccT = DefaultReadAccessor; + const NanoGrid& mGrid; + const AttT* mData; + +public: + PointAccessor(const NanoGrid& grid) + : AccT(grid.tree().root()) + , mGrid(grid) + , mData(grid.template getBlindData(0)) + { + NANOVDB_ASSERT(mData); + NANOVDB_ASSERT(grid.gridType() == GridType::PointIndex); + NANOVDB_ASSERT((grid.gridClass() == GridClass::PointIndex && util::is_same::value) || + (grid.gridClass() == GridClass::PointData && util::is_same::value) || + (grid.gridClass() == GridClass::PointData && util::is_same::value) || + (grid.gridClass() == GridClass::PointData && util::is_same::value) || + (grid.gridClass() == GridClass::PointData && util::is_same::value)); + } + + /// @brief return true if this access was initialized correctly + __hostdev__ operator bool() const { return mData != nullptr; } + + __hostdev__ const NanoGrid& grid() const { return mGrid; } + + /// @brief Return the total number of point in the grid and set the + /// iterators to the complete range of points. + __hostdev__ uint64_t gridPoints(const AttT*& begin, const AttT*& end) const + { + const uint64_t count = mGrid.blindMetaData(0u).mValueCount; + begin = mData; + end = begin + count; + return count; + } + /// @brief Return the number of points in the leaf node containing the coordinate @a ijk. + /// If this return value is larger than zero then the iterators @a begin and @a end + /// will point to all the attributes contained within that leaf node. 
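    // A minimal usage sketch, assuming `grid` is a NanoGrid<Point> whose 0th blind
    // data channel stores per-point Vec3f positions and `ijk` is the voxel to query:
    //
    //   PointAccessor<Vec3f, Point> acc(grid);
    //   const Vec3f *begin = nullptr, *end = nullptr;
    //   if (acc.voxelPoints(ijk, begin, end)) {            // number of points in voxel ijk
    //       for (const Vec3f* p = begin; p != end; ++p) { /* visit point *p */ }
    //   }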
+ __hostdev__ uint64_t leafPoints(const Coord& ijk, const AttT*& begin, const AttT*& end) const { auto* leaf = this->probeLeaf(ijk); if (leaf == nullptr) return 0; - const uint32_t offset = LeafNodeType::CoordToOffset(ijk); - if (leaf->isActive(offset)) { - auto* p = mData + leaf->minimum(); - begin = p + (offset == 0 ? 0 : leaf->getValue(offset - 1)); - end = p + leaf->getValue(offset); - return end - begin; + begin = mData + leaf->offset(); + end = begin + leaf->pointCount(); + return leaf->pointCount(); + } + + /// @brief get iterators over attributes to points at a specific voxel location + __hostdev__ uint64_t voxelPoints(const Coord& ijk, const AttT*& begin, const AttT*& end) const + { + if (auto* leaf = this->probeLeaf(ijk)) { + const uint32_t n = NanoLeaf::CoordToOffset(ijk); + if (leaf->isActive(n)) { + begin = mData + leaf->first(n); + end = mData + leaf->last(n); + return end - begin; + } } - return 0; + begin = end = nullptr; + return 0u; // no leaf or inactive voxel } -}; // PointAccessor +}; // PointAccessor + +/// @brief Class to access values in channels at a specific voxel location. +/// +/// @note The ChannelT template parameter can be either const and non-const. +template +class ChannelAccessor : public DefaultReadAccessor +{ + static_assert(BuildTraits::is_index, "Expected an index build type"); + using BaseT = DefaultReadAccessor; + + const NanoGrid& mGrid; + ChannelT* mChannel; + +public: + using ValueType = ChannelT; + using TreeType = NanoTree; + using AccessorType = ChannelAccessor; + + /// @brief Ctor from an IndexGrid and an integer ID of an internal channel + /// that is assumed to exist as blind data in the IndexGrid. + __hostdev__ ChannelAccessor(const NanoGrid& grid, uint32_t channelID = 0u) + : BaseT(grid.tree().root()) + , mGrid(grid) + , mChannel(nullptr) + { + NANOVDB_ASSERT(isIndex(grid.gridType())); + NANOVDB_ASSERT(grid.gridClass() == GridClass::IndexGrid); + this->setChannel(channelID); + } + + /// @brief Ctor from an IndexGrid and an external channel + __hostdev__ ChannelAccessor(const NanoGrid& grid, ChannelT* channelPtr) + : BaseT(grid.tree().root()) + , mGrid(grid) + , mChannel(channelPtr) + { + NANOVDB_ASSERT(isIndex(grid.gridType())); + NANOVDB_ASSERT(grid.gridClass() == GridClass::IndexGrid); + } + + /// @brief return true if this access was initialized correctly + __hostdev__ operator bool() const { return mChannel != nullptr; } + + /// @brief Return a const reference to the IndexGrid + __hostdev__ const NanoGrid& grid() const { return mGrid; } + + /// @brief Return a const reference to the tree of the IndexGrid + __hostdev__ const TreeType& tree() const { return mGrid.tree(); } + + /// @brief Return a vector of the axial voxel sizes + __hostdev__ const Vec3d& voxelSize() const { return mGrid.voxelSize(); } + + /// @brief Return total number of values indexed by the IndexGrid + __hostdev__ const uint64_t& valueCount() const { return mGrid.valueCount(); } + + /// @brief Change to an external channel + /// @return Pointer to channel data + __hostdev__ ChannelT* setChannel(ChannelT* channelPtr) {return mChannel = channelPtr;} + + /// @brief Change to an internal channel, assuming it exists as as blind data + /// in the IndexGrid. 
+ /// @return Pointer to channel data, which could be NULL if channelID is out of range or + /// if ChannelT does not match the value type of the blind data + __hostdev__ ChannelT* setChannel(uint32_t channelID) + { + return mChannel = const_cast(mGrid.template getBlindData(channelID)); + } + + /// @brief Return the linear offset into a channel that maps to the specified coordinate + __hostdev__ uint64_t getIndex(const math::Coord& ijk) const { return BaseT::getValue(ijk); } + __hostdev__ uint64_t idx(int i, int j, int k) const { return BaseT::getValue(math::Coord(i, j, k)); } + + /// @brief Return the value from a cached channel that maps to the specified coordinate + __hostdev__ ChannelT& getValue(const math::Coord& ijk) const { return mChannel[BaseT::getValue(ijk)]; } + __hostdev__ ChannelT& operator()(const math::Coord& ijk) const { return this->getValue(ijk); } + __hostdev__ ChannelT& operator()(int i, int j, int k) const { return this->getValue(math::Coord(i, j, k)); } + + /// @brief return the state and updates the value of the specified voxel + __hostdev__ bool probeValue(const math::Coord& ijk, typename util::remove_const::type& v) const + { + uint64_t idx; + const bool isActive = BaseT::probeValue(ijk, idx); + v = mChannel[idx]; + return isActive; + } + /// @brief Return the value from a specified channel that maps to the specified coordinate + /// + /// @note The template parameter can be either const or non-const + template + __hostdev__ T& getValue(const math::Coord& ijk, T* channelPtr) const { return channelPtr[BaseT::getValue(ijk)]; } + +}; // ChannelAccessor + +#if 0 +// This MiniGridHandle class is only included as a stand-alone example. Note that aligned_alloc is a C++17 feature! +// Normally we recommend using GridHandle defined in util/GridHandle.h but this minimal implementation could be an +// alternative when using the IO methods defined below. +struct MiniGridHandle { + struct BufferType { + uint8_t *data; + uint64_t size; + BufferType(uint64_t n=0) : data(std::aligned_alloc(NANOVDB_DATA_ALIGNMENT, n)), size(n) {assert(isValid(data));} + BufferType(BufferType &&other) : data(other.data), size(other.size) {other.data=nullptr; other.size=0;} + ~BufferType() {std::free(data);} + BufferType& operator=(const BufferType &other) = delete; + BufferType& operator=(BufferType &&other){data=other.data; size=other.size; other.data=nullptr; other.size=0; return *this;} + static BufferType create(size_t n, BufferType* dummy = nullptr) {return BufferType(n);} + } buffer; + MiniGridHandle(BufferType &&buf) : buffer(std::move(buf)) {} + const uint8_t* data() const {return buffer.data;} +};// MiniGridHandle +#endif + +namespace io { + +/// @brief Define compression codecs +/// +/// @note NONE is the default, ZIP is slow but compact and BLOSC offers a great balance. +/// +/// @throw NanoVDB optionally supports ZIP and BLOSC compression and will throw an exception +/// if its support is required but missing. 
+enum class Codec : uint16_t { NONE = 0, + ZIP = 1, + BLOSC = 2, + End = 3, + StrLen = 6 + End }; + +__hostdev__ inline const char* toStr(char *dst, Codec codec) +{ + switch (codec){ + case Codec::NONE: return util::strcpy(dst, "NONE"); + case Codec::ZIP: return util::strcpy(dst, "ZIP"); + case Codec::BLOSC : return util::strcpy(dst, "BLOSC"); + default: return util::strcpy(dst, "END"); + } +} + +__hostdev__ inline Codec toCodec(const char *str) +{ + if (util::streq(str, "none")) return Codec::NONE; + if (util::streq(str, "zip")) return Codec::ZIP; + if (util::streq(str, "blosc")) return Codec::BLOSC; + return Codec::End; +} + +/// @brief Data encoded at the head of each segment of a file or stream. +/// +/// @note A file or stream is composed of one or more segments that each contain +// one or more grids. +struct FileHeader {// 16 bytes + uint64_t magic;// 8 bytes + Version version;// 4 bytes version numbers + uint16_t gridCount;// 2 bytes + Codec codec;// 2 bytes + bool isValid() const {return magic == NANOVDB_MAGIC_NUMB || magic == NANOVDB_MAGIC_FILE;} +}; // FileHeader ( 16 bytes = 2 words ) + +// @brief Data encoded for each of the grids associated with a segment. +// Grid size in memory (uint64_t) | +// Grid size on disk (uint64_t) | +// Grid name hash key (uint64_t) | +// Numer of active voxels (uint64_t) | +// Grid type (uint32_t) | +// Grid class (uint32_t) | +// Characters in grid name (uint32_t) | +// AABB in world space (2*3*double) | one per grid in file +// AABB in index space (2*3*int) | +// Size of a voxel in world units (3*double) | +// Byte size of the grid name (uint32_t) | +// Number of nodes per level (4*uint32_t) | +// Numer of active tiles per level (3*uint32_t) | +// Codec for file compression (uint16_t) | +// Padding due to 8B alignment (uint16_t) | +// Version number (uint32_t) | +struct FileMetaData +{// 176 bytes + uint64_t gridSize, fileSize, nameKey, voxelCount; // 4 * 8 = 32B. + GridType gridType; // 4B. + GridClass gridClass; // 4B. + Vec3dBBox worldBBox; // 2 * 3 * 8 = 48B. + CoordBBox indexBBox; // 2 * 3 * 4 = 24B. + Vec3d voxelSize; // 24B. + uint32_t nameSize; // 4B. + uint32_t nodeCount[4]; //4 x 4 = 16B + uint32_t tileCount[3];// 3 x 4 = 12B + Codec codec; // 2B + uint16_t padding;// 2B, due to 8B alignment from uint64_t + Version version;// 4B +}; // FileMetaData + +// the following code block uses std and therefore needs to be ignored by CUDA and HIP +#if !defined(__CUDA_ARCH__) && !defined(__HIP__) + +// Note that starting with version 32.6.0 it is possible to write and read raw grid buffers to +// files, e.g. os.write((const char*)&buffer.data(), buffer.size()) or more conveniently as +// handle.write(fileName). In addition to this simple approach we offer the methods below to +// write traditional uncompressed nanovdb files that unlike raw files include metadata that +// is used for tools like nanovdb_print. + +/// +/// @brief This is a standalone alternative to io::writeGrid(...,Codec::NONE) defined in util/IO.h +/// Unlike the latter this function has no dependencies at all, not even NanoVDB.h, so it also +/// works if client code only includes PNanoVDB.h! +/// +/// @details Writes a raw NanoVDB buffer, possibly with multiple grids, to a stream WITHOUT compression. +/// It follows all the conventions in util/IO.h so the stream can be read by all existing client +/// code of NanoVDB. +/// +/// @note This method will always write uncompressed grids to the stream, i.e. Blosc or ZIP compression +/// is never applied! 
This is a fundamental limitation and feature of this standalone function. +/// +/// @throw std::invalid_argument if buffer does not point to a valid NanoVDB grid. +/// +/// @warning This is pretty ugly code that involves lots of pointer and bit manipulations - not for the faint of heart :) +template // StreamT class must support: "void write(const char*, size_t)" +void writeUncompressedGrid(StreamT& os, const GridData* gridData, bool raw = false) +{ + NANOVDB_ASSERT(gridData->mMagic == NANOVDB_MAGIC_NUMB || gridData->mMagic == NANOVDB_MAGIC_GRID); + NANOVDB_ASSERT(gridData->mVersion.isCompatible()); + if (!raw) {// segment with a single grid: FileHeader, FileMetaData, gridName, Grid +#ifdef NANOVDB_USE_NEW_MAGIC_NUMBERS + FileHeader head{NANOVDB_MAGIC_FILE, gridData->mVersion, 1u, Codec::NONE}; +#else + FileHeader head{NANOVDB_MAGIC_NUMB, gridData->mVersion, 1u, Codec::NONE}; +#endif + const char* gridName = gridData->gridName(); + const uint32_t nameSize = util::strlen(gridName) + 1;// include '\0' + const TreeData* treeData = (const TreeData*)(gridData->treePtr()); + FileMetaData meta{gridData->mGridSize, gridData->mGridSize, 0u, treeData->mVoxelCount, + gridData->mGridType, gridData->mGridClass, gridData->mWorldBBox, + treeData->bbox(), gridData->mVoxelSize, nameSize, + {treeData->mNodeCount[0], treeData->mNodeCount[1], treeData->mNodeCount[2], 1u}, + {treeData->mTileCount[0], treeData->mTileCount[1], treeData->mTileCount[2]}, + Codec::NONE, 0u, gridData->mVersion }; // FileMetaData + os.write((const char*)&head, sizeof(FileHeader)); // write header + os.write((const char*)&meta, sizeof(FileMetaData)); // write meta data + os.write(gridName, nameSize); // write grid name + } + os.write((const char*)gridData, gridData->mGridSize);// write the grid +}// writeUncompressedGrid + +/// @brief write multiple NanoVDB grids to a single file, without compression. 
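For reference, a small client-side sketch (illustrative only, not part of this header) of driving the writeUncompressedGrid function defined just above with a std::ofstream; the saveSingleGrid helper name and the origin of the gridData pointer (e.g. a GridHandle that owns the buffer) are assumptions.

#include <fstream>

// Write one in-memory NanoVDB grid to disk without compression. With raw=false the
// FileHeader/FileMetaData/grid-name segment described above is prepended, so tools
// such as nanovdb_print can inspect the file.
inline void saveSingleGrid(const nanovdb::GridData* gridData, const char* fileName)
{
    std::ofstream os(fileName, std::ios::out | std::ios::binary | std::ios::trunc);
    nanovdb::io::writeUncompressedGrid(os, gridData, /*raw=*/false);
}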
+/// @note To write all grids in a single GridHandle simply use handle.write("fieNane") +template class VecT> +void writeUncompressedGrids(const char* fileName, const VecT& handles, bool raw = false) +{ +#ifdef NANOVDB_USE_IOSTREAMS // use this to switch between std::ofstream or FILE implementations + std::ofstream os(fileName, std::ios::out | std::ios::binary | std::ios::trunc); +#else + struct StreamT { + FILE* fptr; + StreamT(const char* name) { fptr = fopen(name, "wb"); } + ~StreamT() { fclose(fptr); } + void write(const char* data, size_t n) { fwrite(data, 1, n, fptr); } + bool is_open() const { return fptr != NULL; } + } os(fileName); +#endif + if (!os.is_open()) { + fprintf(stderr, "nanovdb::writeUncompressedGrids: Unable to open file \"%s\"for output\n", fileName); + exit(EXIT_FAILURE); + } + for (auto& h : handles) { + for (uint32_t n=0; n class VecT> +VecT readUncompressedGrids(StreamT& is, const typename GridHandleT::BufferType& pool = typename GridHandleT::BufferType()) +{ + VecT handles; + GridData data; + is.read((char*)&data, sizeof(GridData)); + if (data.isValid()) {// stream contains a raw grid buffer + uint64_t size = data.mGridSize, sum = 0u; + while(data.mGridIndex + 1u < data.mGridCount) { + is.skip(data.mGridSize - sizeof(GridData));// skip grid + is.read((char*)&data, sizeof(GridData));// read sizeof(GridData) bytes + sum += data.mGridSize; + } + is.skip(-int64_t(sum + sizeof(GridData)));// rewind to start + auto buffer = GridHandleT::BufferType::create(size + sum, &pool); + is.read((char*)(buffer.data()), buffer.size()); + handles.emplace_back(std::move(buffer)); + } else {// Header0, MetaData0, gridName0, Grid0...HeaderN, MetaDataN, gridNameN, GridN + is.skip(-sizeof(GridData));// rewind + FileHeader head; + while(is.read((char*)&head, sizeof(FileHeader))) { + if (!head.isValid()) { + fprintf(stderr, "nanovdb::readUncompressedGrids: invalid magic number = \"%s\"\n", (const char*)&(head.magic)); + exit(EXIT_FAILURE); + } else if (!head.version.isCompatible()) { + char str[20]; + fprintf(stderr, "nanovdb::readUncompressedGrids: invalid major version = \"%s\"\n", toStr(str, head.version)); + exit(EXIT_FAILURE); + } else if (head.codec != Codec::NONE) { + char str[8]; + fprintf(stderr, "nanovdb::readUncompressedGrids: invalid codec = \"%s\"\n", toStr(str, head.codec)); + exit(EXIT_FAILURE); + } + FileMetaData meta; + for (uint16_t i = 0; i < head.gridCount; ++i) { // read all grids in segment + is.read((char*)&meta, sizeof(FileMetaData));// read meta data + is.skip(meta.nameSize); // skip grid name + auto buffer = GridHandleT::BufferType::create(meta.gridSize, &pool); + is.read((char*)buffer.data(), meta.gridSize);// read grid + handles.emplace_back(std::move(buffer)); + }// loop over grids in segment + }// loop over segments + } + return handles; +} // readUncompressedGrids + +/// @brief Read a multiple un-compressed NanoVDB grids from a file and return them as a vector. 
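To show the intended round trip, here is a client-side sketch (illustrative only, not part of this header) pairing the file-based reader declared next with writeUncompressedGrids above; the include paths, the GridHandle<HostBuffer> handle type, and the file names are assumptions for the example.

#include <vector>
#include <nanovdb/GridHandle.h> // assumed include paths for nanovdb::GridHandle
#include <nanovdb/HostBuffer.h> // and the default host-side buffer it is paired with

// Read every grid stored in an uncompressed .nvdb file, then write them all back out
// to a second uncompressed file.
inline void copyUncompressedFile(const char* srcName, const char* dstName)
{
    using HandleT = nanovdb::GridHandle<nanovdb::HostBuffer>;
    std::vector<HandleT> handles =
        nanovdb::io::readUncompressedGrids<HandleT, std::vector>(srcName);
    nanovdb::io::writeUncompressedGrids<HandleT, std::vector>(dstName, handles); // raw defaults to false
}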
+template class VecT> +VecT readUncompressedGrids(const char* fileName, const typename GridHandleT::BufferType& buffer = typename GridHandleT::BufferType()) +{ +#ifdef NANOVDB_USE_IOSTREAMS // use this to switch between std::ifstream or FILE implementations + struct StreamT : public std::ifstream { + StreamT(const char* name) : std::ifstream(name, std::ios::in | std::ios::binary){} + void skip(int64_t off) { this->seekg(off, std::ios_base::cur); } + }; +#else + struct StreamT { + FILE* fptr; + StreamT(const char* name) { fptr = fopen(name, "rb"); } + ~StreamT() { fclose(fptr); } + bool read(char* data, size_t n) { + size_t m = fread(data, 1, n, fptr); + return n == m; + } + void skip(int64_t off) { fseek(fptr, (long int)off, SEEK_CUR); } + bool is_open() const { return fptr != NULL; } + }; +#endif + StreamT is(fileName); + if (!is.is_open()) { + fprintf(stderr, "nanovdb::readUncompressedGrids: Unable to open file \"%s\"for input\n", fileName); + exit(EXIT_FAILURE); + } + return readUncompressedGrids(is, buffer); +} // readUncompressedGrids + +#endif // if !defined(__CUDA_ARCH__) && !defined(__HIP__) + +} // namespace io + +// ----------------------------> Implementations of random access methods <-------------------------------------- + +/// @brief Implements Tree::getValue(math::Coord), i.e. return the value associated with a specific coordinate @c ijk. +/// @tparam BuildT Build type of the grid being called +/// @details The value at a coordinate maps to the background, a tile value or a leaf value. +template +struct GetValue +{ + __hostdev__ static auto get(const NanoRoot& root) { return root.mBackground; } + __hostdev__ static auto get(const typename NanoRoot::Tile& tile) { return tile.value; } + __hostdev__ static auto get(const NanoUpper& node, uint32_t n) { return node.mTable[n].value; } + __hostdev__ static auto get(const NanoLower& node, uint32_t n) { return node.mTable[n].value; } + __hostdev__ static auto get(const NanoLeaf& leaf, uint32_t n) { return leaf.getValue(n); } // works with all build types +}; // GetValue + +template +struct SetValue +{ + static_assert(!BuildTraits::is_special, "SetValue does not support special value types"); + using ValueT = typename NanoLeaf::ValueType; + __hostdev__ static auto set(NanoRoot&, const ValueT&) {} // no-op + __hostdev__ static auto set(typename NanoRoot::Tile& tile, const ValueT& v) { tile.value = v; } + __hostdev__ static auto set(NanoUpper& node, uint32_t n, const ValueT& v) { node.mTable[n].value = v; } + __hostdev__ static auto set(NanoLower& node, uint32_t n, const ValueT& v) { node.mTable[n].value = v; } + __hostdev__ static auto set(NanoLeaf& leaf, uint32_t n, const ValueT& v) { leaf.mValues[n] = v; } +}; // SetValue + +template +struct SetVoxel +{ + static_assert(!BuildTraits::is_special, "SetVoxel does not support special value types"); + using ValueT = typename NanoLeaf::ValueType; + __hostdev__ static auto set(NanoRoot&, const ValueT&) {} // no-op + __hostdev__ static auto set(typename NanoRoot::Tile&, const ValueT&) {} // no-op + __hostdev__ static auto set(NanoUpper&, uint32_t, const ValueT&) {} // no-op + __hostdev__ static auto set(NanoLower&, uint32_t, const ValueT&) {} // no-op + __hostdev__ static auto set(NanoLeaf& leaf, uint32_t n, const ValueT& v) { leaf.mValues[n] = v; } +}; // SetVoxel + +/// @brief Implements Tree::isActive(math::Coord) +/// @tparam BuildT Build type of the grid being called +template +struct GetState +{ + __hostdev__ static auto get(const NanoRoot&) { return false; } + __hostdev__ static auto 
get(const typename NanoRoot::Tile& tile) { return tile.state > 0; } + __hostdev__ static auto get(const NanoUpper& node, uint32_t n) { return node.mValueMask.isOn(n); } + __hostdev__ static auto get(const NanoLower& node, uint32_t n) { return node.mValueMask.isOn(n); } + __hostdev__ static auto get(const NanoLeaf& leaf, uint32_t n) { return leaf.mValueMask.isOn(n); } +}; // GetState + +/// @brief Implements Tree::getDim(math::Coord) +/// @tparam BuildT Build type of the grid being called +template +struct GetDim +{ + __hostdev__ static uint32_t get(const NanoRoot&) { return 0u; } // background + __hostdev__ static uint32_t get(const typename NanoRoot::Tile&) { return 4096u; } + __hostdev__ static uint32_t get(const NanoUpper&, uint32_t) { return 128u; } + __hostdev__ static uint32_t get(const NanoLower&, uint32_t) { return 8u; } + __hostdev__ static uint32_t get(const NanoLeaf&, uint32_t) { return 1u; } +}; // GetDim + +/// @brief Return the pointer to the leaf node that contains math::Coord. Implements Tree::probeLeaf(math::Coord) +/// @tparam BuildT Build type of the grid being called +template +struct GetLeaf +{ + __hostdev__ static const NanoLeaf* get(const NanoRoot&) { return nullptr; } + __hostdev__ static const NanoLeaf* get(const typename NanoRoot::Tile&) { return nullptr; } + __hostdev__ static const NanoLeaf* get(const NanoUpper&, uint32_t) { return nullptr; } + __hostdev__ static const NanoLeaf* get(const NanoLower&, uint32_t) { return nullptr; } + __hostdev__ static const NanoLeaf* get(const NanoLeaf& leaf, uint32_t) { return &leaf; } +}; // GetLeaf + +/// @brief Return point to the lower internal node where math::Coord maps to one of its values, i.e. terminates +/// @tparam BuildT Build type of the grid being called +template +struct GetLower +{ + __hostdev__ static const NanoLower* get(const NanoRoot&) { return nullptr; } + __hostdev__ static const NanoLower* get(const typename NanoRoot::Tile&) { return nullptr; } + __hostdev__ static const NanoLower* get(const NanoUpper&, uint32_t) { return nullptr; } + __hostdev__ static const NanoLower* get(const NanoLower& node, uint32_t) { return &node; } + __hostdev__ static const NanoLower* get(const NanoLeaf&, uint32_t) { return nullptr; } +}; // GetLower + +/// @brief Return point to the upper internal node where math::Coord maps to one of its values, i.e. 
terminates +/// @tparam BuildT Build type of the grid being called +template +struct GetUpper +{ + __hostdev__ static const NanoUpper* get(const NanoRoot&) { return nullptr; } + __hostdev__ static const NanoUpper* get(const typename NanoRoot::Tile&) { return nullptr; } + __hostdev__ static const NanoUpper* get(const NanoUpper& node, uint32_t) { return &node; } + __hostdev__ static const NanoUpper* get(const NanoLower& node, uint32_t) { return nullptr; } + __hostdev__ static const NanoUpper* get(const NanoLeaf&, uint32_t) { return nullptr; } +}; // GetUpper + +/// @brief Implements Tree::probeLeaf(math::Coord) +/// @tparam BuildT Build type of the grid being called +template +struct ProbeValue +{ + using ValueT = typename BuildToValueMap::Type; + __hostdev__ static bool get(const NanoRoot& root, ValueT& v) + { + v = root.mBackground; + return false; + } + __hostdev__ static bool get(const typename NanoRoot::Tile& tile, ValueT& v) + { + v = tile.value; + return tile.state > 0u; + } + __hostdev__ static bool get(const NanoUpper& node, uint32_t n, ValueT& v) + { + v = node.mTable[n].value; + return node.mValueMask.isOn(n); + } + __hostdev__ static bool get(const NanoLower& node, uint32_t n, ValueT& v) + { + v = node.mTable[n].value; + return node.mValueMask.isOn(n); + } + __hostdev__ static bool get(const NanoLeaf& leaf, uint32_t n, ValueT& v) + { + v = leaf.getValue(n); + return leaf.mValueMask.isOn(n); + } +}; // ProbeValue + +/// @brief Implements Tree::getNodeInfo(math::Coord) +/// @tparam BuildT Build type of the grid being called +template +struct GetNodeInfo +{ + using ValueType = typename NanoLeaf::ValueType; + using FloatType = typename NanoLeaf::FloatType; + struct NodeInfo + { + uint32_t level, dim; + ValueType minimum, maximum; + FloatType average, stdDevi; + CoordBBox bbox; + }; + __hostdev__ static NodeInfo get(const NanoRoot& root) + { + return NodeInfo{3u, NanoUpper::DIM, root.minimum(), root.maximum(), root.average(), root.stdDeviation(), root.bbox()}; + } + __hostdev__ static NodeInfo get(const typename NanoRoot::Tile& tile) + { + return NodeInfo{3u, NanoUpper::DIM, tile.value, tile.value, static_cast(tile.value), 0, CoordBBox::createCube(tile.origin(), NanoUpper::DIM)}; + } + __hostdev__ static NodeInfo get(const NanoUpper& node, uint32_t n) + { + return NodeInfo{2u, node.dim(), node.minimum(), node.maximum(), node.average(), node.stdDeviation(), node.bbox()}; + } + __hostdev__ static NodeInfo get(const NanoLower& node, uint32_t n) + { + return NodeInfo{1u, node.dim(), node.minimum(), node.maximum(), node.average(), node.stdDeviation(), node.bbox()}; + } + __hostdev__ static NodeInfo get(const NanoLeaf& leaf, uint32_t n) + { + return NodeInfo{0u, leaf.dim(), leaf.minimum(), leaf.maximum(), leaf.average(), leaf.stdDeviation(), leaf.bbox()}; + } +}; // GetNodeInfo -} // namespace nanovdb +} // namespace nanovdb =================================================================== #endif // end of NANOVDB_NANOVDB_H_HAS_BEEN_INCLUDED diff --git a/warp/native/nanovdb/PNanoVDB.h b/warp/native/nanovdb/PNanoVDB.h index ac5796b5..40888f24 100644 --- a/warp/native/nanovdb/PNanoVDB.h +++ b/warp/native/nanovdb/PNanoVDB.h @@ -3,11 +3,11 @@ // SPDX-License-Identifier: MPL-2.0 /*! - \file PNanoVDB.h + \file nanovdb/PNanoVDB.h \author Andrew Reidmeyer - \brief This file is a portable (e.g. pointer-less) C99/GLSL/HLSL port + \brief This file is a portable (e.g. pointer-less) C99/GLSL/HLSL port of NanoVDB.h, which is compatible with most graphics APIs. 
*/ @@ -56,8 +56,10 @@ #endif #ifdef PNANOVDB_CMATH +#ifndef __CUDACC_RTC__ #include #endif +#endif // ------------------------------------------------ Buffer ----------------------------------------------------------- @@ -72,38 +74,11 @@ #endif #if defined(PNANOVDB_BUF_C) -// #include -#if !defined(_STDINT) && !defined(__GNUC__) -//typedef signed char int8_t; -//typedef signed short int16_t; -typedef signed int int32_t; -typedef signed long long int64_t; -//typedef signed char int_fast8_t; -//typedef signed short int_fast16_t; -//typedef signed int int_fast32_t; -//typedef signed long long int_fast64_t; -//typedef signed char int_least8_t; -//typedef signed short int_least16_t; -//typedef signed int int_least32_t; -//typedef signed long long int_least64_t; -//typedef signed long long intmax_t; -//typedef signed long intptr_t; -//typedef unsigned char uint8_t; -//typedef unsigned short uint16_t; -typedef unsigned int uint32_t; -typedef unsigned long long uint64_t; -//typedef unsigned char uint_fast8_t; -//typedef unsigned short uint_fast16_t; -//typedef unsigned int uint_fast32_t; -//typedef unsigned long long uint_fast64_t; -//typedef unsigned char uint_least8_t; -//typedef unsigned short uint_least16_t; -//typedef unsigned int uint_least32_t; -//typedef unsigned long long uint_least64_t; -//typedef unsigned long long uintmax_t; +#ifndef __CUDACC_RTC__ +#include #endif #if defined(__CUDACC__) -#define PNANOVDB_BUF_FORCE_INLINE __host__ __device__ static __forceinline__ +#define PNANOVDB_BUF_FORCE_INLINE static __host__ __device__ __forceinline__ #elif defined(_WIN32) #define PNANOVDB_BUF_FORCE_INLINE static inline __forceinline #else @@ -146,6 +121,32 @@ PNANOVDB_BUF_FORCE_INLINE uint64_t pnanovdb_buf_read_uint64(pnanovdb_buf_t buf, return data64[wordaddress64]; #endif } +PNANOVDB_BUF_FORCE_INLINE void pnanovdb_buf_write_uint32(pnanovdb_buf_t buf, uint32_t byte_offset, uint32_t value) +{ + uint32_t wordaddress = (byte_offset >> 2u); +#ifdef PNANOVDB_BUF_BOUNDS_CHECK + if (wordaddress < buf.size_in_words) + { + buf.data[wordaddress] = value; +} +#else + buf.data[wordaddress] = value; +#endif +} +PNANOVDB_BUF_FORCE_INLINE void pnanovdb_buf_write_uint64(pnanovdb_buf_t buf, uint32_t byte_offset, uint64_t value) +{ + uint64_t* data64 = (uint64_t*)buf.data; + uint32_t wordaddress64 = (byte_offset >> 3u); +#ifdef PNANOVDB_BUF_BOUNDS_CHECK + uint64_t size_in_words64 = buf.size_in_words >> 1u; + if (wordaddress64 < size_in_words64) + { + data64[wordaddress64] = value; + } +#else + data64[wordaddress64] = value; +#endif +} #elif defined(PNANOVDB_ADDRESS_64) PNANOVDB_BUF_FORCE_INLINE uint32_t pnanovdb_buf_read_uint32(pnanovdb_buf_t buf, uint64_t byte_offset) { @@ -167,6 +168,32 @@ PNANOVDB_BUF_FORCE_INLINE uint64_t pnanovdb_buf_read_uint64(pnanovdb_buf_t buf, return data64[wordaddress64]; #endif } +PNANOVDB_BUF_FORCE_INLINE void pnanovdb_buf_write_uint32(pnanovdb_buf_t buf, uint64_t byte_offset, uint32_t value) +{ + uint64_t wordaddress = (byte_offset >> 2u); +#ifdef PNANOVDB_BUF_BOUNDS_CHECK + if (wordaddress < buf.size_in_words) + { + buf.data[wordaddress] = value; + } +#else + buf.data[wordaddress] = value; +#endif +} +PNANOVDB_BUF_FORCE_INLINE void pnanovdb_buf_write_uint64(pnanovdb_buf_t buf, uint64_t byte_offset, uint64_t value) +{ + uint64_t* data64 = (uint64_t*)buf.data; + uint64_t wordaddress64 = (byte_offset >> 3u); +#ifdef PNANOVDB_BUF_BOUNDS_CHECK + uint64_t size_in_words64 = buf.size_in_words >> 1u; + if (wordaddress64 < size_in_words64) + { + data64[wordaddress64] = value; + } +#else + 
data64[wordaddress64] = value; +#endif +} #endif typedef uint32_t pnanovdb_grid_type_t; #define PNANOVDB_GRID_TYPE_GET(grid_typeIn, nameIn) pnanovdb_grid_type_constants[grid_typeIn].nameIn @@ -184,6 +211,14 @@ uint2 pnanovdb_buf_read_uint64(pnanovdb_buf_t buf, uint byte_offset) ret.y = pnanovdb_buf_read_uint32(buf, byte_offset + 4u); return ret; } +void pnanovdb_buf_write_uint32(pnanovdb_buf_t buf, uint byte_offset, uint value) +{ + // NOP, by default no write in HLSL +} +void pnanovdb_buf_write_uint64(pnanovdb_buf_t buf, uint byte_offset, uint2 value) +{ + // NOP, by default no write in HLSL +} #elif defined(PNANOVDB_ADDRESS_64) #define pnanovdb_buf_t StructuredBuffer uint pnanovdb_buf_read_uint32(pnanovdb_buf_t buf, uint64_t byte_offset) @@ -197,6 +232,14 @@ uint64_t pnanovdb_buf_read_uint64(pnanovdb_buf_t buf, uint64_t byte_offset) ret = ret + (uint64_t(pnanovdb_buf_read_uint32(buf, byte_offset + 4u)) << 32u); return ret; } +void pnanovdb_buf_write_uint32(pnanovdb_buf_t buf, uint64_t byte_offset, uint value) +{ + // NOP, by default no write in HLSL +} +void pnanovdb_buf_write_uint64(pnanovdb_buf_t buf, uint64_t byte_offset, uint64_t value) +{ + // NOP, by default no write in HLSL +} #endif #define pnanovdb_grid_type_t uint #define PNANOVDB_GRID_TYPE_GET(grid_typeIn, nameIn) pnanovdb_grid_type_constants[grid_typeIn].nameIn @@ -216,6 +259,14 @@ uvec2 pnanovdb_buf_read_uint64(pnanovdb_buf_t buf, uint byte_offset) ret.y = pnanovdb_buf_read_uint32(buf, byte_offset + 4u); return ret; } +void pnanovdb_buf_write_uint32(pnanovdb_buf_t buf, uint byte_offset, uint value) +{ + // NOP, by default no write in HLSL +} +void pnanovdb_buf_write_uint64(pnanovdb_buf_t buf, uint byte_offset, uvec2 value) +{ + // NOP, by default no write in HLSL +} #define pnanovdb_grid_type_t uint #define PNANOVDB_GRID_TYPE_GET(grid_typeIn, nameIn) pnanovdb_grid_type_constants[grid_typeIn].nameIn #endif @@ -225,7 +276,7 @@ uvec2 pnanovdb_buf_read_uint64(pnanovdb_buf_t buf, uint byte_offset) // force inline #if defined(PNANOVDB_C) #if defined(__CUDACC__) -#define PNANOVDB_FORCE_INLINE __host__ __device__ static __forceinline__ +#define PNANOVDB_FORCE_INLINE static __host__ __device__ __forceinline__ #elif defined(_WIN32) #define PNANOVDB_FORCE_INLINE static inline __forceinline #else @@ -240,7 +291,11 @@ uvec2 pnanovdb_buf_read_uint64(pnanovdb_buf_t buf, uint byte_offset) // struct typedef, static const, inout #if defined(PNANOVDB_C) #define PNANOVDB_STRUCT_TYPEDEF(X) typedef struct X X; +#if defined(__CUDA_ARCH__) +#define PNANOVDB_STATIC_CONST constexpr __constant__ +#else #define PNANOVDB_STATIC_CONST static const +#endif #define PNANOVDB_INOUT(X) X* #define PNANOVDB_IN(X) const X* #define PNANOVDB_DEREF(X) (*X) @@ -264,9 +319,13 @@ uvec2 pnanovdb_buf_read_uint64(pnanovdb_buf_t buf, uint byte_offset) // basic types, type conversion #if defined(PNANOVDB_C) #define PNANOVDB_NATIVE_64 -// #include +#ifndef __CUDACC_RTC__ +#include +#endif #if !defined(PNANOVDB_MEMCPY_CUSTOM) +#ifndef __CUDACC_RTC__ #include +#endif #define pnanovdb_memcpy memcpy #endif typedef uint32_t pnanovdb_uint32_t; @@ -288,10 +347,10 @@ PNANOVDB_FORCE_INLINE pnanovdb_int32_t pnanovdb_uint32_as_int32(pnanovdb_uint32_ PNANOVDB_FORCE_INLINE pnanovdb_int64_t pnanovdb_uint64_as_int64(pnanovdb_uint64_t v) { return (pnanovdb_int64_t)v; } PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_int64_as_uint64(pnanovdb_int64_t v) { return (pnanovdb_uint64_t)v; } PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_int32_as_uint32(pnanovdb_int32_t v) { return 
(pnanovdb_uint32_t)v; } -// PNANOVDB_FORCE_INLINE float pnanovdb_uint32_as_float(pnanovdb_uint32_t v) { return *((float*)&v); } -// PNANOVDB_FORCE_INLINE double pnanovdb_uint64_as_double(pnanovdb_uint64_t v) { return *((double*)&v); } PNANOVDB_FORCE_INLINE float pnanovdb_uint32_as_float(pnanovdb_uint32_t v) { float vf; pnanovdb_memcpy(&vf, &v, sizeof(vf)); return vf; } +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_float_as_uint32(float v) { return *((pnanovdb_uint32_t*)(&v)); } PNANOVDB_FORCE_INLINE double pnanovdb_uint64_as_double(pnanovdb_uint64_t v) { double vf; pnanovdb_memcpy(&vf, &v, sizeof(vf)); return vf; } +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_double_as_uint64(double v) { return *((pnanovdb_uint64_t*)(&v)); } PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_uint64_low(pnanovdb_uint64_t v) { return (pnanovdb_uint32_t)v; } PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_uint64_high(pnanovdb_uint64_t v) { return (pnanovdb_uint32_t)(v >> 32u); } PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_uint32_as_uint64(pnanovdb_uint32_t x, pnanovdb_uint32_t y) { return ((pnanovdb_uint64_t)x) | (((pnanovdb_uint64_t)y) << 32u); } @@ -317,6 +376,7 @@ typedef float3 pnanovdb_vec3_t; pnanovdb_int32_t pnanovdb_uint32_as_int32(pnanovdb_uint32_t v) { return int(v); } pnanovdb_uint32_t pnanovdb_int32_as_uint32(pnanovdb_int32_t v) { return uint(v); } float pnanovdb_uint32_as_float(pnanovdb_uint32_t v) { return asfloat(v); } +pnanovdb_uint32_t pnanovdb_float_as_uint32(float v) { return asuint(v); } float pnanovdb_floor(float v) { return floor(v); } pnanovdb_int32_t pnanovdb_float_to_int32(float v) { return int(v); } float pnanovdb_int32_to_float(pnanovdb_int32_t v) { return float(v); } @@ -329,6 +389,7 @@ typedef int2 pnanovdb_int64_t; pnanovdb_int64_t pnanovdb_uint64_as_int64(pnanovdb_uint64_t v) { return int2(v); } pnanovdb_uint64_t pnanovdb_int64_as_uint64(pnanovdb_int64_t v) { return uint2(v); } double pnanovdb_uint64_as_double(pnanovdb_uint64_t v) { return asdouble(v.x, v.y); } +pnanovdb_uint64_t pnanovdb_double_as_uint64(double v) { uint2 ret; asuint(v, ret.x, ret.y); return ret; } pnanovdb_uint32_t pnanovdb_uint64_low(pnanovdb_uint64_t v) { return v.x; } pnanovdb_uint32_t pnanovdb_uint64_high(pnanovdb_uint64_t v) { return v.y; } pnanovdb_uint64_t pnanovdb_uint32_as_uint64(pnanovdb_uint32_t x, pnanovdb_uint32_t y) { return uint2(x, y); } @@ -341,6 +402,7 @@ typedef int64_t pnanovdb_int64_t; pnanovdb_int64_t pnanovdb_uint64_as_int64(pnanovdb_uint64_t v) { return int64_t(v); } pnanovdb_uint64_t pnanovdb_int64_as_uint64(pnanovdb_int64_t v) { return uint64_t(v); } double pnanovdb_uint64_as_double(pnanovdb_uint64_t v) { return asdouble(uint(v), uint(v >> 32u)); } +pnanovdb_uint64_t pnanovdb_double_as_uint64(double v) { uint2 ret; asuint(v, ret.x, ret.y); return uint64_t(ret.x) + (uint64_t(ret.y) << 32u); } pnanovdb_uint32_t pnanovdb_uint64_low(pnanovdb_uint64_t v) { return uint(v); } pnanovdb_uint32_t pnanovdb_uint64_high(pnanovdb_uint64_t v) { return uint(v >> 32u); } pnanovdb_uint64_t pnanovdb_uint32_as_uint64(pnanovdb_uint32_t x, pnanovdb_uint32_t y) { return uint64_t(x) + (uint64_t(y) << 32u); } @@ -363,7 +425,9 @@ pnanovdb_int64_t pnanovdb_uint64_as_int64(pnanovdb_uint64_t v) { return ivec2(v) pnanovdb_uint64_t pnanovdb_int64_as_uint64(pnanovdb_int64_t v) { return uvec2(v); } pnanovdb_uint32_t pnanovdb_int32_as_uint32(pnanovdb_int32_t v) { return uint(v); } float pnanovdb_uint32_as_float(pnanovdb_uint32_t v) { return uintBitsToFloat(v); } +pnanovdb_uint32_t 
pnanovdb_float_as_uint32(float v) { return floatBitsToUint(v); } double pnanovdb_uint64_as_double(pnanovdb_uint64_t v) { return packDouble2x32(uvec2(v.x, v.y)); } +pnanovdb_uint64_t pnanovdb_double_as_uint64(double v) { return unpackDouble2x32(v); } pnanovdb_uint32_t pnanovdb_uint64_low(pnanovdb_uint64_t v) { return v.x; } pnanovdb_uint32_t pnanovdb_uint64_high(pnanovdb_uint64_t v) { return v.y; } pnanovdb_uint64_t pnanovdb_uint32_as_uint64(pnanovdb_uint32_t x, pnanovdb_uint32_t y) { return uvec2(x, y); } @@ -437,14 +501,6 @@ PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_vec3_max(const pnanovdb_vec3_t a, v.z = a.z > b.z ? a.z : b.z; return v; } -PNANOVDB_FORCE_INLINE pnanovdb_coord_t pnanovdb_vec3_round_to_coord(const pnanovdb_vec3_t a) -{ - pnanovdb_coord_t v; - v.x = pnanovdb_float_to_int32(roundf(a.x)); - v.y = pnanovdb_float_to_int32(roundf(a.y)); - v.z = pnanovdb_float_to_int32(roundf(a.z)); - return v; -} PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_coord_to_vec3(const pnanovdb_coord_t coord) { pnanovdb_vec3_t v; @@ -493,6 +549,119 @@ pnanovdb_coord_t pnanovdb_coord_uniform(pnanovdb_int32_t a) { return ivec3(a, a, pnanovdb_coord_t pnanovdb_coord_add(pnanovdb_coord_t a, pnanovdb_coord_t b) { return a + b; } #endif +// ------------------------------------------------ Uint64 Utils ----------------------------------------------------------- + +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_uint32_countbits(pnanovdb_uint32_t value) +{ +#if defined(PNANOVDB_C) +#if defined(_MSC_VER) && (_MSC_VER >= 1928) && defined(PNANOVDB_USE_INTRINSICS) + return __popcnt(value); +#elif (defined(__GNUC__) || defined(__clang__)) && defined(PNANOVDB_USE_INTRINSICS) + return __builtin_popcount(value); +#else + value = value - ((value >> 1) & 0x55555555); + value = (value & 0x33333333) + ((value >> 2) & 0x33333333); + value = (value + (value >> 4)) & 0x0F0F0F0F; + return (value * 0x01010101) >> 24; +#endif +#elif defined(PNANOVDB_HLSL) + return countbits(value); +#elif defined(PNANOVDB_GLSL) + return bitCount(value); +#endif +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_uint64_countbits(pnanovdb_uint64_t value) +{ + return pnanovdb_uint32_countbits(pnanovdb_uint64_low(value)) + pnanovdb_uint32_countbits(pnanovdb_uint64_high(value)); +} + +#if defined(PNANOVDB_ADDRESS_32) +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_uint64_offset(pnanovdb_uint64_t a, pnanovdb_uint32_t b) +{ + pnanovdb_uint32_t low = pnanovdb_uint64_low(a); + pnanovdb_uint32_t high = pnanovdb_uint64_high(a); + low += b; + if (low < b) + { + high += 1u; + } + return pnanovdb_uint32_as_uint64(low, high); +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_uint64_dec(pnanovdb_uint64_t a) +{ + pnanovdb_uint32_t low = pnanovdb_uint64_low(a); + pnanovdb_uint32_t high = pnanovdb_uint64_high(a); + if (low == 0u) + { + high -= 1u; + } + low -= 1u; + return pnanovdb_uint32_as_uint64(low, high); +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_uint64_to_uint32_lsr(pnanovdb_uint64_t a, pnanovdb_uint32_t b) +{ + pnanovdb_uint32_t low = pnanovdb_uint64_low(a); + pnanovdb_uint32_t high = pnanovdb_uint64_high(a); + return (b >= 32u) ? + (high >> (b - 32)) : + ((low >> b) | ((b > 0) ? (high << (32u - b)) : 0u)); +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_uint64_bit_mask(pnanovdb_uint32_t bit_idx) +{ + pnanovdb_uint32_t mask_low = bit_idx < 32u ? 1u << bit_idx : 0u; + pnanovdb_uint32_t mask_high = bit_idx >= 32u ? 
1u << (bit_idx - 32u) : 0u; + return pnanovdb_uint32_as_uint64(mask_low, mask_high); +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_uint64_and(pnanovdb_uint64_t a, pnanovdb_uint64_t b) +{ + return pnanovdb_uint32_as_uint64( + pnanovdb_uint64_low(a) & pnanovdb_uint64_low(b), + pnanovdb_uint64_high(a) & pnanovdb_uint64_high(b) + ); +} + +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_uint64_any_bit(pnanovdb_uint64_t a) +{ + return pnanovdb_uint64_low(a) != 0u || pnanovdb_uint64_high(a) != 0u; +} + +#else +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_uint64_offset(pnanovdb_uint64_t a, pnanovdb_uint32_t b) +{ + return a + b; +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_uint64_dec(pnanovdb_uint64_t a) +{ + return a - 1u; +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_uint64_to_uint32_lsr(pnanovdb_uint64_t a, pnanovdb_uint32_t b) +{ + return pnanovdb_uint64_low(a >> b); +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_uint64_bit_mask(pnanovdb_uint32_t bit_idx) +{ + return 1llu << bit_idx; +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_uint64_and(pnanovdb_uint64_t a, pnanovdb_uint64_t b) +{ + return a & b; +} + +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_uint64_any_bit(pnanovdb_uint64_t a) +{ + return a != 0llu; +} +#endif + // ------------------------------------------------ Address Type ----------------------------------------------------------- #if defined(PNANOVDB_ADDRESS_32) @@ -527,6 +696,12 @@ PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_address_offset64(pnanovdb_addr ret.byte_offset += pnanovdb_uint64_low(byte_offset); return ret; } +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_address_offset64_product(pnanovdb_address_t address, pnanovdb_uint64_t byte_offset, pnanovdb_uint32_t multiplier) +{ + pnanovdb_address_t ret = address; + ret.byte_offset += pnanovdb_uint64_low(byte_offset) * multiplier; + return ret; +} PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_address_mask(pnanovdb_address_t address, pnanovdb_uint32_t mask) { return address.byte_offset & mask; @@ -581,6 +756,12 @@ PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_address_offset64(pnanovdb_addr ret.byte_offset += byte_offset; return ret; } +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_address_offset64_product(pnanovdb_address_t address, pnanovdb_uint64_t byte_offset, pnanovdb_uint32_t multiplier) +{ + pnanovdb_address_t ret = address; + ret.byte_offset += byte_offset * pnanovdb_uint32_as_uint64_low(multiplier); + return ret; +} PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_address_mask(pnanovdb_address_t address, pnanovdb_uint32_t mask) { return pnanovdb_uint64_low(address.byte_offset) & mask; @@ -640,12 +821,41 @@ PNANOVDB_FORCE_INLINE pnanovdb_coord_t pnanovdb_read_coord(pnanovdb_buf_t buf, p ret.z = pnanovdb_uint32_as_int32(pnanovdb_read_uint32(buf, pnanovdb_address_offset(address, 8u))); return ret; } -PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_read_vec3f(pnanovdb_buf_t buf, pnanovdb_address_t address) +PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_read_vec3(pnanovdb_buf_t buf, pnanovdb_address_t address) +{ + pnanovdb_vec3_t ret; + ret.x = pnanovdb_read_float(buf, pnanovdb_address_offset(address, 0u)); + ret.y = pnanovdb_read_float(buf, pnanovdb_address_offset(address, 4u)); + ret.z = pnanovdb_read_float(buf, pnanovdb_address_offset(address, 8u)); + return ret; +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_read_uint16(pnanovdb_buf_t buf, pnanovdb_address_t address) +{ + pnanovdb_uint32_t raw = pnanovdb_read_uint32(buf, 
pnanovdb_address_mask_inv(address, 3u)); + return (raw >> (pnanovdb_address_mask(address, 2) << 3)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_read_uint8(pnanovdb_buf_t buf, pnanovdb_address_t address) +{ + pnanovdb_uint32_t raw = pnanovdb_read_uint32(buf, pnanovdb_address_mask_inv(address, 3u)); + return (raw >> (pnanovdb_address_mask(address, 3) << 3)) & 255; +} +PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_read_vec3u16(pnanovdb_buf_t buf, pnanovdb_address_t address) { pnanovdb_vec3_t ret; - ret.x = pnanovdb_uint32_as_float(pnanovdb_read_uint32(buf, pnanovdb_address_offset(address, 0u))); - ret.y = pnanovdb_uint32_as_float(pnanovdb_read_uint32(buf, pnanovdb_address_offset(address, 4u))); - ret.z = pnanovdb_uint32_as_float(pnanovdb_read_uint32(buf, pnanovdb_address_offset(address, 8u))); + const float scale = 1.f / 65535.f; + ret.x = scale * pnanovdb_uint32_to_float(pnanovdb_read_uint16(buf, pnanovdb_address_offset(address, 0u))) - 0.5f; + ret.y = scale * pnanovdb_uint32_to_float(pnanovdb_read_uint16(buf, pnanovdb_address_offset(address, 2u))) - 0.5f; + ret.z = scale * pnanovdb_uint32_to_float(pnanovdb_read_uint16(buf, pnanovdb_address_offset(address, 4u))) - 0.5f; + return ret; +} +PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_read_vec3u8(pnanovdb_buf_t buf, pnanovdb_address_t address) +{ + pnanovdb_vec3_t ret; + const float scale = 1.f / 255.f; + ret.x = scale * pnanovdb_uint32_to_float(pnanovdb_read_uint8(buf, pnanovdb_address_offset(address, 0u))) - 0.5f; + ret.y = scale * pnanovdb_uint32_to_float(pnanovdb_read_uint8(buf, pnanovdb_address_offset(address, 1u))) - 0.5f; + ret.z = scale * pnanovdb_uint32_to_float(pnanovdb_read_uint8(buf, pnanovdb_address_offset(address, 2u))) - 0.5f; return ret; } @@ -677,13 +887,54 @@ PNANOVDB_FORCE_INLINE float pnanovdb_read_half(pnanovdb_buf_t buf, pnanovdb_addr } #endif +// ------------------------------------------------ High Level Buffer Write ----------------------------------------------------------- + +PNANOVDB_FORCE_INLINE void pnanovdb_write_uint32(pnanovdb_buf_t buf, pnanovdb_address_t address, pnanovdb_uint32_t value) +{ + pnanovdb_buf_write_uint32(buf, address.byte_offset, value); +} +PNANOVDB_FORCE_INLINE void pnanovdb_write_uint64(pnanovdb_buf_t buf, pnanovdb_address_t address, pnanovdb_uint64_t value) +{ + pnanovdb_buf_write_uint64(buf, address.byte_offset, value); +} +PNANOVDB_FORCE_INLINE void pnanovdb_write_int32(pnanovdb_buf_t buf, pnanovdb_address_t address, pnanovdb_int32_t value) +{ + pnanovdb_write_uint32(buf, address, pnanovdb_int32_as_uint32(value)); +} +PNANOVDB_FORCE_INLINE void pnanovdb_write_int64(pnanovdb_buf_t buf, pnanovdb_address_t address, pnanovdb_int64_t value) +{ + pnanovdb_buf_write_uint64(buf, address.byte_offset, pnanovdb_int64_as_uint64(value)); +} +PNANOVDB_FORCE_INLINE void pnanovdb_write_float(pnanovdb_buf_t buf, pnanovdb_address_t address, float value) +{ + pnanovdb_write_uint32(buf, address, pnanovdb_float_as_uint32(value)); +} +PNANOVDB_FORCE_INLINE void pnanovdb_write_double(pnanovdb_buf_t buf, pnanovdb_address_t address, double value) +{ + pnanovdb_write_uint64(buf, address, pnanovdb_double_as_uint64(value)); +} +PNANOVDB_FORCE_INLINE void pnanovdb_write_coord(pnanovdb_buf_t buf, pnanovdb_address_t address, PNANOVDB_IN(pnanovdb_coord_t) value) +{ + pnanovdb_write_uint32(buf, pnanovdb_address_offset(address, 0u), pnanovdb_int32_as_uint32(PNANOVDB_DEREF(value).x)); + pnanovdb_write_uint32(buf, pnanovdb_address_offset(address, 4u), pnanovdb_int32_as_uint32(PNANOVDB_DEREF(value).y)); + 
pnanovdb_write_uint32(buf, pnanovdb_address_offset(address, 8u), pnanovdb_int32_as_uint32(PNANOVDB_DEREF(value).z)); +} +PNANOVDB_FORCE_INLINE void pnanovdb_write_vec3(pnanovdb_buf_t buf, pnanovdb_address_t address, PNANOVDB_IN(pnanovdb_vec3_t) value) +{ + pnanovdb_write_float(buf, pnanovdb_address_offset(address, 0u), PNANOVDB_DEREF(value).x); + pnanovdb_write_float(buf, pnanovdb_address_offset(address, 4u), PNANOVDB_DEREF(value).y); + pnanovdb_write_float(buf, pnanovdb_address_offset(address, 8u), PNANOVDB_DEREF(value).z); +} + // ------------------------------------------------ Core Structures ----------------------------------------------------------- #define PNANOVDB_MAGIC_NUMBER 0x304244566f6e614eUL// "NanoVDB0" in hex - little endian (uint64_t) +#define PNANOVDB_MAGIC_GRID 0x314244566f6e614eUL// "NanoVDB1" in hex - little endian (uint64_t) +#define PNANOVDB_MAGIC_FILE 0x324244566f6e614eUL// "NanoVDB2" in hex - little endian (uint64_t) #define PNANOVDB_MAJOR_VERSION_NUMBER 32// reflects changes to the ABI -#define PNANOVDB_MINOR_VERSION_NUMBER 3// reflects changes to the API but not ABI -#define PNANOVDB_PATCH_VERSION_NUMBER 3// reflects bug-fixes with no ABI or API changes +#define PNANOVDB_MINOR_VERSION_NUMBER 7// reflects changes to the API but not ABI +#define PNANOVDB_PATCH_VERSION_NUMBER 0// reflects bug-fixes with no ABI or API changes #define PNANOVDB_GRID_TYPE_UNKNOWN 0 #define PNANOVDB_GRID_TYPE_FLOAT 1 @@ -704,17 +955,27 @@ PNANOVDB_FORCE_INLINE float pnanovdb_read_half(pnanovdb_buf_t buf, pnanovdb_addr #define PNANOVDB_GRID_TYPE_FPN 16 #define PNANOVDB_GRID_TYPE_VEC4F 17 #define PNANOVDB_GRID_TYPE_VEC4D 18 -#define PNANOVDB_GRID_TYPE_END 19 +#define PNANOVDB_GRID_TYPE_INDEX 19 +#define PNANOVDB_GRID_TYPE_ONINDEX 20 +#define PNANOVDB_GRID_TYPE_INDEXMASK 21 +#define PNANOVDB_GRID_TYPE_ONINDEXMASK 22 +#define PNANOVDB_GRID_TYPE_POINTINDEX 23 +#define PNANOVDB_GRID_TYPE_VEC3U8 24 +#define PNANOVDB_GRID_TYPE_VEC3U16 25 +#define PNANOVDB_GRID_TYPE_UINT8 26 +#define PNANOVDB_GRID_TYPE_END 27 #define PNANOVDB_GRID_CLASS_UNKNOWN 0 -#define PNANOVDB_GRID_CLASS_LEVEL_SET 1 // narrow band levelset, e.g. SDF +#define PNANOVDB_GRID_CLASS_LEVEL_SET 1 // narrow band level set, e.g. SDF #define PNANOVDB_GRID_CLASS_FOG_VOLUME 2 // fog volume, e.g. density #define PNANOVDB_GRID_CLASS_STAGGERED 3 // staggered MAC grid, e.g. velocity #define PNANOVDB_GRID_CLASS_POINT_INDEX 4 // point index grid #define PNANOVDB_GRID_CLASS_POINT_DATA 5 // point data grid #define PNANOVDB_GRID_CLASS_TOPOLOGY 6 // grid with active states only (no values) #define PNANOVDB_GRID_CLASS_VOXEL_VOLUME 7 // volume of geometric cubes, e.g. minecraft -#define PNANOVDB_GRID_CLASS_END 8 +#define PNANOVDB_GRID_CLASS_INDEX_GRID 8 // grid whose values are offsets, e.g. 
into an external array +#define PNANOVDB_GRID_CLASS_TENSOR_GRID 9 // grid which can have extra metadata and features +#define PNANOVDB_GRID_CLASS_END 10 #define PNANOVDB_GRID_FLAGS_HAS_LONG_GRID_NAME (1 << 0) #define PNANOVDB_GRID_FLAGS_HAS_BBOX (1 << 1) @@ -727,13 +988,23 @@ PNANOVDB_FORCE_INLINE float pnanovdb_read_half(pnanovdb_buf_t buf, pnanovdb_addr #define PNANOVDB_LEAF_TYPE_DEFAULT 0 #define PNANOVDB_LEAF_TYPE_LITE 1 #define PNANOVDB_LEAF_TYPE_FP 2 - -PNANOVDB_STATIC_CONST pnanovdb_uint32_t pnanovdb_grid_type_value_strides_bits[PNANOVDB_GRID_TYPE_END] = { 0, 32, 64, 16, 32, 64, 96, 192, 0, 16, 32, 1, 32, 4, 8, 16, 0, 128, 256 }; -PNANOVDB_STATIC_CONST pnanovdb_uint32_t pnanovdb_grid_type_table_strides_bits[PNANOVDB_GRID_TYPE_END] = { 64, 64, 64, 64, 64, 64, 128, 192, 64, 64, 64, 64, 64, 64, 64, 64, 64, 128, 256 }; -PNANOVDB_STATIC_CONST pnanovdb_uint32_t pnanovdb_grid_type_minmax_strides_bits[PNANOVDB_GRID_TYPE_END] = { 0, 32, 64, 16, 32, 64, 96, 192, 8, 16, 32, 8, 32, 32, 32, 32, 32, 128, 256 }; -PNANOVDB_STATIC_CONST pnanovdb_uint32_t pnanovdb_grid_type_minmax_aligns_bits[PNANOVDB_GRID_TYPE_END] = { 0, 32, 64, 16, 32, 64, 32, 64, 8, 16, 32, 8, 32, 32, 32, 32, 32, 32, 64 }; -PNANOVDB_STATIC_CONST pnanovdb_uint32_t pnanovdb_grid_type_stat_strides_bits[PNANOVDB_GRID_TYPE_END] = { 0, 32, 64, 32, 32, 64, 32, 64, 8, 32, 32, 8, 32, 32, 32, 32, 32, 32, 64 }; -PNANOVDB_STATIC_CONST pnanovdb_uint32_t pnanovdb_grid_type_leaf_type[PNANOVDB_GRID_TYPE_END] = { 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 2, 2, 2, 2, 0, 0 }; +#define PNANOVDB_LEAF_TYPE_INDEX 3 +#define PNANOVDB_LEAF_TYPE_INDEXMASK 4 +#define PNANOVDB_LEAF_TYPE_POINTINDEX 5 + +// BuildType = Unknown, float, double, int16_t, int32_t, int64_t, Vec3f, Vec3d, Mask, ... +// bit count of values in leaf nodes, i.e. 8*sizeof(*nanovdb::LeafNode::mValues) or zero if no values are stored +PNANOVDB_STATIC_CONST pnanovdb_uint32_t pnanovdb_grid_type_value_strides_bits[PNANOVDB_GRID_TYPE_END] = { 0, 32, 64, 16, 32, 64, 96, 192, 0, 16, 32, 1, 32, 4, 8, 16, 0, 128, 256, 0, 0, 0, 0, 16, 24, 48, 8 }; +// bit count of the Tile union in InternalNodes, i.e. 8*sizeof(nanovdb::InternalData::Tile) +PNANOVDB_STATIC_CONST pnanovdb_uint32_t pnanovdb_grid_type_table_strides_bits[PNANOVDB_GRID_TYPE_END] = { 64, 64, 64, 64, 64, 64, 128, 192, 64, 64, 64, 64, 64, 64, 64, 64, 64, 128, 256, 64, 64, 64, 64, 64, 64, 64, 64 }; +// bit count of min/max values, i.e. 8*sizeof(nanovdb::LeafData::mMinimum) or zero if no min/max exists +PNANOVDB_STATIC_CONST pnanovdb_uint32_t pnanovdb_grid_type_minmax_strides_bits[PNANOVDB_GRID_TYPE_END] = { 0, 32, 64, 16, 32, 64, 96, 192, 8, 16, 32, 8, 32, 32, 32, 32, 32, 128, 256, 64, 64, 64, 64, 64, 24, 48, 8 }; +// bit alignment of the value type, controlled by the smallest native type, which is why it is always 0, 8, 16, 32, or 64, e.g. for Vec3f it is 32 +PNANOVDB_STATIC_CONST pnanovdb_uint32_t pnanovdb_grid_type_minmax_aligns_bits[PNANOVDB_GRID_TYPE_END] = { 0, 32, 64, 16, 32, 64, 32, 64, 8, 16, 32, 8, 32, 32, 32, 32, 32, 32, 64, 64, 64, 64, 64, 64, 8, 16, 8 }; +// bit alignment of the stats (avg/std-dev) types, e.g. 8*sizeof(nanovdb::LeafData::mAverage) +PNANOVDB_STATIC_CONST pnanovdb_uint32_t pnanovdb_grid_type_stat_strides_bits[PNANOVDB_GRID_TYPE_END] = { 0, 32, 64, 32, 32, 64, 32, 64, 8, 32, 32, 8, 32, 32, 32, 32, 32, 32, 64, 64, 64, 64, 64, 64, 32, 32, 32 }; +// one of the 4 leaf types defined above, e.g. 
PNANOVDB_LEAF_TYPE_INDEX = 3 +PNANOVDB_STATIC_CONST pnanovdb_uint32_t pnanovdb_grid_type_leaf_type[PNANOVDB_GRID_TYPE_END] = { 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 2, 2, 2, 2, 0, 0, 3, 3, 4, 4, 5, 0, 0, 0 }; struct pnanovdb_map_t { @@ -786,6 +1057,31 @@ PNANOVDB_FORCE_INLINE double pnanovdb_map_get_taperd(pnanovdb_buf_t buf, pnanovd return pnanovdb_read_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_TAPERD)); } +PNANOVDB_FORCE_INLINE void pnanovdb_map_set_matf(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index, float matf) { + pnanovdb_write_float(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_MATF + 4u * index), matf); +} +PNANOVDB_FORCE_INLINE void pnanovdb_map_set_invmatf(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index, float invmatf) { + pnanovdb_write_float(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_INVMATF + 4u * index), invmatf); +} +PNANOVDB_FORCE_INLINE void pnanovdb_map_set_vecf(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index, float vecf) { + pnanovdb_write_float(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_VECF + 4u * index), vecf); +} +PNANOVDB_FORCE_INLINE void pnanovdb_map_set_taperf(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index, float taperf) { + pnanovdb_write_float(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_TAPERF), taperf); +} +PNANOVDB_FORCE_INLINE void pnanovdb_map_set_matd(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index, double matd) { + pnanovdb_write_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_MATD + 8u * index), matd); +} +PNANOVDB_FORCE_INLINE void pnanovdb_map_set_invmatd(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index, double invmatd) { + pnanovdb_write_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_INVMATD + 8u * index), invmatd); +} +PNANOVDB_FORCE_INLINE void pnanovdb_map_set_vecd(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index, double vecd) { + pnanovdb_write_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_VECD + 8u * index), vecd); +} +PNANOVDB_FORCE_INLINE void pnanovdb_map_set_taperd(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index, double taperd) { + pnanovdb_write_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_TAPERD), taperd); +} + struct pnanovdb_grid_t { pnanovdb_uint64_t magic; // 8 bytes, 0 @@ -875,6 +1171,54 @@ PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_grid_get_blind_metadata_count(p return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_BLIND_METADATA_COUNT)); } +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_magic(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint64_t magic) { + pnanovdb_write_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_MAGIC), magic); +} +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_checksum(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint64_t checksum) { + pnanovdb_write_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_CHECKSUM), checksum); +} +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_version(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t version) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_VERSION), version); +} +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_flags(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t flags) { + pnanovdb_write_uint32(buf, 
pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_FLAGS), flags); +} +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_grid_index(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t grid_index) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_GRID_INDEX), grid_index); +} +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_grid_count(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t grid_count) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_GRID_COUNT), grid_count); +} +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_grid_size(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint64_t grid_size) { + pnanovdb_write_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_GRID_SIZE), grid_size); +} +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_grid_name(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t index, pnanovdb_uint32_t grid_name) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_GRID_NAME + 4u * index), grid_name); +} +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_world_bbox(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t index, double world_bbox) { + pnanovdb_write_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_WORLD_BBOX + 8u * index), world_bbox); +} +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_voxel_size(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t index, double voxel_size) { + pnanovdb_write_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_VOXEL_SIZE + 8u * index), voxel_size); +} +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_grid_class(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t grid_class) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_GRID_CLASS), grid_class); +} +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_grid_type(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t grid_type) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_GRID_TYPE), grid_type); +} +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_blind_metadata_offset(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint64_t blind_metadata_offset) { + pnanovdb_write_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_BLIND_METADATA_OFFSET), blind_metadata_offset); +} +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_blind_metadata_count(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t metadata_count) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_BLIND_METADATA_COUNT), metadata_count); +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_make_version(pnanovdb_uint32_t major, pnanovdb_uint32_t minor, pnanovdb_uint32_t patch_num) +{ + return (major << 21u) | (minor << 10u) | patch_num; +} + PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_version_get_major(pnanovdb_uint32_t version) { return (version >> 21u) & ((1u << 11u) - 1u); @@ -890,9 +1234,9 @@ PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_version_get_patch(pnanovdb_uint struct pnanovdb_gridblindmetadata_t { - pnanovdb_int64_t byte_offset; // 8 bytes, 0 - pnanovdb_uint64_t element_count; // 8 bytes, 8 - pnanovdb_uint32_t flags; // 4 bytes, 16 + pnanovdb_int64_t data_offset; // 8 bytes, 0 + pnanovdb_uint64_t value_count; // 8 bytes, 8 + pnanovdb_uint32_t value_size; // 4 bytes, 16 pnanovdb_uint32_t semantic; // 4 bytes, 20 pnanovdb_uint32_t data_class; // 4 bytes, 24 pnanovdb_uint32_t 
data_type; // 4 bytes, 28 @@ -904,22 +1248,22 @@ PNANOVDB_STRUCT_TYPEDEF(pnanovdb_gridblindmetadata_handle_t) #define PNANOVDB_GRIDBLINDMETADATA_SIZE 288 -#define PNANOVDB_GRIDBLINDMETADATA_OFF_BYTE_OFFSET 0 -#define PNANOVDB_GRIDBLINDMETADATA_OFF_ELEMENT_COUNT 8 -#define PNANOVDB_GRIDBLINDMETADATA_OFF_FLAGS 16 +#define PNANOVDB_GRIDBLINDMETADATA_OFF_DATA_OFFSET 0 +#define PNANOVDB_GRIDBLINDMETADATA_OFF_VALUE_COUNT 8 +#define PNANOVDB_GRIDBLINDMETADATA_OFF_VALUE_SIZE 16 #define PNANOVDB_GRIDBLINDMETADATA_OFF_SEMANTIC 20 #define PNANOVDB_GRIDBLINDMETADATA_OFF_DATA_CLASS 24 #define PNANOVDB_GRIDBLINDMETADATA_OFF_DATA_TYPE 28 #define PNANOVDB_GRIDBLINDMETADATA_OFF_NAME 32 -PNANOVDB_FORCE_INLINE pnanovdb_int64_t pnanovdb_gridblindmetadata_get_byte_offset(pnanovdb_buf_t buf, pnanovdb_gridblindmetadata_handle_t p) { - return pnanovdb_read_int64(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRIDBLINDMETADATA_OFF_BYTE_OFFSET)); +PNANOVDB_FORCE_INLINE pnanovdb_int64_t pnanovdb_gridblindmetadata_get_data_offset(pnanovdb_buf_t buf, pnanovdb_gridblindmetadata_handle_t p) { + return pnanovdb_read_int64(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRIDBLINDMETADATA_OFF_DATA_OFFSET)); } -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_gridblindmetadata_get_element_count(pnanovdb_buf_t buf, pnanovdb_gridblindmetadata_handle_t p) { - return pnanovdb_read_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRIDBLINDMETADATA_OFF_ELEMENT_COUNT)); +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_gridblindmetadata_get_value_count(pnanovdb_buf_t buf, pnanovdb_gridblindmetadata_handle_t p) { + return pnanovdb_read_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRIDBLINDMETADATA_OFF_VALUE_COUNT)); } -PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_gridblindmetadata_get_flags(pnanovdb_buf_t buf, pnanovdb_gridblindmetadata_handle_t p) { - return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRIDBLINDMETADATA_OFF_FLAGS)); +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_gridblindmetadata_get_value_size(pnanovdb_buf_t buf, pnanovdb_gridblindmetadata_handle_t p) { + return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRIDBLINDMETADATA_OFF_VALUE_SIZE)); } PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_gridblindmetadata_get_semantic(pnanovdb_buf_t buf, pnanovdb_gridblindmetadata_handle_t p) { return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRIDBLINDMETADATA_OFF_SEMANTIC)); @@ -1000,6 +1344,40 @@ PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_tree_get_voxel_count(pnanovdb_b return pnanovdb_read_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_VOXEL_COUNT)); } +PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_node_offset_leaf(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint64_t node_offset_leaf) { + pnanovdb_write_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_OFFSET_LEAF), node_offset_leaf); +} +PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_node_offset_lower(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint64_t node_offset_lower) { + pnanovdb_write_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_OFFSET_LOWER), node_offset_lower); +} +PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_node_offset_upper(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint64_t node_offset_upper) { + pnanovdb_write_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_OFFSET_UPPER), node_offset_upper); +} +PNANOVDB_FORCE_INLINE void 
pnanovdb_tree_set_node_offset_root(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint64_t node_offset_root) { + pnanovdb_write_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_OFFSET_ROOT), node_offset_root); +} +PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_node_count_leaf(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint32_t node_count_leaf) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_COUNT_LEAF), node_count_leaf); +} +PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_node_count_lower(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint32_t node_count_lower) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_COUNT_LOWER), node_count_lower); +} +PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_node_count_upper(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint32_t node_count_upper) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_COUNT_UPPER), node_count_upper); +} +PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_tile_count_leaf(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint32_t tile_count_leaf) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_TILE_COUNT_LEAF), tile_count_leaf); +} +PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_tile_count_lower(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint32_t tile_count_lower) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_TILE_COUNT_LOWER), tile_count_lower); +} +PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_tile_count_upper(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint32_t tile_count_upper) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_TILE_COUNT_UPPER), tile_count_upper); +} +PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_voxel_count(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint64_t voxel_count) { + pnanovdb_write_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_VOXEL_COUNT), voxel_count); +} + struct pnanovdb_root_t { pnanovdb_coord_t bbox_min; @@ -1028,6 +1406,16 @@ PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_root_get_tile_count(pnanovdb_bu return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_OFF_TABLE_SIZE)); } +PNANOVDB_FORCE_INLINE void pnanovdb_root_set_bbox_min(pnanovdb_buf_t buf, pnanovdb_root_handle_t p, PNANOVDB_IN(pnanovdb_coord_t) bbox_min) { + pnanovdb_write_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_OFF_BBOX_MIN), bbox_min); +} +PNANOVDB_FORCE_INLINE void pnanovdb_root_set_bbox_max(pnanovdb_buf_t buf, pnanovdb_root_handle_t p, PNANOVDB_IN(pnanovdb_coord_t) bbox_max) { + pnanovdb_write_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_OFF_BBOX_MAX), bbox_max); +} +PNANOVDB_FORCE_INLINE void pnanovdb_root_set_tile_count(pnanovdb_buf_t buf, pnanovdb_root_handle_t p, pnanovdb_uint32_t tile_count) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_OFF_TABLE_SIZE), tile_count); +} + struct pnanovdb_root_tile_t { pnanovdb_uint64_t key; @@ -1056,6 +1444,16 @@ PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_root_tile_get_state(pnanovdb_bu return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_TILE_OFF_STATE)); } +PNANOVDB_FORCE_INLINE void pnanovdb_root_tile_set_key(pnanovdb_buf_t buf, pnanovdb_root_tile_handle_t p, pnanovdb_uint64_t key) { + pnanovdb_write_uint64(buf, 
pnanovdb_address_offset(p.address, PNANOVDB_ROOT_TILE_OFF_KEY), key); +} +PNANOVDB_FORCE_INLINE void pnanovdb_root_tile_set_child(pnanovdb_buf_t buf, pnanovdb_root_tile_handle_t p, pnanovdb_int64_t child) { + pnanovdb_write_int64(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_TILE_OFF_CHILD), child); +} +PNANOVDB_FORCE_INLINE void pnanovdb_root_tile_set_state(pnanovdb_buf_t buf, pnanovdb_root_tile_handle_t p, pnanovdb_uint32_t state) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_TILE_OFF_STATE), state); +} + struct pnanovdb_upper_t { pnanovdb_coord_t bbox_min; @@ -1097,6 +1495,20 @@ PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_upper_get_child_mask(pnanovdb_buf return ((value >> (bit_index & 31u)) & 1) != 0u; } +PNANOVDB_FORCE_INLINE void pnanovdb_upper_set_bbox_min(pnanovdb_buf_t buf, pnanovdb_upper_handle_t p, PNANOVDB_IN(pnanovdb_coord_t) bbox_min) { + pnanovdb_write_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_UPPER_OFF_BBOX_MIN), bbox_min); +} +PNANOVDB_FORCE_INLINE void pnanovdb_upper_set_bbox_max(pnanovdb_buf_t buf, pnanovdb_upper_handle_t p, PNANOVDB_IN(pnanovdb_coord_t) bbox_max) { + pnanovdb_write_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_UPPER_OFF_BBOX_MAX), bbox_max); +} +PNANOVDB_FORCE_INLINE void pnanovdb_upper_set_child_mask(pnanovdb_buf_t buf, pnanovdb_upper_handle_t p, pnanovdb_uint32_t bit_index, pnanovdb_bool_t value) { + pnanovdb_address_t addr = pnanovdb_address_offset(p.address, PNANOVDB_UPPER_OFF_CHILD_MASK + 4u * (bit_index >> 5u)); + pnanovdb_uint32_t valueMask = pnanovdb_read_uint32(buf, addr); + if (!value) { valueMask &= ~(1u << (bit_index & 31u)); } + if (value) valueMask |= (1u << (bit_index & 31u)); + pnanovdb_write_uint32(buf, addr, valueMask); +} + struct pnanovdb_lower_t { pnanovdb_coord_t bbox_min; @@ -1138,6 +1550,20 @@ PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_lower_get_child_mask(pnanovdb_buf return ((value >> (bit_index & 31u)) & 1) != 0u; } +PNANOVDB_FORCE_INLINE void pnanovdb_lower_set_bbox_min(pnanovdb_buf_t buf, pnanovdb_lower_handle_t p, PNANOVDB_IN(pnanovdb_coord_t) bbox_min) { + pnanovdb_write_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_LOWER_OFF_BBOX_MIN), bbox_min); +} +PNANOVDB_FORCE_INLINE void pnanovdb_lower_set_bbox_max(pnanovdb_buf_t buf, pnanovdb_lower_handle_t p, PNANOVDB_IN(pnanovdb_coord_t) bbox_max) { + pnanovdb_write_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_LOWER_OFF_BBOX_MAX), bbox_max); +} +PNANOVDB_FORCE_INLINE void pnanovdb_lower_set_child_mask(pnanovdb_buf_t buf, pnanovdb_lower_handle_t p, pnanovdb_uint32_t bit_index, pnanovdb_bool_t value) { + pnanovdb_address_t addr = pnanovdb_address_offset(p.address, PNANOVDB_LOWER_OFF_CHILD_MASK + 4u * (bit_index >> 5u)); + pnanovdb_uint32_t valueMask = pnanovdb_read_uint32(buf, addr); + if (!value) { valueMask &= ~(1u << (bit_index & 31u)); } + if (value) valueMask |= (1u << (bit_index & 31u)); + pnanovdb_write_uint32(buf, addr, valueMask); +} + struct pnanovdb_leaf_t { pnanovdb_coord_t bbox_min; @@ -1172,6 +1598,13 @@ PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_leaf_get_value_mask(pnanovdb_buf_ return ((value >> (bit_index & 31u)) & 1) != 0u; } +PNANOVDB_FORCE_INLINE void pnanovdb_leaf_set_bbox_min(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t p, PNANOVDB_IN(pnanovdb_coord_t) bbox_min) { + pnanovdb_write_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_LEAF_OFF_BBOX_MIN), bbox_min); +} +PNANOVDB_FORCE_INLINE void pnanovdb_leaf_set_bbox_dif_and_flags(pnanovdb_buf_t buf, 
pnanovdb_leaf_handle_t p, pnanovdb_uint32_t bbox_dif_and_flags) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_LEAF_OFF_BBOX_DIF_AND_FLAGS), bbox_dif_and_flags); +} + struct pnanovdb_grid_type_constants_t { pnanovdb_uint32_t root_off_background; @@ -1205,27 +1638,36 @@ struct pnanovdb_grid_type_constants_t }; PNANOVDB_STRUCT_TYPEDEF(pnanovdb_grid_type_constants_t) +// The following table with offsets will nedd to be updates as new GridTypes are added in NanoVDB.h PNANOVDB_STATIC_CONST pnanovdb_grid_type_constants_t pnanovdb_grid_type_constants[PNANOVDB_GRID_TYPE_END] = { - {28, 28, 28, 28, 28, 32, 0, 8, 20, 32, 8224, 8224, 8224, 8224, 8224, 270368, 1056, 1056, 1056, 1056, 1056, 33824, 80, 80, 80, 80, 96, 96}, - {28, 32, 36, 40, 44, 64, 32, 8, 20, 32, 8224, 8228, 8232, 8236, 8256, 270400, 1056, 1060, 1064, 1068, 1088, 33856, 80, 84, 88, 92, 96, 2144}, - {32, 40, 48, 56, 64, 96, 64, 8, 24, 32, 8224, 8232, 8240, 8248, 8256, 270400, 1056, 1064, 1072, 1080, 1088, 33856, 80, 88, 96, 104, 128, 4224}, - {28, 30, 32, 36, 40, 64, 16, 8, 20, 32, 8224, 8226, 8228, 8232, 8256, 270400, 1056, 1058, 1060, 1064, 1088, 33856, 80, 82, 84, 88, 96, 1120}, - {28, 32, 36, 40, 44, 64, 32, 8, 20, 32, 8224, 8228, 8232, 8236, 8256, 270400, 1056, 1060, 1064, 1068, 1088, 33856, 80, 84, 88, 92, 96, 2144}, - {32, 40, 48, 56, 64, 96, 64, 8, 24, 32, 8224, 8232, 8240, 8248, 8256, 270400, 1056, 1064, 1072, 1080, 1088, 33856, 80, 88, 96, 104, 128, 4224}, - {28, 40, 52, 64, 68, 96, 96, 16, 20, 32, 8224, 8236, 8248, 8252, 8256, 532544, 1056, 1068, 1080, 1084, 1088, 66624, 80, 92, 104, 108, 128, 6272}, - {32, 56, 80, 104, 112, 128, 192, 24, 24, 64, 8224, 8248, 8272, 8280, 8288, 794720, 1056, 1080, 1104, 1112, 1120, 99424, 80, 104, 128, 136, 160, 12448}, - {28, 29, 30, 31, 32, 64, 0, 8, 20, 32, 8224, 8225, 8226, 8227, 8256, 270400, 1056, 1057, 1058, 1059, 1088, 33856, 80, 80, 80, 80, 96, 96}, - {28, 30, 32, 36, 40, 64, 16, 8, 20, 32, 8224, 8226, 8228, 8232, 8256, 270400, 1056, 1058, 1060, 1064, 1088, 33856, 80, 82, 84, 88, 96, 1120}, - {28, 32, 36, 40, 44, 64, 32, 8, 20, 32, 8224, 8228, 8232, 8236, 8256, 270400, 1056, 1060, 1064, 1068, 1088, 33856, 80, 84, 88, 92, 96, 2144}, - {28, 29, 30, 31, 32, 64, 1, 8, 20, 32, 8224, 8225, 8226, 8227, 8256, 270400, 1056, 1057, 1058, 1059, 1088, 33856, 80, 80, 80, 80, 96, 160}, - {28, 32, 36, 40, 44, 64, 32, 8, 20, 32, 8224, 8228, 8232, 8236, 8256, 270400, 1056, 1060, 1064, 1068, 1088, 33856, 80, 84, 88, 92, 96, 2144}, - {28, 32, 36, 40, 44, 64, 0, 8, 20, 32, 8224, 8228, 8232, 8236, 8256, 270400, 1056, 1060, 1064, 1068, 1088, 33856, 88, 90, 92, 94, 96, 352}, - {28, 32, 36, 40, 44, 64, 0, 8, 20, 32, 8224, 8228, 8232, 8236, 8256, 270400, 1056, 1060, 1064, 1068, 1088, 33856, 88, 90, 92, 94, 96, 608}, - {28, 32, 36, 40, 44, 64, 0, 8, 20, 32, 8224, 8228, 8232, 8236, 8256, 270400, 1056, 1060, 1064, 1068, 1088, 33856, 88, 90, 92, 94, 96, 1120}, - {28, 32, 36, 40, 44, 64, 0, 8, 20, 32, 8224, 8228, 8232, 8236, 8256, 270400, 1056, 1060, 1064, 1068, 1088, 33856, 88, 90, 92, 94, 96, 96}, - {28, 44, 60, 76, 80, 96, 128, 16, 20, 64, 8224, 8240, 8256, 8260, 8288, 532576, 1056, 1072, 1088, 1092, 1120, 66656, 80, 96, 112, 116, 128, 8320}, - {32, 64, 96, 128, 136, 160, 256, 32, 24, 64, 8224, 8256, 8288, 8296, 8320, 1056896, 1056, 1088, 1120, 1128, 1152, 132224, 80, 112, 144, 152, 160, 16544}, +{28, 28, 28, 28, 28, 32, 0, 8, 20, 32, 8224, 8224, 8224, 8224, 8224, 270368, 1056, 1056, 1056, 1056, 1056, 33824, 80, 80, 80, 80, 96, 96}, +{28, 32, 36, 40, 44, 64, 32, 8, 20, 32, 8224, 8228, 
8232, 8236, 8256, 270400, 1056, 1060, 1064, 1068, 1088, 33856, 80, 84, 88, 92, 96, 2144}, +{32, 40, 48, 56, 64, 96, 64, 8, 24, 32, 8224, 8232, 8240, 8248, 8256, 270400, 1056, 1064, 1072, 1080, 1088, 33856, 80, 88, 96, 104, 128, 4224}, +{28, 30, 32, 36, 40, 64, 16, 8, 20, 32, 8224, 8226, 8228, 8232, 8256, 270400, 1056, 1058, 1060, 1064, 1088, 33856, 80, 82, 84, 88, 96, 1120}, +{28, 32, 36, 40, 44, 64, 32, 8, 20, 32, 8224, 8228, 8232, 8236, 8256, 270400, 1056, 1060, 1064, 1068, 1088, 33856, 80, 84, 88, 92, 96, 2144}, +{32, 40, 48, 56, 64, 96, 64, 8, 24, 32, 8224, 8232, 8240, 8248, 8256, 270400, 1056, 1064, 1072, 1080, 1088, 33856, 80, 88, 96, 104, 128, 4224}, +{28, 40, 52, 64, 68, 96, 96, 16, 20, 32, 8224, 8236, 8248, 8252, 8256, 532544, 1056, 1068, 1080, 1084, 1088, 66624, 80, 92, 104, 108, 128, 6272}, +{32, 56, 80, 104, 112, 128, 192, 24, 24, 64, 8224, 8248, 8272, 8280, 8288, 794720, 1056, 1080, 1104, 1112, 1120, 99424, 80, 104, 128, 136, 160, 12448}, +{28, 29, 30, 31, 32, 64, 0, 8, 20, 32, 8224, 8225, 8226, 8227, 8256, 270400, 1056, 1057, 1058, 1059, 1088, 33856, 80, 80, 80, 80, 96, 96}, +{28, 30, 32, 36, 40, 64, 16, 8, 20, 32, 8224, 8226, 8228, 8232, 8256, 270400, 1056, 1058, 1060, 1064, 1088, 33856, 80, 82, 84, 88, 96, 1120}, +{28, 32, 36, 40, 44, 64, 32, 8, 20, 32, 8224, 8228, 8232, 8236, 8256, 270400, 1056, 1060, 1064, 1068, 1088, 33856, 80, 84, 88, 92, 96, 2144}, +{28, 29, 30, 31, 32, 64, 1, 8, 20, 32, 8224, 8225, 8226, 8227, 8256, 270400, 1056, 1057, 1058, 1059, 1088, 33856, 80, 80, 80, 80, 96, 160}, +{28, 32, 36, 40, 44, 64, 32, 8, 20, 32, 8224, 8228, 8232, 8236, 8256, 270400, 1056, 1060, 1064, 1068, 1088, 33856, 80, 84, 88, 92, 96, 2144}, +{28, 32, 36, 40, 44, 64, 0, 8, 20, 32, 8224, 8228, 8232, 8236, 8256, 270400, 1056, 1060, 1064, 1068, 1088, 33856, 88, 90, 92, 94, 96, 352}, +{28, 32, 36, 40, 44, 64, 0, 8, 20, 32, 8224, 8228, 8232, 8236, 8256, 270400, 1056, 1060, 1064, 1068, 1088, 33856, 88, 90, 92, 94, 96, 608}, +{28, 32, 36, 40, 44, 64, 0, 8, 20, 32, 8224, 8228, 8232, 8236, 8256, 270400, 1056, 1060, 1064, 1068, 1088, 33856, 88, 90, 92, 94, 96, 1120}, +{28, 32, 36, 40, 44, 64, 0, 8, 20, 32, 8224, 8228, 8232, 8236, 8256, 270400, 1056, 1060, 1064, 1068, 1088, 33856, 88, 90, 92, 94, 96, 96}, +{28, 44, 60, 76, 80, 96, 128, 16, 20, 64, 8224, 8240, 8256, 8260, 8288, 532576, 1056, 1072, 1088, 1092, 1120, 66656, 80, 96, 112, 116, 128, 8320}, +{32, 64, 96, 128, 136, 160, 256, 32, 24, 64, 8224, 8256, 8288, 8296, 8320, 1056896, 1056, 1088, 1120, 1128, 1152, 132224, 80, 112, 144, 152, 160, 16544}, +{32, 40, 48, 56, 64, 96, 0, 8, 24, 32, 8224, 8232, 8240, 8248, 8256, 270400, 1056, 1064, 1072, 1080, 1088, 33856, 80, 80, 80, 80, 80, 96}, +{32, 40, 48, 56, 64, 96, 0, 8, 24, 32, 8224, 8232, 8240, 8248, 8256, 270400, 1056, 1064, 1072, 1080, 1088, 33856, 80, 80, 80, 80, 80, 96}, +{32, 40, 48, 56, 64, 96, 0, 8, 24, 32, 8224, 8232, 8240, 8248, 8256, 270400, 1056, 1064, 1072, 1080, 1088, 33856, 80, 80, 80, 80, 80, 160}, +{32, 40, 48, 56, 64, 96, 0, 8, 24, 32, 8224, 8232, 8240, 8248, 8256, 270400, 1056, 1064, 1072, 1080, 1088, 33856, 80, 80, 80, 80, 80, 160}, +{32, 40, 48, 56, 64, 96, 16, 8, 24, 32, 8224, 8232, 8240, 8248, 8256, 270400, 1056, 1064, 1072, 1080, 1088, 33856, 80, 88, 96, 96, 96, 1120}, +{28, 31, 34, 40, 44, 64, 24, 8, 20, 32, 8224, 8227, 8232, 8236, 8256, 270400, 1056, 1059, 1064, 1068, 1088, 33856, 80, 83, 88, 92, 96, 1632}, +{28, 34, 40, 48, 52, 64, 48, 8, 20, 32, 8224, 8230, 8236, 8240, 8256, 270400, 1056, 1062, 1068, 1072, 1088, 33856, 80, 86, 92, 96, 128, 3200}, +{28, 29, 30, 32, 
36, 64, 8, 8, 20, 32, 8224, 8225, 8228, 8232, 8256, 270400, 1056, 1057, 1060, 1064, 1088, 33856, 80, 81, 84, 88, 96, 608}, }; // ------------------------------------------------ Basic Lookup ----------------------------------------------------------- @@ -1239,12 +1681,11 @@ PNANOVDB_FORCE_INLINE pnanovdb_gridblindmetadata_handle_t pnanovdb_grid_get_grid return meta; } -PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanodvb_grid_get_gridblindmetadata_value_address(pnanovdb_buf_t buf, pnanovdb_grid_handle_t grid, pnanovdb_uint32_t index) +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_grid_get_gridblindmetadata_value_address(pnanovdb_buf_t buf, pnanovdb_grid_handle_t grid, pnanovdb_uint32_t index) { pnanovdb_gridblindmetadata_handle_t meta = pnanovdb_grid_get_gridblindmetadata(buf, grid, index); - pnanovdb_int64_t byte_offset = pnanovdb_gridblindmetadata_get_byte_offset(buf, meta); - pnanovdb_address_t address = grid.address; - address = pnanovdb_address_offset64(address, pnanovdb_int64_as_uint64(byte_offset)); + pnanovdb_int64_t byte_offset = pnanovdb_gridblindmetadata_get_data_offset(buf, meta); + pnanovdb_address_t address = pnanovdb_address_offset64(meta.address, pnanovdb_int64_as_uint64(byte_offset)); return address; } @@ -1319,6 +1760,8 @@ PNANOVDB_FORCE_INLINE pnanovdb_root_tile_handle_t pnanovdb_root_find_tile(pnanov return null_handle; } +// ----------------------------- Leaf Node --------------------------------------- + PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_leaf_coord_to_offset(PNANOVDB_IN(pnanovdb_coord_t) ijk) { return (((PNANOVDB_DEREF(ijk).x & 7) >> 0) << (2 * 3)) + @@ -1362,6 +1805,8 @@ PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_leaf_get_value_address(pnanovd return pnanovdb_leaf_get_table_address(grid_type, buf, leaf, n); } +// ----------------------------- Leaf FP Types Specialization --------------------------------------- + PNANOVDB_FORCE_INLINE float pnanovdb_leaf_fp_read_float(pnanovdb_buf_t buf, pnanovdb_address_t address, PNANOVDB_IN(pnanovdb_coord_t) ijk, pnanovdb_uint32_t value_log_bits) { // value_log_bits // 2 3 4 @@ -1401,11 +1846,301 @@ PNANOVDB_FORCE_INLINE float pnanovdb_leaf_fpn_read_float(pnanovdb_buf_t buf, pna return pnanovdb_leaf_fp_read_float(buf, address, ijk, value_log_bits); } +// ----------------------------- Leaf Index Specialization --------------------------------------- + +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_leaf_index_has_stats(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf) +{ + return (pnanovdb_leaf_get_bbox_dif_and_flags(buf, leaf) & (1u << 28u)) != 0u; +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_index_get_min_index(pnanovdb_buf_t buf, pnanovdb_address_t min_address) +{ + return pnanovdb_uint64_offset(pnanovdb_read_uint64(buf, min_address), 512u); +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_index_get_max_index(pnanovdb_buf_t buf, pnanovdb_address_t max_address) +{ + return pnanovdb_uint64_offset(pnanovdb_read_uint64(buf, max_address), 513u); +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_index_get_ave_index(pnanovdb_buf_t buf, pnanovdb_address_t ave_address) +{ + return pnanovdb_uint64_offset(pnanovdb_read_uint64(buf, ave_address), 514u); +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_index_get_dev_index(pnanovdb_buf_t buf, pnanovdb_address_t dev_address) +{ + return pnanovdb_uint64_offset(pnanovdb_read_uint64(buf, dev_address), 515u); +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_index_get_value_index(pnanovdb_buf_t buf, pnanovdb_address_t 
value_address, PNANOVDB_IN(pnanovdb_coord_t) ijk) +{ + pnanovdb_uint32_t n = pnanovdb_leaf_coord_to_offset(ijk); + pnanovdb_uint64_t offset = pnanovdb_read_uint64(buf, value_address); + return pnanovdb_uint64_offset(offset, n); +} + +// ----------------------------- Leaf IndexMask Specialization --------------------------------------- + +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_leaf_indexmask_has_stats(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf) +{ + return pnanovdb_leaf_index_has_stats(buf, leaf); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_indexmask_get_min_index(pnanovdb_buf_t buf, pnanovdb_address_t min_address) +{ + return pnanovdb_leaf_index_get_min_index(buf, min_address); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_indexmask_get_max_index(pnanovdb_buf_t buf, pnanovdb_address_t max_address) +{ + return pnanovdb_leaf_index_get_max_index(buf, max_address); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_indexmask_get_ave_index(pnanovdb_buf_t buf, pnanovdb_address_t ave_address) +{ + return pnanovdb_leaf_index_get_ave_index(buf, ave_address); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_indexmask_get_dev_index(pnanovdb_buf_t buf, pnanovdb_address_t dev_address) +{ + return pnanovdb_leaf_index_get_dev_index(buf, dev_address); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_indexmask_get_value_index(pnanovdb_buf_t buf, pnanovdb_address_t value_address, PNANOVDB_IN(pnanovdb_coord_t) ijk) +{ + return pnanovdb_leaf_index_get_value_index(buf, value_address, ijk); +} +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_leaf_indexmask_get_mask_bit(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, pnanovdb_uint32_t n) +{ + pnanovdb_uint32_t word_idx = n >> 5; + pnanovdb_uint32_t bit_idx = n & 31; + pnanovdb_uint32_t val_mask = + pnanovdb_read_uint32(buf, pnanovdb_address_offset(leaf.address, 96u + 4u * word_idx)); + return (val_mask & (1u << bit_idx)) != 0u; +} +PNANOVDB_FORCE_INLINE void pnanovdb_leaf_indexmask_set_mask_bit(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, pnanovdb_uint32_t n, pnanovdb_bool_t v) +{ + pnanovdb_uint32_t word_idx = n >> 5; + pnanovdb_uint32_t bit_idx = n & 31; + pnanovdb_uint32_t val_mask = + pnanovdb_read_uint32(buf, pnanovdb_address_offset(leaf.address, 96u + 4u * word_idx)); + if (v) + { + val_mask = val_mask | (1u << bit_idx); + } + else + { + val_mask = val_mask & ~(1u << bit_idx); + } + pnanovdb_write_uint32(buf, pnanovdb_address_offset(leaf.address, 96u + 4u * word_idx), val_mask); +} + +// ----------------------------- Leaf OnIndex Specialization --------------------------------------- + +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_leaf_onindex_get_value_count(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf) +{ + pnanovdb_uint64_t val_mask = pnanovdb_read_uint64(buf, pnanovdb_address_offset(leaf.address, PNANOVDB_LEAF_OFF_VALUE_MASK + 8u * 7u)); + pnanovdb_uint64_t prefix_sum = pnanovdb_read_uint64( + buf, pnanovdb_address_offset(leaf.address, PNANOVDB_GRID_TYPE_GET(PNANOVDB_GRID_TYPE_ONINDEX, leaf_off_table) + 8u)); + return pnanovdb_uint64_countbits(val_mask) + (pnanovdb_uint64_to_uint32_lsr(prefix_sum, 54u) & 511u); +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindex_get_last_offset(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf) +{ + return pnanovdb_uint64_offset( + pnanovdb_read_uint64(buf, pnanovdb_address_offset(leaf.address, PNANOVDB_GRID_TYPE_GET(PNANOVDB_GRID_TYPE_ONINDEX, leaf_off_table))), + pnanovdb_leaf_onindex_get_value_count(buf, leaf) - 1u); +} + 
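+// Layout note for the OnIndex specialization: an OnIndex leaf stores values only for active
+// voxels. Table word 0 holds the leaf's base offset into the external value array and table
+// word 1 packs seven 9-bit prefix sums of the value-mask popcounts, so a voxel's linear index
+// is the base offset plus the rank of its bit in the value mask, without scanning all eight
+// mask words; inactive voxels resolve to index 0 (the background value). When bit 28 of
+// bbox_dif_and_flags is set, per-leaf min/max/average/deviation stats are stored directly
+// after the last active value, at offsets +1 to +4.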
+PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_leaf_onindex_has_stats(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf) +{ + return (pnanovdb_leaf_get_bbox_dif_and_flags(buf, leaf) & (1u << 28u)) != 0u; +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindex_get_min_index(pnanovdb_buf_t buf, pnanovdb_address_t min_address) +{ + pnanovdb_leaf_handle_t leaf = { pnanovdb_address_offset_neg(min_address, PNANOVDB_GRID_TYPE_GET(PNANOVDB_GRID_TYPE_ONINDEX, leaf_off_table)) }; + pnanovdb_uint64_t idx = pnanovdb_uint32_as_uint64_low(0u); + if (pnanovdb_leaf_onindex_has_stats(buf, leaf)) + { + idx = pnanovdb_uint64_offset(pnanovdb_leaf_onindex_get_last_offset(buf, leaf), 1u); + } + return idx; +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindex_get_max_index(pnanovdb_buf_t buf, pnanovdb_address_t max_address) +{ + pnanovdb_leaf_handle_t leaf = { pnanovdb_address_offset_neg(max_address, PNANOVDB_GRID_TYPE_GET(PNANOVDB_GRID_TYPE_ONINDEX, leaf_off_table)) }; + pnanovdb_uint64_t idx = pnanovdb_uint32_as_uint64_low(0u); + if (pnanovdb_leaf_onindex_has_stats(buf, leaf)) + { + idx = pnanovdb_uint64_offset(pnanovdb_leaf_onindex_get_last_offset(buf, leaf), 2u); + } + return idx; +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindex_get_ave_index(pnanovdb_buf_t buf, pnanovdb_address_t ave_address) +{ + pnanovdb_leaf_handle_t leaf = { pnanovdb_address_offset_neg(ave_address, PNANOVDB_GRID_TYPE_GET(PNANOVDB_GRID_TYPE_ONINDEX, leaf_off_table)) }; + pnanovdb_uint64_t idx = pnanovdb_uint32_as_uint64_low(0u); + if (pnanovdb_leaf_onindex_has_stats(buf, leaf)) + { + idx = pnanovdb_uint64_offset(pnanovdb_leaf_onindex_get_last_offset(buf, leaf), 3u); + } + return idx; +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindex_get_dev_index(pnanovdb_buf_t buf, pnanovdb_address_t dev_address) +{ + pnanovdb_leaf_handle_t leaf = { pnanovdb_address_offset_neg(dev_address, PNANOVDB_GRID_TYPE_GET(PNANOVDB_GRID_TYPE_ONINDEX, leaf_off_table)) }; + pnanovdb_uint64_t idx = pnanovdb_uint32_as_uint64_low(0u); + if (pnanovdb_leaf_onindex_has_stats(buf, leaf)) + { + idx = pnanovdb_uint64_offset(pnanovdb_leaf_onindex_get_last_offset(buf, leaf), 4u); + } + return idx; +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindex_get_value_index(pnanovdb_buf_t buf, pnanovdb_address_t value_address, PNANOVDB_IN(pnanovdb_coord_t) ijk) +{ + pnanovdb_uint32_t n = pnanovdb_leaf_coord_to_offset(ijk); + pnanovdb_leaf_handle_t leaf = { pnanovdb_address_offset_neg(value_address, PNANOVDB_GRID_TYPE_GET(PNANOVDB_GRID_TYPE_ONINDEX, leaf_off_table)) }; + + pnanovdb_uint32_t word_idx = n >> 6u; + pnanovdb_uint32_t bit_idx = n & 63u; + pnanovdb_uint64_t val_mask = pnanovdb_read_uint64(buf, pnanovdb_address_offset(leaf.address, PNANOVDB_LEAF_OFF_VALUE_MASK + 8u * word_idx)); + pnanovdb_uint64_t mask = pnanovdb_uint64_bit_mask(bit_idx); + pnanovdb_uint64_t value_index = pnanovdb_uint32_as_uint64_low(0u); + if (pnanovdb_uint64_any_bit(pnanovdb_uint64_and(val_mask, mask))) + { + pnanovdb_uint32_t sum = 0u; + sum += pnanovdb_uint64_countbits(pnanovdb_uint64_and(val_mask, pnanovdb_uint64_dec(mask))); + if (word_idx > 0u) + { + pnanovdb_uint64_t prefix_sum = pnanovdb_read_uint64(buf, pnanovdb_address_offset(value_address, 8u)); + sum += pnanovdb_uint64_to_uint32_lsr(prefix_sum, 9u * (word_idx - 1u)) & 511u; + } + pnanovdb_uint64_t offset = pnanovdb_read_uint64(buf, value_address); + value_index = pnanovdb_uint64_offset(offset, sum); + } + return value_index; +} + +// ----------------------------- Leaf 
OnIndexMask Specialization --------------------------------------- + +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_leaf_onindexmask_get_value_count(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf) +{ + return pnanovdb_leaf_onindex_get_value_count(buf, leaf); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindexmask_get_last_offset(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf) +{ + return pnanovdb_leaf_onindex_get_last_offset(buf, leaf); +} +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_leaf_onindexmask_has_stats(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf) +{ + return pnanovdb_leaf_onindex_has_stats(buf, leaf); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindexmask_get_min_index(pnanovdb_buf_t buf, pnanovdb_address_t min_address) +{ + return pnanovdb_leaf_onindex_get_min_index(buf, min_address); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindexmask_get_max_index(pnanovdb_buf_t buf, pnanovdb_address_t max_address) +{ + return pnanovdb_leaf_onindex_get_max_index(buf, max_address); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindexmask_get_ave_index(pnanovdb_buf_t buf, pnanovdb_address_t ave_address) +{ + return pnanovdb_leaf_onindex_get_ave_index(buf, ave_address); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindexmask_get_dev_index(pnanovdb_buf_t buf, pnanovdb_address_t dev_address) +{ + return pnanovdb_leaf_onindex_get_dev_index(buf, dev_address); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindexmask_get_value_index(pnanovdb_buf_t buf, pnanovdb_address_t value_address, PNANOVDB_IN(pnanovdb_coord_t) ijk) +{ + return pnanovdb_leaf_onindex_get_value_index(buf, value_address, ijk); +} +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_leaf_onindexmask_get_mask_bit(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, pnanovdb_uint32_t n) +{ + pnanovdb_uint32_t word_idx = n >> 5; + pnanovdb_uint32_t bit_idx = n & 31; + pnanovdb_uint32_t val_mask = + pnanovdb_read_uint32(buf, pnanovdb_address_offset(leaf.address, 96u + 4u * word_idx)); + return (val_mask & (1u << bit_idx)) != 0u; +} +PNANOVDB_FORCE_INLINE void pnanovdb_leaf_onindexmask_set_mask_bit(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, pnanovdb_uint32_t n, pnanovdb_bool_t v) +{ + pnanovdb_uint32_t word_idx = n >> 5; + pnanovdb_uint32_t bit_idx = n & 31; + pnanovdb_uint32_t val_mask = + pnanovdb_read_uint32(buf, pnanovdb_address_offset(leaf.address, 96u + 4u * word_idx)); + if (v) + { + val_mask = val_mask | (1u << bit_idx); + } + else + { + val_mask = val_mask & ~(1u << bit_idx); + } + pnanovdb_write_uint32(buf, pnanovdb_address_offset(leaf.address, 96u + 4u * word_idx), val_mask); +} + +// ----------------------------- Leaf PointIndex Specialization --------------------------------------- + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_pointindex_get_offset(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf) +{ + return pnanovdb_read_uint64(buf, pnanovdb_leaf_get_min_address(PNANOVDB_GRID_TYPE_POINTINDEX, buf, leaf)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_pointindex_get_point_count(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf) +{ + return pnanovdb_read_uint64(buf, pnanovdb_leaf_get_max_address(PNANOVDB_GRID_TYPE_POINTINDEX, buf, leaf)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_pointindex_get_first(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, pnanovdb_uint32_t i) +{ + return pnanovdb_uint64_offset(pnanovdb_leaf_pointindex_get_offset(buf, leaf), + (i == 0u ? 
0u : pnanovdb_read_uint16(buf, pnanovdb_leaf_get_table_address(PNANOVDB_GRID_TYPE_POINTINDEX, buf, leaf, i - 1u)))); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_pointindex_get_last(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, pnanovdb_uint32_t i) +{ + return pnanovdb_uint64_offset(pnanovdb_leaf_pointindex_get_offset(buf, leaf), + pnanovdb_read_uint16(buf, pnanovdb_leaf_get_table_address(PNANOVDB_GRID_TYPE_POINTINDEX, buf, leaf, i))); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_pointindex_get_value(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, pnanovdb_uint32_t i) +{ + return pnanovdb_uint32_as_uint64_low(pnanovdb_read_uint16(buf, pnanovdb_leaf_get_table_address(PNANOVDB_GRID_TYPE_POINTINDEX, buf, leaf, i))); +} +PNANOVDB_FORCE_INLINE void pnanovdb_leaf_pointindex_set_value_only(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, pnanovdb_uint32_t i, pnanovdb_uint32_t value) +{ + pnanovdb_address_t addr = pnanovdb_leaf_get_table_address(PNANOVDB_GRID_TYPE_POINTINDEX, buf, leaf, i); + pnanovdb_uint32_t raw32 = pnanovdb_read_uint32(buf, pnanovdb_address_mask_inv(addr, 3u)); + if ((i & 1) == 0u) + { + raw32 = (raw32 & 0xFFFF0000) | (value & 0x0000FFFF); + } + else + { + raw32 = (raw32 & 0x0000FFFF) | (value << 16u); + } + pnanovdb_write_uint32(buf, addr, raw32); +} +PNANOVDB_FORCE_INLINE void pnanovdb_leaf_pointindex_set_on(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, pnanovdb_uint32_t i) +{ + pnanovdb_uint32_t word_idx = i >> 5; + pnanovdb_uint32_t bit_idx = i & 31; + pnanovdb_address_t addr = pnanovdb_address_offset(leaf.address, PNANOVDB_LEAF_OFF_VALUE_MASK + 4u * word_idx); + pnanovdb_uint32_t val_mask = pnanovdb_read_uint32(buf, addr); + val_mask = val_mask | (1u << bit_idx); + pnanovdb_write_uint32(buf, addr, val_mask); +} +PNANOVDB_FORCE_INLINE void pnanovdb_leaf_pointindex_set_value(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, pnanovdb_uint32_t i, pnanovdb_uint32_t value) +{ + pnanovdb_leaf_pointindex_set_on(buf, leaf, i); + pnanovdb_leaf_pointindex_set_value_only(buf, leaf, i, value); +} + +// ------------------------------------------------ Lower Node ----------------------------------------------------------- + PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_lower_coord_to_offset(PNANOVDB_IN(pnanovdb_coord_t) ijk) { return (((PNANOVDB_DEREF(ijk).x & 127) >> 3) << (2 * 4)) + - (((PNANOVDB_DEREF(ijk).y & 127) >> 3) << (4)) + - ((PNANOVDB_DEREF(ijk).z & 127) >> 3); + (((PNANOVDB_DEREF(ijk).y & 127) >> 3) << (4)) + + ((PNANOVDB_DEREF(ijk).z & 127) >> 3); } PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_lower_get_min_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_lower_handle_t node) @@ -1475,11 +2210,13 @@ PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_lower_get_value_address(pnanov return pnanovdb_lower_get_value_address_and_level(grid_type, buf, lower, ijk, PNANOVDB_REF(level)); } +// ------------------------------------------------ Upper Node ----------------------------------------------------------- + PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_upper_coord_to_offset(PNANOVDB_IN(pnanovdb_coord_t) ijk) { return (((PNANOVDB_DEREF(ijk).x & 4095) >> 7) << (2 * 5)) + - (((PNANOVDB_DEREF(ijk).y & 4095) >> 7) << (5)) + - ((PNANOVDB_DEREF(ijk).z & 4095) >> 7); + (((PNANOVDB_DEREF(ijk).y & 4095) >> 7) << (5)) + + ((PNANOVDB_DEREF(ijk).z & 4095) >> 7); } PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_upper_get_min_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_upper_handle_t node) @@ -1548,6 
+2285,14 @@ PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_upper_get_value_address(pnanov return pnanovdb_upper_get_value_address_and_level(grid_type, buf, upper, ijk, PNANOVDB_REF(level)); } +PNANOVDB_FORCE_INLINE void pnanovdb_upper_set_table_child(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_upper_handle_t node, pnanovdb_uint32_t n, pnanovdb_int64_t child) +{ + pnanovdb_address_t bufAddress = pnanovdb_upper_get_table_address(grid_type, buf, node, n); + pnanovdb_write_int64(buf, bufAddress, child); +} + +// ------------------------------------------------ Root ----------------------------------------------------------- + PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_root_get_min_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_root_handle_t root) { pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, root_off_min); @@ -1670,6 +2415,92 @@ PNANOVDB_FORCE_INLINE float pnanovdb_root_fpn_read_float(pnanovdb_buf_t buf, pna return ret; } +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_root_index_get_value_index(pnanovdb_buf_t buf, pnanovdb_address_t address, PNANOVDB_IN(pnanovdb_coord_t) ijk, pnanovdb_uint32_t level) +{ + pnanovdb_uint64_t ret; + if (level == 0) + { + ret = pnanovdb_leaf_index_get_value_index(buf, address, ijk); + } + else + { + ret = pnanovdb_read_uint64(buf, address); + } + return ret; +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_root_onindex_get_value_index(pnanovdb_buf_t buf, pnanovdb_address_t address, PNANOVDB_IN(pnanovdb_coord_t) ijk, pnanovdb_uint32_t level) +{ + pnanovdb_uint64_t ret; + if (level == 0) + { + ret = pnanovdb_leaf_onindex_get_value_index(buf, address, ijk); + } + else + { + ret = pnanovdb_read_uint64(buf, address); + } + return ret; +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_root_pointindex_get_point_range( + pnanovdb_buf_t buf, + pnanovdb_address_t value_address, + PNANOVDB_IN(pnanovdb_coord_t) ijk, + pnanovdb_uint32_t level, + PNANOVDB_INOUT(pnanovdb_uint64_t)range_begin, + PNANOVDB_INOUT(pnanovdb_uint64_t)range_end +) +{ + pnanovdb_uint32_t local_range_begin = 0u; + pnanovdb_uint32_t local_range_end = 0u; + pnanovdb_uint64_t offset = pnanovdb_uint32_as_uint64_low(0u); + if (level == 0) + { + pnanovdb_uint32_t n = pnanovdb_leaf_coord_to_offset(ijk); + // recover leaf address + pnanovdb_leaf_handle_t leaf = { pnanovdb_address_offset_neg(value_address, PNANOVDB_GRID_TYPE_GET(PNANOVDB_GRID_TYPE_POINTINDEX, leaf_off_table) + 2u * n) }; + if (n > 0u) + { + local_range_begin = pnanovdb_read_uint16(buf, pnanovdb_address_offset_neg(value_address, 2u)); + } + local_range_end = pnanovdb_read_uint16(buf, value_address); + offset = pnanovdb_leaf_pointindex_get_offset(buf, leaf); + } + PNANOVDB_DEREF(range_begin) = pnanovdb_uint64_offset(offset, local_range_begin); + PNANOVDB_DEREF(range_end) = pnanovdb_uint64_offset(offset, local_range_end); + return pnanovdb_uint32_as_uint64_low(local_range_end - local_range_begin); +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_root_pointindex_get_point_address_range( + pnanovdb_buf_t buf, + pnanovdb_grid_type_t value_type, + pnanovdb_address_t value_address, + pnanovdb_address_t blindmetadata_value_address, + PNANOVDB_IN(pnanovdb_coord_t) ijk, + pnanovdb_uint32_t level, + PNANOVDB_INOUT(pnanovdb_address_t)address_begin, + PNANOVDB_INOUT(pnanovdb_address_t)address_end +) +{ + pnanovdb_uint64_t range_begin; + pnanovdb_uint64_t range_end; + pnanovdb_uint64_t range_size = pnanovdb_root_pointindex_get_point_range(buf, value_address, ijk, level, 
PNANOVDB_REF(range_begin), PNANOVDB_REF(range_end)); + + pnanovdb_uint32_t stride = 12u; // vec3f + if (value_type == PNANOVDB_GRID_TYPE_VEC3U8) + { + stride = 3u; + } + else if (value_type == PNANOVDB_GRID_TYPE_VEC3U16) + { + stride = 6u; + } + PNANOVDB_DEREF(address_begin) = pnanovdb_address_offset64_product(blindmetadata_value_address, range_begin, stride); + PNANOVDB_DEREF(address_end) = pnanovdb_address_offset64_product(blindmetadata_value_address, range_end, stride); + return range_size; +} + // ------------------------------------------------ ReadAccessor ----------------------------------------------------------- struct pnanovdb_readaccessor_t @@ -1760,6 +2591,12 @@ PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_lower_get_value_address_and_ca return pnanovdb_lower_get_value_address_and_level_and_cache(grid_type, buf, lower, ijk, acc, PNANOVDB_REF(level)); } +PNANOVDB_FORCE_INLINE void pnanovdb_lower_set_table_child(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_lower_handle_t node, pnanovdb_uint32_t n, pnanovdb_int64_t child) +{ + pnanovdb_address_t table_address = pnanovdb_lower_get_table_address(grid_type, buf, node, n); + pnanovdb_write_int64(buf, table_address, child); +} + PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_upper_get_value_address_and_level_and_cache(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_upper_handle_t upper, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc, PNANOVDB_INOUT(pnanovdb_uint32_t) level) { pnanovdb_uint32_t n = pnanovdb_upper_coord_to_offset(ijk); diff --git a/warp/native/nanovdb/PNanoVDBWrite.h b/warp/native/nanovdb/PNanoVDBWrite.h deleted file mode 100644 index 916c48d8..00000000 --- a/warp/native/nanovdb/PNanoVDBWrite.h +++ /dev/null @@ -1,295 +0,0 @@ - -// Copyright Contributors to the OpenVDB Project -// SPDX-License-Identifier: MPL-2.0 - -/*! - \file PNanoVDBWrite.h - - \author Andrew Reidmeyer - - \brief This file is a portable (e.g. pointer-less) C99/GLSL/HLSL port - of NanoVDBWrite.h, which is compatible with most graphics APIs. 
-*/ - -#ifndef NANOVDB_PNANOVDB_WRITE_H_HAS_BEEN_INCLUDED -#define NANOVDB_PNANOVDB_WRITE_H_HAS_BEEN_INCLUDED - -#if defined(PNANOVDB_BUF_C) -#if defined(PNANOVDB_ADDRESS_32) -PNANOVDB_BUF_FORCE_INLINE void pnanovdb_buf_write_uint32(pnanovdb_buf_t buf, uint32_t byte_offset, uint32_t value) -{ - uint32_t wordaddress = (byte_offset >> 2u); -#ifdef PNANOVDB_BUF_BOUNDS_CHECK - if (wordaddress < buf.size_in_words) - { - buf.data[wordaddress] = value; - } -#else - buf.data[wordaddress] = value; -#endif -} -PNANOVDB_BUF_FORCE_INLINE void pnanovdb_buf_write_uint64(pnanovdb_buf_t buf, uint32_t byte_offset, uint64_t value) -{ - uint64_t* data64 = (uint64_t*)buf.data; - uint32_t wordaddress64 = (byte_offset >> 3u); -#ifdef PNANOVDB_BUF_BOUNDS_CHECK - uint64_t size_in_words64 = buf.size_in_words >> 1u; - if (wordaddress64 < size_in_words64) - { - data64[wordaddress64] = value; - } -#else - data64[wordaddress64] = value; -#endif -} -#elif defined(PNANOVDB_ADDRESS_64) -PNANOVDB_BUF_FORCE_INLINE void pnanovdb_buf_write_uint32(pnanovdb_buf_t buf, uint64_t byte_offset, uint32_t value) -{ - uint64_t wordaddress = (byte_offset >> 2u); -#ifdef PNANOVDB_BUF_BOUNDS_CHECK - if (wordaddress < buf.size_in_words) - { - buf.data[wordaddress] = value; - } -#else - buf.data[wordaddress] = value; -#endif -} -PNANOVDB_BUF_FORCE_INLINE void pnanovdb_buf_write_uint64(pnanovdb_buf_t buf, uint64_t byte_offset, uint64_t value) -{ - uint64_t* data64 = (uint64_t*)buf.data; - uint64_t wordaddress64 = (byte_offset >> 3u); -#ifdef PNANOVDB_BUF_BOUNDS_CHECK - uint64_t size_in_words64 = buf.size_in_words >> 1u; - if (wordaddress64 < size_in_words64) - { - data64[wordaddress64] = value; - } -#else - data64[wordaddress64] = value; -#endif -} -#endif -#endif - -#if defined(PNANOVDB_C) -PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_float_as_uint32(float v) { return *((pnanovdb_uint32_t*)(&v)); } -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_double_as_uint64(double v) { return *((pnanovdb_uint64_t*)(&v)); } -#elif defined(PNANOVDB_HLSL) -PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_float_as_uint32(float v) { return asuint(v); } -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_double_as_uint64(double v) { uint2 ret; asuint(v, ret.x, ret.y); return ret; } -#elif defined(PNANOVDB_GLSL) -PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_float_as_uint32(float v) { return floatBitsToUint(v); } -PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_double_as_uint64(double v) { return unpackDouble2x32(v); } -#endif - -PNANOVDB_FORCE_INLINE void pnanovdb_write_uint32(pnanovdb_buf_t buf, pnanovdb_address_t address, pnanovdb_uint32_t value) -{ - pnanovdb_buf_write_uint32(buf, address.byte_offset, value); -} -PNANOVDB_FORCE_INLINE void pnanovdb_write_uint64(pnanovdb_buf_t buf, pnanovdb_address_t address, pnanovdb_uint64_t value) -{ - pnanovdb_buf_write_uint64(buf, address.byte_offset, value); -} -PNANOVDB_FORCE_INLINE void pnanovdb_write_int32(pnanovdb_buf_t buf, pnanovdb_address_t address, pnanovdb_int32_t value) -{ - pnanovdb_write_uint32(buf, address, pnanovdb_int32_as_uint32(value)); -} -PNANOVDB_FORCE_INLINE void pnanovdb_write_int64(pnanovdb_buf_t buf, pnanovdb_address_t address, pnanovdb_int64_t value) -{ - pnanovdb_buf_write_uint64(buf, address.byte_offset, pnanovdb_int64_as_uint64(value)); -} -PNANOVDB_FORCE_INLINE void pnanovdb_write_float(pnanovdb_buf_t buf, pnanovdb_address_t address, float value) -{ - pnanovdb_write_uint32(buf, address, pnanovdb_float_as_uint32(value)); -} -PNANOVDB_FORCE_INLINE void 
pnanovdb_write_double(pnanovdb_buf_t buf, pnanovdb_address_t address, double value) -{ - pnanovdb_write_uint64(buf, address, pnanovdb_double_as_uint64(value)); -} -PNANOVDB_FORCE_INLINE void pnanovdb_write_coord(pnanovdb_buf_t buf, pnanovdb_address_t address, PNANOVDB_IN(pnanovdb_coord_t) value) -{ - pnanovdb_write_uint32(buf, pnanovdb_address_offset(address, 0u), pnanovdb_int32_as_uint32(PNANOVDB_DEREF(value).x)); - pnanovdb_write_uint32(buf, pnanovdb_address_offset(address, 4u), pnanovdb_int32_as_uint32(PNANOVDB_DEREF(value).y)); - pnanovdb_write_uint32(buf, pnanovdb_address_offset(address, 8u), pnanovdb_int32_as_uint32(PNANOVDB_DEREF(value).z)); -} -PNANOVDB_FORCE_INLINE void pnanovdb_write_vec3(pnanovdb_buf_t buf, pnanovdb_address_t address, PNANOVDB_IN(pnanovdb_vec3_t) value) -{ - pnanovdb_write_uint32(buf, pnanovdb_address_offset(address, 0u), pnanovdb_float_as_uint32(PNANOVDB_DEREF(value).x)); - pnanovdb_write_uint32(buf, pnanovdb_address_offset(address, 4u), pnanovdb_float_as_uint32(PNANOVDB_DEREF(value).y)); - pnanovdb_write_uint32(buf, pnanovdb_address_offset(address, 8u), pnanovdb_float_as_uint32(PNANOVDB_DEREF(value).z)); -} - -PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_node_offset_leaf(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint64_t node_offset_leaf) { - pnanovdb_write_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_OFFSET_LEAF), node_offset_leaf); -} -PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_node_offset_lower(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint64_t node_offset_lower) { - pnanovdb_write_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_OFFSET_LOWER), node_offset_lower); -} -PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_node_offset_upper(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint64_t node_offset_upper) { - pnanovdb_write_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_OFFSET_UPPER), node_offset_upper); -} -PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_node_offset_root(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint64_t node_offset_root) { - pnanovdb_write_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_OFFSET_ROOT), node_offset_root); -} -PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_node_count_leaf(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint32_t node_count_leaf) { - pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_COUNT_LEAF), node_count_leaf); -} -PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_node_count_lower(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint32_t node_count_lower) { - pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_COUNT_LOWER), node_count_lower); -} -PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_node_count_upper(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint32_t node_count_upper) { - pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_COUNT_UPPER), node_count_upper); -} -PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_tile_count_leaf(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint32_t tile_count_leaf) { - pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_TILE_COUNT_LEAF), tile_count_leaf); -} -PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_tile_count_lower(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint32_t tile_count_lower) { - pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, 
PNANOVDB_TREE_OFF_TILE_COUNT_LOWER), tile_count_lower); -} -PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_tile_count_upper(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint32_t tile_count_upper) { - pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_TILE_COUNT_UPPER), tile_count_upper); -} -PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_voxel_count(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint64_t voxel_count) { - pnanovdb_write_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_VOXEL_COUNT), voxel_count); -} - -PNANOVDB_FORCE_INLINE void pnanovdb_root_set_bbox_min(pnanovdb_buf_t buf, pnanovdb_root_handle_t p, PNANOVDB_IN(pnanovdb_coord_t) bbox_min) { - pnanovdb_write_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_OFF_BBOX_MIN), bbox_min); -} -PNANOVDB_FORCE_INLINE void pnanovdb_root_set_bbox_max(pnanovdb_buf_t buf, pnanovdb_root_handle_t p, PNANOVDB_IN(pnanovdb_coord_t) bbox_max) { - pnanovdb_write_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_OFF_BBOX_MAX), bbox_max); -} -PNANOVDB_FORCE_INLINE void pnanovdb_root_set_tile_count(pnanovdb_buf_t buf, pnanovdb_root_handle_t p, pnanovdb_uint32_t tile_count) { - pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_OFF_TABLE_SIZE), tile_count); -} - -PNANOVDB_FORCE_INLINE void pnanovdb_root_tile_set_key(pnanovdb_buf_t buf, pnanovdb_root_tile_handle_t p, pnanovdb_uint64_t key) { - pnanovdb_write_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_TILE_OFF_KEY), key); -} -PNANOVDB_FORCE_INLINE void pnanovdb_root_tile_set_child(pnanovdb_buf_t buf, pnanovdb_root_tile_handle_t p, pnanovdb_int64_t child) { - pnanovdb_write_int64(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_TILE_OFF_CHILD), child); -} -PNANOVDB_FORCE_INLINE void pnanovdb_root_tile_set_state(pnanovdb_buf_t buf, pnanovdb_root_tile_handle_t p, pnanovdb_uint32_t state) { - pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_TILE_OFF_STATE), state); -} - -PNANOVDB_FORCE_INLINE void pnanovdb_upper_set_bbox_min(pnanovdb_buf_t buf, pnanovdb_upper_handle_t p, PNANOVDB_IN(pnanovdb_coord_t) bbox_min) { - pnanovdb_write_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_UPPER_OFF_BBOX_MIN), bbox_min); -} -PNANOVDB_FORCE_INLINE void pnanovdb_upper_set_bbox_max(pnanovdb_buf_t buf, pnanovdb_upper_handle_t p, PNANOVDB_IN(pnanovdb_coord_t) bbox_max) { - pnanovdb_write_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_UPPER_OFF_BBOX_MAX), bbox_max); -} -PNANOVDB_FORCE_INLINE void pnanovdb_upper_set_child_mask(pnanovdb_buf_t buf, pnanovdb_upper_handle_t p, pnanovdb_uint32_t bit_index, pnanovdb_bool_t value) { - pnanovdb_address_t addr = pnanovdb_address_offset(p.address, PNANOVDB_UPPER_OFF_CHILD_MASK + 4u * (bit_index >> 5u)); - pnanovdb_uint32_t valueMask = pnanovdb_read_uint32(buf, addr); - if (!value) { valueMask &= ~(1u << (bit_index & 31u)); } - if (value) valueMask |= (1u << (bit_index & 31u)); - pnanovdb_write_uint32(buf, addr, valueMask); -} -PNANOVDB_FORCE_INLINE void pnanovdb_upper_set_table_child(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_upper_handle_t node, pnanovdb_uint32_t n, pnanovdb_int64_t child) -{ - pnanovdb_address_t bufAddress = pnanovdb_upper_get_table_address(grid_type, buf, node, n); - pnanovdb_write_int64(buf, bufAddress, child); -} - -PNANOVDB_FORCE_INLINE void pnanovdb_lower_set_bbox_min(pnanovdb_buf_t buf, pnanovdb_lower_handle_t p, PNANOVDB_IN(pnanovdb_coord_t) bbox_min) { - 
pnanovdb_write_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_LOWER_OFF_BBOX_MIN), bbox_min); -} -PNANOVDB_FORCE_INLINE void pnanovdb_lower_set_bbox_max(pnanovdb_buf_t buf, pnanovdb_lower_handle_t p, PNANOVDB_IN(pnanovdb_coord_t) bbox_max) { - pnanovdb_write_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_LOWER_OFF_BBOX_MAX), bbox_max); -} -PNANOVDB_FORCE_INLINE void pnanovdb_lower_set_child_mask(pnanovdb_buf_t buf, pnanovdb_lower_handle_t p, pnanovdb_uint32_t bit_index, pnanovdb_bool_t value) { - pnanovdb_address_t addr = pnanovdb_address_offset(p.address, PNANOVDB_LOWER_OFF_CHILD_MASK + 4u * (bit_index >> 5u)); - pnanovdb_uint32_t valueMask = pnanovdb_read_uint32(buf, addr); - if (!value) { valueMask &= ~(1u << (bit_index & 31u)); } - if (value) valueMask |= (1u << (bit_index & 31u)); - pnanovdb_write_uint32(buf, addr, valueMask); -} -PNANOVDB_FORCE_INLINE void pnanovdb_lower_set_table_child(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_lower_handle_t node, pnanovdb_uint32_t n, pnanovdb_int64_t child) -{ - pnanovdb_address_t table_address = pnanovdb_lower_get_table_address(grid_type, buf, node, n); - pnanovdb_write_int64(buf, table_address, child); -} - -PNANOVDB_FORCE_INLINE void pnanovdb_leaf_set_bbox_min(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t p, PNANOVDB_IN(pnanovdb_coord_t) bbox_min) { - pnanovdb_write_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_LEAF_OFF_BBOX_MIN), bbox_min); -} -PNANOVDB_FORCE_INLINE void pnanovdb_leaf_set_bbox_dif_and_flags(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t p, pnanovdb_uint32_t bbox_dif_and_flags) { - pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_LEAF_OFF_BBOX_DIF_AND_FLAGS), bbox_dif_and_flags); -} - -PNANOVDB_FORCE_INLINE void pnanovdb_map_set_matf(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index, float matf) { - pnanovdb_write_float(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_MATF + 4u * index), matf); -} -PNANOVDB_FORCE_INLINE void pnanovdb_map_set_invmatf(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index, float invmatf) { - pnanovdb_write_float(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_INVMATF + 4u * index), invmatf); -} -PNANOVDB_FORCE_INLINE void pnanovdb_map_set_vecf(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index, float vecf) { - pnanovdb_write_float(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_VECF + 4u * index), vecf); -} -PNANOVDB_FORCE_INLINE void pnanovdb_map_set_taperf(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index, float taperf) { - pnanovdb_write_float(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_TAPERF), taperf); -} -PNANOVDB_FORCE_INLINE void pnanovdb_map_set_matd(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index, double matd) { - pnanovdb_write_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_MATD + 8u * index), matd); -} -PNANOVDB_FORCE_INLINE void pnanovdb_map_set_invmatd(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index, double invmatd) { - pnanovdb_write_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_INVMATD + 8u * index), invmatd); -} -PNANOVDB_FORCE_INLINE void pnanovdb_map_set_vecd(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index, double vecd) { - pnanovdb_write_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_VECD + 8u * index), vecd); -} -PNANOVDB_FORCE_INLINE void pnanovdb_map_set_taperd(pnanovdb_buf_t buf, 
pnanovdb_map_handle_t p, pnanovdb_uint32_t index, double taperd) { - pnanovdb_write_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_TAPERD), taperd); -} - -PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_magic(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint64_t magic) { - pnanovdb_write_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_MAGIC), magic); -} -PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_checksum(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint64_t checksum) { - pnanovdb_write_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_CHECKSUM), checksum); -} -PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_version(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t version) { - pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_VERSION), version); -} -PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_flags(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t flags) { - pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_FLAGS), flags); -} -PNANOVDB_FORCE_INLINE void pnanovdb_grid_get_grid_index(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t grid_index) { - pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_GRID_INDEX), grid_index); -} -PNANOVDB_FORCE_INLINE void pnanovdb_grid_get_grid_count(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t grid_count) { - pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_GRID_COUNT), grid_count); -} -PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_grid_size(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint64_t grid_size) { - pnanovdb_write_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_GRID_SIZE), grid_size); -} -PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_grid_name(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t index, pnanovdb_uint32_t grid_name) { - pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_GRID_NAME + 4u * index), grid_name); -} -PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_world_bbox(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t index, double world_bbox) { - pnanovdb_write_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_WORLD_BBOX + 8u * index), world_bbox); -} -PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_voxel_size(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t index, double voxel_size) { - pnanovdb_write_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_VOXEL_SIZE + 8u * index), voxel_size); -} -PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_grid_class(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t grid_class) { - pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_GRID_CLASS), grid_class); -} -PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_grid_type(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t grid_type) { - pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_GRID_TYPE), grid_type); -} -PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_blind_metadata_offset(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint64_t blind_metadata_offset) { - pnanovdb_write_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_BLIND_METADATA_OFFSET), blind_metadata_offset); -} -PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_blind_metadata_count(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, 
pnanovdb_uint32_t metadata_count) {
-    pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_BLIND_METADATA_COUNT), metadata_count);
-}
-
-PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_make_version(pnanovdb_uint32_t major, pnanovdb_uint32_t minor, pnanovdb_uint32_t patch)
-{
-    return (major << 21u) | (minor << 10u) | (patch);
-}
-
-#endif
\ No newline at end of file
diff --git a/warp/native/nanovdb/cuda/DeviceBuffer.h b/warp/native/nanovdb/cuda/DeviceBuffer.h
new file mode 100644
index 00000000..52c151ca
--- /dev/null
+++ b/warp/native/nanovdb/cuda/DeviceBuffer.h
@@ -0,0 +1,231 @@
+// Copyright Contributors to the OpenVDB Project
+// SPDX-License-Identifier: MPL-2.0
+
+/*!
+    \file DeviceBuffer.h
+
+    \author Ken Museth
+
+    \date January 8, 2020
+
+    \brief Implements a simple dual (host/device) CUDA buffer.
+
+    \note This file has no device-only kernel functions,
+          which explains why it's a .h and not .cuh file.
+*/
+
+#ifndef NANOVDB_CUDA_DEVICEBUFFER_H_HAS_BEEN_INCLUDED
+#define NANOVDB_CUDA_DEVICEBUFFER_H_HAS_BEEN_INCLUDED
+
+#include // for BufferTraits
+#include // for cudaMalloc/cudaMallocManaged/cudaFree
+
+namespace nanovdb {// ================================================================
+
+namespace cuda {// ===================================================================
+
+// ----------------------------> DeviceBuffer <--------------------------------------
+
+/// @brief Simple memory buffer using un-managed pinned host memory when compiled with NVCC.
+/// Obviously this class is making explicit use of CUDA so replace it with your own memory
+/// allocator if you are not using CUDA.
+/// @note While CUDA's pinned host memory allows for asynchronous memory copy between host and device
+/// it is significantly slower than cached (un-pinned) memory on the host.
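+///
+/// @par Example
+/// A minimal usage sketch of the host/device round trip, using only the methods declared
+/// below (the buffer size and the kernel launches are placeholders):
+/// @code
+/// nanovdb::cuda::DeviceBuffer buffer = nanovdb::cuda::DeviceBuffer::create(byteCount);
+/// /* ... fill buffer.data() on the host ... */
+/// buffer.deviceUpload();   // allocates the device copy on demand, then copies host -> device
+/// /* ... launch kernels that read buffer.deviceData() ... */
+/// buffer.deviceDownload(); // copies device -> host
+/// @endcode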
+class DeviceBuffer +{ + uint64_t mSize; // total number of bytes managed by this buffer (assumed to be identical for host and device) + void *mCpuData, *mGpuData; // raw pointers to the host and device buffers + bool mManaged; + +public: + /// @brief Static factory method that return an instance of this buffer + /// @param size byte size of buffer to be initialized + /// @param dummy this argument is currently ignored but required to match the API of the HostBuffer + /// @param host If true buffer is initialized only on the host/CPU, else on the device/GPU + /// @param stream optional stream argument (defaults to stream NULL) + /// @return An instance of this class using move semantics + static DeviceBuffer create(uint64_t size, const DeviceBuffer* dummy = nullptr, bool host = true, void* stream = nullptr); + + /// @brief Static factory method that return an instance of this buffer that wraps externally managed memory + /// @param size byte size of buffer specified by external memory + /// @param cpuData pointer to externally managed host memory + /// @param gpuData pointer to externally managed device memory + /// @return An instance of this class using move semantics + static DeviceBuffer create(uint64_t size, void* cpuData, void* gpuData); + + /// @brief Constructor + /// @param size byte size of buffer to be initialized + /// @param host If true buffer is initialized only on the host/CPU, else on the device/GPU + /// @param stream optional stream argument (defaults to stream NULL) + DeviceBuffer(uint64_t size = 0, bool host = true, void* stream = nullptr) + : mSize(0) + , mCpuData(nullptr) + , mGpuData(nullptr) + , mManaged(false) + { + if (size > 0) this->init(size, host, stream); + } + + DeviceBuffer(uint64_t size, void* cpuData, void* gpuData) + : mSize(size) + , mCpuData(cpuData) + , mGpuData(gpuData) + , mManaged(false) + { + } + + /// @brief Disallow copy-construction + DeviceBuffer(const DeviceBuffer&) = delete; + + /// @brief Move copy-constructor + DeviceBuffer(DeviceBuffer&& other) noexcept + : mSize(other.mSize) + , mCpuData(other.mCpuData) + , mGpuData(other.mGpuData) + , mManaged(other.mManaged) + { + other.mSize = 0; + other.mCpuData = nullptr; + other.mGpuData = nullptr; + other.mManaged = false; + } + + /// @brief Disallow copy assignment operation + DeviceBuffer& operator=(const DeviceBuffer&) = delete; + + /// @brief Move copy assignment operation + DeviceBuffer& operator=(DeviceBuffer&& other) noexcept + { + this->clear(); + mSize = other.mSize; + mCpuData = other.mCpuData; + mGpuData = other.mGpuData; + mManaged = other.mManaged; + other.mSize = 0; + other.mCpuData = nullptr; + other.mGpuData = nullptr; + other.mManaged = false; + return *this; + } + + /// @brief Destructor frees memory on both the host and device + ~DeviceBuffer() { this->clear(); }; + + /// @brief Initialize buffer + /// @param size byte size of buffer to be initialized + /// @param host If true buffer is initialized only on the host/CPU, else on the device/GPU + /// @note All existing buffers are first cleared + /// @warning size is expected to be non-zero. Use clear() clear buffer! + void init(uint64_t size, bool host = true, void* stream = nullptr); + + /// @brief Retuns a raw pointer to the host/CPU buffer managed by this allocator. + /// @warning Note that the pointer can be NULL! + void* data() const { return mCpuData; } + + /// @brief Retuns a raw pointer to the device/GPU buffer managed by this allocator. + /// @warning Note that the pointer can be NULL! 
+ void* deviceData() const { return mGpuData; } + + /// @brief Upload this buffer from the host to the device, i.e. CPU -> GPU. + /// @param stream optional CUDA stream (defaults to CUDA stream 0) + /// @param sync if false the memory copy is asynchronous + /// @note If the device/GPU buffer does not exist it is first allocated + /// @warning Assumes that the host/CPU buffer already exists + void deviceUpload(void* stream = nullptr, bool sync = true) const; + + /// @brief Upload this buffer from the device to the host, i.e. GPU -> CPU. + /// @param stream optional CUDA stream (defaults to CUDA stream 0) + /// @param sync if false the memory copy is asynchronous + /// @note If the host/CPU buffer does not exist it is first allocated + /// @warning Assumes that the device/GPU buffer already exists + void deviceDownload(void* stream = nullptr, bool sync = true) const; + + /// @brief Returns the size in bytes of the raw memory buffer managed by this allocator. + uint64_t size() const { return mSize; } + + //@{ + /// @brief Returns true if this allocator is empty, i.e. has no allocated memory + bool empty() const { return mSize == 0; } + bool isEmpty() const { return mSize == 0; } + //@} + + /// @brief De-allocate all memory managed by this allocator and set all pointers to NULL + void clear(void* stream = nullptr); + +}; // DeviceBuffer class + +// --------------------------> Implementations below <------------------------------------ + +inline DeviceBuffer DeviceBuffer::create(uint64_t size, const DeviceBuffer*, bool host, void* stream) +{ + return DeviceBuffer(size, host, stream); +} + +inline DeviceBuffer DeviceBuffer::create(uint64_t size, void* cpuData, void* gpuData) +{ + return DeviceBuffer(size, cpuData, gpuData); +} + +inline void DeviceBuffer::init(uint64_t size, bool host, void* stream) +{ + if (mSize>0) this->clear(stream); + NANOVDB_ASSERT(size > 0); + if (host) { + cudaCheck(cudaMallocHost((void**)&mCpuData, size)); // un-managed pinned memory on the host (can be slow to access!). Always 32B aligned + checkPtr(mCpuData, "cuda::DeviceBuffer::init: failed to allocate host buffer"); + } else { + cudaCheck(util::cuda::mallocAsync((void**)&mGpuData, size, reinterpret_cast(stream))); // un-managed memory on the device, always 32B aligned! + checkPtr(mGpuData, "cuda::DeviceBuffer::init: failed to allocate device buffer"); + } + mSize = size; + mManaged = true; +} // DeviceBuffer::init + +inline void DeviceBuffer::deviceUpload(void* stream, bool sync) const +{ + if (!mManaged) throw std::runtime_error("DeviceBuffer::deviceUpload called on externally managed memory. Replace deviceUpload call with the appropriate external copy operation."); + + checkPtr(mCpuData, "uninitialized cpu data"); + if (mGpuData == nullptr) { + cudaCheck(util::cuda::mallocAsync((void**)&mGpuData, mSize, reinterpret_cast(stream))); // un-managed memory on the device, always 32B aligned! + } + checkPtr(mGpuData, "uninitialized gpu data"); + cudaCheck(cudaMemcpyAsync(mGpuData, mCpuData, mSize, cudaMemcpyHostToDevice, reinterpret_cast(stream))); + if (sync) cudaCheck(cudaStreamSynchronize(reinterpret_cast(stream))); +} // DeviceBuffer::gpuUpload + +inline void DeviceBuffer::deviceDownload(void* stream, bool sync) const +{ + if (!mManaged) throw std::runtime_error("DeviceBuffer::deviceDownload called on externally managed memory. 
Replace deviceDownload call with the appropriate external copy operation."); + + checkPtr(mGpuData, "uninitialized gpu data"); + if (mCpuData == nullptr) { + cudaCheck(cudaMallocHost((void**)&mCpuData, mSize)); // un-managed pinned memory on the host (can be slow to access!). Always 32B aligned + } + checkPtr(mCpuData, "uninitialized cpu data"); + cudaCheck(cudaMemcpyAsync(mCpuData, mGpuData, mSize, cudaMemcpyDeviceToHost, reinterpret_cast(stream))); + if (sync) cudaCheck(cudaStreamSynchronize(reinterpret_cast(stream))); +} // DeviceBuffer::gpuDownload + +inline void DeviceBuffer::clear(void *stream) +{ + if (mManaged && mGpuData) cudaCheck(util::cuda::freeAsync(mGpuData, reinterpret_cast(stream))); + if (mManaged && mCpuData) cudaCheck(cudaFreeHost(mCpuData)); + mCpuData = mGpuData = nullptr; + mSize = 0; + mManaged = false; +} // DeviceBuffer::clear + +}// namespace cuda + +using CudaDeviceBuffer [[deprecated("Use nanovdb::cuda::DeviceBudder instead")]] = cuda::DeviceBuffer; + +template<> +struct BufferTraits +{ + static constexpr bool hasDeviceDual = true; +}; + +}// namespace nanovdb + +#endif // end of NANOVDB_CUDA_DEVICEBUFFER_H_HAS_BEEN_INCLUDED diff --git a/warp/native/nanovdb/cuda/GridHandle.cuh b/warp/native/nanovdb/cuda/GridHandle.cuh new file mode 100644 index 00000000..602087be --- /dev/null +++ b/warp/native/nanovdb/cuda/GridHandle.cuh @@ -0,0 +1,76 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: MPL-2.0 + +/*! + \file nanovdb/cuda/GridHandle.cuh + + \author Ken Museth, Doyub Kim + + \date August 3, 2023 + + \brief Contains cuda kernels for GridHandle + + \warning The header file contains cuda device code so be sure + to only include it in .cu files (or other .cuh files) +*/ + +#ifndef NANOVDB_CUDA_GRIDHANDLE_CUH_HAS_BEEN_INCLUDED +#define NANOVDB_CUDA_GRIDHANDLE_CUH_HAS_BEEN_INCLUDED + +#include // required for instantiation of move c-tor of GridHandle +#include + +namespace nanovdb { + +namespace cuda { + +namespace {// anonymous namespace +__global__ void cpyGridHandleMeta(const GridData *d_data, GridHandleMetaData *d_meta) +{ + nanovdb::cpyGridHandleMeta(d_data, d_meta); +} + +__global__ void updateGridCount(GridData *d_data, uint32_t gridIndex, uint32_t gridCount, bool *d_dirty) +{ + NANOVDB_ASSERT(gridIndex < gridCount); + if (*d_dirty = d_data->mGridIndex != gridIndex || d_data->mGridCount != gridCount) { + d_data->mGridIndex = gridIndex; + d_data->mGridCount = gridCount; + if (d_data->mChecksum.isEmpty()) *d_dirty = false;// no need to update checksum if it didn't already exist + } +} +}// anonymous namespace + +}// namespace cuda + +template +template::hasDeviceDual, int>::type> +GridHandle::GridHandle(T&& buffer) +{ + static_assert(util::is_same::value, "Expected U==BufferT"); + mBuffer = std::move(buffer); + if (auto *data = reinterpret_cast(mBuffer.data())) { + if (!data->isValid()) throw std::runtime_error("GridHandle was constructed with an invalid host buffer"); + mMetaData.resize(data->mGridCount); + cpyGridHandleMeta(data, mMetaData.data()); + } else { + if (auto *d_data = reinterpret_cast(mBuffer.deviceData())) { + GridData tmp; + cudaCheck(cudaMemcpy(&tmp, d_data, sizeof(GridData), cudaMemcpyDeviceToHost)); + if (!tmp.isValid()) throw std::runtime_error("GridHandle was constructed with an invalid device buffer"); + GridHandleMetaData *d_metaData; + cudaMalloc((void**)&d_metaData, tmp.mGridCount*sizeof(GridHandleMetaData)); + cuda::cpyGridHandleMeta<<<1,1>>>(d_data, d_metaData); + mMetaData.resize(tmp.mGridCount); + 
cudaCheck(cudaMemcpy(mMetaData.data(), d_metaData,tmp.mGridCount*sizeof(GridHandleMetaData), cudaMemcpyDeviceToHost)); + cudaCheck(cudaFree(d_metaData)); + } + } +}// GridHandle(T&& buffer) + +// Dummy function that ensures instantiation of the move-constructor above when BufferT=cuda::DeviceBuffer +namespace {auto __dummy(){return GridHandle(std::move(cuda::DeviceBuffer()));}} + +} // namespace nanovdb + +#endif // NANOVDB_CUDA_GRIDHANDLE_CUH_HAS_BEEN_INCLUDED diff --git a/warp/native/nanovdb/math/Math.h b/warp/native/nanovdb/math/Math.h new file mode 100644 index 00000000..d9523074 --- /dev/null +++ b/warp/native/nanovdb/math/Math.h @@ -0,0 +1,1448 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: MPL-2.0 + +/*! + \file Math.h + + \author Ken Museth + + \date January 8, 2020 + + \brief Math functions and classes + +*/ + +#ifndef NANOVDB_MATH_MATH_H_HAS_BEEN_INCLUDED +#define NANOVDB_MATH_MATH_H_HAS_BEEN_INCLUDED + +#include // for __hostdev__ and lots of other utility functions + +namespace nanovdb {// ================================================================= + +namespace math {// ============================================================= + +// ----------------------------> Various math functions <------------------------------------- + +//@{ +/// @brief Pi constant taken from Boost to match old behaviour +template +inline __hostdev__ constexpr T pi() +{ + return 3.141592653589793238462643383279502884e+00; +} +template<> +inline __hostdev__ constexpr float pi() +{ + return 3.141592653589793238462643383279502884e+00F; +} +template<> +inline __hostdev__ constexpr double pi() +{ + return 3.141592653589793238462643383279502884e+00; +} +template<> +inline __hostdev__ constexpr long double pi() +{ + return 3.141592653589793238462643383279502884e+00L; +} +//@} + +//@{ +/// Tolerance for floating-point comparison +template +struct Tolerance; +template<> +struct Tolerance +{ + __hostdev__ static float value() { return 1e-8f; } +}; +template<> +struct Tolerance +{ + __hostdev__ static double value() { return 1e-15; } +}; +//@} + +//@{ +/// Delta for small floating-point offsets +template +struct Delta; +template<> +struct Delta +{ + __hostdev__ static float value() { return 1e-5f; } +}; +template<> +struct Delta +{ + __hostdev__ static double value() { return 1e-9; } +}; +//@} + +//@{ +/// Maximum floating-point values +template +struct Maximum; +#if defined(__CUDA_ARCH__) || defined(__HIP__) +template<> +struct Maximum +{ + __hostdev__ static int value() { return 2147483647; } +}; +template<> +struct Maximum +{ + __hostdev__ static uint32_t value() { return 4294967295u; } +}; +template<> +struct Maximum +{ + __hostdev__ static float value() { return 1e+38f; } +}; +template<> +struct Maximum +{ + __hostdev__ static double value() { return 1e+308; } +}; +#else +template +struct Maximum +{ + static T value() { return std::numeric_limits::max(); } +}; +#endif +//@} + +template +__hostdev__ inline bool isApproxZero(const Type& x) +{ + return !(x > Tolerance::value()) && !(x < -Tolerance::value()); +} + +template +__hostdev__ inline Type Min(Type a, Type b) +{ + return (a < b) ? 
a : b; +} +__hostdev__ inline int32_t Min(int32_t a, int32_t b) +{ + return int32_t(fminf(float(a), float(b))); +} +__hostdev__ inline uint32_t Min(uint32_t a, uint32_t b) +{ + return uint32_t(fminf(float(a), float(b))); +} +__hostdev__ inline float Min(float a, float b) +{ + return fminf(a, b); +} +__hostdev__ inline double Min(double a, double b) +{ + return fmin(a, b); +} +template +__hostdev__ inline Type Max(Type a, Type b) +{ + return (a > b) ? a : b; +} + +__hostdev__ inline int32_t Max(int32_t a, int32_t b) +{ + return int32_t(fmaxf(float(a), float(b))); +} +__hostdev__ inline uint32_t Max(uint32_t a, uint32_t b) +{ + return uint32_t(fmaxf(float(a), float(b))); +} +__hostdev__ inline float Max(float a, float b) +{ + return fmaxf(a, b); +} +__hostdev__ inline double Max(double a, double b) +{ + return fmax(a, b); +} +__hostdev__ inline float Clamp(float x, float a, float b) +{ + return Max(Min(x, b), a); +} +__hostdev__ inline double Clamp(double x, double a, double b) +{ + return Max(Min(x, b), a); +} + +__hostdev__ inline float Fract(float x) +{ + return x - floorf(x); +} +__hostdev__ inline double Fract(double x) +{ + return x - floor(x); +} + +__hostdev__ inline int32_t Floor(float x) +{ + return int32_t(floorf(x)); +} +__hostdev__ inline int32_t Floor(double x) +{ + return int32_t(floor(x)); +} + +__hostdev__ inline int32_t Ceil(float x) +{ + return int32_t(ceilf(x)); +} +__hostdev__ inline int32_t Ceil(double x) +{ + return int32_t(ceil(x)); +} + +template +__hostdev__ inline T Pow2(T x) +{ + return x * x; +} + +template +__hostdev__ inline T Pow3(T x) +{ + return x * x * x; +} + +template +__hostdev__ inline T Pow4(T x) +{ + return Pow2(x * x); +} +template +__hostdev__ inline T Abs(T x) +{ + return x < 0 ? -x : x; +} + +template<> +__hostdev__ inline float Abs(float x) +{ + return fabsf(x); +} + +template<> +__hostdev__ inline double Abs(double x) +{ + return fabs(x); +} + +template<> +__hostdev__ inline int Abs(int x) +{ + return abs(x); +} + +template class Vec3T> +__hostdev__ inline CoordT Round(const Vec3T& xyz); + +template class Vec3T> +__hostdev__ inline CoordT Round(const Vec3T& xyz) +{ + return CoordT(int32_t(rintf(xyz[0])), int32_t(rintf(xyz[1])), int32_t(rintf(xyz[2]))); + //return CoordT(int32_t(roundf(xyz[0])), int32_t(roundf(xyz[1])), int32_t(roundf(xyz[2])) ); + //return CoordT(int32_t(floorf(xyz[0] + 0.5f)), int32_t(floorf(xyz[1] + 0.5f)), int32_t(floorf(xyz[2] + 0.5f))); +} + +template class Vec3T> +__hostdev__ inline CoordT Round(const Vec3T& xyz) +{ + return CoordT(int32_t(floor(xyz[0] + 0.5)), int32_t(floor(xyz[1] + 0.5)), int32_t(floor(xyz[2] + 0.5))); +} + +template class Vec3T> +__hostdev__ inline CoordT RoundDown(const Vec3T& xyz) +{ + return CoordT(Floor(xyz[0]), Floor(xyz[1]), Floor(xyz[2])); +} + +//@{ +/// Return the square root of a floating-point value. +__hostdev__ inline float Sqrt(float x) +{ + return sqrtf(x); +} +__hostdev__ inline double Sqrt(double x) +{ + return sqrt(x); +} +//@} + +/// Return the sign of the given value as an integer (either -1, 0 or 1). +template +__hostdev__ inline T Sign(const T& x) +{ + return ((T(0) < x) ? T(1) : T(0)) - ((x < T(0)) ? 
T(1) : T(0)); +} + +template +__hostdev__ inline int MinIndex(const Vec3T& v) +{ +#if 0 + static const int hashTable[8] = {2, 1, 9, 1, 2, 9, 0, 0}; //9 are dummy values + const int hashKey = ((v[0] < v[1]) << 2) + ((v[0] < v[2]) << 1) + (v[1] < v[2]); // ?*4+?*2+?*1 + return hashTable[hashKey]; +#else + if (v[0] < v[1] && v[0] < v[2]) + return 0; + if (v[1] < v[2]) + return 1; + else + return 2; +#endif +} + +template +__hostdev__ inline int MaxIndex(const Vec3T& v) +{ +#if 0 + static const int hashTable[8] = {2, 1, 9, 1, 2, 9, 0, 0}; //9 are dummy values + const int hashKey = ((v[0] > v[1]) << 2) + ((v[0] > v[2]) << 1) + (v[1] > v[2]); // ?*4+?*2+?*1 + return hashTable[hashKey]; +#else + if (v[0] > v[1] && v[0] > v[2]) + return 0; + if (v[1] > v[2]) + return 1; + else + return 2; +#endif +} + +/// @brief round up byteSize to the nearest wordSize, e.g. to align to machine word: AlignUp +__hostdev__ inline uint64_t AlignUp(uint64_t byteCount) +{ + const uint64_t r = byteCount % wordSize; + return r ? byteCount - r + wordSize : byteCount; +} + +// ------------------------------> Coord <-------------------------------------- + +// forward declaration so we can define Coord::asVec3s and Coord::asVec3d +template +class Vec3; + +/// @brief Signed (i, j, k) 32-bit integer coordinate class, similar to openvdb::math::Coord +class Coord +{ + int32_t mVec[3]; // private member data - three signed index coordinates +public: + using ValueType = int32_t; + using IndexType = uint32_t; + + /// @brief Initialize all coordinates to zero. + __hostdev__ Coord() + : mVec{0, 0, 0} + { + } + + /// @brief Initializes all coordinates to the given signed integer. + __hostdev__ explicit Coord(ValueType n) + : mVec{n, n, n} + { + } + + /// @brief Initializes coordinate to the given signed integers. + __hostdev__ Coord(ValueType i, ValueType j, ValueType k) + : mVec{i, j, k} + { + } + + __hostdev__ Coord(ValueType* ptr) + : mVec{ptr[0], ptr[1], ptr[2]} + { + } + + __hostdev__ int32_t x() const { return mVec[0]; } + __hostdev__ int32_t y() const { return mVec[1]; } + __hostdev__ int32_t z() const { return mVec[2]; } + + __hostdev__ int32_t& x() { return mVec[0]; } + __hostdev__ int32_t& y() { return mVec[1]; } + __hostdev__ int32_t& z() { return mVec[2]; } + + __hostdev__ static Coord max() { return Coord(int32_t((1u << 31) - 1)); } + + __hostdev__ static Coord min() { return Coord(-int32_t((1u << 31) - 1) - 1); } + + __hostdev__ static size_t memUsage() { return sizeof(Coord); } + + /// @brief Return a const reference to the given Coord component. + /// @warning The argument is assumed to be 0, 1, or 2. + __hostdev__ const ValueType& operator[](IndexType i) const { return mVec[i]; } + + /// @brief Return a non-const reference to the given Coord component. + /// @warning The argument is assumed to be 0, 1, or 2. + __hostdev__ ValueType& operator[](IndexType i) { return mVec[i]; } + + /// @brief Assignment operator that works with openvdb::Coord + template + __hostdev__ Coord& operator=(const CoordT& other) + { + static_assert(sizeof(Coord) == sizeof(CoordT), "Mis-matched sizeof"); + mVec[0] = other[0]; + mVec[1] = other[1]; + mVec[2] = other[2]; + return *this; + } + + /// @brief Return a new instance with coordinates masked by the given unsigned integer. + __hostdev__ Coord operator&(IndexType n) const { return Coord(mVec[0] & n, mVec[1] & n, mVec[2] & n); } + + // @brief Return a new instance with coordinates left-shifted by the given unsigned integer. 
+ __hostdev__ Coord operator<<(IndexType n) const { return Coord(mVec[0] << n, mVec[1] << n, mVec[2] << n); } + + // @brief Return a new instance with coordinates right-shifted by the given unsigned integer. + __hostdev__ Coord operator>>(IndexType n) const { return Coord(mVec[0] >> n, mVec[1] >> n, mVec[2] >> n); } + + /// @brief Return true if this Coord is lexicographically less than the given Coord. + __hostdev__ bool operator<(const Coord& rhs) const + { + return mVec[0] < rhs[0] ? true + : mVec[0] > rhs[0] ? false + : mVec[1] < rhs[1] ? true + : mVec[1] > rhs[1] ? false + : mVec[2] < rhs[2] ? true : false; + } + + /// @brief Return true if this Coord is lexicographically less or equal to the given Coord. + __hostdev__ bool operator<=(const Coord& rhs) const + { + return mVec[0] < rhs[0] ? true + : mVec[0] > rhs[0] ? false + : mVec[1] < rhs[1] ? true + : mVec[1] > rhs[1] ? false + : mVec[2] <=rhs[2] ? true : false; + } + + // @brief Return true if this Coord is lexicographically greater than the given Coord. + __hostdev__ bool operator>(const Coord& rhs) const + { + return mVec[0] > rhs[0] ? true + : mVec[0] < rhs[0] ? false + : mVec[1] > rhs[1] ? true + : mVec[1] < rhs[1] ? false + : mVec[2] > rhs[2] ? true : false; + } + + // @brief Return true if this Coord is lexicographically greater or equal to the given Coord. + __hostdev__ bool operator>=(const Coord& rhs) const + { + return mVec[0] > rhs[0] ? true + : mVec[0] < rhs[0] ? false + : mVec[1] > rhs[1] ? true + : mVec[1] < rhs[1] ? false + : mVec[2] >=rhs[2] ? true : false; + } + + // @brief Return true if the Coord components are identical. + __hostdev__ bool operator==(const Coord& rhs) const { return mVec[0] == rhs[0] && mVec[1] == rhs[1] && mVec[2] == rhs[2]; } + __hostdev__ bool operator!=(const Coord& rhs) const { return mVec[0] != rhs[0] || mVec[1] != rhs[1] || mVec[2] != rhs[2]; } + __hostdev__ Coord& operator&=(int n) + { + mVec[0] &= n; + mVec[1] &= n; + mVec[2] &= n; + return *this; + } + __hostdev__ Coord& operator<<=(uint32_t n) + { + mVec[0] <<= n; + mVec[1] <<= n; + mVec[2] <<= n; + return *this; + } + __hostdev__ Coord& operator>>=(uint32_t n) + { + mVec[0] >>= n; + mVec[1] >>= n; + mVec[2] >>= n; + return *this; + } + __hostdev__ Coord& operator+=(int n) + { + mVec[0] += n; + mVec[1] += n; + mVec[2] += n; + return *this; + } + __hostdev__ Coord operator+(const Coord& rhs) const { return Coord(mVec[0] + rhs[0], mVec[1] + rhs[1], mVec[2] + rhs[2]); } + __hostdev__ Coord operator-(const Coord& rhs) const { return Coord(mVec[0] - rhs[0], mVec[1] - rhs[1], mVec[2] - rhs[2]); } + __hostdev__ Coord operator-() const { return Coord(-mVec[0], -mVec[1], -mVec[2]); } + __hostdev__ Coord& operator+=(const Coord& rhs) + { + mVec[0] += rhs[0]; + mVec[1] += rhs[1]; + mVec[2] += rhs[2]; + return *this; + } + __hostdev__ Coord& operator-=(const Coord& rhs) + { + mVec[0] -= rhs[0]; + mVec[1] -= rhs[1]; + mVec[2] -= rhs[2]; + return *this; + } + + /// @brief Perform a component-wise minimum with the other Coord. + __hostdev__ Coord& minComponent(const Coord& other) + { + if (other[0] < mVec[0]) + mVec[0] = other[0]; + if (other[1] < mVec[1]) + mVec[1] = other[1]; + if (other[2] < mVec[2]) + mVec[2] = other[2]; + return *this; + } + + /// @brief Perform a component-wise maximum with the other Coord. 
+ __hostdev__ Coord& maxComponent(const Coord& other) + { + if (other[0] > mVec[0]) + mVec[0] = other[0]; + if (other[1] > mVec[1]) + mVec[1] = other[1]; + if (other[2] > mVec[2]) + mVec[2] = other[2]; + return *this; + } +#if defined(__CUDACC__) // the following functions only run on the GPU! + __device__ inline Coord& minComponentAtomic(const Coord& other) + { + atomicMin(&mVec[0], other[0]); + atomicMin(&mVec[1], other[1]); + atomicMin(&mVec[2], other[2]); + return *this; + } + __device__ inline Coord& maxComponentAtomic(const Coord& other) + { + atomicMax(&mVec[0], other[0]); + atomicMax(&mVec[1], other[1]); + atomicMax(&mVec[2], other[2]); + return *this; + } +#endif + + __hostdev__ Coord offsetBy(ValueType dx, ValueType dy, ValueType dz) const + { + return Coord(mVec[0] + dx, mVec[1] + dy, mVec[2] + dz); + } + + __hostdev__ Coord offsetBy(ValueType n) const { return this->offsetBy(n, n, n); } + + /// Return true if any of the components of @a a are smaller than the + /// corresponding components of @a b. + __hostdev__ static inline bool lessThan(const Coord& a, const Coord& b) + { + return (a[0] < b[0] || a[1] < b[1] || a[2] < b[2]); + } + + /// @brief Return the largest integer coordinates that are not greater + /// than @a xyz (node centered conversion). + template + __hostdev__ static Coord Floor(const Vec3T& xyz) { return Coord(math::Floor(xyz[0]), math::Floor(xyz[1]), math::Floor(xyz[2])); } + + /// @brief Return a hash key derived from the existing coordinates. + /// @details The hash function is originally taken from the SIGGRAPH paper: + /// "VDB: High-resolution sparse volumes with dynamic topology" + /// and the prime numbers are modified based on the ACM Transactions on Graphics paper: + /// "Real-time 3D reconstruction at scale using voxel hashing" (the second number had a typo!) 
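+    /// @par Example
+    /// A minimal sketch (values are illustrative) of using the hash as a bucket index into a
+    /// spatial hash table with 2^12 entries:
+    /// @code
+    /// nanovdb::math::Coord ijk(1, 2, 3);
+    /// uint32_t bucket = ijk.hash<12>(); // bucket is guaranteed to be < 4096
+    /// @endcode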
+ template + __hostdev__ uint32_t hash() const { return ((1 << Log2N) - 1) & (mVec[0] * 73856093 ^ mVec[1] * 19349669 ^ mVec[2] * 83492791); } + + /// @brief Return the octant of this Coord + //__hostdev__ size_t octant() const { return (uint32_t(mVec[0])>>31) | ((uint32_t(mVec[1])>>31)<<1) | ((uint32_t(mVec[2])>>31)<<2); } + __hostdev__ uint8_t octant() const { return (uint8_t(bool(mVec[0] & (1u << 31)))) | + (uint8_t(bool(mVec[1] & (1u << 31))) << 1) | + (uint8_t(bool(mVec[2] & (1u << 31))) << 2); } + + /// @brief Return a single precision floating-point vector of this coordinate + __hostdev__ inline Vec3 asVec3s() const; + + /// @brief Return a double precision floating-point vector of this coordinate + __hostdev__ inline Vec3 asVec3d() const; + + // returns a copy of itself, so it mimics the behaviour of Vec3::round() + __hostdev__ inline Coord round() const { return *this; } +}; // Coord class + +// ----------------------------> Vec3 <-------------------------------------- + +/// @brief A simple vector class with three components, similar to openvdb::math::Vec3 +template +class Vec3 +{ + T mVec[3]; + +public: + static const int SIZE = 3; + static const int size = 3; // in openvdb::math::Tuple + using ValueType = T; + Vec3() = default; + __hostdev__ explicit Vec3(T x) + : mVec{x, x, x} + { + } + __hostdev__ Vec3(T x, T y, T z) + : mVec{x, y, z} + { + } + template class Vec3T, class T2> + __hostdev__ Vec3(const Vec3T& v) + : mVec{T(v[0]), T(v[1]), T(v[2])} + { + static_assert(Vec3T::size == size, "expected Vec3T::size==3!"); + } + template + __hostdev__ explicit Vec3(const Vec3& v) + : mVec{T(v[0]), T(v[1]), T(v[2])} + { + } + __hostdev__ explicit Vec3(const Coord& ijk) + : mVec{T(ijk[0]), T(ijk[1]), T(ijk[2])} + { + } + __hostdev__ bool operator==(const Vec3& rhs) const { return mVec[0] == rhs[0] && mVec[1] == rhs[1] && mVec[2] == rhs[2]; } + __hostdev__ bool operator!=(const Vec3& rhs) const { return mVec[0] != rhs[0] || mVec[1] != rhs[1] || mVec[2] != rhs[2]; } + template class Vec3T, class T2> + __hostdev__ Vec3& operator=(const Vec3T& rhs) + { + static_assert(Vec3T::size == size, "expected Vec3T::size==3!"); + mVec[0] = rhs[0]; + mVec[1] = rhs[1]; + mVec[2] = rhs[2]; + return *this; + } + __hostdev__ const T& operator[](int i) const { return mVec[i]; } + __hostdev__ T& operator[](int i) { return mVec[i]; } + template + __hostdev__ T dot(const Vec3T& v) const { return mVec[0] * v[0] + mVec[1] * v[1] + mVec[2] * v[2]; } + template + __hostdev__ Vec3 cross(const Vec3T& v) const + { + return Vec3(mVec[1] * v[2] - mVec[2] * v[1], + mVec[2] * v[0] - mVec[0] * v[2], + mVec[0] * v[1] - mVec[1] * v[0]); + } + __hostdev__ T lengthSqr() const + { + return mVec[0] * mVec[0] + mVec[1] * mVec[1] + mVec[2] * mVec[2]; // 5 flops + } + __hostdev__ T length() const { return Sqrt(this->lengthSqr()); } + __hostdev__ Vec3 operator-() const { return Vec3(-mVec[0], -mVec[1], -mVec[2]); } + __hostdev__ Vec3 operator*(const Vec3& v) const { return Vec3(mVec[0] * v[0], mVec[1] * v[1], mVec[2] * v[2]); } + __hostdev__ Vec3 operator/(const Vec3& v) const { return Vec3(mVec[0] / v[0], mVec[1] / v[1], mVec[2] / v[2]); } + __hostdev__ Vec3 operator+(const Vec3& v) const { return Vec3(mVec[0] + v[0], mVec[1] + v[1], mVec[2] + v[2]); } + __hostdev__ Vec3 operator-(const Vec3& v) const { return Vec3(mVec[0] - v[0], mVec[1] - v[1], mVec[2] - v[2]); } + __hostdev__ Vec3 operator+(const Coord& ijk) const { return Vec3(mVec[0] + ijk[0], mVec[1] + ijk[1], mVec[2] + ijk[2]); } + __hostdev__ Vec3 operator-(const Coord& 
ijk) const { return Vec3(mVec[0] - ijk[0], mVec[1] - ijk[1], mVec[2] - ijk[2]); } + __hostdev__ Vec3 operator*(const T& s) const { return Vec3(s * mVec[0], s * mVec[1], s * mVec[2]); } + __hostdev__ Vec3 operator/(const T& s) const { return (T(1) / s) * (*this); } + __hostdev__ Vec3& operator+=(const Vec3& v) + { + mVec[0] += v[0]; + mVec[1] += v[1]; + mVec[2] += v[2]; + return *this; + } + __hostdev__ Vec3& operator+=(const Coord& ijk) + { + mVec[0] += T(ijk[0]); + mVec[1] += T(ijk[1]); + mVec[2] += T(ijk[2]); + return *this; + } + __hostdev__ Vec3& operator-=(const Vec3& v) + { + mVec[0] -= v[0]; + mVec[1] -= v[1]; + mVec[2] -= v[2]; + return *this; + } + __hostdev__ Vec3& operator-=(const Coord& ijk) + { + mVec[0] -= T(ijk[0]); + mVec[1] -= T(ijk[1]); + mVec[2] -= T(ijk[2]); + return *this; + } + __hostdev__ Vec3& operator*=(const T& s) + { + mVec[0] *= s; + mVec[1] *= s; + mVec[2] *= s; + return *this; + } + __hostdev__ Vec3& operator/=(const T& s) { return (*this) *= T(1) / s; } + __hostdev__ Vec3& normalize() { return (*this) /= this->length(); } + /// @brief Perform a component-wise minimum with the other Coord. + __hostdev__ Vec3& minComponent(const Vec3& other) + { + if (other[0] < mVec[0]) + mVec[0] = other[0]; + if (other[1] < mVec[1]) + mVec[1] = other[1]; + if (other[2] < mVec[2]) + mVec[2] = other[2]; + return *this; + } + + /// @brief Perform a component-wise maximum with the other Coord. + __hostdev__ Vec3& maxComponent(const Vec3& other) + { + if (other[0] > mVec[0]) + mVec[0] = other[0]; + if (other[1] > mVec[1]) + mVec[1] = other[1]; + if (other[2] > mVec[2]) + mVec[2] = other[2]; + return *this; + } + /// @brief Return the smallest vector component + __hostdev__ ValueType min() const + { + return mVec[0] < mVec[1] ? (mVec[0] < mVec[2] ? mVec[0] : mVec[2]) : (mVec[1] < mVec[2] ? mVec[1] : mVec[2]); + } + /// @brief Return the largest vector component + __hostdev__ ValueType max() const + { + return mVec[0] > mVec[1] ? (mVec[0] > mVec[2] ? mVec[0] : mVec[2]) : (mVec[1] > mVec[2] ? 
mVec[1] : mVec[2]); + } + /// @brief Round each component if this Vec up to its integer value + /// @return Return an integer Coord + __hostdev__ Coord floor() const { return Coord(Floor(mVec[0]), Floor(mVec[1]), Floor(mVec[2])); } + /// @brief Round each component if this Vec down to its integer value + /// @return Return an integer Coord + __hostdev__ Coord ceil() const { return Coord(Ceil(mVec[0]), Ceil(mVec[1]), Ceil(mVec[2])); } + /// @brief Round each component if this Vec to its closest integer value + /// @return Return an integer Coord + __hostdev__ Coord round() const + { + if (util::is_same::value) { + return Coord(Floor(mVec[0] + 0.5f), Floor(mVec[1] + 0.5f), Floor(mVec[2] + 0.5f)); + } else if (util::is_same::value) { + return Coord(mVec[0], mVec[1], mVec[2]); + } else { + return Coord(Floor(mVec[0] + 0.5), Floor(mVec[1] + 0.5), Floor(mVec[2] + 0.5)); + } + } + + /// @brief return a non-const raw constant pointer to array of three vector components + __hostdev__ T* asPointer() { return mVec; } + /// @brief return a const raw constant pointer to array of three vector components + __hostdev__ const T* asPointer() const { return mVec; } +}; // Vec3 + +template +__hostdev__ inline Vec3 operator*(T1 scalar, const Vec3& vec) +{ + return Vec3(scalar * vec[0], scalar * vec[1], scalar * vec[2]); +} +template +__hostdev__ inline Vec3 operator/(T1 scalar, const Vec3& vec) +{ + return Vec3(scalar / vec[0], scalar / vec[1], scalar / vec[2]); +} + +/// @brief Return a single precision floating-point vector of this coordinate +__hostdev__ inline Vec3 Coord::asVec3s() const +{ + return Vec3(float(mVec[0]), float(mVec[1]), float(mVec[2])); +} + +/// @brief Return a double precision floating-point vector of this coordinate +__hostdev__ inline Vec3 Coord::asVec3d() const +{ + return Vec3(double(mVec[0]), double(mVec[1]), double(mVec[2])); +} + +// ----------------------------> Vec4 <-------------------------------------- + +/// @brief A simple vector class with four components, similar to openvdb::math::Vec4 +template +class Vec4 +{ + T mVec[4]; + +public: + static const int SIZE = 4; + static const int size = 4; + using ValueType = T; + Vec4() = default; + __hostdev__ explicit Vec4(T x) + : mVec{x, x, x, x} + { + } + __hostdev__ Vec4(T x, T y, T z, T w) + : mVec{x, y, z, w} + { + } + template + __hostdev__ explicit Vec4(const Vec4& v) + : mVec{T(v[0]), T(v[1]), T(v[2]), T(v[3])} + { + } + template class Vec4T, class T2> + __hostdev__ Vec4(const Vec4T& v) + : mVec{T(v[0]), T(v[1]), T(v[2]), T(v[3])} + { + static_assert(Vec4T::size == size, "expected Vec4T::size==4!"); + } + __hostdev__ bool operator==(const Vec4& rhs) const { return mVec[0] == rhs[0] && mVec[1] == rhs[1] && mVec[2] == rhs[2] && mVec[3] == rhs[3]; } + __hostdev__ bool operator!=(const Vec4& rhs) const { return mVec[0] != rhs[0] || mVec[1] != rhs[1] || mVec[2] != rhs[2] || mVec[3] != rhs[3]; } + template class Vec4T, class T2> + __hostdev__ Vec4& operator=(const Vec4T& rhs) + { + static_assert(Vec4T::size == size, "expected Vec4T::size==4!"); + mVec[0] = rhs[0]; + mVec[1] = rhs[1]; + mVec[2] = rhs[2]; + mVec[3] = rhs[3]; + return *this; + } + + __hostdev__ const T& operator[](int i) const { return mVec[i]; } + __hostdev__ T& operator[](int i) { return mVec[i]; } + template + __hostdev__ T dot(const Vec4T& v) const { return mVec[0] * v[0] + mVec[1] * v[1] + mVec[2] * v[2] + mVec[3] * v[3]; } + __hostdev__ T lengthSqr() const + { + return mVec[0] * mVec[0] + mVec[1] * mVec[1] + mVec[2] * mVec[2] + mVec[3] * mVec[3]; // 7 flops + } + 
__hostdev__ T length() const { return Sqrt(this->lengthSqr()); } + __hostdev__ Vec4 operator-() const { return Vec4(-mVec[0], -mVec[1], -mVec[2], -mVec[3]); } + __hostdev__ Vec4 operator*(const Vec4& v) const { return Vec4(mVec[0] * v[0], mVec[1] * v[1], mVec[2] * v[2], mVec[3] * v[3]); } + __hostdev__ Vec4 operator/(const Vec4& v) const { return Vec4(mVec[0] / v[0], mVec[1] / v[1], mVec[2] / v[2], mVec[3] / v[3]); } + __hostdev__ Vec4 operator+(const Vec4& v) const { return Vec4(mVec[0] + v[0], mVec[1] + v[1], mVec[2] + v[2], mVec[3] + v[3]); } + __hostdev__ Vec4 operator-(const Vec4& v) const { return Vec4(mVec[0] - v[0], mVec[1] - v[1], mVec[2] - v[2], mVec[3] - v[3]); } + __hostdev__ Vec4 operator*(const T& s) const { return Vec4(s * mVec[0], s * mVec[1], s * mVec[2], s * mVec[3]); } + __hostdev__ Vec4 operator/(const T& s) const { return (T(1) / s) * (*this); } + __hostdev__ Vec4& operator+=(const Vec4& v) + { + mVec[0] += v[0]; + mVec[1] += v[1]; + mVec[2] += v[2]; + mVec[3] += v[3]; + return *this; + } + __hostdev__ Vec4& operator-=(const Vec4& v) + { + mVec[0] -= v[0]; + mVec[1] -= v[1]; + mVec[2] -= v[2]; + mVec[3] -= v[3]; + return *this; + } + __hostdev__ Vec4& operator*=(const T& s) + { + mVec[0] *= s; + mVec[1] *= s; + mVec[2] *= s; + mVec[3] *= s; + return *this; + } + __hostdev__ Vec4& operator/=(const T& s) { return (*this) *= T(1) / s; } + __hostdev__ Vec4& normalize() { return (*this) /= this->length(); } + /// @brief Perform a component-wise minimum with the other Coord. + __hostdev__ Vec4& minComponent(const Vec4& other) + { + if (other[0] < mVec[0]) + mVec[0] = other[0]; + if (other[1] < mVec[1]) + mVec[1] = other[1]; + if (other[2] < mVec[2]) + mVec[2] = other[2]; + if (other[3] < mVec[3]) + mVec[3] = other[3]; + return *this; + } + + /// @brief Perform a component-wise maximum with the other Coord. + __hostdev__ Vec4& maxComponent(const Vec4& other) + { + if (other[0] > mVec[0]) + mVec[0] = other[0]; + if (other[1] > mVec[1]) + mVec[1] = other[1]; + if (other[2] > mVec[2]) + mVec[2] = other[2]; + if (other[3] > mVec[3]) + mVec[3] = other[3]; + return *this; + } +}; // Vec4 + +template +__hostdev__ inline Vec4 operator*(T1 scalar, const Vec4& vec) +{ + return Vec4(scalar * vec[0], scalar * vec[1], scalar * vec[2], scalar * vec[3]); +} +template +__hostdev__ inline Vec4 operator/(T1 scalar, const Vec4& vec) +{ + return Vec4(scalar / vec[0], scalar / vec[1], scalar / vec[2], scalar / vec[3]); +} + +// ----------------------------> matMult <-------------------------------------- + +/// @brief Multiply a 3x3 matrix and a 3d vector using 32bit floating point arithmetics +/// @note This corresponds to a linear mapping, e.g. scaling, rotation etc. +/// @tparam Vec3T Template type of the input and output 3d vectors +/// @param mat pointer to an array of floats with the 3x3 matrix +/// @param xyz input vector to be multiplied by the matrix +/// @return result of matrix-vector multiplication, i.e. mat x xyz +template +__hostdev__ inline Vec3T matMult(const float* mat, const Vec3T& xyz) +{ + return Vec3T(fmaf(static_cast(xyz[0]), mat[0], fmaf(static_cast(xyz[1]), mat[1], static_cast(xyz[2]) * mat[2])), + fmaf(static_cast(xyz[0]), mat[3], fmaf(static_cast(xyz[1]), mat[4], static_cast(xyz[2]) * mat[5])), + fmaf(static_cast(xyz[0]), mat[6], fmaf(static_cast(xyz[1]), mat[7], static_cast(xyz[2]) * mat[8]))); // 6 fmaf + 3 mult = 9 flops +} + +/// @brief Multiply a 3x3 matrix and a 3d vector using 64bit floating point arithmetics +/// @note This corresponds to a linear mapping, e.g. 
scaling, rotation etc. +/// @tparam Vec3T Template type of the input and output 3d vectors +/// @param mat pointer to an array of floats with the 3x3 matrix +/// @param xyz input vector to be multiplied by the matrix +/// @return result of matrix-vector multiplication, i.e. mat x xyz +template +__hostdev__ inline Vec3T matMult(const double* mat, const Vec3T& xyz) +{ + return Vec3T(fma(static_cast(xyz[0]), mat[0], fma(static_cast(xyz[1]), mat[1], static_cast(xyz[2]) * mat[2])), + fma(static_cast(xyz[0]), mat[3], fma(static_cast(xyz[1]), mat[4], static_cast(xyz[2]) * mat[5])), + fma(static_cast(xyz[0]), mat[6], fma(static_cast(xyz[1]), mat[7], static_cast(xyz[2]) * mat[8]))); // 6 fmaf + 3 mult = 9 flops +} + +/// @brief Multiply a 3x3 matrix to a 3d vector and add another 3d vector using 32bit floating point arithmetics +/// @note This corresponds to an affine transformation, i.e a linear mapping followed by a translation. e.g. scale/rotation and translation +/// @tparam Vec3T Template type of the input and output 3d vectors +/// @param mat pointer to an array of floats with the 3x3 matrix +/// @param vec 3d vector to be added AFTER the matrix multiplication +/// @param xyz input vector to be multiplied by the matrix and a translated by @c vec +/// @return result of affine transformation, i.e. (mat x xyz) + vec +template +__hostdev__ inline Vec3T matMult(const float* mat, const float* vec, const Vec3T& xyz) +{ + return Vec3T(fmaf(static_cast(xyz[0]), mat[0], fmaf(static_cast(xyz[1]), mat[1], fmaf(static_cast(xyz[2]), mat[2], vec[0]))), + fmaf(static_cast(xyz[0]), mat[3], fmaf(static_cast(xyz[1]), mat[4], fmaf(static_cast(xyz[2]), mat[5], vec[1]))), + fmaf(static_cast(xyz[0]), mat[6], fmaf(static_cast(xyz[1]), mat[7], fmaf(static_cast(xyz[2]), mat[8], vec[2])))); // 9 fmaf = 9 flops +} + +/// @brief Multiply a 3x3 matrix to a 3d vector and add another 3d vector using 64bit floating point arithmetics +/// @note This corresponds to an affine transformation, i.e a linear mapping followed by a translation. e.g. scale/rotation and translation +/// @tparam Vec3T Template type of the input and output 3d vectors +/// @param mat pointer to an array of floats with the 3x3 matrix +/// @param vec 3d vector to be added AFTER the matrix multiplication +/// @param xyz input vector to be multiplied by the matrix and a translated by @c vec +/// @return result of affine transformation, i.e. (mat x xyz) + vec +template +__hostdev__ inline Vec3T matMult(const double* mat, const double* vec, const Vec3T& xyz) +{ + return Vec3T(fma(static_cast(xyz[0]), mat[0], fma(static_cast(xyz[1]), mat[1], fma(static_cast(xyz[2]), mat[2], vec[0]))), + fma(static_cast(xyz[0]), mat[3], fma(static_cast(xyz[1]), mat[4], fma(static_cast(xyz[2]), mat[5], vec[1]))), + fma(static_cast(xyz[0]), mat[6], fma(static_cast(xyz[1]), mat[7], fma(static_cast(xyz[2]), mat[8], vec[2])))); // 9 fma = 9 flops +} + +/// @brief Multiply the transposed of a 3x3 matrix and a 3d vector using 32bit floating point arithmetics +/// @note This corresponds to an inverse linear mapping, e.g. inverse scaling, inverse rotation etc. +/// @tparam Vec3T Template type of the input and output 3d vectors +/// @param mat pointer to an array of floats with the 3x3 matrix +/// @param xyz input vector to be multiplied by the transposed matrix +/// @return result of matrix-vector multiplication, i.e. 
mat^T x xyz +template +__hostdev__ inline Vec3T matMultT(const float* mat, const Vec3T& xyz) +{ + return Vec3T(fmaf(static_cast(xyz[0]), mat[0], fmaf(static_cast(xyz[1]), mat[3], static_cast(xyz[2]) * mat[6])), + fmaf(static_cast(xyz[0]), mat[1], fmaf(static_cast(xyz[1]), mat[4], static_cast(xyz[2]) * mat[7])), + fmaf(static_cast(xyz[0]), mat[2], fmaf(static_cast(xyz[1]), mat[5], static_cast(xyz[2]) * mat[8]))); // 6 fmaf + 3 mult = 9 flops +} + +/// @brief Multiply the transposed of a 3x3 matrix and a 3d vector using 64bit floating point arithmetics +/// @note This corresponds to an inverse linear mapping, e.g. inverse scaling, inverse rotation etc. +/// @tparam Vec3T Template type of the input and output 3d vectors +/// @param mat pointer to an array of floats with the 3x3 matrix +/// @param xyz input vector to be multiplied by the transposed matrix +/// @return result of matrix-vector multiplication, i.e. mat^T x xyz +template +__hostdev__ inline Vec3T matMultT(const double* mat, const Vec3T& xyz) +{ + return Vec3T(fma(static_cast(xyz[0]), mat[0], fma(static_cast(xyz[1]), mat[3], static_cast(xyz[2]) * mat[6])), + fma(static_cast(xyz[0]), mat[1], fma(static_cast(xyz[1]), mat[4], static_cast(xyz[2]) * mat[7])), + fma(static_cast(xyz[0]), mat[2], fma(static_cast(xyz[1]), mat[5], static_cast(xyz[2]) * mat[8]))); // 6 fmaf + 3 mult = 9 flops +} + +template +__hostdev__ inline Vec3T matMultT(const float* mat, const float* vec, const Vec3T& xyz) +{ + return Vec3T(fmaf(static_cast(xyz[0]), mat[0], fmaf(static_cast(xyz[1]), mat[3], fmaf(static_cast(xyz[2]), mat[6], vec[0]))), + fmaf(static_cast(xyz[0]), mat[1], fmaf(static_cast(xyz[1]), mat[4], fmaf(static_cast(xyz[2]), mat[7], vec[1]))), + fmaf(static_cast(xyz[0]), mat[2], fmaf(static_cast(xyz[1]), mat[5], fmaf(static_cast(xyz[2]), mat[8], vec[2])))); // 9 fmaf = 9 flops +} + +template +__hostdev__ inline Vec3T matMultT(const double* mat, const double* vec, const Vec3T& xyz) +{ + return Vec3T(fma(static_cast(xyz[0]), mat[0], fma(static_cast(xyz[1]), mat[3], fma(static_cast(xyz[2]), mat[6], vec[0]))), + fma(static_cast(xyz[0]), mat[1], fma(static_cast(xyz[1]), mat[4], fma(static_cast(xyz[2]), mat[7], vec[1]))), + fma(static_cast(xyz[0]), mat[2], fma(static_cast(xyz[1]), mat[5], fma(static_cast(xyz[2]), mat[8], vec[2])))); // 9 fma = 9 flops +} + +// ----------------------------> BBox <------------------------------------- + +// Base-class for static polymorphism (cannot be constructed directly) +template +struct BaseBBox +{ + Vec3T mCoord[2]; + __hostdev__ bool operator==(const BaseBBox& rhs) const { return mCoord[0] == rhs.mCoord[0] && mCoord[1] == rhs.mCoord[1]; }; + __hostdev__ bool operator!=(const BaseBBox& rhs) const { return mCoord[0] != rhs.mCoord[0] || mCoord[1] != rhs.mCoord[1]; }; + __hostdev__ const Vec3T& operator[](int i) const { return mCoord[i]; } + __hostdev__ Vec3T& operator[](int i) { return mCoord[i]; } + __hostdev__ Vec3T& min() { return mCoord[0]; } + __hostdev__ Vec3T& max() { return mCoord[1]; } + __hostdev__ const Vec3T& min() const { return mCoord[0]; } + __hostdev__ const Vec3T& max() const { return mCoord[1]; } + __hostdev__ BaseBBox& translate(const Vec3T& xyz) + { + mCoord[0] += xyz; + mCoord[1] += xyz; + return *this; + } + /// @brief Expand this bounding box to enclose point @c xyz. + __hostdev__ BaseBBox& expand(const Vec3T& xyz) + { + mCoord[0].minComponent(xyz); + mCoord[1].maxComponent(xyz); + return *this; + } + + /// @brief Expand this bounding box to enclose the given bounding box. 
+ __hostdev__ BaseBBox& expand(const BaseBBox& bbox) + { + mCoord[0].minComponent(bbox[0]); + mCoord[1].maxComponent(bbox[1]); + return *this; + } + + /// @brief Intersect this bounding box with the given bounding box. + __hostdev__ BaseBBox& intersect(const BaseBBox& bbox) + { + mCoord[0].maxComponent(bbox[0]); + mCoord[1].minComponent(bbox[1]); + return *this; + } + + //__hostdev__ BaseBBox expandBy(typename Vec3T::ValueType padding) const + //{ + // return BaseBBox(mCoord[0].offsetBy(-padding),mCoord[1].offsetBy(padding)); + //} + __hostdev__ bool isInside(const Vec3T& xyz) + { + if (xyz[0] < mCoord[0][0] || xyz[1] < mCoord[0][1] || xyz[2] < mCoord[0][2]) + return false; + if (xyz[0] > mCoord[1][0] || xyz[1] > mCoord[1][1] || xyz[2] > mCoord[1][2]) + return false; + return true; + } + +protected: + __hostdev__ BaseBBox() {} + __hostdev__ BaseBBox(const Vec3T& min, const Vec3T& max) + : mCoord{min, max} + { + } +}; // BaseBBox + +template::value> +struct BBox; + +/// @brief Partial template specialization for floating point coordinate types. +/// +/// @note Min is inclusive and max is exclusive. If min = max the dimension of +/// the bounding box is zero and therefore it is also empty. +template +struct BBox : public BaseBBox +{ + using Vec3Type = Vec3T; + using ValueType = typename Vec3T::ValueType; + static_assert(util::is_floating_point::value, "Expected a floating point coordinate type"); + using BaseT = BaseBBox; + using BaseT::mCoord; + /// @brief Default construction sets BBox to an empty bbox + __hostdev__ BBox() + : BaseT(Vec3T( Maximum::value()), + Vec3T(-Maximum::value())) + { + } + __hostdev__ BBox(const Vec3T& min, const Vec3T& max) + : BaseT(min, max) + { + } + __hostdev__ BBox(const Coord& min, const Coord& max) + : BaseT(Vec3T(ValueType(min[0]), ValueType(min[1]), ValueType(min[2])), + Vec3T(ValueType(max[0] + 1), ValueType(max[1] + 1), ValueType(max[2] + 1))) + { + } + __hostdev__ static BBox createCube(const Coord& min, typename Coord::ValueType dim) + { + return BBox(min, min.offsetBy(dim)); + } + + __hostdev__ BBox(const BaseBBox& bbox) + : BBox(bbox[0], bbox[1]) + { + } + __hostdev__ bool empty() const { return mCoord[0][0] >= mCoord[1][0] || + mCoord[0][1] >= mCoord[1][1] || + mCoord[0][2] >= mCoord[1][2]; } + __hostdev__ operator bool() const { return mCoord[0][0] < mCoord[1][0] && + mCoord[0][1] < mCoord[1][1] && + mCoord[0][2] < mCoord[1][2]; } + __hostdev__ Vec3T dim() const { return *this ? this->max() - this->min() : Vec3T(0); } + __hostdev__ bool isInside(const Vec3T& p) const + { + return p[0] > mCoord[0][0] && p[1] > mCoord[0][1] && p[2] > mCoord[0][2] && + p[0] < mCoord[1][0] && p[1] < mCoord[1][1] && p[2] < mCoord[1][2]; + } + +}; // BBox + +/// @brief Partial template specialization for integer coordinate types +/// +/// @note Both min and max are INCLUDED in the bbox so dim = max - min + 1. So, +/// if min = max the bounding box contains exactly one point and dim = 1! +template +struct BBox : public BaseBBox +{ + static_assert(util::is_same::value, "Expected \"int\" coordinate type"); + using BaseT = BaseBBox; + using BaseT::mCoord; + /// @brief Iterator over the domain covered by a BBox + /// @details z is the fastest-moving coordinate. 
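+    /// @par Example
+    /// A minimal usage sketch (coordinates are illustrative) that visits every voxel in a box,
+    /// with the z component varying fastest:
+    /// @code
+    /// nanovdb::CoordBBox bbox(nanovdb::Coord(0), nanovdb::Coord(7));
+    /// for (auto it = bbox.begin(); it; ++it) {
+    ///     const nanovdb::Coord& ijk = *it; // (0,0,0), (0,0,1), ..., (7,7,7)
+    /// }
+    /// @endcode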
+ class Iterator + { + const BBox& mBBox; + CoordT mPos; + + public: + __hostdev__ Iterator(const BBox& b) + : mBBox(b) + , mPos(b.min()) + { + } + __hostdev__ Iterator(const BBox& b, const Coord& p) + : mBBox(b) + , mPos(p) + { + } + __hostdev__ Iterator& operator++() + { + if (mPos[2] < mBBox[1][2]) { // this is the most common case + ++mPos[2];// increment z + } else if (mPos[1] < mBBox[1][1]) { + mPos[2] = mBBox[0][2];// reset z + ++mPos[1];// increment y + } else if (mPos[0] <= mBBox[1][0]) { + mPos[2] = mBBox[0][2];// reset z + mPos[1] = mBBox[0][1];// reset y + ++mPos[0];// increment x + } + return *this; + } + __hostdev__ Iterator operator++(int) + { + auto tmp = *this; + ++(*this); + return tmp; + } + __hostdev__ bool operator==(const Iterator& rhs) const + { + NANOVDB_ASSERT(mBBox == rhs.mBBox); + return mPos == rhs.mPos; + } + __hostdev__ bool operator!=(const Iterator& rhs) const + { + NANOVDB_ASSERT(mBBox == rhs.mBBox); + return mPos != rhs.mPos; + } + __hostdev__ bool operator<(const Iterator& rhs) const + { + NANOVDB_ASSERT(mBBox == rhs.mBBox); + return mPos < rhs.mPos; + } + __hostdev__ bool operator<=(const Iterator& rhs) const + { + NANOVDB_ASSERT(mBBox == rhs.mBBox); + return mPos <= rhs.mPos; + } + /// @brief Return @c true if the iterator still points to a valid coordinate. + __hostdev__ operator bool() const { return mPos <= mBBox[1]; } + __hostdev__ const CoordT& operator*() const { return mPos; } + }; // Iterator + __hostdev__ Iterator begin() const { return Iterator{*this}; } + __hostdev__ Iterator end() const { return Iterator{*this, CoordT(mCoord[1][0]+1, mCoord[0][1], mCoord[0][2])}; } + __hostdev__ BBox() + : BaseT(CoordT::max(), CoordT::min()) + { + } + __hostdev__ BBox(const CoordT& min, const CoordT& max) + : BaseT(min, max) + { + } + + template + __hostdev__ BBox(BBox& other, const SplitT&) + : BaseT(other.mCoord[0], other.mCoord[1]) + { + NANOVDB_ASSERT(this->is_divisible()); + const int n = MaxIndex(this->dim()); + mCoord[1][n] = (mCoord[0][n] + mCoord[1][n]) >> 1; + other.mCoord[0][n] = mCoord[1][n] + 1; + } + + __hostdev__ static BBox createCube(const CoordT& min, typename CoordT::ValueType dim) + { + return BBox(min, min.offsetBy(dim - 1)); + } + + __hostdev__ static BBox createCube(typename CoordT::ValueType min, typename CoordT::ValueType max) + { + return BBox(CoordT(min), CoordT(max)); + } + + __hostdev__ bool is_divisible() const { return mCoord[0][0] < mCoord[1][0] && + mCoord[0][1] < mCoord[1][1] && + mCoord[0][2] < mCoord[1][2]; } + /// @brief Return true if this bounding box is empty, e.g. uninitialized + __hostdev__ bool empty() const { return mCoord[0][0] > mCoord[1][0] || + mCoord[0][1] > mCoord[1][1] || + mCoord[0][2] > mCoord[1][2]; } + /// @brief Convert this BBox to boolean true if it is not empty + __hostdev__ operator bool() const { return mCoord[0][0] <= mCoord[1][0] && + mCoord[0][1] <= mCoord[1][1] && + mCoord[0][2] <= mCoord[1][2]; } + __hostdev__ CoordT dim() const { return *this ? this->max() - this->min() + Coord(1) : Coord(0); } + __hostdev__ uint64_t volume() const + { + auto d = this->dim(); + return uint64_t(d[0]) * uint64_t(d[1]) * uint64_t(d[2]); + } + __hostdev__ bool isInside(const CoordT& p) const { return !(CoordT::lessThan(p, this->min()) || CoordT::lessThan(this->max(), p)); } + /// @brief Return @c true if the given bounding box is inside this bounding box. 
+ __hostdev__ bool isInside(const BBox& b) const + { + return !(CoordT::lessThan(b.min(), this->min()) || CoordT::lessThan(this->max(), b.max())); + } + + /// @brief Return @c true if the given bounding box overlaps with this bounding box. + __hostdev__ bool hasOverlap(const BBox& b) const + { + return !(CoordT::lessThan(this->max(), b.min()) || CoordT::lessThan(b.max(), this->min())); + } + + /// @warning This converts a CoordBBox into a floating-point bounding box which implies that max += 1 ! + template + __hostdev__ BBox> asReal() const + { + static_assert(util::is_floating_point::value, "CoordBBox::asReal: Expected a floating point coordinate"); + return BBox>(Vec3(RealT(mCoord[0][0]), RealT(mCoord[0][1]), RealT(mCoord[0][2])), + Vec3(RealT(mCoord[1][0] + 1), RealT(mCoord[1][1] + 1), RealT(mCoord[1][2] + 1))); + } + /// @brief Return a new instance that is expanded by the specified padding. + __hostdev__ BBox expandBy(typename CoordT::ValueType padding) const + { + return BBox(mCoord[0].offsetBy(-padding), mCoord[1].offsetBy(padding)); + } + + /// @brief @brief transform this coordinate bounding box by the specified map + /// @param map mapping of index to world coordinates + /// @return world bounding box + template + __hostdev__ auto transform(const Map& map) const + { + using Vec3T = Vec3; + const Vec3T tmp = map.applyMap(Vec3T(mCoord[0][0], mCoord[0][1], mCoord[0][2])); + BBox bbox(tmp, tmp);// return value + bbox.expand(map.applyMap(Vec3T(mCoord[0][0], mCoord[0][1], mCoord[1][2]))); + bbox.expand(map.applyMap(Vec3T(mCoord[0][0], mCoord[1][1], mCoord[0][2]))); + bbox.expand(map.applyMap(Vec3T(mCoord[1][0], mCoord[0][1], mCoord[0][2]))); + bbox.expand(map.applyMap(Vec3T(mCoord[1][0], mCoord[1][1], mCoord[0][2]))); + bbox.expand(map.applyMap(Vec3T(mCoord[1][0], mCoord[0][1], mCoord[1][2]))); + bbox.expand(map.applyMap(Vec3T(mCoord[0][0], mCoord[1][1], mCoord[1][2]))); + bbox.expand(map.applyMap(Vec3T(mCoord[1][0], mCoord[1][1], mCoord[1][2]))); + return bbox; + } + +#if defined(__CUDACC__) // the following functions only run on the GPU! + __device__ inline BBox& expandAtomic(const CoordT& ijk) + { + mCoord[0].minComponentAtomic(ijk); + mCoord[1].maxComponentAtomic(ijk); + return *this; + } + __device__ inline BBox& expandAtomic(const BBox& bbox) + { + mCoord[0].minComponentAtomic(bbox[0]); + mCoord[1].maxComponentAtomic(bbox[1]); + return *this; + } + __device__ inline BBox& intersectAtomic(const BBox& bbox) + { + mCoord[0].maxComponentAtomic(bbox[0]); + mCoord[1].minComponentAtomic(bbox[1]); + return *this; + } +#endif +}; // BBox + +// --------------------------> Rgba8 <------------------------------------ + +/// @brief 8-bit red, green, blue, alpha packed into 32 bit unsigned int +class Rgba8 +{ + union + { + uint8_t c[4]; // 4 integer color channels of red, green, blue and alpha components. 
+ uint32_t packed; // 32 bit packed representation + } mData; + +public: + static const int SIZE = 4; + using ValueType = uint8_t; + + /// @brief Default copy constructor + Rgba8(const Rgba8&) = default; + + /// @brief Default move constructor + Rgba8(Rgba8&&) = default; + + /// @brief Default move assignment operator + /// @return non-const reference to this instance + Rgba8& operator=(Rgba8&&) = default; + + /// @brief Default copy assignment operator + /// @return non-const reference to this instance + Rgba8& operator=(const Rgba8&) = default; + + /// @brief Default ctor initializes all channels to zero + __hostdev__ Rgba8() + : mData{{0, 0, 0, 0}} + { + static_assert(sizeof(uint32_t) == sizeof(Rgba8), "Unexpected sizeof"); + } + + /// @brief integer r,g,b,a ctor where alpha channel defaults to opaque + /// @note all values should be in the range 0u to 255u + __hostdev__ Rgba8(uint8_t r, uint8_t g, uint8_t b, uint8_t a = 255u) + : mData{{r, g, b, a}} + { + } + + /// @brief @brief ctor where all channels are initialized to the same value + /// @note value should be in the range 0u to 255u + explicit __hostdev__ Rgba8(uint8_t v) + : mData{{v, v, v, v}} + { + } + + /// @brief floating-point r,g,b,a ctor where alpha channel defaults to opaque + /// @note all values should be in the range 0.0f to 1.0f + __hostdev__ Rgba8(float r, float g, float b, float a = 1.0f) + : mData{{static_cast(0.5f + r * 255.0f), // round floats to nearest integers + static_cast(0.5f + g * 255.0f), // double {{}} is needed due to union + static_cast(0.5f + b * 255.0f), + static_cast(0.5f + a * 255.0f)}} + { + } + + /// @brief Vec3f r,g,b ctor (alpha channel it set to 1) + /// @note all values should be in the range 0.0f to 1.0f + __hostdev__ Rgba8(const Vec3& rgb) + : Rgba8(rgb[0], rgb[1], rgb[2]) + { + } + + /// @brief Vec4f r,g,b,a ctor + /// @note all values should be in the range 0.0f to 1.0f + __hostdev__ Rgba8(const Vec4& rgba) + : Rgba8(rgba[0], rgba[1], rgba[2], rgba[3]) + { + } + + __hostdev__ bool operator< (const Rgba8& rhs) const { return mData.packed < rhs.mData.packed; } + __hostdev__ bool operator==(const Rgba8& rhs) const { return mData.packed == rhs.mData.packed; } + __hostdev__ float lengthSqr() const + { + return 0.0000153787005f * (float(mData.c[0]) * mData.c[0] + + float(mData.c[1]) * mData.c[1] + + float(mData.c[2]) * mData.c[2]); //1/255^2 + } + __hostdev__ float length() const { return sqrtf(this->lengthSqr()); } + /// @brief return n'th color channel as a float in the range 0 to 1 + __hostdev__ float asFloat(int n) const { return 0.003921569f*float(mData.c[n]); }// divide by 255 + __hostdev__ const uint8_t& operator[](int n) const { return mData.c[n]; } + __hostdev__ uint8_t& operator[](int n) { return mData.c[n]; } + __hostdev__ const uint32_t& packed() const { return mData.packed; } + __hostdev__ uint32_t& packed() { return mData.packed; } + __hostdev__ const uint8_t& r() const { return mData.c[0]; } + __hostdev__ const uint8_t& g() const { return mData.c[1]; } + __hostdev__ const uint8_t& b() const { return mData.c[2]; } + __hostdev__ const uint8_t& a() const { return mData.c[3]; } + __hostdev__ uint8_t& r() { return mData.c[0]; } + __hostdev__ uint8_t& g() { return mData.c[1]; } + __hostdev__ uint8_t& b() { return mData.c[2]; } + __hostdev__ uint8_t& a() { return mData.c[3]; } + __hostdev__ operator Vec3() const { + return Vec3(this->asFloat(0), this->asFloat(1), this->asFloat(2)); + } + __hostdev__ operator Vec4() const { + return Vec4(this->asFloat(0), this->asFloat(1), 
this->asFloat(2), this->asFloat(3)); + } +}; // Rgba8 + +using Vec3d = Vec3; +using Vec3f = Vec3; +using Vec3i = Vec3; +using Vec3u = Vec3; +using Vec3u8 = Vec3; +using Vec3u16 = Vec3; + +using Vec4R = Vec4; +using Vec4d = Vec4; +using Vec4f = Vec4; +using Vec4i = Vec4; + +}// namespace math =============================================================== + +using Rgba8 [[deprecated("Use math::Rgba8 instead.")]] = math::Rgba8; +using math::Coord; + +using Vec3d = math::Vec3; +using Vec3f = math::Vec3; +using Vec3i = math::Vec3; +using Vec3u = math::Vec3; +using Vec3u8 = math::Vec3; +using Vec3u16 = math::Vec3; + +using Vec4R = math::Vec4; +using Vec4d = math::Vec4; +using Vec4f = math::Vec4; +using Vec4i = math::Vec4; + +using CoordBBox = math::BBox; +using Vec3dBBox = math::BBox; +using BBoxR [[deprecated("Use Vec3dBBox instead.")]] = math::BBox; + +} // namespace nanovdb =================================================================== + +#endif // end of NANOVDB_MATH_MATH_H_HAS_BEEN_INCLUDED diff --git a/warp/native/nanovdb/tools/cuda/PointsToGrid.cuh b/warp/native/nanovdb/tools/cuda/PointsToGrid.cuh new file mode 100644 index 00000000..3404d887 --- /dev/null +++ b/warp/native/nanovdb/tools/cuda/PointsToGrid.cuh @@ -0,0 +1,1291 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: MPL-2.0 + +/*! + \file nanovdb/tools/cuda/PointsToGrid.cuh + + \authors Greg Klar (initial version) and Ken Museth (final version) + + \brief Generates NanoVDB grids from a list of voxels or points on the device + + \warning The header file contains cuda device code so be sure + to only include it in .cu files (or other .cuh files) +*/ + +#ifndef NVIDIA_TOOLS_CUDA_POINTSTOGRID_CUH_HAS_BEEN_INCLUDED +#define NVIDIA_TOOLS_CUDA_POINTSTOGRID_CUH_HAS_BEEN_INCLUDED + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +/* + Note: 4.29 billion (=2^32) coordinates of type Vec3f have a memory footprint of 48 GB! +*/ + +namespace nanovdb {// ================================================================================ + +namespace tools { namespace cuda {// ============================================================================ + +/// @brief Generates a NanoGrid from a list of point coordinates on the device. This method is +/// mainly used as a means to build a BVH acceleration structure for points, e.g. for efficient rendering. +/// @tparam PtrT Template type to a raw or fancy-pointer of point coordinates in world space. Dereferencing should return Vec3f or Vec3d. +/// @tparam BufferT Template type of buffer used for memory allocation on the device +/// @tparam AllocT Template type of optional device allocator for internal temporary memory +/// @param dWorldPoints Raw or fancy pointer to list of point coordinates in world space on the device +/// @param pointCount number of point in the list @c d_world +/// @param voxelSize Size of a voxel in world units used for the output grid +/// @param type Defined the way point information is represented in the output grid (see PointType enum NanoVDB.h) +/// Should not be PointType::Disable! +/// @param buffer Instance of the device buffer used for memory allocation +/// @param stream optional CUDA stream (defaults to CUDA stream 0) +/// @return Returns a handle with a grid of type NanoGrid where point information, e.g. coordinates, +/// are represented as blind data defined by @c type. 
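+/// @par Example
+/// A minimal sketch of the intended call pattern; @c d_points and @c pointCount are placeholders for a
+/// device array of world-space points and its length, and the default device buffer type is assumed:
+/// @code
+/// // d_points: device pointer to pointCount world-space points (const nanovdb::Vec3f*)
+/// auto handle = nanovdb::tools::cuda::pointsToGrid(d_points, pointCount, /*voxelSize=*/0.5);
+/// auto* d_grid = handle.deviceGrid<nanovdb::Point>(); // NanoGrid<Point> residing on the device
+/// @endcode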
+template
+GridHandle
+pointsToGrid(const PtrT dWorldPoints,
+             int pointCount,
+             double voxelSize,
+             PointType type = PointType::Default,
+             const BufferT &buffer = BufferT(),
+             cudaStream_t stream = 0);
+
+//-----------------------------------------------------------------------------------------------------
+
+/// @brief Generates a NanoGrid from a list of point coordinates on the device. This method is
+/// mainly used as a means to build a BVH acceleration structure for points, e.g. for efficient rendering.
+/// @tparam PtrT Template type to a raw or fancy-pointer of point coordinates in world space. Dereferencing should return Vec3f or Vec3d.
+/// @tparam BufferT Template type of buffer used for memory allocation on the device
+/// @tparam AllocT Template type of optional device allocator for internal temporary memory
+/// @param dWorldPoints Raw or fancy pointer to list of point coordinates in world space on the device
+/// @param pointCount total number of points in the list @c dWorldPoints
+/// @param maxPointsPerVoxel Max density of points per voxel, i.e. maximum number of points in any voxel
+/// @param tolerance allow for point density to vary by the specified tolerance (defaults to 1). That is, the voxel size
+/// is selected such that the max density is +/- the tolerance.
+/// @param maxIterations Maximum number of iterations used to search for a voxel size that produces a point density
+/// within the specified tolerance.
+/// @param type Defines the way point information is represented in the output grid (see PointType enum in NanoVDB.h)
+/// Should not be PointType::Disable!
+/// @param buffer Instance of the device buffer used for memory allocation
+/// @param stream optional CUDA stream (defaults to CUDA stream 0)
+/// @return Returns a handle with a grid of type NanoGrid where point information, e.g. coordinates,
+/// are represented as blind data defined by @c type.
+template
+GridHandle
+pointsToGrid(const PtrT dWorldPoints,
+             int pointCount,
+             int maxPointPerVoxel,
+             int tolerance = 1,
+             int maxIterations = 10,
+             PointType type = PointType::Default,
+             const BufferT &buffer = BufferT(),
+             cudaStream_t stream = 0);
+
+//-----------------------------------------------------------------------------------------------------
+
+template
+GridHandle
+pointsToGrid(std::vector> pointSet,
+             const BufferT &buffer = BufferT(),
+             cudaStream_t stream = 0);
+
+//-----------------------------------------------------------------------------------------------------
+
+/// @brief Generates a NanoGrid of any type from a list of voxel coordinates on the device. Unlike @c cudaPointsToGrid
+/// this method only builds the grid but does not encode the coordinates as blind data. It is mainly useful as a
+/// means to generate a grid that is known to contain the voxels given in the list.
+/// @tparam BuildT Template type of the return grid
+/// @tparam PtrT Template type to a raw or fancy-pointer of voxel coordinates in grid (or index) space. Dereferencing should return Vec3f or Vec3d.
+/// @tparam BufferT Template type of buffer used for memory allocation on the device
+/// @tparam AllocT Template type of optional device allocator for internal temporary memory
+/// @param dGridVoxels Raw or fancy pointer to list of voxel coordinates in grid (or index) space on the device
+/// @param voxelCount number of voxels in the list @c dGridVoxels
+/// @param voxelSize Size of a voxel in world units used for the output grid
+/// @param buffer Instance of the device buffer used for memory allocation
+/// @return Returns a handle with the grid of type NanoGrid
+template
+GridHandle
+voxelsToGrid(const PtrT dGridVoxels,
+             size_t voxelCount,
+             double voxelSize = 1.0,
+             const BufferT &buffer = BufferT(),
+             cudaStream_t stream = 0);
+
+//-------------------------------------------------------------------------------------------------------
+
+template
+GridHandle
+voxelsToGrid(std::vector> pointSet,
+             const BufferT &buffer = BufferT(),
+             cudaStream_t stream = 0);
+
+} }// namespace tools::cuda ========================================================================
+
+/// @brief Example class of a fancy pointer that can optionally be used as a template for writing
+/// a custom fancy pointer that allows for particle coordinates to be arranged non-linearly
+/// in memory. For instance, when coordinates are interlaced with other data, i.e. an array
+/// of structs, a custom implementation of fancy_ptr::operator[](size_t i) can account for
+/// strides that skip the other interlaced data.
+/// @tparam T Template type that specifies the type used for the coordinates of the points
+template <typename T>
+class fancy_ptr
+{
+    const T* mPtr;
+public:
+    /// @brief Default constructor.
+    /// @note This method is actually not required by cuda::PointsToGrid
+    /// @param ptr Pointer to array of elements
+    __hostdev__ explicit fancy_ptr(const T* ptr = nullptr) : mPtr(ptr) {}
+    /// @brief Index access into the array pointed to by the stored pointer.
+    /// @note This method is required by cuda::PointsToGrid!
+    /// @param i Unsigned index of the element to be returned
+    /// @return Const reference to the element at the i'th position
+    __hostdev__ inline const T& operator[](size_t i) const {return mPtr[i];}
+    /// @brief Dummy implementation required by pointer_traits.
+    /// @note Note that only the return type matters!
+    /// @details Unlike operator[] it is safe to assume that all pointer types have operator*,
+    /// which is why pointer_traits makes use of it to determine the element_type that
+    /// a pointer class is pointing to. E.g. operator[] is not always defined for std::shared_ptr!
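+    /// @par Example
+    /// Sketch of how pointer_traits (declared further below) deduces the element type via operator*:
+    /// @code
+    /// using elem_t = nanovdb::pointer_traits<nanovdb::fancy_ptr<nanovdb::Vec3f>>::element_type; // nanovdb::Vec3f
+    /// static_assert(sizeof(elem_t) == nanovdb::pointer_traits<nanovdb::fancy_ptr<nanovdb::Vec3f>>::element_size, "");
+    /// @endcode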
+ __hostdev__ inline const T& operator*() const {return *mPtr;} +};// fancy_ptr + +/// @brief Simple stand-alone function that can be used to conveniently construct a fancy_ptr +/// @tparam T Template type that specifies the type use for the coordinates of the points +/// @param ptr Raw pointer to data +/// @return a new instance of a fancy_ptr +template +fancy_ptr make_fancy(const T* ptr = nullptr) {return fancy_ptr(ptr);} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +/// @brief Trait of points, like type of pointer and size of the pointer type +template +struct pointer_traits; + +template +struct pointer_traits { + using element_type = T; + static constexpr size_t element_size = sizeof(T); +}; + +template +struct pointer_traits { + using element_type = typename util::remove_reference())>::type;// assumes T::operator*() exists! + static constexpr size_t element_size = sizeof(element_type); +}; + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +/// @brief computes the relative 8-bit voxel offsets from a world coordinate +/// @tparam Vec3T Type of the world coordinate +/// @param voxel 8-bit output coordinates that are relative to a voxel +/// @param world input world coordinates +/// @param indexToWorld Transform from index to world space +template +__hostdev__ inline static void worldToVoxel(Vec3u8 &voxel, const Vec3T &world, const Map &indexToWorld) +{ + const Vec3d ijk = indexToWorld.applyInverseMap(world);// world -> index + static constexpr double encode = double((1<<8) - 1); + voxel[0] = uint8_t( encode*(ijk[0] - math::Floor(ijk[0] + 0.5) + 0.5) ); + voxel[1] = uint8_t( encode*(ijk[1] - math::Floor(ijk[1] + 0.5) + 0.5) ); + voxel[2] = uint8_t( encode*(ijk[2] - math::Floor(ijk[2] + 0.5) + 0.5) ); +} + +/// @brief computes the relative 16-bit voxel offsets from a world coordinate +/// @tparam Vec3T Type of the world coordinate +/// @param voxel 16-bit output coordinates that are relative to a voxel +/// @param world input world coordinates +/// @param indexToWorld Transform from index to world space +template +__hostdev__ inline static void worldToVoxel(Vec3u16 &voxel, const Vec3T &world, const Map &indexToWorld) +{ + const Vec3d ijk = indexToWorld.applyInverseMap(world);// world -> index + static constexpr double encode = double((1<<16) - 1); + voxel[0] = uint16_t( encode*(ijk[0] - math::Floor(ijk[0] + 0.5) + 0.5) ); + voxel[1] = uint16_t( encode*(ijk[1] - math::Floor(ijk[1] + 0.5) + 0.5) ); + voxel[2] = uint16_t( encode*(ijk[2] - math::Floor(ijk[2] + 0.5) + 0.5) ); +} + +/// @brief computes the relative float voxel offsets from a world coordinate +/// @tparam Vec3T Type of the world coordinate +/// @param voxel float output coordinates that are relative to a voxel +/// @param world input world coordinates +/// @param indexToWorld Transform from index to world space +template +__hostdev__ inline static void worldToVoxel(Vec3f &voxel, const Vec3T &world, const Map &indexToWorld) +{ + const Vec3d ijk = indexToWorld.applyInverseMap(world);// world -> index + voxel[0] = float( ijk[0] - math::Floor(ijk[0] + 0.5) ); + voxel[1] = float( ijk[1] - math::Floor(ijk[1] + 0.5) ); + voxel[2] = float( ijk[2] - math::Floor(ijk[2] + 0.5) ); +} + 
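+// Round-trip sketch (names are placeholders): a world-space position is represented by the voxel it
+// rounds to plus a quantized in-voxel offset, and the voxelToWorld() overloads below recover the
+// position up to the quantization error of the chosen offset type:
+//
+//   const nanovdb::Vec3d idx = map.applyInverseMap(worldPos); // world -> index space
+//   const nanovdb::Coord ijk = idx.round();                   // voxel that owns the point
+//   nanovdb::Vec3u16 offset;
+//   worldToVoxel(offset, worldPos, map);                      // encode 16-bit relative offset
+//   nanovdb::Vec3d approx = voxelToWorld<nanovdb::Vec3d>(offset, ijk, map); // approximately worldPos
+//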
+//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +__hostdev__ inline static Vec3T voxelToWorld(const Vec3u8 &voxel, const Coord &ijk, const Map &map) +{ + static constexpr double decode = 1.0/double((1<<8) - 1); + if constexpr(util::is_same::value) { + return map.applyMap( Vec3d(ijk[0] + decode*voxel[0] - 0.5, ijk[1] + decode*voxel[1] - 0.5, ijk[2] + decode*voxel[2] - 0.5)); + } else { + return map.applyMapF(Vec3f(ijk[0] + decode*voxel[0] - 0.5f, ijk[1] + decode*voxel[1] - 0.5f, ijk[2] + decode*voxel[2] - 0.5f)); + } +} + +template +__hostdev__ inline static Vec3T voxelToWorld(const Vec3u16 &voxel, const Coord &ijk, const Map &map) +{ + static constexpr double decode = 1.0/double((1<<16) - 1); + if constexpr(util::is_same::value) { + return map.applyMap( Vec3d(ijk[0] + decode*voxel[0] - 0.5, ijk[1] + decode*voxel[1] - 0.5, ijk[2] + decode*voxel[2] - 0.5)); + } else { + return map.applyMapF(Vec3f(ijk[0] + decode*voxel[0] - 0.5f, ijk[1] + decode*voxel[1] - 0.5f, ijk[2] + decode*voxel[2] - 0.5f)); + } +} + +template +__hostdev__ inline static Vec3T voxelToWorld(const Vec3f &voxel, const Coord &ijk, const Map &map) +{ + if constexpr(util::is_same::value) { + return map.applyMap( Vec3d(ijk[0] + voxel[0], ijk[1] + voxel[1], ijk[2] + voxel[2])); + } else { + return map.applyMapF(Vec3f(ijk[0] + voxel[0], ijk[1] + voxel[1], ijk[2] + voxel[2])); + } +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +namespace tools{ namespace cuda { + +template +class PointsToGrid +{ +public: + + struct Data { + Map map; + void *d_bufferPtr; + uint64_t *d_keys, *d_tile_keys, *d_lower_keys, *d_leaf_keys;// device pointer to 64 bit keys + uint64_t grid, tree, root, upper, lower, leaf, meta, blind, size;// byte offsets to nodes in buffer + uint32_t *d_indx;// device pointer to point indices (or IDs) + uint32_t nodeCount[3], *pointsPerLeafPrefix, *pointsPerLeaf;// 0=leaf,1=lower, 2=upper + uint32_t voxelCount, *pointsPerVoxelPrefix, *pointsPerVoxel; + BitFlags<16> flags; + __hostdev__ NanoGrid& getGrid() const {return *util::PtrAdd>(d_bufferPtr, grid);} + __hostdev__ NanoTree& getTree() const {return *util::PtrAdd>(d_bufferPtr, tree);} + __hostdev__ NanoRoot& getRoot() const {return *util::PtrAdd>(d_bufferPtr, root);} + __hostdev__ NanoUpper& getUpper(int i) const {return *(util::PtrAdd>(d_bufferPtr, upper)+i);} + __hostdev__ NanoLower& getLower(int i) const {return *(util::PtrAdd>(d_bufferPtr, lower)+i);} + __hostdev__ NanoLeaf& getLeaf(int i) const {return *(util::PtrAdd>(d_bufferPtr, leaf)+i);} + __hostdev__ GridBlindMetaData& getMeta() const { return *util::PtrAdd(d_bufferPtr, meta);}; + template + __hostdev__ Vec3T& getPoint(int i) const {return *(util::PtrAdd(d_bufferPtr, blind)+i);} + };// Data + + /// @brief Map constructor, which other constructors might call + /// @param map Map to be used for the output device grid + /// @param stream optional CUDA stream (defaults to CUDA stream 0) + PointsToGrid(const Map &map, cudaStream_t stream = 0) + : mStream(stream) + , mPointType(util::is_same::value ? 
PointType::Default : PointType::Disable) + { + mData.map = map; + mData.flags.initMask({GridFlags::HasBBox, GridFlags::IsBreadthFirst}); + mDeviceData = mMemPool.template alloc(mStream); + } + + /// @brief Default constructor that calls the Map constructor defined above + /// @param scale Voxel size in world units + /// @param trans Translation of origin in world units + /// @param stream optional CUDA stream (defaults to CUDA stream 0) + PointsToGrid(const double scale = 1.0, const Vec3d &trans = Vec3d(0.0), cudaStream_t stream = 0) + : PointsToGrid(Map(scale, trans), stream){} + + /// @brief Constructor from a target maximum number of particles per voxel. Calls the Map constructor defined above + /// @param maxPointsPerVoxel Maximum number of points oer voxel + /// @param stream optional CUDA stream (defaults to CUDA stream 0) + PointsToGrid(int maxPointsPerVoxel, int tolerance = 1, int maxIterations = 10, cudaStream_t stream = 0) + : PointsToGrid(Map(1.0), stream) + { + mMaxPointsPerVoxel = maxPointsPerVoxel; + mTolerance = tolerance; + mMaxIterations = maxIterations; + } + + /// @brief Destructor + ~PointsToGrid() {mMemPool.free(mDeviceData);} + + /// @brief Toggle on and off verbose mode + /// @param level Verbose level: 0=quiet, 1=timing, 2=benchmarking + void setVerbose(int level = 1) {mVerbose = level; mData.flags.setBit(7u, level); } + + /// @brief Set the mode for checksum computation, which is disabled by default + /// @param mode Mode of checksum computation + void setChecksum(CheckMode mode = CheckMode::Disable){mChecksum = mode;} + + /// @brief Toggle on and off the computation of a bounding-box + /// @param on If true bbox will be computed + void includeBBox(bool on = true) { mData.flags.setMask(GridFlags::HasBBox, on); } + + /// @brief Set the name of the output grid + /// @param name name of the output grid + void setGridName(const std::string &name) {mGridName = name;} + + // only available when BuildT == Point + template typename util::enable_if::value>::type + setPointType(PointType type) { mPointType = type; } + + /// @brief Creates a handle to a grid with the specified build type from a list of points in index or world space + /// @tparam BuildT Build type of the output grid, i.e NanoGrid + /// @tparam PtrT Template type to a raw or fancy-pointer of point coordinates in world or index space. + /// @tparam BufferT Buffer type used for allocation of the grid handle + /// @param points device point to an array of points in world space + /// @param pointCount number of input points or voxels + /// @param buffer optional buffer (currently ignored) + /// @return returns a handle with a grid of type NanoGrid + template + GridHandle getHandle(const PtrT points, + size_t pointCount, + const BufferT &buffer = BufferT()); + + template + void countNodes(const PtrT points, size_t pointCount); + + template + void processGridTreeRoot(const PtrT points, size_t pointCount); + + void processUpperNodes(); + + void processLowerNodes(); + + template + void processLeafNodes(const PtrT points); + + template + void processPoints(const PtrT points, size_t pointCount); + + void processBBox(); + + // the following methods are only defined when BuildT == Point + template typename util::enable_if::value, uint32_t>::type + maxPointsPerVoxel() const {return mMaxPointsPerVoxel;} + template typename util::enable_if::value, uint32_t>::type + maxPointsPerLeaf() const {return mMaxPointsPerLeaf;} + +private: + static constexpr unsigned int mNumThreads = 128;// seems faster than the old value of 256! 
+ static unsigned int numBlocks(unsigned int n) {return (n + mNumThreads - 1) / mNumThreads;} + + cudaStream_t mStream{0}; + util::cuda::Timer mTimer; + PointType mPointType; + std::string mGridName; + int mVerbose{0}; + Data mData, *mDeviceData; + uint32_t mMaxPointsPerVoxel{0u}, mMaxPointsPerLeaf{0u}; + int mTolerance{1}, mMaxIterations{1}; + CheckMode mChecksum{CheckMode::Disable}; + + // wrapper of AllocT, defaulting to cub::CachingDeviceAllocator, which offers a shared scratch space + struct Allocator { + AllocT mAllocator; + void* d_scratch; + size_t scratchSize, actualScratchSize; + Allocator() : d_scratch(nullptr), scratchSize(0), actualScratchSize(0) {} + ~Allocator() { + if (scratchSize > 0) this->free(d_scratch);// a bug in cub makes this necessary + mAllocator.FreeAllCached(); + } + template + T* alloc(size_t count, cudaStream_t stream) { + T* d_ptr = nullptr; + cudaCheck(mAllocator.DeviceAllocate((void**)&d_ptr, sizeof(T)*count, stream)); + return d_ptr; + } + template + T* alloc(cudaStream_t stream) {return this->template alloc(1, stream);} + void free(void *d_ptr) {if (d_ptr) cudaCheck(mAllocator.DeviceFree(d_ptr));} + template + void free(void *d_ptr, T... other) { + if (d_ptr) cudaCheck(mAllocator.DeviceFree(d_ptr)); + this->free(other...); + } + void adjustScratch(cudaStream_t stream){ + if (scratchSize > actualScratchSize) { + if (actualScratchSize>0) cudaCheck(mAllocator.DeviceFree(d_scratch)); + cudaCheck(mAllocator.DeviceAllocate((void**)&d_scratch, scratchSize, stream)); + actualScratchSize = scratchSize; + } + } + } mMemPool; + + template + BufferT getBuffer(const PtrT points, size_t pointCount, const BufferT &buffer); +};// tools::cuda::PointsToGrid + +namespace kernels { +/// @details Used by cuda::PointsToGrid::processLeafNodes before the computation +/// of prefix-sum for index grid. +/// Moving this away from an implementation using the lambdaKernel wrapper +/// to fix the following on Windows platform: +/// error : For this host platform/dialect, an extended lambda cannot be defined inside the 'if' +/// or 'else' block of a constexpr if statement. +/// function in a lambda through lambdaKernel wrapper defined in CudaUtils.h. +template +__global__ void fillValueIndexKernel(const size_t numItems, uint64_t* devValueIndex, typename PointsToGrid::Data* d_data) { + const int tid = blockIdx.x * blockDim.x + threadIdx.x; + if (tid >= numItems) return; + devValueIndex[tid] = static_cast(d_data->getLeaf(tid).mValueMask.countOn()); +} + +/// @details Used by PointsToGrid::processLeafNodes for the computation +/// of prefix-sum for index grid. +/// Moving this away from an implementation using the lambdaKernel wrapper +/// to fix the following on Windows platform: +/// error : For this host platform/dialect, an extended lambda cannot be defined inside the 'if' +/// or 'else' block of a constexpr if statement. 
+template +__global__ void leafPrefixSumKernel(const size_t numItems, uint64_t* devValueIndexPrefix, typename PointsToGrid::Data* d_data) { + const int tid = blockIdx.x * blockDim.x + threadIdx.x; + if (tid >= numItems) return; + + auto &leaf = d_data->getLeaf(tid); + leaf.mOffset = 1u;// will be re-set below + const uint64_t *w = leaf.mValueMask.words(); + uint64_t &prefixSum = leaf.mPrefixSum, sum = util::countOn(*w++); + prefixSum = sum; + for (int n = 9; n < 55; n += 9) {// n=i*9 where i=1,2,..6 + sum += util::countOn(*w++); + prefixSum |= sum << n;// each pre-fixed sum is encoded in 9 bits + } + if (tid==0) { + d_data->getGrid().mData1 = 1u + devValueIndexPrefix[d_data->nodeCount[0]-1];// set total count + d_data->getTree().mVoxelCount = devValueIndexPrefix[d_data->nodeCount[0]-1]; + } else { + leaf.mOffset = 1u + devValueIndexPrefix[tid-1];// background is index 0 + } +} + +/// @details Used by PointsToGrid::processLeafNodes to make sure leaf.mMask - leaf.mValueMask. +/// Moving this away from an implementation using the lambdaKernel wrapper +/// to fix the following on Windows platform: +/// error : For this host platform/dialect, an extended lambda cannot be defined inside the 'if' +/// or 'else' block of a constexpr if statement. +template +__global__ void setMaskEqValMaskKernel(const size_t numItems, typename PointsToGrid::Data* d_data) { + const int tid = blockIdx.x * blockDim.x + threadIdx.x; + if (tid >= numItems) return; + auto &leaf = d_data->getLeaf(tid); + leaf.mMask = leaf.mValueMask; +} +} // namespace kernels + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +// Define utility macro used to call cub functions that use dynamic temporary storage +#ifndef CALL_CUBS +#ifdef _WIN32 +#define CALL_CUBS(func, ...) \ + cudaCheck(cub::func(nullptr, mMemPool.scratchSize, __VA_ARGS__, mStream)); \ + mMemPool.adjustScratch(mStream); \ + cudaCheck(cub::func(mMemPool.d_scratch, mMemPool.scratchSize, __VA_ARGS__, mStream)); +#else// fdef _WIN32 +#define CALL_CUBS(func, args...) 
\ + cudaCheck(cub::func(nullptr, mMemPool.scratchSize, args, mStream)); \ + mMemPool.adjustScratch(mStream); \ + cudaCheck(cub::func(mMemPool.d_scratch, mMemPool.scratchSize, args, mStream)); +#endif// ifdef _WIN32 +#endif// ifndef CALL_CUBS + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +template +inline GridHandle +PointsToGrid::getHandle(const PtrT points, + size_t pointCount, + const BufferT &pool) +{ + if (mVerbose==1) mTimer.start("\nCounting nodes"); + this->countNodes(points, pointCount); + + if (mVerbose==1) mTimer.restart("Initiate buffer"); + auto buffer = this->getBuffer(points, pointCount, pool); + + if (mVerbose==1) mTimer.restart("Process grid,tree,root"); + this->processGridTreeRoot(points, pointCount); + + if (mVerbose==1) mTimer.restart("Process upper nodes"); + this->processUpperNodes(); + + if (mVerbose==1) mTimer.restart("Process lower nodes"); + this->processLowerNodes(); + + if (mVerbose==1) mTimer.restart("Process leaf nodes"); + this->processLeafNodes(points); + + if (mVerbose==1) mTimer.restart("Process points"); + this->processPoints(points, pointCount); + + if (mVerbose==1) mTimer.restart("Process bbox"); + this->processBBox(); + if (mVerbose==1) mTimer.stop(); + + return GridHandle(std::move(buffer)); +}// PointsToGrid::getHandle + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +// --- CUB helpers --- +template +struct ShiftRight +{ + __hostdev__ inline OutT operator()(const InT& v) const {return static_cast(v >> BitCount);} +}; + +template +struct ShiftRightIterator : public cub::TransformInputIterator, InT*> +{ + using BASE = cub::TransformInputIterator, InT*>; + __hostdev__ inline ShiftRightIterator(uint64_t* input_itr) : BASE(input_itr, ShiftRight()) {} +}; + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +template +void PointsToGrid::countNodes(const PtrT points, size_t pointCount) +{ + using Vec3T = typename util::remove_const::element_type>::type; + if constexpr(util::is_same::value) { + static_assert(util::is_same::value, "Point (vs voxels) coordinates should be represented as Vec3f or Vec3d"); + } else { + static_assert(util::is_same::value, "Voxel coordinates should be represented as Coord, Vec3f or Vec3d"); + } + + mMaxPointsPerVoxel = math::Min(mMaxPointsPerVoxel, pointCount); + int iterCounter = 0; + struct Foo {// pairs current voxel size, dx, with the corresponding particle density, i.e. maximum number of points per voxel + double dx; + uint32_t density; + bool operator<(const Foo &rhs) const {return density < rhs.density || (density == rhs.density && dx < rhs.dx);} + } min{0.0, 1}, max{0.0, 0};// min: as dx -> 0 density -> 1 point per voxel, max: density is 0 i.e. 
undefined + +jump:// this marks the beginning of the actual algorithm + + mData.d_keys = mMemPool.template alloc(pointCount, mStream); + mData.d_indx = mMemPool.template alloc(pointCount, mStream);// uint32_t can index 4.29 billion Coords, corresponding to 48 GB + cudaCheck(cudaMemcpyAsync(mDeviceData, &mData, sizeof(Data), cudaMemcpyHostToDevice, mStream));// copy mData from CPU -> GPU + + if (mVerbose==2) mTimer.start("\nAllocating arrays for keys and indices"); + auto *d_keys = mMemPool.template alloc(pointCount, mStream); + auto *d_indx = mMemPool.template alloc(pointCount, mStream); + + if (mVerbose==2) mTimer.restart("Generate tile keys"); + util::cuda::lambdaKernel<<>>(pointCount, [=] __device__(size_t tid, const Data *d_data, const PtrT points) { + auto coordToKey = [](const Coord &ijk)->uint64_t{ + // Note: int32_t has a range of -2^31 to 2^31 - 1 whereas uint32_t has a range of 0 to 2^32 - 1 + static constexpr int64_t offset = 1 << 31; + return (uint64_t(uint32_t(int64_t(ijk[2]) + offset) >> 12) ) | // z is the lower 21 bits + (uint64_t(uint32_t(int64_t(ijk[1]) + offset) >> 12) << 21) | // y is the middle 21 bits + (uint64_t(uint32_t(int64_t(ijk[0]) + offset) >> 12) << 42); // x is the upper 21 bits + };// coordToKey lambda functor + d_indx[tid] = uint32_t(tid); + uint64_t &key = d_keys[tid]; + if constexpr(util::is_same::value) {// points are in world space + if constexpr(util::is_same::value) { + key = coordToKey(d_data->map.applyInverseMapF(points[tid]).round()); + } else {// points are Vec3d + key = coordToKey(d_data->map.applyInverseMap(points[tid]).round()); + } + } else if constexpr(util::is_same::value) {// points Coord are in index space + key = coordToKey(points[tid]); + } else {// points are Vec3f or Vec3d in index space + key = coordToKey(points[tid].round()); + } + }, mDeviceData, points); + cudaCheckError(); + if (mVerbose==2) mTimer.restart("DeviceRadixSort of "+std::to_string(pointCount)+" tile keys"); + CALL_CUBS(DeviceRadixSort::SortPairs, d_keys, mData.d_keys, d_indx, mData.d_indx, pointCount, 0, 62);// 21 bits per coord + std::swap(d_indx, mData.d_indx);// sorted indices are now in d_indx + + if (mVerbose==2) mTimer.restart("Allocate runs"); + auto *d_points_per_tile = mMemPool.template alloc(pointCount, mStream); + uint32_t *d_node_count = mMemPool.template alloc(3, mStream); + + if (mVerbose==2) mTimer.restart("DeviceRunLengthEncode tile keys"); + CALL_CUBS(DeviceRunLengthEncode::Encode, mData.d_keys, d_keys, d_points_per_tile, d_node_count+2, pointCount); + cudaCheck(cudaMemcpyAsync(mData.nodeCount+2, d_node_count+2, sizeof(uint32_t), cudaMemcpyDeviceToHost, mStream)); + mData.d_tile_keys = mMemPool.template alloc(mData.nodeCount[2], mStream); + cudaCheck(cudaMemcpyAsync(mData.d_tile_keys, d_keys, mData.nodeCount[2]*sizeof(uint64_t), cudaMemcpyDeviceToDevice, mStream)); + + if (mVerbose) mTimer.restart("DeviceRadixSort of " + std::to_string(pointCount) + " voxel keys in " + std::to_string(mData.nodeCount[2]) + " tiles"); + uint32_t *points_per_tile = new uint32_t[mData.nodeCount[2]]; + cudaCheck(cudaMemcpyAsync(points_per_tile, d_points_per_tile, mData.nodeCount[2]*sizeof(uint32_t), cudaMemcpyDeviceToHost, mStream)); + mMemPool.free(d_points_per_tile); + + for (uint32_t id = 0, offset = 0; id < mData.nodeCount[2]; ++id) { + const uint32_t count = points_per_tile[id]; + util::cuda::lambdaKernel<<>>(count, [=] __device__(size_t tid, const Data *d_data) { + auto voxelKey = [] __device__ (uint64_t tileID, const Coord &ijk){ + return tileID << 36 | // upper 
offset: 64-15-12-9=28, i.e. last 28 bits + uint64_t(NanoUpper::CoordToOffset(ijk)) << 21 | // lower offset: 32^3 = 2^15, i.e. next 15 bits + uint64_t(NanoLower::CoordToOffset(ijk)) << 9 | // leaf offset: 16^3 = 2^12, i.e. next 12 bits + uint64_t(NanoLeaf< BuildT>::CoordToOffset(ijk)); // voxel offset: 8^3 = 2^9, i.e. first 9 bits + };// voxelKey lambda functor + tid += offset; + Vec3T p = points[d_indx[tid]]; + if constexpr(util::is_same::value) p = util::is_same::value ? d_data->map.applyInverseMapF(p) : d_data->map.applyInverseMap(p); + d_keys[tid] = voxelKey(id, p.round()); + }, mDeviceData); cudaCheckError(); + CALL_CUBS(DeviceRadixSort::SortPairs, d_keys + offset, mData.d_keys + offset, d_indx + offset, mData.d_indx + offset, count, 0, 36);// 9+12+15=36 + offset += count; + } + mMemPool.free(d_indx); + delete [] points_per_tile; + + if (mVerbose==2) mTimer.restart("Count points per voxel"); + + mData.pointsPerVoxel = mMemPool.template alloc(pointCount, mStream); + uint32_t *d_voxel_count = mMemPool.template alloc(mStream); + CALL_CUBS(DeviceRunLengthEncode::Encode, mData.d_keys, d_keys, mData.pointsPerVoxel, d_voxel_count, pointCount); + cudaCheck(cudaMemcpyAsync(&mData.voxelCount, d_voxel_count, sizeof(uint32_t), cudaMemcpyDeviceToHost, mStream)); + mMemPool.free(d_voxel_count); + + if (util::is_same::value) { + if (mVerbose==2) mTimer.restart("Count max points per voxel"); + uint32_t *d_maxPointsPerVoxel = mMemPool.template alloc(mStream), maxPointsPerVoxel; + CALL_CUBS(DeviceReduce::Max, mData.pointsPerVoxel, d_maxPointsPerVoxel, mData.voxelCount); + cudaCheck(cudaMemcpyAsync(&maxPointsPerVoxel, d_maxPointsPerVoxel, sizeof(uint32_t), cudaMemcpyDeviceToHost, mStream)); + mMemPool.free(d_maxPointsPerVoxel); + double dx = mData.map.getVoxelSize()[0]; + if (++iterCounter >= mMaxIterations || pointCount == 1u || math::Abs((int)maxPointsPerVoxel - (int)mMaxPointsPerVoxel) <= mTolerance) { + mMaxPointsPerVoxel = maxPointsPerVoxel; + } else { + const Foo tmp{dx, maxPointsPerVoxel}; + if (maxPointsPerVoxel < mMaxPointsPerVoxel) { + if (min < tmp) min = tmp; + } else if (max.density == 0 || tmp < max) { + max = tmp; + } + if (max.density) { + dx = (min.dx*(max.density - mMaxPointsPerVoxel) + max.dx*(mMaxPointsPerVoxel-min.density))/double(max.density-min.density); + } else if (maxPointsPerVoxel > 1u) { + dx *= (mMaxPointsPerVoxel-1.0)/(maxPointsPerVoxel-1.0); + } else {// maxPointsPerVoxel = 1 so increase dx significantly + dx *= 10.0; + } + if (mVerbose==2) printf("\ntarget density = %u, current density = %u current dx = %f, next dx = %f\n", mMaxPointsPerVoxel, maxPointsPerVoxel, tmp.dx, dx); + mData.map = Map(dx); + mMemPool.free(mData.d_keys, mData.d_indx, d_keys, mData.d_tile_keys, d_node_count, mData.pointsPerVoxel); + goto jump; + } + } + if (iterCounter>1 && mVerbose) std::cerr << "Used " << iterCounter << " attempts to determine dx that produces a target dpoint denisty\n\n"; + + if (mVerbose==2) mTimer.restart("Compute prefix sum of points per voxel"); + mData.pointsPerVoxelPrefix = mMemPool.template alloc(mData.voxelCount, mStream); + CALL_CUBS(DeviceScan::ExclusiveSum, mData.pointsPerVoxel, mData.pointsPerVoxelPrefix, mData.voxelCount); + + mData.pointsPerLeaf = mMemPool.template alloc(pointCount, mStream); + CALL_CUBS(DeviceRunLengthEncode::Encode, ShiftRightIterator<9>(mData.d_keys), d_keys, mData.pointsPerLeaf, d_node_count, pointCount); + cudaCheck(cudaMemcpyAsync(mData.nodeCount, d_node_count, sizeof(uint32_t), cudaMemcpyDeviceToHost, mStream)); + + if 
constexpr(util::is_same::value) { + uint32_t *d_maxPointsPerLeaf = mMemPool.template alloc(mStream); + CALL_CUBS(DeviceReduce::Max, mData.pointsPerLeaf, d_maxPointsPerLeaf, mData.nodeCount[0]); + cudaCheck(cudaMemcpyAsync(&mMaxPointsPerLeaf, d_maxPointsPerLeaf, sizeof(uint32_t), cudaMemcpyDeviceToHost, mStream)); + //printf("\n Leaf count = %u, max points per leaf = %u\n", mData.nodeCount[0], mMaxPointsPerLeaf); + if (mMaxPointsPerLeaf > std::numeric_limits::max()) { + throw std::runtime_error("Too many points per leaf: "+std::to_string(mMaxPointsPerLeaf)); + } + mMemPool.free(d_maxPointsPerLeaf); + } + + mData.pointsPerLeafPrefix = mMemPool.template alloc(mData.nodeCount[0], mStream); + CALL_CUBS(DeviceScan::ExclusiveSum, mData.pointsPerLeaf, mData.pointsPerLeafPrefix, mData.nodeCount[0]); + + mData.d_leaf_keys = mMemPool.template alloc(mData.nodeCount[0], mStream); + cudaCheck(cudaMemcpyAsync(mData.d_leaf_keys, d_keys, mData.nodeCount[0]*sizeof(uint64_t), cudaMemcpyDeviceToDevice, mStream)); + + CALL_CUBS(DeviceSelect::Unique, ShiftRightIterator<12>(mData.d_leaf_keys), d_keys, d_node_count+1, mData.nodeCount[0]);// count lower nodes + cudaCheck(cudaMemcpyAsync(mData.nodeCount+1, d_node_count+1, sizeof(uint32_t), cudaMemcpyDeviceToHost, mStream)); + mData.d_lower_keys = mMemPool.template alloc(mData.nodeCount[1], mStream); + cudaCheck(cudaMemcpyAsync(mData.d_lower_keys, d_keys, mData.nodeCount[1]*sizeof(uint64_t), cudaMemcpyDeviceToDevice, mStream)); + + mMemPool.free(d_keys, d_node_count); + if (mVerbose==2) mTimer.stop(); + + //printf("Leaf count = %u, lower count = %u, upper count = %u\n", mData.nodeCount[0], mData.nodeCount[1], mData.nodeCount[2]); +}// PointsToGrid::countNodes + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +template +inline BufferT PointsToGrid::getBuffer(const PtrT, size_t pointCount, const BufferT &pool) +{ + auto sizeofPoint = [&]()->size_t{ + switch (mPointType){ + case PointType::PointID: return sizeof(uint32_t); + case PointType::World64: return sizeof(Vec3d); + case PointType::World32: return sizeof(Vec3f); + case PointType::Grid64: return sizeof(Vec3d); + case PointType::Grid32: return sizeof(Vec3f); + case PointType::Voxel32: return sizeof(Vec3f); + case PointType::Voxel16: return sizeof(Vec3u16); + case PointType::Voxel8: return sizeof(Vec3u8); + case PointType::Default: return pointer_traits::element_size; + default: return size_t(0);// PointType::Disable + } + }; + + mData.grid = 0;// grid is always stored at the start of the buffer! 
+ mData.tree = NanoGrid::memUsage(); // grid ends and tree begins + mData.root = mData.tree + NanoTree::memUsage(); // tree ends and root node begins + mData.upper = mData.root + NanoRoot::memUsage(mData.nodeCount[2]); // root node ends and upper internal nodes begin + mData.lower = mData.upper + NanoUpper::memUsage()*mData.nodeCount[2]; // upper internal nodes ends and lower internal nodes begin + mData.leaf = mData.lower + NanoLower::memUsage()*mData.nodeCount[1]; // lower internal nodes ends and leaf nodes begin + mData.meta = mData.leaf + NanoLeaf::DataType::memUsage()*mData.nodeCount[0];// leaf nodes end and blind meta data begins + mData.blind = mData.meta + sizeof(GridBlindMetaData)*int( mPointType!=PointType::Disable ); // meta data ends and blind data begins + mData.size = mData.blind + pointCount*sizeofPoint();// end of buffer + + auto buffer = BufferT::create(mData.size, &pool, false);// only allocate buffer on the device + mData.d_bufferPtr = buffer.deviceData(); + if (mData.d_bufferPtr == nullptr) throw std::runtime_error("Failed to allocate grid buffer on the device"); + cudaCheck(cudaMemcpyAsync(mDeviceData, &mData, sizeof(Data), cudaMemcpyHostToDevice, mStream));// copy Data CPU -> GPU + return buffer; +}// PointsToGrid::getBuffer + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +template +inline void PointsToGrid::processGridTreeRoot(const PtrT points, size_t pointCount) +{ + using Vec3T = typename util::remove_const::element_type>::type; + util::cuda::lambdaKernel<<<1, 1, 0, mStream>>>(1, [=] __device__(size_t, Data *d_data, PointType pointType) { + // process Root + auto &root = d_data->getRoot(); + root.mBBox = CoordBBox(); // init to empty + root.mTableSize = d_data->nodeCount[2]; + root.mBackground = NanoRoot::ValueType(0);// background_value + root.mMinimum = root.mMaximum = NanoRoot::ValueType(0); + root.mAverage = root.mStdDevi = NanoRoot::FloatType(0); + + // process Tree + auto &tree = d_data->getTree(); + tree.setRoot(&root); + tree.setFirstNode(&d_data->getUpper(0)); + tree.setFirstNode(&d_data->getLower(0)); + tree.setFirstNode(&d_data->getLeaf(0)); + tree.mNodeCount[2] = tree.mTileCount[2] = d_data->nodeCount[2]; + tree.mNodeCount[1] = tree.mTileCount[1] = d_data->nodeCount[1]; + tree.mNodeCount[0] = tree.mTileCount[0] = d_data->nodeCount[0]; + tree.mVoxelCount = d_data->voxelCount; + + // process Grid + auto &grid = d_data->getGrid(); + grid.init({GridFlags::HasBBox, GridFlags::IsBreadthFirst}, d_data->size, d_data->map, toGridType()); + grid.mChecksum = ~uint64_t(0);// set all bits on which means it's disabled + grid.mBlindMetadataCount = util::is_same::value;// ? 
1u : 0u; + grid.mBlindMetadataOffset = d_data->meta; + if (pointType != PointType::Disable) { + const auto lastLeaf = tree.mNodeCount[0] - 1; + grid.mData1 = d_data->pointsPerLeafPrefix[lastLeaf] + d_data->pointsPerLeaf[lastLeaf]; + auto &meta = d_data->getMeta(); + meta.mDataOffset = sizeof(GridBlindMetaData);// blind data is placed right after this meta data + meta.mValueCount = pointCount; + // Blind meta data + switch (pointType){ + case PointType::PointID: + grid.mGridClass = GridClass::PointIndex; + meta.mSemantic = GridBlindDataSemantic::PointId; + meta.mDataClass = GridBlindDataClass::IndexArray; + meta.mDataType = toGridType(); + meta.mValueSize = sizeof(uint32_t); + util::strcpy(meta.mName, "PointID: uint32_t indices to points"); + break; + case PointType::World64: + grid.mGridClass = GridClass::PointData; + meta.mSemantic = GridBlindDataSemantic::WorldCoords; + meta.mDataClass = GridBlindDataClass::AttributeArray; + meta.mDataType = toGridType(); + meta.mValueSize = sizeof(Vec3d); + util::strcpy(meta.mName, "World64: Vec3 point coordinates in world space"); + break; + case PointType::World32: + grid.mGridClass = GridClass::PointData; + meta.mSemantic = GridBlindDataSemantic::WorldCoords; + meta.mDataClass = GridBlindDataClass::AttributeArray; + meta.mDataType = toGridType(); + meta.mValueSize = sizeof(Vec3f); + util::strcpy(meta.mName, "World32: Vec3 point coordinates in world space"); + break; + case PointType::Grid64: + grid.mGridClass = GridClass::PointData; + meta.mSemantic = GridBlindDataSemantic::GridCoords; + meta.mDataClass = GridBlindDataClass::AttributeArray; + meta.mDataType = toGridType(); + meta.mValueSize = sizeof(Vec3d); + util::strcpy(meta.mName, "Grid64: Vec3 point coordinates in grid space"); + break; + case PointType::Grid32: + grid.mGridClass = GridClass::PointData; + meta.mSemantic = GridBlindDataSemantic::GridCoords; + meta.mDataClass = GridBlindDataClass::AttributeArray; + meta.mDataType = toGridType(); + meta.mValueSize = sizeof(Vec3f); + util::strcpy(meta.mName, "Grid32: Vec3 point coordinates in grid space"); + break; + case PointType::Voxel32: + grid.mGridClass = GridClass::PointData; + meta.mSemantic = GridBlindDataSemantic::VoxelCoords; + meta.mDataClass = GridBlindDataClass::AttributeArray; + meta.mDataType = toGridType(); + meta.mValueSize = sizeof(Vec3f); + util::strcpy(meta.mName, "Voxel32: Vec3 point coordinates in voxel space"); + break; + case PointType::Voxel16: + grid.mGridClass = GridClass::PointData; + meta.mSemantic = GridBlindDataSemantic::VoxelCoords; + meta.mDataClass = GridBlindDataClass::AttributeArray; + meta.mDataType = toGridType(); + meta.mValueSize = sizeof(Vec3u16); + util::strcpy(meta.mName, "Voxel16: Vec3 point coordinates in voxel space"); + break; + case PointType::Voxel8: + grid.mGridClass = GridClass::PointData; + meta.mSemantic = GridBlindDataSemantic::VoxelCoords; + meta.mDataClass = GridBlindDataClass::AttributeArray; + meta.mDataType = toGridType(); + meta.mValueSize = sizeof(Vec3u8); + util::strcpy(meta.mName, "Voxel8: Vec3 point coordinates in voxel space"); + break; + case PointType::Default: + grid.mGridClass = GridClass::PointData; + meta.mSemantic = GridBlindDataSemantic::WorldCoords; + meta.mDataClass = GridBlindDataClass::AttributeArray; + meta.mDataType = toGridType(); + meta.mValueSize = sizeof(Vec3T); + if constexpr(util::is_same::value) { + util::strcpy(meta.mName, "World32: Vec3 point coordinates in world space"); + } else if constexpr(util::is_same::value){ + util::strcpy(meta.mName, "World64: Vec3 point 
coordinates in world space"); + } else { + printf("Error in PointsToGrid::processGridTreeRoot: expected Vec3T = Vec3f or Vec3d\n"); + } + break; + default: + printf("Error in PointsToGrid::processGridTreeRoot: invalid pointType\n"); + } + } else if constexpr(BuildTraits::is_offindex) { + grid.mData1 = 1u + 512u*d_data->nodeCount[0]; + grid.mGridClass = GridClass::IndexGrid; + } + }, mDeviceData, mPointType);// lambdaKernel + cudaCheckError(); + + char *dst = mData.getGrid().mGridName; + if (const char *src = mGridName.data()) { + cudaCheck(cudaMemcpyAsync(dst, src, GridData::MaxNameSize, cudaMemcpyHostToDevice, mStream)); + } else { + cudaCheck(cudaMemsetAsync(dst, 0, GridData::MaxNameSize, mStream)); + } +}// PointsToGrid::processGridTreeRoot + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +inline void PointsToGrid::processUpperNodes() +{ + util::cuda::lambdaKernel<<>>(mData.nodeCount[2], [=] __device__(size_t tid, Data *d_data) { + auto &root = d_data->getRoot(); + auto &upper = d_data->getUpper(tid); +#if 1 + auto keyToCoord = [](uint64_t key)->nanovdb::Coord{ + static constexpr int64_t offset = 1 << 31;// max values of uint32_t is 2^31 - 1 + static constexpr uint64_t MASK = (1u << 21) - 1; // used to mask out 21 lower bits + return nanovdb::Coord(int(int64_t(((key >> 42) & MASK) << 12) - offset), // x are the upper 21 bits + int(int64_t(((key >> 21) & MASK) << 12) - offset), // y are the middle 21 bits + int(int64_t(( key & MASK) << 12) - offset)); // z are the lower 21 bits + }; + const Coord ijk = keyToCoord(d_data->d_tile_keys[tid]); +#else + const Coord ijk = NanoRoot::KeyToCoord(d_data->d_tile_keys[tid]); +#endif + root.tile(tid)->setChild(ijk, &upper, &root); + upper.mBBox[0] = ijk; + upper.mFlags = 0; + upper.mValueMask.setOff(); + upper.mChildMask.setOff(); + upper.mMinimum = upper.mMaximum = NanoLower::ValueType(0); + upper.mAverage = upper.mStdDevi = NanoLower::FloatType(0); + }, mDeviceData); + cudaCheckError(); + + mMemPool.free(mData.d_tile_keys); + + const uint64_t valueCount = mData.nodeCount[2] << 15; + util::cuda::lambdaKernel<<>>(valueCount, [=] __device__(size_t tid, Data *d_data) { + auto &upper = d_data->getUpper(tid >> 15); + upper.mTable[tid & 32767u].value = NanoUpper::ValueType(0);// background + }, mDeviceData); + cudaCheckError(); +}// PointsToGrid::processUpperNodes + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +inline void PointsToGrid::processLowerNodes() +{ + util::cuda::lambdaKernel<<>>(mData.nodeCount[1], [=] __device__(size_t tid, Data *d_data) { + auto &root = d_data->getRoot(); + const uint64_t lowerKey = d_data->d_lower_keys[tid]; + auto &upper = d_data->getUpper(lowerKey >> 15); + const uint32_t upperOffset = lowerKey & 32767u;// (1 << 15) - 1 = 32767 + upper.mChildMask.setOnAtomic(upperOffset); + auto &lower = d_data->getLower(tid); + upper.setChild(upperOffset, &lower); + lower.mBBox[0] = upper.offsetToGlobalCoord(upperOffset); + lower.mFlags = 0; + lower.mValueMask.setOff(); + lower.mChildMask.setOff(); + lower.mMinimum = lower.mMaximum = NanoLower::ValueType(0);// background; + lower.mAverage = lower.mStdDevi = NanoLower::FloatType(0); + }, mDeviceData); + cudaCheckError(); + + const uint64_t valueCount = mData.nodeCount[1] << 12; + 
util::cuda::lambdaKernel<<>>(valueCount, [=] __device__(size_t tid, Data *d_data) { + auto &lower = d_data->getLower(tid >> 12); + lower.mTable[tid & 4095u].value = NanoLower::ValueType(0);// background + }, mDeviceData); + cudaCheckError(); +}// PointsToGrid::processLowerNodes + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +template +inline void PointsToGrid::processLeafNodes(const PtrT points) +{ + const uint8_t flags = static_cast(mData.flags.data());// mIncludeStats ? 16u : 0u;// 4th bit indicates stats + + if (mVerbose==2) mTimer.start("process leaf meta data"); + // loop over leaf nodes and add it to its parent node + util::cuda::lambdaKernel<<>>(mData.nodeCount[0], [=] __device__(size_t tid, Data *d_data) { + const uint64_t leafKey = d_data->d_leaf_keys[tid], tile_id = leafKey >> 27; + auto &upper = d_data->getUpper(tile_id); + const uint32_t lowerOffset = leafKey & 4095u, upperOffset = (leafKey >> 12) & 32767u; + auto &lower = *upper.getChild(upperOffset); + lower.mChildMask.setOnAtomic(lowerOffset); + auto &leaf = d_data->getLeaf(tid); + lower.setChild(lowerOffset, &leaf); + leaf.mBBoxMin = lower.offsetToGlobalCoord(lowerOffset); + leaf.mFlags = flags; + auto &valueMask = leaf.mValueMask; + valueMask.setOff();// initiate all bits to off + + if constexpr(util::is_same::value) { + leaf.mOffset = d_data->pointsPerLeafPrefix[tid]; + leaf.mPointCount = d_data->pointsPerLeaf[tid]; + } else if constexpr(BuildTraits::is_offindex) { + leaf.mOffset = tid*512u + 1u;// background is index 0 + leaf.mPrefixSum = 0u; + } else if constexpr(!BuildTraits::is_special) { + leaf.mAverage = leaf.mStdDevi = NanoLeaf::FloatType(0); + leaf.mMinimum = leaf.mMaximum = NanoLeaf::ValueType(0); + } + }, mDeviceData); cudaCheckError(); + + if (mVerbose==2) mTimer.restart("set active voxel state and values"); + // loop over all active voxels and set LeafNode::mValueMask and LeafNode::mValues + util::cuda::lambdaKernel<<>>(mData.voxelCount, [=] __device__(size_t tid, Data *d_data) { + const uint32_t pointID = d_data->pointsPerVoxelPrefix[tid]; + const uint64_t voxelKey = d_data->d_keys[pointID]; + auto &upper = d_data->getUpper(voxelKey >> 36); + auto &lower = *upper.getChild((voxelKey >> 21) & 32767u); + auto &leaf = *lower.getChild((voxelKey >> 9) & 4095u); + const uint32_t n = voxelKey & 511u; + leaf.mValueMask.setOnAtomic(n);// <--- slow! + if constexpr(util::is_same::value) { + leaf.mValues[n] = uint16_t(pointID + d_data->pointsPerVoxel[tid] - leaf.offset()); + } else if constexpr(!BuildTraits::is_special) { + leaf.mValues[n] = NanoLeaf::ValueType(1);// set value of active voxels that are not points (or index) + } + }, mDeviceData); cudaCheckError(); + + mMemPool.free(mData.d_keys, mData.pointsPerVoxel, mData.pointsPerVoxelPrefix, mData.pointsPerLeafPrefix, mData.pointsPerLeaf); + + if (mVerbose==2) mTimer.restart("set inactive voxel values"); + const uint64_t denseVoxelCount = mData.nodeCount[0] << 9; + util::cuda::lambdaKernel<<>>(denseVoxelCount, [=] __device__(size_t tid, Data *d_data) { + auto &leaf = d_data->getLeaf(tid >> 9u); + const uint32_t n = tid & 511u; + if (leaf.mValueMask.isOn(n)) return; + if constexpr(util::is_same::value) { + const uint32_t m = leaf.mValueMask.findPrev(n - 1); + leaf.mValues[n] = m < 512u ? 
leaf.mValues[m] : 0u; + } else if constexpr(!BuildTraits::is_special) { + leaf.mValues[n] = NanoLeaf::ValueType(0);// value of inactive voxels + } + }, mDeviceData); cudaCheckError(); + + if constexpr(BuildTraits::is_onindex) { + if (mVerbose==2) mTimer.restart("prefix-sum for index grid"); + uint64_t *devValueIndex = mMemPool.template alloc(mData.nodeCount[0], mStream); + auto devValueIndexPrefix = mMemPool.template alloc(mData.nodeCount[0], mStream); + kernels::fillValueIndexKernel<<>>(mData.nodeCount[0], devValueIndex, mDeviceData); + cudaCheckError(); + CALL_CUBS(DeviceScan::InclusiveSum, devValueIndex, devValueIndexPrefix, mData.nodeCount[0]); + mMemPool.free(devValueIndex); + kernels::leafPrefixSumKernel<<>>(mData.nodeCount[0], devValueIndexPrefix, mDeviceData); + cudaCheckError(); + mMemPool.free(devValueIndexPrefix); + } + + if constexpr(BuildTraits::is_indexmask) { + if (mVerbose==2) mTimer.restart("leaf.mMask = leaf.mValueMask"); + kernels::setMaskEqValMaskKernel<<>>(mData.nodeCount[0], mDeviceData); + cudaCheckError(); + } + if (mVerbose==2) mTimer.stop(); +}// PointsToGrid::processLeafNodes + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +template +inline void PointsToGrid::processPoints(const PtrT, size_t) +{ + mMemPool.free(mData.d_indx); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +// Template specialization with BuildT = Point +template <> +template +inline void PointsToGrid::processPoints(const PtrT points, size_t pointCount) +{ + switch (mPointType){ + case PointType::Disable: + throw std::runtime_error("PointsToGrid::processPoints: mPointType == PointType::Disable\n"); + case PointType::PointID: + util::cuda::lambdaKernel<<>>(pointCount, [=] __device__(size_t tid, Data *d_data) { + d_data->template getPoint(tid) = d_data->d_indx[tid]; + }, mDeviceData); cudaCheckError(); + break; + case PointType::World64: + util::cuda::lambdaKernel<<>>(pointCount, [=] __device__(size_t tid, Data *d_data) { + d_data->template getPoint(tid) = points[d_data->d_indx[tid]]; + }, mDeviceData); cudaCheckError(); + break; + case PointType::World32: + util::cuda::lambdaKernel<<>>(pointCount, [=] __device__(size_t tid, Data *d_data) { + d_data->template getPoint(tid) = points[d_data->d_indx[tid]]; + }, mDeviceData); cudaCheckError(); + break; + case PointType::Grid64: + util::cuda::lambdaKernel<<>>(pointCount, [=] __device__(size_t tid, Data *d_data) { + d_data->template getPoint(tid) = d_data->map.applyInverseMap(points[d_data->d_indx[tid]]); + }, mDeviceData); cudaCheckError(); + break; + case PointType::Grid32: + util::cuda::lambdaKernel<<>>(pointCount, [=] __device__(size_t tid, Data *d_data) { + d_data->template getPoint(tid) = d_data->map.applyInverseMapF(points[d_data->d_indx[tid]]); + }, mDeviceData); cudaCheckError(); + break; + case PointType::Voxel32: + util::cuda::lambdaKernel<<>>(pointCount, [=] __device__(size_t tid, Data *d_data) { + worldToVoxel(d_data->template getPoint(tid), points[d_data->d_indx[tid]], d_data->map); + }, mDeviceData); cudaCheckError(); + break; + case PointType::Voxel16: + util::cuda::lambdaKernel<<>>(pointCount, [=] __device__(size_t tid, Data *d_data) { + worldToVoxel(d_data->template getPoint(tid), points[d_data->d_indx[tid]], d_data->map); + }, mDeviceData); 
cudaCheckError(); + break; + case PointType::Voxel8: + util::cuda::lambdaKernel<<>>(pointCount, [=] __device__(size_t tid, Data *d_data) { + worldToVoxel(d_data->template getPoint(tid), points[d_data->d_indx[tid]], d_data->map); + }, mDeviceData); cudaCheckError(); + break; + case PointType::Default: + util::cuda::lambdaKernel<<>>(pointCount, [=] __device__(size_t tid, Data *d_data) { + d_data->template getPoint::element_type>(tid) = points[d_data->d_indx[tid]]; + }, mDeviceData); cudaCheckError(); + break; + default: + printf("Internal error in PointsToGrid::processPoints\n"); + } + mMemPool.free(mData.d_indx); +}// PointsToGrid::processPoints + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +inline void PointsToGrid::processBBox() +{ + if (mData.flags.isMaskOff(GridFlags::HasBBox)) { + mMemPool.free(mData.d_leaf_keys, mData.d_lower_keys); + return; + } + + // reset bbox in lower nodes + util::cuda::lambdaKernel<<>>(mData.nodeCount[1], [=] __device__(size_t tid, Data *d_data) { + d_data->getLower(tid).mBBox = CoordBBox(); + }, mDeviceData); + cudaCheckError(); + + // update and propagate bbox from leaf -> lower/parent nodes + util::cuda::lambdaKernel<<>>(mData.nodeCount[0], [=] __device__(size_t tid, Data *d_data) { + const uint64_t leafKey = d_data->d_leaf_keys[tid]; + auto &upper = d_data->getUpper(leafKey >> 27); + auto &lower = *upper.getChild((leafKey >> 12) & 32767u); + auto &leaf = d_data->getLeaf(tid); + leaf.updateBBox(); + lower.mBBox.expandAtomic(leaf.bbox()); + }, mDeviceData); + mMemPool.free(mData.d_leaf_keys); + cudaCheckError(); + + // reset bbox in upper nodes + util::cuda::lambdaKernel<<>>(mData.nodeCount[2], [=] __device__(size_t tid, Data *d_data) { + d_data->getUpper(tid).mBBox = CoordBBox(); + }, mDeviceData); + cudaCheckError(); + + // propagate bbox from lower -> upper/parent node + util::cuda::lambdaKernel<<>>(mData.nodeCount[1], [=] __device__(size_t tid, Data *d_data) { + const uint64_t lowerKey = d_data->d_lower_keys[tid]; + auto &upper = d_data->getUpper(lowerKey >> 15); + auto &lower = d_data->getLower(tid); + upper.mBBox.expandAtomic(lower.bbox()); + }, mDeviceData); + mMemPool.free(mData.d_lower_keys); + cudaCheckError() + + // propagate bbox from upper -> root/parent node + util::cuda::lambdaKernel<<>>(mData.nodeCount[2], [=] __device__(size_t tid, Data *d_data) { + d_data->getRoot().mBBox.expandAtomic(d_data->getUpper(tid).bbox()); + }, mDeviceData); + cudaCheckError(); + + // update the world-bbox in the root node + util::cuda::lambdaKernel<<<1, 1, 0, mStream>>>(1, [=] __device__(size_t, Data *d_data) { + d_data->getGrid().mWorldBBox = d_data->getRoot().mBBox.transform(d_data->map); + }, mDeviceData); + cudaCheckError(); +}// PointsToGrid::processBBox + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +GridHandle// Grid +voxelsToGrid(const PtrT d_ijk, size_t voxelCount, double voxelSize, const BufferT &buffer, cudaStream_t stream) +{ + PointsToGrid converter(voxelSize, Vec3d(0.0), stream); + return converter.getHandle(d_ijk, voxelCount, buffer); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +GridHandle// Grid 
with PointType coordinates as blind data +pointsToGrid(const PtrT d_xyz, int pointCount, int maxPointsPerVoxel, int tolerance, int maxIterations, PointType type, const BufferT &buffer, cudaStream_t stream) +{ + PointsToGrid converter(maxPointsPerVoxel, tolerance, maxIterations, Vec3d(0.0), stream); + converter.setPointType(type); + return converter.getHandle(d_xyz, pointCount, buffer); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +GridHandle +pointsToGrid(std::vector> vec, const BufferT &buffer, cudaStream_t stream) +{ + std::vector> handles; + for (auto &p : vec) handles.push_back(pointsToGrid(std::get<0>(p), std::get<1>(p), std::get<2>(p), std::get<3>(p), buffer, stream)); + return mergeDeviceGrids(handles, stream); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +GridHandle +voxelsToGrid(std::vector> vec, const BufferT &buffer, cudaStream_t stream) +{ + std::vector> handles; + for (auto &p : vec) handles.push_back(voxelsToGrid(std::get<0>(p), std::get<1>(p), std::get<2>(p), buffer, stream)); + return mergeDeviceGrids(handles, stream); +} + +}}// namespace tools::cuda ====================================================================================================================================== + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +[[deprecated("Use cuda::pointsToGrid instead")]] +GridHandle +cudaPointsToGrid(const PtrT dWorldPoints, + int pointCount, + double voxelSize = 1.0, + PointType type = PointType::Default, + const BufferT &buffer = BufferT(), + cudaStream_t stream = 0) +{ + return tools::cuda::pointsToGrid(dWorldPoints, pointCount, voxelSize, type, buffer, stream); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +[[deprecated("Use cuda::pointsToGrid instead")]] +GridHandle +cudaPointsToGrid(std::vector> pointSet, + const BufferT &buffer = BufferT(), + cudaStream_t stream = 0) +{ + return tools::cuda::pointsToGrid(pointSet, buffer, stream); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +[[deprecated("Use cuda::voxelsToGrid instead")]] +GridHandle +cudaVoxelsToGrid(const PtrT dGridVoxels, + size_t voxelCount, + double voxelSize = 1.0, + const BufferT &buffer = BufferT(), + cudaStream_t stream = 0) +{ + return tools::cuda::voxelsToGrid(dGridVoxels, voxelCount, voxelSize, buffer, stream); +} + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +template +[[deprecated("Use cuda::voxelsToGrid instead")]] +GridHandle +cudaVoxelsToGrid(std::vector> pointSet, + const BufferT &buffer = BufferT(), + cudaStream_t stream = 0) +{ + return tools::cuda::voxelsToGrid(pointSet, buffer, stream); +} + +}// namespace nanovdb + +#endif // NVIDIA_TOOLS_CUDA_POINTSTOGRID_CUH_HAS_BEEN_INCLUDED 
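Editor's note: for orientation, the sketch below shows roughly how the voxelsToGrid entry point declared at the top of this header can be called to build a NanoVDB index grid from device-resident voxel coordinates. It is not part of the patch; the include paths, the ValueOnIndex build tag, and the reliance on the default buffer/allocator template arguments are assumptions based on the surrounding code, and the snippet must be compiled with nvcc.

// Illustrative sketch (not part of this patch): building an index grid on the
// GPU from a device array of voxel coordinates via tools::cuda::voxelsToGrid.
#include <nanovdb/NanoVDB.h>
#include <nanovdb/tools/cuda/PointsToGrid.cuh>

void buildIndexGrid(const nanovdb::Coord* d_ijk, size_t voxelCount)
{
    // BuildT = ValueOnIndex requests an index grid: leaves store value indices
    // rather than values (handled by the BuildTraits::is_onindex prefix-sum
    // path in processLeafNodes above).
    auto handle = nanovdb::tools::cuda::voxelsToGrid<nanovdb::ValueOnIndex>(
        d_ijk, voxelCount, /*voxelSize=*/1.0);

    // The handle owns the device allocation; a typed device pointer for use in
    // kernels can be obtained from it.
    auto* d_grid = handle.deviceGrid<nanovdb::ValueOnIndex>();
    (void)d_grid;
}

The returned grid lives only in device memory unless it is explicitly downloaded to the host, so host-side inspection or serialization requires an additional copy step.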
diff --git a/warp/native/nanovdb/util/Util.h b/warp/native/nanovdb/util/Util.h new file mode 100644 index 00000000..e8ebfc1c --- /dev/null +++ b/warp/native/nanovdb/util/Util.h @@ -0,0 +1,657 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: MPL-2.0 + +/*! + \file nanovdb/util/Util.h + + \author Ken Museth + + \date January 8, 2020 + + \brief Utility functions +*/ + +#ifndef NANOVDB_UTIL_UTIL_H_HAS_BEEN_INCLUDED +#define NANOVDB_UTIL_UTIL_H_HAS_BEEN_INCLUDED + +#ifdef __CUDACC_RTC__ + +typedef signed char int8_t; +typedef short int16_t; +typedef int int32_t; +typedef long long int64_t; +typedef unsigned char uint8_t; +typedef unsigned int uint32_t; +typedef unsigned short uint16_t; +typedef unsigned long long uint64_t; + +#define NANOVDB_ASSERT(x) + +#ifndef UINT64_C +#define UINT64_C(x) (x ## ULL) +#endif + +#else // !__CUDACC_RTC__ + +#include // for abs in clang7 +#include // for types like int32_t etc +#include // for size_t type +#include // for assert +#include // for stderr and snprintf +#include // for sqrt and fma +#include // for numeric_limits +#include // for std::move +#ifdef NANOVDB_USE_IOSTREAMS +#include // for read/writeUncompressedGrids +#endif// ifdef NANOVDB_USE_IOSTREAMS + +// All asserts can be disabled here, even for debug builds +#if 1 +#define NANOVDB_ASSERT(x) assert(x) +#else +#define NANOVDB_ASSERT(x) +#endif + +#if defined(NANOVDB_USE_INTRINSICS) && defined(_MSC_VER) +#include +#pragma intrinsic(_BitScanReverse) +#pragma intrinsic(_BitScanForward) +#pragma intrinsic(_BitScanReverse64) +#pragma intrinsic(_BitScanForward64) +#endif + +#endif // __CUDACC_RTC__ + +#if defined(__CUDACC__) || defined(__HIP__) +// Only define __hostdev__ qualifier when using NVIDIA CUDA or HIP compilers +#ifndef __hostdev__ +#define __hostdev__ __host__ __device__ // Runs on the CPU and GPU, called from the CPU or the GPU +#endif +#else +// Dummy definitions of macros only defined by CUDA and HIP compilers +#ifndef __hostdev__ +#define __hostdev__ // Runs on the CPU and GPU, called from the CPU or the GPU +#endif +#ifndef __global__ +#define __global__ // Runs on the GPU, called from the CPU or the GPU +#endif +#ifndef __device__ +#define __device__ // Runs on the GPU, called from the GPU +#endif +#ifndef __host__ +#define __host__ // Runs on the CPU, called from the CPU +#endif + +#endif // if defined(__CUDACC__) || defined(__HIP__) + +// The following macro will suppress annoying warnings when nvcc +// compiles functions that call (host) intrinsics (which is perfectly valid) +#if defined(_MSC_VER) && defined(__CUDACC__) +#define NANOVDB_HOSTDEV_DISABLE_WARNING __pragma("hd_warning_disable") +#elif defined(__GNUC__) && defined(__CUDACC__) +#define NANOVDB_HOSTDEV_DISABLE_WARNING _Pragma("hd_warning_disable") +#else +#define NANOVDB_HOSTDEV_DISABLE_WARNING +#endif + +// Define compiler warnings that work with all compilers +//#if defined(_MSC_VER) +//#define NANO_WARNING(msg) _pragma("message" #msg) +//#else +//#define NANO_WARNING(msg) _Pragma("message" #msg) +//#endif + +//============================================== +/// @brief Defines macros that issues warnings for deprecated header files +/// @details Example: +/// @code +/// #include // for NANOVDB_DEPRECATED_HEADER +/// #include +/// NANOVDB_DEPRECATED_HEADER("This header file is deprecated, please use instead") +/// @endcode +#ifdef __GNUC__ +#define NANOVDB_PRAGMA(X) _Pragma(#X) +#define NANOVDB_DEPRECATED_HEADER(MSG) NANOVDB_PRAGMA(GCC warning MSG) +#elif defined(_MSC_VER) +#define 
NANOVDB_STRINGIZE_(MSG) #MSG +#define NANOVDB_STRINGIZE(MSG) NANOVDB_STRINGIZE_(MSG) +#define NANOVDB_DEPRECATED_HEADER(MSG) \ + __pragma(message(__FILE__ "(" NANOVDB_STRINGIZE(__LINE__) ") : Warning: " MSG)) +#endif + +// A portable implementation of offsetof - unfortunately it doesn't work with static_assert +#define NANOVDB_OFFSETOF(CLASS, MEMBER) ((int)(size_t)((char*)&((CLASS*)0)->MEMBER - (char*)0)) + +namespace nanovdb {// ================================================================= + +namespace util {// ==================================================================== + +/// @brief Minimal implementation of std::declval, which converts any type @c T to +//// a reference type, making it possible to use member functions in the operand +/// of the decltype specifier without the need to go through constructors. +/// @tparam T Template type to be converted to T&& +/// @return T&& +/// @warning Unlike std::declval, this version does not work when T = void! However, +/// NVRTC does not like std::declval, so we provide our own implementation. +template +T&& declval() noexcept; + +// --------------------------> string utility functions <------------------------------------ + +/// @brief tests if a c-string @c str is empty, that is its first value is '\0' +/// @param str c-string to be tested for null termination +/// @return true if str[0] = '\0' +__hostdev__ inline bool empty(const char* str) +{ + NANOVDB_ASSERT(str != nullptr); + return *str == '\0'; +}// util::empty + +/// @brief length of a c-sting, excluding '\0'. +/// @param str c-string +/// @return the number of characters that precede the terminating null character. +__hostdev__ inline size_t strlen(const char *str) +{ + NANOVDB_ASSERT(str != nullptr); + const char *s = str; + while(*s) ++s; ; + return (s - str); +}// util::strlen + +/// @brief Copy characters from @c src to @c dst. +/// @param dst pointer to the destination string. +/// @param src pointer to the null-terminated source string. +/// @return destination string @c dst. +/// @note Emulates the behaviour of std::strcpy, except this version also runs on the GPU. +__hostdev__ inline char* strcpy(char *dst, const char *src) +{ + NANOVDB_ASSERT(dst != nullptr && src != nullptr); + for (char *p = dst; (*p++ = *src) != '\0'; ++src); + return dst; +}// util::strcpy(char*, const char*) + +/// @brief Copies the first num characters of @c src to @c dst. +/// If the end of the source C string (which is signaled by a +/// null-character) is found before @c max characters have been +/// copied, @c dst is padded with zeros until a total of @c max +/// characters have been written to it. +/// @param dst destination string +/// @param src source string +/// @param max maximum number of character in destination string +/// @return destination string @c dst +/// @warning if strncpy(dst, src, max)[max-1]!='\0' then @c src has more +/// characters than @c max and the return string needs to be +/// manually null-terminated, i.e. 
strncpy(dst, src, max)[max-1]='\0' +__hostdev__ inline char* strncpy(char *dst, const char *src, size_t max) +{ + NANOVDB_ASSERT(dst != nullptr && src != nullptr); + size_t i = 0; + for (; i < max && src[i] != '\0'; ++i) dst[i] = src[i]; + for (; i < max; ++i) dst[i] = '\0'; + return dst; +}// util::strncpy(char *dst, const char *src, size_t max) + +/// @brief converts a number to a string using a specific base +/// @param dst destination string +/// @param num signed number to be concatenated after @c dst +/// @param bas base used when converting @c num to a string +/// @return destination string @c dst +/// @note Emulates the behaviour of itoa, except this verion also works on the GPU. +__hostdev__ inline char* strcpy(char* dst, int num, int bas = 10) +{ + NANOVDB_ASSERT(dst != nullptr && bas > 0); + int len = 0;// length of number once converted to a string + if (num == 0) dst[len++] = '0'; + for (int abs = num < 0 && bas == 10 ? -num : num; abs; abs /= bas) { + const int rem = abs % bas; + dst[len++] = rem > 9 ? rem - 10 + 'a' : rem + '0'; + } + if (num < 0) dst[len++] = '-';// append '-' if negative + for (char *a = dst, *b = a + len - 1; a < b; ++a, --b) {// reverse dst + dst[len] = *a;// use end of string as temp + *a = *b; + *b = dst[len]; + } + dst[len] = '\0';// explicitly terminate end of string + return dst; +}// util::strcpy(char*, int, int) + +/// @brief Appends a copy of the character string pointed to by @c src to +/// the end of the character string pointed to by @c dst on the device. +/// @param dst pointer to the null-terminated byte string to append to. +/// @param src pointer to the null-terminated byte string to copy from. +/// @return pointer to the character array being appended to. +/// @note Emulates the behaviour of std::strcat, except this version also runs on the GPU. +__hostdev__ inline char* strcat(char *dst, const char *src) +{ + NANOVDB_ASSERT(dst != nullptr && src != nullptr); + char *p = dst; + while (*p != '\0') ++p;// advance till end of dst + strcpy(p, src);// append src + return dst; +}// util::strcat(char*, const char*) + +/// @brief concatenates a number after a string using a specific base +/// @param dst null terminated destination string +/// @param num signed number to be concatenated after @c dst +/// @param bas base used when converting @c num to a string +/// @return destination string @c dst +__hostdev__ inline char* strcat(char* dst, int num, int bas = 10) +{ + NANOVDB_ASSERT(dst != nullptr); + char *p = dst; + while (*p != '\0') ++p; + strcpy(p, num, bas); + return dst; +}// util::strcat(char*, int, int) + +/// @brief Compares two null-terminated byte strings lexicographically. +/// @param lhs pointer to the null-terminated byte strings to compare +/// @param rhs pointer to the null-terminated byte strings to compare +/// @return Negative value if @c lhs appears before @c rhs in lexicographical order. +/// Zero if @c lhs and @c rhs compare equal. Positive value if @c lhs appears +/// after @c rhs in lexicographical order. +/// @note Emulates the behaviour of std::strcmp, except this version also runs on the GPU. 
+__hostdev__ inline int strcmp(const char *lhs, const char *rhs) +{ + while(*lhs != '\0' && (*lhs == *rhs)){ + lhs++; + rhs++; + } + return *(const unsigned char*)lhs - *(const unsigned char*)rhs;// zero if lhs == rhs +}// util::strcmp(const char*, const char*) + +/// @brief Test if two null-terminated byte strings are the same +/// @param lhs pointer to the null-terminated byte strings to compare +/// @param rhs pointer to the null-terminated byte strings to compare +/// @return true if the two c-strings are identical +__hostdev__ inline bool streq(const char *lhs, const char *rhs) +{ + return strcmp(lhs, rhs) == 0; +}// util::streq + +namespace impl {// ======================================================= +// Base-case implementation of Variadic Template function impl::sprint +__hostdev__ inline char* sprint(char *dst){return dst;} +// Variadic Template function impl::sprint +template +__hostdev__ inline char* sprint(char *dst, T var1, Types... var2) +{ + return impl::sprint(strcat(dst, var1), var2...); +} +}// namespace impl ========================================================= + +/// @brief prints a variable number of string and/or numbers to a destination string +template +__hostdev__ inline char* sprint(char *dst, T var1, Types... var2) +{ + return impl::sprint(strcpy(dst, var1), var2...); +}// util::sprint + +// --------------------------> memzero <------------------------------------ + +/// @brief Zero initialization of memory +/// @param dst pointer to destination +/// @param byteCount number of bytes to be initialized to zero +/// @return destination pointer @c dst +__hostdev__ inline static void* memzero(void *dst, size_t byteCount) +{ + NANOVDB_ASSERT(dst); + const size_t wordCount = byteCount >> 3; + if (wordCount << 3 == byteCount) { + for (auto *d = (uint64_t*)dst, *e = d + wordCount; d != e; ++d) *d = 0ULL; + } else { + for (auto *d = (char*)dst, *e = d + byteCount; d != e; ++d) *d = '\0'; + } + return dst; +}// util::memzero + +// --------------------------> util::is_same <------------------------------------ + +/// @brief C++11 implementation of std::is_same +/// @note When more than two arguments are provided value = T0==T1 || T0==T2 || ... 
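+/// @par Example
+/// Minimal sketch of the intended semantics (the trailing types are folded with logical OR):
+/// @code
+///     static_assert(util::is_same<float, float>::value, "");
+///     static_assert(util::is_same<int, float, double, int>::value, "");// int matches the trailing int
+///     static_assert(!util::is_same<int, float>::value, "");
+/// @endcode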
+template +struct is_same +{ + static constexpr bool value = is_same::value || is_same::value; +}; + +template +struct is_same {static constexpr bool value = false;}; + +template +struct is_same {static constexpr bool value = true;}; + +// --------------------------> util::is_floating_point <------------------------------------ + +/// @brief C++11 implementation of std::is_floating_point +template +struct is_floating_point {static constexpr bool value = is_same::value;}; + +// --------------------------> util::enable_if <------------------------------------ + +/// @brief C++11 implementation of std::enable_if +template +struct enable_if {}; + +template +struct enable_if {using type = T;}; + +// --------------------------> util::disable_if <------------------------------------ + +template +struct disable_if {using type = T;}; + +template +struct disable_if {}; + +// --------------------------> util::is_const <------------------------------------ + +template +struct is_const {static constexpr bool value = false;}; + +template +struct is_const {static constexpr bool value = true;}; + +// --------------------------> util::is_pointer <------------------------------------ + +/// @brief Trait used to identify template parameter that are pointers +/// @tparam T Template parameter to be tested +template +struct is_pointer {static constexpr bool value = false;}; + +/// @brief Template specialization of pointers +/// @tparam T Template parameter to be tested +/// @note T can be both a non-const and const type +template +struct is_pointer {static constexpr bool value = true;}; + +// --------------------------> util::conditional <------------------------------------ + +/// @brief C++11 implementation of std::conditional +template +struct conditional { using type = TrueT; }; + +/// @brief Template specialization of conditional +/// @tparam FalseT Type used when boolean is false +/// @tparam TrueT Type used when boolean is true +template +struct conditional { using type = FalseT; }; + +// --------------------------> util::remove_const <------------------------------------ + +/// @brief Trait use to const from type. Default implementation is just a pass-through +/// @tparam T Type +/// @details remove_pointer::type = float +template +struct remove_const {using type = T;}; + +/// @brief Template specialization of trait class use to remove const qualifier type from a type +/// @tparam T Type of the const type +/// @details remove_pointer::type = float +template +struct remove_const {using type = T;}; + +// --------------------------> util::remove_reference <------------------------------------ + +/// @brief Trait use to remove reference, i.e. "&", qualifier from a type. Default implementation is just a pass-through +/// @tparam T Type +/// @details remove_pointer::type = float +template +struct remove_reference {using type = T;}; + +/// @brief Template specialization of trait class use to remove reference, i.e. "&", qualifier from a type +/// @tparam T Type of the reference +/// @details remove_pointer::type = float +template +struct remove_reference {using type = T;}; + +// --------------------------> util::remove_pointer <------------------------------------ + +/// @brief Trait use to remove pointer, i.e. "*", qualifier from a type. Default implementation is just a pass-through +/// @tparam T Type +/// @details remove_pointer::type = float +template +struct remove_pointer {using type = T;}; + +/// @brief Template specialization of trait class use to to remove pointer, i.e. 
"*", qualifier from a type +/// @tparam T Type of the pointer +/// @details remove_pointer::type = float +template +struct remove_pointer {using type = T;}; + +// --------------------------> util::match_const <------------------------------------ + +/// @brief Trait used to transfer the const-ness of a reference type to another type +/// @tparam T Type whose const-ness needs to match the reference type +/// @tparam ReferenceT Reference type that is not const +/// @details match_const::type = int +/// match_const::type = int +template +struct match_const {using type = typename remove_const::type;}; + +/// @brief Template specialization used to transfer the const-ness of a reference type to another type +/// @tparam T Type that will adopt the const-ness of the reference type +/// @tparam ReferenceT Reference type that is const +/// @details match_const::type = const int +/// match_const::type = const int +template +struct match_const {using type = const typename remove_const::type;}; + +// --------------------------> util::is_specialization <------------------------------------ + +/// @brief Metafunction used to determine if the first template +/// parameter is a specialization of the class template +/// given in the second template parameter. +/// +/// @details is_specialization, Vec3>::value == true; +/// is_specialization::value == true; +/// is_specialization, std::vector>::value == true; +template class TemplateType> +struct is_specialization {static const bool value = false;}; +template class TemplateType> +struct is_specialization, TemplateType> +{ + static const bool value = true; +};// util::is_specialization + +// --------------------------> util::PtrDiff <------------------------------------ + +/// @brief Compute the distance, in bytes, between two pointers, dist = p - q +/// @param p fist pointer, assumed to NOT be NULL +/// @param q second pointer, assumed to NOT be NULL +/// @return signed distance between pointer, p - q, addresses in units of bytes +__hostdev__ inline static int64_t PtrDiff(const void* p, const void* q) +{ + NANOVDB_ASSERT(p && q); + return reinterpret_cast(p) - reinterpret_cast(q); +}// util::PtrDiff + +// --------------------------> util::PtrAdd <------------------------------------ + +/// @brief Adds a byte offset to a non-const pointer to produce another non-const pointer +/// @tparam DstT Type of the return pointer (defaults to void) +/// @param p non-const input pointer, assumed to NOT be NULL +/// @param offset signed byte offset +/// @return a non-const pointer defined as the offset of an input pointer +template +__hostdev__ inline static DstT* PtrAdd(void* p, int64_t offset) +{ + NANOVDB_ASSERT(p); + return reinterpret_cast(reinterpret_cast(p) + offset); +}// util::PtrAdd + +/// @brief Adds a byte offset to a const pointer to produce another const pointer +/// @tparam DstT Type of the return pointer (defaults to void) +/// @param p const input pointer, assumed to NOT be NULL +/// @param offset signed byte offset +/// @return a const pointer defined as the offset of a const input pointer +template +__hostdev__ inline static const DstT* PtrAdd(const void* p, int64_t offset) +{ + NANOVDB_ASSERT(p); + return reinterpret_cast(reinterpret_cast(p) + offset); +}// util::PtrAdd + +// -------------------> findLowestOn <---------------------------- + +/// @brief Returns the index of the lowest, i.e. least significant, on bit in the specified 32 bit word +/// +/// @warning Assumes that at least one bit is set in the word, i.e. @a v != uint32_t(0)! 
+NANOVDB_HOSTDEV_DISABLE_WARNING +__hostdev__ inline uint32_t findLowestOn(uint32_t v) +{ + NANOVDB_ASSERT(v); +#if (defined(__CUDA_ARCH__) || defined(__HIP__)) && defined(NANOVDB_USE_INTRINSICS) + return __ffs(v) - 1; // one based indexing +#elif defined(_MSC_VER) && defined(NANOVDB_USE_INTRINSICS) + unsigned long index; + _BitScanForward(&index, v); + return static_cast(index); +#elif (defined(__GNUC__) || defined(__clang__)) && defined(NANOVDB_USE_INTRINSICS) + return static_cast(__builtin_ctzl(v)); +#else + //NANO_WARNING("Using software implementation for findLowestOn(uint32_t v)") + static const unsigned char DeBruijn[32] = { + 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9}; +// disable unary minus on unsigned warning +#if defined(_MSC_VER) && !defined(__NVCC__) +#pragma warning(push) +#pragma warning(disable : 4146) +#endif + return DeBruijn[uint32_t((v & -v) * 0x077CB531U) >> 27]; +#if defined(_MSC_VER) && !defined(__NVCC__) +#pragma warning(pop) +#endif + +#endif +}// util::findLowestOn(uint32_t) + +/// @brief Returns the index of the lowest, i.e. least significant, on bit in the specified 64 bit word +/// +/// @warning Assumes that at least one bit is set in the word, i.e. @a v != uint32_t(0)! +NANOVDB_HOSTDEV_DISABLE_WARNING +__hostdev__ inline uint32_t findLowestOn(uint64_t v) +{ + NANOVDB_ASSERT(v); +#if (defined(__CUDA_ARCH__) || defined(__HIP__)) && defined(NANOVDB_USE_INTRINSICS) + return __ffsll(static_cast(v)) - 1; // one based indexing +#elif defined(_MSC_VER) && defined(NANOVDB_USE_INTRINSICS) + unsigned long index; + _BitScanForward64(&index, v); + return static_cast(index); +#elif (defined(__GNUC__) || defined(__clang__)) && defined(NANOVDB_USE_INTRINSICS) + return static_cast(__builtin_ctzll(v)); +#else + //NANO_WARNING("Using software implementation for util::findLowestOn(uint64_t)") + static const unsigned char DeBruijn[64] = { + 0, 1, 2, 53, 3, 7, 54, 27, 4, 38, 41, 8, 34, 55, 48, 28, + 62, 5, 39, 46, 44, 42, 22, 9, 24, 35, 59, 56, 49, 18, 29, 11, + 63, 52, 6, 26, 37, 40, 33, 47, 61, 45, 43, 21, 23, 58, 17, 10, + 51, 25, 36, 32, 60, 20, 57, 16, 50, 31, 19, 15, 30, 14, 13, 12, + }; +// disable unary minus on unsigned warning +#if defined(_MSC_VER) && !defined(__NVCC__) +#pragma warning(push) +#pragma warning(disable : 4146) +#endif + return DeBruijn[uint64_t((v & -v) * UINT64_C(0x022FDD63CC95386D)) >> 58]; +#if defined(_MSC_VER) && !defined(__NVCC__) +#pragma warning(pop) +#endif + +#endif +}// util::findLowestOn(uint64_t) + +// -------------------> findHighestOn <---------------------------- + +/// @brief Returns the index of the highest, i.e. most significant, on bit in the specified 32 bit word +/// +/// @warning Assumes that at least one bit is set in the word, i.e. @a v != uint32_t(0)! +NANOVDB_HOSTDEV_DISABLE_WARNING +__hostdev__ inline uint32_t findHighestOn(uint32_t v) +{ + NANOVDB_ASSERT(v); +#if (defined(__CUDA_ARCH__) || defined(__HIP__)) && defined(NANOVDB_USE_INTRINSICS) + return sizeof(uint32_t) * 8 - 1 - __clz(v); // Return the number of consecutive high-order zero bits in a 32-bit integer. 
+#elif defined(_MSC_VER) && defined(NANOVDB_USE_INTRINSICS) + unsigned long index; + _BitScanReverse(&index, v); + return static_cast(index); +#elif (defined(__GNUC__) || defined(__clang__)) && defined(NANOVDB_USE_INTRINSICS) + return sizeof(unsigned long) * 8 - 1 - __builtin_clzl(v); +#else + //NANO_WARNING("Using software implementation for util::findHighestOn(uint32_t)") + static const unsigned char DeBruijn[32] = { + 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, + 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31}; + v |= v >> 1; // first round down to one less than a power of 2 + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + return DeBruijn[uint32_t(v * 0x07C4ACDDU) >> 27]; +#endif +}// util::findHighestOn + +/// @brief Returns the index of the highest, i.e. most significant, on bit in the specified 64 bit word +/// +/// @warning Assumes that at least one bit is set in the word, i.e. @a v != uint32_t(0)! +NANOVDB_HOSTDEV_DISABLE_WARNING +__hostdev__ inline uint32_t findHighestOn(uint64_t v) +{ + NANOVDB_ASSERT(v); +#if (defined(__CUDA_ARCH__) || defined(__HIP__)) && defined(NANOVDB_USE_INTRINSICS) + return sizeof(unsigned long) * 8 - 1 - __clzll(static_cast(v)); +#elif defined(_MSC_VER) && defined(NANOVDB_USE_INTRINSICS) + unsigned long index; + _BitScanReverse64(&index, v); + return static_cast(index); +#elif (defined(__GNUC__) || defined(__clang__)) && defined(NANOVDB_USE_INTRINSICS) + return sizeof(unsigned long) * 8 - 1 - __builtin_clzll(v); +#else + const uint32_t* p = reinterpret_cast(&v); + return p[1] ? 32u + findHighestOn(p[1]) : findHighestOn(p[0]); +#endif +}// util::findHighestOn + +// ----------------------------> util::countOn <-------------------------------------- + +/// @return Number of bits that are on in the specified 64-bit word +NANOVDB_HOSTDEV_DISABLE_WARNING +__hostdev__ inline uint32_t countOn(uint64_t v) +{ +#if (defined(__CUDA_ARCH__) || defined(__HIP__)) && defined(NANOVDB_USE_INTRINSICS) + //#warning Using popcll for util::countOn + return __popcll(v); +// __popcnt64 intrinsic support was added in VS 2019 16.8 +#elif defined(_MSC_VER) && defined(_M_X64) && (_MSC_VER >= 1928) && defined(NANOVDB_USE_INTRINSICS) + //#warning Using popcnt64 for util::countOn + return uint32_t(__popcnt64(v)); +#elif (defined(__GNUC__) || defined(__clang__)) && defined(NANOVDB_USE_INTRINSICS) + //#warning Using builtin_popcountll for util::countOn + return __builtin_popcountll(v); +#else // use software implementation + //NANO_WARNING("Using software implementation for util::countOn") + v = v - ((v >> 1) & uint64_t(0x5555555555555555)); + v = (v & uint64_t(0x3333333333333333)) + ((v >> 2) & uint64_t(0x3333333333333333)); + return (((v + (v >> 4)) & uint64_t(0xF0F0F0F0F0F0F0F)) * uint64_t(0x101010101010101)) >> 56; +#endif +}// util::countOn(uint64_t) + +}// namespace util ================================================================== + +[[deprecated("Use nanovdb::util::findLowestOn instead")]] +__hostdev__ inline uint32_t FindLowestOn(uint32_t v){return util::findLowestOn(v);} +[[deprecated("Use nanovdb::util::findLowestOn instead")]] +__hostdev__ inline uint32_t FindLowestOn(uint64_t v){return util::findLowestOn(v);} +[[deprecated("Use nanovdb::util::findHighestOn instead")]] +__hostdev__ inline uint32_t FindHighestOn(uint32_t v){return util::findHighestOn(v);} +[[deprecated("Use nanovdb::util::findHighestOn instead")]] +__hostdev__ inline uint32_t FindHighestOn(uint64_t v){return util::findHighestOn(v);} +[[deprecated("Use 
nanovdb::util::countOn instead")]] +__hostdev__ inline uint32_t CountOn(uint64_t v){return util::countOn(v);} + +} // namespace nanovdb =================================================================== + +#endif // end of NANOVDB_UTIL_UTIL_H_HAS_BEEN_INCLUDED diff --git a/warp/native/nanovdb/util/cuda/Timer.h b/warp/native/nanovdb/util/cuda/Timer.h new file mode 100644 index 00000000..bd73f8e3 --- /dev/null +++ b/warp/native/nanovdb/util/cuda/Timer.h @@ -0,0 +1,116 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: MPL-2.0 + +/// @file nanovdb/util/cuda/Timer.h +/// +/// @author Ken Museth +/// +/// @brief A simple GPU timing class + +#ifndef NANOVDB_UTIL_CUDA_TIMER_H_HAS_BEEN_INCLUDED +#define NANOVDB_UTIL_CUDA_TIMER_H_HAS_BEEN_INCLUDED + +#include // for std::cerr +#include +#include + +namespace nanovdb { + +namespace util{ namespace cuda { + +class Timer +{ + cudaStream_t mStream{0}; + cudaEvent_t mStart, mStop; + +public: + /// @brief Default constructor + /// @param stream CUDA stream to be timed (defaults to stream 0) + /// @note Starts the timer + Timer(cudaStream_t stream = 0) : mStream(stream) + { + cudaEventCreate(&mStart); + cudaEventCreate(&mStop); + cudaEventRecord(mStart, mStream); + } + + /// @brief Construct and start the timer + /// @param msg string message to be printed when timer is started + /// @param stream CUDA stream to be timed (defaults to stream 0) + /// @param os output stream for the message above + Timer(const std::string &msg, cudaStream_t stream = 0, std::ostream& os = std::cerr) + : mStream(stream) + { + os << msg << " ... " << std::flush; + cudaEventCreate(&mStart); + cudaEventCreate(&mStop); + cudaEventRecord(mStart, mStream); + } + + /// @brief Destructor + ~Timer() + { + cudaEventDestroy(mStart); + cudaEventDestroy(mStop); + } + + /// @brief Start the timer + /// @param stream CUDA stream to be timed (defaults to stream 0) + /// @param os output stream for the message above + void start() {cudaEventRecord(mStart, mStream);} + + /// @brief Start the timer + /// @param msg string message to be printed when timer is started + + /// @param os output stream for the message above + void start(const std::string &msg, std::ostream& os = std::cerr) + { + os << msg << " ... " << std::flush; + this->start(); + } + + /// @brief Start the timer + /// @param msg string message to be printed when timer is started + /// @param os output stream for the message above + void start(const char* msg, std::ostream& os = std::cerr) + { + os << msg << " ... 
" << std::flush; + this->start(); + } + + /// @brief elapsed time (since start) in miliseconds + /// @return elapsed time (since start) in miliseconds + float elapsed() + { + cudaEventRecord(mStop, mStream); + cudaEventSynchronize(mStop); + float diff = 0.0f; + cudaEventElapsedTime(&diff, mStart, mStop); + return diff; + } + + /// @brief stop the timer + /// @param os output stream for the message above + void stop(std::ostream& os = std::cerr) + { + float diff = this->elapsed(); + os << "completed in " << diff << " milliseconds" << std::endl; + } + + /// @brief stop and start the timer + /// @param msg string message to be printed when timer is started + /// @warning Remember to call start before restart + void restart(const std::string &msg, std::ostream& os = std::cerr) + { + this->stop(); + this->start(msg, os); + } +};// Timer + +}}// namespace util::cuda + +using GpuTimer [[deprecated("Use nanovdb::util::cuda::Timer instead")]]= util::cuda::Timer; + +} // namespace nanovdb + +#endif // NANOVDB_UTIL_CUDA_TIMER_H_HAS_BEEN_INCLUDED diff --git a/warp/native/nanovdb/util/cuda/Util.h b/warp/native/nanovdb/util/cuda/Util.h new file mode 100644 index 00000000..b4391bab --- /dev/null +++ b/warp/native/nanovdb/util/cuda/Util.h @@ -0,0 +1,193 @@ +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: MPL-2.0 + +/*! + \file nanovdb/util/cuda/Util.h + + \author Ken Museth + + \date December 20, 2023 + + \brief Cuda specific utility functions +*/ + +#ifndef NANOVDB_UTIL_CUDA_UTIL_H_HAS_BEEN_INCLUDED +#define NANOVDB_UTIL_CUDA_UTIL_H_HAS_BEEN_INCLUDED + +#include +#include +#include // for stderr and NANOVDB_ASSERT + +// change 1 -> 0 to only perform asserts during debug builds +#if 1 || defined(DEBUG) || defined(_DEBUG) + static inline void gpuAssert(cudaError_t code, const char* file, int line, bool abort = true) + { + if (code != cudaSuccess) { + fprintf(stderr, "CUDA error %u: %s (%s:%d)\n", unsigned(code), cudaGetErrorString(code), file, line); + //fprintf(stderr, "CUDA Runtime Error: %s %s %d\n", cudaGetErrorString(code), file, line); + if (abort) exit(code); + } + } + static inline void ptrAssert(const void* ptr, const char* msg, const char* file, int line, bool abort = true) + { + if (ptr == nullptr) { + fprintf(stderr, "NULL pointer error: %s %s %d\n", msg, file, line); + if (abort) exit(1); + } else if (uint64_t(ptr) % 32) { + fprintf(stderr, "Pointer misalignment error: %s %s %d\n", msg, file, line); + if (abort) exit(1); + } + } +#else + static inline void gpuAssert(cudaError_t, const char*, int, bool = true){} + static inline void ptrAssert(void*, const char*, const char*, int, bool = true){} +#endif + +// Convenience function for checking CUDA runtime API results +// can be wrapped around any runtime API call. No-op in release builds. +#define cudaCheck(ans) \ + { \ + gpuAssert((ans), __FILE__, __LINE__); \ + } + +#define checkPtr(ptr, msg) \ + { \ + ptrAssert((ptr), (msg), __FILE__, __LINE__); \ + } + +#define cudaSync() \ + { \ + cudaCheck(cudaDeviceSynchronize()); \ + } + +#define cudaCheckError() \ + { \ + cudaCheck(cudaGetLastError()); \ + } + +namespace nanovdb {// ========================================================= + +namespace util{ namespace cuda {// ====================================================== + +//#define NANOVDB_USE_SYNC_CUDA_MALLOC +// cudaMallocAsync and cudaFreeAsync were introduced in CUDA 11.2 so we introduce +// custom implementations that map to cudaMalloc and cudaFree below. 
If NANOVDB_USE_SYNC_CUDA_MALLOC +// is defined these implementations will also be defined, which is useful in virtualized environments +// that slice up the GPU and share it between instances as vGPU's. GPU unified memory is usually disabled +// out of security considerations. Asynchronous CUDA malloc/free depends on GPU unified memory, so it +// is not possible to use cudaMallocAsync and cudaFreeAsync in such environments. + +#if (CUDART_VERSION < 11020) || defined(NANOVDB_USE_SYNC_CUDA_MALLOC) // 11.2 introduced cudaMallocAsync and cudaFreeAsync + +/// @brief Simple wrapper that calls cudaMalloc +/// @param d_ptr Device pointer to allocated device memory +/// @param size Number of bytes to allocate +/// @param dummy The stream establishing the stream ordering contract and the memory pool to allocate from (ignored) +/// @return Cuda error code +inline cudaError_t mallocAsync(void** d_ptr, size_t size, cudaStream_t){return cudaMalloc(d_ptr, size);} + +/// @brief Simple wrapper that calls cudaFree +/// @param d_ptr Device pointer that will be freed +/// @param dummy The stream establishing the stream ordering promise (ignored) +/// @return Cuda error code +inline cudaError_t freeAsync(void* d_ptr, cudaStream_t){return cudaFree(d_ptr);} + +#else + +/// @brief Simple wrapper that calls cudaMallocAsync +/// @param d_ptr Device pointer to allocated device memory +/// @param size Number of bytes to allocate +/// @param stream The stream establishing the stream ordering contract and the memory pool to allocate from +/// @return Cuda error code +inline cudaError_t mallocAsync(void** d_ptr, size_t size, cudaStream_t stream){return cudaMallocAsync(d_ptr, size, stream);} + +/// @brief Simple wrapper that calls cudaFreeAsync +/// @param d_ptr Device pointer that will be freed +/// @param stream The stream establishing the stream ordering promise +/// @return Cuda error code +inline cudaError_t freeAsync(void* d_ptr, cudaStream_t stream){return cudaFreeAsync(d_ptr, stream);} + +#endif + +/// @brief Simple (naive) implementation of a unique device pointer +/// using stream ordered memory allocation and deallocation. 
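+/// A short usage sketch; the kernel name and the sizes below are hypothetical:
+/// @code
+///     cudaStream_t stream = 0;
+///     util::cuda::unique_ptr<float> d_data(1024, stream);// allocates 1024 floats via mallocAsync
+///     myKernel<<<util::cuda::blocksPerGrid(1024, 256), 256, 0, stream>>>(d_data.get());
+///     // the buffer is released with freeAsync on the same stream when d_data goes out of scope
+/// @endcode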
+/// @tparam T Type of the device pointer +template +class unique_ptr +{ + T *mPtr;// pointer to stream ordered memory allocation + cudaStream_t mStream; +public: + unique_ptr(size_t count = 0, cudaStream_t stream = 0) : mPtr(nullptr), mStream(stream) + { + if (count>0) cudaCheck(mallocAsync((void**)&mPtr, count*sizeof(T), stream)); + } + unique_ptr(const unique_ptr&) = delete; + unique_ptr(unique_ptr&& other) : mPtr(other.mPtr), mStream(other.mStream) + { + other.mPtr = nullptr; + } + ~unique_ptr() + { + if (mPtr) cudaCheck(freeAsync(mPtr, mStream)); + } + unique_ptr& operator=(const unique_ptr&) = delete; + unique_ptr& operator=(unique_ptr&& rhs) noexcept + { + mPtr = rhs.mPtr; + mStream = rhs.mStream; + rhs.mPtr = nullptr; + return *this; + } + void reset() { + if (mPtr) { + cudaCheck(freeAsync(mPtr, mStream)); + mPtr = nullptr; + } + } + T* get() const {return mPtr;} + explicit operator bool() const {return mPtr != nullptr;} +};// util::cuda::unique_ptr + +/// @brief Computes the number of blocks per grid given the problem size and number of threads per block +/// @param numItems Problem size +/// @param threadsPerBlock Number of threads per block (second CUDA launch parameter) +/// @return number of blocks per grid (first CUDA launch parameter) +/// @note CUDA launch parameters: kernel<<< blocksPerGrid, threadsPerBlock, sharedMemSize, streamID>>> +inline size_t blocksPerGrid(size_t numItems, size_t threadsPerBlock) +{ + NANOVDB_ASSERT(numItems > 0 && threadsPerBlock >= 32 && threadsPerBlock % 32 == 0); + return (numItems + threadsPerBlock - 1) / threadsPerBlock; +} + + +#if defined(__CUDACC__)// the following functions only run on the GPU! + +/// @brief Cuda kernel that launches device lambda functions +/// @param numItems Problem size +template +__global__ void lambdaKernel(const size_t numItems, Func func, Args... args) +{ + const int tid = blockIdx.x * blockDim.x + threadIdx.x; + if (tid >= numItems) return; + func(tid, args...); +}// util::cuda::lambdaKernel + +#endif// __CUDACC__ + +}}// namespace util::cuda ============================================================ + +}// namespace nanovdb =============================================================== + +#if defined(__CUDACC__)// the following functions only run on the GPU! +template +[[deprecated("Use nanovdb::cuda::lambdaKernel instead")]] +__global__ void cudaLambdaKernel(const size_t numItems, Func func, Args... args) +{ + const int tid = blockIdx.x * blockDim.x + threadIdx.x; + if (tid >= numItems) return; + func(tid, args...); +} +#endif// __CUDACC__ + +#endif// NANOVDB_UTIL_CUDA_UTIL_H_HAS_BEEN_INCLUDED \ No newline at end of file diff --git a/warp/native/volume.cpp b/warp/native/volume.cpp index 68c3c554..36267c5e 100644 --- a/warp/native/volume.cpp +++ b/warp/native/volume.cpp @@ -6,10 +6,10 @@ * license agreement from NVIDIA CORPORATION is strictly prohibited. 
*/ -#include "volume.h" +#include "cuda_util.h" #include "volume_builder.h" +#include "volume_impl.h" #include "warp.h" -#include "cuda_util.h" #include @@ -21,40 +21,50 @@ namespace struct VolumeDesc { // NanoVDB buffer either in device or host memory - void* buffer; + void *buffer; uint64_t size_in_bytes; + bool owner; // whether the buffer should be deallocated when the volume is destroyed - // offset to the voxel values of the first leaf node relative to buffer - uint64_t first_voxel_data_offs; - - // copy of the grids's metadata to keep on the host for device volumes pnanovdb_grid_t grid_data; - - // copy of the tree's metadata to keep on the host for device volumes pnanovdb_tree_t tree_data; + // Host-accessible version of the blind metadata (copy if GPU, alias if CPU) + pnanovdb_gridblindmetadata_t *blind_metadata; + // CUDA context for this volume (NULL if CPU) - void* context; + void *context; + + pnanovdb_buf_t as_pnano() const + { + return pnanovdb_make_buf(static_cast(buffer), size_in_bytes); + } }; // Host-side volume descriptors. Maps each CPU/GPU volume buffer address (id) to a CPU desc std::map g_volume_descriptors; -bool volume_get_descriptor(uint64_t id, VolumeDesc& volumeDesc) +bool volume_get_descriptor(uint64_t id, const VolumeDesc *&volumeDesc) { - if (id == 0) return false; + if (id == 0) + return false; - const auto& iter = g_volume_descriptors.find(id); + const auto &iter = g_volume_descriptors.find(id); if (iter == g_volume_descriptors.end()) return false; else - volumeDesc = iter->second; + volumeDesc = &iter->second; return true; } -void volume_add_descriptor(uint64_t id, const VolumeDesc& volumeDesc) +bool volume_exists(const void *id) { - g_volume_descriptors[id] = volumeDesc; + const VolumeDesc *volume; + return volume_get_descriptor((uint64_t)id, volume); +} + +void volume_add_descriptor(uint64_t id, VolumeDesc &&volumeDesc) +{ + g_volume_descriptors[id] = std::move(volumeDesc); } void volume_rem_descriptor(uint64_t id) @@ -64,234 +74,456 @@ void volume_rem_descriptor(uint64_t id) } // anonymous namespace - // NB: buf must be a host pointer -uint64_t volume_create_host(void* buf, uint64_t size) +uint64_t volume_create_host(void *buf, uint64_t size, bool copy, bool owner) { - if (size < sizeof(pnanovdb_grid_t) + sizeof(pnanovdb_tree_t)) - return 0; // This cannot be a valid NanoVDB grid with data + if (size > 0 && size < sizeof(pnanovdb_grid_t) + sizeof(pnanovdb_tree_t)) + return 0; // This cannot be a valid NanoVDB grid with data - VolumeDesc volume; + if (!copy && volume_exists(buf)) + { + // descriptor already created for this volume + return 0; + } + VolumeDesc volume; volume.context = NULL; memcpy_h2h(&volume.grid_data, buf, sizeof(pnanovdb_grid_t)); - memcpy_h2h(&volume.tree_data, (pnanovdb_grid_t*)buf + 1, sizeof(pnanovdb_tree_t)); + memcpy_h2h(&volume.tree_data, (pnanovdb_grid_t *)buf + 1, sizeof(pnanovdb_tree_t)); - if (volume.grid_data.magic != PNANOVDB_MAGIC_NUMBER) + if (volume.grid_data.magic != PNANOVDB_MAGIC_NUMBER && volume.grid_data.magic != PNANOVDB_MAGIC_GRID) return 0; + if (size == 0) + { + size = volume.grid_data.grid_size; + } + + // Copy or alias buffer volume.size_in_bytes = size; - volume.buffer = alloc_host(size); - memcpy_h2h(volume.buffer, buf, size); + if (copy) + { + volume.buffer = alloc_host(size); + memcpy_h2h(volume.buffer, buf, size); + volume.owner = true; + } + else + { + volume.buffer = buf; + volume.owner = owner; + } - volume.first_voxel_data_offs = - sizeof(pnanovdb_grid_t) + volume.tree_data.node_offset_leaf + 
PNANOVDB_GRID_TYPE_GET(PNANOVDB_GRID_TYPE_FLOAT, leaf_off_table); + // Alias blind metadata + volume.blind_metadata = reinterpret_cast(static_cast(volume.buffer) + + volume.grid_data.blind_metadata_offset); uint64_t id = (uint64_t)volume.buffer; - volume_add_descriptor(id, volume); + volume_add_descriptor(id, std::move(volume)); return id; } // NB: buf must be a pointer on the same device -uint64_t volume_create_device(void* context, void* buf, uint64_t size) +uint64_t volume_create_device(void *context, void *buf, uint64_t size, bool copy, bool owner) { - if (size < sizeof(pnanovdb_grid_t) + sizeof(pnanovdb_tree_t)) - return 0; // This cannot be a valid NanoVDB grid with data + if (size > 0 && size < sizeof(pnanovdb_grid_t) + sizeof(pnanovdb_tree_t)) + return 0; // This cannot be a valid NanoVDB grid with data + + if (!copy && volume_exists(buf)) + { + // descriptor already created for this volume + return 0; + } ContextGuard guard(context); VolumeDesc volume; - volume.context = context ? context : cuda_context_get_current(); memcpy_d2h(WP_CURRENT_CONTEXT, &volume.grid_data, buf, sizeof(pnanovdb_grid_t)); - memcpy_d2h(WP_CURRENT_CONTEXT, &volume.tree_data, (pnanovdb_grid_t*)buf + 1, sizeof(pnanovdb_tree_t)); + memcpy_d2h(WP_CURRENT_CONTEXT, &volume.tree_data, (pnanovdb_grid_t *)buf + 1, sizeof(pnanovdb_tree_t)); + // no sync needed since the above copies are to pageable memory - if (volume.grid_data.magic != PNANOVDB_MAGIC_NUMBER) + if (volume.grid_data.magic != PNANOVDB_MAGIC_NUMBER && volume.grid_data.magic != PNANOVDB_MAGIC_GRID) return 0; + if (size == 0) + { + size = volume.grid_data.grid_size; + } + + // Copy or alias data buffer volume.size_in_bytes = size; - volume.buffer = alloc_device(WP_CURRENT_CONTEXT, size); - memcpy_d2d(WP_CURRENT_CONTEXT, volume.buffer, buf, size); + if (copy) + { + volume.buffer = alloc_device(WP_CURRENT_CONTEXT, size); + memcpy_d2d(WP_CURRENT_CONTEXT, volume.buffer, buf, size); + volume.owner = true; + } + else + { + volume.buffer = buf; + volume.owner = owner; + } - volume.first_voxel_data_offs = - sizeof(pnanovdb_grid_t) + volume.tree_data.node_offset_leaf + PNANOVDB_GRID_TYPE_GET(PNANOVDB_GRID_TYPE_FLOAT, leaf_off_table); + // Make blind metadata accessible on host + const uint64_t blindmetadata_size = volume.grid_data.blind_metadata_count * sizeof(pnanovdb_gridblindmetadata_t); + volume.blind_metadata = static_cast(alloc_pinned(blindmetadata_size)); + memcpy_d2h(WP_CURRENT_CONTEXT, volume.blind_metadata, + static_cast(volume.buffer) + volume.grid_data.blind_metadata_offset, blindmetadata_size); uint64_t id = (uint64_t)volume.buffer; - - volume_add_descriptor(id, volume); + volume_add_descriptor(id, std::move(volume)); return id; } -static void volume_get_buffer_info(uint64_t id, void** buf, uint64_t* size) +void volume_get_buffer_info(uint64_t id, void **buf, uint64_t *size) { *buf = 0; *size = 0; - VolumeDesc volume; + const VolumeDesc *volume; if (volume_get_descriptor(id, volume)) { - *buf = volume.buffer; - *size = volume.size_in_bytes; + *buf = volume->buffer; + *size = volume->size_in_bytes; } } -void volume_get_buffer_info_host(uint64_t id, void** buf, uint64_t* size) +void volume_get_voxel_size(uint64_t id, float *dx, float *dy, float *dz) { - volume_get_buffer_info(id, buf, size); + *dx = *dx = *dz = 0.0f; + + const VolumeDesc *volume; + if (volume_get_descriptor(id, volume)) + { + *dx = (float)volume->grid_data.voxel_size[0]; + *dy = (float)volume->grid_data.voxel_size[1]; + *dz = (float)volume->grid_data.voxel_size[2]; + } } -void 
volume_get_buffer_info_device(uint64_t id, void** buf, uint64_t* size) +void volume_get_tile_and_voxel_count(uint64_t id, uint32_t &tile_count, uint64_t &voxel_count) { - volume_get_buffer_info(id, buf, size); + tile_count = 0; + voxel_count = 0; + + const VolumeDesc *volume; + if (volume_get_descriptor(id, volume)) + { + tile_count = volume->tree_data.node_count_leaf; + + const uint32_t grid_type = volume->grid_data.grid_type; + + switch (grid_type) + { + case PNANOVDB_GRID_TYPE_ONINDEX: + case PNANOVDB_GRID_TYPE_ONINDEXMASK: + // number of indexable voxels is number of active voxels + voxel_count = volume->tree_data.voxel_count; + break; + default: + // all leaf voxels are indexable + voxel_count = uint64_t(tile_count) * PNANOVDB_LEAF_TABLE_COUNT; + } + } } -void volume_get_voxel_size(uint64_t id, float* dx, float* dy, float* dz) +const char *volume_get_grid_info(uint64_t id, uint64_t *grid_size, uint32_t *grid_index, uint32_t *grid_count, + float translation[3], float transform[9], char type_str[16]) { - *dx = *dx = *dz = 0.0f; + const VolumeDesc *volume; + if (volume_get_descriptor(id, volume)) + { + const pnanovdb_grid_t &grid_data = volume->grid_data; + *grid_count = grid_data.grid_count; + *grid_index = grid_data.grid_index; + *grid_size = grid_data.grid_size; - VolumeDesc volume; + memcpy(translation, grid_data.map.vecf, sizeof(grid_data.map.vecf)); + memcpy(transform, grid_data.map.matf, sizeof(grid_data.map.matf)); + + nanovdb::toStr(type_str, static_cast(grid_data.grid_type)); + return (const char *)grid_data.grid_name; + } + + *grid_size = 0; + *grid_index = 0; + *grid_count = 0; + type_str[0] = 0; + + return nullptr; +} + +uint32_t volume_get_blind_data_count(uint64_t id) +{ + const VolumeDesc *volume; if (volume_get_descriptor(id, volume)) { - *dx = (float)volume.grid_data.voxel_size[0]; - *dy = (float)volume.grid_data.voxel_size[1]; - *dz = (float)volume.grid_data.voxel_size[2]; + return volume->grid_data.blind_metadata_count; } + return 0; } -void volume_get_tiles_host(uint64_t id, void** buf, uint64_t* size) +const char *volume_get_blind_data_info(uint64_t id, uint32_t data_index, void **buf, uint64_t *value_count, + uint32_t *value_size, char type_str[16]) { - static constexpr uint32_t MASK = (1u << 3u) - 1u; // mask for bit operations + const VolumeDesc *volume; + if (volume_get_descriptor(id, volume) && data_index < volume->grid_data.blind_metadata_count) + { + const pnanovdb_gridblindmetadata_t &metadata = volume->blind_metadata[data_index]; + *value_count = metadata.value_count; + *value_size = metadata.value_size; + + nanovdb::toStr(type_str, static_cast(metadata.data_type)); + *buf = static_cast(volume->buffer) + volume->grid_data.blind_metadata_offset + + data_index * sizeof(pnanovdb_gridblindmetadata_t) + metadata.data_offset; + return (const char *)metadata.name; + } + *buf = nullptr; + *value_count = 0; + *value_size = 0; + type_str[0] = 0; + return nullptr; +} - *buf = 0; - *size = 0; +void volume_get_tiles_host(uint64_t id, void *buf) +{ + static constexpr uint32_t MASK = (1u << 3u) - 1u; // mask for bit operations - VolumeDesc volume; + const VolumeDesc *volume; if (volume_get_descriptor(id, volume)) { - const uint32_t leaf_count = volume.tree_data.node_count_leaf; - *size = leaf_count * sizeof(pnanovdb_coord_t); + const uint32_t leaf_count = volume->tree_data.node_count_leaf; + + pnanovdb_coord_t *leaf_coords = static_cast(buf); + + const uint64_t first_leaf = + (uint64_t)volume->buffer + sizeof(pnanovdb_grid_t) + volume->tree_data.node_offset_leaf; + const 
uint32_t leaf_stride = PNANOVDB_GRID_TYPE_GET(volume->grid_data.grid_type, leaf_size); + + const pnanovdb_buf_t pnano_buf = volume->as_pnano(); + + for (uint32_t i = 0; i < leaf_count; ++i) + { + pnanovdb_leaf_handle_t leaf = volume::get_leaf(pnano_buf, i); + leaf_coords[i] = volume::leaf_origin(pnano_buf, leaf); + } + } +} - pnanovdb_coord_t *leaf_coords = (pnanovdb_coord_t*)alloc_host(*size); - *buf = leaf_coords; +void volume_get_voxels_host(uint64_t id, void *buf) +{ + const VolumeDesc *volume; + if (volume_get_descriptor(id, volume)) + { + uint32_t leaf_count; + uint64_t voxel_count; + volume_get_tile_and_voxel_count(id, leaf_count, voxel_count); - const uint64_t first_leaf = (uint64_t)volume.buffer + sizeof(pnanovdb_grid_t) + volume.tree_data.node_offset_leaf; - const uint32_t leaf_stride = PNANOVDB_GRID_TYPE_GET(volume.grid_data.grid_type, leaf_size); + pnanovdb_coord_t *voxel_coords = static_cast(buf); + const pnanovdb_buf_t pnano_buf = volume->as_pnano(); for (uint32_t i = 0; i < leaf_count; ++i) { - leaf_coords[i] = ((pnanovdb_leaf_t*)(first_leaf + leaf_stride * i))->bbox_min; - leaf_coords[i].x &= ~MASK; - leaf_coords[i].y &= ~MASK; - leaf_coords[i].z &= ~MASK; + pnanovdb_leaf_handle_t leaf = volume::get_leaf(pnano_buf, i); + pnanovdb_coord_t leaf_coords = volume::leaf_origin(pnano_buf, leaf); + + for (uint32_t n = 0; n < 512; ++n) + { + pnanovdb_coord_t loc_ijk = volume::leaf_offset_to_local_coord(n); + pnanovdb_coord_t ijk = { + loc_ijk.x + leaf_coords.x, + loc_ijk.y + leaf_coords.y, + loc_ijk.z + leaf_coords.z, + }; + + const uint64_t index = volume::leaf_voxel_index(pnano_buf, i, ijk); + if (index < voxel_count) + { + voxel_coords[index] = ijk; + } + } } } } void volume_destroy_host(uint64_t id) { - free_host((void*)id); - volume_rem_descriptor(id); + const VolumeDesc *volume; + if (volume_get_descriptor(id, volume)) + { + if (volume->owner) + { + free_host(volume->buffer); + } + volume_rem_descriptor(id); + } } void volume_destroy_device(uint64_t id) { - VolumeDesc volume; + const VolumeDesc *volume; if (volume_get_descriptor(id, volume)) { - ContextGuard guard(volume.context); - free_device(WP_CURRENT_CONTEXT, volume.buffer); + ContextGuard guard(volume->context); + if (volume->owner) + { + free_device(WP_CURRENT_CONTEXT, volume->buffer); + } + free_pinned(volume->blind_metadata); volume_rem_descriptor(id); } } - #if WP_ENABLE_CUDA -uint64_t volume_f_from_tiles_device(void* context, void* points, int num_points, float voxel_size, float bg_value, float tx, float ty, float tz, bool points_in_world_space) +uint64_t volume_f_from_tiles_device(void *context, void *points, int num_points, float voxel_size, float bg_value, + float tx, float ty, float tz, bool points_in_world_space) { - nanovdb::FloatGrid* grid; + nanovdb::FloatGrid *grid; size_t gridSize; BuildGridParams params; params.voxel_size = voxel_size; params.background_value = bg_value; params.translation = nanovdb::Vec3f{tx, ty, tz}; - build_grid_from_tiles(grid, gridSize, points, num_points, points_in_world_space, params); + build_grid_from_points(grid, gridSize, points, num_points, points_in_world_space, params); - return volume_create_device(context, grid, gridSize); + return volume_create_device(context, grid, gridSize, false, true); } -uint64_t volume_v_from_tiles_device(void* context, void* points, int num_points, float voxel_size, float bg_value_x, float bg_value_y, float bg_value_z, float tx, float ty, float tz, bool points_in_world_space) +uint64_t volume_v_from_tiles_device(void *context, void *points, int 
num_points, float voxel_size, float bg_value_x, + float bg_value_y, float bg_value_z, float tx, float ty, float tz, + bool points_in_world_space) { - nanovdb::Vec3fGrid* grid; + nanovdb::Vec3fGrid *grid; size_t gridSize; BuildGridParams params; params.voxel_size = voxel_size; params.background_value = nanovdb::Vec3f{bg_value_x, bg_value_y, bg_value_z}; params.translation = nanovdb::Vec3f{tx, ty, tz}; - build_grid_from_tiles(grid, gridSize, points, num_points, points_in_world_space, params); + build_grid_from_points(grid, gridSize, points, num_points, points_in_world_space, params); - return volume_create_device(context, grid, gridSize); + return volume_create_device(context, grid, gridSize, false, true); } -uint64_t volume_i_from_tiles_device(void* context, void* points, int num_points, float voxel_size, int bg_value, float tx, float ty, float tz, bool points_in_world_space) +uint64_t volume_i_from_tiles_device(void *context, void *points, int num_points, float voxel_size, int bg_value, + float tx, float ty, float tz, bool points_in_world_space) { - nanovdb::Int32Grid* grid; + nanovdb::Int32Grid *grid; size_t gridSize; BuildGridParams params; params.voxel_size = voxel_size; params.background_value = (int32_t)(bg_value); params.translation = nanovdb::Vec3f{tx, ty, tz}; - build_grid_from_tiles(grid, gridSize, points, num_points, points_in_world_space, params); + build_grid_from_points(grid, gridSize, points, num_points, points_in_world_space, params); - return volume_create_device(context, grid, gridSize); + return volume_create_device(context, grid, gridSize, false, true); } -void launch_get_leaf_coords(void* context, const uint32_t leaf_count, pnanovdb_coord_t *leaf_coords, const uint64_t first_leaf, const uint32_t leaf_stride); +uint64_t volume_index_from_tiles_device(void *context, void *points, int num_points, float voxel_size, float tx, + float ty, float tz, bool points_in_world_space) +{ + nanovdb::IndexGrid *grid; + size_t gridSize; + BuildGridParams params; + params.voxel_size = voxel_size; + params.translation = nanovdb::Vec3f{tx, ty, tz}; -void volume_get_tiles_device(uint64_t id, void** buf, uint64_t* size) + build_grid_from_points(grid, gridSize, points, num_points, points_in_world_space, params); + + return volume_create_device(context, grid, gridSize, false, true); +} + +uint64_t volume_from_active_voxels_device(void *context, void *points, int num_points, float voxel_size, float tx, + float ty, float tz, bool points_in_world_space) { - *buf = 0; - *size = 0; + nanovdb::OnIndexGrid *grid; + size_t gridSize; + BuildGridParams params; + params.voxel_size = voxel_size; + params.translation = nanovdb::Vec3f{tx, ty, tz}; - VolumeDesc volume; + build_grid_from_points(grid, gridSize, points, num_points, points_in_world_space, params); + + return volume_create_device(context, grid, gridSize, false, true); +} + +void launch_get_leaf_coords(void *context, const uint32_t leaf_count, pnanovdb_coord_t *leaf_coords, + pnanovdb_buf_t buf); +void launch_get_voxel_coords(void *context, const uint32_t leaf_count, const uint32_t voxel_count, + pnanovdb_coord_t *voxel_coords, pnanovdb_buf_t buf); + +void volume_get_tiles_device(uint64_t id, void *buf) +{ + const VolumeDesc *volume; if (volume_get_descriptor(id, volume)) { - const uint32_t leaf_count = volume.tree_data.node_count_leaf; - *size = leaf_count * sizeof(pnanovdb_coord_t); + const uint32_t leaf_count = volume->tree_data.node_count_leaf; - pnanovdb_coord_t *leaf_coords = (pnanovdb_coord_t*)alloc_device(volume.context, *size); - *buf = 
leaf_coords; + pnanovdb_coord_t *leaf_coords = static_cast(buf); + launch_get_leaf_coords(volume->context, leaf_count, leaf_coords, volume->as_pnano()); + } +} - const uint64_t first_leaf = (uint64_t)volume.buffer + sizeof(pnanovdb_grid_t) + volume.tree_data.node_offset_leaf; - const uint32_t leaf_stride = PNANOVDB_GRID_TYPE_GET(volume.grid_data.grid_type, leaf_size); +void volume_get_voxels_device(uint64_t id, void *buf) +{ + const VolumeDesc *volume; + if (volume_get_descriptor(id, volume)) + { + uint32_t leaf_count; + uint64_t voxel_count; + volume_get_tile_and_voxel_count(id, leaf_count, voxel_count); - launch_get_leaf_coords(volume.context, leaf_count, leaf_coords, first_leaf, leaf_stride); + pnanovdb_coord_t *voxel_coords = static_cast(buf); + launch_get_voxel_coords(volume->context, leaf_count, voxel_count, voxel_coords, volume->as_pnano()); } } + #else // stubs for non-CUDA platforms -uint64_t volume_f_from_tiles_device(void* context, void* points, int num_points, float voxel_size, float bg_value, float tx, float ty, float tz, bool points_in_world_space) +uint64_t volume_f_from_tiles_device(void *context, void *points, int num_points, float voxel_size, float bg_value, + float tx, float ty, float tz, bool points_in_world_space) +{ + return 0; +} + +uint64_t volume_v_from_tiles_device(void *context, void *points, int num_points, float voxel_size, float bg_value_x, + float bg_value_y, float bg_value_z, float tx, float ty, float tz, + bool points_in_world_space) +{ + return 0; +} + +uint64_t volume_i_from_tiles_device(void *context, void *points, int num_points, float voxel_size, int bg_value, + float tx, float ty, float tz, bool points_in_world_space) { return 0; } -uint64_t volume_v_from_tiles_device(void* context, void* points, int num_points, float voxel_size, float bg_value_x, float bg_value_y, float bg_value_z, float tx, float ty, float tz, bool points_in_world_space) +uint64_t volume_index_from_tiles_device(void *context, void *points, int num_points, float voxel_size, + float tx, float ty, float tz, bool points_in_world_space) { return 0; } -uint64_t volume_i_from_tiles_device(void* context, void* points, int num_points, float voxel_size, int bg_value, float tx, float ty, float tz, bool points_in_world_space) +uint64_t volume_from_active_voxels_device(void *context, void *points, int num_points, float voxel_size, float tx, + float ty, float tz, bool points_in_world_space) { return 0; } -void volume_get_tiles_device(uint64_t id, void** buf, uint64_t* size) {} +void volume_get_tiles_device(uint64_t id, void *buf) +{ +} + +void volume_get_voxels_device(uint64_t id, void *buf) +{ +} #endif diff --git a/warp/native/volume.cu b/warp/native/volume.cu index b0c67dbe..c1586f24 100644 --- a/warp/native/volume.cu +++ b/warp/native/volume.cu @@ -6,27 +6,53 @@ * license agreement from NVIDIA CORPORATION is strictly prohibited. 
*/ -#include "volume.h" -#include "warp.h" #include "cuda_util.h" +#include "volume_impl.h" +#include "warp.h" + +__global__ void volume_get_leaf_coords(const uint32_t leaf_count, pnanovdb_coord_t *leaf_coords, + const pnanovdb_buf_t buf) +{ + const int tid = blockIdx.x * blockDim.x + threadIdx.x; + + if (tid < leaf_count) + { + pnanovdb_leaf_handle_t leaf = wp::volume::get_leaf(buf, tid); + leaf_coords[tid] = wp::volume::leaf_origin(buf, leaf); + } +} -__global__ void volume_get_leaf_coords(const uint32_t leaf_count, pnanovdb_coord_t *leaf_coords, const uint64_t first_leaf, const uint32_t leaf_stride) -{ - static constexpr uint32_t MASK = (1u << 3u) - 1u; // mask for bit operations +__global__ void volume_get_voxel_coords(const uint32_t voxel_count, pnanovdb_coord_t *voxel_coords, + const pnanovdb_buf_t buf) +{ + const uint32_t leaf_index = blockIdx.x; + pnanovdb_leaf_handle_t leaf = wp::volume::get_leaf(buf, leaf_index); + pnanovdb_coord_t leaf_coords = wp::volume::leaf_origin(buf, leaf); - const int tid = blockIdx.x*blockDim.x + threadIdx.x; + pnanovdb_coord_t ijk = { + int32_t(threadIdx.x) + leaf_coords.x, + int32_t(threadIdx.y) + leaf_coords.y, + int32_t(threadIdx.z) + leaf_coords.z, + }; - if (tid < leaf_count) { - leaf_coords[tid] = ((pnanovdb_leaf_t*)(first_leaf + leaf_stride * tid))->bbox_min; - leaf_coords[tid].x &= ~MASK; - leaf_coords[tid].y &= ~MASK; - leaf_coords[tid].z &= ~MASK; + const uint64_t index = wp::volume::leaf_voxel_index(buf, leaf_index, ijk); + if (index < voxel_count) + { + voxel_coords[index] = ijk; } } -void launch_get_leaf_coords(void* context, const uint32_t leaf_count, pnanovdb_coord_t *leaf_coords, const uint64_t first_leaf, const uint32_t leaf_stride) +void launch_get_leaf_coords(void *context, const uint32_t leaf_count, pnanovdb_coord_t *leaf_coords, pnanovdb_buf_t buf) +{ + ContextGuard guard(context); + wp_launch_device(WP_CURRENT_CONTEXT, volume_get_leaf_coords, leaf_count, (leaf_count, leaf_coords, buf)); +} + +void launch_get_voxel_coords(void *context, const uint32_t leaf_count, const uint32_t voxel_count, + pnanovdb_coord_t *voxel_coords, pnanovdb_buf_t buf) { ContextGuard guard(context); - wp_launch_device(WP_CURRENT_CONTEXT, volume_get_leaf_coords, leaf_count, (leaf_count, leaf_coords, first_leaf, leaf_stride)); + cudaStream_t stream = (cudaStream_t)cuda_stream_get_current(); + volume_get_voxel_coords<<>>(voxel_count, voxel_coords, buf); } diff --git a/warp/native/volume.h b/warp/native/volume.h index f15b398d..319f2a1f 100644 --- a/warp/native/volume.h +++ b/warp/native/volume.h @@ -8,529 +8,951 @@ #pragma once +#include "array.h" #include "builtin.h" #define PNANOVDB_C #define PNANOVDB_MEMCPY_CUSTOM #define pnanovdb_memcpy memcpy + +#if defined(WP_NO_CRT) && !defined(__CUDACC__) +// PNanoVDB will try to include unless __CUDACC_RTC__ is defined +#define __CUDACC_RTC__ +#endif + #include "nanovdb/PNanoVDB.h" -#include "nanovdb/PNanoVDBWrite.h" + +#if defined(WP_NO_CRT) && !defined(__CUDACC__) +#undef __CUDACC_RTC__ +#endif namespace wp { namespace volume { +// Need to kept in sync with constants in python-side Volume class static constexpr int CLOSEST = 0; static constexpr int LINEAR = 1; -// helper functions +// pnanovdb helper function + CUDA_CALLABLE inline pnanovdb_buf_t id_to_buffer(uint64_t id) { pnanovdb_buf_t buf; - buf.data = (uint32_t*)id; + buf.data = (uint32_t *)id; return buf; } -CUDA_CALLABLE inline pnanovdb_uint32_t get_grid_type(const pnanovdb_buf_t& buf) +CUDA_CALLABLE inline pnanovdb_grid_handle_t get_grid(pnanovdb_buf_t buf) { 
- const pnanovdb_grid_t *grid_data = (const pnanovdb_grid_t*)buf.data; - return grid_data->grid_type; + return {0u}; } -CUDA_CALLABLE inline pnanovdb_root_handle_t get_root(const pnanovdb_buf_t& buf, - const pnanovdb_grid_handle_t& grid = { 0u }) +CUDA_CALLABLE inline pnanovdb_uint32_t get_grid_type(pnanovdb_buf_t buf) { - const auto tree = pnanovdb_grid_get_tree(buf, grid); - return pnanovdb_tree_get_root(buf, tree); + return pnanovdb_grid_get_grid_type(buf, get_grid(buf)); } -} // namespace volume -CUDA_CALLABLE inline void pnano_read(float& result, pnanovdb_buf_t buf, pnanovdb_root_handle_t root, PNANOVDB_IN(pnanovdb_coord_t) ijk) { - const pnanovdb_address_t address = pnanovdb_root_get_value_address(PNANOVDB_GRID_TYPE_FLOAT, buf, root, ijk); - result = pnanovdb_read_float(buf, address); +CUDA_CALLABLE inline pnanovdb_tree_handle_t get_tree(pnanovdb_buf_t buf) +{ + return pnanovdb_grid_get_tree(buf, get_grid(buf)); } -CUDA_CALLABLE inline void pnano_read(int32_t& result, pnanovdb_buf_t buf, pnanovdb_root_handle_t root, PNANOVDB_IN(pnanovdb_coord_t) ijk) { - const pnanovdb_address_t address = pnanovdb_root_get_value_address(PNANOVDB_GRID_TYPE_INT32, buf, root, ijk); - result = pnanovdb_read_int32(buf, address); + +CUDA_CALLABLE inline pnanovdb_root_handle_t get_root(pnanovdb_buf_t buf) +{ + return pnanovdb_tree_get_root(buf, get_tree(buf)); } -CUDA_CALLABLE inline void pnano_read(vec3& result, pnanovdb_buf_t buf, pnanovdb_root_handle_t root, PNANOVDB_IN(pnanovdb_coord_t) ijk) { - const pnanovdb_address_t address = pnanovdb_root_get_value_address(PNANOVDB_GRID_TYPE_VEC3F, buf, root, ijk); - const pnanovdb_vec3_t v = pnanovdb_read_vec3f(buf, address); - result = {v.x, v.y, v.z}; + +template struct pnano_traits +{ +}; + +// to add support for more grid types, extend this +// and update _volume_supported_value_types in builtins.py + +template <> struct pnano_traits +{ + static constexpr int GRID_TYPE = PNANOVDB_GRID_TYPE_INT32; +}; + +template <> struct pnano_traits +{ + static constexpr int GRID_TYPE = PNANOVDB_GRID_TYPE_INT64; +}; + +template <> struct pnano_traits +{ + static constexpr int GRID_TYPE = PNANOVDB_GRID_TYPE_UINT32; +}; + +template <> struct pnano_traits +{ + static constexpr int GRID_TYPE = PNANOVDB_GRID_TYPE_FLOAT; +}; + +template <> struct pnano_traits +{ + static constexpr int GRID_TYPE = PNANOVDB_GRID_TYPE_DOUBLE; +}; + +template <> struct pnano_traits +{ + static constexpr int GRID_TYPE = PNANOVDB_GRID_TYPE_VEC3F; +}; + +template <> struct pnano_traits +{ + static constexpr int GRID_TYPE = PNANOVDB_GRID_TYPE_VEC3D; +}; + +template <> struct pnano_traits +{ + static constexpr int GRID_TYPE = PNANOVDB_GRID_TYPE_VEC4F; +}; + +template <> struct pnano_traits +{ + static constexpr int GRID_TYPE = PNANOVDB_GRID_TYPE_VEC4D; +}; + +// common accessors over various grid types +// WARNING: implementation below only for >=32b values, but that's the case for all types above +// for smaller types add a specialization + +template CUDA_CALLABLE inline void pnano_read(T &result, pnanovdb_buf_t buf, pnanovdb_address_t address) +{ + result = *reinterpret_cast(buf.data + (address.byte_offset >> 2)); } -CUDA_CALLABLE inline void pnano_read(float& result, pnanovdb_buf_t buf, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc, PNANOVDB_IN(pnanovdb_coord_t) ijk) { - pnanovdb_address_t address = pnanovdb_readaccessor_get_value_address(PNANOVDB_GRID_TYPE_FLOAT, buf, acc, ijk); - result = pnanovdb_read_float(buf, address); +template +CUDA_CALLABLE inline void pnano_write(const T &value, pnanovdb_buf_t 
buf, pnanovdb_address_t address) +{ + *reinterpret_cast(buf.data + (address.byte_offset >> 2)) = value; } -CUDA_CALLABLE inline void pnano_read(int32_t& result, pnanovdb_buf_t buf, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc, PNANOVDB_IN(pnanovdb_coord_t) ijk) { - pnanovdb_address_t address = pnanovdb_readaccessor_get_value_address(PNANOVDB_GRID_TYPE_INT32, buf, acc, ijk); - result = pnanovdb_read_int32(buf, address); + +template +CUDA_CALLABLE inline void pnano_read(T &result, pnanovdb_buf_t buf, pnanovdb_root_handle_t root, + PNANOVDB_IN(pnanovdb_coord_t) ijk) +{ + using traits = pnano_traits; + const pnanovdb_address_t address = pnanovdb_root_get_value_address(traits::GRID_TYPE, buf, root, ijk); + pnano_read(result, buf, address); } -CUDA_CALLABLE inline void pnano_read(vec3& result, pnanovdb_buf_t buf, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc, PNANOVDB_IN(pnanovdb_coord_t) ijk) { - pnanovdb_address_t address = pnanovdb_readaccessor_get_value_address(PNANOVDB_GRID_TYPE_VEC3F, buf, acc, ijk); - const pnanovdb_vec3_t v = pnanovdb_read_vec3f(buf, address); - result = {v.x, v.y, v.z}; + +template +CUDA_CALLABLE inline void pnano_read(T &result, pnanovdb_buf_t buf, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc, + PNANOVDB_IN(pnanovdb_coord_t) ijk) +{ + using traits = pnano_traits; + // pnanovdb_address_t address = pnanovdb_readaccessor_get_value_address(traits::GRID_TYPE, buf, acc, ijk); + pnanovdb_uint32_t level; + const pnanovdb_address_t address = + pnanovdb_readaccessor_get_value_address_and_level(traits::GRID_TYPE, buf, acc, ijk, PNANOVDB_REF(level)); + pnano_read(result, buf, address); } -// Sampling the volume at the given index-space coordinates, uvw can be fractional -template -CUDA_CALLABLE inline T volume_sample(uint64_t id, vec3 uvw, int sampling_mode) +/// regular grid accessor (values stored in leafs) + +struct value_accessor_base { - const pnanovdb_buf_t buf = volume::id_to_buffer(id); - const pnanovdb_root_handle_t root = volume::get_root(buf); - const pnanovdb_vec3_t uvw_pnano{ uvw[0], uvw[1], uvw[2] }; + pnanovdb_buf_t buf; + pnanovdb_root_handle_t root; + pnanovdb_readaccessor_t accessor; + + explicit inline CUDA_CALLABLE value_accessor_base(const pnanovdb_buf_t buf) : buf(buf), root(get_root(buf)) + { + } + + CUDA_CALLABLE inline void init_cache() + { + pnanovdb_readaccessor_init(PNANOVDB_REF(accessor), root); + } +}; + +template struct leaf_value_accessor : value_accessor_base +{ + using ValueType = T; + + explicit inline CUDA_CALLABLE leaf_value_accessor(const pnanovdb_buf_t buf) : value_accessor_base(buf) + { + } + + CUDA_CALLABLE inline bool is_valid() const + { + return get_grid_type(buf) == pnano_traits::GRID_TYPE; + } - if (sampling_mode == volume::CLOSEST) + CUDA_CALLABLE inline T read_single(const pnanovdb_coord_t &ijk) const { - const pnanovdb_coord_t ijk = pnanovdb_vec3_round_to_coord(uvw_pnano); T val; pnano_read(val, buf, root, PNANOVDB_REF(ijk)); return val; } - else if (sampling_mode == volume::LINEAR) + + CUDA_CALLABLE inline T read_cache(const pnanovdb_coord_t &ijk) { - // NB. 
linear sampling is not used on int volumes - constexpr pnanovdb_coord_t OFFSETS[] = { - { 0, 0, 0 }, { 0, 0, 1 }, { 0, 1, 0 }, { 0, 1, 1 }, { 1, 0, 0 }, { 1, 0, 1 }, { 1, 1, 0 }, { 1, 1, 1 }, - }; + T val; + pnano_read(val, buf, PNANOVDB_REF(accessor), PNANOVDB_REF(ijk)); + return val; + } - const pnanovdb_vec3_t ijk_base{ floorf(uvw_pnano.x), floorf(uvw_pnano.y), floorf(uvw_pnano.z) }; - const pnanovdb_vec3_t ijk_frac{ uvw_pnano.x - ijk_base.x, uvw_pnano.y - ijk_base.y, uvw_pnano.z - ijk_base.z }; - const pnanovdb_coord_t ijk{ (pnanovdb_int32_t)ijk_base.x, (pnanovdb_int32_t)ijk_base.y, (pnanovdb_int32_t)ijk_base.z }; + CUDA_CALLABLE inline void adj_read_single(const pnanovdb_coord_t &ijk, const T &adj_ret) + { + // NOP + } - pnanovdb_readaccessor_t accessor; - pnanovdb_readaccessor_init(PNANOVDB_REF(accessor), root); - T val = 0; - const float wx[2]{ 1 - ijk_frac.x, ijk_frac.x }; - const float wy[2]{ 1 - ijk_frac.y, ijk_frac.y }; - const float wz[2]{ 1 - ijk_frac.z, ijk_frac.z }; - for (int idx = 0; idx < 8; ++idx) - { - const pnanovdb_coord_t& offs = OFFSETS[idx]; - const pnanovdb_coord_t ijk_shifted = pnanovdb_coord_add(ijk, offs); - T v; - pnano_read(v, buf, PNANOVDB_REF(accessor), PNANOVDB_REF(ijk_shifted)); - val = add(val, T(wx[offs.x] * wy[offs.y] * wz[offs.z] * v)); - } - return val; + CUDA_CALLABLE inline void adj_read_cache(const pnanovdb_coord_t &ijk, const T &adj_ret) + { + // NOP } - return 0; -} +}; -// Sampling a float volume at the given index-space coordinates, uvw can be fractional -CUDA_CALLABLE inline float volume_sample_f(uint64_t id, vec3 uvw, int sampling_mode) +CUDA_CALLABLE inline pnanovdb_uint64_t leaf_regular_get_voxel_index(pnanovdb_buf_t buf, + pnanovdb_address_t value_address, + PNANOVDB_IN(pnanovdb_coord_t) ijk) { - if (volume::get_grid_type(volume::id_to_buffer(id)) != PNANOVDB_GRID_TYPE_FLOAT) return 0.f; - return volume_sample(id, uvw, sampling_mode); + // compute leaf index from value address, assuming all leaf voxels are allocated + const pnanovdb_grid_type_t grid_type = get_grid_type(buf); + const pnanovdb_uint32_t n = pnanovdb_leaf_coord_to_offset(ijk); + const pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, leaf_off_table) + + ((PNANOVDB_GRID_TYPE_GET(grid_type, value_stride_bits) * n) >> 3u); + const pnanovdb_address_t leaf_address = pnanovdb_address_offset_neg(value_address, byte_offset); + + const pnanovdb_uint64_t first_leaf_offset = pnanovdb_tree_get_node_offset_leaf(buf, get_tree(buf)); + const pnanovdb_uint32_t leaf_size = PNANOVDB_GRID_TYPE_GET(grid_type, leaf_size); + const pnanovdb_uint64_t leaf_index = (leaf_address.byte_offset - first_leaf_offset) / leaf_size; + + return leaf_index * PNANOVDB_LEAF_TABLE_COUNT + n + 1; } -// Sampling an int volume at the given index-space coordinates, uvw can be fractional -CUDA_CALLABLE inline int32_t volume_sample_i(uint64_t id, vec3 uvw) +CUDA_CALLABLE inline pnanovdb_uint64_t get_grid_voxel_index(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, + pnanovdb_address_t value_address, + const pnanovdb_coord_t &ijk) { - if (volume::get_grid_type(volume::id_to_buffer(id)) != PNANOVDB_GRID_TYPE_INT32) return 0; - return volume_sample(id, uvw, volume::CLOSEST); -} + switch (grid_type) + { + case PNANOVDB_GRID_TYPE_INDEX: + return pnanovdb_leaf_index_get_value_index(buf, value_address, PNANOVDB_REF(ijk)); + case PNANOVDB_GRID_TYPE_ONINDEX: + return pnanovdb_leaf_onindex_get_value_index(buf, value_address, PNANOVDB_REF(ijk)); + case PNANOVDB_GRID_TYPE_INDEXMASK: + return 
pnanovdb_leaf_indexmask_get_value_index(buf, value_address, PNANOVDB_REF(ijk)); + case PNANOVDB_GRID_TYPE_ONINDEXMASK: + return pnanovdb_leaf_onindexmask_get_value_index(buf, value_address, PNANOVDB_REF(ijk)); + default: + return leaf_regular_get_voxel_index(buf, value_address, PNANOVDB_REF(ijk)); + } +}; -// Sampling a vector volume at the given index-space coordinates, uvw can be fractional -CUDA_CALLABLE inline vec3 volume_sample_v(uint64_t id, vec3 uvw, int sampling_mode) +/// index grid accessor +template struct index_value_accessor : value_accessor_base { - if (volume::get_grid_type(volume::id_to_buffer(id)) != PNANOVDB_GRID_TYPE_VEC3F) return vec3(0.f); - return volume_sample(id, uvw, sampling_mode); + using ValueType = T; + + pnanovdb_grid_type_t grid_type; + array_t data; + const T &background; + T *adj_background; + + explicit inline CUDA_CALLABLE index_value_accessor(const pnanovdb_buf_t buf, const array_t &data, + const T &background, T *adj_background = nullptr) + : value_accessor_base(buf), grid_type(get_grid_type(buf)), data(data), background(background), + adj_background(adj_background) + { + } + + CUDA_CALLABLE inline bool is_valid() const + { + // Accessor is valid for all grid types + return true; + } + + CUDA_CALLABLE inline T read_single(const pnanovdb_coord_t &ijk) const + { + pnanovdb_uint32_t level; + const pnanovdb_address_t address = + pnanovdb_root_get_value_address_and_level(grid_type, buf, root, PNANOVDB_REF(ijk), PNANOVDB_REF(level)); + return read_at(level, address, ijk); + } + + CUDA_CALLABLE inline T read_cache(const pnanovdb_coord_t &ijk) + { + pnanovdb_uint32_t level; + const pnanovdb_address_t address = pnanovdb_readaccessor_get_value_address_and_level( + grid_type, buf, PNANOVDB_REF(accessor), PNANOVDB_REF(ijk), PNANOVDB_REF(level)); + return read_at(level, address, ijk); + } + + CUDA_CALLABLE inline T read_at(pnanovdb_uint32_t level, const pnanovdb_address_t address, + const pnanovdb_coord_t &ijk) const + { + if (level == 0) + { + pnanovdb_uint64_t voxel_index = get_grid_voxel_index(grid_type, buf, address, ijk); + + if (voxel_index > 0) + { + return *wp::address(data, voxel_index - 1); + } + } + + return background; + } + + CUDA_CALLABLE inline void adj_read_single(const pnanovdb_coord_t &ijk, const T &adj_ret) + { + pnanovdb_uint32_t level; + const pnanovdb_address_t address = + pnanovdb_root_get_value_address_and_level(grid_type, buf, root, PNANOVDB_REF(ijk), PNANOVDB_REF(level)); + adj_read_at(level, address, ijk, adj_ret); + } + + CUDA_CALLABLE inline void adj_read_cache(const pnanovdb_coord_t &ijk, const T &adj_ret) + { + pnanovdb_uint32_t level; + const pnanovdb_address_t address = pnanovdb_readaccessor_get_value_address_and_level( + grid_type, buf, PNANOVDB_REF(accessor), PNANOVDB_REF(ijk), PNANOVDB_REF(level)); + adj_read_at(level, address, ijk, adj_ret); + } + + CUDA_CALLABLE inline void adj_read_at(pnanovdb_uint32_t level, const pnanovdb_address_t address, + const pnanovdb_coord_t &ijk, const T &adj_ret) const + { + if (level == 0) + { + pnanovdb_uint64_t voxel_index = get_grid_voxel_index(grid_type, buf, address, ijk); + + if (voxel_index > 0) + { + adj_atomic_add(&index_grad(data, voxel_index - 1), adj_ret); + return; + } + } + *adj_background += adj_ret; + } +}; + +CUDA_CALLABLE inline pnanovdb_coord_t vec3_round_to_coord(const pnanovdb_vec3_t a) +{ + pnanovdb_coord_t v; + v.x = pnanovdb_float_to_int32(roundf(a.x)); + v.y = pnanovdb_float_to_int32(roundf(a.y)); + v.z = pnanovdb_float_to_int32(roundf(a.z)); + return v; } -CUDA_CALLABLE 
inline void adj_volume_sample_f( - uint64_t id, vec3 uvw, int sampling_mode, uint64_t& adj_id, vec3& adj_uvw, int& adj_sampling_mode, const float& adj_ret) +template struct val_traits { - if (volume::get_grid_type(volume::id_to_buffer(id)) != PNANOVDB_GRID_TYPE_FLOAT) return; + using grad_t = vec_t<3, T>; + using scalar_t = T; - if (sampling_mode != volume::LINEAR) { - return; // NOP + // multiplies the gradient on the right + // needs to be specialized for scalar types as gradient is stored as column rather than row vector + static CUDA_CALLABLE inline T rmul(const grad_t &grad, const vec_t<3, scalar_t> &rhs) + { + return dot(grad, rhs); } +}; - const pnanovdb_buf_t buf = volume::id_to_buffer(id); - const pnanovdb_root_handle_t root = volume::get_root(buf); - const pnanovdb_vec3_t uvw_pnano{ uvw[0], uvw[1], uvw[2] }; +template struct val_traits> +{ + using grad_t = mat_t<3, Length, T>; + using scalar_t = T; - constexpr pnanovdb_coord_t OFFSETS[] = { - { 0, 0, 0 }, { 0, 0, 1 }, { 0, 1, 0 }, { 0, 1, 1 }, { 1, 0, 0 }, { 1, 0, 1 }, { 1, 1, 0 }, { 1, 1, 1 }, - }; + static CUDA_CALLABLE inline vec_t rmul(const grad_t &grad, const vec_t<3, scalar_t> &rhs) + { + return mul(grad, rhs); + } +}; - const pnanovdb_vec3_t ijk_base{ floorf(uvw_pnano.x), floorf(uvw_pnano.y), floorf(uvw_pnano.z) }; - const pnanovdb_vec3_t ijk_frac{ uvw_pnano.x - ijk_base.x, uvw_pnano.y - ijk_base.y, uvw_pnano.z - ijk_base.z }; - const pnanovdb_coord_t ijk{ (pnanovdb_int32_t)ijk_base.x, (pnanovdb_int32_t)ijk_base.y, (pnanovdb_int32_t)ijk_base.z }; +// Sampling the volume at the given index-space coordinates, uvw can be fractional +template +CUDA_CALLABLE inline typename Accessor::ValueType volume_sample(Accessor &accessor, vec3 uvw, int sampling_mode) +{ + using T = typename Accessor::ValueType; + using w_t = typename val_traits::scalar_t; - pnanovdb_readaccessor_t accessor; - pnanovdb_readaccessor_init(PNANOVDB_REF(accessor), root); - const float wx[2]{ 1 - ijk_frac.x, ijk_frac.x }; - const float wy[2]{ 1 - ijk_frac.y, ijk_frac.y }; - const float wz[2]{ 1 - ijk_frac.z, ijk_frac.z }; - vec3 dphi(0,0,0); - for (int idx = 0; idx < 8; ++idx) + if (!accessor.is_valid()) { - const pnanovdb_coord_t& offs = OFFSETS[idx]; - const pnanovdb_coord_t ijk_shifted = pnanovdb_coord_add(ijk, offs); - float v; - pnano_read(v, buf, PNANOVDB_REF(accessor), PNANOVDB_REF(ijk_shifted)); - const vec3 signs(offs.x * 2 - 1, offs.y * 2 - 1, offs.z * 2 - 1); - const vec3 grad_w(signs[0] * wy[offs.y] * wz[offs.z], signs[1] * wx[offs.x] * wz[offs.z], signs[2] * wx[offs.x] * wy[offs.y]); - dphi = add(dphi, mul(v, grad_w)); + return 0; + } + + const pnanovdb_buf_t buf = accessor.buf; + const pnanovdb_vec3_t uvw_pnano{uvw[0], uvw[1], uvw[2]}; + + if (sampling_mode == CLOSEST) + { + const pnanovdb_coord_t ijk = vec3_round_to_coord(uvw_pnano); + return accessor.read_single(ijk); } + else if (sampling_mode == LINEAR) + { + // NB. 
linear sampling is not used on int volumes + constexpr pnanovdb_coord_t OFFSETS[] = { + {0, 0, 0}, {0, 0, 1}, {0, 1, 0}, {0, 1, 1}, {1, 0, 0}, {1, 0, 1}, {1, 1, 0}, {1, 1, 1}, + }; + + const pnanovdb_vec3_t ijk_base{floorf(uvw_pnano.x), floorf(uvw_pnano.y), floorf(uvw_pnano.z)}; + const pnanovdb_vec3_t ijk_frac{uvw_pnano.x - ijk_base.x, uvw_pnano.y - ijk_base.y, uvw_pnano.z - ijk_base.z}; + const pnanovdb_coord_t ijk{(pnanovdb_int32_t)ijk_base.x, (pnanovdb_int32_t)ijk_base.y, + (pnanovdb_int32_t)ijk_base.z}; + + accessor.init_cache(); + T val = 0; + const float wx[2]{1 - ijk_frac.x, ijk_frac.x}; + const float wy[2]{1 - ijk_frac.y, ijk_frac.y}; + const float wz[2]{1 - ijk_frac.z, ijk_frac.z}; + for (int idx = 0; idx < 8; ++idx) + { + const pnanovdb_coord_t &offs = OFFSETS[idx]; + const pnanovdb_coord_t ijk_shifted = pnanovdb_coord_add(ijk, offs); + const T v = accessor.read_cache(ijk_shifted); - adj_uvw += mul(dphi, adj_ret); + const w_t w = wx[offs.x] * wy[offs.y] * wz[offs.z]; + val = add(val, w * v); + } + return val; + } + return 0; } -CUDA_CALLABLE inline void adj_volume_sample_v( - uint64_t id, vec3 uvw, int sampling_mode, uint64_t& adj_id, vec3& adj_uvw, int& adj_sampling_mode, const vec3& adj_ret) +template +CUDA_CALLABLE inline void adj_volume_sample(Accessor &accessor, vec3 uvw, int sampling_mode, vec3 &adj_uvw, + const typename Accessor::ValueType &adj_ret) { - if (volume::get_grid_type(volume::id_to_buffer(id)) != PNANOVDB_GRID_TYPE_VEC3F) return; + // TODO: accessor data gradients - if (sampling_mode != volume::LINEAR) { - return; // NOP + using T = typename Accessor::ValueType; + using w_t = typename val_traits::scalar_t; + using w_grad_t = vec_t<3, w_t>; + + if (!accessor.is_valid()) + { + return; } - const pnanovdb_buf_t buf = volume::id_to_buffer(id); - const pnanovdb_root_handle_t root = volume::get_root(buf); - const pnanovdb_vec3_t uvw_pnano{ uvw[0], uvw[1], uvw[2] }; + const pnanovdb_buf_t buf = accessor.buf; + const pnanovdb_vec3_t uvw_pnano{uvw[0], uvw[1], uvw[2]}; + + if (sampling_mode != LINEAR) + { + const pnanovdb_coord_t ijk = vec3_round_to_coord(uvw_pnano); + accessor.adj_read_single(ijk, adj_ret); + return; + } constexpr pnanovdb_coord_t OFFSETS[] = { - { 0, 0, 0 }, { 0, 0, 1 }, { 0, 1, 0 }, { 0, 1, 1 }, { 1, 0, 0 }, { 1, 0, 1 }, { 1, 1, 0 }, { 1, 1, 1 }, + {0, 0, 0}, {0, 0, 1}, {0, 1, 0}, {0, 1, 1}, {1, 0, 0}, {1, 0, 1}, {1, 1, 0}, {1, 1, 1}, }; - const pnanovdb_vec3_t ijk_base{ floorf(uvw_pnano.x), floorf(uvw_pnano.y), floorf(uvw_pnano.z) }; - const pnanovdb_vec3_t ijk_frac{ uvw_pnano.x - ijk_base.x, uvw_pnano.y - ijk_base.y, uvw_pnano.z - ijk_base.z }; - const pnanovdb_coord_t ijk{ (pnanovdb_int32_t)ijk_base.x, (pnanovdb_int32_t)ijk_base.y, (pnanovdb_int32_t)ijk_base.z }; + const pnanovdb_vec3_t ijk_base{floorf(uvw_pnano.x), floorf(uvw_pnano.y), floorf(uvw_pnano.z)}; + const pnanovdb_vec3_t ijk_frac{uvw_pnano.x - ijk_base.x, uvw_pnano.y - ijk_base.y, uvw_pnano.z - ijk_base.z}; + const pnanovdb_coord_t ijk{(pnanovdb_int32_t)ijk_base.x, (pnanovdb_int32_t)ijk_base.y, + (pnanovdb_int32_t)ijk_base.z}; - pnanovdb_readaccessor_t accessor; - pnanovdb_readaccessor_init(PNANOVDB_REF(accessor), root); - const float wx[2]{ 1 - ijk_frac.x, ijk_frac.x }; - const float wy[2]{ 1 - ijk_frac.y, ijk_frac.y }; - const float wz[2]{ 1 - ijk_frac.z, ijk_frac.z }; - vec3 dphi[3] = {{0,0,0}, {0,0,0}, {0,0,0}}; + accessor.init_cache(); + + const float wx[2]{1 - ijk_frac.x, ijk_frac.x}; + const float wy[2]{1 - ijk_frac.y, ijk_frac.y}; + const float wz[2]{1 - ijk_frac.z, ijk_frac.z}; 
for (int idx = 0; idx < 8; ++idx) { - const pnanovdb_coord_t& offs = OFFSETS[idx]; + const pnanovdb_coord_t &offs = OFFSETS[idx]; const pnanovdb_coord_t ijk_shifted = pnanovdb_coord_add(ijk, offs); - vec3 v; - pnano_read(v, buf, PNANOVDB_REF(accessor), PNANOVDB_REF(ijk_shifted)); + const T v = accessor.read_cache(ijk_shifted); + const vec3 signs(offs.x * 2 - 1, offs.y * 2 - 1, offs.z * 2 - 1); - const vec3 grad_w(signs[0] * wy[offs.y] * wz[offs.z], signs[1] * wx[offs.x] * wz[offs.z], signs[2] * wx[offs.x] * wy[offs.y]); - dphi[0] = add(dphi[0], mul(v[0], grad_w)); - dphi[1] = add(dphi[1], mul(v[1], grad_w)); - dphi[2] = add(dphi[2], mul(v[2], grad_w)); - } - for (int k = 0; k < 3; ++k) - { - adj_uvw[k] += dot(dphi[k], adj_ret); - } -} + const w_t w = wx[offs.x] * wy[offs.y] * wz[offs.z]; + const w_grad_t grad_w(signs[0] * wy[offs.y] * wz[offs.z], signs[1] * wx[offs.x] * wz[offs.z], + signs[2] * wx[offs.x] * wy[offs.y]); -CUDA_CALLABLE inline void adj_volume_sample_i(uint64_t id, vec3 uvw, uint64_t& adj_id, vec3& adj_uvw, const int32_t& adj_ret) -{ - // NOP + adj_uvw += vec3(mul(w_t(dot(v, adj_ret)), grad_w)); + + const T adj_v = w * adj_ret; + accessor.adj_read_cache(ijk_shifted, adj_v); + } } // Sampling the volume at the given index-space coordinates, uvw can be fractional -CUDA_CALLABLE inline float volume_sample_grad_f(uint64_t id, vec3 uvw, int sampling_mode, vec3& grad) +template +CUDA_CALLABLE inline typename Accessor::ValueType volume_sample_grad( + Accessor &accessor, vec3 uvw, int sampling_mode, typename val_traits::grad_t &grad) { - const pnanovdb_buf_t buf = volume::id_to_buffer(id); - const pnanovdb_root_handle_t root = volume::get_root(buf); - const pnanovdb_vec3_t uvw_pnano{ uvw[0], uvw[1], uvw[2] }; + using T = typename Accessor::ValueType; + using grad_T = typename val_traits::grad_t; + using w_t = typename val_traits::scalar_t; + using w_grad_t = vec_t<3, w_t>; + + grad = grad_T{}; - if (sampling_mode == volume::CLOSEST) + if (!accessor.is_valid()) { - const pnanovdb_coord_t ijk = pnanovdb_vec3_round_to_coord(uvw_pnano); - float val; - pnano_read(val, buf, root, PNANOVDB_REF(ijk)); - grad = vec3(0.0f, 0.0f, 0.0f); - return val; + return 0; } - else if (sampling_mode == volume::LINEAR) + + const pnanovdb_buf_t buf = accessor.buf; + const pnanovdb_vec3_t uvw_pnano{uvw[0], uvw[1], uvw[2]}; + + if (sampling_mode == CLOSEST) + { + const pnanovdb_coord_t ijk = vec3_round_to_coord(uvw_pnano); + return accessor.read_single(ijk); + } + else if (sampling_mode == LINEAR) { // NB. 
linear sampling is not used on int volumes constexpr pnanovdb_coord_t OFFSETS[] = { - { 0, 0, 0 }, { 0, 0, 1 }, { 0, 1, 0 }, { 0, 1, 1 }, { 1, 0, 0 }, { 1, 0, 1 }, { 1, 1, 0 }, { 1, 1, 1 }, + {0, 0, 0}, {0, 0, 1}, {0, 1, 0}, {0, 1, 1}, {1, 0, 0}, {1, 0, 1}, {1, 1, 0}, {1, 1, 1}, }; - const pnanovdb_vec3_t ijk_base{ floorf(uvw_pnano.x), floorf(uvw_pnano.y), floorf(uvw_pnano.z) }; - const pnanovdb_vec3_t ijk_frac{ uvw_pnano.x - ijk_base.x, uvw_pnano.y - ijk_base.y, uvw_pnano.z - ijk_base.z }; - const pnanovdb_coord_t ijk{ (pnanovdb_int32_t)ijk_base.x, (pnanovdb_int32_t)ijk_base.y, (pnanovdb_int32_t)ijk_base.z }; + const pnanovdb_vec3_t ijk_base{floorf(uvw_pnano.x), floorf(uvw_pnano.y), floorf(uvw_pnano.z)}; + const pnanovdb_vec3_t ijk_frac{uvw_pnano.x - ijk_base.x, uvw_pnano.y - ijk_base.y, uvw_pnano.z - ijk_base.z}; + const pnanovdb_coord_t ijk{(pnanovdb_int32_t)ijk_base.x, (pnanovdb_int32_t)ijk_base.y, + (pnanovdb_int32_t)ijk_base.z}; - pnanovdb_readaccessor_t accessor; - pnanovdb_readaccessor_init(PNANOVDB_REF(accessor), root); - float val = 0.0f; - const float wx[2]{ 1 - ijk_frac.x, ijk_frac.x }; - const float wy[2]{ 1 - ijk_frac.y, ijk_frac.y }; - const float wz[2]{ 1 - ijk_frac.z, ijk_frac.z }; - - const float sign_dx[8] = {-1.0f, -1.0f, -1.0f, -1.0f, 1.0f, 1.0f, 1.0f, 1.0f}; - const float sign_dy[8] = {-1.0f, -1.0f, 1.0f, 1.0f, -1.0f, -1.0f, 1.0f, 1.0f}; - const float sign_dz[8] = {-1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f}; - - float dfdx = 0.0f; - float dfdy = 0.0f; - float dfdz = 0.0f; + accessor.init_cache(); + T val = 0; + const float wx[2]{1 - ijk_frac.x, ijk_frac.x}; + const float wy[2]{1 - ijk_frac.y, ijk_frac.y}; + const float wz[2]{1 - ijk_frac.z, ijk_frac.z}; for (int idx = 0; idx < 8; ++idx) { - const pnanovdb_coord_t& offs = OFFSETS[idx]; + const pnanovdb_coord_t &offs = OFFSETS[idx]; const pnanovdb_coord_t ijk_shifted = pnanovdb_coord_add(ijk, offs); - float v; - pnano_read(v, buf, PNANOVDB_REF(accessor), PNANOVDB_REF(ijk_shifted)); - val = add(val, wx[offs.x] * wy[offs.y] * wz[offs.z] * v); - dfdx = add(dfdx, wy[offs.y] * wz[offs.z] * sign_dx[idx] * v); - dfdy = add(dfdy, wx[offs.x] * wz[offs.z] * sign_dy[idx] * v); - dfdz = add(dfdz, wx[offs.x] * wy[offs.y] * sign_dz[idx] * v); + const T v = accessor.read_cache(ijk_shifted); + + const vec3 signs(offs.x * 2 - 1, offs.y * 2 - 1, offs.z * 2 - 1); + + const w_t w = wx[offs.x] * wy[offs.y] * wz[offs.z]; + const w_grad_t grad_w(signs[0] * wy[offs.y] * wz[offs.z], signs[1] * wx[offs.x] * wz[offs.z], + signs[2] * wx[offs.x] * wy[offs.y]); + + val = add(val, w * v); + grad += outer(v, grad_w); } - grad = vec3(dfdx, dfdy, dfdz); return val; } - return 0.0f; + return 0; } -CUDA_CALLABLE inline void adj_volume_sample_grad_f( - uint64_t id, vec3 uvw, int sampling_mode, vec3& grad, uint64_t& adj_id, vec3& adj_uvw, int& adj_sampling_mode, vec3& adj_grad, const float& adj_ret) +template +CUDA_CALLABLE inline void adj_volume_sample_grad(Accessor &accessor, vec3 uvw, int sampling_mode, + typename val_traits::grad_t &grad, + vec3 &adj_uvw, + typename val_traits::grad_t &adj_grad, + const typename Accessor::ValueType &adj_ret) { - if (volume::get_grid_type(volume::id_to_buffer(id)) != PNANOVDB_GRID_TYPE_FLOAT) return; + // TODO: accessor data gradients - if (sampling_mode != volume::LINEAR) { - return; // NOP + using T = typename Accessor::ValueType; + using grad_T = typename val_traits::grad_t; + using w_t = typename val_traits::scalar_t; + using w_grad_t = vec_t<3, w_t>; + using w_hess_t = mat_t<3, 3, w_t>; + + if 
(!accessor.is_valid()) + { + return; } - const pnanovdb_buf_t buf = volume::id_to_buffer(id); - const pnanovdb_root_handle_t root = volume::get_root(buf); - const pnanovdb_vec3_t uvw_pnano{ uvw[0], uvw[1], uvw[2] }; + const pnanovdb_buf_t buf = accessor.buf; + const pnanovdb_vec3_t uvw_pnano{uvw[0], uvw[1], uvw[2]}; + + if (sampling_mode != LINEAR) + { + const pnanovdb_coord_t ijk = vec3_round_to_coord(uvw_pnano); + accessor.adj_read_single(ijk, adj_ret); + return; + } constexpr pnanovdb_coord_t OFFSETS[] = { - { 0, 0, 0 }, { 0, 0, 1 }, { 0, 1, 0 }, { 0, 1, 1 }, { 1, 0, 0 }, { 1, 0, 1 }, { 1, 1, 0 }, { 1, 1, 1 }, + {0, 0, 0}, {0, 0, 1}, {0, 1, 0}, {0, 1, 1}, {1, 0, 0}, {1, 0, 1}, {1, 1, 0}, {1, 1, 1}, }; - const pnanovdb_vec3_t ijk_base{ floorf(uvw_pnano.x), floorf(uvw_pnano.y), floorf(uvw_pnano.z) }; - const pnanovdb_vec3_t ijk_frac{ uvw_pnano.x - ijk_base.x, uvw_pnano.y - ijk_base.y, uvw_pnano.z - ijk_base.z }; - const pnanovdb_coord_t ijk{ (pnanovdb_int32_t)ijk_base.x, (pnanovdb_int32_t)ijk_base.y, (pnanovdb_int32_t)ijk_base.z }; + const pnanovdb_vec3_t ijk_base{floorf(uvw_pnano.x), floorf(uvw_pnano.y), floorf(uvw_pnano.z)}; + const pnanovdb_vec3_t ijk_frac{uvw_pnano.x - ijk_base.x, uvw_pnano.y - ijk_base.y, uvw_pnano.z - ijk_base.z}; + const pnanovdb_coord_t ijk{(pnanovdb_int32_t)ijk_base.x, (pnanovdb_int32_t)ijk_base.y, + (pnanovdb_int32_t)ijk_base.z}; - pnanovdb_readaccessor_t accessor; - pnanovdb_readaccessor_init(PNANOVDB_REF(accessor), root); - const float wx[2]{ 1 - ijk_frac.x, ijk_frac.x }; - const float wy[2]{ 1 - ijk_frac.y, ijk_frac.y }; - const float wz[2]{ 1 - ijk_frac.z, ijk_frac.z }; - const float sign_dx[8] = {-1.0f, -1.0f, -1.0f, -1.0f, 1.0f, 1.0f, 1.0f, 1.0f}; - const float sign_dy[8] = {-1.0f, -1.0f, 1.0f, 1.0f, -1.0f, -1.0f, 1.0f, 1.0f}; - const float sign_dz[8] = {-1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f}; - - float dfdxdy = 0.0f; - float dfdxdz = 0.0f; - float dfdydx = 0.0f; - float dfdydz = 0.0f; - float dfdzdx = 0.0f; - float dfdzdy = 0.0f; - vec3 dphi(0,0,0); + accessor.init_cache(); + + const float wx[2]{1 - ijk_frac.x, ijk_frac.x}; + const float wy[2]{1 - ijk_frac.y, ijk_frac.y}; + const float wz[2]{1 - ijk_frac.z, ijk_frac.z}; for (int idx = 0; idx < 8; ++idx) { - const pnanovdb_coord_t& offs = OFFSETS[idx]; + const pnanovdb_coord_t &offs = OFFSETS[idx]; const pnanovdb_coord_t ijk_shifted = pnanovdb_coord_add(ijk, offs); - float v; - pnano_read(v, buf, PNANOVDB_REF(accessor), PNANOVDB_REF(ijk_shifted)); + const T v = accessor.read_cache(ijk_shifted); + const vec3 signs(offs.x * 2 - 1, offs.y * 2 - 1, offs.z * 2 - 1); - const vec3 grad_w(signs[0] * wy[offs.y] * wz[offs.z], signs[1] * wx[offs.x] * wz[offs.z], signs[2] * wx[offs.x] * wy[offs.y]); - dphi = add(dphi, mul(v, grad_w)); - dfdxdy = add(dfdxdy, signs[1] * wz[offs.z] * sign_dx[idx] * v); - dfdxdz = add(dfdxdz, wy[offs.y] * signs[2] * sign_dx[idx] * v); + const w_t w = wx[offs.x] * wy[offs.y] * wz[offs.z]; + const w_grad_t grad_w(signs[0] * wy[offs.y] * wz[offs.z], signs[1] * wx[offs.x] * wz[offs.z], + signs[2] * wx[offs.x] * wy[offs.y]); + adj_uvw += vec3(mul(w_t(dot(v, adj_ret)), grad_w)); - dfdydx = add(dfdydx, signs[0] * wz[offs.z] * sign_dy[idx] * v); - dfdydz = add(dfdydz, wx[offs.x] * signs[2] * sign_dy[idx] * v); + const w_hess_t hess_w(0.0, signs[1] * signs[0] * wz[offs.z], signs[2] * signs[0] * wy[offs.y], + signs[0] * signs[1] * wz[offs.z], 0.0, signs[2] * signs[1] * wx[offs.x], + signs[0] * signs[2] * wy[offs.y], signs[1] * signs[2] * wx[offs.x], 0.0); + adj_uvw += vec3(mul(mul(v, 
adj_grad), hess_w)); - dfdzdx = add(dfdzdx, signs[0] * wy[offs.y] * sign_dz[idx] * v); - dfdzdy = add(dfdzdy, wx[offs.x] * signs[1] * sign_dz[idx] * v); + const T adj_v = w * adj_ret + val_traits::rmul(adj_grad, grad_w); + accessor.adj_read_cache(ijk_shifted, adj_v); } +} + +} // namespace volume + // namespace volume + +// exposed kernel builtins + +// volume_sample - adj_uvw += mul(dphi, adj_ret); - adj_uvw[0] += adj_grad[1] * dfdydx + adj_grad[2] * dfdzdx; - adj_uvw[1] += adj_grad[0] * dfdxdy + adj_grad[2] * dfdzdy; - adj_uvw[2] += adj_grad[0] * dfdxdz + adj_grad[1] * dfdydz; +template CUDA_CALLABLE inline T volume_sample(uint64_t id, vec3 uvw, int sampling_mode) +{ + volume::leaf_value_accessor accessor(volume::id_to_buffer(id)); + return volume::volume_sample(accessor, uvw, sampling_mode); } -CUDA_CALLABLE inline float volume_lookup_f(uint64_t id, int32_t i, int32_t j, int32_t k) +template +CUDA_CALLABLE inline void adj_volume_sample(uint64_t id, vec3 uvw, int sampling_mode, uint64_t &adj_id, vec3 &adj_uvw, + int &adj_sampling_mode, const T &adj_ret) { - if (volume::get_grid_type(volume::id_to_buffer(id)) != PNANOVDB_GRID_TYPE_FLOAT) return 0.f; + volume::leaf_value_accessor accessor(volume::id_to_buffer(id)); + volume::adj_volume_sample(accessor, uvw, sampling_mode, adj_uvw, adj_ret); +} - const pnanovdb_buf_t buf = volume::id_to_buffer(id); - const pnanovdb_root_handle_t root = volume::get_root(buf); +template +CUDA_CALLABLE inline T volume_sample_grad(uint64_t id, vec3 uvw, int sampling_mode, + typename volume::val_traits::grad_t &grad) +{ + volume::leaf_value_accessor accessor(volume::id_to_buffer(id)); + return volume::volume_sample_grad(accessor, uvw, sampling_mode, grad); +} - const pnanovdb_coord_t ijk{ i, j, k }; - float val; - pnano_read(val, buf, root, PNANOVDB_REF(ijk)); - return val; +template +CUDA_CALLABLE inline void adj_volume_sample_grad(uint64_t id, vec3 uvw, int sampling_mode, + typename volume::val_traits::grad_t &grad, uint64_t &adj_id, + vec3 &adj_uvw, int &adj_sampling_mode, + typename volume::val_traits::grad_t &adj_grad, const T &adj_ret) +{ + volume::leaf_value_accessor accessor(volume::id_to_buffer(id)); + volume::adj_volume_sample_grad(accessor, uvw, sampling_mode, grad, adj_uvw, adj_grad, adj_ret); } -CUDA_CALLABLE inline int32_t volume_lookup_i(uint64_t id, int32_t i, int32_t j, int32_t k) +// Sampling a float volume at the given index-space coordinates, uvw can be fractional +CUDA_CALLABLE inline float volume_sample_f(uint64_t id, vec3 uvw, int sampling_mode) { - if (volume::get_grid_type(volume::id_to_buffer(id)) != PNANOVDB_GRID_TYPE_INT32) return 0; + return volume_sample(id, uvw, sampling_mode); +} - const pnanovdb_buf_t buf = volume::id_to_buffer(id); - const pnanovdb_root_handle_t root = volume::get_root(buf); +// Sampling an int volume at the given index-space coordinates, uvw can be fractional +CUDA_CALLABLE inline int32_t volume_sample_i(uint64_t id, vec3 uvw) +{ + return volume_sample(id, uvw, volume::CLOSEST); +} - const pnanovdb_coord_t ijk{ i, j, k }; - int32_t val; - pnano_read(val, buf, root, PNANOVDB_REF(ijk)); - return val; +// Sampling a vector volume at the given index-space coordinates, uvw can be fractional +CUDA_CALLABLE inline vec3 volume_sample_v(uint64_t id, vec3 uvw, int sampling_mode) +{ + return volume_sample(id, uvw, sampling_mode); } -CUDA_CALLABLE inline vec3 volume_lookup_v(uint64_t id, int32_t i, int32_t j, int32_t k) +CUDA_CALLABLE inline void adj_volume_sample_f(uint64_t id, vec3 uvw, int sampling_mode, uint64_t 
&adj_id, vec3 &adj_uvw, + int &adj_sampling_mode, const float &adj_ret) +{ + adj_volume_sample(id, uvw, sampling_mode, adj_id, adj_uvw, adj_sampling_mode, adj_ret); +} + +CUDA_CALLABLE inline void adj_volume_sample_v(uint64_t id, vec3 uvw, int sampling_mode, uint64_t &adj_id, vec3 &adj_uvw, + int &adj_sampling_mode, const vec3 &adj_ret) +{ + adj_volume_sample(id, uvw, sampling_mode, adj_id, adj_uvw, adj_sampling_mode, adj_ret); +} + +CUDA_CALLABLE inline void adj_volume_sample_i(uint64_t id, vec3 uvw, uint64_t &adj_id, vec3 &adj_uvw, + const int32_t &adj_ret) +{ + // NOP +} + +// Sampling the volume at the given index-space coordinates, uvw can be fractional +CUDA_CALLABLE inline float volume_sample_grad_f(uint64_t id, vec3 uvw, int sampling_mode, vec3 &grad) +{ + return volume_sample_grad(id, uvw, sampling_mode, grad); +} + +CUDA_CALLABLE inline void adj_volume_sample_grad_f(uint64_t id, vec3 uvw, int sampling_mode, vec3 &grad, + uint64_t &adj_id, vec3 &adj_uvw, int &adj_sampling_mode, + vec3 &adj_grad, const float &adj_ret) +{ + adj_volume_sample_grad(id, uvw, sampling_mode, grad, adj_id, adj_uvw, adj_sampling_mode, adj_grad, adj_ret); +} + +// volume_sample_index + +template +CUDA_CALLABLE inline T volume_sample_index(uint64_t id, vec3 uvw, int sampling_mode, const array_t &voxel_data, + const T &background) +{ + volume::index_value_accessor accessor(volume::id_to_buffer(id), voxel_data, background); + return volume::volume_sample(accessor, uvw, sampling_mode); +} + +template +CUDA_CALLABLE inline void adj_volume_sample_index(uint64_t id, vec3 uvw, int sampling_mode, + const array_t &voxel_data, const T &background, uint64_t &adj_id, + vec3 &adj_uvw, int &adj_sampling_mode, array_t &adj_voxel_data, + T &adj_background, const T &adj_ret) { - if (volume::get_grid_type(volume::id_to_buffer(id)) != PNANOVDB_GRID_TYPE_VEC3F) return vec3(0.f); + volume::index_value_accessor accessor(volume::id_to_buffer(id), voxel_data, background, &adj_background); + volume::adj_volume_sample(accessor, uvw, sampling_mode, adj_uvw, adj_ret); +} + +template +CUDA_CALLABLE inline T volume_sample_grad_index(uint64_t id, vec3 uvw, int sampling_mode, const array_t &voxel_data, + const T &background, typename volume::val_traits::grad_t &grad) +{ + volume::index_value_accessor accessor(volume::id_to_buffer(id), voxel_data, background); + return volume::volume_sample_grad(accessor, uvw, sampling_mode, grad); +} + +template +CUDA_CALLABLE inline void adj_volume_sample_grad_index( + uint64_t id, vec3 uvw, int sampling_mode, const array_t &voxel_data, const T &background, + typename volume::val_traits::grad_t &grad, uint64_t &adj_id, vec3 &adj_uvw, int &adj_sampling_mode, + array_t &adj_voxel_data, T &adj_background, typename volume::val_traits::grad_t &adj_grad, const T &adj_ret) +{ + volume::index_value_accessor accessor(volume::id_to_buffer(id), voxel_data, background, &adj_background); + volume::adj_volume_sample_grad(accessor, uvw, sampling_mode, grad, adj_uvw, adj_grad, adj_ret); +} + +// volume_lookup + +template CUDA_CALLABLE inline T volume_lookup(uint64_t id, int32_t i, int32_t j, int32_t k) +{ + using traits = volume::pnano_traits; const pnanovdb_buf_t buf = volume::id_to_buffer(id); + if (volume::get_grid_type(buf) != traits::GRID_TYPE) + return 0; + const pnanovdb_root_handle_t root = volume::get_root(buf); - const pnanovdb_coord_t ijk{ i, j, k }; - vec3 val; - pnano_read(val, buf, root, PNANOVDB_REF(ijk)); + const pnanovdb_coord_t ijk{i, j, k}; + T val; + volume::pnano_read(val, buf, root, 
PNANOVDB_REF(ijk)); return val; } -CUDA_CALLABLE inline void adj_volume_lookup_f( - uint64_t id, int32_t i, int32_t j, int32_t k, uint64_t& adj_id, int32_t& adj_i, int32_t& adj_j, int32_t& adj_k, const float& adj_ret) +template +CUDA_CALLABLE inline void adj_volume_lookup(uint64_t id, int32_t i, int32_t j, int32_t k, uint64_t &adj_id, + int32_t &adj_i, int32_t &adj_j, int32_t &adj_k, const T &adj_ret) { - // NOP + // NOP -- adjoint of grid values is not available +} + +CUDA_CALLABLE inline float volume_lookup_f(uint64_t id, int32_t i, int32_t j, int32_t k) +{ + return volume_lookup(id, i, j, k); } -CUDA_CALLABLE inline void adj_volume_lookup_i( - uint64_t id, int32_t i, int32_t j, int32_t k, uint64_t& adj_id, int32_t& adj_i, int32_t& adj_j, int32_t& adj_k, const int32_t& adj_ret) +CUDA_CALLABLE inline int32_t volume_lookup_i(uint64_t id, int32_t i, int32_t j, int32_t k) { - // NOP + return volume_lookup(id, i, j, k); } -CUDA_CALLABLE inline void adj_volume_lookup_v( - uint64_t id, int32_t i, int32_t j, int32_t k, uint64_t& adj_id, int32_t& adj_i, int32_t& adj_j, int32_t& adj_k, const vec3& adj_ret) +CUDA_CALLABLE inline vec3 volume_lookup_v(uint64_t id, int32_t i, int32_t j, int32_t k) { - // NOP + return volume_lookup(id, i, j, k); } -CUDA_CALLABLE inline void volume_store_f(uint64_t id, int32_t i, int32_t j, int32_t k, const float& value) +CUDA_CALLABLE inline void adj_volume_lookup_f(uint64_t id, int32_t i, int32_t j, int32_t k, uint64_t &adj_id, + int32_t &adj_i, int32_t &adj_j, int32_t &adj_k, const float &adj_ret) { - if (volume::get_grid_type(volume::id_to_buffer(id)) != PNANOVDB_GRID_TYPE_FLOAT) return; + adj_volume_lookup(id, i, j, k, adj_id, adj_i, adj_j, adj_k, adj_ret); +} +CUDA_CALLABLE inline void adj_volume_lookup_i(uint64_t id, int32_t i, int32_t j, int32_t k, uint64_t &adj_id, + int32_t &adj_i, int32_t &adj_j, int32_t &adj_k, const int32_t &adj_ret) +{ + adj_volume_lookup(id, i, j, k, adj_id, adj_i, adj_j, adj_k, adj_ret); +} + +CUDA_CALLABLE inline void adj_volume_lookup_v(uint64_t id, int32_t i, int32_t j, int32_t k, uint64_t &adj_id, + int32_t &adj_i, int32_t &adj_j, int32_t &adj_k, const vec3 &adj_ret) +{ + adj_volume_lookup(id, i, j, k, adj_id, adj_i, adj_j, adj_k, adj_ret); +} + +CUDA_CALLABLE inline int32_t volume_lookup_index(uint64_t id, int32_t i, int32_t j, int32_t k) +{ const pnanovdb_buf_t buf = volume::id_to_buffer(id); const pnanovdb_root_handle_t root = volume::get_root(buf); + const pnanovdb_grid_type_t grid_type = volume::get_grid_type(buf); + + const pnanovdb_coord_t ijk{i, j, k}; + + pnanovdb_uint32_t level; + const pnanovdb_address_t address = + pnanovdb_root_get_value_address_and_level(grid_type, buf, root, PNANOVDB_REF(ijk), PNANOVDB_REF(level)); + + if (level == 0) + { + pnanovdb_uint64_t voxel_index = volume::get_grid_voxel_index(grid_type, buf, address, ijk); - const pnanovdb_coord_t ijk{ i, j, k }; - const pnanovdb_address_t address = pnanovdb_root_get_value_address(PNANOVDB_GRID_TYPE_FLOAT, buf, root, PNANOVDB_REF(ijk)); - pnanovdb_write_float(buf, address, value); + return static_cast(voxel_index) - 1; + } + return -1; } -CUDA_CALLABLE inline void adj_volume_store_f( - uint64_t id, int32_t i, int32_t j, int32_t k, const float& value, - uint64_t& adj_id, int32_t& adj_i, int32_t& adj_j, int32_t& adj_k, float& adj_value) +CUDA_CALLABLE inline void adj_volume_lookup_index(uint64_t id, int32_t i, int32_t j, int32_t k, uint64_t &adj_id, + int32_t &adj_i, int32_t &adj_j, int32_t &adj_k, const vec3 &adj_ret) { - adj_value += volume_lookup_f(id, i, j, 
k);
+    // NOP
 }
 
-CUDA_CALLABLE inline void volume_store_v(uint64_t id, int32_t i, int32_t j, int32_t k, const vec3& value)
+// volume_store
+
+template <typename T>
+CUDA_CALLABLE inline void volume_store(uint64_t id, int32_t i, int32_t j, int32_t k, const T &value)
 {
-    if (volume::get_grid_type(volume::id_to_buffer(id)) != PNANOVDB_GRID_TYPE_VEC3F) return;
+    using traits = volume::pnano_traits<T>;
 
     const pnanovdb_buf_t buf = volume::id_to_buffer(id);
+    if (volume::get_grid_type(buf) != traits::GRID_TYPE)
+        return;
+
     const pnanovdb_root_handle_t root = volume::get_root(buf);
+    const pnanovdb_coord_t ijk{i, j, k};
+
+    pnanovdb_uint32_t level;
+    const pnanovdb_address_t address =
+        pnanovdb_root_get_value_address_and_level(traits::GRID_TYPE, buf, root, PNANOVDB_REF(ijk), PNANOVDB_REF(level));
 
-    const pnanovdb_coord_t ijk{ i, j, k };
-    const pnanovdb_address_t address = pnanovdb_root_get_value_address(PNANOVDB_GRID_TYPE_VEC3F, buf, root, PNANOVDB_REF(ijk));
-    const pnanovdb_vec3_t v{ value[0], value[1], value[2] };
-    pnanovdb_write_vec3(buf, address, &v);
+    if (level == 0)
+    {
+        // only write at leaf level (prevent modifying background value)
+        // TODO: is this the intended semantics? or should we allow writing to the background?
+        volume::pnano_write(value, buf, address);
+    }
 }
 
-CUDA_CALLABLE inline void adj_volume_store_v(
-    uint64_t id, int32_t i, int32_t j, int32_t k, const vec3& value,
-    uint64_t& adj_id, int32_t& adj_i, int32_t& adj_j, int32_t& adj_k, vec3& adj_value)
+template <typename T>
+CUDA_CALLABLE inline void adj_volume_store(uint64_t id, int32_t i, int32_t j, int32_t k, const T &value,
+                                           uint64_t &adj_id, int32_t &adj_i, int32_t &adj_j, int32_t &adj_k,
+                                           T &adj_value)
 {
-    adj_value = add(adj_value, volume_lookup_v(id, i, j, k));
+    // NOP -- adjoint of grid values is not available
 }
 
-CUDA_CALLABLE inline void volume_store_i(uint64_t id, int32_t i, int32_t j, int32_t k, const int32_t& value)
+CUDA_CALLABLE inline void volume_store_f(uint64_t id, int32_t i, int32_t j, int32_t k, const float &value)
 {
-    if (volume::get_grid_type(volume::id_to_buffer(id)) != PNANOVDB_GRID_TYPE_INT32) return;
+    volume_store(id, i, j, k, value);
+}
 
-    const pnanovdb_buf_t buf = volume::id_to_buffer(id);
-    const pnanovdb_root_handle_t root = volume::get_root(buf);
+CUDA_CALLABLE inline void adj_volume_store_f(uint64_t id, int32_t i, int32_t j, int32_t k, const float &value,
+                                             uint64_t &adj_id, int32_t &adj_i, int32_t &adj_j, int32_t &adj_k,
+                                             float &adj_value)
+{
+    adj_volume_store(id, i, j, k, value, adj_id, adj_i, adj_j, adj_k, adj_value);
+}
+
+CUDA_CALLABLE inline void volume_store_v(uint64_t id, int32_t i, int32_t j, int32_t k, const vec3 &value)
+{
+    volume_store(id, i, j, k, value);
+}
 
-    const pnanovdb_coord_t ijk{ i, j, k };
-    const pnanovdb_address_t address = pnanovdb_root_get_value_address(PNANOVDB_GRID_TYPE_INT32, buf, root, PNANOVDB_REF(ijk));
-    pnanovdb_write_int32(buf, address, value);
+CUDA_CALLABLE inline void adj_volume_store_v(uint64_t id, int32_t i, int32_t j, int32_t k, const vec3 &value,
+                                             uint64_t &adj_id, int32_t &adj_i, int32_t &adj_j, int32_t &adj_k,
+                                             vec3 &adj_value)
+{
+    adj_volume_store(id, i, j, k, value, adj_id, adj_i, adj_j, adj_k, adj_value);
+}
+
+CUDA_CALLABLE inline void volume_store_i(uint64_t id, int32_t i, int32_t j, int32_t k, const int32_t &value)
+{
+    volume_store(id, i, j, k, value);
 }
 
-CUDA_CALLABLE inline void adj_volume_store_i(
-    uint64_t id, int32_t i, int32_t j, int32_t k, const int32_t& value,
-    uint64_t& adj_id, int32_t& adj_i, int32_t& adj_j, int32_t& adj_k, int32_t& adj_value) 
+CUDA_CALLABLE inline void adj_volume_store_i(uint64_t id, int32_t i, int32_t j, int32_t k, const int32_t &value, + uint64_t &adj_id, int32_t &adj_i, int32_t &adj_j, int32_t &adj_k, + int32_t &adj_value) { - adj_value = add(adj_value, volume_lookup_i(id, i, j, k)); + adj_volume_store(id, i, j, k, value, adj_id, adj_i, adj_j, adj_k, adj_value); } // Transform position from index space to world space CUDA_CALLABLE inline vec3 volume_index_to_world(uint64_t id, vec3 uvw) { const pnanovdb_buf_t buf = volume::id_to_buffer(id); - const pnanovdb_grid_handle_t grid = { 0u }; - const pnanovdb_vec3_t pos{ uvw[0], uvw[1], uvw[2] }; + const pnanovdb_grid_handle_t grid = {0u}; + const pnanovdb_vec3_t pos{uvw[0], uvw[1], uvw[2]}; const pnanovdb_vec3_t xyz = pnanovdb_grid_index_to_worldf(buf, grid, PNANOVDB_REF(pos)); - return { xyz.x, xyz.y, xyz.z }; + return {xyz.x, xyz.y, xyz.z}; } // Transform position from world space to index space CUDA_CALLABLE inline vec3 volume_world_to_index(uint64_t id, vec3 xyz) { const pnanovdb_buf_t buf = volume::id_to_buffer(id); - const pnanovdb_grid_handle_t grid = { 0u }; - const pnanovdb_vec3_t pos{ xyz[0], xyz[1], xyz[2] }; + const pnanovdb_grid_handle_t grid = {0u}; + const pnanovdb_vec3_t pos{xyz[0], xyz[1], xyz[2]}; const pnanovdb_vec3_t uvw = pnanovdb_grid_world_to_indexf(buf, grid, PNANOVDB_REF(pos)); - return { uvw.x, uvw.y, uvw.z }; + return {uvw.x, uvw.y, uvw.z}; } -CUDA_CALLABLE inline void adj_volume_index_to_world(uint64_t id, vec3 uvw, uint64_t& adj_id, vec3& adj_uvw, const vec3& adj_ret) +CUDA_CALLABLE inline void adj_volume_index_to_world(uint64_t id, vec3 uvw, uint64_t &adj_id, vec3 &adj_uvw, + const vec3 &adj_ret) { const pnanovdb_buf_t buf = volume::id_to_buffer(id); - const pnanovdb_grid_handle_t grid = { 0u }; - const pnanovdb_vec3_t pos{ adj_ret[0], adj_ret[1], adj_ret[2] }; + const pnanovdb_grid_handle_t grid = {0u}; + const pnanovdb_vec3_t pos{adj_ret[0], adj_ret[1], adj_ret[2]}; const pnanovdb_vec3_t xyz = pnanovdb_grid_index_to_world_dirf(buf, grid, PNANOVDB_REF(pos)); - adj_uvw = add(adj_uvw, vec3{ xyz.x, xyz.y, xyz.z }); + adj_uvw = add(adj_uvw, vec3{xyz.x, xyz.y, xyz.z}); } -CUDA_CALLABLE inline void adj_volume_world_to_index(uint64_t id, vec3 xyz, uint64_t& adj_id, vec3& adj_xyz, const vec3& adj_ret) +CUDA_CALLABLE inline void adj_volume_world_to_index(uint64_t id, vec3 xyz, uint64_t &adj_id, vec3 &adj_xyz, + const vec3 &adj_ret) { const pnanovdb_buf_t buf = volume::id_to_buffer(id); - const pnanovdb_grid_handle_t grid = { 0u }; - const pnanovdb_vec3_t pos{ adj_ret[0], adj_ret[1], adj_ret[2] }; + const pnanovdb_grid_handle_t grid = {0u}; + const pnanovdb_vec3_t pos{adj_ret[0], adj_ret[1], adj_ret[2]}; const pnanovdb_vec3_t uvw = pnanovdb_grid_world_to_index_dirf(buf, grid, PNANOVDB_REF(pos)); - adj_xyz = add(adj_xyz, vec3{ uvw.x, uvw.y, uvw.z }); + adj_xyz = add(adj_xyz, vec3{uvw.x, uvw.y, uvw.z}); } // Transform direction from index space to world space CUDA_CALLABLE inline vec3 volume_index_to_world_dir(uint64_t id, vec3 uvw) { const pnanovdb_buf_t buf = volume::id_to_buffer(id); - const pnanovdb_grid_handle_t grid = { 0u }; - const pnanovdb_vec3_t pos{ uvw[0], uvw[1], uvw[2] }; + const pnanovdb_grid_handle_t grid = {0u}; + const pnanovdb_vec3_t pos{uvw[0], uvw[1], uvw[2]}; const pnanovdb_vec3_t xyz = pnanovdb_grid_index_to_world_dirf(buf, grid, PNANOVDB_REF(pos)); - return { xyz.x, xyz.y, xyz.z }; + return {xyz.x, xyz.y, xyz.z}; } // Transform direction from world space to index space CUDA_CALLABLE inline vec3 
volume_world_to_index_dir(uint64_t id, vec3 xyz) { const pnanovdb_buf_t buf = volume::id_to_buffer(id); - const pnanovdb_grid_handle_t grid = { 0u }; - const pnanovdb_vec3_t pos{ xyz[0], xyz[1], xyz[2] }; + const pnanovdb_grid_handle_t grid = {0u}; + const pnanovdb_vec3_t pos{xyz[0], xyz[1], xyz[2]}; const pnanovdb_vec3_t uvw = pnanovdb_grid_world_to_index_dirf(buf, grid, PNANOVDB_REF(pos)); - return { uvw.x, uvw.y, uvw.z }; + return {uvw.x, uvw.y, uvw.z}; } -CUDA_CALLABLE inline void adj_volume_index_to_world_dir(uint64_t id, vec3 uvw, uint64_t& adj_id, vec3& adj_uvw, const vec3& adj_ret) +CUDA_CALLABLE inline void adj_volume_index_to_world_dir(uint64_t id, vec3 uvw, uint64_t &adj_id, vec3 &adj_uvw, + const vec3 &adj_ret) { adj_volume_index_to_world(id, uvw, adj_id, adj_uvw, adj_ret); } -CUDA_CALLABLE inline void adj_volume_world_to_index_dir(uint64_t id, vec3 xyz, uint64_t& adj_id, vec3& adj_xyz, const vec3& adj_ret) +CUDA_CALLABLE inline void adj_volume_world_to_index_dir(uint64_t id, vec3 xyz, uint64_t &adj_id, vec3 &adj_xyz, + const vec3 &adj_ret) { adj_volume_world_to_index(id, xyz, adj_id, adj_xyz, adj_ret); } diff --git a/warp/native/volume_builder.cu b/warp/native/volume_builder.cu index 53449417..31c1a531 100644 --- a/warp/native/volume_builder.cu +++ b/warp/native/volume_builder.cu @@ -1,425 +1,446 @@ #include "volume_builder.h" +#include + #include #include #include -#include - -// Explanation of key types -// ------------------------ -// -// leaf_key: -// .__.__. .... .__.__.__.__.__.__.__.__.__.__.__.__.__.__.__.__.__.__.__.__.__.__.__.__.__.__.__.__. -// 63 62 .... 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 -// XX|< tile key >|< upper offset >|< lower offset >| -// -// tile key (36 bit): -// (uint32(ijk[2]) >> ChildT::TOTAL) | -// (uint64_t(uint32(ijk[1]) >> ChildT::TOTAL)) << 12 | -// (uint64_t(uint32(ijk[0]) >> ChildT::TOTAL)) << 24 -// -// lower_key (51 bits) == leaf_key >> 12 -// -// upper_key (36 bits) == lower_key >> 15 == leaf_key >> 27 == tile key - -CUDA_CALLABLE inline uint64_t coord_to_full_key(const nanovdb::Coord& ijk) -{ - using Tree = nanovdb::FloatTree; // any type is fine at this point - assert((abs(ijk[0]) >> 24) == 0); - assert((abs(ijk[1]) >> 24) == 0); - assert((abs(ijk[2]) >> 24) == 0); - constexpr uint32_t MASK_12BITS = (1u << 12) - 1u; - const uint64_t tile_key36 = - ((uint32_t(ijk[2]) >> 12) & MASK_12BITS) | // z is the lower 12 bits - (uint64_t((uint32_t(ijk[1]) >> 12) & MASK_12BITS) << 12) | // y is the middle 12 bits - (uint64_t((uint32_t(ijk[0]) >> 12) & MASK_12BITS) << 24); // x is the upper 12 bits - const uint32_t upper_offset = Tree::Node2::CoordToOffset(ijk); - const uint32_t lower_offset = Tree::Node1::CoordToOffset(ijk); - return (tile_key36 << 27) | (upper_offset << 12) | lower_offset; -} -__global__ -void generate_keys(size_t num_points, const nanovdb::Coord* points, uint64_t* all_leaf_keys) +#if defined(__NVCC_DIAG_PRAGMA_SUPPORT__) +// dynamic initialization is not supported for a function-scope static __shared__ variable within a +// __device__/__global__ function +#pragma nv_diag_suppress 20054 +#elif defined(__NVCC__) +#pragma diag_suppress 20054 +#endif +namespace +{ +/// Allocator class following interface of cub::cachingDeviceAllocator, as expected by naovdb::PointsToGrid +struct Allocator { - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - if (tid >= num_points) return; - all_leaf_keys[tid] = coord_to_full_key(points[tid]); -} + cudaError_t DeviceAllocate(void **d_ptr, ///< [out] 
Reference to pointer to the allocation + size_t bytes, ///< [in] Minimum number of bytes for the allocation + cudaStream_t active_stream) ///< [in] The stream to be associated with this allocation + { + // in PointsToGrid stream argument always coincide with current stream, ignore + *d_ptr = alloc_device(WP_CURRENT_CONTEXT, bytes); + return cudaSuccess; + } + + cudaError_t DeviceFree(void *d_ptr) + { + free_device(WP_CURRENT_CONTEXT, d_ptr); + return cudaSuccess; + } + + cudaError_t FreeAllCached() + { + return cudaSuccess; + } +}; -__global__ -void generate_keys(size_t num_points, const nanovdb::Vec3f* points, uint64_t* all_leaf_keys, float one_over_voxel_size, nanovdb::Vec3f translation) +/// @brief Implementation of NanoVDB's DeviceBuffer that uses warp allocators +class DeviceBuffer { - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - if (tid >= num_points) return; + uint64_t mSize; // total number of bytes managed by this buffer (assumed to be identical for host and device) + void *mCpuData, *mGpuData; // raw pointers to the host and device buffers + bool mManaged; + + public: + /// @brief Static factory method that return an instance of this buffer + /// @param size byte size of buffer to be initialized + /// @param dummy this argument is currently ignored but required to match the API of the HostBuffer + /// @param host If true buffer is initialized only on the host/CPU, else on the device/GPU + /// @param stream optional stream argument (defaults to stream NULL) + /// @return An instance of this class using move semantics + static DeviceBuffer create(uint64_t size, const DeviceBuffer *dummy = nullptr, bool host = true, + void *stream = nullptr) + { + return DeviceBuffer(size, host, stream); + } - const nanovdb::Coord ijk = ((points[tid] - translation) * one_over_voxel_size).round(); - all_leaf_keys[tid] = coord_to_full_key(ijk); -} + /// @brief Static factory method that return an instance of this buffer that wraps externally managed memory + /// @param size byte size of buffer specified by external memory + /// @param cpuData pointer to externally managed host memory + /// @param gpuData pointer to externally managed device memory + /// @return An instance of this class using move semantics + static DeviceBuffer create(uint64_t size, void *cpuData, void *gpuData) + { + return DeviceBuffer(size, cpuData, gpuData); + } -// Convert a 36 bit tile key to the ijk origin of the addressed tile -CUDA_CALLABLE inline nanovdb::Coord tile_key36_to_coord(uint64_t tile_key36) { - auto extend_sign = [](uint32_t i) -> int32_t { return i | ((i>>11 & 1) * 0xFFFFF800);}; - constexpr uint32_t MASK_12BITS = (1u << 12) - 1u; - const int32_t i = extend_sign(uint32_t(tile_key36 >> 24) & MASK_12BITS); - const int32_t j = extend_sign(uint32_t(tile_key36 >> 12) & MASK_12BITS); - const int32_t k = extend_sign(uint32_t(tile_key36) & MASK_12BITS); - return nanovdb::Coord(i, j, k) << 12; -} + /// @brief Constructor + /// @param size byte size of buffer to be initialized + /// @param host If true buffer is initialized only on the host/CPU, else on the device/GPU + /// @param stream optional stream argument (defaults to stream NULL) + DeviceBuffer(uint64_t size = 0, bool host = true, void *stream = nullptr) + : mSize(0), mCpuData(nullptr), mGpuData(nullptr), mManaged(false) + { + if (size > 0) + this->init(size, host, stream); + } + + DeviceBuffer(uint64_t size, void *cpuData, void *gpuData) + : mSize(size), mCpuData(cpuData), mGpuData(gpuData), mManaged(false) + { + } + /// @brief Disallow 
copy-construction + DeviceBuffer(const DeviceBuffer &) = delete; -// --- CUB helpers --- -template -struct ShiftRight { - CUDA_CALLABLE inline OutType operator()(const InType& v) const { - return static_cast(v >> bits); + /// @brief Move copy-constructor + DeviceBuffer(DeviceBuffer &&other) noexcept + : mSize(other.mSize), mCpuData(other.mCpuData), mGpuData(other.mGpuData), mManaged(other.mManaged) + { + other.mSize = 0; + other.mCpuData = nullptr; + other.mGpuData = nullptr; + other.mManaged = false; } -}; -template -struct ShiftRightIterator : public cub::TransformInputIterator, InType*> { - using BASE = cub::TransformInputIterator, InType*>; - CUDA_CALLABLE inline ShiftRightIterator(uint64_t* input_itr) - : BASE(input_itr, ShiftRight()) {} -}; + /// @brief Disallow copy assignment operation + DeviceBuffer &operator=(const DeviceBuffer &) = delete; + /// @brief Move copy assignment operation + DeviceBuffer &operator=(DeviceBuffer &&other) noexcept + { + this->clear(); + mSize = other.mSize; + mCpuData = other.mCpuData; + mGpuData = other.mGpuData; + mManaged = other.mManaged; + other.mSize = 0; + other.mCpuData = nullptr; + other.mGpuData = nullptr; + other.mManaged = false; + return *this; + } -// --- Atomic instructions for NanoVDB construction --- -template -CUDA_CALLABLE_DEVICE void set_mask_atomic(MaskT& mask, uint32_t n) { - unsigned long long int* words = reinterpret_cast(&mask); - atomicOr(words + (n / 64), 1ull << (n & 63)); -} + /// @brief Destructor frees memory on both the host and device + ~DeviceBuffer() + { + this->clear(); + }; + + /// @brief Initialize buffer + /// @param size byte size of buffer to be initialized + /// @param host If true buffer is initialized only on the host/CPU, else on the device/GPU + /// @note All existing buffers are first cleared + /// @warning size is expected to be non-zero. Use clear() clear buffer! + void init(uint64_t size, bool host = true, void *stream = nullptr) + { + if (mSize > 0) + this->clear(stream); + NANOVDB_ASSERT(size > 0); + if (host) + { + mCpuData = + alloc_pinned(size); // un-managed pinned memory on the host (can be slow to access!). Always 32B aligned + } + else + { + mGpuData = alloc_device(WP_CURRENT_CONTEXT, size); + } + mSize = size; + mManaged = true; + } -template -CUDA_CALLABLE_DEVICE void expand_cwise_atomic(nanovdb::BBox& bbox, const Vec3T& v) { - atomicMin(&bbox.mCoord[0][0], v[0]); - atomicMin(&bbox.mCoord[0][1], v[1]); - atomicMin(&bbox.mCoord[0][2], v[2]); - atomicMax(&bbox.mCoord[1][0], v[0]); - atomicMax(&bbox.mCoord[1][1], v[1]); - atomicMax(&bbox.mCoord[1][2], v[2]); -} + /// @brief Retuns a raw pointer to the host/CPU buffer managed by this allocator. + /// @warning Note that the pointer can be NULL! + void *data() const + { + return mCpuData; + } + + /// @brief Retuns a raw pointer to the device/GPU buffer managed by this allocator. + /// @warning Note that the pointer can be NULL! + void *deviceData() const + { + return mGpuData; + } + + /// @brief Returns the size in bytes of the raw memory buffer managed by this allocator. + uint64_t size() const + { + return mSize; + } -template -__hostdev__ const typename RootDataType::Tile* find_tile(const RootDataType* root_data, const nanovdb::Coord& ijk) + //@{ + /// @brief Returns true if this allocator is empty, i.e. 
has no allocated memory + bool empty() const + { + return mSize == 0; + } + bool isEmpty() const + { + return mSize == 0; + } + //@} + + /// @brief Detach device data so it is not dealloced when this buffer is destroyed + void detachDeviceData() + { + mGpuData = nullptr; + if (!mCpuData) + { + mSize = 0; + } + } + + /// @brief De-allocate all memory managed by this allocator and set all pointers to NULL + void clear(void *stream = nullptr) + { + if (mManaged && mGpuData) + free_device(WP_CURRENT_CONTEXT, mGpuData); + if (mManaged && mCpuData) + free_pinned(mCpuData); + mCpuData = mGpuData = nullptr; + mSize = 0; + mManaged = false; + } + +}; // DeviceBuffer class + +template __global__ void activateAllLeafVoxels(Tree *tree) { - using Tile = typename RootDataType::Tile; - const Tile *tiles = reinterpret_cast(root_data + 1); - const auto key = RootDataType::CoordToKey(ijk); + const unsigned leaf_count = tree->mNodeCount[0]; - for (uint32_t i = 0; i < root_data->mTableSize; ++i) + const unsigned tid = blockIdx.x * blockDim.x + threadIdx.x; + + if (tid < leaf_count) { - if (tiles[i].key == key) - return &tiles[i]; + // activate all leaf voxels + typename Tree::LeafNodeType *const leaf_nodes = tree->getFirstLeaf(); + typename Tree::LeafNodeType &leaf = leaf_nodes[tid]; + leaf.mValueMask.setOn(); + leaf.updateBBox(); + } + + if (tid == 0) + { + tree->mVoxelCount = Tree::LeafNodeType::SIZE * leaf_count; // full leaves } - return nullptr; } -// --- Wrapper for launching lambda kernels -template -__global__ void kernel(const size_t num_items, Func f, Args... args) +template +__device__ std::enable_if_t::is_index> setBackgroundValue( + Node &node, unsigned tile_id, const typename Node::BuildType background_value) { - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - if (tid >= num_items) return; - f(tid, args...); + node.setValue(tile_id, background_value); } -template -void build_grid_from_tiles(nanovdb::Grid> *&out_grid, - size_t &out_grid_size, - const void *points, - size_t num_points, - bool points_in_world_space, - const BuildGridParams ¶ms) +template +__device__ std::enable_if_t::is_index> setBackgroundValue( + Node &node, unsigned tile_id, const typename Node::BuildType background_value) { - using FloatT = typename nanovdb::FloatTraits::FloatType; - const BuildT ZERO_VAL{0}; - const FloatT ZERO_SCALAR{0}; - - // Don't want to access "params" in kernels - const double dx = params.voxel_size; - const double Tx = params.translation[0], Ty = params.translation[1], Tz = params.translation[2]; - const BuildT background_value = params.background_value; +} - const unsigned int num_threads = 256; - unsigned int num_blocks; +template +__device__ std::enable_if_t::is_index> setBackgroundValue( + Node &node, const typename Node::BuildType background_value) +{ + node.mBackground = background_value; +} - out_grid = nullptr; - out_grid_size = 0; +template +__device__ std::enable_if_t::is_index> setBackgroundValue( + Node &node, const typename Node::BuildType background_value) +{ +} - cub::CachingDeviceAllocator allocator; - - uint64_t* leaf_keys; - uint64_t* lower_keys; - uint64_t* upper_keys; - uint32_t* node_counts; - uint32_t leaf_count, lower_node_count, upper_node_count; +template +__global__ void setInternalBBoxAndBackgroundValue(Tree *tree, const typename Tree::BuildType background_value) +{ + using BBox = nanovdb::math::BBox; + __shared__ BBox bbox; - allocator.DeviceAllocate((void**)&leaf_keys, sizeof(uint64_t) * num_points); - allocator.DeviceAllocate((void**)&node_counts, sizeof(uint32_t) 
* 3); + const unsigned node_count = tree->mNodeCount[NodeT::LEVEL]; + const unsigned node_id = blockIdx.x; - // Phase 1: counting the nodes + if (node_id < node_count) { - // Generating keys from coords - uint64_t* all_leaf_keys; - uint64_t* all_leaf_keys_sorted; - allocator.DeviceAllocate((void**)&all_leaf_keys, sizeof(uint64_t) * num_points); - allocator.DeviceAllocate((void**)&all_leaf_keys_sorted, sizeof(uint64_t) * num_points); - - num_blocks = (static_cast(num_points) + num_threads - 1) / num_threads; - if (points_in_world_space) { - generate_keys<<>>(num_points, static_cast(points), all_leaf_keys, static_cast(1.0 / dx), nanovdb::Vec3f(params.translation)); - } else { - generate_keys<<>>(num_points, static_cast(points), all_leaf_keys); + + if (threadIdx.x == 0) + { + bbox = BBox(); } - void* d_temp_storage = nullptr; - size_t temp_storage_bytes; - - // Sort the keys, then get an array of unique keys - cub::DeviceRadixSort::SortKeys(nullptr, temp_storage_bytes, all_leaf_keys, all_leaf_keys_sorted, static_cast(num_points), /* begin_bit = */ 0, /* end_bit = */ 63); - allocator.DeviceAllocate((void**)&d_temp_storage, temp_storage_bytes); - cub::DeviceRadixSort::SortKeys(d_temp_storage, temp_storage_bytes, all_leaf_keys, all_leaf_keys_sorted, static_cast(num_points), /* begin_bit = */ 0, /* end_bit = */ 63); - allocator.DeviceFree(d_temp_storage); - - cub::DeviceSelect::Unique(nullptr, temp_storage_bytes, all_leaf_keys_sorted, leaf_keys, node_counts, static_cast(num_points)); - allocator.DeviceAllocate((void**)&d_temp_storage, temp_storage_bytes); - cub::DeviceSelect::Unique(d_temp_storage, temp_storage_bytes, all_leaf_keys_sorted, leaf_keys, node_counts, static_cast(num_points)); - allocator.DeviceFree(d_temp_storage); - check_cuda(cudaMemcpy(&leaf_count, node_counts, sizeof(uint32_t), cudaMemcpyDeviceToHost)); - - allocator.DeviceFree(all_leaf_keys); - all_leaf_keys = nullptr; - allocator.DeviceFree(all_leaf_keys_sorted); - all_leaf_keys_sorted = nullptr; - - - // Get the keys unique to lower nodes and the number of them - allocator.DeviceAllocate((void**)&lower_keys, sizeof(uint64_t) * leaf_count); - cub::DeviceSelect::Unique(nullptr, temp_storage_bytes, ShiftRightIterator<12>(leaf_keys), lower_keys, node_counts + 1, leaf_count); - allocator.DeviceAllocate((void**)&d_temp_storage, temp_storage_bytes); - cub::DeviceSelect::Unique(d_temp_storage, temp_storage_bytes, ShiftRightIterator<12>(leaf_keys), lower_keys, node_counts + 1, leaf_count); - allocator.DeviceFree(d_temp_storage); - check_cuda(cudaMemcpy(&lower_node_count, node_counts + 1, sizeof(uint32_t), cudaMemcpyDeviceToHost)); - - // Get the keys unique to upper nodes and the number of them - allocator.DeviceAllocate((void**)&upper_keys, sizeof(uint64_t) * lower_node_count); - cub::DeviceSelect::Unique(nullptr, temp_storage_bytes, ShiftRightIterator<15>(lower_keys), upper_keys, node_counts + 2, lower_node_count); - allocator.DeviceAllocate((void**)&d_temp_storage, temp_storage_bytes); - cub::DeviceSelect::Unique(d_temp_storage, temp_storage_bytes, ShiftRightIterator<15>(lower_keys), upper_keys, node_counts + 2, lower_node_count); - allocator.DeviceFree(d_temp_storage); - check_cuda(cudaMemcpy(&upper_node_count, node_counts + 2, sizeof(uint32_t), cudaMemcpyDeviceToHost)); + __syncthreads(); + + NodeT &node = tree->template getFirstNode()[node_id]; + for (unsigned child_id = threadIdx.x; child_id < NodeT::SIZE; child_id += blockDim.x) + { + if (node.isChild(child_id)) + { + bbox.expandAtomic(node.getChild(child_id)->bbox()); + } + 
else + { + setBackgroundValue(node, child_id, background_value); + } + } + + __syncthreads(); + + if (threadIdx.x == 0) + { + node.mBBox = bbox; + } } +} - using Tree = nanovdb::NanoTree; - using Grid = nanovdb::Grid; - - const size_t total_bytes = - sizeof(Grid) + - sizeof(Tree) + - sizeof(typename Tree::RootType) + - sizeof(typename Tree::RootType::Tile) * upper_node_count + - sizeof(typename Tree::Node2) * upper_node_count + - sizeof(typename Tree::Node1) * lower_node_count + - sizeof(typename Tree::Node0) * leaf_count; - - const int64_t upper_mem_offset = - sizeof(nanovdb::GridData) + sizeof(Tree) + sizeof(typename Tree::RootType) + - sizeof(typename Tree::RootType::Tile) * upper_node_count; - const int64_t lower_mem_offset = upper_mem_offset + sizeof(typename Tree::Node2) * upper_node_count; - const int64_t leaf_mem_offset = lower_mem_offset + sizeof(typename Tree::Node1) * lower_node_count; - - typename Grid::DataType* grid; - check_cuda(cudaMalloc(&grid, total_bytes)); - - typename Tree::DataType* const tree = reinterpret_cast(grid + 1); // The tree is immediately after the grid - typename Tree::RootType::DataType* const root = reinterpret_cast(tree + 1); // The root is immediately after the tree - typename Tree::RootType::Tile* const tiles = reinterpret_cast(root + 1); - typename Tree::Node2::DataType* const upper_nodes = nanovdb::PtrAdd(grid, upper_mem_offset); - typename Tree::Node1::DataType* const lower_nodes = nanovdb::PtrAdd(grid, lower_mem_offset); - typename Tree::Node0::DataType* const leaf_nodes = nanovdb::PtrAdd(grid, leaf_mem_offset); - - // Phase 2: building the tree +template +__global__ void setRootBBoxAndBackgroundValue(nanovdb::Grid *grid, + const typename Tree::BuildType background_value) +{ + using BBox = typename Tree::RootNodeType::BBoxType; + __shared__ BBox bbox; + + Tree &tree = grid->tree(); + const unsigned upper_count = tree.mNodeCount[2]; + + if (threadIdx.x == 0) { - // Setting up the tree and root node - kernel<<<1, 1>>>(1, [=] __device__(size_t i) { - tree->mNodeOffset[3] = sizeof(Tree); - tree->mNodeOffset[2] = tree->mNodeOffset[3] + sizeof(typename Tree::RootType) + sizeof(typename Tree::RootType::Tile) * upper_node_count; - tree->mNodeOffset[1] = tree->mNodeOffset[2] + sizeof(typename Tree::Node2) * upper_node_count; - tree->mNodeOffset[0] = tree->mNodeOffset[1] + sizeof(typename Tree::Node1) * lower_node_count; - tree->mNodeCount[2] = tree->mTileCount[2] = upper_node_count; - tree->mNodeCount[1] = tree->mTileCount[1] = lower_node_count; - tree->mNodeCount[0] = tree->mTileCount[0] = leaf_count; - tree->mVoxelCount = Tree::Node0::SIZE * leaf_count; // assuming full leaves - - root->mBBox = nanovdb::CoordBBox(); // init to empty - root->mTableSize = upper_node_count; - root->mBackground = background_value; - root->mMinimum = ZERO_VAL; - root->mMaximum = ZERO_VAL; - root->mAverage = ZERO_SCALAR; - root->mStdDevi = ZERO_SCALAR; - }); + bbox = BBox(); } - // Add tiles and upper nodes - // i : 0 .. 
upper_node_count-1 - num_blocks = (upper_node_count + num_threads - 1) / num_threads; + __syncthreads(); + + for (unsigned upper_id = threadIdx.x; upper_id < upper_count; upper_id += blockDim.x) { - kernel<<>>(upper_node_count, [=] __device__(size_t i) { - tiles[i].key = root->CoordToKey(tile_key36_to_coord(upper_keys[i])); - tiles[i].child = sizeof(typename Tree::RootType) + sizeof(typename Tree::RootType::Tile) * upper_node_count + sizeof(typename Tree::Node2) * i; - tiles[i].state = 0; - tiles[i].value = background_value; - - assert(reinterpret_cast(root->getChild(tiles + i)) == reinterpret_cast(upper_nodes + i)); - auto& node = upper_nodes[i]; - node.mBBox = nanovdb::CoordBBox(); - node.mFlags = 0; - node.mValueMask.setOff(); - node.mChildMask.setOff(); - node.mMinimum = ZERO_VAL; - node.mMaximum = ZERO_VAL; - node.mAverage = ZERO_SCALAR; - node.mStdDevi = ZERO_SCALAR; - for (size_t n = 0; n < Tree::Node2::SIZE; ++n) { - node.mTable[n].value = background_value; - } - }); + typename Tree::UpperNodeType &upper = tree.getFirstUpper()[upper_id]; + bbox.expandAtomic(upper.bbox()); } - constexpr uint32_t MASK_15BITS = (1u << 15) - 1u; - constexpr uint32_t MASK_12BITS = (1u << 12) - 1u; + __syncthreads(); - // Init lower nodes and register to parent - // i : 0 .. lower_node_count-1 - num_blocks = (lower_node_count + num_threads - 1) / num_threads; + if (threadIdx.x == 0) { - kernel<<>>(lower_node_count, [=] __device__(size_t i) { - uint32_t upper_offset = lower_keys[i] & MASK_15BITS; - auto* upper_node = root->getChild(find_tile(root, tile_key36_to_coord(lower_keys[i] >> 15)))->data(); - set_mask_atomic(upper_node->mChildMask, upper_offset); - upper_node->setChild(upper_offset, lower_nodes + i); - - auto& node = lower_nodes[i]; - node.mBBox = nanovdb::CoordBBox(); - node.mFlags = 0; - node.mValueMask.setOff(); - node.mChildMask.setOff(); - node.mMinimum = ZERO_VAL; - node.mMaximum = ZERO_VAL; - node.mAverage = ZERO_SCALAR; - node.mStdDevi = ZERO_SCALAR; - for (size_t n = 0; n < Tree::Node1::SIZE; ++n) { - node.mTable[n].value = background_value; - } - }); + typename Tree::RootNodeType &root = tree.root(); + setBackgroundValue(root, background_value); + root.mBBox = bbox; + + grid->mWorldBBox = root.mBBox.transform(grid->map()); } +} + +template +void finalize_grid(nanovdb::Grid> &out_grid, const BuildGridParams ¶ms) +{ + // set background value, activate all voxels for allocated tiles and update bbox + + using Tree = nanovdb::NanoTree; + Tree *tree = &out_grid.tree(); + + int node_counts[3]; + memcpy_d2h(WP_CURRENT_CONTEXT, node_counts, tree->mNodeCount, sizeof(node_counts)); + // synchronization below is unnecessary as node_counts is in pageable memory. 
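+    // (CUDA copies from device memory to pageable host memory only return once
+    //  the transfer has completed, so node_counts is already valid here)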
+ // keep it for clarity + cudaStream_t stream = static_cast(cuda_stream_get_current()); + cuda_stream_synchronize(stream); + + const unsigned int leaf_count = node_counts[0]; + const unsigned int lower_count = node_counts[1]; + const unsigned int upper_count = node_counts[2]; + + constexpr unsigned NUM_THREADS = 256; + const unsigned leaf_blocks = (leaf_count + NUM_THREADS - 1) / NUM_THREADS; + activateAllLeafVoxels<<>>(tree); + + setInternalBBoxAndBackgroundValue + <<>>(tree, params.background_value); + setInternalBBoxAndBackgroundValue + <<>>(tree, params.background_value); + setRootBBoxAndBackgroundValue<<<1, NUM_THREADS, 0, stream>>>(&out_grid, params.background_value); + + check_cuda(cuda_context_check(WP_CURRENT_CONTEXT)); +} + +template <> +void finalize_grid(nanovdb::Grid> &out_grid, + const BuildGridParams ¶ms) +{ + // nothing to do for OnIndex grids +} + +/// "fancy-pointer" that transforms from world to index coordinates +struct WorldSpacePointsPtr +{ + const nanovdb::Vec3f *points; + const nanovdb::Map map; - // Init leaf nodes and register to parent - // i : 0 .. leaf_count-1 - num_blocks = (leaf_count + num_threads - 1) / num_threads; + __device__ nanovdb::Vec3f operator[](int idx) const { - kernel<<>>(leaf_count, [=] __device__(size_t i) { - uint32_t lower_offset = leaf_keys[i] & MASK_12BITS; - uint32_t upper_offset = (leaf_keys[i] >> 12) & MASK_15BITS; - const nanovdb::Coord ijk = tile_key36_to_coord(leaf_keys[i] >> 27); - - auto* upper_node = root->getChild(find_tile(root, ijk))->data(); - auto* lower_node = upper_node->getChild(upper_offset)->data(); - set_mask_atomic(lower_node->mChildMask, lower_offset); - lower_node->setChild(lower_offset, leaf_nodes + i); - - const nanovdb::Coord localUpperIjk = Tree::Node2::OffsetToLocalCoord(upper_offset) << Tree::Node1::TOTAL; - const nanovdb::Coord localLowerIjk = Tree::Node1::OffsetToLocalCoord(lower_offset) << Tree::Node0::TOTAL; - const nanovdb::Coord leafOrigin = ijk + localUpperIjk + localLowerIjk; - - auto& node = leaf_nodes[i]; - node.mBBoxMin = leafOrigin; - node.mBBoxDif[0] = leaf_nodes[i].mBBoxDif[1] = leaf_nodes[i].mBBoxDif[2] = Tree::Node0::DIM; - node.mFlags = 0; - node.mValueMask.setOn(); - node.mMinimum = ZERO_VAL; - node.mMaximum = ZERO_VAL; - node.mAverage = ZERO_SCALAR; - node.mStdDevi = ZERO_SCALAR; - // mValues is undefined - - // propagating bbox up: - expand_cwise_atomic(lower_node->mBBox, leafOrigin); - expand_cwise_atomic(lower_node->mBBox, leafOrigin + nanovdb::Coord(Tree::Node0::DIM)); - }); + return map.applyInverseMapF(points[idx]); } - // Propagating bounding boxes from lower nodes to upper nodes - // i : 0 .. 
lower_node_count-1 - num_blocks = (lower_node_count + num_threads - 1) / num_threads; + __device__ nanovdb::Vec3f operator*() const { - kernel<<>>(lower_node_count, [=] __device__(size_t i) { - auto* upper_node = root->getChild(find_tile(root, tile_key36_to_coord(lower_keys[i] >> 15)))->data(); - expand_cwise_atomic(upper_node->mBBox, lower_nodes[i].mBBox.min()); - expand_cwise_atomic(upper_node->mBBox, lower_nodes[i].mBBox.max()); - }); + return (*this)[0]; } +}; - // Setting up root bounding box and grid - { - kernel<<<1, 1>>>(1, [=] __device__(size_t i) { - for (int i = 0; i < upper_node_count; ++i) { - root->mBBox.expand(upper_nodes[i].mBBox.min()); - root->mBBox.expand(upper_nodes[i].mBBox.max()); - } +} // namespace - nanovdb::Map map; - { - const double mat[4][4] = { - {dx, 0.0, 0.0, 0.0}, // row 0 - {0.0, dx, 0.0, 0.0}, // row 1 - {0.0, 0.0, dx, 0.0}, // row 2 - {Tx, Ty, Tz, 1.0}, // row 3 - }; - const double invMat[4][4] = { - {1 / dx, 0.0, 0.0, 0.0}, // row 0 - {0.0, 1 / dx, 0.0, 0.0}, // row 1 - {0.0, 0.0, 1 / dx, 0.0}, // row 2 - {0.0, 0.0, 0.0, 0.0}, // row 3, ignored by Map::set - }; - map.set(mat, invMat, 1.0); - } +namespace nanovdb +{ +template <> struct BufferTraits +{ + static constexpr bool hasDeviceDual = true; +}; + +} // namespace nanovdb + +template +void build_grid_from_points(nanovdb::Grid> *&out_grid, size_t &out_grid_size, + const void *points, size_t num_points, bool points_in_world_space, + const BuildGridParams ¶ms) +{ + + out_grid = nullptr; + out_grid_size = 0; - grid->mMagic = NANOVDB_MAGIC_NUMBER; - grid->mChecksum = 0xFFFFFFFFFFFFFFFFull; - grid->mVersion = nanovdb::Version(); - grid->mFlags = static_cast(nanovdb::GridFlags::HasBBox) | - static_cast(nanovdb::GridFlags::IsBreadthFirst); - grid->mGridIndex = 0; - grid->mGridCount = 1; - grid->mGridSize = total_bytes; - // mGridName is set below - grid->mWorldBBox.mCoord[0] = map.applyMap(nanovdb::Vec3R(root->mBBox.mCoord[0])); - grid->mWorldBBox.mCoord[1] = map.applyMap(nanovdb::Vec3R(root->mBBox.mCoord[1])); - grid->mVoxelSize = nanovdb::Vec3d(dx); - grid->mMap = map; - grid->mGridClass = nanovdb::GridClass::Unknown; - grid->mGridType = nanovdb::mapToGridType(); - grid->mBlindMetadataOffset = total_bytes; - grid->mBlindMetadataCount = 0; - }); + cudaStream_t stream = static_cast(cuda_stream_get_current()); + nanovdb::Map map(params.voxel_size, params.translation); + nanovdb::tools::cuda::PointsToGrid p2g(map, stream); + + // p2g.setVerbose(2); + p2g.setGridName(params.name); + p2g.setChecksum(nanovdb::CheckMode::Disable); + + // Only compute bbox for OnIndex grids. 
Otherwise bbox will be computed after activating all leaf voxels + p2g.includeBBox(nanovdb::BuildTraits::is_onindex); + + nanovdb::GridHandle grid_handle; + + if (points_in_world_space) + { + grid_handle = p2g.getHandle(WorldSpacePointsPtr{static_cast(points), map}, num_points, + DeviceBuffer()); + } + else + { + grid_handle = p2g.getHandle(static_cast(points), num_points, DeviceBuffer()); } - check_cuda(cudaMemcpy(grid->mGridName, params.name, 256, cudaMemcpyHostToDevice)); + out_grid = grid_handle.deviceGrid(); + out_grid_size = grid_handle.gridSize(); - allocator.DeviceFree(lower_keys); - allocator.DeviceFree(upper_keys); - allocator.DeviceFree(leaf_keys); - allocator.DeviceFree(node_counts); + finalize_grid(*out_grid, params); - out_grid = reinterpret_cast(grid); - out_grid_size = total_bytes; + // So that buffer is not destroyed when handles goes out of scope + grid_handle.buffer().detachDeviceData(); } -template void build_grid_from_tiles(nanovdb::Grid>*&, size_t&, const void*, size_t, bool, const BuildGridParams&); -template void build_grid_from_tiles(nanovdb::Grid>*&, size_t&, const void*, size_t, bool, const BuildGridParams&); -template void build_grid_from_tiles(nanovdb::Grid>*&, size_t&, const void*, size_t, bool, const BuildGridParams&); +template void build_grid_from_points(nanovdb::Grid> *&, size_t &, const void *, size_t, bool, + const BuildGridParams &); +template void build_grid_from_points(nanovdb::Grid> *&, size_t &, const void *, + size_t, bool, const BuildGridParams &); +template void build_grid_from_points(nanovdb::Grid> *&, size_t &, const void *, size_t, bool, + const BuildGridParams &); +template void build_grid_from_points(nanovdb::Grid> *&, size_t &, const void *, + size_t, bool, const BuildGridParams &); +template void build_grid_from_points(nanovdb::Grid> *&, size_t &, const void *, + size_t, bool, const BuildGridParams &); diff --git a/warp/native/volume_builder.h b/warp/native/volume_builder.h index 2281125e..74c7d6cb 100644 --- a/warp/native/volume_builder.h +++ b/warp/native/volume_builder.h @@ -10,8 +10,23 @@ struct BuildGridParams { char name[256] = ""; }; +template<> +struct BuildGridParams { + double voxel_size = 1.0; + nanovdb::ValueIndex background_value; + nanovdb::Vec3d translation{0.0, 0.0, 0.0}; + char name[256] = ""; +}; + +template<> +struct BuildGridParams { + double voxel_size = 1.0; + nanovdb::Vec3d translation{0.0, 0.0, 0.0}; + char name[256] = ""; +}; + template -void build_grid_from_tiles(nanovdb::Grid> *&out_grid, +void build_grid_from_points(nanovdb::Grid> *&out_grid, size_t &out_grid_size, const void *points, size_t num_points, diff --git a/warp/native/volume_impl.h b/warp/native/volume_impl.h new file mode 100644 index 00000000..68e014c8 --- /dev/null +++ b/warp/native/volume_impl.h @@ -0,0 +1,61 @@ +/** Copyright (c) 2022 NVIDIA CORPORATION. All rights reserved. + * NVIDIA CORPORATION and its licensors retain all intellectual property + * and proprietary rights in and to this software, related documentation + * and any modifications thereto. Any use, reproduction, disclosure or + * distribution of this software and related documentation without an express + * license agreement from NVIDIA CORPORATION is strictly prohibited. 
+ */ + +#pragma once + +#include "volume.h" + +// Helper functions for cpp/cu files, not to be exposed to user kernels + +namespace wp +{ + +namespace volume +{ + +inline CUDA_CALLABLE pnanovdb_leaf_handle_t get_leaf(const pnanovdb_buf_t buf, const uint32_t leaf_id) +{ + const pnanovdb_tree_handle_t tree = get_tree(buf); + const uint64_t first_leaf_offset = pnanovdb_tree_get_node_offset_leaf(buf, tree); + const uint32_t leaf_stride = PNANOVDB_GRID_TYPE_GET(get_grid_type(buf), leaf_size); + return {pnanovdb_address_offset64(tree.address, first_leaf_offset + uint64_t(leaf_id) * leaf_stride)}; +} + +inline CUDA_CALLABLE pnanovdb_coord_t leaf_origin(const pnanovdb_buf_t buf, const pnanovdb_leaf_handle_t leaf) +{ + pnanovdb_coord_t origin = pnanovdb_leaf_get_bbox_min(buf, leaf); + // mask out last three bits corresponding to voxel coordinates within leaf + constexpr uint32_t MASK = (1u << 3u) - 1u; + origin.x &= ~MASK; + origin.y &= ~MASK; + origin.z &= ~MASK; + return origin; +} + +inline CUDA_CALLABLE uint64_t leaf_voxel_index(const pnanovdb_buf_t buf, const uint32_t leaf_id, + const pnanovdb_coord_t &ijk) +{ + const uint32_t grid_type = get_grid_type(buf); + + const pnanovdb_leaf_handle_t leaf = get_leaf(buf, leaf_id); + const pnanovdb_address_t value_address = pnanovdb_leaf_get_value_address(grid_type, buf, leaf, &ijk); + return volume::get_grid_voxel_index(grid_type, buf, value_address, ijk) - 1; +} + +inline CUDA_CALLABLE pnanovdb_coord_t leaf_offset_to_local_coord(uint32_t offset) +{ + pnanovdb_coord_t coord; + coord.x = (offset >> 6) & 7; + coord.y = (offset >> 3) & 7; + coord.z = (offset >> 0) & 7; + return coord; +} + +} // namespace volume + +} // namespace wp diff --git a/warp/native/warp.h b/warp/native/warp.h index 2c072b61..8af1a3ed 100644 --- a/warp/native/warp.h +++ b/warp/native/warp.h @@ -97,20 +97,28 @@ extern "C" const void* a, const void* b, const void* c, void* d, float alpha, float beta, bool row_major_a, bool row_major_b, bool allow_tf32x3_arith, int batch_count); - WP_API uint64_t volume_create_host(void* buf, uint64_t size); - WP_API void volume_get_buffer_info_host(uint64_t id, void** buf, uint64_t* size); - WP_API void volume_get_tiles_host(uint64_t id, void** buf, uint64_t* size); + WP_API uint64_t volume_create_host(void* buf, uint64_t size, bool copy, bool owner); + WP_API void volume_get_tiles_host(uint64_t id, void* buf); + WP_API void volume_get_voxels_host(uint64_t id, void* buf); WP_API void volume_destroy_host(uint64_t id); - WP_API uint64_t volume_create_device(void* context, void* buf, uint64_t size); + WP_API uint64_t volume_create_device(void* context, void* buf, uint64_t size, bool copy, bool owner); + WP_API void volume_get_tiles_device(uint64_t id, void* buf); + WP_API void volume_get_voxels_device(uint64_t id, void* buf); + WP_API void volume_destroy_device(uint64_t id); + WP_API uint64_t volume_f_from_tiles_device(void* context, void* points, int num_points, float voxel_size, float bg_value, float tx, float ty, float tz, bool points_in_world_space); WP_API uint64_t volume_v_from_tiles_device(void* context, void* points, int num_points, float voxel_size, float bg_value_x, float bg_value_y, float bg_value_z, float tx, float ty, float tz, bool points_in_world_space); WP_API uint64_t volume_i_from_tiles_device(void* context, void* points, int num_points, float voxel_size, int bg_value, float tx, float ty, float tz, bool points_in_world_space); - WP_API void volume_get_buffer_info_device(uint64_t id, void** buf, uint64_t* size); - WP_API void 
volume_get_tiles_device(uint64_t id, void** buf, uint64_t* size); - WP_API void volume_destroy_device(uint64_t id); + WP_API uint64_t volume_index_from_tiles_device(void* context, void* points, int num_points, float voxel_size, float tx, float ty, float tz, bool points_in_world_space); + WP_API uint64_t volume_from_active_voxels_device(void* context, void* points, int num_points, float voxel_size, float tx, float ty, float tz, bool points_in_world_space); + WP_API void volume_get_buffer_info(uint64_t id, void** buf, uint64_t* size); WP_API void volume_get_voxel_size(uint64_t id, float* dx, float* dy, float* dz); + WP_API void volume_get_tile_and_voxel_count(uint64_t id, uint32_t& tile_count, uint64_t& voxel_count); + WP_API const char* volume_get_grid_info(uint64_t id, uint64_t *grid_size, uint32_t *grid_index, uint32_t *grid_count, float translation[3], float transform[9], char type_str[16]); + WP_API uint32_t volume_get_blind_data_count(uint64_t id); + WP_API const char* volume_get_blind_data_info(uint64_t id, uint32_t data_index, void** buf, uint64_t* value_count, uint32_t* value_size, char type_str[16]); WP_API uint64_t marching_cubes_create_device(void* context); WP_API void marching_cubes_destroy_device(uint64_t id); diff --git a/warp/stubs.py b/warp/stubs.py index cffd825c..ac927d34 100644 --- a/warp/stubs.py +++ b/warp/stubs.py @@ -1112,6 +1112,17 @@ def volume_store_i(id: uint64, i: int32, j: int32, k: int32, value: int32): ... +@over +def volume_lookup_index(id: uint64, i: int32, j: int32, k: int32) -> int32: + """Returns the index associated to the voxel with coordinates ``i``, ``j``, ``k``. + + If the voxel at this index does not exist, this function returns -1. + This function is available for both index grids and classical volumes. + + """ + ... + + @over def volume_index_to_world(id: uint64, uvw: vec3f) -> vec3f: """Transform a point ``uvw`` defined in volume index space to world space given the volume's intrinsic affine transformation.""" diff --git a/warp/tests/assets/test_index_grid.nvdb b/warp/tests/assets/test_index_grid.nvdb new file mode 100644 index 00000000..142764ba --- /dev/null +++ b/warp/tests/assets/test_index_grid.nvdb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c99e8b7a3b8f49ff4f85c3759cf7c0580b4aca41398df67ae795db0ab7bcf54 +size 8034 diff --git a/warp/tests/test_volume.py b/warp/tests/test_volume.py index 56b09556..038f3c42 100644 --- a/warp/tests/test_volume.py +++ b/warp/tests/test_volume.py @@ -6,6 +6,7 @@ # license agreement from NVIDIA CORPORATION is strictly prohibited. 
import unittest +from typing import Any import numpy as np @@ -30,6 +31,7 @@ def test_volume_lookup_f(volume: wp.uint64, points: wp.array(dtype=wp.vec3)): k = int(p[2]) expect_eq(wp.volume_lookup_f(volume, i, j, k), expected) + expect_eq(wp.volume_lookup(volume, i, j, k, dtype=wp.float32), expected) @wp.kernel @@ -45,6 +47,7 @@ def test_volume_sample_closest_f(volume: wp.uint64, points: wp.array(dtype=wp.ve expected = 10.0 expect_eq(wp.volume_sample_f(volume, p, wp.Volume.CLOSEST), expected) + expect_eq(wp.volume_sample(volume, p, wp.Volume.CLOSEST, dtype=wp.float32), expected) q = wp.volume_index_to_world(volume, p) q_inv = wp.volume_world_to_index(volume, q) @@ -62,6 +65,7 @@ def test_volume_sample_linear_f(volume: wp.uint64, points: wp.array(dtype=wp.vec return # not testing against background values expect_near(wp.volume_sample_f(volume, p, wp.Volume.LINEAR), expected, 2.0e-4) + expect_near(wp.volume_sample(volume, p, wp.Volume.LINEAR, dtype=wp.float32), expected, 2.0e-4) @wp.kernel @@ -86,6 +90,13 @@ def test_volume_sample_grad_linear_f(volume: wp.uint64, points: wp.array(dtype=w expect_near(grad[1], expected_gy, 2.0e-4) expect_near(grad[2], expected_gz, 2.0e-4) + val = wp.volume_sample_grad(volume, p, wp.Volume.LINEAR, grad, dtype=wp.float32) + + expect_near(val, expected_val, 2.0e-4) + expect_near(grad[0], expected_gx, 2.0e-4) + expect_near(grad[1], expected_gy, 2.0e-4) + expect_near(grad[2], expected_gz, 2.0e-4) + @wp.kernel def test_volume_sample_local_f_linear_values( @@ -162,6 +173,7 @@ def test_volume_lookup_v(volume: wp.uint64, points: wp.array(dtype=wp.vec3)): k = int(p[2]) expect_eq(wp.volume_lookup_v(volume, i, j, k), expected) + expect_eq(wp.volume_lookup(volume, i, j, k, dtype=wp.vec3), expected) @wp.kernel @@ -177,6 +189,7 @@ def test_volume_sample_closest_v(volume: wp.uint64, points: wp.array(dtype=wp.ve expected = wp.vec3(10.8, -4.13, 10.26) expect_eq(wp.volume_sample_v(volume, p, wp.Volume.CLOSEST), expected) + expect_eq(wp.volume_sample(volume, p, wp.Volume.CLOSEST, dtype=wp.vec3), expected) q = wp.volume_index_to_world(volume, p) q_inv = wp.volume_world_to_index(volume, q) @@ -196,6 +209,30 @@ def test_volume_sample_linear_v(volume: wp.uint64, points: wp.array(dtype=wp.vec return # not testing against background values expect_near(wp.volume_sample_v(volume, p, wp.Volume.LINEAR), expected, 2.0e-4) + expect_near(wp.volume_sample(volume, p, wp.Volume.LINEAR, dtype=wp.vec3), expected, 2.0e-4) + + +@wp.kernel +def test_volume_sample_grad_linear_v(volume: wp.uint64, points: wp.array(dtype=wp.vec3)): + tid = wp.tid() + + p = points[tid] + + if abs(p[0]) > 10.0 or abs(p[1]) > 10.0 or abs(p[2]) > 10.0: + return # not testing against background values + + expected_val = wp.vec3( + p[0] + 2.0 * p[1] + 3.0 * p[2], 4.0 * p[0] + 5.0 * p[1] + 6.0 * p[2], 7.0 * p[0] + 8.0 * p[1] + 9.0 * p[2] + ) + expected_grad = wp.mat33(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0) + + grad = wp.mat33(0.0) + val = wp.volume_sample_grad(volume, p, wp.Volume.LINEAR, grad, dtype=wp.vec3) + + expect_near(val, expected_val, 2.0e-4) + expect_near(grad[0], expected_grad[0], 2.0e-4) + expect_near(grad[1], expected_grad[1], 2.0e-4) + expect_near(grad[2], expected_grad[2], 2.0e-4) @wp.kernel @@ -233,6 +270,7 @@ def test_volume_lookup_i(volume: wp.uint64, points: wp.array(dtype=wp.vec3)): expected = 10 expect_eq(wp.volume_lookup_i(volume, i, j, k), expected) + expect_eq(wp.volume_lookup(volume, i, j, k, dtype=wp.int32), expected) @wp.kernel @@ -248,6 +286,7 @@ def test_volume_sample_i(volume: wp.uint64, 
points: wp.array(dtype=wp.vec3)): expected = 10 expect_eq(wp.volume_sample_i(volume, p), expected) + expect_eq(wp.volume_sample(volume, p, wp.Volume.CLOSEST, dtype=wp.int32), expected) q = wp.volume_index_to_world(volume, p) q_inv = wp.volume_world_to_index(volume, q) @@ -293,9 +332,7 @@ def test_volume_store_f(volume: wp.uint64, points: wp.array(dtype=wp.vec3), valu j = int(p[1]) k = int(p[2]) - # NB: Writing outside the allocated domain overwrites the background value of the Volume - if abs(i) <= 11 and abs(j) <= 11 and abs(k) <= 11: - wp.volume_store_f(volume, i, j, k, float(i + 100 * j + 10000 * k)) + wp.volume_store(volume, i, j, k, float(i + 100 * j + 10000 * k)) values[tid] = wp.volume_lookup_f(volume, i, j, k) @@ -308,9 +345,7 @@ def test_volume_store_v(volume: wp.uint64, points: wp.array(dtype=wp.vec3), valu j = int(p[1]) k = int(p[2]) - # NB: Writing outside the allocated domain overwrites the background value of the Volume - if abs(i) <= 11 and abs(j) <= 11 and abs(k) <= 11: - wp.volume_store_v(volume, i, j, k, p) + wp.volume_store(volume, i, j, k, p) values[tid] = wp.volume_lookup_v(volume, i, j, k) @@ -323,9 +358,7 @@ def test_volume_store_i(volume: wp.uint64, points: wp.array(dtype=wp.vec3), valu j = int(p[1]) k = int(p[2]) - # NB: Writing outside the allocated domain overwrites the background value of the Volume - if abs(i) <= 11 and abs(j) <= 11 and abs(k) <= 11: - wp.volume_store_i(volume, i, j, k, i + 100 * j + 10000 * k) + wp.volume_store(volume, i, j, k, i + 100 * j + 10000 * k) values[tid] = wp.volume_lookup_i(volume, i, j, k) @@ -355,6 +388,7 @@ def test_volume_store_i(volume: wp.uint64, points: wp.array(dtype=wp.vec3), valu "float": os.path.abspath(os.path.join(os.path.dirname(__file__), "assets/test_grid.nvdb")), "int32": os.path.abspath(os.path.join(os.path.dirname(__file__), "assets/test_int32_grid.nvdb")), "vec3f": os.path.abspath(os.path.join(os.path.dirname(__file__), "assets/test_vec_grid.nvdb")), + "index": os.path.abspath(os.path.join(os.path.dirname(__file__), "assets/test_index_grid.nvdb")), "torus": os.path.abspath(os.path.join(os.path.dirname(__file__), "assets/torus.nvdb")), "float_write": os.path.abspath(os.path.join(os.path.dirname(__file__), "assets/test_grid.nvdb")), } @@ -492,7 +526,7 @@ def test_volume_sample_linear_v_gradient(test, device): ) tape.backward(values) - grad_expected = np.array([6.0, 15.0, 24.0]) + grad_expected = np.array([12.0, 15.0, 18.0]) grad_computed = tape.gradients[uvws].numpy()[0] np.testing.assert_allclose(grad_computed, grad_expected, rtol=1e-4) @@ -506,7 +540,7 @@ def test_volume_sample_linear_v_gradient(test, device): ) tape.backward(values) - grad_expected = np.array([6.0, 15.0, 24.0]) / 0.25 + grad_expected = np.array([12.0, 15.0, 18.0]) / 0.25 grad_computed = tape.gradients[xyzs].numpy()[0] np.testing.assert_allclose(grad_computed, grad_expected, rtol=1e-4) @@ -604,6 +638,188 @@ def test_volume_introspection(test, device): np.testing.assert_equal(test_volume_tiles, tiles_sorted) np.testing.assert_equal([0.25] * 3, voxel_size) + voxel_count = volume.get_voxel_count() + voxels_actual = volume.get_voxels().numpy() + assert voxel_count == voxels_actual.shape[0] + + # Voxel coordinates should be unique + voxels_unique = np.unique(voxels_actual, axis=0) + assert voxel_count == voxels_unique.shape[0] + + # Get back tiles from voxels, shoud match get_tiles() + voxel_tiles = 8 * (voxels_unique // 8) + voxel_tiles_sorted = voxel_tiles[np.lexsort(voxel_tiles.T[::-1])] + voxel_tiles_unique = np.unique(voxel_tiles_sorted, axis=0) 
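+    # Each tile is the origin of an 8x8x8 NanoVDB leaf node, so flooring voxel
+    # coordinates to multiples of 8 and deduplicating them should recover
+    # exactly the tile set reported by get_tiles()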
+ + np.testing.assert_equal(voxel_tiles_unique, tiles_sorted) + + +def test_volume_multiple_grids(test, device): + volume = volumes["index"][device.alias] + + volume_2 = volume.load_next_grid() + + test.assertIsNotNone(volume_2) + + test.assertNotEqual(volume.id, volume_2.id) + test.assertNotEqual(volume.get_voxel_count(), volume_2.get_voxel_count()) + + test.assertEqual(volume.get_grid_info().grid_count, volume_2.get_grid_info().grid_count) + test.assertEqual(volume.get_grid_info().grid_index + 1, volume_2.get_grid_info().grid_index) + + volume_3 = volume_2.load_next_grid() + test.assertIsNone(volume_3) + + +def test_volume_feature_array(test, device): + volume = volumes["index"][device.alias] + + test.assertEqual(volume.get_feature_array_count(), 1) + + array = volume.feature_array(0, dtype=wp.uint64) + test.assertEqual(array.device, device) + test.assertEqual(array.dtype, wp.uint64) + + # fVDB convention, data starts with array ndim + shape + np.testing.assert_equal(array.numpy()[0:4], [3, volume.get_voxel_count(), 2, 3]) + + +@wp.kernel +def fill_leaf_values_kernel(volume: wp.uint64, ijk: wp.array2d(dtype=wp.int32), values: wp.array(dtype=Any)): + tid = wp.tid() + + i = ijk[tid, 0] + j = ijk[tid, 1] + k = ijk[tid, 2] + + expect_eq(tid, wp.volume_lookup_index(volume, i, j, k)) + + values[tid] = wp.volume_lookup(volume, i, j, k, dtype=values.dtype) + + +@wp.kernel +def test_volume_sample_index_kernel( + volume: wp.uint64, + points: wp.array(dtype=wp.vec3), + values: wp.array(dtype=Any), + background: wp.array(dtype=Any), + sampled_values: wp.array(dtype=Any), +): + tid = wp.tid() + p = points[tid] + + ref = wp.volume_sample(volume, p, wp.Volume.LINEAR, dtype=values.dtype) + sampled_values[tid] = wp.volume_sample_index(volume, p, wp.Volume.LINEAR, values, background[0]) + expect_eq(sampled_values[tid], ref) + + +@wp.kernel +def test_volume_sample_grad_index_kernel( + volume: wp.uint64, + points: wp.array(dtype=wp.vec3), + values: wp.array(dtype=Any), + background: wp.array(dtype=Any), + sampled_values: wp.array(dtype=Any), + sampled_grads: wp.array(dtype=Any), +): + tid = wp.tid() + p = points[tid] + + ref_grad = sampled_grads.dtype() + ref = wp.volume_sample_grad(volume, p, wp.Volume.LINEAR, ref_grad, dtype=values.dtype) + + grad = type(ref_grad)() + sampled_values[tid] = wp.volume_sample_grad_index(volume, p, wp.Volume.LINEAR, values, background[0], grad) + expect_eq(sampled_values[tid], ref) + + expect_eq(grad[0], ref_grad[0]) + expect_eq(grad[1], ref_grad[1]) + expect_eq(grad[2], ref_grad[2]) + sampled_grads[tid] = grad + + +def test_volume_sample_index(test, device): + points = rng.uniform(-10.0, 10.0, size=(100, 3)) + points[0:10, 0] += 100.0 # ensure some points are over unallocated voxels + uvws = wp.array(points, dtype=wp.vec3, device=device) + + bg_values = { + "float": 10.0, + "vec3f": wp.vec3(10.8, -4.13, 10.26), + } + grad_types = { + "float": wp.vec3, + "vec3f": wp.mat33, + } + + for volume_names in ("float", "vec3f"): + with test.subTest(volume_names=volume_names): + volume = volumes[volume_names][device.alias] + + ijk = volume.get_voxels() + + values = wp.empty(shape=volume.get_voxel_count(), dtype=volume.dtype, device=device, requires_grad=True) + + vid = wp.uint64(volume.id) + wp.launch(fill_leaf_values_kernel, dim=values.shape, inputs=[vid, ijk, values], device=device) + + sampled_values = wp.empty(shape=points.shape[0], dtype=volume.dtype, device=device, requires_grad=True) + background = wp.array([bg_values[volume_names]], dtype=volume.dtype, device=device, 
requires_grad=True) + + tape = wp.Tape() + with tape: + wp.launch( + test_volume_sample_index_kernel, + dim=points.shape[0], + inputs=[vid, uvws, values, background, sampled_values], + device=device, + ) + + sampled_values.grad.fill_(1.0) + tape.backward() + + # test adjoint w.r.t voxel and background value arrays + # we should have sum(sampled_values) = sum(adj_values * values) + (adj_background * background) + sum_sampled_values = np.sum(sampled_values.numpy(), axis=0) + sum_values_adj = np.sum(values.numpy() * values.grad.numpy(), axis=0) + sum_background_adj = background.numpy()[0] * background.grad.numpy()[0] + + np.testing.assert_allclose(sum_sampled_values, sum_values_adj + sum_background_adj, rtol=1.0e-3) + + tape.reset() + + sampled_grads = wp.empty( + shape=points.shape[0], dtype=grad_types[volume_names], device=device, requires_grad=True + ) + + with tape: + wp.launch( + test_volume_sample_grad_index_kernel, + dim=points.shape[0], + inputs=[vid, uvws, values, background, sampled_values, sampled_grads], + device=device, + ) + + sampled_values.grad.fill_(1.0) + tape.backward() + + # we should have sum(sampled_values) = sum(adj_values * values) + (adj_background * background) + sum_sampled_values = np.sum(sampled_values.numpy(), axis=0) + sum_values_adj = np.sum(values.numpy() * values.grad.numpy(), axis=0) + sum_background_adj = background.numpy()[0] * background.grad.numpy()[0] + np.testing.assert_allclose(sum_sampled_values, sum_values_adj + sum_background_adj, rtol=1.0e-3) + + tape.zero() + sampled_values.grad.fill_(0.0) + sampled_grads.grad.fill_(1.0) + tape.backward() + + # we should have sum(sampled_grad, axes=(0, -1)) = sum(adj_values * values) + (adj_background * background) + sum_sampled_grads = np.sum(np.sum(sampled_grads.numpy(), axis=0), axis=-1) + sum_values_adj = np.sum(values.numpy() * values.grad.numpy(), axis=0) + sum_background_adj = background.numpy()[0] * background.grad.numpy()[0] + np.testing.assert_allclose(sum_sampled_grads, sum_values_adj + sum_background_adj, rtol=1.0e-3) + def test_volume_from_numpy(test, device): # Volume.allocate_from_tiles() is only available with CUDA @@ -657,6 +873,9 @@ class TestVolume(unittest.TestCase): add_function_test( TestVolume, "test_volume_from_numpy", test_volume_from_numpy, devices=get_selected_cuda_test_devices() ) +add_function_test(TestVolume, "test_volume_multiple_grids", test_volume_multiple_grids, devices=devices) +add_function_test(TestVolume, "test_volume_feature_array", test_volume_feature_array, devices=devices) +add_function_test(TestVolume, "test_volume_sample_index", test_volume_sample_index, devices=devices) points = {} points_jittered = {} @@ -715,6 +934,13 @@ class TestVolume(unittest.TestCase): inputs=[volumes["vec3f"][device.alias].id, points_jittered[device.alias]], devices=[device.alias], ) + add_kernel_test( + TestVolume, + test_volume_sample_grad_linear_v, + dim=len(point_grid), + inputs=[volumes["vec3f"][device.alias].id, points_jittered[device.alias]], + devices=[device.alias], + ) add_kernel_test( TestVolume, diff --git a/warp/tests/test_volume_write.py b/warp/tests/test_volume_write.py index a3725d23..75fcedc1 100644 --- a/warp/tests/test_volume_write.py +++ b/warp/tests/test_volume_write.py @@ -166,6 +166,10 @@ def test_volume_allocation(test, device): points_in_world_space=True, device=device, ) + + assert wp.types.types_equal(volume_a.dtype, wp.float32) + assert wp.types.types_equal(volume_b.dtype, wp.float32) + points = wp.array(points_ref, dtype=wp.vec3, device=device) values_a = 
wp.empty(num_points, dtype=wp.float32, device=device) values_b = wp.empty(num_points, dtype=wp.float32, device=device) @@ -201,6 +205,10 @@ def test_volume_allocate_by_tiles_f(test, device): points_ws_d = wp.array(points_ws, dtype=wp.vec3, device=device) volume_a = wp.Volume.allocate_by_tiles(points_is_d, voxel_size, background_value, translation, device=device) volume_b = wp.Volume.allocate_by_tiles(points_ws_d, voxel_size, background_value, translation, device=device) + + assert wp.types.types_equal(volume_a.dtype, wp.float32) + assert wp.types.types_equal(volume_b.dtype, wp.float32) + values_a = wp.empty(num_tiles * 512, dtype=wp.float32, device=device) values_b = wp.empty(num_tiles * 512, dtype=wp.float32, device=device) @@ -229,6 +237,9 @@ def test_volume_allocate_by_tiles_v(test, device): points_d = wp.array(points_is, dtype=wp.int32, device=device) volume = wp.Volume.allocate_by_tiles(points_d, 0.1, wp.vec3(1, 2, 3), device=device) + + assert wp.types.types_equal(volume.dtype, wp.vec3) + values = wp.empty(len(points_d) * 512, dtype=wp.vec3, device=device) wp.launch(test_volume_tile_store_v, dim=len(points_d), inputs=[volume.id, points_d], device=device) @@ -238,6 +249,72 @@ def test_volume_allocate_by_tiles_v(test, device): np.testing.assert_equal(values_res, values_ref) +def test_volume_allocate_by_tiles_index(test, device): + num_tiles = 10 + rng = np.random.default_rng(101215) + tiles = rng.integers(-512, 512, size=(num_tiles, 3), dtype=np.int32) + points_is = tiles * 8 + + points_d = wp.array(points_is, dtype=wp.int32, device=device) + volume = wp.Volume.allocate_by_tiles(points_d, 0.1, bg_value=None, device=device) + + assert volume.is_index + + vol_tiles = volume.get_tiles().numpy() / 8 + vol_tile_sorted = vol_tiles[np.lexsort(vol_tiles.T[::-1])] + vol_tile_unique = np.unique(vol_tile_sorted, axis=0) + + tile_sorted = tiles[np.lexsort(tiles.T[::-1])] + tile_unique = np.unique(tile_sorted, axis=0) + + np.testing.assert_equal(tile_unique, vol_tile_unique) + + +def test_volume_allocation_from_voxels(test, device): + point_count = 387 + rng = np.random.default_rng(101215) + + # Create from world-space points + points = wp.array(rng.uniform(5.0, 10.0, size=(point_count, 3)), dtype=float, device=device) + + volume = wp.Volume.allocate_by_voxels( + voxel_points=points, voxel_size=0.25, translation=(0.0, 5.0, 10.0), device=device + ) + + assert volume.is_index + + test.assertNotEqual(volume.id, 0) + + test.assertAlmostEqual(volume.get_voxel_size(), (0.25, 0.25, 0.25)) + voxel_count = volume.get_voxel_count() + test.assertGreaterEqual(point_count, voxel_count) + test.assertGreaterEqual(voxel_count, 1) + + voxels = volume.get_voxels() + + # Check that world-to-index transform has been correctly applied + voxel_low = np.min(voxels.numpy(), axis=0) + voxel_up = np.max(voxels.numpy(), axis=0) + np.testing.assert_array_less([19, -1, -21], voxel_low) + np.testing.assert_array_less(voxel_up, [41, 21, 1]) + + # Recreate the volume from ijk coords + volume_from_ijk = wp.Volume.allocate_by_voxels( + voxel_points=voxels, voxel_size=0.25, translation=(0.0, 5.0, 10.0), device=device + ) + + assert volume_from_ijk.is_index + + assert volume_from_ijk.get_voxel_count() == voxel_count + ijk_voxels = volume_from_ijk.get_voxels().numpy() + + voxels = voxels.numpy() + voxel_sorted = voxels[np.lexsort(voxels.T[::-1])] + ijk_voxel_sorted = ijk_voxels[np.lexsort(ijk_voxels.T[::-1])] + + np.testing.assert_equal(voxel_sorted, ijk_voxel_sorted) + + devices = get_selected_cuda_test_devices() @@ -248,6 +325,15 @@ 
class TestVolumeWrite(unittest.TestCase): add_function_test(TestVolumeWrite, "test_volume_allocation", test_volume_allocation, devices=devices) add_function_test(TestVolumeWrite, "test_volume_allocate_by_tiles_f", test_volume_allocate_by_tiles_f, devices=devices) add_function_test(TestVolumeWrite, "test_volume_allocate_by_tiles_v", test_volume_allocate_by_tiles_v, devices=devices) +add_function_test( + TestVolumeWrite, "test_volume_allocate_by_tiles_index", test_volume_allocate_by_tiles_index, devices=devices +) +add_function_test( + TestVolumeWrite, + "test_volume_allocation_from_voxels", + test_volume_allocation_from_voxels, + devices=devices, +) if __name__ == "__main__": diff --git a/warp/types.py b/warp/types.py index 8de0b0c1..d53946f5 100644 --- a/warp/types.py +++ b/warp/types.py @@ -12,7 +12,7 @@ import inspect import struct import zlib -from typing import Any, Callable, Generic, List, Tuple, TypeVar, Union +from typing import Any, Callable, Generic, List, NamedTuple, Optional, Tuple, TypeVar, Union import numpy as np @@ -2997,11 +2997,12 @@ class Volume: #: Enum value to specify trilinear interpolation during sampling LINEAR = constant(1) - def __init__(self, data: array): + def __init__(self, data: array, copy: bool = True): """Class representing a sparse grid. Args: data (:class:`warp.array`): Array of bytes representing the volume in NanoVDB format + copy (bool): Whether the incoming data will be copied or aliased """ self.id = 0 @@ -3011,16 +3012,16 @@ def __init__(self, data: array): if data is None: return - - if data.device is None: - raise RuntimeError("Invalid device") self.device = data.device + owner = False if self.device.is_cpu: - self.id = self.runtime.core.volume_create_host(ctypes.cast(data.ptr, ctypes.c_void_p), data.size) + self.id = self.runtime.core.volume_create_host( + ctypes.cast(data.ptr, ctypes.c_void_p), data.size, copy, owner + ) else: self.id = self.runtime.core.volume_create_device( - self.device.context, ctypes.cast(data.ptr, ctypes.c_void_p), data.size + self.device.context, ctypes.cast(data.ptr, ctypes.c_void_p), data.size, copy, owner ) if self.id == 0: @@ -3041,32 +3042,90 @@ def array(self) -> array: """Returns the raw memory buffer of the Volume as an array""" buf = ctypes.c_void_p(0) size = ctypes.c_uint64(0) + self.runtime.core.volume_get_buffer_info(self.id, ctypes.byref(buf), ctypes.byref(size)) + return array(ptr=buf.value, dtype=uint8, shape=size.value, device=self.device, owner=False) + + def get_tile_count(self) -> int: + """Returns the number of tiles (NanoVDB leaf nodes) of the volume""" + + voxel_count, tile_count = ( + ctypes.c_uint64(0), + ctypes.c_uint32(0), + ) + self.runtime.core.volume_get_tile_and_voxel_count(self.id, ctypes.byref(tile_count), ctypes.byref(voxel_count)) + return tile_count.value + + def get_tiles(self, out: Optional[array] = None) -> array: + """Returns the integer coordinates of all allocated tiles for this volume. + + Args: + out (:class:`warp.array`, optional): If provided, use the `out` array to store the tile coordinates, otherwise + a new array will be allocated. `out` must be a contiguous array of ``tile_count`` ``vec3i`` or ``tile_count x 3`` ``int32`` + on the same device as this volume. 
+ """ + + if self.id == 0: + raise RuntimeError("Invalid Volume") + + tile_count = self.get_tile_count() + if out is None: + out = warp.empty(dtype=int32, shape=(tile_count, 3), device=self.device) + elif out.device != self.device or out.shape[0] < tile_count: + raise RuntimeError(f"'out' array must an array with at least {tile_count} rows on device {self.device}") + elif not _is_contiguous_vec_like_array(out, vec_length=3, scalar_types=(int32,)): + raise RuntimeError( + "'out' must be a contiguous 1D array with type vec3i or a 2D array of type int32 with shape (N, 3) " + ) + if self.device.is_cpu: - self.runtime.core.volume_get_buffer_info_host(self.id, ctypes.byref(buf), ctypes.byref(size)) + self.runtime.core.volume_get_tiles_host(self.id, out.ptr) else: - self.runtime.core.volume_get_buffer_info_device(self.id, ctypes.byref(buf), ctypes.byref(size)) - return array(ptr=buf.value, dtype=uint8, shape=size.value, device=self.device) + self.runtime.core.volume_get_tiles_device(self.id, out.ptr) + + return out + + def get_voxel_count(self) -> int: + """Returns the total number of allocated voxels for this volume""" + + voxel_count, tile_count = ( + ctypes.c_uint64(0), + ctypes.c_uint32(0), + ) + self.runtime.core.volume_get_tile_and_voxel_count(self.id, ctypes.byref(tile_count), ctypes.byref(voxel_count)) + return voxel_count.value + + def get_voxels(self, out: Optional[array] = None) -> array: + """Returns the integer coordinates of all allocated voxels for this volume. + + Args: + out (:class:`warp.array`, optional): If provided, use the `out` array to store the voxel coordinates, otherwise + a new array will be allocated. `out` must be a contiguous array of ``voxel_count`` ``vec3i`` or ``voxel_count x 3`` ``int32`` + on the same device as this volume. 
+ """ - def get_tiles(self) -> array: if self.id == 0: raise RuntimeError("Invalid Volume") - buf = ctypes.c_void_p(0) - size = ctypes.c_uint64(0) + voxel_count = self.get_voxel_count() + if out is None: + out = warp.empty(dtype=int32, shape=(voxel_count, 3), device=self.device) + elif out.device != self.device or out.shape[0] < voxel_count: + raise RuntimeError(f"'out' array must an array with at least {voxel_count} rows on device {self.device}") + elif not _is_contiguous_vec_like_array(out, vec_length=3, scalar_types=(int32,)): + raise RuntimeError( + "'out' must be a contiguous 1D array with type vec3i or a 2D array of type int32 with shape (N, 3) " + ) + if self.device.is_cpu: - self.runtime.core.volume_get_tiles_host(self.id, ctypes.byref(buf), ctypes.byref(size)) - deleter = self.device.default_allocator.deleter + self.runtime.core.volume_get_voxels_host(self.id, out.ptr) else: - self.runtime.core.volume_get_tiles_device(self.id, ctypes.byref(buf), ctypes.byref(size)) - if self.device.is_mempool_supported: - deleter = self.device.mempool_allocator.deleter - else: - deleter = self.device.default_allocator.deleter - num_tiles = size.value // (3 * 4) + self.runtime.core.volume_get_voxels_device(self.id, out.ptr) - return array(ptr=buf.value, dtype=int32, shape=(num_tiles, 3), device=self.device, deleter=deleter) + return out def get_voxel_size(self) -> Tuple[float, float, float]: + """Voxel size, i.e, world coordinates of voxel's diagonal vector""" + if self.id == 0: raise RuntimeError("Invalid Volume") @@ -3074,9 +3133,181 @@ def get_voxel_size(self) -> Tuple[float, float, float]: self.runtime.core.volume_get_voxel_size(self.id, ctypes.byref(dx), ctypes.byref(dy), ctypes.byref(dz)) return (dx.value, dy.value, dz.value) + class GridInfo(NamedTuple): + """Grid metadata""" + + name: str + """Grid name""" + size_in_bytes: int + """Size of this grid's data, in bytes""" + + grid_index: int + """Index of this grid in the data buffer""" + grid_count: int + """Total number of grids in the data buffer""" + type_str: str + """String describing the type of the grid values""" + + translation: vec3f + """Index-to-world translation""" + transform_matrix: mat33f + """Linear part of the index-to-world transform""" + + def get_grid_info(self) -> Volume.GridInfo: + """Returns the metadata associated with this Volume""" + + grid_index = ctypes.c_uint32(0) + grid_count = ctypes.c_uint32(0) + grid_size = ctypes.c_uint64(0) + translation_buffer = (ctypes.c_float * 3)() + transform_buffer = (ctypes.c_float * 9)() + type_str_buffer = (ctypes.c_char * 16)() + + name = self.runtime.core.volume_get_grid_info( + self.id, + ctypes.byref(grid_size), + ctypes.byref(grid_index), + ctypes.byref(grid_count), + translation_buffer, + transform_buffer, + type_str_buffer, + ) + + if name is None: + raise RuntimeError("Invalid volume") + + return Volume.GridInfo( + name.decode("ascii"), + grid_size.value, + grid_index.value, + grid_count.value, + type_str_buffer.value.decode("ascii"), + vec3f.from_buffer_copy(translation_buffer), + mat33f.from_buffer_copy(transform_buffer), + ) + + _nvdb_type_to_dtype = { + "float": float32, + "double": float64, + "int16": int16, + "int32": int32, + "int64": int64, + "Vec3f": vec3f, + "Vec3d": vec3d, + "Half": float16, + "uint32": uint32, + "bool": bool, + "Vec4f": vec4f, + "Vec4d": vec4d, + "Vec3u8": vec3ub, + "Vec3u16": vec3us, + "uint8": uint8, + } + + @property + def dtype(self) -> type: + """Type of the Volume's values as a Warp type. + + If the grid does not contain values (e.g. 
index grids) or if the NanoVDB type is not + representable as a Warp type, returns ``None``. + """ + return Volume._nvdb_type_to_dtype.get(self.get_grid_info().type_str, None) + + _nvdb_index_types = ("Index", "OnIndex", "IndexMask", "OnIndexMask") + + @property + def is_index(self) -> bool: + """Whether this Volume contains an index grid, that is, a type of grid that does + not explicitly store values but associates each voxel to linearized index. + """ + + return self.get_grid_info().type_str in Volume._nvdb_index_types + + def get_feature_array_count(self) -> int: + """Returns the number of supplemental data arrays stored alongside the grid""" + + return self.runtime.core.volume_get_blind_data_count(self.id) + + class FeatureArrayInfo(NamedTuple): + """Metadata for a supplemental data array""" + + name: str + """Name of the data array""" + ptr: int + """Memory address of the start of the array""" + + value_size: int + """Size in bytes of the array values""" + value_count: int + """Number of values in the array""" + type_str: str + """String describing the type of the array values""" + + def get_feature_array_info(self, feature_index: int) -> Volume.FeatureArrayInfo: + """Returns the metadata associated to the feature array at `feature_index`""" + + buf = ctypes.c_void_p(0) + value_count = ctypes.c_uint64(0) + value_size = ctypes.c_uint32(0) + type_str_buffer = (ctypes.c_char * 16)() + + name = self.runtime.core.volume_get_blind_data_info( + self.id, + feature_index, + ctypes.byref(buf), + ctypes.byref(value_count), + ctypes.byref(value_size), + type_str_buffer, + ) + + if buf.value is None: + raise RuntimeError("Invalid feature array") + + return Volume.FeatureArrayInfo( + name.decode("ascii"), + buf.value, + value_size.value, + value_count.value, + type_str_buffer.value.decode("ascii"), + ) + + def feature_array(self, feature_index: int, dtype=None) -> array: + """Returns one the the grid's feature data arrays as a Warp array + + Args: + feature_index: index of the supplemental dat aarray in the grid + dtype: type for the returned warp array. If not provided, will be deduced from the array metdata. + """ + + info = self.get_feature_array_info(feature_index) + + if dtype is None: + try: + dtype = Volume._nvdb_type_to_dtype[info.type_str] + except KeyError: + # Unknown type, default to byte array + dtype = uint8 + + value_count = info.value_count + value_size = info.value_size + + if type_size_in_bytes(dtype) == 1: + # allow requesting a byte array from any type + value_count *= value_size + value_size = 1 + elif value_size == 1 and (value_count % type_size_in_bytes(dtype)) == 0: + # allow converting a byte array to any type + value_size = type_size_in_bytes(dtype) + value_count = value_count // value_size + + if type_size_in_bytes(dtype) != value_size: + raise RuntimeError(f"Cannot cast feature data of size {value_size} to array dtype {type_repr(dtype)}") + + return array(ptr=info.ptr, dtype=dtype, shape=value_count, device=self.device, owner=False) + @classmethod def load_from_nvdb(cls, file_or_buffer, device=None) -> Volume: - """Creates a Volume object from a NanoVDB file or in-memory buffer. + """Creates a Volume object from a serialized NanoVDB file or in-memory buffer. 
Returns: @@ -3088,28 +3319,117 @@ def load_from_nvdb(cls, file_or_buffer, device=None) -> Volume: data = file_or_buffer magic, version, grid_count, codec = struct.unpack("> 21 != 32: # checking major version raise RuntimeError("Unsupported NanoVDB version") - if grid_count != 1: - raise RuntimeError("Only NVDBs with exactly one grid are supported") - grid_data_offset = 192 + struct.unpack(" Volume: + """ + Creates a new :class:`Volume` aliasing an in-memory grid buffer. + + In contrast to :meth:`load_from_nvdb` which should be used to load serialized NanoVDB grids, + here the buffer must be uncompressed and must not contain file header information. + If the passed address does not contain a NanoVDB grid, the behavior of this function is undefined. + + Args: + grid_ptr: Integer address of the start of the grid buffer + buffer_size: Size of the buffer, in bytes. If not provided, the size will be assumed to be that of the single grid starting at `grid_ptr`. + device: Device of the buffer, and of the returned Volume. If not provided, the current Warp device is assumed. + + Returns the newly created Volume. + """ + + if not grid_ptr: + raise (RuntimeError, "Invalid grid buffer pointer") + + # Check that a Volume has not already been created for this address + # (to allow this we would need to ref-count the volume descriptor) + existing_buf = ctypes.c_void_p(0) + existing_size = ctypes.c_uint64(0) + warp.context.runtime.core.volume_get_buffer_info( + grid_ptr, ctypes.byref(existing_buf), ctypes.byref(existing_size) + ) + + if existing_buf.value is not None: + raise RuntimeError( + "A warp Volume has already been created for this grid, aliasing it more than once is not possible." + ) + + data_array = array(ptr=grid_ptr, dtype=uint8, shape=buffer_size, owner=False, device=device) + + return cls(data_array, copy=False) + + def load_next_grid(self) -> Volume: + """ + Tries to create a new warp Volume for the next grid that is linked to by this Volume. + + The existence of a next grid is deduced from the `grid_index` and `grid_count` metadata + as well as the size of this Volume's in-memory buffer. + + Returns the newly created Volume, or None if there is no next grid. + """ + + grid = self.get_grid_info() + + array = self.array() + + if grid.grid_index + 1 >= grid.grid_count or array.capacity <= grid.size_in_bytes: + return None + + next_volume = Volume.load_from_address( + array.ptr + grid.size_in_bytes, buffer_size=array.capacity - grid.size_in_bytes, device=self.device + ) + # makes the new Volume keep a reference to the current grid, as we're aliasing its buffer + next_volume._previous_grid = self + + return next_volume + @classmethod def load_from_numpy( cls, ndarray: np.array, min_world=(0.0, 0.0, 0.0), voxel_size=1.0, bg_value=0.0, device=None @@ -3261,11 +3581,11 @@ def allocate_by_tiles( Args: tile_points (:class:`warp.array`): Array of positions that define the tiles to be allocated. - The array can be a 2D, N-by-3 array of :class:`warp.int32` values, indicating index space positions, - or can be a 1D array of :class:`warp.vec3` values, indicating world space positions. + The array may use an integer scalar type (2D N-by-3 array of :class:`warp.int32` or 1D array of `warp.vec3i` values), indicating index space positions, + or a floating point scalar type (2D N-by-3 array of :class:`warp.float32` or 1D array of `warp.vec3f` values), indicating world space positions. Repeated points per tile are allowed and will be efficiently deduplicated. voxel_size (float): Voxel size of the new volume. 
- bg_value (float or array-like): Value of unallocated voxels of the volume, also defines the volume's type, a :class:`warp.vec3` volume is created if this is `array-like`, otherwise a float volume is created + bg_value (array-like, float, int or None): Value of unallocated voxels of the volume, also defines the volume's type. A :class:`warp.vec3` volume is created if this is `array-like`, an index volume will be created if `bg_value` is ``None``. translation (array-like): Translation between the index and world spaces. device (Devicelike): The CUDA device to create the volume on, e.g.: "cuda" or "cuda:0". @@ -3276,19 +3596,28 @@ def allocate_by_tiles( raise RuntimeError(f"Voxel size must be positive! Got {voxel_size}") if not device.is_cuda: raise RuntimeError("Only CUDA devices are supported for allocate_by_tiles") - if not ( - isinstance(tile_points, array) - and (tile_points.dtype == int32 and tile_points.ndim == 2) - or (tile_points.dtype == vec3 and tile_points.ndim == 1) - ): - raise RuntimeError("Expected an warp array of vec3s or of n-by-3 int32s as tile_points!") + if not _is_contiguous_vec_like_array(tile_points, vec_length=3, scalar_types=(float32, int32)): + raise RuntimeError( + "tile_points must be contiguous and either a 1D warp array of vec3f or vec3i or a 2D n-by-3 array of int32 or float32." + ) if not tile_points.device.is_cuda: - tile_points = array(tile_points, dtype=tile_points.dtype, device=device) + tile_points = tile_points.to(device) volume = cls(data=None) volume.device = device - in_world_space = tile_points.dtype == vec3 - if hasattr(bg_value, "__len__"): + in_world_space = type_scalar_type(tile_points.dtype) == float32 + if bg_value is None: + volume.id = volume.runtime.core.volume_index_from_tiles_device( + volume.device.context, + ctypes.c_void_p(tile_points.ptr), + tile_points.shape[0], + voxel_size, + translation[0], + translation[1], + translation[2], + in_world_space, + ) + elif hasattr(bg_value, "__len__"): volume.id = volume.runtime.core.volume_v_from_tiles_device( volume.device.context, ctypes.c_void_p(tile_points.ptr), @@ -3332,6 +3661,73 @@ def allocate_by_tiles( return volume + @classmethod + def allocate_by_voxels( + cls, voxel_points: array, voxel_size: float, translation=(0.0, 0.0, 0.0), device=None + ) -> Volume: + """Allocate a new Volume with active voxel for each point voxel_points. + + This function creates an *index* Volume, a special kind of volume that does not any store any + explicit payload but encodes a linearized index for each active voxel, allowing to lookup and + sample data from arbitrary external arrays. + + This function is only supported for CUDA devices. + + Args: + voxel_points (:class:`warp.array`): Array of positions that define the voxels to be allocated. + The array may use an integer scalar type (2D N-by-3 array of :class:`warp.int32` or 1D array of `warp.vec3i` values), indicating index space positions, + or a floating point scalar type (2D N-by-3 array of :class:`warp.float32` or 1D array of `warp.vec3f` values), indicating world space positions. + Repeated points per tile are allowed and will be efficiently deduplicated. + voxel_size (float): Voxel size of the new volume. + translation (array-like): Translation between the index and world spaces. + device (Devicelike): The CUDA device to create the volume on, e.g.: "cuda" or "cuda:0". + + """ + device = warp.get_device(device) + + if voxel_size <= 0.0: + raise RuntimeError(f"Voxel size must be positive! 
Got {voxel_size}") + if not device.is_cuda: + raise RuntimeError("Only CUDA devices are supported for allocate_by_tiles") + if not (is_array(voxel_points) and voxel_points.is_contiguous): + raise RuntimeError("tile_points must be a contiguous array") + if not _is_contiguous_vec_like_array(voxel_points, vec_length=3, scalar_types=(float32, int32)): + raise RuntimeError( + "voxel_points must be contiguous and either a 1D warp array of vec3f or vec3i or a 2D n-by-3 array of int32 or float32." + ) + if not voxel_points.device.is_cuda: + voxel_points = voxel_points.to(device) + + volume = cls(data=None) + volume.device = device + in_world_space = type_scalar_type(voxel_points.dtype) == float32 + + volume.id = volume.runtime.core.volume_from_active_voxels_device( + volume.device.context, + ctypes.c_void_p(voxel_points.ptr), + voxel_points.shape[0], + voxel_size, + translation[0], + translation[1], + translation[2], + in_world_space, + ) + + if volume.id == 0: + raise RuntimeError("Failed to create volume") + + return volume + + +def _is_contiguous_vec_like_array(array, vec_length: int, scalar_types: Tuple[type]) -> bool: + if not (is_array(array) and array.is_contiguous): + return False + if type_scalar_type(array.dtype) not in scalar_types: + return False + return (array.ndim == 1 and type_length(array.dtype) == vec_length) or ( + array.ndim == 2 and array.shape[1] == vec_length and type_length(array.dtype) == 1 + ) + # definition just for kernel type (cannot be a parameter), see mesh.h # NOTE: its layout must match the corresponding struct defined in C.