From f05d9ac6e916ed7a34f130cd6741af604f326c87 Mon Sep 17 00:00:00 2001 From: swfly Date: Thu, 28 Nov 2024 10:12:34 +0800 Subject: [PATCH] fallback: buffer related fixes --- src/backends/fallback/fallback_accel.cpp | 122 +++++++++--------- src/backends/fallback/fallback_accel.h | 1 + .../fallback/fallback_bindless_array.cpp | 5 +- src/backends/fallback/fallback_buffer.h | 2 +- src/backends/fallback/fallback_codegen.cpp | 1 + src/backends/fallback/fallback_shader.cpp | 37 +++--- src/backends/fallback/fallback_stream.cpp | 6 +- 7 files changed, 87 insertions(+), 87 deletions(-) diff --git a/src/backends/fallback/fallback_accel.cpp b/src/backends/fallback/fallback_accel.cpp index 45b210485..ff998be8d 100644 --- a/src/backends/fallback/fallback_accel.cpp +++ b/src/backends/fallback/fallback_accel.cpp @@ -3,6 +3,7 @@ // #include +#include #include "fallback_mesh.h" #include "fallback_accel.h" @@ -93,7 +94,7 @@ namespace luisa::compute::fallback auto error = rtcGetDeviceError(_device); if (error != RTC_ERROR_NONE) { - printf("Embree Error: %d\n", error); + LUISA_INFO("RTC ERROR: {}", (uint)error); } }); } @@ -116,99 +117,96 @@ namespace luisa::compute::fallback m[3], m[7], m[11], 1.f); } - namespace detail - { - void accel_trace_closest(const FallbackAccel* accel, float ox, float oy, float oz, float dx, float dy, float dz, - float tmin, float tmax, uint mask, SurfaceHit* hit) noexcept - { + void accel_trace_closest(const FallbackAccel* accel, float ox, float oy, float oz, float dx, float dy, float dz, + float tmin, float tmax, uint mask, SurfaceHit* hit) noexcept + { #if LUISA_COMPUTE_EMBREE_VERSION == 3 - RTCIntersectContext ctx{}; - rtcInitIntersectContext(&ctx); + RTCIntersectContext ctx{}; + rtcInitIntersectContext(&ctx); #else - RTCRayQueryContext ctx{}; + RTCRayQueryContext ctx{}; rtcInitRayQueryContext(&ctx); RTCIntersectArguments args{.context = &ctx}; #endif - RTCRayHit rh{}; - rh.ray.org_x = ox; - rh.ray.org_y = oy; - rh.ray.org_z = oz; - rh.ray.dir_x = dx; - rh.ray.dir_y = dy; - rh.ray.dir_z = dz; - rh.ray.tnear = tmin; - rh.ray.tfar = tmax; + RTCRayHit rh{}; + rh.ray.org_x = ox; + rh.ray.org_y = oy; + rh.ray.org_z = oz; + rh.ray.dir_x = dx; + rh.ray.dir_y = dy; + rh.ray.dir_z = dz; + rh.ray.tnear = tmin; + rh.ray.tfar = tmax; - rh.ray.mask = mask; - rh.hit.geomID = RTC_INVALID_GEOMETRY_ID; - rh.hit.primID = RTC_INVALID_GEOMETRY_ID; - rh.hit.instID[0] = RTC_INVALID_GEOMETRY_ID; - rh.ray.flags = 0; + rh.ray.mask = mask; + rh.hit.geomID = RTC_INVALID_GEOMETRY_ID; + rh.hit.primID = RTC_INVALID_GEOMETRY_ID; + rh.hit.instID[0] = RTC_INVALID_GEOMETRY_ID; + rh.ray.flags = 0; #if LUISA_COMPUTE_EMBREE_VERSION == 3 - rtcIntersect1(accel->scene(), &ctx, &rh); + rtcIntersect1(accel->scene(), &ctx, &rh); #else - rtcIntersect1(accel->scene(), &rh, &args); + rtcIntersect1(accel->scene(), &rh, &args); #endif - hit->inst = rh.hit.instID[0]; - hit->prim = rh.hit.primID; - hit->bary = make_float2(rh.hit.u, rh.hit.v); - hit->committed_ray_t = rh.ray.tfar; - } - void fill_transform(const FallbackAccel* accel, uint id, float4x4* buffer) - { - // TODO: handle embree 4 + hit->inst = rh.hit.instID[0]; + hit->prim = rh.hit.primID; + hit->bary = make_float2(rh.hit.u, rh.hit.v); + hit->committed_ray_t = rh.ray.tfar; + } + void fill_transform(const FallbackAccel* accel, uint id, float4x4* buffer) + { + // TODO: handle embree 4 - // Retrieve the RTCInstance (you may need to store instances in your application) - auto instance = rtcGetGeometry(accel->scene(), id); + // Retrieve the RTCInstance (you may need to store instances in your application) + auto instance = rtcGetGeometry(accel->scene(), id); - // Get the transform of the instance (a 4x4 matrix) - rtcGetGeometryTransform(instance, 0.f, RTCFormat::RTC_FORMAT_FLOAT4X4_COLUMN_MAJOR, buffer); - } + // Get the transform of the instance (a 4x4 matrix) + rtcGetGeometryTransform(instance, 0.f, RTCFormat::RTC_FORMAT_FLOAT4X4_COLUMN_MAJOR, buffer); + } - bool accel_trace_any(const FallbackAccel* accel, float ox, float oy, float oz, float dx, float dy, float dz, - float tmin, float tmax, uint mask) noexcept - { + bool accel_trace_any(const FallbackAccel* accel, float ox, float oy, float oz, float dx, float dy, float dz, + float tmin, float tmax, uint mask) noexcept + { #if LUISA_COMPUTE_EMBREE_VERSION == 3 - RTCIntersectContext ctx{}; - rtcInitIntersectContext(&ctx); + RTCIntersectContext ctx{}; + rtcInitIntersectContext(&ctx); #else - RTCRayQueryContext ctx{}; + RTCRayQueryContext ctx{}; rtcInitRayQueryContext(&ctx); RTCOccludedArguments args{.context = &ctx}; #endif - RTCRay ray{}; - ray.org_x = ox; - ray.org_y = oy; - ray.org_z = oz; - ray.dir_x = dx; - ray.dir_y = dy; - ray.dir_z = dz; - ray.tnear = tmin; - ray.tfar = tmax; + RTCRay ray{}; + ray.org_x = ox; + ray.org_y = oy; + ray.org_z = oz; + ray.dir_x = dx; + ray.dir_y = dy; + ray.dir_z = dz; + ray.tnear = tmin; + ray.tfar = tmax; - ray.mask = mask; - ray.flags = 0; + ray.mask = mask; + ray.flags = 0; #if LUISA_COMPUTE_EMBREE_VERSION == 3 - rtcOccluded1(accel->scene(), &ctx, &ray); + rtcOccluded1(accel->scene(), &ctx, &ray); #else - rtcOccluded1(accel->scene(), &ray, &args); + rtcOccluded1(accel->scene(), &ray, &args); #endif - return ray.tfar < 0.f; - } - } // namespace detail + return ray.tfar < 0.f; + } } // namespace luisa::compute::fallback void intersect_closest_wrapper(void* accel, float ox, float oy, float oz, float dx, float dy, float dz, float tmin, float tmax, unsigned mask, void* hit) { - luisa::compute::fallback::detail::accel_trace_closest( + luisa::compute::fallback::accel_trace_closest( reinterpret_cast(accel), ox, oy, oz, dx, dy, dz, tmin, tmax, mask, reinterpret_cast(hit)); } void accel_transform_wrapper(void* accel, unsigned id, void* buffer) { - luisa::compute::fallback::detail::fill_transform( + luisa::compute::fallback::fill_transform( reinterpret_cast(accel), id, reinterpret_cast(buffer)); } diff --git a/src/backends/fallback/fallback_accel.h b/src/backends/fallback/fallback_accel.h index 7d1e761fc..9fc609532 100644 --- a/src/backends/fallback/fallback_accel.h +++ b/src/backends/fallback/fallback_accel.h @@ -48,6 +48,7 @@ class FallbackAccel { [[nodiscard]] static float4x4 _decompress(std::array m) noexcept; public: + [[nodiscard]]auto device()const noexcept{return _device;} [[nodiscard]] RTCScene scene()const noexcept {return _handle;} FallbackAccel(RTCDevice device, AccelUsageHint hint) noexcept; ~FallbackAccel() noexcept; diff --git a/src/backends/fallback/fallback_bindless_array.cpp b/src/backends/fallback/fallback_bindless_array.cpp index 1c007b710..6b93a1f85 100644 --- a/src/backends/fallback/fallback_bindless_array.cpp +++ b/src/backends/fallback/fallback_bindless_array.cpp @@ -3,9 +3,8 @@ // #include "fallback_bindless_array.h" +#include "fallback_buffer.h" #include "thread_pool.h" -#include "luisa/runtime/rtx/triangle.h" -#include "luisa/rust/api_types.hpp" namespace luisa::compute::fallback { @@ -36,6 +35,6 @@ namespace luisa::compute::fallback void bindless_buffer_read(void* bindless, size_t slot, size_t elem, unsigned stride, void* buffer) { auto a = reinterpret_cast(bindless); - auto ptr = reinterpret_cast(a->slot(slot).buffer); + auto ptr = reinterpret_cast(a->slot(slot).buffer)->addr(); std::memcpy(buffer, ptr + elem*stride, stride); } diff --git a/src/backends/fallback/fallback_buffer.h b/src/backends/fallback/fallback_buffer.h index 1b747d84a..6b70a35b0 100644 --- a/src/backends/fallback/fallback_buffer.h +++ b/src/backends/fallback/fallback_buffer.h @@ -15,7 +15,7 @@ struct alignas(16) FallbackBufferView { class FallbackBuffer { public: explicit FallbackBuffer(size_t size, unsigned elementStride); - void *addr() { return data; } + std::byte *addr()const noexcept { return data; } [[nodiscard]] FallbackBufferView view(size_t offset) noexcept; ~FallbackBuffer(); private: diff --git a/src/backends/fallback/fallback_codegen.cpp b/src/backends/fallback/fallback_codegen.cpp index 346ef6bdf..1d3f25b22 100644 --- a/src/backends/fallback/fallback_codegen.cpp +++ b/src/backends/fallback/fallback_codegen.cpp @@ -1707,6 +1707,7 @@ class FallbackCodegen { auto hit_type = _translate_type(Type::of(), false); auto hit_alloca = b.CreateAlloca(hit_type, nullptr, ""); + hit_alloca->setAlignment(llvm::Align(8u)); // Extract ray components auto compressed_origin = b.CreateExtractValue(llvm_ray, 0, ""); diff --git a/src/backends/fallback/fallback_shader.cpp b/src/backends/fallback/fallback_shader.cpp index 1b5b55b89..e98154f69 100644 --- a/src/backends/fallback/fallback_shader.cpp +++ b/src/backends/fallback/fallback_shader.cpp @@ -127,7 +127,7 @@ luisa::compute::fallback::FallbackShader::FallbackShader(const luisa::compute::S auto xir_module = xir::ast_to_xir_translate(kernel, {}); xir_module->set_name(luisa::format("kernel_{:016x}", kernel.hash())); if (!option.name.empty()) { xir_module->set_location(option.name); } - LUISA_INFO("Kernel XIR:\n{}", xir::xir_to_text_translate(xir_module, true)); + //LUISA_INFO("Kernel XIR:\n{}", xir::xir_to_text_translate(xir_module, true)); auto llvm_ctx = std::make_unique(); auto llvm_module = luisa_fallback_backend_codegen(*llvm_ctx, xir_module); @@ -135,7 +135,7 @@ luisa::compute::fallback::FallbackShader::FallbackShader(const luisa::compute::S LUISA_ERROR_WITH_LOCATION("Failed to generate LLVM IR."); } //llvm_module->print(llvm::errs(), nullptr, true, true); - llvm_module->print(llvm::outs(), nullptr, true, true); + //llvm_module->print(llvm::outs(), nullptr, true, true); if (llvm::verifyModule(*llvm_module, &llvm::errs())) { LUISA_ERROR_WITH_LOCATION("LLVM module verification failed."); } @@ -175,7 +175,7 @@ luisa::compute::fallback::FallbackShader::FallbackShader(const luisa::compute::S if (::llvm::verifyModule(*llvm_module, &::llvm::errs())) { LUISA_ERROR_WITH_LOCATION("Failed to verify module."); } - llvm_module->print(llvm::outs(), nullptr, true, true); + //llvm_module->print(llvm::outs(), nullptr, true, true); // compile to machine code auto m = llvm::orc::ThreadSafeModule(std::move(llvm_module), std::move(llvm_ctx)); @@ -272,22 +272,23 @@ void compute::fallback::FallbackShader::dispatch(ThreadPool &pool, const compute auto data = argument_buffer.data(); - for (int i = 0; i < dispatch_counts.x; ++i) { - for (int j = 0; j < dispatch_counts.y; ++j) { - for (int k = 0; k < dispatch_counts.z; ++k) { - auto c = config; - c.block_id = make_uint3(i, j, k); - (*_kernel_entry)(data, &c); - } - } - } +// for (int i = 0; i < dispatch_counts.x; ++i) { +// for (int j = 0; j < dispatch_counts.y; ++j) { +// for (int k = 0; k < dispatch_counts.z; ++k) { +// auto c = config; +// c.block_id = make_uint3(i, j, k); +// (*_kernel_entry)(data, &c); +// } +// } +// } -// pool.parallel(dispatch_counts.x, dispatch_counts.y, dispatch_counts.z, -// [this, config, data](auto bx, auto by, auto bz) noexcept { -// auto c = config; -// c.block_id = make_uint3(bx, by, bz); -// (*_kernel_entry)(data, &c); -// }); + pool.parallel(dispatch_counts.x, dispatch_counts.y, dispatch_counts.z, + [this, config, data](auto bx, auto by, auto bz) noexcept { + auto c = config; + c.block_id = make_uint3(bx, by, bz); + (*_kernel_entry)(data, &c); + }); + pool.synchronize(); // pool.barrier(); } void compute::fallback::FallbackShader::build_bound_arguments(compute::Function kernel) { diff --git a/src/backends/fallback/fallback_stream.cpp b/src/backends/fallback/fallback_stream.cpp index dd32552e8..ce105dec5 100644 --- a/src/backends/fallback/fallback_stream.cpp +++ b/src/backends/fallback/fallback_stream.cpp @@ -134,18 +134,18 @@ void FallbackStream::visit(const AccelBuildCommand *command) noexcept { } void FallbackStream::visit(const MeshBuildCommand *command) noexcept { - auto v_b = command->vertex_buffer(); + auto v_b = reinterpret_cast(command->vertex_buffer())->view(0).ptr; auto v_b_o = command->vertex_buffer_offset(); auto v_s = command->vertex_stride(); auto v_b_s = command->vertex_buffer_size(); auto v_b_c = v_b_s/v_s; - auto t_b = command->triangle_buffer(); + auto t_b = reinterpret_cast(command->triangle_buffer())->view(0).ptr; auto t_b_o = command->triangle_buffer_offset(); auto t_b_s = command->triangle_buffer_size(); auto t_b_c = t_b_s/12u; _pool.async([=,mesh = reinterpret_cast(command->handle())] { - mesh->commit(v_b, v_b_o, v_s, v_b_c, t_b, t_b_o, t_b_c); + mesh->commit(reinterpret_cast(v_b), v_b_o, v_s, v_b_c, reinterpret_cast(t_b), t_b_o, t_b_c); }); _pool.barrier(); }