diff --git a/src/backends/fallback/fallback_device_api.cpp b/src/backends/fallback/fallback_device_api.cpp index 5801b631a..fb30d0041 100644 --- a/src/backends/fallback/fallback_device_api.cpp +++ b/src/backends/fallback/fallback_device_api.cpp @@ -21,8 +21,8 @@ void luisa_bc6h_read(const FallbackTextureView *tex, int x, int y, float4 &out) } [[nodiscard]] int4 luisa_fallback_texture2d_read_int(void *texture_data, uint64_t texture_data_extra, uint x, uint y) noexcept { - PackedTextureView handle{texture_data, texture_data_extra}; - auto tex = reinterpret_cast(&handle); + PackedTextureView view{texture_data, texture_data_extra}; + auto tex = reinterpret_cast(&view); switch (tex->storage()) { case PixelStorage::BC7: { float4 out; @@ -48,8 +48,8 @@ void luisa_bc6h_read(const FallbackTextureView *tex, int x, int y, float4 &out) } [[nodiscard]] uint4 luisa_fallback_texture2d_read_uint(void *texture_data, uint64_t texture_data_extra, uint x, uint y) noexcept { - PackedTextureView handle{texture_data, texture_data_extra}; - auto tex = reinterpret_cast(&handle); + PackedTextureView view{texture_data, texture_data_extra}; + auto tex = reinterpret_cast(&view); switch (tex->storage()) { case PixelStorage::BC7: { float4 out; @@ -75,8 +75,8 @@ void luisa_bc6h_read(const FallbackTextureView *tex, int x, int y, float4 &out) } [[nodiscard]] float4 luisa_fallback_texture2d_read_float(void *texture_data, uint64_t texture_data_extra, uint x, uint y) noexcept { - PackedTextureView handle{texture_data, texture_data_extra}; - auto tex = reinterpret_cast(&handle); + PackedTextureView view{texture_data, texture_data_extra}; + auto tex = reinterpret_cast(&view); switch (tex->storage()) { case PixelStorage::BC7: { float4 out; @@ -96,8 +96,8 @@ void luisa_bc6h_read(const FallbackTextureView *tex, int x, int y, float4 &out) } void luisa_fallback_texture2d_write_float(void *texture_data, uint64_t texture_data_extra, uint x, uint y, float4 value) noexcept { - PackedTextureView handle{texture_data, texture_data_extra}; - auto tex = reinterpret_cast(&handle); + PackedTextureView view{texture_data, texture_data_extra}; + auto tex = reinterpret_cast(&view); switch (tex->storage()) { case PixelStorage::BC7: { LUISA_ERROR("cannot write to BC texture"); @@ -115,8 +115,8 @@ void luisa_fallback_texture2d_write_float(void *texture_data, uint64_t texture_d } void luisa_fallback_texture2d_write_uint(void *texture_data, uint64_t texture_data_extra, uint x, uint y, uint4 value) noexcept { - PackedTextureView handle{texture_data, texture_data_extra}; - auto tex = reinterpret_cast(&handle); + PackedTextureView view{texture_data, texture_data_extra}; + auto tex = reinterpret_cast(&view); switch (tex->storage()) { case PixelStorage::BC7: { LUISA_ERROR("cannot write to BC texture"); @@ -134,8 +134,8 @@ void luisa_fallback_texture2d_write_uint(void *texture_data, uint64_t texture_da } void luisa_fallback_texture2d_write_int(void *texture_data, uint64_t texture_data_extra, uint x, uint y, int4 value) noexcept { - PackedTextureView handle{texture_data, texture_data_extra}; - auto tex = reinterpret_cast(&handle); + PackedTextureView view{texture_data, texture_data_extra}; + auto tex = reinterpret_cast(&view); switch (tex->storage()) { case PixelStorage::BC7: { LUISA_ERROR("cannot write to BC texture"); @@ -205,7 +205,7 @@ template } [[nodiscard]] float4 luisa_fallback_bindless_texture2d_sample(const Texture *handle, uint sampler, float u, float v) noexcept { - auto tex = reinterpret_cast(&handle); + auto tex = reinterpret_cast(handle); auto s = Sampler::decode(sampler); auto view = tex->view(0); return s.filter() == Sampler::Filter::POINT ? @@ -214,7 +214,7 @@ template } [[nodiscard]] float4 luisa_fallback_bindless_texture2d_sample_level(const Texture *handle, uint sampler, float u, float v, float level) noexcept { - auto tex = reinterpret_cast(&handle); + auto tex = reinterpret_cast(handle); auto s = Sampler::decode(sampler); auto filter = s.filter(); if (level <= 0.f || tex->mip_levels() == 0u || filter == Sampler::Filter::POINT) { diff --git a/src/backends/fallback/fallback_device_api.h b/src/backends/fallback/fallback_device_api.h index 02eaf1783..7c27f0f0f 100644 --- a/src/backends/fallback/fallback_device_api.h +++ b/src/backends/fallback/fallback_device_api.h @@ -81,8 +81,8 @@ struct alignas(16u) PackedTextureView { static_assert(sizeof(TextureView) == 16 && sizeof(PackedTextureView) == 16); -struct Texture; -struct Accel; +struct alignas(16) Texture; +struct alignas(16) Accel; struct alignas(16) BindlessSlot { const void *buffer; diff --git a/src/backends/fallback/fallback_shader.cpp b/src/backends/fallback/fallback_shader.cpp index 7f4781d2b..a4bff6cbe 100644 --- a/src/backends/fallback/fallback_shader.cpp +++ b/src/backends/fallback/fallback_shader.cpp @@ -312,7 +312,7 @@ void FallbackShader::dispatch(ThreadPool &pool, const ShaderDispatchCommand *com break; } case Tag::BINDLESS_ARRAY: { - auto bindless = reinterpret_cast(arg.buffer.handle); + auto bindless = reinterpret_cast(arg.bindless_array.handle); auto view = bindless->view(); auto ptr = allocate_argument(sizeof(view)); std::memcpy(ptr, &view, sizeof(view)); diff --git a/src/backends/fallback/fallback_texture_sampling.cpp b/src/backends/fallback/fallback_texture_sampling.cpp index c89f292b5..16bc0939a 100644 --- a/src/backends/fallback/fallback_texture_sampling.cpp +++ b/src/backends/fallback/fallback_texture_sampling.cpp @@ -8,434 +8,353 @@ #include "llvm_abi.h" //swfly tries to write more about sampling bc textures -namespace luisa::compute::fallback -{ - void texture_write_2d_float_wrapper(void* ptr, uint x, uint y, void* val) - { - texture_write_2d_float(reinterpret_cast(ptr), x, y, *(float4 *) val); - } - - void texture_read_2d_float_wrapper(void* ptr, uint x, uint y, void* out) - { - *(float4 *) out = texture_read_2d_float(reinterpret_cast(ptr), x, y); - } - - void texture_write_2d_uint_wrapper(void* ptr, uint x, uint y, void* val) - { - texture_write_2d_uint(reinterpret_cast(ptr), x, y, *(uint4 *) val); - } - - void texture_read_2d_uint_wrapper(void* ptr, uint x, uint y, void* out) - { - *(uint4 *) out = texture_read_2d_uint(reinterpret_cast(ptr), x, y); - } - - void texture_read_3d_float_wrapper(void* ptr, uint x, uint y, uint z, void* out) - { - *(float4 *) out = texture_read_3d_float(reinterpret_cast(ptr), x, y, z); - } - void texture_read_3d_uint_wrapper(void* ptr, uint x, uint y, uint z, void* out) - { - *(uint4 *) out = texture_read_3d_uint(reinterpret_cast(ptr), x, y, z); - } - - - - - - - - void luisa_bc7_read(const FallbackTextureView* tex, uint x, uint y, float4& out) noexcept - { - auto block_pos = make_uint2(x / 4, y / 4); - auto block_per_row = tex->size2d().x / 4; - const bc::BC7Block* bc_block = reinterpret_cast(tex->data()) + ( - block_pos.x + block_pos.y * block_per_row); - bc_block->Decode(x % 4, y % 4, reinterpret_cast(&out)); - } - - void luisa_bc6h_read(const FallbackTextureView* tex, int x, int y, float4& out) noexcept - { - auto block_pos = make_uint2(x / 4, y / 4); - auto block_per_row = tex->size2d().x / 4; - const bc::BC6HBlock* bc_block = reinterpret_cast(tex->data()) + ( - block_pos.x + block_pos.y * block_per_row); - bc_block->Decode(false, x % 4, y % 4, reinterpret_cast(&out)); - } - - int4 fallback::texture_read_2d_int(const FallbackTextureView* tex, uint x, uint y) noexcept - { - switch (tex->storage()) - { - case PixelStorage::BC7: - { - float4 out; - luisa_bc7_read(tex, x, y, out); - return make_int4(out.x * 255.f, out.y * 255.f, out.z * 255.f, out.w * 255.f); - } - case PixelStorage::BC6: - { - float4 out; - luisa_bc6h_read(tex, x, y, out); - return make_int4(out.x * 255.f, out.y * 255.f, out.z * 255.f, out.w * 255.f); - } - default: - return tex->read2d(make_uint2(x, y)); +namespace luisa::compute::fallback { +void texture_write_2d_float_wrapper(void *ptr, uint x, uint y, void *val) { + texture_write_2d_float(reinterpret_cast(ptr), x, y, *(float4 *)val); +} + +void texture_read_2d_float_wrapper(void *ptr, uint x, uint y, void *out) { + *(float4 *)out = texture_read_2d_float(reinterpret_cast(ptr), x, y); +} + +void texture_write_2d_uint_wrapper(void *ptr, uint x, uint y, void *val) { + texture_write_2d_uint(reinterpret_cast(ptr), x, y, *(uint4 *)val); +} + +void texture_read_2d_uint_wrapper(void *ptr, uint x, uint y, void *out) { + *(uint4 *)out = texture_read_2d_uint(reinterpret_cast(ptr), x, y); +} + +void texture_read_3d_float_wrapper(void *ptr, uint x, uint y, uint z, void *out) { + *(float4 *)out = texture_read_3d_float(reinterpret_cast(ptr), x, y, z); +} +void texture_read_3d_uint_wrapper(void *ptr, uint x, uint y, uint z, void *out) { + *(uint4 *)out = texture_read_3d_uint(reinterpret_cast(ptr), x, y, z); +} + +void luisa_bc7_read(const FallbackTextureView *tex, uint x, uint y, float4 &out) noexcept { + auto block_pos = make_uint2(x / 4, y / 4); + auto block_per_row = tex->size2d().x / 4; + const bc::BC7Block *bc_block = reinterpret_cast(tex->data()) + (block_pos.x + block_pos.y * block_per_row); + bc_block->Decode(x % 4, y % 4, reinterpret_cast(&out)); +} + +void luisa_bc6h_read(const FallbackTextureView *tex, int x, int y, float4 &out) noexcept { + auto block_pos = make_uint2(x / 4, y / 4); + auto block_per_row = tex->size2d().x / 4; + const bc::BC6HBlock *bc_block = reinterpret_cast(tex->data()) + (block_pos.x + block_pos.y * block_per_row); + bc_block->Decode(false, x % 4, y % 4, reinterpret_cast(&out)); +} + +int4 fallback::texture_read_2d_int(const FallbackTextureView *tex, uint x, uint y) noexcept { + switch (tex->storage()) { + case PixelStorage::BC7: { + float4 out; + luisa_bc7_read(tex, x, y, out); + return make_int4(out.x * 255.f, out.y * 255.f, out.z * 255.f, out.w * 255.f); } - } - - int4 fallback::texture_read_3d_int(const FallbackTextureView* tex, uint x, uint y, uint z) noexcept - { - switch (tex->storage()) - { - case PixelStorage::BC7: - { - LUISA_ERROR("Block compression doesn't work for 3D texture"); - return make_int4(0); - } - case PixelStorage::BC6: - { - LUISA_ERROR("Block compression doesn't work for 3D texture"); - return make_int4(0); - } - default: - return tex->read2d(make_uint2(x, y)); + case PixelStorage::BC6: { + float4 out; + luisa_bc6h_read(tex, x, y, out); + return make_int4(out.x * 255.f, out.y * 255.f, out.z * 255.f, out.w * 255.f); } + default: + return tex->read2d(make_uint2(x, y)); } +} - uint4 fallback::texture_read_2d_uint(const FallbackTextureView* tex, uint x, uint y) noexcept - { - switch (tex->storage()) - { - case PixelStorage::BC7: - { - float4 out; - luisa_bc7_read(tex, x, y, out); - return make_uint4(out.x * 255.f, out.y * 255.f, out.z * 255.f, out.w * 255.f); - } - case PixelStorage::BC6: - { - float4 out; - luisa_bc6h_read(tex, x, y, out); - return make_uint4(out.x * 255.f, out.y * 255.f, out.z * 255.f, out.w * 255.f); - } - default: - return tex->read2d(make_uint2(x, y)); +int4 fallback::texture_read_3d_int(const FallbackTextureView *tex, uint x, uint y, uint z) noexcept { + switch (tex->storage()) { + case PixelStorage::BC7: { + LUISA_ERROR("Block compression doesn't work for 3D texture"); + return make_int4(0); } - } - - uint4 fallback::texture_read_3d_uint(const FallbackTextureView* tex, uint x, uint y, uint z) noexcept - { - switch (tex->storage()) - { - case PixelStorage::BC7: - { - LUISA_ERROR("Block compression doesn't work for 3D texture"); - return make_uint4(0); - } - case PixelStorage::BC6: - { - LUISA_ERROR("Block compression doesn't work for 3D texture"); - return make_uint4(0); - } - default: - return tex->read3d(make_uint3(x, y, z)); + case PixelStorage::BC6: { + LUISA_ERROR("Block compression doesn't work for 3D texture"); + return make_int4(0); } + default: + return tex->read2d(make_uint2(x, y)); } - - float4 fallback::texture_read_2d_float(const FallbackTextureView* tex, uint x, uint y) noexcept - { - switch (tex->storage()) - { - case PixelStorage::BC7: - { - float4 out; - luisa_bc7_read(tex, x, y, out); - return out; - } - case PixelStorage::BC6: - { - float4 out; - luisa_bc6h_read(tex, x, y, out); - return out; - } - default: - return tex->read2d(make_uint2(x, y)); +} + +uint4 fallback::texture_read_2d_uint(const FallbackTextureView *tex, uint x, uint y) noexcept { + switch (tex->storage()) { + case PixelStorage::BC7: { + float4 out; + luisa_bc7_read(tex, x, y, out); + return make_uint4(out.x * 255.f, out.y * 255.f, out.z * 255.f, out.w * 255.f); } + case PixelStorage::BC6: { + float4 out; + luisa_bc6h_read(tex, x, y, out); + return make_uint4(out.x * 255.f, out.y * 255.f, out.z * 255.f, out.w * 255.f); + } + default: + return tex->read2d(make_uint2(x, y)); } +} - void fallback::texture_write_2d_float(const FallbackTextureView* tex, uint x, uint y, float4 value) noexcept - { - switch (tex->storage()) - { - case PixelStorage::BC7: - { - LUISA_ERROR("cannot write to BC texture"); - break; - } - case PixelStorage::BC6: - { - LUISA_ERROR("cannot write to BC texture"); - break; - } - default: - return tex->write2d(make_uint2(x, y), value); +uint4 fallback::texture_read_3d_uint(const FallbackTextureView *tex, uint x, uint y, uint z) noexcept { + switch (tex->storage()) { + case PixelStorage::BC7: { + LUISA_ERROR("Block compression doesn't work for 3D texture"); + return make_uint4(0); } - } - void fallback::texture_write_2d_uint(const FallbackTextureView* tex, uint x, uint y, uint4 value) noexcept - { - switch (tex->storage()) - { - case PixelStorage::BC7: - { - LUISA_ERROR("cannot write to BC texture"); - break; - } - case PixelStorage::BC6: - { - LUISA_ERROR("cannot write to BC texture"); - break; - } - default: - return tex->write2d(make_uint2(x, y), value); + case PixelStorage::BC6: { + LUISA_ERROR("Block compression doesn't work for 3D texture"); + return make_uint4(0); } + default: + return tex->read3d(make_uint3(x, y, z)); } - - float4 fallback::texture_read_3d_float(const FallbackTextureView* tex, uint x, uint y, uint z) noexcept - { - switch (tex->storage()) - { - case PixelStorage::BC7: - { - LUISA_ERROR("Block compression doesn't work for 3D texture"); - return make_float4(0.f); - } - case PixelStorage::BC6: - { - LUISA_ERROR("Block compression doesn't work for 3D texture"); - return make_float4(0.f); - } - default: - return tex->read3d(make_uint3(x, y, z)); +} + +float4 fallback::texture_read_2d_float(const FallbackTextureView *tex, uint x, uint y) noexcept { + switch (tex->storage()) { + case PixelStorage::BC7: { + float4 out; + luisa_bc7_read(tex, x, y, out); + return out; } - } - - - float4 fallback::bindless_texture_2d_read(const FallbackTexture* tex, uint level, uint x, uint y) noexcept - { - auto view = tex->view(level); - return texture_read_2d_float(&view, x, y); - } - - float4 fallback::bindless_texture_3d_read(const FallbackTexture* tex, uint level, uint x, uint y, uint z) noexcept - { - auto view = tex->view(level); - return texture_read_3d_float(&view, x, y, z); - } - - - //2D sampling - template - [[nodiscard]] inline auto texture_coord_point(Sampler::Address address, const T& uv, T s) noexcept - { - switch (address) - { - case Sampler::Address::EDGE: return luisa::clamp(uv, 0.0f, one_minus_epsilon) * s; - case Sampler::Address::REPEAT: return luisa::fract(uv) * s; - case Sampler::Address::MIRROR: - { - auto uv0 = luisa::fmod(luisa::abs(uv), T{2.0f}); - uv0 = select(2.f - uv, uv, uv < T{1.f}); - return luisa::min(uv, one_minus_epsilon) * s; - } - case Sampler::Address::ZERO: return luisa::select(uv * s, T{65536.f}, uv < 0.f || uv >= 1.f); + case PixelStorage::BC6: { + float4 out; + luisa_bc6h_read(tex, x, y, out); + return out; } - return T{65536.f}; - } - - [[nodiscard]] inline auto texture_sample_point(FallbackTextureView* view, Sampler::Address address, - const float2& uv) noexcept - { - auto size = make_float2(view->size2d()); - auto c = make_uint2(texture_coord_point(address, uv, size)); - return texture_read_2d_float(view, c.x, c.y); - } - - [[nodiscard]] inline auto texture_coord_linear(Sampler::Address address, float2 uv, const float2& size) noexcept - { - auto s = make_float2(size); - auto inv_s = 1.f / s; - auto c_min = texture_coord_point(address, uv - .5f * inv_s, s); - auto c_max = texture_coord_point(address, uv + .5f * inv_s, s); - return std::make_pair(luisa::min(c_min, c_max), luisa::max(c_min, c_max)); - } - - [[nodiscard]] inline auto texture_sample_linear(FallbackTextureView* view, Sampler::Address address, - const float2& uv) noexcept - { - auto size = make_float2(view->size2d()); - auto [st_min, st_max] = texture_coord_linear(address, uv, size); - auto t = luisa::fract(st_max); - auto c0 = make_uint2(st_min); - auto c1 = make_uint2(st_max); - auto v00 = texture_read_2d_float(view, c0.x, c0.y); - auto v01 = texture_read_2d_float(view, c1.x, c0.y); - auto v10 = texture_read_2d_float(view, c0.x, c1.y); - auto v11 = texture_read_2d_float(view, c1.x, c1.y); - return luisa::lerp(luisa::lerp(v00, v01, t.x), - luisa::lerp(v10, v11, t.x), t.y); - } - - float4 fallback::bindless_texture_2d_sample(const FallbackTexture* tex, uint sampler, float u, float v) noexcept - { - auto s = Sampler::decode(sampler); - auto view = tex->view(0); - return s.filter() == Sampler::Filter::POINT - ? texture_sample_point(&view, s.address(), make_float2(u, v)) - : texture_sample_linear(&view, s.address(), make_float2(u, v)); + default: + return tex->read2d(make_uint2(x, y)); } +} - float4 bindless_texture_2d_sample_level(const FallbackTexture* tex, uint sampler, float u, float v, - float lod) noexcept - { - auto s = Sampler::decode(sampler); - auto filter = s.filter(); - if (lod <= 0.f || tex->mip_levels() == 0u || - filter == Sampler::Filter::POINT) - { - return bindless_texture_2d_sample(tex, sampler, u, v); +void fallback::texture_write_2d_float(const FallbackTextureView *tex, uint x, uint y, float4 value) noexcept { + switch (tex->storage()) { + case PixelStorage::BC7: { + LUISA_ERROR("cannot write to BC texture"); + break; } - auto level0 = std::min(static_cast(lod), - tex->mip_levels() - 1u); - auto view0 = tex->view(level0); - auto v0 = texture_sample_linear( - &view0, s.address(), make_float2(u, v)); - if (level0 == tex->mip_levels() - 1u || - filter == Sampler::Filter::LINEAR_POINT) - { - return v0; + case PixelStorage::BC6: { + LUISA_ERROR("cannot write to BC texture"); + break; } - auto view1 = tex->view(level0 + 1); - auto v1 = texture_sample_linear( - &view1, s.address(), make_float2(u, v)); - return luisa::lerp(v0, v1, luisa::fract(lod)); - } - - //swfly: im too lazy to do this. complete it someday - float4 bindless_texture_2d_sample_grad(const FallbackTexture* tex, uint sampler, float u, float v, int64_t dpdx, - int64_t dpdy) noexcept - { - return bindless_texture_2d_sample(tex, sampler, u, v); + default: + return tex->write2d(make_uint2(x, y), value); } - - //3D sampling - [[nodiscard]] inline auto texture_sample_point(FallbackTextureView* view, Sampler::Address address, - float3 uv) noexcept - { - auto size = make_float3(view->size3d()); - auto c = make_uint3(texture_coord_point(address, uv, size)); - return texture_read_3d_float(view, c.x, c.y, c.z); +} +void fallback::texture_write_2d_uint(const FallbackTextureView *tex, uint x, uint y, uint4 value) noexcept { + switch (tex->storage()) { + case PixelStorage::BC7: { + LUISA_ERROR("cannot write to BC texture"); + break; + } + case PixelStorage::BC6: { + LUISA_ERROR("cannot write to BC texture"); + break; + } + default: + return tex->write2d(make_uint2(x, y), value); } +} - [[nodiscard]] inline auto texture_coord_linear(Sampler::Address address, float3 uv, float3 size) noexcept - { - auto s = make_float3(size); - auto inv_s = 1.f / s; - auto c_min = texture_coord_point(address, uv - .5f * inv_s, s); - auto c_max = texture_coord_point(address, uv + .5f * inv_s, s); - return std::make_pair(luisa::min(c_min, c_max), luisa::max(c_min, c_max)); +float4 fallback::texture_read_3d_float(const FallbackTextureView *tex, uint x, uint y, uint z) noexcept { + switch (tex->storage()) { + case PixelStorage::BC7: { + LUISA_ERROR("Block compression doesn't work for 3D texture"); + return make_float4(0.f); + } + case PixelStorage::BC6: { + LUISA_ERROR("Block compression doesn't work for 3D texture"); + return make_float4(0.f); + } + default: + return tex->read3d(make_uint3(x, y, z)); } - - [[nodiscard]] inline auto texture_sample_linear(FallbackTextureView* view, Sampler::Address address, - float3 uvw) noexcept - { - auto size = make_float3(view->size3d()); - auto [st_min, st_max] = texture_coord_linear(address, uvw, size); - auto t = luisa::fract(st_max); - auto c0 = make_uint3(st_min); - auto c1 = make_uint3(st_max); - auto v000 = texture_read_3d_float(view, c0.x, c0.y, c0.z); - auto v001 = texture_read_3d_float(view, c1.x, c0.y, c0.z); - auto v010 = texture_read_3d_float(view, c0.x, c1.y, c0.z); - auto v011 = texture_read_3d_float(view, c1.x, c1.y, c0.z); - auto v100 = texture_read_3d_float(view, c0.x, c0.y, c1.z); - auto v101 = texture_read_3d_float(view, c1.x, c0.y, c1.z); - auto v110 = texture_read_3d_float(view, c0.x, c1.y, c1.z); - auto v111 = texture_read_3d_float(view, c1.x, c1.y, c1.z); - return luisa::lerp( - luisa::lerp(luisa::lerp(v000, v001, t.x), - luisa::lerp(v010, v011, t.x), t.y), - luisa::lerp(luisa::lerp(v100, v101, t.x), - luisa::lerp(v110, v111, t.x), t.y), - t.z); +} + +float4 fallback::bindless_texture_2d_read(const FallbackTexture *tex, uint level, uint x, uint y) noexcept { + auto view = tex->view(level); + return texture_read_2d_float(&view, x, y); +} + +float4 fallback::bindless_texture_3d_read(const FallbackTexture *tex, uint level, uint x, uint y, uint z) noexcept { + auto view = tex->view(level); + return texture_read_3d_float(&view, x, y, z); +} + +//2D sampling +template +[[nodiscard]] inline auto texture_coord_point(Sampler::Address address, const T &uv, T s) noexcept { + switch (address) { + case Sampler::Address::EDGE: return luisa::clamp(uv, 0.0f, one_minus_epsilon) * s; + case Sampler::Address::REPEAT: return luisa::fract(uv) * s; + case Sampler::Address::MIRROR: { + auto uv0 = luisa::fmod(luisa::abs(uv), T{2.0f}); + uv0 = select(2.f - uv, uv, uv < T{1.f}); + return luisa::min(uv, one_minus_epsilon) * s; + } + case Sampler::Address::ZERO: return luisa::select(uv * s, T{65536.f}, uv < 0.f || uv >= 1.f); } - - float4 fallback::bindless_texture_3d_sample(const FallbackTexture* tex, uint sampler, float u, float v, - float w) noexcept - { - auto s = Sampler::decode(sampler); - auto view = tex->view(0); - return s.filter() == Sampler::Filter::POINT - ? texture_sample_point(&view, s.address(), make_float3(u, v, w)) - : texture_sample_linear(&view, s.address(), make_float3(u, v, w)); + return T{65536.f}; +} + +[[nodiscard]] inline auto texture_sample_point(FallbackTextureView *view, Sampler::Address address, + const float2 &uv) noexcept { + auto size = make_float2(view->size2d()); + auto c = make_uint2(texture_coord_point(address, uv, size)); + return texture_read_2d_float(view, c.x, c.y); +} + +[[nodiscard]] inline auto texture_coord_linear(Sampler::Address address, float2 uv, const float2 &size) noexcept { + auto s = make_float2(size); + auto inv_s = 1.f / s; + auto c_min = texture_coord_point(address, uv - .5f * inv_s, s); + auto c_max = texture_coord_point(address, uv + .5f * inv_s, s); + return std::make_pair(luisa::min(c_min, c_max), luisa::max(c_min, c_max)); +} + +[[nodiscard]] inline auto texture_sample_linear(FallbackTextureView *view, Sampler::Address address, + const float2 &uv) noexcept { + auto size = make_float2(view->size2d()); + auto [st_min, st_max] = texture_coord_linear(address, uv, size); + auto t = luisa::fract(st_max); + auto c0 = make_uint2(st_min); + auto c1 = make_uint2(st_max); + auto v00 = texture_read_2d_float(view, c0.x, c0.y); + auto v01 = texture_read_2d_float(view, c1.x, c0.y); + auto v10 = texture_read_2d_float(view, c0.x, c1.y); + auto v11 = texture_read_2d_float(view, c1.x, c1.y); + return luisa::lerp(luisa::lerp(v00, v01, t.x), + luisa::lerp(v10, v11, t.x), t.y); +} + +float4 fallback::bindless_texture_2d_sample(const FallbackTexture *tex, uint sampler, float u, float v) noexcept { + auto s = Sampler::decode(sampler); + auto view = tex->view(0); + return s.filter() == Sampler::Filter::POINT ? texture_sample_point(&view, s.address(), make_float2(u, v)) : texture_sample_linear(&view, s.address(), make_float2(u, v)); +} + +float4 bindless_texture_2d_sample_level(const FallbackTexture *tex, uint sampler, float u, float v, + float lod) noexcept { + auto s = Sampler::decode(sampler); + auto filter = s.filter(); + if (lod <= 0.f || tex->mip_levels() == 0u || + filter == Sampler::Filter::POINT) { + return bindless_texture_2d_sample(tex, sampler, u, v); } - - float4 bindless_texture_3d_sample_level(const FallbackTexture* tex, uint sampler, float u, float v, float w, - float lod) noexcept - { - auto s = Sampler::decode(sampler); - auto filter = s.filter(); - if (lod <= 0.f || tex->mip_levels() == 0u || - filter == Sampler::Filter::POINT) - { - return bindless_texture_3d_sample(tex, sampler, u, v, w); - } - auto level0 = std::min(static_cast(lod), - tex->mip_levels() - 1u); - auto view0 = tex->view(level0); - auto v0 = texture_sample_linear( - &view0, s.address(), make_float3(u, v, w)); - if (level0 == tex->mip_levels() - 1u || - filter == Sampler::Filter::LINEAR_POINT) - { - return v0; - } - auto view1 = tex->view(level0 + 1); - auto v1 = texture_sample_linear( - &view1, s.address(), make_float3(u, v, w)); - return luisa::lerp(v0, v1, luisa::fract(lod)); + auto level0 = std::min(static_cast(lod), + tex->mip_levels() - 1u); + auto view0 = tex->view(level0); + auto v0 = texture_sample_linear( + &view0, s.address(), make_float2(u, v)); + if (level0 == tex->mip_levels() - 1u || + filter == Sampler::Filter::LINEAR_POINT) { + return v0; } - - - //swfly: im too lazy to do this. complete it someday - float4 bindless_texture_3d_sample_grad(const FallbackTexture* tex, uint sampler, float u, float v, float w, - int64_t dudxy, int64_t dvdxy, int64_t dwdxy) noexcept - { + auto view1 = tex->view(level0 + 1); + auto v1 = texture_sample_linear( + &view1, s.address(), make_float2(u, v)); + return luisa::lerp(v0, v1, luisa::fract(lod)); +} + +//swfly: im too lazy to do this. complete it someday +float4 bindless_texture_2d_sample_grad(const FallbackTexture *tex, uint sampler, float u, float v, int64_t dpdx, + int64_t dpdy) noexcept { + return bindless_texture_2d_sample(tex, sampler, u, v); +} + +//3D sampling +[[nodiscard]] inline auto texture_sample_point(FallbackTextureView *view, Sampler::Address address, + float3 uv) noexcept { + auto size = make_float3(view->size3d()); + auto c = make_uint3(texture_coord_point(address, uv, size)); + return texture_read_3d_float(view, c.x, c.y, c.z); +} + +[[nodiscard]] inline auto texture_coord_linear(Sampler::Address address, float3 uv, float3 size) noexcept { + auto s = make_float3(size); + auto inv_s = 1.f / s; + auto c_min = texture_coord_point(address, uv - .5f * inv_s, s); + auto c_max = texture_coord_point(address, uv + .5f * inv_s, s); + return std::make_pair(luisa::min(c_min, c_max), luisa::max(c_min, c_max)); +} + +[[nodiscard]] inline auto texture_sample_linear(FallbackTextureView *view, Sampler::Address address, + float3 uvw) noexcept { + auto size = make_float3(view->size3d()); + auto [st_min, st_max] = texture_coord_linear(address, uvw, size); + auto t = luisa::fract(st_max); + auto c0 = make_uint3(st_min); + auto c1 = make_uint3(st_max); + auto v000 = texture_read_3d_float(view, c0.x, c0.y, c0.z); + auto v001 = texture_read_3d_float(view, c1.x, c0.y, c0.z); + auto v010 = texture_read_3d_float(view, c0.x, c1.y, c0.z); + auto v011 = texture_read_3d_float(view, c1.x, c1.y, c0.z); + auto v100 = texture_read_3d_float(view, c0.x, c0.y, c1.z); + auto v101 = texture_read_3d_float(view, c1.x, c0.y, c1.z); + auto v110 = texture_read_3d_float(view, c0.x, c1.y, c1.z); + auto v111 = texture_read_3d_float(view, c1.x, c1.y, c1.z); + return luisa::lerp( + luisa::lerp(luisa::lerp(v000, v001, t.x), + luisa::lerp(v010, v011, t.x), t.y), + luisa::lerp(luisa::lerp(v100, v101, t.x), + luisa::lerp(v110, v111, t.x), t.y), + t.z); +} + +float4 fallback::bindless_texture_3d_sample(const FallbackTexture *tex, uint sampler, float u, float v, + float w) noexcept { + auto s = Sampler::decode(sampler); + auto view = tex->view(0); + return s.filter() == Sampler::Filter::POINT ? texture_sample_point(&view, s.address(), make_float3(u, v, w)) : texture_sample_linear(&view, s.address(), make_float3(u, v, w)); +} + +float4 bindless_texture_3d_sample_level(const FallbackTexture *tex, uint sampler, float u, float v, float w, + float lod) noexcept { + auto s = Sampler::decode(sampler); + auto filter = s.filter(); + if (lod <= 0.f || tex->mip_levels() == 0u || + filter == Sampler::Filter::POINT) { return bindless_texture_3d_sample(tex, sampler, u, v, w); } - - void bindless_tex2d_level_wrapper(void* bindless, uint slot, float x, float y, float level, void* out) - { - auto bd = reinterpret_cast(bindless); - auto slt = bd->slot(slot); - auto tex2d = slt.tex2d; - auto result_f4 = reinterpret_cast(out); - *result_f4 = bindless_texture_2d_sample_level(tex2d, slt.sampler_2d().code(), x, y, level); - - } - void bindless_tex2d_wrapper(void* bindless, uint slot, float x, float y, void* out) - { - auto bd = reinterpret_cast(bindless); - auto slt = bd->slot(slot); - auto tex2d = slt.tex2d; - *((float4*)out) = bindless_texture_2d_sample(tex2d, slt.sampler_2d().code(), x, y); - } - - void bindless_tex2d_size_wrapper(void* bindless, uint slot, void* out) - { - auto bd = reinterpret_cast(bindless); - auto slt = bd->slot(slot); - auto tex2d = slt.tex2d; - *((uint2*)out) = tex2d->view(0).size2d(); - } -} // namespace luisa::compute::fallback + auto level0 = std::min(static_cast(lod), + tex->mip_levels() - 1u); + auto view0 = tex->view(level0); + auto v0 = texture_sample_linear( + &view0, s.address(), make_float3(u, v, w)); + if (level0 == tex->mip_levels() - 1u || + filter == Sampler::Filter::LINEAR_POINT) { + return v0; + } + auto view1 = tex->view(level0 + 1); + auto v1 = texture_sample_linear( + &view1, s.address(), make_float3(u, v, w)); + return luisa::lerp(v0, v1, luisa::fract(lod)); +} + +//swfly: im too lazy to do this. complete it someday +float4 bindless_texture_3d_sample_grad(const FallbackTexture *tex, uint sampler, float u, float v, float w, + int64_t dudxy, int64_t dvdxy, int64_t dwdxy) noexcept { + return bindless_texture_3d_sample(tex, sampler, u, v, w); +} + +void bindless_tex2d_level_wrapper(void *bindless, uint slot, float x, float y, float level, void *out) { + auto bd = reinterpret_cast(bindless); + auto slt = bd->slot(slot); + auto tex2d = slt.tex2d; + auto result_f4 = reinterpret_cast(out); + *result_f4 = bindless_texture_2d_sample_level(tex2d, slt.sampler_2d().code(), x, y, level); +} +void bindless_tex2d_wrapper(void *bindless, uint slot, float x, float y, void *out) { + auto bd = reinterpret_cast(bindless); + auto slt = bd->slot(slot); + auto tex2d = slt.tex2d; + *((float4 *)out) = bindless_texture_2d_sample(tex2d, slt.sampler_2d().code(), x, y); +} + +void bindless_tex2d_size_wrapper(void *bindless, uint slot, void *out) { + auto bd = reinterpret_cast(bindless); + auto slt = bd->slot(slot); + auto tex2d = slt.tex2d; + *((uint2 *)out) = tex2d->view(0).size2d(); +} +}// namespace luisa::compute::fallback