diff --git a/CMakeLists.txt b/CMakeLists.txt index 4e194587390..2a174cb8e84 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -687,6 +687,7 @@ add_subdirectory(${EXTERNAL}/robin-map/tnt) add_subdirectory(${EXTERNAL}/smol-v/tnt) add_subdirectory(${EXTERNAL}/benchmark/tnt) add_subdirectory(${EXTERNAL}/meshoptimizer/tnt) +add_subdirectory(${EXTERNAL}/mikktspace) add_subdirectory(${EXTERNAL}/cgltf/tnt) add_subdirectory(${EXTERNAL}/draco/tnt) add_subdirectory(${EXTERNAL}/jsmn/tnt) diff --git a/README.md b/README.md index 4fae4a07cb3..19fd0df12ae 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,7 @@ repositories { } dependencies { - implementation 'com.google.android.filament:filament-android:1.31.6' + implementation 'com.google.android.filament:filament-android:1.31.7' } ``` @@ -51,7 +51,7 @@ Here are all the libraries available in the group `com.google.android.filament`: iOS projects can use CocoaPods to install the latest release: ``` -pod 'Filament', '~> 1.31.6' +pod 'Filament', '~> 1.31.7' ``` ### Snapshots diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index 57a57b816bb..e92aef42bd2 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -7,6 +7,8 @@ A new header is inserted each time a *tag* is created. Instead, if you are authoring a PR for the main branch, add your release note to [NEW_RELEASE_NOTES.md](./NEW_RELEASE_NOTES.md). +## v1.31.7 + ## v1.31.6 - engine: the default render channel is now 2 instead of 0 diff --git a/android/gradle.properties b/android/gradle.properties index 50a961ba32b..1b9b322b135 100644 --- a/android/gradle.properties +++ b/android/gradle.properties @@ -1,5 +1,5 @@ GROUP=com.google.android.filament -VERSION_NAME=1.31.6 +VERSION_NAME=1.31.7 POM_DESCRIPTION=Real-time physically based rendering engine for Android. 
diff --git a/filament/CMakeLists.txt b/filament/CMakeLists.txt index a153b1bce45..e94884c05b1 100644 --- a/filament/CMakeLists.txt +++ b/filament/CMakeLists.txt @@ -50,7 +50,6 @@ set(PUBLIC_HDRS set(SRCS src/AtlasAllocator.cpp - src/Box.cpp src/BufferObject.cpp src/Camera.cpp src/Color.cpp diff --git a/filament/backend/include/backend/PixelBufferDescriptor.h b/filament/backend/include/backend/PixelBufferDescriptor.h index 2a1a2c66547..1b498032fdc 100644 --- a/filament/backend/include/backend/PixelBufferDescriptor.h +++ b/filament/backend/include/backend/PixelBufferDescriptor.h @@ -279,8 +279,8 @@ class UTILS_PUBLIC PixelBufferDescriptor : public BufferDescriptor { break; } - size_t bpr = bpp * stride; - size_t bprAligned = (bpr + (alignment - 1)) & (~alignment + 1); + size_t const bpr = bpp * stride; + size_t const bprAligned = (bpr + (alignment - 1)) & (~alignment + 1); return bprAligned * height; } diff --git a/filament/backend/src/CommandStream.cpp b/filament/backend/src/CommandStream.cpp index 6e3d1c1b5fb..29bb2184575 100644 --- a/filament/backend/src/CommandStream.cpp +++ b/filament/backend/src/CommandStream.cpp @@ -75,6 +75,7 @@ CommandStream::CommandStream(Driver& driver, CircularBuffer& buffer) noexcept void CommandStream::execute(void* buffer) { SYSTRACE_CALL(); + SYSTRACE_CONTEXT(); Profiler profiler; diff --git a/filament/backend/src/opengl/GLUtils.h b/filament/backend/src/opengl/GLUtils.h index ffb640eb811..bdb1327a285 100644 --- a/filament/backend/src/opengl/GLUtils.h +++ b/filament/backend/src/opengl/GLUtils.h @@ -120,7 +120,12 @@ constexpr inline GLenum getBufferBindingType(BufferObjectBinding bindingType) no case BufferObjectBinding::UNIFORM: return GL_UNIFORM_BUFFER; case BufferObjectBinding::SHADER_STORAGE: +#if defined(GL_VERSION_4_1) || defined(GL_ES_VERSION_3_1) return GL_SHADER_STORAGE_BUFFER; +#else + utils::panic(__func__, __FILE__, __LINE__, "SHADER_STORAGE not supported"); + return 0x90D2; // placeholder so every path returns a GLenum after the panic above; NOTE(review): 0x90D2 appears to be GL_SHADER_STORAGE_BUFFER's value, confirm against the Khronos headers +#endif } } diff --git 
a/filament/backend/src/opengl/OpenGLContext.cpp b/filament/backend/src/opengl/OpenGLContext.cpp index 10c6f541bdc..48772dcdf99 100644 --- a/filament/backend/src/opengl/OpenGLContext.cpp +++ b/filament/backend/src/opengl/OpenGLContext.cpp @@ -16,6 +16,10 @@ #include "OpenGLContext.h" +#include + +#include + // change to true to display all GL extensions in the console on start-up #define DEBUG_PRINT_EXTENSIONS false @@ -70,7 +74,9 @@ OpenGLContext::OpenGLContext() noexcept { constexpr GLint MAX_FRAGMENT_SAMPLER_COUNT = caps3.MAX_FRAGMENT_SAMPLER_COUNT; if constexpr (BACKEND_OPENGL_VERSION == BACKEND_OPENGL_VERSION_GLES) { +#if defined(GL_ES_VERSION_2_0) initExtensionsGLES(); +#endif if (state.major == 3) { assert_invariant(gets.max_texture_image_units >= 16); assert_invariant(gets.max_combined_texture_image_units >= 32); @@ -88,8 +94,9 @@ OpenGLContext::OpenGLContext() noexcept { } } } else if constexpr (BACKEND_OPENGL_VERSION == BACKEND_OPENGL_VERSION_GL) { - // OpenGL version +#if defined(GL_VERSION_4_1) initExtensionsGL(); +#endif if (state.major == 4) { assert_invariant(state.minor >= 1); mShaderModel = ShaderModel::DESKTOP; @@ -351,6 +358,8 @@ void OpenGLContext::setDefaultState() noexcept { #endif } +#if defined(GL_ES_VERSION_2_0) + void OpenGLContext::initExtensionsGLES() noexcept { const char * const extensions = (const char*)glGetString(GL_EXTENSIONS); GLUtils::unordered_string_set const exts = GLUtils::split(extensions); @@ -395,6 +404,10 @@ void OpenGLContext::initExtensionsGLES() noexcept { } } +#endif // defined(GL_ES_VERSION_2_0) + +#if defined(GL_VERSION_4_1) + void OpenGLContext::initExtensionsGL() noexcept { GLUtils::unordered_string_set exts; GLint n = 0; @@ -418,21 +431,31 @@ void OpenGLContext::initExtensionsGL() noexcept { ext.EXT_color_buffer_float = true; // Assumes core profile. ext.EXT_color_buffer_half_float = true; // Assumes core profile. 
ext.EXT_debug_marker = exts.has("GL_EXT_debug_marker"sv); + ext.EXT_disjoint_timer_query = true; + ext.EXT_multisampled_render_to_texture = false; + ext.EXT_multisampled_render_to_texture2 = false; ext.EXT_shader_framebuffer_fetch = exts.has("GL_EXT_shader_framebuffer_fetch"sv); + ext.EXT_texture_compression_bptc = exts.has("GL_EXT_texture_compression_bptc"sv); ext.EXT_texture_compression_etc2 = exts.has("GL_ARB_ES3_compatibility"sv); + ext.EXT_texture_compression_rgtc = exts.has("GL_EXT_texture_compression_rgtc"sv); ext.EXT_texture_compression_s3tc = exts.has("GL_EXT_texture_compression_s3tc"sv); ext.EXT_texture_compression_s3tc_srgb = exts.has("GL_EXT_texture_compression_s3tc_srgb"sv); - ext.EXT_texture_compression_rgtc = exts.has("GL_EXT_texture_compression_rgtc"sv); - ext.EXT_texture_compression_bptc = exts.has("GL_EXT_texture_compression_bptc"sv); + ext.EXT_texture_cube_map_array = true; ext.EXT_texture_filter_anisotropic = exts.has("GL_EXT_texture_filter_anisotropic"sv); ext.EXT_texture_sRGB = exts.has("GL_EXT_texture_sRGB"sv); ext.GOOGLE_cpp_style_line_directive = exts.has("GL_GOOGLE_cpp_style_line_directive"sv); ext.KHR_debug = major >= 4 && minor >= 3; ext.KHR_texture_compression_astc_hdr = exts.has("GL_KHR_texture_compression_astc_hdr"sv); ext.KHR_texture_compression_astc_ldr = exts.has("GL_KHR_texture_compression_astc_ldr"sv); - ext.OES_EGL_image_external_essl3 = exts.has("GL_OES_EGL_image_external_essl3"sv); + ext.OES_EGL_image_external_essl3 = false; + ext.QCOM_tiled_rendering = false; + ext.WEBGL_compressed_texture_etc = false; + ext.WEBGL_compressed_texture_s3tc = false; + ext.WEBGL_compressed_texture_s3tc_srgb = false; } +#endif // defined(GL_VERSION_4_1) + void OpenGLContext::bindBuffer(GLenum target, GLuint buffer) noexcept { if (target == GL_ELEMENT_ARRAY_BUFFER) { constexpr size_t targetIndex = getIndexForBufferTarget(GL_ELEMENT_ARRAY_BUFFER); @@ -465,13 +488,8 @@ void OpenGLContext::pixelStore(GLenum pname, GLint param) noexcept { switch 
(pname) { case GL_PACK_ALIGNMENT: pcur = &state.pack.alignment; break; - case GL_PACK_ROW_LENGTH: pcur = &state.pack.row_length; break; - case GL_PACK_SKIP_PIXELS: pcur = &state.pack.skip_pixels; break; // convenience - case GL_PACK_SKIP_ROWS: pcur = &state.pack.skip_row; break; // convenience case GL_UNPACK_ALIGNMENT: pcur = &state.unpack.alignment; break; case GL_UNPACK_ROW_LENGTH: pcur = &state.unpack.row_length; break; - case GL_UNPACK_SKIP_PIXELS: pcur = &state.unpack.skip_pixels; break; // convenience - case GL_UNPACK_SKIP_ROWS: pcur = &state.unpack.skip_row; break; // convenience default: goto default_case; } @@ -618,12 +636,10 @@ void OpenGLContext::resetState() noexcept { GLenum const bufferTargets[] = { GL_UNIFORM_BUFFER, GL_TRANSFORM_FEEDBACK_BUFFER, -#if !defined(__EMSCRIPTEN__) +#if !defined(__EMSCRIPTEN__) && (defined(GL_VERSION_4_1) || defined(GL_ES_VERSION_3_1)) GL_SHADER_STORAGE_BUFFER, #endif GL_ARRAY_BUFFER, - GL_COPY_READ_BUFFER, - GL_COPY_WRITE_BUFFER, GL_ELEMENT_ARRAY_BUFFER, GL_PIXEL_PACK_BUFFER, GL_PIXEL_UNPACK_BUFFER, @@ -646,23 +662,30 @@ void OpenGLContext::resetState() noexcept { // Reset state.textures to its default state to avoid the complexity and error-prone // nature of resetting the GL state to its existing state state.textures = {}; - const GLuint textureTargets[] = { - GL_TEXTURE_2D, - GL_TEXTURE_2D_ARRAY, - GL_TEXTURE_CUBE_MAP, - GL_TEXTURE_3D, + const std::pair textureTargets[] = { + { GL_TEXTURE_2D, true }, + { GL_TEXTURE_2D_ARRAY, true }, + { GL_TEXTURE_CUBE_MAP, true }, + { GL_TEXTURE_3D, true }, #if !defined(__EMSCRIPTEN__) - GL_TEXTURE_2D_MULTISAMPLE, - GL_TEXTURE_EXTERNAL_OES, - GL_TEXTURE_CUBE_MAP_ARRAY, +#if defined(GL_VERSION_4_1) || defined(GL_ES_VERSION_3_1) + { GL_TEXTURE_2D_MULTISAMPLE, true }, +#endif +#if defined(GL_OES_EGL_image_external) + { GL_TEXTURE_EXTERNAL_OES, ext.OES_EGL_image_external_essl3 }, +#endif +#if defined(GL_VERSION_4_1) || defined(GL_EXT_texture_cube_map_array) + { GL_TEXTURE_CUBE_MAP_ARRAY, 
ext.EXT_texture_cube_map_array }, +#endif #endif }; for (GLint unit = 0; unit < gets.max_combined_texture_image_units; ++unit) { glActiveTexture(GL_TEXTURE0 + unit); glBindSampler(unit, 0); - - for (auto const target : textureTargets) { - glBindTexture(target, 0); + for (auto [target, available] : textureTargets) { + if (available) { + glBindTexture(target, 0); + } } } glActiveTexture(GL_TEXTURE0 + state.textures.active); @@ -670,14 +693,10 @@ void OpenGLContext::resetState() noexcept { // state.unpack glPixelStorei(GL_UNPACK_ALIGNMENT, state.unpack.alignment); glPixelStorei(GL_UNPACK_ROW_LENGTH, state.unpack.row_length); - glPixelStorei(GL_UNPACK_SKIP_PIXELS, state.unpack.skip_pixels); - glPixelStorei(GL_UNPACK_SKIP_ROWS, state.unpack.skip_row); // state.pack glPixelStorei(GL_PACK_ALIGNMENT, state.pack.alignment); - glPixelStorei(GL_PACK_ROW_LENGTH, state.pack.row_length); - glPixelStorei(GL_PACK_SKIP_PIXELS, state.pack.skip_pixels); - glPixelStorei(GL_PACK_SKIP_ROWS, state.pack.skip_row); + glPixelStorei(GL_PACK_ROW_LENGTH, 0); // we rely on GL_PACK_ROW_LENGTH being zero // state.window glScissor( @@ -696,4 +715,30 @@ void OpenGLContext::resetState() noexcept { } +OpenGLContext::FenceSync OpenGLContext::createFenceSync( + OpenGLPlatform&) noexcept { + auto sync = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); + CHECK_GL_ERROR(utils::slog.e) + return { .sync = sync }; +} + +void OpenGLContext::destroyFenceSync( + OpenGLPlatform&, FenceSync sync) noexcept { + glDeleteSync(sync.sync); + CHECK_GL_ERROR(utils::slog.e) +} + +OpenGLContext::FenceSync::Status OpenGLContext::clientWaitSync( + OpenGLPlatform&, FenceSync sync) const noexcept { + GLenum const status = glClientWaitSync(sync.sync, 0, 0u); + CHECK_GL_ERROR(utils::slog.e) + using Status = OpenGLContext::FenceSync::Status; + switch (status) { + case GL_ALREADY_SIGNALED: return Status::ALREADY_SIGNALED; + case GL_TIMEOUT_EXPIRED: return Status::TIMEOUT_EXPIRED; + case GL_CONDITION_SATISFIED: return 
Status::CONDITION_SATISFIED; + default: return Status::FAILURE; + } +} + } // namesapce filament diff --git a/filament/backend/src/opengl/OpenGLContext.h b/filament/backend/src/opengl/OpenGLContext.h index 85f3ce7fc23..ded404f83aa 100644 --- a/filament/backend/src/opengl/OpenGLContext.h +++ b/filament/backend/src/opengl/OpenGLContext.h @@ -33,6 +33,8 @@ namespace filament::backend { +class OpenGLPlatform; + class OpenGLContext { public: static constexpr const size_t MAX_TEXTURE_UNIT_COUNT = MAX_SAMPLER_COUNT; @@ -110,9 +112,6 @@ class OpenGLContext { GLenum sfailBack, GLenum dpfailBack, GLenum dppassBack) noexcept; inline void stencilMaskSeparate(GLuint maskFront, GLuint maskBack) noexcept; inline void polygonOffset(GLfloat factor, GLfloat units) noexcept; - inline void beginQuery(GLenum target, GLuint query) noexcept; - inline void endQuery(GLenum target) noexcept; - inline GLuint getQuery(GLenum target) const noexcept; inline void setScissor(GLint left, GLint bottom, GLsizei width, GLsizei height) noexcept; inline void viewport(GLint left, GLint bottom, GLsizei width, GLsizei height) noexcept; @@ -121,6 +120,26 @@ class OpenGLContext { void deleteBuffers(GLsizei n, const GLuint* buffers, GLenum target) noexcept; void deleteVertexArrays(GLsizei n, const GLuint* arrays) noexcept; + // we abstract GL's sync because it's not available in ES2, but we can use EGL's sync + // instead, if available. 
+ struct FenceSync { + enum class Status { + ALREADY_SIGNALED, + TIMEOUT_EXPIRED, + CONDITION_SATISFIED, + FAILURE + }; + union { + void* fence; + GLsync sync; + }; + }; + + FenceSync createFenceSync(OpenGLPlatform& platform) noexcept; + void destroyFenceSync(OpenGLPlatform& platform, FenceSync sync) noexcept; + FenceSync::Status clientWaitSync(OpenGLPlatform& platform, FenceSync sync) const noexcept; + + // glGet*() values struct { GLfloat max_anisotropy; @@ -149,21 +168,21 @@ class OpenGLContext { bool EXT_color_buffer_half_float; bool EXT_debug_marker; bool EXT_disjoint_timer_query; - bool EXT_multisampled_render_to_texture; bool EXT_multisampled_render_to_texture2; + bool EXT_multisampled_render_to_texture; bool EXT_shader_framebuffer_fetch; - bool KHR_texture_compression_astc_hdr; - bool KHR_texture_compression_astc_ldr; + bool EXT_texture_compression_bptc; bool EXT_texture_compression_etc2; + bool EXT_texture_compression_rgtc; bool EXT_texture_compression_s3tc; bool EXT_texture_compression_s3tc_srgb; - bool EXT_texture_compression_rgtc; - bool EXT_texture_compression_bptc; bool EXT_texture_cube_map_array; bool EXT_texture_filter_anisotropic; bool EXT_texture_sRGB; bool GOOGLE_cpp_style_line_directive; bool KHR_debug; + bool KHR_texture_compression_astc_hdr; + bool KHR_texture_compression_astc_ldr; bool OES_EGL_image_external_essl3; bool QCOM_tiled_rendering; bool WEBGL_compressed_texture_etc; @@ -315,8 +334,8 @@ class OpenGLContext { GLintptr offset = 0; GLsizeiptr size = 0; } buffers[MAX_BUFFER_BINDINGS]; - } targets[2]; // there are only 2 indexed buffer target (uniform and transform feedback) - GLuint genericBinding[9] = { 0 }; + } targets[3]; // there are only 3 indexed buffer targets + GLuint genericBinding[7] = {}; } buffers; struct { @@ -332,15 +351,10 @@ class OpenGLContext { struct { GLint row_length = 0; GLint alignment = 4; - GLint skip_pixels = 0; - GLint skip_row = 0; } unpack; struct { - GLint row_length = 0; GLint alignment = 4; - GLint 
skip_pixels = 0; - GLint skip_row = 0; } pack; struct { @@ -348,10 +362,6 @@ class OpenGLContext { vec4gli viewport { 0 }; vec2glf depthRange { 0.0f, 1.0f }; } window; - - struct { - GLuint timer = -1u; - } queries; } state; private: @@ -403,8 +413,12 @@ class OpenGLContext { RenderPrimitive mDefaultVAO; // this is chosen to minimize code size +#if defined(GL_ES_VERSION_2_0) void initExtensionsGLES() noexcept; +#endif +#if defined(GL_VERSION_4_1) void initExtensionsGL() noexcept; +#endif template static inline void update_state(T& state, T const& expected, F functor, bool force = false) noexcept { @@ -429,7 +443,9 @@ constexpr size_t OpenGLContext::getIndexForTextureTarget(GLuint target) noexcept case GL_TEXTURE_2D: return 0; case GL_TEXTURE_2D_ARRAY: return 1; case GL_TEXTURE_CUBE_MAP: return 2; +#if defined(GL_VERSION_4_1) || defined(GL_ES_VERSION_3_1) case GL_TEXTURE_2D_MULTISAMPLE: return 3; +#endif case GL_TEXTURE_EXTERNAL_OES: return 4; case GL_TEXTURE_3D: return 5; case GL_TEXTURE_CUBE_MAP_ARRAY: return 6; @@ -450,17 +466,15 @@ constexpr size_t OpenGLContext::getIndexForCap(GLenum cap) noexcept { //NOLINT case GL_SAMPLE_ALPHA_TO_COVERAGE: index = 6; break; case GL_SAMPLE_COVERAGE: index = 7; break; case GL_POLYGON_OFFSET_FILL: index = 8; break; - case GL_PRIMITIVE_RESTART_FIXED_INDEX: index = 9; break; - case GL_RASTERIZER_DISCARD: index = 10; break; #ifdef GL_ARB_seamless_cube_map - case GL_TEXTURE_CUBE_MAP_SEAMLESS: index = 11; break; + case GL_TEXTURE_CUBE_MAP_SEAMLESS: index = 9; break; #endif #if BACKEND_OPENGL_VERSION == BACKEND_OPENGL_VERSION_GL - case GL_PROGRAM_POINT_SIZE: index = 12; break; + case GL_PROGRAM_POINT_SIZE: index = 10; break; #endif - default: index = 13; break; // should never happen + default: break; } - assert_invariant(index < 13 && index < state.enables.caps.size()); + assert_invariant(index < state.enables.caps.size()); return index; } @@ -470,15 +484,14 @@ constexpr size_t OpenGLContext::getIndexForBufferTarget(GLenum target) 
noexcept // The indexed buffers MUST be first in this list (those usable with bindBufferRange) case GL_UNIFORM_BUFFER: index = 0; break; case GL_TRANSFORM_FEEDBACK_BUFFER: index = 1; break; +#if defined(GL_VERSION_4_1) || defined(GL_ES_VERSION_3_1) case GL_SHADER_STORAGE_BUFFER: index = 2; break; - +#endif case GL_ARRAY_BUFFER: index = 3; break; - case GL_COPY_READ_BUFFER: index = 4; break; - case GL_COPY_WRITE_BUFFER: index = 5; break; - case GL_ELEMENT_ARRAY_BUFFER: index = 6; break; - case GL_PIXEL_PACK_BUFFER: index = 7; break; - case GL_PIXEL_UNPACK_BUFFER: index = 8; break; - default: index = 9; break; // should never happen + case GL_ELEMENT_ARRAY_BUFFER: index = 4; break; + case GL_PIXEL_PACK_BUFFER: index = 5; break; + case GL_PIXEL_UNPACK_BUFFER: index = 6; break; + default: break; } assert_invariant(index < sizeof(state.buffers.genericBinding)/sizeof(state.buffers.genericBinding[0])); // NOLINT(misc-redundant-expression) return index; @@ -501,21 +514,21 @@ void OpenGLContext::bindSampler(GLuint unit, GLuint sampler) noexcept { } void OpenGLContext::setScissor(GLint left, GLint bottom, GLsizei width, GLsizei height) noexcept { - vec4gli scissor(left, bottom, width, height); + vec4gli const scissor(left, bottom, width, height); update_state(state.window.scissor, scissor, [&]() { glScissor(left, bottom, width, height); }); } void OpenGLContext::viewport(GLint left, GLint bottom, GLsizei width, GLsizei height) noexcept { - vec4gli viewport(left, bottom, width, height); + vec4gli const viewport(left, bottom, width, height); update_state(state.window.viewport, viewport, [&]() { glViewport(left, bottom, width, height); }); } void OpenGLContext::depthRange(GLclampf near, GLclampf far) noexcept { - vec2glf depthRange(near, far); + vec2glf const depthRange(near, far); update_state(state.window.depthRange, depthRange, [&]() { glDepthRangef(near, far); }); @@ -526,7 +539,7 @@ void OpenGLContext::bindVertexArray(RenderPrimitive const* p) noexcept { 
update_state(state.vao.p, vao, [&]() { glBindVertexArray(vao->vao); // update GL_ELEMENT_ARRAY_BUFFER, which is updated by glBindVertexArray - size_t targetIndex = getIndexForBufferTarget(GL_ELEMENT_ARRAY_BUFFER); + size_t const targetIndex = getIndexForBufferTarget(GL_ELEMENT_ARRAY_BUFFER); state.buffers.genericBinding[targetIndex] = vao->elementArray; if (UTILS_UNLIKELY(bugs.vao_doesnt_store_element_array_buffer_binding)) { // This shouldn't be needed, but it looks like some drivers don't do the implicit @@ -538,8 +551,10 @@ void OpenGLContext::bindVertexArray(RenderPrimitive const* p) noexcept { void OpenGLContext::bindBufferRange(GLenum target, GLuint index, GLuint buffer, GLintptr offset, GLsizeiptr size) noexcept { - size_t targetIndex = getIndexForBufferTarget(target); - assert_invariant(targetIndex <= 2); // validity check + size_t const targetIndex = getIndexForBufferTarget(target); + + // validity check + assert_invariant(targetIndex < sizeof(state.buffers.targets) / sizeof(*state.buffers.targets)); // this ALSO sets the generic binding if ( state.buffers.targets[targetIndex].buffers[index].name != buffer @@ -616,7 +631,7 @@ void OpenGLContext::disableVertexAttribArray(GLuint index) noexcept { } void OpenGLContext::enable(GLenum cap) noexcept { - size_t index = getIndexForCap(cap); + size_t const index = getIndexForCap(cap); if (UTILS_UNLIKELY(!state.enables.caps[index])) { state.enables.caps.set(index); glEnable(cap); @@ -624,7 +639,7 @@ void OpenGLContext::enable(GLenum cap) noexcept { } void OpenGLContext::disable(GLenum cap) noexcept { - size_t index = getIndexForCap(cap); + size_t const index = getIndexForCap(cap); if (UTILS_UNLIKELY(state.enables.caps[index])) { state.enables.caps.unset(index); glDisable(cap); @@ -723,41 +738,6 @@ void OpenGLContext::polygonOffset(GLfloat factor, GLfloat units) noexcept { }); } -void OpenGLContext::beginQuery(GLenum target, GLuint query) noexcept { - switch (target) { - case GL_TIME_ELAPSED: - if 
(state.queries.timer != -1u) { - // this is an error - break; - } - state.queries.timer = query; - break; - default: - return; - } - glBeginQuery(target, query); -} - -void OpenGLContext::endQuery(GLenum target) noexcept { - switch (target) { - case GL_TIME_ELAPSED: - state.queries.timer = -1u; - break; - default: - return; - } - glEndQuery(target); -} - -GLuint OpenGLContext::getQuery(GLenum target) const noexcept { - switch (target) { - case GL_TIME_ELAPSED: - return state.queries.timer; - default: - return 0; - } -} - } // namespace filament #endif //TNT_FILAMENT_BACKEND_OPENGLCONTEXT_H diff --git a/filament/backend/src/opengl/OpenGLDriver.cpp b/filament/backend/src/opengl/OpenGLDriver.cpp index a13ac558155..0a3ab8ac9a2 100644 --- a/filament/backend/src/opengl/OpenGLDriver.cpp +++ b/filament/backend/src/opengl/OpenGLDriver.cpp @@ -69,7 +69,9 @@ using namespace utils; namespace filament::backend { Driver* OpenGLDriverFactory::create( - OpenGLPlatform* const platform, void* const sharedGLContext, const Platform::DriverConfig& driverConfig) noexcept { + OpenGLPlatform* const platform, + void* const sharedGLContext, + const Platform::DriverConfig& driverConfig) noexcept { return OpenGLDriver::create(platform, sharedGLContext, driverConfig); } @@ -177,6 +179,10 @@ OpenGLDriver::OpenGLDriver(OpenGLPlatform* platform, const Platform::DriverConfi slog.i << "OS version: " << mPlatform.getOSVersion() << io::endl; #endif + // Timer queries are core in GL 3.3, otherwise we need EXT_disjoint_timer_query + // iOS headers don't define GL_EXT_disjoint_timer_query, so make absolutely sure + // we won't use it. 
+#if defined(GL_VERSION_3_3) || defined(GL_EXT_disjoint_timer_query) if (mContext.ext.EXT_disjoint_timer_query || BACKEND_OPENGL_VERSION == BACKEND_OPENGL_VERSION_GL) { // timer queries are available @@ -187,7 +193,9 @@ OpenGLDriver::OpenGLDriver(OpenGLPlatform* platform, const Platform::DriverConfi mTimerQueryImpl = new TimerQueryNative(mContext); } mFrameTimeSupported = true; - } else if (mPlatform.canCreateFence()) { + } else +#endif + if (mPlatform.canCreateFence()) { // no timer queries, but we can use fences mTimerQueryImpl = new OpenGLTimerQueryFence(mPlatform); mFrameTimeSupported = true; @@ -537,6 +545,7 @@ void OpenGLDriver::textureStorage(OpenGLDriver::GLTexture* t, GLsizei(width), GLsizei(height), GLsizei(depth) * 6); break; } +#if defined(GL_VERSION_4_1) || defined(GL_ES_VERSION_3_1) case GL_TEXTURE_2D_MULTISAMPLE: if constexpr (TEXTURE_2D_MULTISAMPLE_SUPPORTED) { // NOTE: if there is a mix of texture and renderbuffers, "fixed_sample_locations" must be true @@ -554,6 +563,7 @@ void OpenGLDriver::textureStorage(OpenGLDriver::GLTexture* t, PANIC_LOG("GL_TEXTURE_2D_MULTISAMPLE is not supported"); } break; +#endif default: // cannot happen break; } @@ -621,6 +631,7 @@ void OpenGLDriver::createTextureR(Handle th, SamplerType target, uint if (t->samples > 1) { // Note: we can't be here in practice because filament's user API doesn't // allow the creation of multi-sampled textures. +#if defined(GL_VERSION_4_1) || defined(GL_ES_VERSION_3_1) if (gl.features.multisample_texture) { // multi-sample texture on GL 3.2 / GLES 3.1 and above t->gl.target = GL_TEXTURE_2D_MULTISAMPLE; @@ -629,6 +640,7 @@ void OpenGLDriver::createTextureR(Handle th, SamplerType target, uint } else { // Turn off multi-sampling for that texture. It's just not supported. 
} +#endif } textureStorage(t, w, h, depth); } @@ -721,6 +733,7 @@ void OpenGLDriver::importTextureR(Handle th, intptr_t id, if (t->samples > 1) { // Note: we can't be here in practice because filament's user API doesn't // allow the creation of multi-sampled textures. +#if defined(GL_VERSION_4_1) || defined(GL_ES_VERSION_3_1) if (gl.features.multisample_texture) { // multi-sample texture on GL 3.2 / GLES 3.1 and above t->gl.target = GL_TEXTURE_2D_MULTISAMPLE; @@ -728,6 +741,7 @@ void OpenGLDriver::importTextureR(Handle th, intptr_t id, } else { // Turn off multi-sampling for that texture. It's just not supported. } +#endif } CHECK_GL_ERROR(utils::slog.e) @@ -800,6 +814,7 @@ void OpenGLDriver::framebufferTexture(TargetBufferInfo const& binfo, GLTexture* t = handle_cast(binfo.handle); + assert_invariant(t); assert_invariant(t->target != SamplerType::SAMPLER_EXTERNAL); assert_invariant(rt->width <= valueForLevel(binfo.level, t->width) && rt->height <= valueForLevel(binfo.level, t->height)); @@ -907,7 +922,9 @@ void OpenGLDriver::framebufferTexture(TargetBufferInfo const& binfo, case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: case GL_TEXTURE_2D: +#if defined(GL_VERSION_4_1) || defined(GL_ES_VERSION_3_1) case GL_TEXTURE_2D_MULTISAMPLE: +#endif if (any(t->usage & TextureUsage::SAMPLEABLE)) { glFramebufferTexture2D(GL_FRAMEBUFFER, attachment, target, t->gl.id, binfo.level); @@ -1135,21 +1152,26 @@ void OpenGLDriver::createRenderTargetR(Handle rth, rt->gl.samples = samples; rt->targets = targets; - UTILS_UNUSED_IN_RELEASE math::vec2 tmin = {std::numeric_limits::max()}; - UTILS_UNUSED_IN_RELEASE math::vec2 tmax = {0}; + UTILS_UNUSED_IN_RELEASE math::vec2 tmin = { std::numeric_limits::max() }; + UTILS_UNUSED_IN_RELEASE math::vec2 tmax = { 0 }; + auto checkDimensions = [&tmin, &tmax](GLTexture* t, uint8_t level) { + const auto twidth = std::max(1u, t->width >> level); + const auto theight = std::max(1u, t->height >> level); + tmin = { 
std::min(tmin.x, twidth), std::min(tmin.y, theight) }; + tmax = { std::max(tmax.x, twidth), std::max(tmax.y, theight) }; + }; + if (any(targets & TargetBufferFlags::COLOR_ALL)) { GLenum bufs[MRT::MAX_SUPPORTED_RENDER_TARGET_COUNT] = { GL_NONE }; const size_t maxDrawBuffers = getMaxDrawBuffers(); for (size_t i = 0; i < maxDrawBuffers; i++) { if (any(targets & getTargetBufferFlagsAt(i))) { - auto t = rt->gl.color[i] = handle_cast(color[i].handle); - const auto twidth = std::max(1u, t->width >> color[i].level); - const auto theight = std::max(1u, t->height >> color[i].level); - tmin = { std::min(tmin.x, twidth), std::min(tmin.y, theight) }; - tmax = { std::max(tmax.x, twidth), std::max(tmax.y, theight) }; + assert_invariant(color[i].handle); + rt->gl.color[i] = handle_cast(color[i].handle); framebufferTexture(color[i], rt, GL_COLOR_ATTACHMENT0 + i); bufs[i] = GL_COLOR_ATTACHMENT0 + i; + checkDimensions(rt->gl.color[i], color[i].level); } } glDrawBuffers((GLsizei)maxDrawBuffers, bufs); @@ -1159,37 +1181,28 @@ void OpenGLDriver::createRenderTargetR(Handle rth, // handle special cases first (where depth/stencil are packed) bool specialCased = false; if ((targets & TargetBufferFlags::DEPTH_AND_STENCIL) == TargetBufferFlags::DEPTH_AND_STENCIL) { - assert_invariant(!stencil.handle || stencil.handle == depth.handle); - auto t = rt->gl.depth = handle_cast(depth.handle); - const auto twidth = std::max(1u, t->width >> depth.level); - const auto theight = std::max(1u, t->height >> depth.level); - tmin = { std::min(tmin.x, twidth), std::min(tmin.y, theight) }; - tmax = { std::max(tmax.x, twidth), std::max(tmax.y, theight) }; - if (any(rt->gl.depth->usage & TextureUsage::SAMPLEABLE) || - (!depth.handle && !stencil.handle)) { - // special case: depth & stencil requested, and both provided as the same texture - // special case: depth & stencil requested, but both not provided - specialCased = true; + assert_invariant(depth.handle); + // either we supplied only the depth handle or 
both depth/stencil are identical and not null + if (depth.handle && (stencil.handle == depth.handle || !stencil.handle)) { + rt->gl.depth = handle_cast(depth.handle); framebufferTexture(depth, rt, GL_DEPTH_STENCIL_ATTACHMENT); + specialCased = true; + checkDimensions(rt->gl.depth, depth.level); } } if (!specialCased) { if (any(targets & TargetBufferFlags::DEPTH)) { - auto t = rt->gl.depth = handle_cast(depth.handle); - const auto twidth = std::max(1u, t->width >> depth.level); - const auto theight = std::max(1u, t->height >> depth.level); - tmin = { std::min(tmin.x, twidth), std::min(tmin.y, theight) }; - tmax = { std::max(tmax.x, twidth), std::max(tmax.y, theight) }; + assert_invariant(depth.handle); + rt->gl.depth = handle_cast(depth.handle); framebufferTexture(depth, rt, GL_DEPTH_ATTACHMENT); + checkDimensions(rt->gl.depth, depth.level); } if (any(targets & TargetBufferFlags::STENCIL)) { - auto t = rt->gl.stencil = handle_cast(stencil.handle); - const auto twidth = std::max(1u, t->width >> stencil.level); - const auto theight = std::max(1u, t->height >> stencil.level); - tmin = { std::min(tmin.x, twidth), std::min(tmin.y, theight) }; - tmax = { std::max(tmax.x, twidth), std::max(tmax.y, theight) }; + assert_invariant(stencil.handle); + rt->gl.stencil = handle_cast(stencil.handle); framebufferTexture(stencil, rt, GL_STENCIL_ATTACHMENT); + checkDimensions(rt->gl.stencil, stencil.level); } } @@ -1211,19 +1224,19 @@ void OpenGLDriver::createSyncR(Handle fh, int) { DEBUG_MARKER() GLSync* f = handle_cast(fh); - f->gl.sync = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); - CHECK_GL_ERROR(utils::slog.e) + f->handle = mContext.createFenceSync(mPlatform); // check the status of the sync once a frame, since we must do this from our thread std::weak_ptr const weak = f->result; - runEveryNowAndThen([sync = f->gl.sync, weak]() -> bool { + runEveryNowAndThen( + [&platform = mPlatform, &context = mContext, handle = f->handle, weak]() -> bool { /* context is captured by reference, like platform: a by-value capture would copy the whole OpenGLContext (all of its cached GL state) into every pending callback */ auto result = weak.lock(); if
(result) { - GLenum const status = glClientWaitSync(sync, 0, 0u); + auto const status = context.clientWaitSync(platform, handle); result->status.store(status, std::memory_order_relaxed); - return (status != GL_TIMEOUT_EXPIRED); + return (status != OpenGLContext::FenceSync::Status::TIMEOUT_EXPIRED); } - return true; // we're done + return true; }); } @@ -1332,9 +1345,6 @@ void OpenGLDriver::destroyTexture(Handle th) { assert_invariant(t->gl.target == GL_RENDERBUFFER); glDeleteRenderbuffers(1, &t->gl.id); } - if (t->gl.fence) { - glDeleteSync(t->gl.fence); - } if (t->gl.sidecarRenderBufferMS) { glDeleteRenderbuffers(1, &t->gl.sidecarRenderBufferMS); } @@ -1413,10 +1423,9 @@ void OpenGLDriver::destroyTimerQuery(Handle tqh) { void OpenGLDriver::destroySync(Handle sh) { DEBUG_MARKER() - if (sh) { GLSync* s = handle_cast(sh); - glDeleteSync(s->gl.sync); + mContext.destroyFenceSync(mPlatform, s->handle); destruct(sh, s); } } @@ -1970,7 +1979,9 @@ void OpenGLDriver::generateMipmaps(Handle th) { auto& gl = mContext; GLTexture* t = handle_cast(th); +#if defined(GL_VERSION_4_1) || defined(GL_ES_VERSION_3_1) assert_invariant(t->gl.target != GL_TEXTURE_2D_MULTISAMPLE); +#endif // Note: glGenerateMimap can also fail if the internal format is not both // color-renderable and filterable (i.e.: doesn't work for depth) bindTexture(OpenGLContext::DUMMY_TEXTURE_BINDING, t); @@ -2011,8 +2022,13 @@ void OpenGLDriver::setTextureData(GLTexture* t, uint32_t level, gl.pixelStore(GL_UNPACK_ROW_LENGTH, GLint(p.stride)); gl.pixelStore(GL_UNPACK_ALIGNMENT, GLint(p.alignment)); - gl.pixelStore(GL_UNPACK_SKIP_PIXELS, GLint(p.left)); - gl.pixelStore(GL_UNPACK_SKIP_ROWS, GLint(p.top)); + + // This is equivalent to using GL_UNPACK_SKIP_PIXELS and GL_UNPACK_SKIP_ROWS + using PBD = PixelBufferDescriptor; + size_t const stride = p.stride ? 
p.stride : width; + size_t const bpp = PBD::computeDataSize(p.format, p.type, 1, 1, 1); + size_t const bpr = PBD::computeDataSize(p.format, p.type, stride, 1, p.alignment); + void const* const buffer = static_cast(p.buffer) + p.left * bpp + bpr * p.top; switch (t->target) { case SamplerType::SAMPLER_EXTERNAL: @@ -2026,7 +2042,7 @@ void OpenGLDriver::setTextureData(GLTexture* t, uint32_t level, assert_invariant(t->gl.target == GL_TEXTURE_2D); glTexSubImage2D(t->gl.target, GLint(level), GLint(xoffset), GLint(yoffset), - GLsizei(width), GLsizei(height), glFormat, glType, p.buffer); + GLsizei(width), GLsizei(height), glFormat, glType, buffer); break; case SamplerType::SAMPLER_3D: assert_invariant(zoffset + depth <= std::max(1u, t->depth >> level)); @@ -2035,7 +2051,7 @@ void OpenGLDriver::setTextureData(GLTexture* t, uint32_t level, assert_invariant(t->gl.target == GL_TEXTURE_3D); glTexSubImage3D(t->gl.target, GLint(level), GLint(xoffset), GLint(yoffset), GLint(zoffset), - GLsizei(width), GLsizei(height), GLsizei(depth), glFormat, glType, p.buffer); + GLsizei(width), GLsizei(height), GLsizei(depth), glFormat, glType, buffer); break; case SamplerType::SAMPLER_2D_ARRAY: case SamplerType::SAMPLER_CUBEMAP_ARRAY: @@ -2047,7 +2063,7 @@ void OpenGLDriver::setTextureData(GLTexture* t, uint32_t level, t->gl.target == GL_TEXTURE_CUBE_MAP_ARRAY); glTexSubImage3D(t->gl.target, GLint(level), GLint(xoffset), GLint(yoffset), GLint(zoffset), - GLsizei(width), GLsizei(height), GLsizei(depth), glFormat, glType, p.buffer); + GLsizei(width), GLsizei(height), GLsizei(depth), glFormat, glType, buffer); break; case SamplerType::SAMPLER_CUBEMAP: { assert_invariant(t->gl.target == GL_TEXTURE_CUBE_MAP); @@ -2063,7 +2079,7 @@ void OpenGLDriver::setTextureData(GLTexture* t, uint32_t level, GLenum const target = getCubemapTarget(zoffset + face); glTexSubImage2D(target, GLint(level), GLint(xoffset), GLint(yoffset), GLsizei(width), GLsizei(height), glFormat, glType, - static_cast(p.buffer) + 
faceSize * face); + static_cast(buffer) + faceSize * face); } break; } @@ -2337,13 +2353,14 @@ SyncStatus OpenGLDriver::getSyncStatus(Handle sh) { return SyncStatus::NOT_SIGNALED; } auto status = s->result->status.load(std::memory_order_relaxed); + using Status = OpenGLContext::FenceSync::Status; switch (status) { - case GL_CONDITION_SATISFIED: - case GL_ALREADY_SIGNALED: + case Status::CONDITION_SATISFIED: + case Status::ALREADY_SIGNALED: return SyncStatus::SIGNALED; - case GL_TIMEOUT_EXPIRED: + case Status::TIMEOUT_EXPIRED: return SyncStatus::NOT_SIGNALED; - case GL_WAIT_FAILED: + case Status::FAILURE: default: return SyncStatus::ERROR; } @@ -2714,10 +2731,7 @@ void OpenGLDriver::readPixels(Handle src, GLenum const glFormat = getFormat(p.format); GLenum const glType = getType(p.type); - gl.pixelStore(GL_PACK_ROW_LENGTH, (GLint)p.stride); - gl.pixelStore(GL_PACK_ALIGNMENT, (GLint)p.alignment); - gl.pixelStore(GL_PACK_SKIP_PIXELS, (GLint)p.left); - gl.pixelStore(GL_PACK_SKIP_ROWS, (GLint)p.top); + gl.pixelStore(GL_PACK_ALIGNMENT, (GLint)p.alignment); /* * glReadPixel() operation... @@ -2745,44 +2759,54 @@ void OpenGLDriver::readPixels(Handle src, */ GLRenderTarget const* s = handle_cast(src); - gl.bindFramebuffer(GL_READ_FRAMEBUFFER, s->gl.fbo); + + // glReadPixel doesn't resolve automatically, but it does with the auto-resolve extension, + // which we're always emulating. So if we have a resolved fbo (fbo_read), use that instead. + gl.bindFramebuffer(GL_READ_FRAMEBUFFER, s->gl.fbo_read ? s->gl.fbo_read : s->gl.fbo); + + using PBD = PixelBufferDescriptor; + + // The PBO only needs to accommodate the area we're reading, with alignment. 
+ auto const pboSize = (GLsizeiptr)PBD::computeDataSize( + p.format, p.type, width, height, p.alignment); GLuint pbo; glGenBuffers(1, &pbo); gl.bindBuffer(GL_PIXEL_PACK_BUFFER, pbo); - glBufferData(GL_PIXEL_PACK_BUFFER, (GLsizeiptr)p.size, nullptr, GL_STATIC_DRAW); + glBufferData(GL_PIXEL_PACK_BUFFER, pboSize, nullptr, GL_STATIC_DRAW); glReadPixels(GLint(x), GLint(y), GLint(width), GLint(height), glFormat, glType, nullptr); gl.bindBuffer(GL_PIXEL_PACK_BUFFER, 0); CHECK_GL_ERROR(utils::slog.e) // we're forced to make a copy on the heap because otherwise it deletes std::function<> copy // constructor. - auto* pUserBuffer = new PixelBufferDescriptor(std::move(p)); - whenGpuCommandsComplete([this, width, height, pbo, pUserBuffer]() mutable { + auto* const pUserBuffer = new PixelBufferDescriptor(std::move(p)); + whenGpuCommandsComplete([this, width, height, pbo, pboSize, pUserBuffer]() mutable { PixelBufferDescriptor& p = *pUserBuffer; auto& gl = mContext; gl.bindBuffer(GL_PIXEL_PACK_BUFFER, pbo); void* vaddr = nullptr; #if defined(__EMSCRIPTEN__) - std::unique_ptr clientBuffer = std::make_unique(p.size); - glGetBufferSubData(GL_PIXEL_PACK_BUFFER, 0, p.size, clientBuffer.get()); + std::unique_ptr clientBuffer = std::make_unique(pboSize); + glGetBufferSubData(GL_PIXEL_PACK_BUFFER, 0, pboSize, clientBuffer.get()); vaddr = clientBuffer.get(); #else - vaddr = glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, (GLsizeiptr)p.size, GL_MAP_READ_BIT); + vaddr = glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, pboSize, GL_MAP_READ_BIT); #endif if (vaddr) { // now we need to flip the buffer vertically to match our API size_t const stride = p.stride ? 
p.stride : width; - size_t const bpp = PixelBufferDescriptor::computeDataSize( - p.format, p.type, 1, 1, 1); - size_t const bpr = PixelBufferDescriptor::computeDataSize( - p.format, p.type, stride, 1, p.alignment); - char const* head = (char const*)vaddr + p.left * bpp + bpr * p.top; - char* tail = (char*)p.buffer + p.left * bpp + bpr * (p.top + height - 1); + size_t const bpp = PBD::computeDataSize(p.format, p.type, 1, 1, 1); + size_t const dstBpr = PBD::computeDataSize(p.format, p.type, stride, 1, p.alignment); + char* pDst = (char*)p.buffer + p.left * bpp + dstBpr * (p.top + height - 1); + + size_t const srcBpr = PBD::computeDataSize(p.format, p.type, width, 1, p.alignment); + char const* pSrc = (char const*)vaddr; + for (size_t i = 0; i < height; ++i) { - memcpy(tail, head, bpp * width); - head += bpr; - tail -= bpr; + memcpy(pDst, pSrc, bpp * width); + pSrc += srcBpr; + pDst -= dstBpr; } #if !defined(__EMSCRIPTEN__) glUnmapBuffer(GL_PIXEL_PACK_BUFFER); @@ -2849,7 +2873,7 @@ void OpenGLDriver::readBufferSubData(backend::BufferObjectHandle boh, } void OpenGLDriver::whenGpuCommandsComplete(std::function fn) noexcept { - GLsync sync = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); + OpenGLContext::FenceSync sync = mContext.createFenceSync(mPlatform); mGpuCommandCompleteOps.emplace_back(sync, std::move(fn)); CHECK_GL_ERROR(utils::slog.e) } @@ -2862,15 +2886,16 @@ void OpenGLDriver::executeGpuCommandsCompleteOps() noexcept { auto& v = mGpuCommandCompleteOps; auto it = v.begin(); while (it != v.end()) { - GLenum const status = glClientWaitSync(it->first, 0, 0); - if (status == GL_ALREADY_SIGNALED || status == GL_CONDITION_SATISFIED) { + using Status = OpenGLContext::FenceSync::Status; + auto const status = mContext.clientWaitSync(mPlatform, it->first); + if (status == Status::ALREADY_SIGNALED || status == Status::CONDITION_SATISFIED) { it->second(); - glDeleteSync(it->first); + mContext.destroyFenceSync(mPlatform, it->first); it = v.erase(it); - } else if 
(UTILS_UNLIKELY(status == GL_WAIT_FAILED)) { + } else if (UTILS_UNLIKELY(status == Status::FAILURE)) { // This should never happen, but is very problematic if it does, as we might leak // some data depending on what the callback does. However, we clean up our own state. - glDeleteSync(it->first); + mContext.destroyFenceSync(mPlatform, it->first); it = v.erase(it); } else { ++it; diff --git a/filament/backend/src/opengl/OpenGLDriver.h b/filament/backend/src/opengl/OpenGLDriver.h index d9c8b9ccff0..7df05e84854 100644 --- a/filament/backend/src/opengl/OpenGLDriver.h +++ b/filament/backend/src/opengl/OpenGLDriver.h @@ -122,7 +122,6 @@ class OpenGLDriver final : public DriverBase { GLenum target = 0; GLenum internalFormat = 0; GLuint sidecarRenderBufferMS = 0; // multi-sample sidecar renderbuffer - mutable GLsync fence = {}; // texture parameters go here too GLfloat anisotropy = 1.0; @@ -186,11 +185,10 @@ class OpenGLDriver final : public DriverBase { struct GLSync : public HwSync { using HwSync::HwSync; struct State { - std::atomic status{ GL_TIMEOUT_EXPIRED }; + std::atomic status{ + OpenGLContext::FenceSync::Status::TIMEOUT_EXPIRED }; }; - struct { - GLsync sync; - } gl; + OpenGLContext::FenceSync handle{}; std::shared_ptr result{ std::make_shared() }; }; @@ -364,7 +362,7 @@ class OpenGLDriver final : public DriverBase { // tasks executed on the main thread after the fence signaled void whenGpuCommandsComplete(std::function fn) noexcept; void executeGpuCommandsCompleteOps() noexcept; - std::vector>> mGpuCommandCompleteOps; + std::vector>> mGpuCommandCompleteOps; // tasks regularly executed on the main thread at until they return true void runEveryNowAndThen(std::function fn) noexcept; diff --git a/filament/backend/src/opengl/OpenGLProgram.cpp b/filament/backend/src/opengl/OpenGLProgram.cpp index 274c052f5ec..2605306dac1 100644 --- a/filament/backend/src/opengl/OpenGLProgram.cpp +++ b/filament/backend/src/opengl/OpenGLProgram.cpp @@ -121,11 +121,21 @@ void 
OpenGLProgram::compileShaders(OpenGLContext& context, UTILS_NOUNROLL for (size_t i = 0; i < Program::SHADER_TYPE_COUNT; i++) { const ShaderStage stage = static_cast(i); - GLenum glShaderType; + GLenum glShaderType{}; switch (stage) { - case ShaderStage::VERTEX: glShaderType = GL_VERTEX_SHADER; break; - case ShaderStage::FRAGMENT: glShaderType = GL_FRAGMENT_SHADER; break; - case ShaderStage::COMPUTE: glShaderType = GL_COMPUTE_SHADER; break; + case ShaderStage::VERTEX: + glShaderType = GL_VERTEX_SHADER; + break; + case ShaderStage::FRAGMENT: + glShaderType = GL_FRAGMENT_SHADER; + break; + case ShaderStage::COMPUTE: +#if defined(GL_VERSION_4_1) || defined(GL_ES_VERSION_3_1) + glShaderType = GL_COMPUTE_SHADER; +#else + continue; +#endif + break; } if (UTILS_LIKELY(!shadersSource[i].empty())) { @@ -406,11 +416,6 @@ void OpenGLProgram::updateSamplers(OpenGLDriver* gld) const noexcept { const GLTexture* const t = sb->textureUnitEntries[j].texture; GLuint const s = sb->textureUnitEntries[j].sampler; if (t) { // program may not use all samplers of sampler group - if (UTILS_UNLIKELY(t->gl.fence)) { - glWaitSync(t->gl.fence, 0, GL_TIMEOUT_IGNORED); - glDeleteSync(t->gl.fence); - t->gl.fence = nullptr; - } gld->bindTexture(tmu, t); gld->bindSampler(tmu, s); } diff --git a/filament/backend/src/opengl/OpenGLTimerQuery.cpp b/filament/backend/src/opengl/OpenGLTimerQuery.cpp index 4c585749896..dd07ac30f89 100644 --- a/filament/backend/src/opengl/OpenGLTimerQuery.cpp +++ b/filament/backend/src/opengl/OpenGLTimerQuery.cpp @@ -34,8 +34,9 @@ OpenGLTimerQueryInterface::~OpenGLTimerQueryInterface() = default; // ------------------------------------------------------------------------------------------------ -TimerQueryNative::TimerQueryNative(OpenGLContext& context) - : gl(context) { +#if defined(GL_VERSION_3_3) || defined(GL_EXT_disjoint_timer_query) + +TimerQueryNative::TimerQueryNative(OpenGLContext&) { } TimerQueryNative::~TimerQueryNative() = default; @@ -44,12 +45,12 @@ void 
TimerQueryNative::flush() { } void TimerQueryNative::beginTimeElapsedQuery(GLTimerQuery* query) { - gl.beginQuery(GL_TIME_ELAPSED, query->gl.query); + glBeginQuery(GL_TIME_ELAPSED, query->gl.query); CHECK_GL_ERROR(utils::slog.e) } void TimerQueryNative::endTimeElapsedQuery(GLTimerQuery*) { - gl.endQuery(GL_TIME_ELAPSED); + glEndQuery(GL_TIME_ELAPSED); CHECK_GL_ERROR(utils::slog.e) } @@ -62,14 +63,14 @@ bool TimerQueryNative::queryResultAvailable(GLTimerQuery* query) { uint64_t TimerQueryNative::queryResult(GLTimerQuery* query) { GLuint64 elapsedTime = 0; - // IOS doesn't have glGetQueryObjectui64v, we'll never end-up here on ios anyways -#ifndef IOS + // we won't end-up here if we're on ES and don't have GL_EXT_disjoint_timer_query glGetQueryObjectui64v(query->gl.query, GL_QUERY_RESULT, &elapsedTime); -#endif CHECK_GL_ERROR(utils::slog.e) return elapsedTime; } +#endif + // ------------------------------------------------------------------------------------------------ OpenGLTimerQueryFence::OpenGLTimerQueryFence(OpenGLPlatform& platform) @@ -85,7 +86,7 @@ OpenGLTimerQueryFence::OpenGLTimerQueryFence(OpenGLPlatform& platform) }); exitRequested = mExitRequested; if (!queue.empty()) { - Job job(queue.front()); + Job const job(queue.front()); queue.erase(queue.begin()); lock.unlock(); job(); @@ -105,7 +106,7 @@ OpenGLTimerQueryFence::~OpenGLTimerQueryFence() { } void OpenGLTimerQueryFence::enqueue(OpenGLTimerQueryFence::Job&& job) { - std::unique_lock lock(mLock); + std::unique_lock const lock(mLock); mQueue.push_back(std::forward(job)); mCondition.notify_one(); } @@ -114,9 +115,9 @@ void OpenGLTimerQueryFence::flush() { // Use calls to flush() as a proxy for when the GPU work started. 
GLTimerQuery* query = mActiveQuery; if (query) { - uint64_t elapsed = query->gl.emulation->elapsed.load(std::memory_order_relaxed); + uint64_t const elapsed = query->gl.emulation->elapsed.load(std::memory_order_relaxed); if (!elapsed) { - uint64_t now = clock::now().time_since_epoch().count(); + uint64_t const now = clock::now().time_since_epoch().count(); query->gl.emulation->elapsed.store(now, std::memory_order_relaxed); //SYSTRACE_CONTEXT(); //SYSTRACE_ASYNC_BEGIN("gpu", query->gl.query); @@ -139,7 +140,7 @@ void OpenGLTimerQueryFence::beginTimeElapsedQuery(GLTimerQuery* query) { void OpenGLTimerQueryFence::endTimeElapsedQuery(GLTimerQuery* query) { assert_invariant(mActiveQuery); Platform::Fence* fence = mPlatform.createFence(); - std::weak_ptr weak = query->gl.emulation; + std::weak_ptr const weak = query->gl.emulation; mActiveQuery = nullptr; //uint32_t cookie = cookie = query->gl.query; push([&platform = mPlatform, fence, weak]() { diff --git a/filament/backend/src/opengl/OpenGLTimerQuery.h b/filament/backend/src/opengl/OpenGLTimerQuery.h index 69cb0849071..e6fc88e2c2f 100644 --- a/filament/backend/src/opengl/OpenGLTimerQuery.h +++ b/filament/backend/src/opengl/OpenGLTimerQuery.h @@ -48,6 +48,8 @@ class OpenGLTimerQueryInterface { virtual uint64_t queryResult(GLTimerQuery* query) = 0; }; +#if defined(GL_VERSION_3_3) || defined(GL_EXT_disjoint_timer_query) + class TimerQueryNative : public OpenGLTimerQueryInterface { public: explicit TimerQueryNative(OpenGLContext& context); @@ -58,9 +60,10 @@ class TimerQueryNative : public OpenGLTimerQueryInterface { void endTimeElapsedQuery(GLTimerQuery* query) override; bool queryResultAvailable(GLTimerQuery* query) override; uint64_t queryResult(GLTimerQuery* query) override; - OpenGLContext& gl; }; +#endif + class OpenGLTimerQueryFence : public OpenGLTimerQueryInterface { public: explicit OpenGLTimerQueryFence(OpenGLPlatform& platform); diff --git a/filament/backend/src/opengl/gl_headers.cpp 
b/filament/backend/src/opengl/gl_headers.cpp index fa1b1414f7b..2c29aae796f 100644 --- a/filament/backend/src/opengl/gl_headers.cpp +++ b/filament/backend/src/opengl/gl_headers.cpp @@ -16,11 +16,17 @@ #include "gl_headers.h" -#if defined(__ANDROID__) || defined(FILAMENT_USE_EXTERNAL_GLES3) || defined(__EMSCRIPTEN__) +#if defined(FILAMENT_IMPORT_ENTRY_POINTS) #include #include +// for non EGL platforms, we'd need to implement this differently. Currently, it's not a problem. +template +static void getProcAddress(T& pfn, const char* name) noexcept { + pfn = (T)eglGetProcAddress(name); +} + namespace glext { #ifdef GL_QCOM_tiled_rendering PFNGLSTARTTILINGQCOMPROC glStartTilingQCOM; @@ -58,71 +64,39 @@ PFNGLDISPATCHCOMPUTEPROC glDispatchCompute; static std::once_flag sGlExtInitialized; void importGLESExtensionsEntryPoints() { - std::call_once(sGlExtInitialized, []() { + std::call_once(sGlExtInitialized, +[]() { #ifdef GL_QCOM_tiled_rendering - glStartTilingQCOM = - (PFNGLSTARTTILINGQCOMPROC)eglGetProcAddress( - "glStartTilingQCOM"); - - glEndTilingQCOM = - (PFNGLENDTILINGQCOMPROC)eglGetProcAddress( - "glEndTilingQCOM"); + getProcAddress(glStartTilingQCOM, "glStartTilingQCOM"); + getProcAddress(glEndTilingQCOM, "glEndTilingQCOM"); #endif - #ifdef GL_OES_EGL_image - glEGLImageTargetTexture2DOES = - (PFNGLEGLIMAGETARGETTEXTURE2DOESPROC)eglGetProcAddress( - "glEGLImageTargetTexture2DOES"); + getProcAddress(glEGLImageTargetTexture2DOES, "glEGLImageTargetTexture2DOES"); #endif - #if GL_EXT_debug_marker - glInsertEventMarkerEXT = - (PFNGLINSERTEVENTMARKEREXTPROC)eglGetProcAddress( - "glInsertEventMarkerEXT"); - - glPushGroupMarkerEXT = - (PFNGLPUSHGROUPMARKEREXTPROC)eglGetProcAddress( - "glPushGroupMarkerEXT"); - - glPopGroupMarkerEXT = - (PFNGLPOPGROUPMARKEREXTPROC)eglGetProcAddress( - "glPopGroupMarkerEXT"); + getProcAddress(glInsertEventMarkerEXT, "glInsertEventMarkerEXT"); + getProcAddress(glPushGroupMarkerEXT, "glPushGroupMarkerEXT"); + getProcAddress(glPopGroupMarkerEXT, 
"glPopGroupMarkerEXT"); #endif #if GL_EXT_multisampled_render_to_texture - glFramebufferTexture2DMultisampleEXT = - (PFNGLFRAMEBUFFERTEXTURE2DMULTISAMPLEEXTPROC)eglGetProcAddress( - "glFramebufferTexture2DMultisampleEXT"); - glRenderbufferStorageMultisampleEXT = - (PFNGLRENDERBUFFERSTORAGEMULTISAMPLEEXTPROC)eglGetProcAddress( - "glRenderbufferStorageMultisampleEXT"); + getProcAddress(glFramebufferTexture2DMultisampleEXT, "glFramebufferTexture2DMultisampleEXT"); + getProcAddress(glRenderbufferStorageMultisampleEXT, "glRenderbufferStorageMultisampleEXT"); #endif #ifdef GL_KHR_debug - glDebugMessageCallbackKHR = - (PFNGLDEBUGMESSAGECALLBACKKHRPROC)eglGetProcAddress( - "glDebugMessageCallbackKHR"); - glGetDebugMessageLogKHR = - (PFNGLGETDEBUGMESSAGELOGKHRPROC)eglGetProcAddress( - "glGetDebugMessageLogKHR"); + getProcAddress(glDebugMessageCallbackKHR, "glDebugMessageCallbackKHR"); + getProcAddress(glGetDebugMessageLogKHR, "glGetDebugMessageLogKHR"); #endif #ifdef GL_EXT_disjoint_timer_query - glGetQueryObjectui64v = - (PFNGLGETQUERYOBJECTUI64VEXTPROC)eglGetProcAddress( - "glGetQueryObjectui64vEXT"); + getProcAddress(glGetQueryObjectui64v, "glGetQueryObjectui64vEXT"); #endif - }); #ifdef GL_EXT_clip_control - glClipControl = - (PFNGLCLIPCONTROLEXTPROC)eglGetProcAddress( - "glClipControlEXT"); + getProcAddress(glClipControl, "glClipControlEXT"); #endif - #if defined(__ANDROID__) - glDispatchCompute = - (PFNGLDISPATCHCOMPUTEPROC)eglGetProcAddress( - "glDispatchCompute"); + getProcAddress(glDispatchCompute, "glDispatchCompute"); #endif + }); } } // namespace glext -#endif +#endif // defined(FILAMENT_IMPORT_ENTRY_POINTS) diff --git a/filament/backend/src/opengl/gl_headers.h b/filament/backend/src/opengl/gl_headers.h index 68fc3849103..e59d341cc29 100644 --- a/filament/backend/src/opengl/gl_headers.h +++ b/filament/backend/src/opengl/gl_headers.h @@ -26,45 +26,6 @@ #endif #include - /* The Android NDK doesn't expose extensions, fake it with eglGetProcAddress */ - namespace 
glext { - // importGLESExtensionsEntryPoints is thread-safe and can be called multiple times. - // it is currently called from PlatformEGL. - void importGLESExtensionsEntryPoints(); - -#ifdef GL_QCOM_tiled_rendering - extern PFNGLSTARTTILINGQCOMPROC glStartTilingQCOM; - extern PFNGLENDTILINGQCOMPROC glEndTilingQCOM; -#endif -#ifdef GL_OES_EGL_image - extern PFNGLEGLIMAGETARGETTEXTURE2DOESPROC glEGLImageTargetTexture2DOES; -#endif -#ifdef GL_EXT_debug_marker - extern PFNGLINSERTEVENTMARKEREXTPROC glInsertEventMarkerEXT; - extern PFNGLPUSHGROUPMARKEREXTPROC glPushGroupMarkerEXT; - extern PFNGLPOPGROUPMARKEREXTPROC glPopGroupMarkerEXT; -#endif -#ifdef GL_EXT_multisampled_render_to_texture - extern PFNGLRENDERBUFFERSTORAGEMULTISAMPLEEXTPROC glRenderbufferStorageMultisampleEXT; - extern PFNGLFRAMEBUFFERTEXTURE2DMULTISAMPLEEXTPROC glFramebufferTexture2DMultisampleEXT; -#endif -#ifdef GL_KHR_debug - extern PFNGLDEBUGMESSAGECALLBACKKHRPROC glDebugMessageCallbackKHR; - extern PFNGLGETDEBUGMESSAGELOGKHRPROC glGetDebugMessageLogKHR; -#endif -#ifdef GL_EXT_disjoint_timer_query - extern PFNGLGETQUERYOBJECTUI64VEXTPROC glGetQueryObjectui64v; -#endif -#ifdef GL_EXT_clip_control - extern PFNGLCLIPCONTROLEXTPROC glClipControl; -#endif -#if defined(__ANDROID__) - extern PFNGLDISPATCHCOMPUTEPROC glDispatchCompute; -#endif - } - - using namespace glext; - #elif defined(IOS) #define GLES_SILENCE_DEPRECATION @@ -85,111 +46,110 @@ #endif + #if (!defined(GL_ES_VERSION_2_0) && !defined(GL_VERSION_4_1)) #error "Minimum header version must be OpenGL ES 2.0 or OpenGL 4.1" #endif /* - * Since we need ES3.1 headers and iOS only has ES3.0, we also define the constants we - * need to avoid many #ifdef in the actual code. 
+ * GLES extensions */ -#if defined(GL_ES_VERSION_2_0) +#if defined(GL_ES_VERSION_2_0) // this basically means all versions of GLES -#ifdef GL_EXT_disjoint_timer_query -# ifndef GL_TIME_ELAPSED -# define GL_TIME_ELAPSED GL_TIME_ELAPSED_EXT -# endif -#endif +#if defined(IOS) + +// iOS headers only provide prototypes, nothing to do. +#else + +#define FILAMENT_IMPORT_ENTRY_POINTS + +/* The Android NDK doesn't expose extensions, fake it with eglGetProcAddress */ +namespace glext { +// importGLESExtensionsEntryPoints is thread-safe and can be called multiple times. +// it is currently called from PlatformEGL. +void importGLESExtensionsEntryPoints(); + +#ifdef GL_QCOM_tiled_rendering +extern PFNGLSTARTTILINGQCOMPROC glStartTilingQCOM; +extern PFNGLENDTILINGQCOMPROC glEndTilingQCOM; +#endif +#ifdef GL_OES_EGL_image +extern PFNGLEGLIMAGETARGETTEXTURE2DOESPROC glEGLImageTargetTexture2DOES; +#endif +#ifdef GL_EXT_debug_marker +extern PFNGLINSERTEVENTMARKEREXTPROC glInsertEventMarkerEXT; +extern PFNGLPUSHGROUPMARKEREXTPROC glPushGroupMarkerEXT; +extern PFNGLPOPGROUPMARKEREXTPROC glPopGroupMarkerEXT; +#endif +#ifdef GL_EXT_multisampled_render_to_texture +extern PFNGLRENDERBUFFERSTORAGEMULTISAMPLEEXTPROC glRenderbufferStorageMultisampleEXT; +extern PFNGLFRAMEBUFFERTEXTURE2DMULTISAMPLEEXTPROC glFramebufferTexture2DMultisampleEXT; +#endif +#ifdef GL_KHR_debug +extern PFNGLDEBUGMESSAGECALLBACKKHRPROC glDebugMessageCallbackKHR; +extern PFNGLGETDEBUGMESSAGELOGKHRPROC glGetDebugMessageLogKHR; +#endif #ifdef GL_EXT_clip_control -# ifndef GL_LOWER_LEFT -# define GL_LOWER_LEFT GL_LOWER_LEFT_EXT -# endif -# ifndef GL_ZERO_TO_ONE -# define GL_ZERO_TO_ONE GL_ZERO_TO_ONE_EXT -# endif +extern PFNGLCLIPCONTROLEXTPROC glClipControl; +#endif +#ifdef GL_EXT_disjoint_timer_query +extern PFNGLGETQUERYOBJECTUI64VEXTPROC glGetQueryObjectui64v; #endif +#if defined(__ANDROID__) +extern PFNGLDISPATCHCOMPUTEPROC glDispatchCompute; +#endif +} // namespace glext + +using namespace glext; -#ifndef 
GL_TEXTURE_CUBE_MAP_ARRAY -# define GL_TEXTURE_CUBE_MAP_ARRAY 0x9009 #endif // Prevent lots of #ifdef's between desktop and mobile -#if defined(GL_KHR_debug) -# ifndef GL_DEBUG_OUTPUT -# define GL_DEBUG_OUTPUT GL_DEBUG_OUTPUT_KHR -# endif -# ifndef GL_DEBUG_OUTPUT_SYNCHRONOUS -# define GL_DEBUG_OUTPUT_SYNCHRONOUS GL_DEBUG_OUTPUT_SYNCHRONOUS_KHR -# endif - -# ifndef GL_DEBUG_SEVERITY_HIGH -# define GL_DEBUG_SEVERITY_HIGH GL_DEBUG_SEVERITY_HIGH_KHR -# endif -# ifndef GL_DEBUG_SEVERITY_MEDIUM -# define GL_DEBUG_SEVERITY_MEDIUM GL_DEBUG_SEVERITY_MEDIUM_KHR -# endif -# ifndef GL_DEBUG_SEVERITY_LOW -# define GL_DEBUG_SEVERITY_LOW GL_DEBUG_SEVERITY_LOW_KHR -# endif -# ifndef GL_DEBUG_SEVERITY_NOTIFICATION -# define GL_DEBUG_SEVERITY_NOTIFICATION GL_DEBUG_SEVERITY_NOTIFICATION_KHR -# endif - -# ifndef GL_DEBUG_TYPE_MARKER -# define GL_DEBUG_TYPE_MARKER GL_DEBUG_TYPE_MARKER_KHR -# endif -# ifndef GL_DEBUG_TYPE_ERROR -# define GL_DEBUG_TYPE_ERROR GL_DEBUG_TYPE_ERROR_KHR -# endif -# ifndef GL_DEBUG_TYPE_DEPRECATED_BEHAVIOR -# define GL_DEBUG_TYPE_DEPRECATED_BEHAVIOR GL_DEBUG_TYPE_DEPRECATED_BEHAVIOR_KHR -# endif -# ifndef GL_DEBUG_TYPE_UNDEFINED_BEHAVIOR -# define GL_DEBUG_TYPE_UNDEFINED_BEHAVIOR GL_DEBUG_TYPE_UNDEFINED_BEHAVIOR_KHR -# endif -# ifndef GL_DEBUG_TYPE_PORTABILITY -# define GL_DEBUG_TYPE_PORTABILITY GL_DEBUG_TYPE_PORTABILITY_KHR -# endif -# ifndef GL_DEBUG_TYPE_PERFORMANCE -# define GL_DEBUG_TYPE_PERFORMANCE GL_DEBUG_TYPE_PERFORMANCE_KHR -# endif -# ifndef GL_DEBUG_TYPE_OTHER -# define GL_DEBUG_TYPE_OTHER GL_DEBUG_TYPE_OTHER_KHR -# endif - -# define glDebugMessageCallback glDebugMessageCallbackKHR -#endif - -/* The iOS SDK only provides OpenGL ES headers up to 3.0. 
Filament works with OpenGL 3.0, but - * requires ES3.1 headers */ -#if !defined(GL_ES_VERSION_3_1) - #define GL_SHADER_STORAGE_BUFFER 0x90D2 - #define GL_COMPUTE_SHADER 0x91B9 - - #define GL_TEXTURE_2D_MULTISAMPLE 0x9100 - -// FIXME: The GL_TIME_ELAPSED define is used unconditionally in Filament, but -// requires extension support. -#ifndef GL_TIME_ELAPSED - #define GL_TIME_ELAPSED 0x88BF -#endif - - #define GL_TEXTURE_BINDING_CUBE_MAP_ARRAY 0x900A - #define GL_SAMPLER_CUBE_MAP_ARRAY 0x900C - #define GL_SAMPLER_CUBE_MAP_ARRAY_SHADOW 0x900D - #define GL_INT_SAMPLER_CUBE_MAP_ARRAY 0x900E - #define GL_UNSIGNED_INT_SAMPLER_CUBE_MAP_ARRAY 0x900F - #define GL_IMAGE_CUBE_MAP_ARRAY 0x9054 - #define GL_INT_IMAGE_CUBE_MAP_ARRAY 0x905F - #define GL_UNSIGNED_INT_IMAGE_CUBE_MAP_ARRAY 0x906A +#ifdef GL_EXT_disjoint_timer_query +# define GL_TIME_ELAPSED GL_TIME_ELAPSED_EXT +#endif + +#ifdef GL_EXT_clip_control +# define GL_LOWER_LEFT GL_LOWER_LEFT_EXT +# define GL_ZERO_TO_ONE GL_ZERO_TO_ONE_EXT +#endif + +// we need GL_TEXTURE_CUBE_MAP_ARRAY defined, but we won't use it if the extension/feature +// is not available. 
+#if defined(GL_EXT_texture_cube_map_array) +# define GL_TEXTURE_CUBE_MAP_ARRAY GL_TEXTURE_CUBE_MAP_ARRAY_EXT +#else +# define GL_TEXTURE_CUBE_MAP_ARRAY 0x9009 +#endif +#if defined(GL_KHR_debug) +# define GL_DEBUG_OUTPUT GL_DEBUG_OUTPUT_KHR +# define GL_DEBUG_OUTPUT_SYNCHRONOUS GL_DEBUG_OUTPUT_SYNCHRONOUS_KHR +# define GL_DEBUG_SEVERITY_HIGH GL_DEBUG_SEVERITY_HIGH_KHR +# define GL_DEBUG_SEVERITY_MEDIUM GL_DEBUG_SEVERITY_MEDIUM_KHR +# define GL_DEBUG_SEVERITY_LOW GL_DEBUG_SEVERITY_LOW_KHR +# define GL_DEBUG_SEVERITY_NOTIFICATION GL_DEBUG_SEVERITY_NOTIFICATION_KHR +# define GL_DEBUG_TYPE_MARKER GL_DEBUG_TYPE_MARKER_KHR +# define GL_DEBUG_TYPE_ERROR GL_DEBUG_TYPE_ERROR_KHR +# define GL_DEBUG_TYPE_DEPRECATED_BEHAVIOR GL_DEBUG_TYPE_DEPRECATED_BEHAVIOR_KHR +# define GL_DEBUG_TYPE_UNDEFINED_BEHAVIOR GL_DEBUG_TYPE_UNDEFINED_BEHAVIOR_KHR +# define GL_DEBUG_TYPE_PORTABILITY GL_DEBUG_TYPE_PORTABILITY_KHR +# define GL_DEBUG_TYPE_PERFORMANCE GL_DEBUG_TYPE_PERFORMANCE_KHR +# define GL_DEBUG_TYPE_OTHER GL_DEBUG_TYPE_OTHER_KHR +# define glDebugMessageCallback glDebugMessageCallbackKHR #endif + #endif // GL_ES_VERSION_2_0 +// This is just to simplify the implementation (i.e. so we don't have to have #ifdefs everywhere) +#ifndef GL_OES_EGL_image_external +#define GL_TEXTURE_EXTERNAL_OES 0x8D65 +#endif + // This is an odd duck function that exists in WebGL 2.0 but not in OpenGL ES. #if defined(__EMSCRIPTEN__) extern "C" { @@ -222,11 +182,6 @@ void glGetBufferSubData(GLenum target, GLintptr offset, GLsizeiptr size, void *d # define BACKEND_OPENGL_LEVEL BACKEND_OPENGL_LEVEL_GLES20 #endif -// This is just to simplify the implementation (i.e. 
so we don't have to have #ifdefs everywhere) -#ifndef GL_OES_EGL_image_external -#define GL_TEXTURE_EXTERNAL_OES 0x8D65 -#endif - #include "NullGLES.h" #endif // TNT_FILAMENT_BACKEND_OPENGL_GL_HEADERS_H diff --git a/filament/backend/src/opengl/platforms/PlatformEGL.cpp b/filament/backend/src/opengl/platforms/PlatformEGL.cpp index bfff2f6cae5..3d54ef2ffb8 100644 --- a/filament/backend/src/opengl/platforms/PlatformEGL.cpp +++ b/filament/backend/src/opengl/platforms/PlatformEGL.cpp @@ -94,9 +94,7 @@ Driver* PlatformEGL::createDriver(void* sharedContext, const Platform::DriverCon return nullptr; } -#if defined(__ANDROID__) || defined(FILAMENT_USE_EXTERNAL_GLES3) || defined(__EMSCRIPTEN__) - // PlatofrmEGL is used with and without GLES, but this function is only - // meaningful when GLES is used. +#if defined(FILAMENT_IMPORT_ENTRY_POINTS) importGLESExtensionsEntryPoints(); #endif diff --git a/filament/backend/test/test_StencilBuffer.cpp b/filament/backend/test/test_StencilBuffer.cpp index caa23862563..62223713100 100644 --- a/filament/backend/test/test_StencilBuffer.cpp +++ b/filament/backend/test/test_StencilBuffer.cpp @@ -262,7 +262,7 @@ TEST_F(BasicStencilBufferTest, StencilBufferMSAA) { api.stopCapture(0); api.endFrame(0); - readPixelsAndAssertHash("StencilBufferAutoResolve", 512, 512, renderTarget1, 0xC7E34C43, true); + readPixelsAndAssertHash("StencilBufferAutoResolve", 512, 512, renderTarget1, 0x6CEFAC8F, true); flushAndWait(); getDriver().purge(); diff --git a/filament/include/filament/Box.h b/filament/include/filament/Box.h index f4cdb5fab6e..36f19924a16 100644 --- a/filament/include/filament/Box.h +++ b/filament/include/filament/Box.h @@ -104,22 +104,24 @@ class UTILS_PUBLIC Box { } /** - * Computes the bounding box of a box transformed by a rigid transform + * Transform a Box by a linear transform and a translation. 
+ * + * @param m a 3x3 matrix, the linear transform + * @param t a float3, the translation * @param box the box to transform - * @param m a 4x4 matrix that must be a rigid transform - * @return the bounding box of the transformed box. - * Result is undefined if \p m is not a rigid transform + * @return the bounding box of the transformed box */ - friend Box rigidTransform(Box const& box, const math::mat4f& m) noexcept; + static Box transform(const math::mat3f& m, math::float3 const& t, const Box& box) noexcept { + return { m * box.center + t, abs(m) * box.halfExtent }; + } /** - * Computes the bounding box of a box transformed by a rigid transform - * @param box the box to transform - * @param m a 3x3 matrix that must be a rigid transform - * @return the bounding box of the transformed box. - * Result is undefined if \p m is not a rigid transform + * @deprecated Use transform() instead + * @see transform() */ - friend Box rigidTransform(Box const& box, const math::mat3f& m) noexcept; + friend Box rigidTransform(Box const& box, const math::mat4f& m) noexcept { + return transform(m.upperLeft(), m[3].xyz, box); + } }; /** @@ -174,7 +176,18 @@ struct UTILS_PUBLIC Aabb { /** * Returns the 8 corner vertices of the AABB. */ - Corners getCorners() const; + Corners getCorners() const { + return Aabb::Corners{ .vertices = { + { min.x, min.y, min.z }, + { max.x, min.y, min.z }, + { min.x, max.y, min.z }, + { max.x, max.y, min.z }, + { min.x, min.y, max.z }, + { max.x, min.y, max.z }, + { min.x, max.y, max.z }, + { max.x, max.y, max.z }, + }}; + } /** * Returns whether the box contains a given point. @@ -182,15 +195,44 @@ struct UTILS_PUBLIC Aabb { * @param p the point to test * @return the maximum signed distance to the box. 
Negative if p is in the box */ - float contains(math::float3 p) const noexcept; + float contains(math::float3 p) const noexcept { + float d = min.x - p.x; + d = std::max(d, min.y - p.y); + d = std::max(d, min.z - p.z); + d = std::max(d, p.x - max.x); + d = std::max(d, p.y - max.y); + d = std::max(d, p.z - max.z); + return d; + } /** * Applies an affine transformation to the AABB. * - * @param m the 4x4 transformation to apply + * @param m the 3x3 transformation to apply + * @param t the translation * @return the transformed box */ - Aabb transform(const math::mat4f& m) const noexcept; + static Aabb transform(const math::mat3f& m, math::float3 const& t, const Aabb& box) noexcept { + // Fast AABB transformation per Jim Arvo in Graphics Gems (1990). + Aabb result{ t, t }; + for (size_t col = 0; col < 3; ++col) { + for (size_t row = 0; row < 3; ++row) { + const float a = m[col][row] * box.min[col]; + const float b = m[col][row] * box.max[col]; + result.min[row] += a < b ? a : b; + result.max[row] += a < b ? b : a; + } + } + return result; + } + + /** + * @deprecated Use transform() instead + * @see transform() + */ + Aabb transform(const math::mat4f& m) const noexcept { + return transform(m.upperLeft(), m[3].xyz, *this); + } }; } // namespace filament diff --git a/filament/src/Box.cpp b/filament/src/Box.cpp deleted file mode 100644 index a19cf3d79fd..00000000000 --- a/filament/src/Box.cpp +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include - -using namespace filament::math; - -namespace filament { - -Box rigidTransform(Box const& UTILS_RESTRICT box, const mat4f& UTILS_RESTRICT m) noexcept { - const mat3f u(m.upperLeft()); - return { u * box.center + m[3].xyz, abs(u) * box.halfExtent }; -} - -Box rigidTransform(Box const& UTILS_RESTRICT box, const mat3f& UTILS_RESTRICT u) noexcept { - return { u * box.center, abs(u) * box.halfExtent }; -} - -Aabb::Corners Aabb::getCorners() const { - return Aabb::Corners{ .vertices = { - { min.x, min.y, min.z }, - { max.x, min.y, min.z }, - { min.x, max.y, min.z }, - { max.x, max.y, min.z }, - { min.x, min.y, max.z }, - { max.x, min.y, max.z }, - { min.x, max.y, max.z }, - { max.x, max.y, max.z }, - }}; -} - -float Aabb::contains(float3 p) const noexcept { - float d = min.x - p.x; - d = std::max(d, min.y - p.y); - d = std::max(d, min.z - p.z); - d = std::max(d, p.x - max.x); - d = std::max(d, p.y - max.y); - d = std::max(d, p.z - max.z); - return d; -} - -// Fast AABB transformation per Jim Arvo in Graphics Gems (1990). -Aabb Aabb::transform(const mat4f& mat) const noexcept { - const float3 translation = mat[3].xyz; - const mat3f upperLeft = mat.upperLeft(); - Aabb result = { translation, translation }; - for (size_t col = 0; col < upperLeft.getColumnCount(); ++col) { - for (size_t row = 0; row < upperLeft.getRowCount(); ++row) { - const float a = upperLeft[col][row] * min[col]; - const float b = upperLeft[col][row] * max[col]; - result.min[row] += a < b ? a : b; - result.max[row] += a < b ? 
b : a; - } - } - return result; -} - -} // namespace filament diff --git a/filament/src/RenderPass.cpp b/filament/src/RenderPass.cpp index 99643f152cc..28fcb4edaf2 100644 --- a/filament/src/RenderPass.cpp +++ b/filament/src/RenderPass.cpp @@ -86,11 +86,12 @@ void RenderPass::setScissorViewport(backend::Viewport viewport) noexcept { } void RenderPass::appendCommands(FEngine& engine, CommandTypeFlags const commandTypeFlags) noexcept { + SYSTRACE_CALL(); SYSTRACE_CONTEXT(); assert_invariant(mRenderableSoa); - utils::Range vr = mVisibleRenderables; + utils::Range const vr = mVisibleRenderables; // trace the number of visible renderables SYSTRACE_VALUE32("visibleRenderables", vr.size()); if (UTILS_UNLIKELY(vr.empty())) { @@ -129,7 +130,7 @@ void RenderPass::appendCommands(FEngine& engine, CommandTypeFlags const commandT work(vr.first, vr.size()); } else { auto* jobCommandsParallel = jobs::parallel_for(js, nullptr, vr.first, (uint32_t)vr.size(), - std::cref(work), jobs::CountSplitter()); + std::cref(work), jobs::CountSplitter()); js.runAndWait(jobCommandsParallel); } @@ -375,6 +376,8 @@ void RenderPass::generateCommands(uint32_t commandTypeFlags, Command* const comm FScene::VisibleMaskType visibilityMask, float3 cameraPosition, float3 cameraForward) noexcept { + SYSTRACE_CALL(); + // generateCommands() writes both the draw and depth commands simultaneously such that // we go throw the list of renderables just once. 
// (in principle, we could have split this method into two, at the cost of going through @@ -723,6 +726,7 @@ UTILS_NOINLINE // no need to be inlined void RenderPass::Executor::execute(backend::DriverApi& driver, const Command* first, const Command* last) const noexcept { SYSTRACE_CALL(); + SYSTRACE_CONTEXT(); if (first != last) { SYSTRACE_VALUE32("commandCount", last - first); diff --git a/filament/src/RenderPass.h b/filament/src/RenderPass.h index 5d4476c7672..2ef3e553ae1 100644 --- a/filament/src/RenderPass.h +++ b/filament/src/RenderPass.h @@ -389,9 +389,9 @@ class RenderPass { void resize(size_t count) noexcept; void instanceify(FEngine& engine) noexcept; - // on 64-bits systems, we process batches of 256 (64 bytes) cache-lines, or 512 (32 bytes) commands - // on 32-bits systems, we process batches of 512 (32 bytes) cache-lines, or 512 (32 bytes) commands - static constexpr size_t JOBS_PARALLEL_FOR_COMMANDS_COUNT = 512; + // we choose the command count per job to minimize JobSystem overhead. + // on a Pixel 4, 2048 commands is about half a millisecond of processing. 
+ static constexpr size_t JOBS_PARALLEL_FOR_COMMANDS_COUNT = 2048; static constexpr size_t JOBS_PARALLEL_FOR_COMMANDS_SIZE = sizeof(Command) * JOBS_PARALLEL_FOR_COMMANDS_COUNT; diff --git a/filament/src/ShadowMap.cpp b/filament/src/ShadowMap.cpp index 2038dfba80a..b6690a08e53 100644 --- a/filament/src/ShadowMap.cpp +++ b/filament/src/ShadowMap.cpp @@ -58,7 +58,7 @@ ShadowMap::ShadowMap(FEngine& engine) noexcept void ShadowMap::terminate(FEngine& engine) { Entity entities[] = { mCamera->getEntity(), mDebugCamera->getEntity() }; - for (Entity e : entities) { + for (Entity const e : entities) { engine.destroyCameraComponent(e); } engine.getEntityManager().destroy(sizeof(entities) / sizeof(Entity), entities); @@ -112,12 +112,12 @@ math::mat4f ShadowMap::getPointLightViewMatrix(backend::TextureCubemapFace face, ShadowMap::ShaderParameters ShadowMap::updateDirectional(FEngine& engine, const FScene::LightSoa& lightData, size_t index, filament::CameraInfo const& camera, - ShadowMapInfo const& shadowMapInfo, FScene const& scene, - SceneInfo& sceneInfo) noexcept { + ShadowMapInfo const& shadowMapInfo, + SceneInfo const& sceneInfo) noexcept { // Note: we keep the polygon offset even with VSM as it seems to help. 
auto& lcm = engine.getLightManager(); - FLightManager::Instance li = lightData.elementAt(index); + FLightManager::Instance const li = lightData.elementAt(index); FLightManager::ShadowParams params = lcm.getShadowParams(li); #ifndef NDEBUG @@ -134,8 +134,8 @@ ShadowMap::ShaderParameters ShadowMap::updateDirectional(FEngine& engine, // Adjust the camera's projection for the light's shadowFar const mat4f cullingProjection{ [&](auto p) { if (params.options.shadowFar > 0.0f) { - float n = camera.zn; - float f = params.options.shadowFar; + float const n = camera.zn; + float const f = params.options.shadowFar; // orthographic projection assert_invariant(std::abs(p[2].w) <= std::numeric_limits::epsilon()); p[2].z = 2.0f / (n - f); @@ -153,10 +153,7 @@ ShadowMap::ShaderParameters ShadowMap::updateDirectional(FEngine& engine, // We compute the directional light's model matrix using the origin's as the light position. // The choice of the light's origin initially doesn't matter for a directional light. // This will be adjusted later because of how we compute the depth metric for VSM. - const mat4f MvAtOrigin = getDirectionalLightViewMatrix(direction); - - // Compute scene-dependent values shared across all cascades - ShadowMap::updateSceneInfoDirectional(MvAtOrigin, scene, sceneInfo); + const mat4f MvAtOrigin = ShadowMap::getDirectionalLightViewMatrix(direction); const Aabb wsShadowCastersVolume = sceneInfo.wsShadowCastersVolume; const Aabb wsShadowReceiversVolume = sceneInfo.wsShadowReceiversVolume; @@ -177,7 +174,7 @@ ShadowMap::ShaderParameters ShadowMap::updateDirectional(FEngine& engine, // compute the intersection of the shadow receivers' volume with the view volume // in world space. This returns a set of points on the convex-hull of the intersection. 
- size_t vertexCount = intersectFrustumWithBox(wsClippedShadowReceiverVolume, + size_t const vertexCount = intersectFrustumWithBox(wsClippedShadowReceiverVolume, wsFrustum, wsViewFrustumVertices, wsShadowReceiversVolume); if (UTILS_UNLIKELY(vertexCount < 4)) { @@ -206,7 +203,7 @@ ShadowMap::ShaderParameters ShadowMap::updateDirectional(FEngine& engine, } for (size_t i = 0; i < vertexCount; ++i) { // far: figure out the farthest shadow receivers - float3 v = mat4f::project(MvAtOrigin, wsClippedShadowReceiverVolume[i]); + float3 const v = mat4f::project(MvAtOrigin, wsClippedShadowReceiverVolume[i]); lsLightFrustumBounds.min.z = std::min(lsLightFrustumBounds.min.z, v.z); if constexpr (USE_DEPTH_CLAMP) { // tighten the shadow receiver volume further @@ -487,7 +484,7 @@ ShadowMap::ShaderParameters ShadowMap::updatePunctual( ShadowMap::ShaderParameters ShadowMap::updateSpot(FEngine& engine, const FScene::LightSoa& lightData, size_t index, - filament::CameraInfo const& camera, + filament::CameraInfo const&, const ShadowMapInfo& shadowMapInfo, FScene const& scene, SceneInfo sceneInfo) noexcept { @@ -499,8 +496,17 @@ ShadowMap::ShaderParameters ShadowMap::updateSpot(FEngine& engine, const FLightManager::ShadowParams& params = lcm.getShadowParams(li); const mat4f Mv = getDirectionalLightViewMatrix(direction, position); - // find decent near/far - ShadowMap::updateSceneInfoSpot(Mv, scene, sceneInfo); + // We only keep this for reference. updateSceneInfoSpot() is quite expensive on large scenes + // currently, and only needed to find a near/far. Instead, we just use a small near and the + // radius as far. + // TODO: Another potential solution would be to visit only the part of the scene that's visible + // by the light -- which should be much smaller. 
+ if constexpr (false) { + // find decent near/far + ShadowMap::updateSceneInfoSpot(Mv, scene, sceneInfo); + } else { + sceneInfo.lsNearFar = { -0.01f, -radius }; + } // if the scene was empty, near > far mHasVisibleShadows = -sceneInfo.lsNearFar[0] < -sceneInfo.lsNearFar[1]; @@ -509,22 +515,22 @@ ShadowMap::ShaderParameters ShadowMap::updateSpot(FEngine& engine, } // FIXME: we need a configuration for minimum near plane (for now hardcoded to 1cm) - float nearPlane = std::max(0.01f, -sceneInfo.lsNearFar[0]); - float farPlane = std::min(radius, -sceneInfo.lsNearFar[1]); + float const nearPlane = std::max(0.01f, -sceneInfo.lsNearFar[0]); + float const farPlane = std::min(radius, -sceneInfo.lsNearFar[1]); + auto outerConeAngle = lcm.getSpotLightOuterCone(li); return updatePunctual(Mv, outerConeAngle, nearPlane, farPlane, shadowMapInfo, params); } ShadowMap::ShaderParameters ShadowMap::updatePoint(FEngine& engine, - const FScene::LightSoa& lightData, size_t index, - filament::CameraInfo const& camera, const ShadowMapInfo& shadowMapInfo, FScene const& scene, - SceneInfo, uint8_t face) noexcept { + const FScene::LightSoa& lightData, size_t index, filament::CameraInfo const&, + const ShadowMapInfo& shadowMapInfo, FScene const& scene, uint8_t face) noexcept { // check if this shadow map has anything to render mHasVisibleShadows = false; FScene::RenderableSoa const& UTILS_RESTRICT soa = scene.getRenderableData(); auto const* const UTILS_RESTRICT visibleMasks = soa.data(); - size_t c = soa.size(); + size_t const c = soa.size(); for (size_t i = 0; i < c; i++) { if (visibleMasks[i] & VISIBLE_DYN_SHADOW_RENDERABLE) { mHasVisibleShadows = true; @@ -702,18 +708,12 @@ mat4f ShadowMap::directionalLightFrustum(float near, float far) noexcept { return m; } -float2 ShadowMap::computeNearFar(const mat4f& view, - Aabb const& wsShadowCastersVolume) noexcept { - const Aabb::Corners wsSceneCastersCorners = wsShadowCastersVolume.getCorners(); - return computeNearFar(view, 
wsSceneCastersCorners.data(), wsSceneCastersCorners.size()); -} - float2 ShadowMap::computeNearFar(const mat4f& view, float3 const* wsVertices, size_t count) noexcept { float2 nearFar = { std::numeric_limits::lowest(), std::numeric_limits::max() }; for (size_t i = 0; i < count; i++) { // we're on the z axis in light space (looking down to -z) - float c = mat4f::project(view, wsVertices[i]).z; + float const c = mat4f::project(view, wsVertices[i]).z; nearFar.x = std::max(nearFar.x, c); // near nearFar.y = std::min(nearFar.y, c); // far } @@ -726,7 +726,7 @@ float2 ShadowMap::computeNearFarOfWarpSpace(mat4f const& lightView, #pragma nounroll for (size_t i = 0; i < count; i++) { // we're on the y-axis in light space (looking down to +y) - float c = mat4f::project(lightView, wsVertices[i]).y; + float const c = mat4f::project(lightView, wsVertices[i]).y; nearFar.x = std::min(nearFar.x, c); nearFar.y = std::max(nearFar.y, c); } @@ -791,11 +791,11 @@ void ShadowMap::intersectWithShadowCasters(Aabb& UTILS_RESTRICT lightFrustum, // first intersect wsShadowCastersVolume with the light's frustum, otherwise we end-up // transforming vertices that are "outside" the frustum, and that's forbidden. 
FrustumBoxIntersection wsClippedShadowCasterVolumeVertices; - size_t vertexCount = intersectFrustumWithBox(wsClippedShadowCasterVolumeVertices, + size_t const vertexCount = intersectFrustumWithBox(wsClippedShadowCasterVolumeVertices, wsLightFrustum, wsLightFrustumCorners, wsShadowCastersVolume); // compute shadow-caster bounds in light space - Aabb box = compute2DBounds(lightView, wsClippedShadowCasterVolumeVertices.data(), vertexCount); + Aabb const box = compute2DBounds(lightView, wsClippedShadowCasterVolumeVertices.data(), vertexCount); // intersect shadow-caster and current light frustum bounds lightFrustum.min.xy = max(box.min.xy, lightFrustum.min.xy); @@ -809,7 +809,7 @@ void ShadowMap::computeFrustumCorners(float3* UTILS_RESTRICT out, // matrix to convert: ndc -> camera -> world float near = csNearFar.x; float far = csNearFar.y; - float3 csViewFrustumCorners[8] = { + float3 const csViewFrustumCorners[8] = { { -1, -1, far }, { 1, -1, far }, { -1, 1, far }, @@ -819,7 +819,7 @@ void ShadowMap::computeFrustumCorners(float3* UTILS_RESTRICT out, { -1, 1, near }, { 1, 1, near }, }; - for (float3 c : csViewFrustumCorners) { + for (float3 const c : csViewFrustumCorners) { *out++ = mat4f::project(projectionViewInverse, c); } } @@ -869,7 +869,7 @@ size_t ShadowMap::intersectFrustumWithBox( // a) Keep the frustum's vertices that are known to be inside the scene's box UTILS_NOUNROLL for (size_t i = 0; i < 8; i++) { - float3 p = wsFrustumCorners[i]; + float3 const p = wsFrustumCorners[i]; outVertices[vertexCount] = p; if ((p.x >= wsBox.min.x && p.x <= wsBox.max.x) && (p.y >= wsBox.min.y && p.y <= wsBox.max.y) && @@ -889,14 +889,14 @@ size_t ShadowMap::intersectFrustumWithBox( // the frustum. This actually often happens due to fitting light-space // We fudge the distance to the plane by a small amount. 
#pragma nounroll - for (float3 p : wsSceneReceiversCorners) { + for (float3 const p : wsSceneReceiversCorners) { outVertices[vertexCount] = p; - float l = dot(wsFrustumPlanes[0].xyz, p) + wsFrustumPlanes[0].w; - float b = dot(wsFrustumPlanes[1].xyz, p) + wsFrustumPlanes[1].w; - float r = dot(wsFrustumPlanes[2].xyz, p) + wsFrustumPlanes[2].w; - float t = dot(wsFrustumPlanes[3].xyz, p) + wsFrustumPlanes[3].w; - float f = dot(wsFrustumPlanes[4].xyz, p) + wsFrustumPlanes[4].w; - float n = dot(wsFrustumPlanes[5].xyz, p) + wsFrustumPlanes[5].w; + float const l = dot(wsFrustumPlanes[0].xyz, p) + wsFrustumPlanes[0].w; + float const b = dot(wsFrustumPlanes[1].xyz, p) + wsFrustumPlanes[1].w; + float const r = dot(wsFrustumPlanes[2].xyz, p) + wsFrustumPlanes[2].w; + float const t = dot(wsFrustumPlanes[3].xyz, p) + wsFrustumPlanes[3].w; + float const f = dot(wsFrustumPlanes[4].xyz, p) + wsFrustumPlanes[4].w; + float const n = dot(wsFrustumPlanes[5].xyz, p) + wsFrustumPlanes[5].w; if ((l <= EPSILON) && (b <= EPSILON) && (r <= EPSILON) && (t <= EPSILON) && (f <= EPSILON) && (n <= EPSILON)) { @@ -981,7 +981,7 @@ size_t ShadowMap::intersectFrustum( const float3 s0{ segmentsVertices[segment.v0] }; const float3 s1{ segmentsVertices[segment.v1] }; // each segment should only intersect with 2 quads at most - size_t maxVertexCount = vertexCount + 2; + size_t const maxVertexCount = vertexCount + 2; for (size_t j = 0; j < 6 && vertexCount < maxVertexCount; ++j) { const Quad quad = sBoxQuads[j]; const float3 t0{ quadsVertices[quad.v0] }; @@ -1036,8 +1036,8 @@ inline bool ShadowMap::intersectSegmentWithTriangle(float3& UTILS_RESTRICT p, bool ShadowMap::intersectSegmentWithPlanarQuad(float3& UTILS_RESTRICT p, float3 s0, float3 s1, float3 t0, float3 t1, float3 t2, float3 t3) noexcept { - bool hit = intersectSegmentWithTriangle(p, s0, s1, t0, t1, t2) || - intersectSegmentWithTriangle(p, s0, s1, t0, t2, t3); + bool const hit = intersectSegmentWithTriangle(p, s0, s1, t0, t1, t2) || + 
intersectSegmentWithTriangle(p, s0, s1, t0, t2, t3); return hit; } @@ -1070,7 +1070,7 @@ float ShadowMap::texelSizeWorldSpace(const mat4f& Wp, const mat4f& MbMtF, // The Jacobian is not constant, so we evaluate it in the center of the shadow-map texture. // It might be better to do this computation in the vertex shader. - float3 p = {0.5, 0.5, 0.0}; + float3 const p = { 0.5f, 0.5f, 0.0f }; const float ures = 1.0f / float(shadowDimension); const float vres = 1.0f / float(shadowDimension); @@ -1115,9 +1115,9 @@ float ShadowMap::texelSizeWorldSpace(const mat4f& Wp, const mat4f& MbMtF, 0.0f, j * Z * sx, j * dz * sx }); - float3 Jx = J[0] * ures; - float3 Jy = J[1] * vres; - UTILS_UNUSED float3 Jz = J[2] * dres; + float3 const Jx = J[0] * ures; + float3 const Jy = J[1] * vres; + UTILS_UNUSED float3 const Jz = J[2] * dres; const float s = std::max(length(Jx), length(Jy)); return s; } @@ -1128,13 +1128,13 @@ void ShadowMap::visitScene(const FScene& scene, uint32_t visibleLayers, SYSTRACE_CALL(); using State = FRenderableManager::Visibility; - FScene::RenderableSoa const& UTILS_RESTRICT soa = scene.getRenderableData(); - float3 const* const UTILS_RESTRICT worldAABBCenter = soa.data(); - float3 const* const UTILS_RESTRICT worldAABBExtent = soa.data(); - uint8_t const* const UTILS_RESTRICT layers = soa.data(); - State const* const UTILS_RESTRICT visibility = soa.data(); - auto const* const UTILS_RESTRICT visibleMasks = soa.data(); - size_t c = soa.size(); + FScene::RenderableSoa const& soa = scene.getRenderableData(); + float3 const* const worldAABBCenter = soa.data(); + float3 const* const worldAABBExtent = soa.data(); + uint8_t const* const layers = soa.data(); + State const* const visibility = soa.data(); + auto const* const visibleMasks = soa.data(); + size_t const c = soa.size(); for (size_t i = 0; i < c; i++) { if (layers[i] & visibleLayers) { const Aabb aabb{ worldAABBCenter[i] - worldAABBExtent[i], @@ -1149,46 +1149,47 @@ void ShadowMap::visitScene(const FScene& 
scene, uint32_t visibleLayers, } } -void ShadowMap::initSceneInfo(ShadowMap::SceneInfo& sceneInfo, - uint8_t visibleLayers, FScene const& scene, mat4f const& viewMatrix) { - sceneInfo.csNearFar = { -1.0f, 1.0f }; - sceneInfo.lsNearFar = {}; - sceneInfo.visibleLayers = visibleLayers; - sceneInfo.vsNearFar = { std::numeric_limits::lowest(), std::numeric_limits::max() }; +ShadowMap::SceneInfo::SceneInfo( + FScene const& scene, uint8_t visibleLayers, mat4f const& viewMatrix) noexcept + : vsNearFar(std::numeric_limits::lowest(), std::numeric_limits::max()), + visibleLayers(visibleLayers) { + + // the code below only works with affine transforms + assert_invariant(transpose(viewMatrix)[3] == float4(0, 0, 0, 1)); // We assume the light is at the origin to compute the SceneInfo. This is consumed later by // computeShadowCameraDirectional() which takes this into account. // Compute scene bounds in world space, as well as the light-space and view-space near/far planes - sceneInfo.wsShadowCastersVolume = {}; - sceneInfo.wsShadowReceiversVolume = {}; - visitScene(scene, sceneInfo.visibleLayers, + wsShadowCastersVolume = {}; + wsShadowReceiversVolume = {}; + ShadowMap::visitScene(scene, visibleLayers, [&](Aabb caster, Culler::result_type) { - sceneInfo.wsShadowCastersVolume.min = - min(sceneInfo.wsShadowCastersVolume.min, caster.min); - sceneInfo.wsShadowCastersVolume.max = - max(sceneInfo.wsShadowCastersVolume.max, caster.max); + wsShadowCastersVolume.min = min(wsShadowCastersVolume.min, caster.min); + wsShadowCastersVolume.max = max(wsShadowCastersVolume.max, caster.max); }, [&](Aabb receiver, Culler::result_type) { - sceneInfo.wsShadowReceiversVolume.min = - min(sceneInfo.wsShadowReceiversVolume.min, receiver.min); - sceneInfo.wsShadowReceiversVolume.max = - max(sceneInfo.wsShadowReceiversVolume.max, receiver.max); - float2 nf = ShadowMap::computeNearFar(viewMatrix, receiver); - sceneInfo.vsNearFar.x = std::max(sceneInfo.vsNearFar.x, nf.x); - sceneInfo.vsNearFar.y = 
std::min(sceneInfo.vsNearFar.y, nf.y); + wsShadowReceiversVolume.min = min(wsShadowReceiversVolume.min, receiver.min); + wsShadowReceiversVolume.max = max(wsShadowReceiversVolume.max, receiver.max); + auto r = Aabb::transform(viewMatrix.upperLeft(), viewMatrix[3].xyz, receiver); + vsNearFar.x = std::max(vsNearFar.x, r.max.z); + vsNearFar.y = std::min(vsNearFar.y, r.min.z); } ); } void ShadowMap::updateSceneInfoDirectional(mat4f const& Mv, FScene const& scene, SceneInfo& sceneInfo) { + + // the code below only works with affine transforms + assert_invariant(transpose(Mv)[3] == float4(0, 0, 0, 1)); + sceneInfo.lsNearFar = { std::numeric_limits::lowest(), std::numeric_limits::max() }; visitScene(scene, sceneInfo.visibleLayers, [&](Aabb caster, Culler::result_type) { - float2 nf = ShadowMap::computeNearFar(Mv, caster); - sceneInfo.lsNearFar.x = std::max(sceneInfo.lsNearFar.x, nf.x); // near - sceneInfo.lsNearFar.y = std::min(sceneInfo.lsNearFar.y, nf.y); // far + auto r = Aabb::transform(Mv.upperLeft(), Mv[3].xyz, caster); + sceneInfo.lsNearFar.x = std::max(sceneInfo.lsNearFar.x, r.max.z); // near + sceneInfo.lsNearFar.y = std::min(sceneInfo.lsNearFar.y, r.min.z); // far }, [&](Aabb receiver, Culler::result_type) { } @@ -1197,14 +1198,17 @@ void ShadowMap::updateSceneInfoDirectional(mat4f const& Mv, FScene const& scene, void ShadowMap::updateSceneInfoSpot(mat4f const& Mv, FScene const& scene, SceneInfo& sceneInfo) { + + // the code below only works with affine transforms + assert_invariant(transpose(Mv)[3] == float4(0, 0, 0, 1)); + sceneInfo.lsNearFar = { std::numeric_limits::lowest(), std::numeric_limits::max() }; - sceneInfo.vsNearFar = { std::numeric_limits::lowest(), std::numeric_limits::max() }; visitScene(scene, sceneInfo.visibleLayers, [&](Aabb caster, Culler::result_type mask) { if (mask & VISIBLE_DYN_SHADOW_RENDERABLE) { - float2 nf = ShadowMap::computeNearFar(Mv, caster); - sceneInfo.lsNearFar.x = std::max(sceneInfo.lsNearFar.x, nf.x); // near - 
sceneInfo.lsNearFar.y = std::min(sceneInfo.lsNearFar.y, nf.y); // far + auto r = Aabb::transform(Mv.upperLeft(), Mv[3].xyz, caster); + sceneInfo.lsNearFar.x = std::max(sceneInfo.lsNearFar.x, r.max.z); // near + sceneInfo.lsNearFar.y = std::min(sceneInfo.lsNearFar.y, r.min.z); // far } }, [&](Aabb receiver, Culler::result_type) { diff --git a/filament/src/ShadowMap.h b/filament/src/ShadowMap.h index 214ed4a31dc..de198387c62 100644 --- a/filament/src/ShadowMap.h +++ b/filament/src/ShadowMap.h @@ -95,16 +95,20 @@ class ShadowMap { }; struct SceneInfo { + + SceneInfo() noexcept = default; + SceneInfo(FScene const& scene, uint8_t visibleLayers, math::mat4f const& viewMatrix) noexcept; + // scratch data: The near and far planes, in clip space, to use for this shadow map math::float2 csNearFar = { -1.0f, 1.0f }; // scratch data: light's near/far expressed in light-space, calculated from the scene's // content assuming the light is at the origin. - math::float2 lsNearFar{}; + math::float2 lsNearFar; - // scratch data: Viewing camera's near/far expressed in view-space, calculated from the + // Viewing camera's near/far expressed in view-space, calculated from the // scene's content. 
- math::float2 vsNearFar{}; + math::float2 vsNearFar; // World-space shadow-casters volume Aabb wsShadowCastersVolume; @@ -136,8 +140,8 @@ class ShadowMap { ShaderParameters updateDirectional(FEngine& engine, const FScene::LightSoa& lightData, size_t index, filament::CameraInfo const& camera, - ShadowMapInfo const& shadowMapInfo, FScene const& scene, - SceneInfo& sceneInfo) noexcept; + ShadowMapInfo const& shadowMapInfo, + SceneInfo const& sceneInfo) noexcept; ShaderParameters updateSpot(FEngine& engine, const FScene::LightSoa& lightData, size_t index, @@ -145,11 +149,9 @@ class ShadowMap { const ShadowMapInfo& shadowMapInfo, FScene const& scene, SceneInfo sceneInfo) noexcept; - ShaderParameters updatePoint(FEngine& engine, - const FScene::LightSoa& lightData, size_t index, - filament::CameraInfo const& camera, const ShadowMapInfo& shadowMapInfo, - FScene const& scene, - SceneInfo sceneInfo, uint8_t face) noexcept; + ShadowMap::ShaderParameters updatePoint(FEngine& engine, + const FScene::LightSoa& lightData, size_t index, filament::CameraInfo const& camera, + const ShadowMapInfo& shadowMapInfo, FScene const& scene, uint8_t face) noexcept; // Do we have visible shadows. Valid after calling update(). bool hasVisibleShadows() const noexcept { return mHasVisibleShadows; } @@ -160,11 +162,6 @@ class ShadowMap { // use only for debugging FCamera const& getDebugCamera() const noexcept { return *mDebugCamera; } - // Call once per frame to populate the SceneInfo struct, then pass to update(). - // This computes values constant across all shadow maps. 
- static void initSceneInfo(ShadowMap::SceneInfo& sceneInfo, uint8_t visibleLayers, - FScene const& scene, math::mat4f const& viewMatrix); - // Update SceneInfo struct for a given light static void updateSceneInfoDirectional(const math::mat4f& Mv, FScene const& scene, SceneInfo& sceneInfo); @@ -230,9 +227,6 @@ class ShadowMap { static inline void computeFrustumCorners(math::float3* out, const math::mat4f& projectionViewInverse, math::float2 csNearFar = { -1.0f, 1.0f }) noexcept; - static inline math::float2 computeNearFar(math::mat4f const& view, - Aabb const& wsShadowCastersVolume) noexcept; - static inline math::float2 computeNearFar(math::mat4f const& view, math::float3 const* wsVertices, size_t count) noexcept; diff --git a/filament/src/ShadowMapManager.cpp b/filament/src/ShadowMapManager.cpp index b6a11cea814..725cf4424b8 100644 --- a/filament/src/ShadowMapManager.cpp +++ b/filament/src/ShadowMapManager.cpp @@ -76,15 +76,16 @@ ShadowMapManager::ShadowTechnique ShadowMapManager::update(FEngine& engine, FVie calculateTextureRequirements(engine, view, lightData); // Compute scene-dependent values shared across all shadow maps - ShadowMap::initSceneInfo(mSceneInfo, - view.getVisibleLayers(), *view.getScene(), cameraInfo.view); + ShadowMap::SceneInfo const info{ *view.getScene(), view.getVisibleLayers(), cameraInfo.view }; shadowTechnique |= updateCascadeShadowMaps( - engine, view, cameraInfo, renderableData, lightData, mSceneInfo); + engine, view, cameraInfo, renderableData, lightData, info); shadowTechnique |= updateSpotShadowMaps( engine, lightData); + mSceneInfo = info; + return shadowTechnique; } @@ -207,7 +208,7 @@ FrameGraphId ShadowMapManager::render(FEngine& engine, FrameG }, [this, &engine, &view, vsmShadowOptions, scene, mainCameraInfo, userTime, passTemplate = pass]( - FrameGraphResources const& resources, auto const& data, DriverApi& driver) { + FrameGraphResources const&, auto const& data, DriverApi& driver) { // Note: we could almost parallel_for the 
loop below, the problem currently is // that updatePrimitivesLod() updates temporary global state. @@ -435,11 +436,11 @@ FrameGraphId ShadowMapManager::render(FEngine& engine, FrameG ShadowMapManager::ShadowTechnique ShadowMapManager::updateCascadeShadowMaps(FEngine& engine, FView& view, CameraInfo const& cameraInfo, FScene::RenderableSoa& renderableData, - FScene::LightSoa& lightData, ShadowMap::SceneInfo& sceneInfo) noexcept { + FScene::LightSoa& lightData, ShadowMap::SceneInfo sceneInfo) noexcept { FScene* scene = view.getScene(); auto& lcm = engine.getLightManager(); - FLightManager::Instance directionalLight = lightData.elementAt(0); + FLightManager::Instance const directionalLight = lightData.elementAt(0); FLightManager::ShadowOptions const& options = lcm.getShadowOptions(directionalLight); FLightManager::ShadowParams const& params = lcm.getShadowParams(directionalLight); @@ -458,8 +459,18 @@ ShadowMapManager::ShadowTechnique ShadowMapManager::updateCascadeShadowMaps(FEng // entire camera frustum, as if we only had a single cascade. ShadowMap& shadowMap = *mCascadeShadowMaps[0]; + const auto direction = lightData.elementAt(0); + + // We compute the directional light's model matrix using the origin's as the light position. + // The choice of the light's origin initially doesn't matter for a directional light. + // This will be adjusted later because of how we compute the depth metric for VSM. 
+ const mat4f MvAtOrigin = ShadowMap::getDirectionalLightViewMatrix(direction); + + // Compute scene-dependent values shared across all cascades + ShadowMap::updateSceneInfoDirectional(MvAtOrigin, *scene, sceneInfo); + shadowMap.updateDirectional(mEngine, - lightData, 0, cameraInfo, shadowMapInfo, *scene, sceneInfo); + lightData, 0, cameraInfo, shadowMapInfo, sceneInfo); hasVisibleShadows = shadowMap.hasVisibleShadows(); @@ -533,7 +544,7 @@ ShadowMapManager::ShadowTechnique ShadowMapManager::updateCascadeShadowMaps(FEng sceneInfo.csNearFar = { csSplitPosition[i], csSplitPosition[i + 1] }; auto shaderParameters = shadowMap.updateDirectional(mEngine, - lightData, 0, cameraInfo, shadowMapInfo, *scene, sceneInfo); + lightData, 0, cameraInfo, shadowMapInfo, sceneInfo); if (shadowMap.hasVisibleShadows()) { const size_t shadowIndex = shadowMap.getShadowIndex(); @@ -560,7 +571,7 @@ ShadowMapManager::ShadowTechnique ShadowMapManager::updateCascadeShadowMaps(FEng } // screen-space contact shadows for the directional light - float screenSpaceShadowDistance = options.maxShadowDistance; + float const screenSpaceShadowDistance = options.maxShadowDistance; if (options.screenSpaceContactShadows) { shadowTechnique |= ShadowTechnique::SCREEN_SPACE; } @@ -749,7 +760,7 @@ void ShadowMapManager::preparePointShadowMap(ShadowMap& shadowMap, }; auto shaderParameters = shadowMap.updatePoint(mEngine, lightData, lightIndex, - mainCameraInfo, shadowMapInfo, *view.getScene(), sceneInfo, face); + mainCameraInfo, shadowMapInfo, *view.getScene(), face); // and if we need to generate it, update all the UBO data @@ -812,8 +823,8 @@ ShadowMapManager::ShadowTechnique ShadowMapManager::updateSpotShadowMaps(FEngine return shadowTechnique; } -void ShadowMapManager::calculateTextureRequirements(FEngine& engine, FView& view, - FScene::LightSoa& lightData) noexcept { +void ShadowMapManager::calculateTextureRequirements(FEngine&, FView& view, + FScene::LightSoa&) noexcept { // Lay out the shadow maps. 
For now, we take the largest requested dimension and allocate a // texture of that size. Each cascade / shadow map gets its own layer in the array texture. @@ -867,7 +878,7 @@ void ShadowMapManager::calculateTextureRequirements(FEngine& engine, FView& view if (useMipmapping) { // Limit the lowest mipmap level to 256x256. // This avoids artifacts on high derivative tangent surfaces. - int lowMipmapLevel = 7; // log2(256) - 1 + int const lowMipmapLevel = 7; // log2(256) - 1 mipLevels = std::max(1, FTexture::maxLevelCount(maxDimension) - lowMipmapLevel); } diff --git a/filament/src/ShadowMapManager.h b/filament/src/ShadowMapManager.h index 39d106d9af2..952cb9e0d46 100644 --- a/filament/src/ShadowMapManager.h +++ b/filament/src/ShadowMapManager.h @@ -110,7 +110,7 @@ class ShadowMapManager { private: ShadowMapManager::ShadowTechnique updateCascadeShadowMaps(FEngine& engine, FView& view, CameraInfo const& cameraInfo, FScene::RenderableSoa& renderableData, - FScene::LightSoa& lightData, ShadowMap::SceneInfo& sceneInfo) noexcept; + FScene::LightSoa& lightData, ShadowMap::SceneInfo sceneInfo) noexcept; ShadowMapManager::ShadowTechnique updateSpotShadowMaps(FEngine& engine, FScene::LightSoa& lightData) noexcept; diff --git a/filament/src/details/Scene.cpp b/filament/src/details/Scene.cpp index 07d4d61fa4d..0475e268f1c 100644 --- a/filament/src/details/Scene.cpp +++ b/filament/src/details/Scene.cpp @@ -49,76 +49,136 @@ FScene::FScene(FEngine& engine) : FScene::~FScene() noexcept = default; -void FScene::prepare(const mat4& worldOriginTransform, bool shadowReceiversAreCasters) noexcept { +void FScene::prepare(utils::JobSystem& js, + LinearAllocatorArena& allocator, + const mat4& worldOriginTransform, + bool shadowReceiversAreCasters) noexcept { // TODO: can we skip this in most cases? Since we rely on indices staying the same, // we could only skip, if nothing changed in the RCM. 
SYSTRACE_CALL(); + SYSTRACE_CONTEXT(); + + // This will reset the allocator upon exiting + ArenaScope const arena(allocator); + FEngine& engine = mEngine; - EntityManager& em = engine.getEntityManager(); - FRenderableManager& rcm = engine.getRenderableManager(); - FTransformManager& tcm = engine.getTransformManager(); - FLightManager& lcm = engine.getLightManager(); + EntityManager const& em = engine.getEntityManager(); + FRenderableManager const& rcm = engine.getRenderableManager(); + FTransformManager const& tcm = engine.getTransformManager(); + FLightManager const& lcm = engine.getLightManager(); // go through the list of entities, and gather the data of those that are renderables auto& sceneData = mRenderableData; auto& lightData = mLightData; auto const& entities = mEntities; + using RenderableContainerData = std::pair; + using RenderableInstanceContainer = FixedCapacityVector, false>; - // NOTE: we can't know in advance how many entities are renderable or lights because the corresponding - // component can be added after the entity is added to the scene. + using LightContainerData = std::pair; + using LightInstanceContainer = FixedCapacityVector, false>; + + RenderableInstanceContainer renderableInstances{ + RenderableInstanceContainer::with_capacity(entities.size(), allocator) }; + + LightInstanceContainer lightInstances{ + LightInstanceContainer::with_capacity(entities.size(), allocator) }; + + SYSTRACE_NAME_BEGIN("InstanceLoop"); + + // find the max intensity directional light index in our local array + float maxIntensity = 0.0f; + std::pair directionalLightInstances{}; + + /* + * First compute the exact number of renderables and lights in the scene. + * Also find the main directional light. 
+ */ + + for (Entity const e: entities) { + if (UTILS_LIKELY(em.isAlive(e))) { + auto ti = tcm.getInstance(e); + auto li = lcm.getInstance(e); + auto ri = rcm.getInstance(e); + if (li) { + // we handle the directional light here because it'd prevent multithreading below + if (UTILS_UNLIKELY(lcm.isDirectionalLight(li))) { + // we don't store the directional lights, because we only have a single one + if (lcm.getIntensity(li) >= maxIntensity) { + maxIntensity = lcm.getIntensity(li); + directionalLightInstances = { li, ti }; + } + } else { + lightInstances.emplace_back(li, ti); + } + } + if (ri) { + renderableInstances.emplace_back(ri, ti); + } + } + } + + SYSTRACE_NAME_END(); + + /* + * Evaluate the capacity needed for the renderable and light SoAs + */ - size_t renderableDataCapacity = entities.size(); // we need the capacity to be multiple of 16 for SIMD loops - renderableDataCapacity = (renderableDataCapacity + 0xFu) & ~0xFu; // we need 1 extra entry at the end for the summed primitive count + size_t renderableDataCapacity = entities.size(); + renderableDataCapacity = (renderableDataCapacity + 0xFu) & ~0xFu; renderableDataCapacity = renderableDataCapacity + 1; - sceneData.clear(); - if (sceneData.capacity() < renderableDataCapacity) { - sceneData.setCapacity(renderableDataCapacity); - } - // The light data list will always contain at least one entry for the // dominating directional light, even if there are no entities. 
- size_t lightDataCapacity = std::max(1, entities.size()); // we need the capacity to be multiple of 16 for SIMD loops + size_t lightDataCapacity = std::max(DIRECTIONAL_LIGHTS_COUNT, entities.size()); lightDataCapacity = (lightDataCapacity + 0xFu) & ~0xFu; - lightData.clear(); - if (lightData.capacity() < lightDataCapacity) { - lightData.setCapacity(lightDataCapacity); - } - // the first entries are reserved for the directional lights (currently only one) - lightData.resize(DIRECTIONAL_LIGHTS_COUNT); - + /* + * Now resize the SoAs if needed + */ - // find the max intensity directional light index in our local array - float maxIntensity = 0.0f; + // TODO: the resize below could happen in a job - for (Entity e : entities) { - if (!em.isAlive(e)) { - continue; + if (sceneData.size() != renderableInstances.size()) { + sceneData.clear(); + if (sceneData.capacity() < renderableDataCapacity) { + sceneData.setCapacity(renderableDataCapacity); } + assert_invariant(renderableInstances.size() <= sceneData.capacity()); + sceneData.resize(renderableInstances.size()); + } - // getInstance() always returns null if the entity is the Null entity, - // so we don't need to check for that, but we need to check it's alive - auto ri = rcm.getInstance(e); - auto li = lcm.getInstance(e); - if (!ri && !li) { - continue; + if (lightData.size() != lightInstances.size() + DIRECTIONAL_LIGHTS_COUNT) { + lightData.clear(); + if (lightData.capacity() < lightDataCapacity) { + lightData.setCapacity(lightDataCapacity); } + assert_invariant(lightInstances.size() + DIRECTIONAL_LIGHTS_COUNT <= lightData.capacity()); + lightData.resize(lightInstances.size() + DIRECTIONAL_LIGHTS_COUNT); + } - // get the world transform - auto ti = tcm.getInstance(e); - // this is where we go from double to float for our transforms - const mat4f worldTransform{ worldOriginTransform * tcm.getWorldTransformAccurate(ti) }; - const bool reversedWindingOrder = det(worldTransform.upperLeft()) < 0; + /* + * Fill the SoA with the 
JobSystem + */ + + auto renderableWork = [first = renderableInstances.data(), &rcm, &tcm, &worldOriginTransform, + &sceneData, shadowReceiversAreCasters](auto* p, auto c) { + SYSTRACE_NAME("renderableWork"); + + for (size_t i = 0; i < c; i++) { + auto [ri, ti] = p[i]; + + // this is where we go from double to float for our transforms + const mat4f worldTransform{ + worldOriginTransform * tcm.getWorldTransformAccurate(ti) }; + const bool reversedWindingOrder = det(worldTransform.upperLeft()) < 0; - // don't even draw this object if it doesn't have a transform (which shouldn't happen - // because one is always created when creating a Renderable component). - if (ri && ti) { // compute the world AABB so we can perform culling const Box worldAABB = rigidTransform(rcm.getAABB(ri), worldTransform); @@ -131,81 +191,117 @@ void FScene::prepare(const mat4& worldOriginTransform, bool shadowReceiversAreCa // FIXME: We compute and store the local scale because it's needed for glTF but // we need a better way to handle this const mat4f& transform = tcm.getTransform(ti); - float scale = (length(transform[0].xyz) + length(transform[1].xyz) + - length(transform[2].xyz)) / 3.0f; - - // we know there is enough space in the array - sceneData.push_back_unsafe( - ri, // RENDERABLE_INSTANCE - worldTransform, // WORLD_TRANSFORM - visibility, // VISIBILITY_STATE - rcm.getSkinningBufferInfo(ri), // SKINNING_BUFFER - rcm.getMorphingBufferInfo(ri), // MORPHING_BUFFER - worldAABB.center, // WORLD_AABB_CENTER - 0, // VISIBLE_MASK - rcm.getChannels(ri), // CHANNELS - rcm.getInstanceCount(ri), // INSTANCE_COUNT - rcm.getLayerMask(ri), // LAYERS - worldAABB.halfExtent, // WORLD_AABB_EXTENT - {}, // PRIMITIVES - 0, // SUMMED_PRIMITIVE_COUNT - {}, // UBO - scale // USER_DATA - ); + float const scale = (length(transform[0].xyz) + length(transform[1].xyz) + + length(transform[2].xyz)) / 3.0f; + + size_t const index = std::distance(first, p) + i; + assert_invariant(index < sceneData.size()); + + 
sceneData.elementAt(index) = ri; + sceneData.elementAt(index) = worldTransform; + sceneData.elementAt(index) = visibility; + sceneData.elementAt(index) = rcm.getSkinningBufferInfo(ri); + sceneData.elementAt(index) = rcm.getMorphingBufferInfo(ri); + sceneData.elementAt(index) = worldAABB.center; + sceneData.elementAt(index) = 0; + sceneData.elementAt(index) = rcm.getChannels(ri); + sceneData.elementAt(index) = rcm.getInstanceCount(ri); + sceneData.elementAt(index) = rcm.getLayerMask(ri); + sceneData.elementAt(index) = worldAABB.halfExtent; + //sceneData.elementAt(index) = {}; // already initialized, Slice<> + sceneData.elementAt(index) = 0; + //sceneData.elementAt(index) = {}; // not needed here + sceneData.elementAt(index) = scale; } - - if (li) { - // find the dominant directional light - if (UTILS_UNLIKELY(lcm.isDirectionalLight(li))) { - // we don't store the directional lights, because we only have a single one - if (lcm.getIntensity(li) >= maxIntensity) { - maxIntensity = lcm.getIntensity(li); - float3 d = lcm.getLocalDirection(li); - // using mat3f::getTransformForNormals handles non-uniform scaling - d = normalize(mat3f::getTransformForNormals(worldTransform.upperLeft()) * d); - lightData.elementAt(0) = - float4{ 0, 0, 0, std::numeric_limits::infinity() }; - lightData.elementAt(0) = d; - lightData.elementAt(0) = li; - } - } else { - const float4 p = worldTransform * float4{ lcm.getLocalPosition(li), 1 }; - float3 d = 0; - if (!lcm.isPointLight(li) || lcm.isIESLight(li)) { - d = lcm.getLocalDirection(li); - // using mat3f::getTransformForNormals handles non-uniform scaling - d = normalize(mat3f::getTransformForNormals(worldTransform.upperLeft()) * d); - } - lightData.push_back_unsafe( - float4{ p.xyz, lcm.getRadius(li) }, d, li, {}, {}, {}); + }; + + auto lightWork = [first = lightInstances.data(), &lcm, &tcm, &worldOriginTransform, + &lightData](auto* p, auto c) { + SYSTRACE_NAME("lightWork"); + for (size_t i = 0; i < c; i++) { + auto [li, ti] = p[i]; + // 
this is where we go from double to float for our transforms + const mat4f worldTransform{ worldOriginTransform * tcm.getWorldTransformAccurate(ti) }; + const float4 position = worldTransform * float4{ lcm.getLocalPosition(li), 1 }; + float3 d = 0; + if (!lcm.isPointLight(li) || lcm.isIESLight(li)) { + d = lcm.getLocalDirection(li); + // using mat3f::getTransformForNormals handles non-uniform scaling + d = normalize(mat3f::getTransformForNormals(worldTransform.upperLeft()) * d); } + size_t const index = DIRECTIONAL_LIGHTS_COUNT + std::distance(first, p) + i; + assert_invariant(index < lightData.size()); + lightData.elementAt(index) = float4{ position.xyz, lcm.getRadius(li) }; + lightData.elementAt(index) = d; + lightData.elementAt(index) = li; } + }; + + + SYSTRACE_NAME_BEGIN("Renderable and Light jobs"); + + JobSystem::Job* rootJob = js.createJob(); + + auto* renderableJob = jobs::parallel_for(js, rootJob, + renderableInstances.data(), renderableInstances.size(), + std::cref(renderableWork), jobs::CountSplitter<128, 5>()); + + auto* lightJob = jobs::parallel_for(js, rootJob, + lightInstances.data(), lightInstances.size(), + std::cref(lightWork), jobs::CountSplitter<32, 5>()); + + js.run(renderableJob); + js.run(lightJob); + + // Everything below can be done in parallel. 
+ + /* + * Handle the directional light separately + */ + + if (auto [li, ti] = directionalLightInstances ; li) { + const mat4f worldTransform{ + worldOriginTransform * tcm.getWorldTransformAccurate(ti) }; + // using mat3f::getTransformForNormals handles non-uniform scaling + float3 d = lcm.getLocalDirection(li); + d = normalize(mat3f::getTransformForNormals(worldTransform.upperLeft()) * d); + constexpr float inf = std::numeric_limits::infinity(); + lightData.elementAt(0) = float4{ 0, 0, 0, inf }; + lightData.elementAt(0) = d; + lightData.elementAt(0) = li; + } else { + lightData.elementAt(0) = 0; } // some elements past the end of the array will be accessed by SIMD code, we need to make // sure the data is valid enough as not to produce errors such as divide-by-zero // (e.g. in computeLightRanges()) - for (size_t i = lightData.size(), e = lightDataCapacity; i < e; i++) { + for (size_t i = lightData.size(), e = lightData.capacity(); i < e; i++) { new(lightData.data() + i) float4{ 0, 0, 0, 1 }; } // Purely for the benefit of MSAN, we can avoid uninitialized reads by zeroing out the // unused scene elements between the end of the array and the rounded-up count. 
if (UTILS_HAS_SANITIZE_MEMORY) { - for (size_t i = sceneData.size(), e = renderableDataCapacity; i < e; i++) { + for (size_t i = sceneData.size(), e = sceneData.capacity(); i < e; i++) { sceneData.data()[i] = 0; sceneData.data()[i] = 0; sceneData.data()[i] = {}; } } + + js.runAndWait(rootJob); + + SYSTRACE_NAME_END(); } void FScene::prepareVisibleRenderables(Range visibleRenderables) noexcept { + SYSTRACE_CALL(); RenderableSoa& sceneData = mRenderableData; - FRenderableManager& rcm = mEngine.getRenderableManager(); + FRenderableManager const& rcm = mEngine.getRenderableManager(); mHasContactShadows = false; - for (uint32_t i : visibleRenderables) { + for (uint32_t const i : visibleRenderables) { PerRenderableData& uboData = sceneData.elementAt(i); auto const visibility = sceneData.elementAt(i); @@ -257,6 +353,7 @@ void FScene::prepareVisibleRenderables(Range visibleRenderables) noexc void FScene::updateUBOs( Range visibleRenderables, Handle renderableUbh) noexcept { + SYSTRACE_CALL(); FEngine::DriverApi& driver = mEngine.getDriverApi(); // store the UBO handle @@ -278,7 +375,7 @@ void FScene::updateUBOs( // copy our data into the UBO for each visible renderable PerRenderableData const* const uboData = mRenderableData.data(); - for (uint32_t i : visibleRenderables) { + for (uint32_t const i : visibleRenderables) { buffer[i] = uboData[i]; } @@ -308,15 +405,15 @@ void FScene::updateUBOs( } } -void FScene::terminate(FEngine& engine) { +void FScene::terminate(FEngine&) { // DO NOT destroy this UBO, it's owned by the View mRenderableViewUbh.clear(); } -void FScene::prepareDynamicLights(const CameraInfo& camera, ArenaScope& rootArena, +void FScene::prepareDynamicLights(const CameraInfo& camera, ArenaScope&, Handle lightUbh) noexcept { FEngine::DriverApi& driver = mEngine.getDriverApi(); - FLightManager& lcm = mEngine.getLightManager(); + FLightManager const& lcm = mEngine.getLightManager(); FScene::LightSoa& lightData = getLightData(); /* @@ -325,7 +422,7 @@ void 
FScene::prepareDynamicLights(const CameraInfo& camera, ArenaScope& rootAren size_t const size = lightData.size(); // number of point-light/spotlights - size_t positionalLightCount = size - DIRECTIONAL_LIGHTS_COUNT; + size_t const positionalLightCount = size - DIRECTIONAL_LIGHTS_COUNT; assert_invariant(positionalLightCount); float4 const* const UTILS_RESTRICT spheres = lightData.data(); @@ -418,11 +515,11 @@ void FScene::removeEntities(const Entity* entities, size_t count) { UTILS_NOINLINE size_t FScene::getRenderableCount() const noexcept { FEngine& engine = mEngine; - EntityManager& em = engine.getEntityManager(); - FRenderableManager& rcm = engine.getRenderableManager(); + EntityManager const& em = engine.getEntityManager(); + FRenderableManager const& rcm = engine.getRenderableManager(); size_t count = 0; auto const& entities = mEntities; - for (Entity e : entities) { + for (Entity const e : entities) { count += em.isAlive(e) && rcm.getInstance(e) ? 1 : 0; } return count; @@ -431,11 +528,11 @@ size_t FScene::getRenderableCount() const noexcept { UTILS_NOINLINE size_t FScene::getLightCount() const noexcept { FEngine& engine = mEngine; - EntityManager& em = engine.getEntityManager(); - FLightManager& lcm = engine.getLightManager(); + EntityManager const& em = engine.getEntityManager(); + FLightManager const& lcm = engine.getLightManager(); size_t count = 0; auto const& entities = mEntities; - for (Entity e : entities) { + for (Entity const e : entities) { count += em.isAlive(e) && lcm.getInstance(e) ? 
1 : 0; } return count; diff --git a/filament/src/details/Scene.h b/filament/src/details/Scene.h index 87da462a715..1f05c9e3af6 100644 --- a/filament/src/details/Scene.h +++ b/filament/src/details/Scene.h @@ -70,7 +70,8 @@ class FScene : public Scene { ~FScene() noexcept; void terminate(FEngine& engine); - void prepare(const math::mat4& worldOriginTransform, bool shadowReceiversAreCasters) noexcept; + void prepare(utils::JobSystem& js, LinearAllocatorArena& allocator, + math::mat4 const& worldOriginTransform, bool shadowReceiversAreCasters) noexcept; void prepareVisibleRenderables(utils::Range visibleRenderables) noexcept; diff --git a/filament/src/details/View.cpp b/filament/src/details/View.cpp index ae8923ed165..bd3ee5747df 100644 --- a/filament/src/details/View.cpp +++ b/filament/src/details/View.cpp @@ -268,9 +268,8 @@ bool FView::isSkyboxVisible() const noexcept { return skybox != nullptr && (skybox->getLayerMask() & mVisibleLayers); } -void FView::prepareShadowing(FEngine& engine, DriverApi& driver, - FScene::RenderableSoa& renderableData, FScene::LightSoa& lightData, - CameraInfo const& cameraInfo) noexcept { +void FView::prepareShadowing(FEngine& engine, FScene::RenderableSoa& renderableData, + FScene::LightSoa& lightData, CameraInfo const& cameraInfo) noexcept { SYSTRACE_CALL(); mHasShadowing = false; @@ -284,7 +283,7 @@ void FView::prepareShadowing(FEngine& engine, DriverApi& driver, auto& lcm = engine.getLightManager(); // dominant directional light is always as index 0 - FLightManager::Instance directionalLight = lightData.elementAt(0); + FLightManager::Instance const directionalLight = lightData.elementAt(0); const bool hasDirectionalShadows = directionalLight && lcm.isShadowCaster(directionalLight); if (UTILS_UNLIKELY(hasDirectionalShadows)) { const auto& shadowOptions = lcm.getShadowOptions(directionalLight); @@ -338,6 +337,7 @@ void FView::prepareShadowing(FEngine& engine, DriverApi& driver, void FView::prepareLighting(FEngine& engine, 
FEngine::DriverApi& driver, ArenaScope& arena, filament::Viewport const& viewport, CameraInfo const& cameraInfo) noexcept { SYSTRACE_CALL(); + SYSTRACE_CONTEXT(); FScene* const scene = mScene; auto const& lightData = scene->getLightData(); @@ -433,6 +433,9 @@ void FView::prepare(FEngine& engine, DriverApi& driver, ArenaScope& arena, filament::Viewport const& viewport, CameraInfo const& cameraInfo, float4 const& userTime, bool needsAlphaChannel) noexcept { + SYSTRACE_CALL(); + SYSTRACE_CONTEXT(); + JobSystem& js = engine.getJobSystem(); /* @@ -464,7 +467,7 @@ void FView::prepare(FEngine& engine, DriverApi& driver, ArenaScope& arena, * Gather all information needed to render this scene. Apply the world origin to all * objects in the scene. */ - scene->prepare(cameraInfo.worldOrigin, hasVSM()); + scene->prepare(js, arena.getAllocator(), cameraInfo.worldOrigin, hasVSM()); /* * Light culling: runs in parallel with Renderable culling (below) @@ -504,7 +507,7 @@ void FView::prepare(FEngine& engine, DriverApi& driver, ArenaScope& arena, if (prepareVisibleLightsJob) { js.waitAndRelease(prepareVisibleLightsJob); } - prepareShadowing(engine, driver, renderableData, scene->getLightData(), cameraInfo); + prepareShadowing(engine, renderableData, scene->getLightData(), cameraInfo); /* * Partition the SoA so that renderables are partitioned w.r.t their visibility into the @@ -528,6 +531,8 @@ void FView::prepare(FEngine& engine, DriverApi& driver, ArenaScope& arena, // TODO: we need to compare performance of doing this partitioning vs not doing it. 
// and rely on checking visibility in the loops + SYSTRACE_NAME_BEGIN("Partitioning"); + // calculate the sorting key for all elements, based on their visibility uint8_t const* layers = renderableData.data(); auto const* visibility = renderableData.data(); @@ -568,6 +573,8 @@ void FView::prepare(FEngine& engine, DriverApi& driver, ArenaScope& arena, mSpotLightShadowCasters = merged; + SYSTRACE_NAME_END(); + // TODO: when any spotlight is used, `merged` ends-up being the whole list. However, // some of the items will end-up not being visible by any light. Can we do better? // e.g. could we deffer some of the prepareVisibleRenderables() to later? diff --git a/filament/src/details/View.h b/filament/src/details/View.h index 8c9f42a9be3..28e8cfbf3c2 100644 --- a/filament/src/details/View.h +++ b/filament/src/details/View.h @@ -140,9 +140,8 @@ class FView : public View { const Viewport& physicalViewport, const filament::Viewport& logicalViewport) const noexcept; - void prepareShadowing(FEngine& engine, backend::DriverApi& driver, - FScene::RenderableSoa& renderableData, FScene::LightSoa& lightData, - CameraInfo const& cameraInfo) noexcept; + void prepareShadowing(FEngine& engine, FScene::RenderableSoa& renderableData, + FScene::LightSoa& lightData, CameraInfo const& cameraInfo) noexcept; void prepareLighting(FEngine& engine, FEngine::DriverApi& driver, ArenaScope& arena, filament::Viewport const& viewport, CameraInfo const &cameraInfo) noexcept; diff --git a/ios/CocoaPods/Filament.podspec b/ios/CocoaPods/Filament.podspec index 5581bd0372c..a47b27ad2a2 100644 --- a/ios/CocoaPods/Filament.podspec +++ b/ios/CocoaPods/Filament.podspec @@ -1,12 +1,12 @@ Pod::Spec.new do |spec| spec.name = "Filament" - spec.version = "1.31.6" + spec.version = "1.31.7" spec.license = { :type => "Apache 2.0", :file => "LICENSE" } spec.homepage = "https://google.github.io/filament" spec.authors = "Google LLC." 
spec.summary = "Filament is a real-time physically based rendering engine for Android, iOS, Windows, Linux, macOS, and WASM/WebGL." spec.platform = :ios, "11.0" - spec.source = { :http => "https://github.com/google/filament/releases/download/v1.31.6/filament-v1.31.6-ios.tgz" } + spec.source = { :http => "https://github.com/google/filament/releases/download/v1.31.7/filament-v1.31.7-ios.tgz" } # Fix linking error with Xcode 12; we do not yet support the simulator on Apple silicon. spec.pod_target_xcconfig = { diff --git a/libs/filabridge/include/private/filament/UibStructs.h b/libs/filabridge/include/private/filament/UibStructs.h index 03e1282d6a5..7073769ef38 100644 --- a/libs/filabridge/include/private/filament/UibStructs.h +++ b/libs/filabridge/include/private/filament/UibStructs.h @@ -175,6 +175,7 @@ static_assert(sizeof(PerViewUib) == sizeof(math::float4) * 128, struct PerRenderableData { struct alignas(16) vec3_std140 : public std::array { }; + struct alignas(16) vec4_std140 : public std::array { }; struct mat33_std140 : public std::array { mat33_std140& operator=(math::mat3f const& rhs) noexcept { for (int i = 0; i < 3; i++) { @@ -185,8 +186,19 @@ struct PerRenderableData { return *this; } }; + struct mat44_std140 : public std::array { + mat44_std140& operator=(math::mat4f const& rhs) noexcept { + for (int i = 0; i < 4; i++) { + (*this)[i][0] = rhs[i][0]; + (*this)[i][1] = rhs[i][1]; + (*this)[i][2] = rhs[i][2]; + (*this)[i][3] = rhs[i][3]; + } + return *this; + } + }; - math::mat4f worldFromModelMatrix; + mat44_std140 worldFromModelMatrix; mat33_std140 worldFromModelNormalMatrix; uint32_t morphTargetCount; uint32_t flagsChannels; // see packFlags() below (0x00000fll) @@ -204,6 +216,13 @@ struct PerRenderableData { channels; } }; + +#ifndef _MSC_VER +// not sure why this static_assert fails on MSVC +static_assert(std::is_trivially_default_constructible_v, + "make sure PerRenderableData stays trivially_default_constructible"); +#endif + 
static_assert(sizeof(PerRenderableData) == 256, "sizeof(PerRenderableData) must be 256 bytes"); diff --git a/libs/geometry/CMakeLists.txt b/libs/geometry/CMakeLists.txt index 77286760b95..38e8a9e878b 100644 --- a/libs/geometry/CMakeLists.txt +++ b/libs/geometry/CMakeLists.txt @@ -14,6 +14,7 @@ set(PUBLIC_HDRS ) set(SRCS + src/MikktspaceImpl.cpp src/SurfaceOrientation.cpp src/TangentSpaceMesh.cpp src/Transcoder.cpp @@ -26,7 +27,13 @@ include_directories(${PUBLIC_HDR_DIR}) add_library(${TARGET} STATIC ${PUBLIC_HDRS} ${SRCS}) +set(GEOMETRY_DEPS + meshoptimizer + mikktspace +) + target_link_libraries(${TARGET} PUBLIC math utils) +target_link_libraries(${TARGET} PRIVATE ${GEOMETRY_DEPS}) target_include_directories(${TARGET} PUBLIC ${PUBLIC_HDR_DIR}) set_target_properties(${TARGET} PROPERTIES FOLDER Libs) @@ -47,6 +54,19 @@ endif() install(TARGETS ${TARGET} ARCHIVE DESTINATION lib/${DIST_DIR}) install(DIRECTORY ${PUBLIC_HDR_DIR}/geometry DESTINATION include) +set(COMBINED_DEPS + ${TARGET} + ${GEOMETRY_DEPS} +) + +# Combine the deps into a single static lib so that client only have to link this lib and not have +# to link its dependencies. 
+set(GEOMETRY_COMBINED_OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/libgeometry_combined.a") +combine_static_libs(${TARGET} "${GEOMETRY_COMBINED_OUTPUT}" "${COMBINED_DEPS}") + +set(GEOMETRY_LIB_NAME ${CMAKE_STATIC_LIBRARY_PREFIX}geometry${CMAKE_STATIC_LIBRARY_SUFFIX}) +install(FILES "${GEOMETRY_COMBINED_OUTPUT}" DESTINATION lib/${DIST_DIR} RENAME ${GEOMETRY_LIB_NAME}) + # ================================================================================================== # Tests # ================================================================================================== diff --git a/libs/geometry/include/geometry/TangentSpaceMesh.h b/libs/geometry/include/geometry/TangentSpaceMesh.h index c8f1502f77b..bb47c030a83 100644 --- a/libs/geometry/include/geometry/TangentSpaceMesh.h +++ b/libs/geometry/include/geometry/TangentSpaceMesh.h @@ -20,7 +20,6 @@ #include #include #include -#include namespace filament { namespace geometry { diff --git a/libs/geometry/src/MikktspaceImpl.cpp b/libs/geometry/src/MikktspaceImpl.cpp new file mode 100644 index 00000000000..6c35647f6ba --- /dev/null +++ b/libs/geometry/src/MikktspaceImpl.cpp @@ -0,0 +1,152 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "MikktspaceImpl.h" + +#include +#include + + +#include +#include + +#include + +namespace filament::geometry { + +using namespace filament::math; + +int MikktspaceImpl::getNumFaces(SMikkTSpaceContext const* context) noexcept { + auto const wrapper = MikktspaceImpl::getThis(context); + return wrapper->mFaceCount; +} + +int MikktspaceImpl::getNumVerticesOfFace(SMikkTSpaceContext const* context, + int const iFace) noexcept { + return 3; +} + +void MikktspaceImpl::getPosition(SMikkTSpaceContext const* context, float fvPosOut[], + const int iFace, const int iVert) noexcept { + auto const wrapper = MikktspaceImpl::getThis(context); + float3 const pos = *pointerAdd(wrapper->mPositions, wrapper->getTriangle(iFace)[iVert], + wrapper->mPositionStride); + fvPosOut[0] = pos.x; + fvPosOut[1] = pos.y; + fvPosOut[2] = pos.z; +} + +void MikktspaceImpl::getNormal(SMikkTSpaceContext const* context, float fvNormOut[], + int const iFace, int const iVert) noexcept { + auto const wrapper = MikktspaceImpl::getThis(context); + float3 const normal = *pointerAdd(wrapper->mNormals, wrapper->getTriangle(iFace)[iVert], + wrapper->mNormalStride); + fvNormOut[0] = normal.x; + fvNormOut[1] = normal.y; + fvNormOut[2] = normal.z; +} + +void MikktspaceImpl::getTexCoord(SMikkTSpaceContext const* context, float fvTexcOut[], + const int iFace, const int iVert) noexcept { + auto const wrapper = MikktspaceImpl::getThis(context); + float2 const texc = + *pointerAdd(wrapper->mUVs, wrapper->getTriangle(iFace)[iVert], wrapper->mUVStride); + fvTexcOut[0] = texc.x; + fvTexcOut[1] = texc.y; +} + +void MikktspaceImpl::setTSpaceBasic(SMikkTSpaceContext const* context, float const fvTangent[], + float const fSign, int const iFace, int const iVert) noexcept { + auto const wrapper = MikktspaceImpl::getThis(context); + uint32_t const vertInd = wrapper->getTriangle(iFace)[iVert]; + float3 const pos = *pointerAdd(wrapper->mPositions, vertInd, wrapper->mPositionStride); + float3 const n = 
normalize(*pointerAdd(wrapper->mNormals, vertInd, wrapper->mNormalStride)); + float2 const uv = *pointerAdd(wrapper->mUVs, vertInd, wrapper->mUVStride); + float3 const t{fvTangent[0], fvTangent[1], fvTangent[2]}; + float3 const b = fSign * normalize(cross(n, t)); + + // TODO: packTangentFrame actually changes the orientation of b. + quatf const quat = mat3f::packTangentFrame({t, b, n}, sizeof(int32_t)); + + wrapper->mOutVertices.push_back({pos, uv, quat}); +} + +MikktspaceImpl::MikktspaceImpl(const TangentSpaceMeshInput* input) noexcept + : mFaceCount((int) input->triangleCount), + mPositions(input->positions), + mPositionStride(input->positionStride ? input->positionStride : sizeof(float3)), + mNormals(input->normals), + mNormalStride(input->normalStride ? input->normalStride : sizeof(float3)), + mUVs(input->uvs), + mUVStride(input->uvStride ? input->uvStride : sizeof(float2)), + mIsTriangle16(input->triangles16), + mTriangles( + input->triangles16 ? (uint8_t*) input->triangles16 : (uint8_t*) input->triangles32) { + mOutVertices.reserve(mFaceCount * 3); +} + +MikktspaceImpl* MikktspaceImpl::getThis(SMikkTSpaceContext const* context) noexcept { + return (MikktspaceImpl*) context->m_pUserData; +} + +inline const uint3 MikktspaceImpl::getTriangle(int triangleIndex) const noexcept { + const size_t tstride = mIsTriangle16 ? sizeof(ushort3) : sizeof(uint3); + return mIsTriangle16 ? 
uint3(*(ushort3*) (pointerAdd(mTriangles, triangleIndex, tstride))) + : *(uint3*) (pointerAdd(mTriangles, triangleIndex, tstride)); +} + +void MikktspaceImpl::run(TangentSpaceMeshOutput* output) noexcept { + SMikkTSpaceInterface interface { + .m_getNumFaces = MikktspaceImpl::getNumFaces, + .m_getNumVerticesOfFace = MikktspaceImpl::getNumVerticesOfFace, + .m_getPosition = MikktspaceImpl::getPosition, + .m_getNormal = MikktspaceImpl::getNormal, + .m_getTexCoord = MikktspaceImpl::getTexCoord, + .m_setTSpaceBasic = MikktspaceImpl::setTSpaceBasic + }; + SMikkTSpaceContext context{.m_pInterface = &interface, .m_pUserData = this}; + genTangSpaceDefault(&context); + + std::vector remap(mOutVertices.size()); + size_t vertexCount = meshopt_generateVertexRemap(remap.data(), NULL, mOutVertices.size(), + mOutVertices.data(), mOutVertices.size(), sizeof(IOVertex)); + + std::vector newVertices(vertexCount); + meshopt_remapVertexBuffer((void*) newVertices.data(), mOutVertices.data(), mOutVertices.size(), + sizeof(IOVertex), remap.data()); + + uint3* triangles32 = new uint3[mFaceCount]; + meshopt_remapIndexBuffer((uint32_t*) triangles32, NULL, mOutVertices.size(), remap.data()); + + float3* outPositions = new float3[vertexCount]; + float2* outUVs = new float2[vertexCount]; + quatf* outQuats = new quatf[vertexCount]; + + for (size_t i = 0; i < vertexCount; ++i) { + outPositions[i] = newVertices[i].position; + outUVs[i] = newVertices[i].uv; + outQuats[i] = newVertices[i].tangentSpace; + } + + output->vertexCount = vertexCount; + output->positions = outPositions; + output->uvs = outUVs; + output->tangentSpace = outQuats; + output->triangles32 = triangles32; + output->triangleCount = mFaceCount; +} + +}// namespace filament::geometry diff --git a/libs/geometry/src/MikktspaceImpl.h b/libs/geometry/src/MikktspaceImpl.h new file mode 100644 index 00000000000..cc4831d4d66 --- /dev/null +++ b/libs/geometry/src/MikktspaceImpl.h @@ -0,0 +1,80 @@ +/* + * Copyright (C) 2023 The Android Open 
Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef TNT_GEOMETRY_MIKKTSPACEIMPL_H +#define TNT_GEOMETRY_MIKKTSPACEIMPL_H + +#include "TangentSpaceMeshInternal.h" + +#include +#include +#include + +#include + +struct SMikkTSpaceContext; + +namespace filament::geometry { + +using namespace filament::math; + +class MikktspaceImpl { +public: + struct IOVertex { + float3 position; + float2 uv; + quatf tangentSpace; + }; + + MikktspaceImpl(const TangentSpaceMeshInput* input) noexcept; + + MikktspaceImpl(const MikktspaceImpl&) = delete; + MikktspaceImpl& operator=(const MikktspaceImpl&) = delete; + + void run(TangentSpaceMeshOutput* output) noexcept; + +private: + static int getNumFaces(SMikkTSpaceContext const* context) noexcept; + static int getNumVerticesOfFace(SMikkTSpaceContext const* context, int const iFace) noexcept; + static void getPosition(SMikkTSpaceContext const* context, float fvPosOut[], const int iFace, + const int iVert) noexcept; + static void getNormal(SMikkTSpaceContext const* context, float fvNormOut[], int const iFace, + int const iVert) noexcept; + static void getTexCoord(SMikkTSpaceContext const* context, float fvTexcOut[], const int iFace, + const int iVert) noexcept; + static void setTSpaceBasic(SMikkTSpaceContext const* context, float const fvTangent[], + float const fSign, int const iFace, int const iVert) noexcept; + + static MikktspaceImpl* getThis(SMikkTSpaceContext const* context) noexcept; + + inline 
const uint3 getTriangle(int triangleIndex) const noexcept; + + int const mFaceCount; + float3 const* mPositions; + size_t const mPositionStride; + float3 const* mNormals; + size_t const mNormalStride; + float2 const* mUVs; + size_t const mUVStride; + uint8_t const* mTriangles; + bool mIsTriangle16; + + std::vector mOutVertices; +}; + +}// namespace filament::geometry + +#endif//TNT_GEOMETRY_MIKKTSPACEIMPL_H diff --git a/libs/geometry/src/TangentSpaceMesh.cpp b/libs/geometry/src/TangentSpaceMesh.cpp index 62e503a6229..8e21c9e3b95 100644 --- a/libs/geometry/src/TangentSpaceMesh.cpp +++ b/libs/geometry/src/TangentSpaceMesh.cpp @@ -16,61 +16,36 @@ #include +#include "MikktspaceImpl.h" +#include "TangentSpaceMeshInternal.h" + #include #include #include #include +#include + namespace filament { namespace geometry { using namespace filament::math; using Builder = TangentSpaceMesh::Builder; using Algorithm = TangentSpaceMesh::Algorithm; -using MethodPtr = void(*)(const TangentSpaceMeshInput*, TangentSpaceMeshOutput*); -using NormalsOnlyKernelPtr = void(*)(const float3& N, float3& T, float3& B); - -struct TangentSpaceMeshInput { - size_t vertexCount = 0; - const float3* normals = nullptr; - const float2* uvs = nullptr; - const float3* positions = nullptr; - const ushort3* triangles16 = nullptr; - const uint3* triangles32 = nullptr; - - size_t normalStride = 0; - size_t uvStride = 0; - size_t positionStride = 0; - size_t triangleCount = 0; - - Algorithm algorithm; -}; - -struct TangentSpaceMeshOutput { - Algorithm algorithm; - - size_t triangleCount = 0; - size_t vertexCount = 0; - - quatf const* tangentSpace = nullptr; - float2 const* uvs = nullptr; - float3 const* positions = nullptr; - uint3 const* triangles32 = nullptr; - ushort3 const* triangles16 = nullptr; -}; +using MethodPtr = void(*)(TangentSpaceMeshInput const*, TangentSpaceMeshOutput*); namespace { -const uint8_t NORMALS_BIT = 0x01; -const uint8_t UVS_BIT = 0x02; -const uint8_t POSITIONS_BIT = 0x04; -const 
uint8_t INDICES_BIT = 0x08; +uint8_t const NORMALS_BIT = 0x01; +uint8_t const UVS_BIT = 0x02; +uint8_t const POSITIONS_BIT = 0x04; +uint8_t const INDICES_BIT = 0x08; // Input types -const uint8_t NORMALS = NORMALS_BIT; -const uint8_t POSITIONS_INDICES = POSITIONS_BIT | INDICES_BIT; -const uint8_t NORMALS_UVS_POSITIONS_INDICES = NORMALS_BIT | UVS_BIT | POSITIONS_BIT | INDICES_BIT; +uint8_t const NORMALS = NORMALS_BIT; +uint8_t const POSITIONS_INDICES = POSITIONS_BIT | INDICES_BIT; +uint8_t const NORMALS_UVS_POSITIONS_INDICES = NORMALS_BIT | UVS_BIT | POSITIONS_BIT | INDICES_BIT; std::string_view to_string(Algorithm algorithm) noexcept { switch (algorithm) { @@ -89,20 +64,10 @@ std::string_view to_string(Algorithm algorithm) noexcept { } } -inline bool isInputType(const uint8_t inputType, const uint8_t checkType) noexcept { +inline bool isInputType(uint8_t const inputType, uint8_t const checkType) noexcept { return ((inputType & checkType) == checkType); } -template -inline const InputType* pointerAdd(const InputType* ptr, size_t index, size_t stride) noexcept { - return (InputType*) (((const uint8_t*) ptr) + (index * stride)); -} - -template -inline InputType* pointerAdd(InputType* ptr, size_t index, size_t stride) noexcept { - return (InputType*) (((uint8_t*) ptr) + (index * stride)); -} - template inline void takeStride(InputType*& out, size_t stride) noexcept { out = pointerAdd(out, 1, stride); @@ -179,7 +144,7 @@ Algorithm selectAlgorithm(TangentSpaceMeshInput *input) noexcept { if (!foundAlgo) { outAlgo = selectBestDefaultAlgorithm(inputType); utils::slog.w << "Cannot satisfy algorithm=" << to_string(input->algorithm) - << ". Selected algorithm=" << to_string(input->algorithm) << " instead" + << ". 
Selected algorithm=" << to_string(outAlgo) << " instead" << utils::io::endl; } @@ -187,32 +152,32 @@ Algorithm selectAlgorithm(TangentSpaceMeshInput *input) noexcept { } // The paper uses a Z-up world basis, which has been converted to Y-up here -inline std::pair frisvadKernel(const float3& n) { +inline std::pair frisvadKernel(float3 const& n) { float3 b, t; if (n.y < -1.0f + std::numeric_limits::epsilon()) { // Handle the singularity t = float3{-1.0f, 0.0f, 0.0f}; b = float3{0.0f, 0.0f, -1.0f}; } else { - const float va = 1.0f / (1.0f + n.y); - const float vb = -n.z * n.x * va; + float const va = 1.0f / (1.0f + n.y); + float const vb = -n.z * n.x * va; t = float3{vb, -n.z, 1.0f - n.z * n.z * va}; b = float3{1.0f - n.x * n.x * va, -n.x, vb}; } return {b, t}; } -void frisvadMethod(const TangentSpaceMeshInput* input, TangentSpaceMeshOutput* output) +void frisvadMethod(TangentSpaceMeshInput const* input, TangentSpaceMeshOutput* output) noexcept { - const size_t vertexCount = input->vertexCount; + size_t const vertexCount = input->vertexCount; quatf* quats = new quatf[vertexCount]; - const float3* UTILS_RESTRICT normals = input->normals; + float3 const* UTILS_RESTRICT normals = input->normals; size_t nstride = input->normalStride ? 
input->normalStride : sizeof(float3); for (size_t qindex = 0; qindex < vertexCount; ++qindex) { - const float3 n = *normals; - const auto [b, t] = frisvadKernel(n); + float3 const n = *normals; + auto const [b, t] = frisvadKernel(n); quats[qindex] = mat3f::packTangentFrame({t, b, n}, sizeof(int32_t)); normals = pointerAdd(normals, 1, nstride); } @@ -225,17 +190,16 @@ void frisvadMethod(const TangentSpaceMeshInput* input, TangentSpaceMeshOutput* o output->triangles16 = input->triangles16; } - -void hughesMollerMethod(const TangentSpaceMeshInput* input, TangentSpaceMeshOutput* output) +void hughesMollerMethod(TangentSpaceMeshInput const* input, TangentSpaceMeshOutput* output) noexcept { - const size_t vertexCount = input->vertexCount; + size_t const vertexCount = input->vertexCount; quatf* quats = new quatf[vertexCount]; - const float3* UTILS_RESTRICT normals = input->normals; + float3 const* UTILS_RESTRICT normals = input->normals; size_t nstride = input->normalStride ? input->normalStride : sizeof(float3); for (size_t qindex = 0; qindex < vertexCount; ++qindex) { - const float3 n = *normals; + float3 const n = *normals; float3 b, t; if (abs(n.x) > abs(n.z) + std::numeric_limits::epsilon()) { @@ -258,7 +222,7 @@ void hughesMollerMethod(const TangentSpaceMeshInput* input, TangentSpaceMeshOutp output->triangles16 = input->triangles16; } -void flatShadingMethod(const TangentSpaceMeshInput* input, TangentSpaceMeshOutput* output) +void flatShadingMethod(TangentSpaceMeshInput const* input, TangentSpaceMeshOutput* output) noexcept { const float3* positions = input->positions; const size_t pstride = input->positionStride ? 
input->positionStride : sizeof(float3); @@ -321,6 +285,102 @@ void flatShadingMethod(const TangentSpaceMeshInput* input, TangentSpaceMeshOutpu output->triangleCount = outTriangleCount; } +void mikktspaceMethod(TangentSpaceMeshInput const* input, TangentSpaceMeshOutput* output) { + MikktspaceImpl impl(input); + impl.run(output); +} + +inline float3 randomPerp(const float3& n) { + float3 perp = cross(n, float3{1, 0, 0}); + float sqrlen = dot(perp, perp); + if (sqrlen <= std::numeric_limits::epsilon()) { + perp = cross(n, float3{0, 1, 0}); + sqrlen = dot(perp, perp); + } + return perp / sqrlen; +} + +void lengyelMethod(TangentSpaceMeshInput const* input, TangentSpaceMeshOutput* output) { + size_t const vertexCount = input->vertexCount; + size_t const triangleCount = input->triangleCount; + size_t const positionStride = input->positionStride ? input->positionStride : sizeof(float3); + size_t const normalStride = input->normalStride ? input->normalStride : sizeof(float3); + size_t const uvStride = input->uvStride ? input->uvStride : sizeof(float2); + auto const* triangles16 = input->triangles16; + auto const* triangles32 = input->triangles32; + auto const* positions = input->positions; + auto const* uvs = input->uvs; + auto const* normals = input->normals; + + std::vector tan1(vertexCount, float3{0.0f}); + std::vector tan2(vertexCount, float3{0.0f}); + for (size_t a = 0; a < triangleCount; ++a) { + uint3 tri = triangles16 ? 
uint3(triangles16[a]) : triangles32[a]; + assert_invariant(tri.x < vertexCount && tri.y < vertexCount && tri.z < vertexCount); + float3 const& v1 = *pointerAdd(positions, tri.x, positionStride); + float3 const& v2 = *pointerAdd(positions, tri.y, positionStride); + float3 const& v3 = *pointerAdd(positions, tri.z, positionStride); + float2 const& w1 = *pointerAdd(uvs, tri.x, uvStride); + float2 const& w2 = *pointerAdd(uvs, tri.y, uvStride); + float2 const& w3 = *pointerAdd(uvs, tri.z, uvStride); + float const x1 = v2.x - v1.x; + float const x2 = v3.x - v1.x; + float const y1 = v2.y - v1.y; + float const y2 = v3.y - v1.y; + float const z1 = v2.z - v1.z; + float const z2 = v3.z - v1.z; + float const s1 = w2.x - w1.x; + float const s2 = w3.x - w1.x; + float const t1 = w2.y - w1.y; + float const t2 = w3.y - w1.y; + float const d = s1 * t2 - s2 * t1; + float3 sdir, tdir; + // In general we can't guarantee smooth tangents when the UV's are non-smooth, but let's at + // least avoid divide-by-zero and fall back to normals-only method. + if (d == 0.0) { + float3 const& n1 = *pointerAdd(normals, tri.x, normalStride); + sdir = randomPerp(n1); + tdir = cross(n1, sdir); + } else { + sdir = {t2 * x1 - t1 * x2, t2 * y1 - t1 * y2, t2 * z1 - t1 * z2}; + tdir = {s1 * x2 - s2 * x1, s1 * y2 - s2 * y1, s1 * z2 - s2 * z1}; + float const r = 1.0f / d; + sdir *= r; + tdir *= r; + } + tan1[tri.x] += sdir; + tan1[tri.y] += sdir; + tan1[tri.z] += sdir; + tan2[tri.x] += tdir; + tan2[tri.y] += tdir; + tan2[tri.z] += tdir; + } + + quatf* quats = new quatf[vertexCount]; + for (size_t a = 0; a < vertexCount; a++) { + float3 const& n = *pointerAdd(normals, a, normalStride); // step by normalStride; normals[a] assumes tightly packed normals + float3 const& t1 = tan1[a]; + float3 const& t2 = tan2[a]; + + // Gram-Schmidt orthogonalize + float3 t = normalize(t1 - n * dot(n, t1)); + + // Calculate handedness + float w = (dot(cross(n, t1), t2) < 0.0f) ? -1.0f : 1.0f; + + float3 b = w < 0 ?
cross(t, n) : cross(n, t); + quats[a] = mat3f::packTangentFrame({t, b, n}, sizeof(int32_t)); + } + + output->tangentSpace = quats; + output->vertexCount = vertexCount; + output->triangleCount = triangleCount; + output->uvs = uvs; + output->positions = positions; + output->triangles32 = triangles32; + output->triangles16 = triangles16; +} + template inline void cleanOutputPointer(DataType*& ptr, InputType inputPtr) noexcept { if (ptr && ptr != (const DataType*) inputPtr) { @@ -397,12 +457,18 @@ TangentSpaceMesh* Builder::build() { mMesh->mOutput->algorithm = selectAlgorithm(mMesh->mInput); MethodPtr method = nullptr; switch (mMesh->mOutput->algorithm) { - case Algorithm::FRISVAD: - method = frisvadMethod; + case Algorithm::MIKKTSPACE: + method = mikktspaceMethod; + break; + case Algorithm::LENGYEL: + method = lengyelMethod; break; case Algorithm::HUGHES_MOLLER: method = hughesMollerMethod; break; + case Algorithm::FRISVAD: + method = frisvadMethod; + break; case Algorithm::FLAT_SHADING: method = flatShadingMethod; break; diff --git a/libs/geometry/src/TangentSpaceMeshInternal.h b/libs/geometry/src/TangentSpaceMeshInternal.h new file mode 100644 index 00000000000..f9beb0684fe --- /dev/null +++ b/libs/geometry/src/TangentSpaceMeshInternal.h @@ -0,0 +1,71 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef TNT_GEOMETRY_TANGENTSPACEMESHIMPL_H +#define TNT_GEOMETRY_TANGENTSPACEMESHIMPL_H + +#include + +#include +#include + +namespace filament::geometry { + +using namespace filament::math; +using Algorithm = TangentSpaceMesh::Algorithm; + +struct TangentSpaceMeshInput { + size_t vertexCount = 0; + float3 const* normals = nullptr; + float2 const* uvs = nullptr; + float3 const* positions = nullptr; + ushort3 const* triangles16 = nullptr; + uint3 const* triangles32 = nullptr; + + size_t normalStride = 0; + size_t uvStride = 0; + size_t positionStride = 0; + size_t triangleCount = 0; + + Algorithm algorithm; +}; + +struct TangentSpaceMeshOutput { + Algorithm algorithm; + + size_t triangleCount = 0; + size_t vertexCount = 0; + + quatf const* tangentSpace = nullptr; + float2 const* uvs = nullptr; + float3 const* positions = nullptr; + uint3 const* triangles32 = nullptr; + ushort3 const* triangles16 = nullptr; +}; + +template +inline const InputType* pointerAdd(InputType const* ptr, size_t index, size_t stride) noexcept { + return (InputType*) (((uint8_t const*) ptr) + (index * stride)); +} + +template +inline InputType* pointerAdd(InputType* ptr, size_t index, size_t stride) noexcept { + return (InputType*) (((uint8_t*) ptr) + (index * stride)); +} + +}// namespace filament::geometry + +#endif//TNT_GEOMETRY_TANGENTSPACEMESHIMPL_H diff --git a/libs/geometry/tests/test_tangent_space_mesh.cpp b/libs/geometry/tests/test_tangent_space_mesh.cpp index ba31e2c01d4..b048235fb77 100644 --- a/libs/geometry/tests/test_tangent_space_mesh.cpp +++ b/libs/geometry/tests/test_tangent_space_mesh.cpp @@ -18,7 +18,6 @@ #include #include -#include #include @@ -39,15 +38,27 @@ const std::vector CUBE_VERTS { float3{1, 1, 1} }; -const std::vector CUBE_UVS{ +const std::vector CUBE_UVS { + float2{0, 0}, float2{0, 0}, float2{1, 0}, + float2{1, 1}, + float2{0, 1}, float2{0, 1}, float2{1, 1}, - float2{.5, 0}, - float2{0, .5}, - float2{.5, .5}, - float2{0, 0} + float2{0, 1} +}; + +const 
float3 CUBE_CENTER{.5, .5, .5}; +const std::vector CUBE_NORMALS { + normalize(CUBE_VERTS[0] - CUBE_CENTER), + normalize(CUBE_VERTS[1] - CUBE_CENTER), + normalize(CUBE_VERTS[2] - CUBE_CENTER), + normalize(CUBE_VERTS[3] - CUBE_CENTER), + normalize(CUBE_VERTS[4] - CUBE_CENTER), + normalize(CUBE_VERTS[5] - CUBE_CENTER), + normalize(CUBE_VERTS[6] - CUBE_CENTER), + normalize(CUBE_VERTS[7] - CUBE_CENTER), }; const std::vector CUBE_TRIANGLES { @@ -121,6 +132,17 @@ TEST_F(TangentSpaceMeshTest, BuilderDefaultAlgorithms) { .build(); EXPECT_EQ(mesh->getAlgorithm(), TangentSpaceMesh::Algorithm::FRISVAD); TangentSpaceMesh::destroy(mesh); + + mesh = TangentSpaceMesh::Builder() + .vertexCount(CUBE_VERTS.size()) + .positions(CUBE_VERTS.data()) + .uvs(CUBE_UVS.data()) + .normals(CUBE_NORMALS.data()) + .triangleCount(CUBE_TRIANGLES.size()) + .triangles(CUBE_TRIANGLES.data()) + .build(); + EXPECT_EQ(mesh->getAlgorithm(), TangentSpaceMesh::Algorithm::MIKKTSPACE); + TangentSpaceMesh::destroy(mesh); } // Remeshed vertices/uvs should map to input vertices/uvs @@ -189,7 +211,6 @@ TEST_F(TangentSpaceMeshTest, FlatShading) { TangentSpaceMesh::destroy(mesh); } - TEST_F(TangentSpaceMeshTest, Frisvad) { TangentSpaceMesh* mesh = TangentSpaceMesh::Builder() .vertexCount(TEST_NORMALS.size()) @@ -244,6 +265,103 @@ TEST_F(TangentSpaceMeshTest, HughesMoller) { TangentSpaceMesh::destroy(mesh); } +TEST_F(TangentSpaceMeshTest, MikktspaceRemesh) { + TangentSpaceMesh* mesh = TangentSpaceMesh::Builder() + .vertexCount(CUBE_VERTS.size()) + .normals(CUBE_NORMALS.data()) + .positions(CUBE_VERTS.data()) + .uvs(CUBE_UVS.data()) + .triangleCount(CUBE_TRIANGLES.size()) + .triangles(CUBE_TRIANGLES.data()) + .algorithm(TangentSpaceMesh::Algorithm::MIKKTSPACE) + .build(); + + size_t const vertexCount = mesh->getVertexCount(); + + std::vector outPositions(vertexCount); + mesh->getPositions(outPositions.data()); + + std::vector outUVs(vertexCount); + mesh->getUVs(outUVs.data()); + + for (size_t i = 0; i < 
outPositions.size(); ++i) { + auto const& outPos = outPositions[i]; + auto const& outUV = outUVs[i]; + + bool found = false; + for (size_t j = 0; j < CUBE_VERTS.size(); ++j) { + auto const& inPos = CUBE_VERTS[j]; + auto const& inUV = CUBE_UVS[j]; + if (isAlmostEqual3(outPos, inPos)) { + found = true; + EXPECT_PRED2(isAlmostEqual2, outUV, inUV); + break; + } + } + EXPECT_TRUE(found); + } + TangentSpaceMesh::destroy(mesh); +} + +TEST_F(TangentSpaceMeshTest, Mikktspace) { + // It's unclear why the dot product between n and b is greater epsilon, but since we don't + // control the implementation of mikktspace, we simply add a little slack to the test. + constexpr float MAGIC_SLACK = 1.00001; + TangentSpaceMesh* mesh = TangentSpaceMesh::Builder() + .vertexCount(CUBE_VERTS.size()) + .normals(CUBE_NORMALS.data()) + .positions(CUBE_VERTS.data()) + .uvs(CUBE_UVS.data()) + .triangleCount(CUBE_TRIANGLES.size()) + .triangles(CUBE_TRIANGLES.data()) + .algorithm(TangentSpaceMesh::Algorithm::MIKKTSPACE) + .build(); + + size_t const vertexCount = mesh->getVertexCount(); + std::vector quats(vertexCount); + mesh->getQuats(quats.data()); + for (size_t i = 0; i < vertexCount; ++i) { + float3 const n = quats[i] * NORMAL_AXIS; + float3 const b = quats[i] * BITANGENT_AXIS; + float3 const t = quats[i] * TANGENT_AXIS; + + EXPECT_LT(abs(dot(b, t)), std::numeric_limits::epsilon()); + EXPECT_LT(abs(dot(n, t)), std::numeric_limits::epsilon()); + EXPECT_LT(abs(dot(n, b)), std::numeric_limits::epsilon() * MAGIC_SLACK); + EXPECT_PRED2(isAlmostEqual3, cross(n, t), b); + } + TangentSpaceMesh::destroy(mesh); +} + +TEST_F(TangentSpaceMeshTest, Lengyel) { + TangentSpaceMesh* mesh = TangentSpaceMesh::Builder() + .vertexCount(CUBE_VERTS.size()) + .normals(CUBE_NORMALS.data()) + .positions(CUBE_VERTS.data()) + .uvs(CUBE_UVS.data()) + .triangleCount(CUBE_TRIANGLES.size()) + .triangles(CUBE_TRIANGLES.data()) + .algorithm(TangentSpaceMesh::Algorithm::LENGYEL) + .build(); + + size_t const vertexCount = 
mesh->getVertexCount(); + std::vector quats(vertexCount); + mesh->getQuats(quats.data()); + for (size_t i = 0; i < vertexCount; ++i) { + float3 const n = quats[i] * NORMAL_AXIS; + EXPECT_PRED2(isAlmostEqual3, n, CUBE_NORMALS[i]); + + float3 const b = quats[i] * BITANGENT_AXIS; + float3 const t = quats[i] * TANGENT_AXIS; + + EXPECT_LT(abs(dot(b, t)), std::numeric_limits::epsilon()); + EXPECT_LT(abs(dot(n, t)), std::numeric_limits::epsilon()); + EXPECT_LT(abs(dot(n, b)), std::numeric_limits::epsilon()); + EXPECT_PRED2(isAlmostEqual3, cross(n, t), b); + } + TangentSpaceMesh::destroy(mesh); +} + int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); diff --git a/libs/utils/include/utils/SingleInstanceComponentManager.h b/libs/utils/include/utils/SingleInstanceComponentManager.h index 2ed7f2c9dd9..c03ec5f100c 100644 --- a/libs/utils/include/utils/SingleInstanceComponentManager.h +++ b/libs/utils/include/utils/SingleInstanceComponentManager.h @@ -60,16 +60,19 @@ class UTILS_PUBLIC SingleInstanceComponentManager { protected: static constexpr size_t ENTITY_INDEX = sizeof ... (Elements); + public: using SoA = StructureOfArrays; + using Structure = typename SoA::Structure; + using Instance = EntityInstanceBase::Type; SingleInstanceComponentManager() noexcept { // We always start with a dummy entry because index=0 is reserved. The component // at index = 0, is guaranteed to be default-initialized. // Sub-classes can use this to their advantage. 
- mData.push_back(); + mData.push_back(Structure{}); } SingleInstanceComponentManager(SingleInstanceComponentManager&&) noexcept {/* = default */} @@ -269,7 +272,7 @@ SingleInstanceComponentManager::addComponent(Entity e) { if (!e.isNull()) { if (!hasComponent(e)) { // this is like a push_back(e); - mData.push_back().template back() = e; + mData.push_back(Structure{}).template back() = e; // index 0 is used when the component doesn't exist ci = Instance(mData.size() - 1); mInstanceMap[e] = ci; diff --git a/libs/utils/include/utils/StructureOfArrays.h b/libs/utils/include/utils/StructureOfArrays.h index b4623e34163..b6ea3bfbe47 100644 --- a/libs/utils/include/utils/StructureOfArrays.h +++ b/libs/utils/include/utils/StructureOfArrays.h @@ -41,11 +41,13 @@ class StructureOfArraysBase { static constexpr const size_t kArrayCount = sizeof...(Elements); public: - using SoA = StructureOfArraysBase; + using SoA = StructureOfArraysBase; + + using Structure = std::tuple; // Type of the Nth array template - using TypeAt = typename std::tuple_element_t>; + using TypeAt = typename std::tuple_element_t; // Number of arrays static constexpr size_t getArrayCount() noexcept { return kArrayCount; } @@ -57,7 +59,7 @@ class StructureOfArraysBase { // -------------------------------------------------------------------------------------------- - class Structure; + class IteratorValue; template class Iterator; using iterator = Iterator; using const_iterator = Iterator; @@ -69,45 +71,45 @@ class StructureOfArraysBase { * In other words, it's the return type of iterator::operator*(), and since it * cannot be a C++ reference (&), it's an object that acts like it. 
*/ - class StructureRef { - friend class Structure; + class IteratorValueRef { + friend class IteratorValue; friend iterator; friend const_iterator; StructureOfArraysBase* const UTILS_RESTRICT soa; size_t const index; - StructureRef(StructureOfArraysBase* soa, size_t index) : soa(soa), index(index) { } + IteratorValueRef(StructureOfArraysBase* soa, size_t index) : soa(soa), index(index) { } // assigns a value_type to a reference (i.e. assigns to what's pointed to by the reference) template - StructureRef& assign(Structure const& rhs, std::index_sequence); + IteratorValueRef& assign(IteratorValue const& rhs, std::index_sequence); // assigns a value_type to a reference (i.e. assigns to what's pointed to by the reference) template - StructureRef& assign(Structure&& rhs, std::index_sequence) noexcept; + IteratorValueRef& assign(IteratorValue&& rhs, std::index_sequence) noexcept; // objects pointed to by reference can be swapped, so provide the special swap() function. - friend void swap(StructureRef lhs, StructureRef rhs) { + friend void swap(IteratorValueRef lhs, IteratorValueRef rhs) { lhs.soa->swap(lhs.index, rhs.index); } public: // references can be created by copy-assignment only - StructureRef(StructureRef const& rhs) noexcept : soa(rhs.soa), index(rhs.index) { } + IteratorValueRef(IteratorValueRef const& rhs) noexcept : soa(rhs.soa), index(rhs.index) { } // copy the content of a reference to the content of this one - StructureRef& operator=(StructureRef const& rhs); + IteratorValueRef& operator=(IteratorValueRef const& rhs); // move the content of a reference to the content of this one - StructureRef& operator=(StructureRef&& rhs) noexcept; + IteratorValueRef& operator=(IteratorValueRef&& rhs) noexcept; // copy a value_type to the content of this reference - StructureRef& operator=(Structure const& rhs) { + IteratorValueRef& operator=(IteratorValue const& rhs) { return assign(rhs, std::make_index_sequence()); } // move a value_type to the content of this 
reference - StructureRef& operator=(Structure&& rhs) noexcept { + IteratorValueRef& operator=(IteratorValue&& rhs) noexcept { return assign(rhs, std::make_index_sequence()); } @@ -122,36 +124,36 @@ class StructureOfArraysBase { * Internally we're using a tuple<> to store the data. * This object is not trivial to construct, as it copies an entry of the SoA. */ - class Structure { - friend class StructureRef; + class IteratorValue { + friend class IteratorValueRef; friend iterator; friend const_iterator; using Type = std::tuple::type...>; Type elements; template - static Type init(StructureRef const& rhs, std::index_sequence) { + static Type init(IteratorValueRef const& rhs, std::index_sequence) { return Type{ rhs.soa->template elementAt(rhs.index)... }; } template - static Type init(StructureRef&& rhs, std::index_sequence) noexcept { + static Type init(IteratorValueRef&& rhs, std::index_sequence) noexcept { return Type{ std::move(rhs.soa->template elementAt(rhs.index))... }; } public: - Structure(Structure const& rhs) = default; - Structure(Structure&& rhs) noexcept = default; - Structure& operator=(Structure const& rhs) = default; - Structure& operator=(Structure&& rhs) noexcept = default; + IteratorValue(IteratorValue const& rhs) = default; + IteratorValue(IteratorValue&& rhs) noexcept = default; + IteratorValue& operator=(IteratorValue const& rhs) = default; + IteratorValue& operator=(IteratorValue&& rhs) noexcept = default; // initialize and assign from a StructureRef - Structure(StructureRef const& rhs) + IteratorValue(IteratorValueRef const& rhs) : elements(init(rhs, std::make_index_sequence())) {} - Structure(StructureRef&& rhs) noexcept + IteratorValue(IteratorValueRef&& rhs) noexcept : elements(init(rhs, std::make_index_sequence())) {} - Structure& operator=(StructureRef const& rhs) { return operator=(Structure(rhs)); } - Structure& operator=(StructureRef&& rhs) noexcept { return operator=(Structure(rhs)); } + IteratorValue& operator=(IteratorValueRef 
const& rhs) { return operator=(IteratorValue(rhs)); } + IteratorValue& operator=(IteratorValueRef&& rhs) noexcept { return operator=(IteratorValue(rhs)); } // access the elements of this value_Type (i.e. the "fields" of the structure) template TypeAt const& get() const { return std::get(elements); } @@ -174,9 +176,9 @@ class StructureOfArraysBase { Iterator(CVQualifiedSOAPointer soa, size_t index) : soa(soa), index(index) {} public: - using value_type = Structure; - using reference = StructureRef; - using pointer = StructureRef*; // FIXME: this should be a StructurePtr type + using value_type = IteratorValue; + using reference = IteratorValueRef; + using pointer = IteratorValueRef*; // FIXME: this should be a StructurePtr type using difference_type = ptrdiff_t; using iterator_category = std::random_access_iterator_tag; @@ -335,6 +337,11 @@ class StructureOfArraysBase { return *this; } + StructureOfArraysBase& push_back(Structure&& args) noexcept { + ensureCapacity(mSize + 1); + return push_back_unsafe(std::forward(args)); + } + StructureOfArraysBase& push_back(Elements const& ... args) noexcept { ensureCapacity(mSize + 1); return push_back_unsafe(args...); @@ -349,23 +356,29 @@ class StructureOfArraysBase { struct PushBackUnsafeClosure { size_t last; std::tuple args; - inline explicit PushBackUnsafeClosure(size_t last, Elements&& ... args) - : last(last), args(std::forward(args)...) {} - inline explicit PushBackUnsafeClosure(size_t last, Elements const& ... args) - : last(last), args(args...) {} + inline explicit PushBackUnsafeClosure(size_t last, Structure&& args) + : last(last), args(std::forward(args)) {} template inline void operator()(TypeAt* p) { new(p + last) TypeAt{ std::get(args) }; } }; + StructureOfArraysBase& push_back_unsafe(Structure&& args) noexcept { + for_each_index(mArrays, + PushBackUnsafeClosure{ mSize++, std::forward(args) }); + return *this; + } + StructureOfArraysBase& push_back_unsafe(Elements const& ... 
args) noexcept { - for_each_index(mArrays, PushBackUnsafeClosure{ mSize++, args... }); + for_each_index(mArrays, + PushBackUnsafeClosure{ mSize++, { args... } }); return *this; } StructureOfArraysBase& push_back_unsafe(Elements&& ... args) noexcept { - for_each_index(mArrays, PushBackUnsafeClosure{ mSize++, std::forward(args)... }); + for_each_index(mArrays, + PushBackUnsafeClosure{ mSize++, { std::forward(args)... }}); return *this; } @@ -562,8 +575,10 @@ class StructureOfArraysBase { forEach([from, to](auto p) { using T = typename std::decay::type; // note: scalar types like int/float get initialized to zero - for (size_t i = from; i < to; i++) { - new(p + i) T(); + if constexpr (!std::is_trivially_default_constructible_v) { + for (size_t i = from; i < to; i++) { + new(p + i) T(); + } } }); } @@ -571,8 +586,10 @@ class StructureOfArraysBase { void destroy_each(size_t from, size_t to) noexcept { forEach([from, to](auto p) { using T = typename std::decay::type; - for (size_t i = from; i < to; i++) { - p[i].~T(); + if constexpr (!std::is_trivially_destructible_v) { + for (size_t i = from; i < to; i++) { + p[i].~T(); + } } }); } @@ -592,15 +609,17 @@ class StructureOfArraysBase { reinterpret_cast(uintptr_t(b) + offsets[index]); // for trivial cases, just call memcpy() - if (std::is_trivially_copyable::value && - std::is_trivially_destructible::value) { + if constexpr (std::is_trivially_copyable_v && + std::is_trivially_destructible_v) { memcpy(arrayPointer, p, size * sizeof(T)); } else { for (size_t i = 0; i < size; i++) { // we move an element by using the in-place move-constructor new(arrayPointer + i) T(std::move(p[i])); - // and delete them by calling the destructor directly - p[i].~T(); + if constexpr (!std::is_trivially_destructible_v) { + // and delete them by calling the destructor directly + p[i].~T(); + } } } index++; @@ -626,27 +645,27 @@ class StructureOfArraysBase { template inline -typename StructureOfArraysBase::StructureRef& 
-StructureOfArraysBase::StructureRef::operator=( - StructureOfArraysBase::StructureRef const& rhs) { - return operator=(Structure(rhs)); +typename StructureOfArraysBase::IteratorValueRef& +StructureOfArraysBase::IteratorValueRef::operator=( + StructureOfArraysBase::IteratorValueRef const& rhs) { + return operator=(IteratorValue(rhs)); } template inline -typename StructureOfArraysBase::StructureRef& -StructureOfArraysBase::StructureRef::operator=( - StructureOfArraysBase::StructureRef&& rhs) noexcept { - return operator=(Structure(rhs)); +typename StructureOfArraysBase::IteratorValueRef& +StructureOfArraysBase::IteratorValueRef::operator=( + StructureOfArraysBase::IteratorValueRef&& rhs) noexcept { + return operator=(IteratorValue(rhs)); } template template inline -typename StructureOfArraysBase::StructureRef& -StructureOfArraysBase::StructureRef::assign( - StructureOfArraysBase::Structure const& rhs, std::index_sequence) { - // implements StructureRef& StructureRef::operator=(Structure const& rhs) +typename StructureOfArraysBase::IteratorValueRef& +StructureOfArraysBase::IteratorValueRef::assign( + StructureOfArraysBase::IteratorValue const& rhs, std::index_sequence) { + // implements IteratorValueRef& IteratorValueRef::operator=(IteratorValue const& rhs) auto UTILS_UNUSED l = { (soa->elementAt(index) = std::get(rhs.elements), 0)... 
}; return *this; } @@ -654,10 +673,10 @@ StructureOfArraysBase::StructureRef::assign( template template inline -typename StructureOfArraysBase::StructureRef& -StructureOfArraysBase::StructureRef::assign( - StructureOfArraysBase::Structure&& rhs, std::index_sequence) noexcept { - // implements StructureRef& StructureRef::operator=(Structure&& rhs) noexcept +typename StructureOfArraysBase::IteratorValueRef& +StructureOfArraysBase::IteratorValueRef::assign( + StructureOfArraysBase::IteratorValue&& rhs, std::index_sequence) noexcept { + // implements IteratorValueRef& IteratorValueRef::operator=(IteratorValue&& rhs) noexcept auto UTILS_UNUSED l = { (soa->elementAt(index) = std::move(std::get(rhs.elements)), 0)... }; return *this; diff --git a/libs/utils/include/utils/android/Systrace.h b/libs/utils/include/utils/android/Systrace.h index 8af96aeaf5a..41b64f16723 100644 --- a/libs/utils/include/utils/android/Systrace.h +++ b/libs/utils/include/utils/android/Systrace.h @@ -36,7 +36,7 @@ * Creates a Systrace context in the current scope. needed for calling all other systrace * commands below. */ -#define SYSTRACE_CONTEXT() ::utils::details::Systrace ___tracer(SYSTRACE_TAG) +#define SYSTRACE_CONTEXT() ::utils::details::Systrace ___trctx(SYSTRACE_TAG) // SYSTRACE_NAME traces the beginning and end of the current scope. To trace @@ -57,10 +57,10 @@ #define SYSTRACE_CALL() SYSTRACE_NAME(__FUNCTION__) #define SYSTRACE_NAME_BEGIN(name) \ - ___tracer.traceBegin(SYSTRACE_TAG, name) + ___trctx.traceBegin(SYSTRACE_TAG, name) #define SYSTRACE_NAME_END() \ - ___tracer.traceEnd(SYSTRACE_TAG) + ___trctx.traceEnd(SYSTRACE_TAG) /** @@ -71,24 +71,24 @@ * used to end it. */ #define SYSTRACE_ASYNC_BEGIN(name, cookie) \ - ___tracer.asyncBegin(SYSTRACE_TAG, name, cookie) + ___trctx.asyncBegin(SYSTRACE_TAG, name, cookie) /** * Trace the end of an asynchronous event. * This should have a corresponding SYSTRACE_ASYNC_BEGIN. 
*/ #define SYSTRACE_ASYNC_END(name, cookie) \ - ___tracer.asyncEnd(SYSTRACE_TAG, name, cookie) + ___trctx.asyncEnd(SYSTRACE_TAG, name, cookie) /** * Traces an integer counter value. name is used to identify the counter. * This can be used to track how a value changes over time. */ #define SYSTRACE_VALUE32(name, val) \ - ___tracer.value(SYSTRACE_TAG, name, int32_t(val)) + ___trctx.value(SYSTRACE_TAG, name, int32_t(val)) #define SYSTRACE_VALUE64(name, val) \ - ___tracer.value(SYSTRACE_TAG, name, int64_t(val)) + ___trctx.value(SYSTRACE_TAG, name, int64_t(val)) // ------------------------------------------------------------------------------------------------ // No user serviceable code below... @@ -221,9 +221,9 @@ class Systrace { // ------------------------------------------------------------------------------------------------ class ScopedTrace { - public: +public: // we don't inline this because it's relatively heavy due to a global check - ScopedTrace(uint32_t tag, const char* name) noexcept : mTrace(tag), mTag(tag) { + ScopedTrace(uint32_t tag, const char* name) noexcept: mTrace(tag), mTag(tag) { mTrace.traceBegin(tag, name); } @@ -231,15 +231,7 @@ class ScopedTrace { mTrace.traceEnd(mTag); } - inline void value(uint32_t tag, const char* name, int32_t v) noexcept { - mTrace.value(tag, name, v); - } - - inline void value(uint32_t tag, const char* name, int64_t v) noexcept { - mTrace.value(tag, name, v); - } - - private: +private: Systrace mTrace; const uint32_t mTag; }; diff --git a/third_party/mikktspace/CMakeLists.txt b/third_party/mikktspace/CMakeLists.txt new file mode 100644 index 00000000000..120013568da --- /dev/null +++ b/third_party/mikktspace/CMakeLists.txt @@ -0,0 +1,40 @@ +cmake_minimum_required(VERSION 3.19) +project(mikktspace) + +set(TARGET mikktspace) +set(PUBLIC_HDR_DIR include) + +# ================================================================================================== +# Sources and headers +# 
================================================================================================== +set(PUBLIC_HDRS + ${PUBLIC_HDR_DIR}/mikktspace/mikktspace.h +) + +set(SRCS + src/mikktspace.c +) + +# ================================================================================================== +# Target definitions +# ================================================================================================== +include_directories(${PUBLIC_HDR_DIR}) + +add_library(${TARGET} ${PUBLIC_HDRS} ${PRIVATE_HDRS} ${SRCS}) +target_include_directories(${TARGET} PUBLIC ${PUBLIC_HDR_DIR}) +set_target_properties(${TARGET} PROPERTIES FOLDER Libs) + +# ================================================================================================== +# Compile options and optimizations +# ================================================================================================== +if (MSVC) + target_compile_options(${TARGET} PRIVATE /fp:fast) +else() + target_compile_options(${TARGET} PRIVATE -ffast-math) +endif() + +# ================================================================================================== +# Installation +# ================================================================================================== +install(TARGETS ${TARGET} ARCHIVE DESTINATION lib/${DIST_DIR}) +install(DIRECTORY ${PUBLIC_HDR_DIR}/mikktspace DESTINATION include) diff --git a/third_party/mikktspace/include/mikktspace/mikktspace.h b/third_party/mikktspace/include/mikktspace/mikktspace.h new file mode 100644 index 00000000000..52c44a713c6 --- /dev/null +++ b/third_party/mikktspace/include/mikktspace/mikktspace.h @@ -0,0 +1,145 @@ +/** \file mikktspace/mikktspace.h + * \ingroup mikktspace + */ +/** + * Copyright (C) 2011 by Morten S. Mikkelsen + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. 
+ * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +#ifndef __MIKKTSPACE_H__ +#define __MIKKTSPACE_H__ + + +#ifdef __cplusplus +extern "C" { +#endif + +/* Author: Morten S. Mikkelsen + * Version: 1.0 + * + * The files mikktspace.h and mikktspace.c are designed to be + * stand-alone files and it is important that they are kept this way. + * Not having dependencies on structures/classes/libraries specific + * to the program, in which they are used, allows them to be copied + * and used as is into any tool, program or plugin. + * The code is designed to consistently generate the same + * tangent spaces, for a given mesh, in any tool in which it is used. + * This is done by performing an internal welding step and subsequently an order-independent evaluation + * of tangent space for meshes consisting of triangles and quads. + * This means faces can be received in any order and the same is true for + * the order of vertices of each face. The generated result will not be affected + * by such reordering. Additionally, whether degenerate (vertices or texture coordinates) + * primitives are present or not will not affect the generated results either. + * Once tangent space calculation is done the vertices of degenerate primitives will simply + * inherit tangent space from neighboring non degenerate primitives. 
+ * The analysis behind this implementation can be found in my master's thesis + * which is available for download --> http://image.diku.dk/projects/media/morten.mikkelsen.08.pdf + * Note that though the tangent spaces at the vertices are generated in an order-independent way, + * by this implementation, the interpolated tangent space is still affected by which diagonal is + * chosen to split each quad. A sensible solution is to have your tools pipeline always + * split quads by the shortest diagonal. This choice is order-independent and works with mirroring. + * If these have the same length then compare the diagonals defined by the texture coordinates. + * XNormal which is a tool for baking normal maps allows you to write your own tangent space plugin + * and also quad triangulator plugin. + */ + + +typedef int tbool; +typedef struct SMikkTSpaceContext SMikkTSpaceContext; + +typedef struct { + // Returns the number of faces (triangles/quads) on the mesh to be processed. + int (*m_getNumFaces)(const SMikkTSpaceContext * pContext); + + // Returns the number of vertices on face number iFace + // iFace is a number in the range {0, 1, ..., getNumFaces()-1} + int (*m_getNumVerticesOfFace)(const SMikkTSpaceContext * pContext, const int iFace); + + // returns the position/normal/texcoord of the referenced face of vertex number iVert. + // iVert is in the range {0,1,2} for triangles and {0,1,2,3} for quads. + void (*m_getPosition)(const SMikkTSpaceContext * pContext, float fvPosOut[], const int iFace, const int iVert); + void (*m_getNormal)(const SMikkTSpaceContext * pContext, float fvNormOut[], const int iFace, const int iVert); + void (*m_getTexCoord)(const SMikkTSpaceContext * pContext, float fvTexcOut[], const int iFace, const int iVert); + + // either (or both) of the two setTSpace callbacks can be set. + // The call-back m_setTSpaceBasic() is sufficient for basic normal mapping. + + // This function is used to return the tangent and fSign to the application. 
+ // fvTangent is a unit length vector. + // For normal maps it is sufficient to use the following simplified version of the bitangent which is generated at pixel/vertex level. + // bitangent = fSign * cross(vN, tangent); + // Note that the results are returned unindexed. It is possible to generate a new index list + // But averaging/overwriting tangent spaces by using an already existing index list WILL produce INCORRECT results. + // DO NOT! use an already existing index list. + void (*m_setTSpaceBasic)(const SMikkTSpaceContext * pContext, const float fvTangent[], const float fSign, const int iFace, const int iVert); + + // This function is used to return tangent space results to the application. + // fvTangent and fvBiTangent are unit length vectors and fMagS and fMagT are their + // true magnitudes which can be used for relief mapping effects. + // fvBiTangent is the "real" bitangent and thus may not be perpendicular to fvTangent. + // However, both are perpendicular to the vertex normal. + // For normal maps it is sufficient to use the following simplified version of the bitangent which is generated at pixel/vertex level. + // fSign = bIsOrientationPreserving ? 1.0f : (-1.0f); + // bitangent = fSign * cross(vN, tangent); + // Note that the results are returned unindexed. It is possible to generate a new index list + // But averaging/overwriting tangent spaces by using an already existing index list WILL produce INCORRECT results. + // DO NOT! use an already existing index list. + void (*m_setTSpace)(const SMikkTSpaceContext * pContext, const float fvTangent[], const float fvBiTangent[], const float fMagS, const float fMagT, + const tbool bIsOrientationPreserving, const int iFace, const int iVert); +} SMikkTSpaceInterface; + +struct SMikkTSpaceContext +{ + SMikkTSpaceInterface * m_pInterface; // initialized with callback functions + void * m_pUserData; // pointer to client side mesh data etc.
(passed as the first parameter with every interface call) +}; + +// these are both thread safe! +tbool genTangSpaceDefault(const SMikkTSpaceContext * pContext); // Default (recommended) fAngularThreshold is 180 degrees (which means threshold disabled) +tbool genTangSpace(const SMikkTSpaceContext * pContext, const float fAngularThreshold); + + +// To avoid visual errors (distortions/unwanted hard edges in lighting), when using sampled normal maps, the +// normal map sampler must use the exact inverse of the pixel shader transformation. +// The most efficient transformation we can possibly do in the pixel shader is +// achieved by using, directly, the "unnormalized" interpolated tangent, bitangent and vertex normal: vT, vB and vN. +// pixel shader (fast transform out) +// vNout = normalize( vNt.x * vT + vNt.y * vB + vNt.z * vN ); +// where vNt is the tangent space normal. The normal map sampler must likewise use the +// interpolated and "unnormalized" tangent, bitangent and vertex normal to be compliant with the pixel shader. +// sampler does (exact inverse of pixel shader): +// float3 row0 = cross(vB, vN); +// float3 row1 = cross(vN, vT); +// float3 row2 = cross(vT, vB); +// float fSign = dot(vT, row0)<0 ? -1 : 1; +// vNt = normalize( fSign * float3(dot(vNout,row0), dot(vNout,row1), dot(vNout,row2)) ); +// where vNout is the sampled normal in some chosen 3D space. +// +// Should you choose to reconstruct the bitangent in the pixel shader instead +// of the vertex shader, as explained earlier, then be sure to do this in the normal map sampler also. +// Finally, beware of quad triangulations. If the normal map sampler doesn't use the same triangulation of +// quads as your renderer then problems will occur since the interpolated tangent spaces will differ +// even though the vertex level tangent spaces match.
This can be solved either by triangulating before +// sampling/exporting or by using the order-independent choice of diagonal for splitting quads suggested earlier. +// However, this must be used both by the sampler and your tools/rendering pipeline. + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/third_party/mikktspace/src/mikktspace.c b/third_party/mikktspace/src/mikktspace.c new file mode 100644 index 00000000000..4e27620b277 --- /dev/null +++ b/third_party/mikktspace/src/mikktspace.c @@ -0,0 +1,1899 @@ +/** \file mikktspace/mikktspace.c + * \ingroup mikktspace + */ +/** + * Copyright (C) 2011 by Morten S. Mikkelsen + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. 
+ */ + +#include +#include +#include +#include +#include +#include + +#include + +#define TFALSE 0 +#define TTRUE 1 + +#ifndef M_PI +#define M_PI 3.1415926535897932384626433832795 +#endif + +#define INTERNAL_RND_SORT_SEED 39871946 + +// internal structure +typedef struct { + float x, y, z; +} SVec3; + +static tbool veq( const SVec3 v1, const SVec3 v2 ) +{ + return (v1.x == v2.x) && (v1.y == v2.y) && (v1.z == v2.z); +} + +static SVec3 vadd( const SVec3 v1, const SVec3 v2 ) +{ + SVec3 vRes; + + vRes.x = v1.x + v2.x; + vRes.y = v1.y + v2.y; + vRes.z = v1.z + v2.z; + + return vRes; +} + + +static SVec3 vsub( const SVec3 v1, const SVec3 v2 ) +{ + SVec3 vRes; + + vRes.x = v1.x - v2.x; + vRes.y = v1.y - v2.y; + vRes.z = v1.z - v2.z; + + return vRes; +} + +static SVec3 vscale(const float fS, const SVec3 v) +{ + SVec3 vRes; + + vRes.x = fS * v.x; + vRes.y = fS * v.y; + vRes.z = fS * v.z; + + return vRes; +} + +static float LengthSquared( const SVec3 v ) +{ + return v.x*v.x + v.y*v.y + v.z*v.z; +} + +static float Length( const SVec3 v ) +{ + return sqrtf(LengthSquared(v)); +} + +static SVec3 Normalize( const SVec3 v ) +{ + return vscale(1 / Length(v), v); +} + +static float vdot( const SVec3 v1, const SVec3 v2) +{ + return v1.x*v2.x + v1.y*v2.y + v1.z*v2.z; +} + + +static tbool NotZero(const float fX) +{ + // could possibly use FLT_EPSILON instead + return fabsf(fX) > FLT_MIN; +} + +static tbool VNotZero(const SVec3 v) +{ + // might change this to an epsilon based test + return NotZero(v.x) || NotZero(v.y) || NotZero(v.z); +} + + + +typedef struct { + int iNrFaces; + int * pTriMembers; +} SSubGroup; + +typedef struct { + int iNrFaces; + int * pFaceIndices; + int iVertexRepresentitive; + tbool bOrientPreservering; +} SGroup; + +// +#define MARK_DEGENERATE 1 +#define QUAD_ONE_DEGEN_TRI 2 +#define GROUP_WITH_ANY 4 +#define ORIENT_PRESERVING 8 + + + +typedef struct { + int FaceNeighbors[3]; + SGroup * AssignedGroup[3]; + + // normalized first order face derivatives + SVec3 vOs, 
vOt; + float fMagS, fMagT; // original magnitudes + + // determines if the current and the next triangle are a quad. + int iOrgFaceNumber; + int iFlag, iTSpacesOffs; + unsigned char vert_num[4]; +} STriInfo; + +typedef struct { + SVec3 vOs; + float fMagS; + SVec3 vOt; + float fMagT; + int iCounter; // this is to average back into quads. + tbool bOrient; +} STSpace; + +static int GenerateInitialVerticesIndexList(STriInfo pTriInfos[], int piTriList_out[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn); +static void GenerateSharedVerticesIndexList(int piTriList_in_and_out[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn); +static void InitTriInfo(STriInfo pTriInfos[], const int piTriListIn[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn); +static int Build4RuleGroups(STriInfo pTriInfos[], SGroup pGroups[], int piGroupTrianglesBuffer[], const int piTriListIn[], const int iNrTrianglesIn); +static tbool GenerateTSpaces(STSpace psTspace[], const STriInfo pTriInfos[], const SGroup pGroups[], + const int iNrActiveGroups, const int piTriListIn[], const float fThresCos, + const SMikkTSpaceContext * pContext); + +static int MakeIndex(const int iFace, const int iVert) +{ + assert(iVert>=0 && iVert<4 && iFace>=0); + return (iFace<<2) | (iVert&0x3); +} + +static void IndexToData(int * piFace, int * piVert, const int iIndexIn) +{ + piVert[0] = iIndexIn&0x3; + piFace[0] = iIndexIn>>2; +} + +static STSpace AvgTSpace(const STSpace * pTS0, const STSpace * pTS1) +{ + STSpace ts_res; + + // this if is important. 
Due to floating point precision + // averaging when ts0==ts1 will cause a slight difference + // which results in tangent space splits later on + if (pTS0->fMagS==pTS1->fMagS && pTS0->fMagT==pTS1->fMagT && + veq(pTS0->vOs,pTS1->vOs) && veq(pTS0->vOt, pTS1->vOt)) + { + ts_res.fMagS = pTS0->fMagS; + ts_res.fMagT = pTS0->fMagT; + ts_res.vOs = pTS0->vOs; + ts_res.vOt = pTS0->vOt; + } + else + { + ts_res.fMagS = 0.5f*(pTS0->fMagS+pTS1->fMagS); + ts_res.fMagT = 0.5f*(pTS0->fMagT+pTS1->fMagT); + ts_res.vOs = vadd(pTS0->vOs,pTS1->vOs); + ts_res.vOt = vadd(pTS0->vOt,pTS1->vOt); + if ( VNotZero(ts_res.vOs) ) ts_res.vOs = Normalize(ts_res.vOs); + if ( VNotZero(ts_res.vOt) ) ts_res.vOt = Normalize(ts_res.vOt); + } + + return ts_res; +} + + + +static SVec3 GetPosition(const SMikkTSpaceContext * pContext, const int index); +static SVec3 GetNormal(const SMikkTSpaceContext * pContext, const int index); +static SVec3 GetTexCoord(const SMikkTSpaceContext * pContext, const int index); + + +// degen triangles +static void DegenPrologue(STriInfo pTriInfos[], int piTriList_out[], const int iNrTrianglesIn, const int iTotTris); +static void DegenEpilogue(STSpace psTspace[], STriInfo pTriInfos[], int piTriListIn[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn, const int iTotTris); + + +tbool genTangSpaceDefault(const SMikkTSpaceContext * pContext) +{ + return genTangSpace(pContext, 180.0f); +} + +tbool genTangSpace(const SMikkTSpaceContext * pContext, const float fAngularThreshold) +{ + // count nr_triangles + int * piTriListIn = NULL, * piGroupTrianglesBuffer = NULL; + STriInfo * pTriInfos = NULL; + SGroup * pGroups = NULL; + STSpace * psTspace = NULL; + int iNrTrianglesIn = 0, f=0, t=0, i=0; + int iNrTSPaces = 0, iTotTris = 0, iDegenTriangles = 0, iNrMaxGroups = 0; + int iNrActiveGroups = 0, index = 0; + const int iNrFaces = pContext->m_pInterface->m_getNumFaces(pContext); + tbool bRes = TFALSE; + const float fThresCos = (float) 
cos((fAngularThreshold*(float)M_PI)/180.0f); + + // verify all call-backs have been set + if ( pContext->m_pInterface->m_getNumFaces==NULL || + pContext->m_pInterface->m_getNumVerticesOfFace==NULL || + pContext->m_pInterface->m_getPosition==NULL || + pContext->m_pInterface->m_getNormal==NULL || + pContext->m_pInterface->m_getTexCoord==NULL ) + return TFALSE; + + // count triangles on supported faces + for (f=0; fm_pInterface->m_getNumVerticesOfFace(pContext, f); + if (verts==3) ++iNrTrianglesIn; + else if (verts==4) iNrTrianglesIn += 2; + } + if (iNrTrianglesIn<=0) return TFALSE; + + // allocate memory for an index list + piTriListIn = (int *) malloc(sizeof(int)*3*iNrTrianglesIn); + pTriInfos = (STriInfo *) malloc(sizeof(STriInfo)*iNrTrianglesIn); + if (piTriListIn==NULL || pTriInfos==NULL) + { + if (piTriListIn!=NULL) free(piTriListIn); + if (pTriInfos!=NULL) free(pTriInfos); + return TFALSE; + } + + // make an initial triangle --> face index list + iNrTSPaces = GenerateInitialVerticesIndexList(pTriInfos, piTriListIn, pContext, iNrTrianglesIn); + + // make a welded index list of identical positions and attributes (pos, norm, texc) + //printf("gen welded index list begin\n"); + GenerateSharedVerticesIndexList(piTriListIn, pContext, iNrTrianglesIn); + //printf("gen welded index list end\n"); + + // Mark all degenerate triangles + iTotTris = iNrTrianglesIn; + iDegenTriangles = 0; + for (t=0; tm_pInterface->m_getNumVerticesOfFace(pContext, f); + if (verts!=3 && verts!=4) continue; + + + // I've decided to let degenerate triangles and group-with-anythings + // vary between left/right hand coordinate systems at the vertices. + // All healthy triangles on the other hand are built to always be either or. + + /*// force the coordinate system orientation to be uniform for every face. 
+ // (this is already the case for good triangles but not for + // degenerate ones and those with bGroupWithAnything==true) + bool bOrient = psTspace[index].bOrient; + if (psTspace[index].iCounter == 0) // tspace was not derived from a group + { + // look for a space created in GenerateTSpaces() by iCounter>0 + bool bNotFound = true; + int i=1; + while (i 0) bNotFound=false; + else ++i; + } + if (!bNotFound) bOrient = psTspace[index+i].bOrient; + }*/ + + // set data + for (i=0; ivOs.x, pTSpace->vOs.y, pTSpace->vOs.z}; + float bitang[] = {pTSpace->vOt.x, pTSpace->vOt.y, pTSpace->vOt.z}; + if (pContext->m_pInterface->m_setTSpace!=NULL) + pContext->m_pInterface->m_setTSpace(pContext, tang, bitang, pTSpace->fMagS, pTSpace->fMagT, pTSpace->bOrient, f, i); + if (pContext->m_pInterface->m_setTSpaceBasic!=NULL) + pContext->m_pInterface->m_setTSpaceBasic(pContext, tang, pTSpace->bOrient==TTRUE ? 1.0f : (-1.0f), f, i); + + ++index; + } + } + + free(psTspace); + + + return TTRUE; +} + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +typedef struct { + float vert[3]; + int index; +} STmpVert; + +static const int g_iCells = 2048; + +#ifdef _MSC_VER +# define NOINLINE __declspec(noinline) +#else +# define NOINLINE __attribute__ ((noinline)) +#endif + +// it is IMPORTANT that this function is called to evaluate the hash since +// inlining could potentially reorder instructions and generate different +// results for the same effective input value fVal. +static NOINLINE int FindGridCell(const float fMin, const float fMax, const float fVal) +{ + const float fIndex = g_iCells * ((fVal-fMin)/(fMax-fMin)); + const int iIndex = (int)fIndex; + return iIndex < g_iCells ? (iIndex >= 0 ? 
iIndex : 0) : (g_iCells - 1); +} + +static void MergeVertsFast(int piTriList_in_and_out[], STmpVert pTmpVert[], const SMikkTSpaceContext * pContext, const int iL_in, const int iR_in); +static void MergeVertsSlow(int piTriList_in_and_out[], const SMikkTSpaceContext * pContext, const int pTable[], const int iEntries); +static void GenerateSharedVerticesIndexListSlow(int piTriList_in_and_out[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn); + +static void GenerateSharedVerticesIndexList(int piTriList_in_and_out[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn) +{ + + // Generate bounding box + int * piHashTable=NULL, * piHashCount=NULL, * piHashOffsets=NULL, * piHashCount2=NULL; + STmpVert * pTmpVert = NULL; + int i=0, iChannel=0, k=0, e=0; + int iMaxCount=0; + SVec3 vMin = GetPosition(pContext, 0), vMax = vMin, vDim; + float fMin, fMax; + for (i=1; i<(iNrTrianglesIn*3); i++) + { + const int index = piTriList_in_and_out[i]; + + const SVec3 vP = GetPosition(pContext, index); + if (vMin.x > vP.x) vMin.x = vP.x; + else if (vMax.x < vP.x) vMax.x = vP.x; + if (vMin.y > vP.y) vMin.y = vP.y; + else if (vMax.y < vP.y) vMax.y = vP.y; + if (vMin.z > vP.z) vMin.z = vP.z; + else if (vMax.z < vP.z) vMax.z = vP.z; + } + + vDim = vsub(vMax,vMin); + iChannel = 0; + fMin = vMin.x; fMax=vMax.x; + if (vDim.y>vDim.x && vDim.y>vDim.z) + { + iChannel=1; + fMin = vMin.y; + fMax = vMax.y; + } + else if (vDim.z>vDim.x) + { + iChannel=2; + fMin = vMin.z; + fMax = vMax.z; + } + + // make allocations + piHashTable = (int *) malloc(sizeof(int)*iNrTrianglesIn*3); + piHashCount = (int *) malloc(sizeof(int)*g_iCells); + piHashOffsets = (int *) malloc(sizeof(int)*g_iCells); + piHashCount2 = (int *) malloc(sizeof(int)*g_iCells); + + if (piHashTable==NULL || piHashCount==NULL || piHashOffsets==NULL || piHashCount2==NULL) + { + if (piHashTable!=NULL) free(piHashTable); + if (piHashCount!=NULL) free(piHashCount); + if (piHashOffsets!=NULL) free(piHashOffsets); + if 
(piHashCount2!=NULL) free(piHashCount2); + GenerateSharedVerticesIndexListSlow(piTriList_in_and_out, pContext, iNrTrianglesIn); + return; + } + memset(piHashCount, 0, sizeof(int)*g_iCells); + memset(piHashCount2, 0, sizeof(int)*g_iCells); + + // count amount of elements in each cell unit + for (i=0; i<(iNrTrianglesIn*3); i++) + { + const int index = piTriList_in_and_out[i]; + const SVec3 vP = GetPosition(pContext, index); + const float fVal = iChannel==0 ? vP.x : (iChannel==1 ? vP.y : vP.z); + const int iCell = FindGridCell(fMin, fMax, fVal); + ++piHashCount[iCell]; + } + + // evaluate start index of each cell. + piHashOffsets[0]=0; + for (k=1; kpTmpVert[l].vert[c]) fvMin[c]=pTmpVert[l].vert[c]; + if (fvMax[c]dx && dy>dz) channel=1; + else if (dz>dx) channel=2; + + fSep = 0.5f*(fvMax[channel]+fvMin[channel]); + + // stop if all vertices are NaNs + if (!isfinite(fSep)) + return; + + // terminate recursion when the separation/average value + // is no longer strictly between fMin and fMax values. 
+ if (fSep>=fvMax[channel] || fSep<=fvMin[channel]) + { + // complete the weld + for (l=iL_in; l<=iR_in; l++) + { + int i = pTmpVert[l].index; + const int index = piTriList_in_and_out[i]; + const SVec3 vP = GetPosition(pContext, index); + const SVec3 vN = GetNormal(pContext, index); + const SVec3 vT = GetTexCoord(pContext, index); + + tbool bNotFound = TTRUE; + int l2=iL_in, i2rec=-1; + while (l20); // at least 2 entries + + // separate (by fSep) all points between iL_in and iR_in in pTmpVert[] + while (iL < iR) + { + tbool bReadyLeftSwap = TFALSE, bReadyRightSwap = TFALSE; + while ((!bReadyLeftSwap) && iL=iL_in && iL<=iR_in); + bReadyLeftSwap = !(pTmpVert[iL].vert[channel]=iL_in && iR<=iR_in); + bReadyRightSwap = pTmpVert[iR].vert[channel]m_pInterface->m_getNumFaces(pContext); f++) + { + const int verts = pContext->m_pInterface->m_getNumVerticesOfFace(pContext, f); + if (verts!=3 && verts!=4) continue; + + pTriInfos[iDstTriIndex].iOrgFaceNumber = f; + pTriInfos[iDstTriIndex].iTSpacesOffs = iTSpacesOffs; + + if (verts==3) + { + unsigned char * pVerts = pTriInfos[iDstTriIndex].vert_num; + pVerts[0]=0; pVerts[1]=1; pVerts[2]=2; + piTriList_out[iDstTriIndex*3+0] = MakeIndex(f, 0); + piTriList_out[iDstTriIndex*3+1] = MakeIndex(f, 1); + piTriList_out[iDstTriIndex*3+2] = MakeIndex(f, 2); + ++iDstTriIndex; // next + } + else + { + { + pTriInfos[iDstTriIndex+1].iOrgFaceNumber = f; + pTriInfos[iDstTriIndex+1].iTSpacesOffs = iTSpacesOffs; + } + + { + // need an order independent way to evaluate + // tspace on quads. This is done by splitting + // along the shortest diagonal. 
+ const int i0 = MakeIndex(f, 0); + const int i1 = MakeIndex(f, 1); + const int i2 = MakeIndex(f, 2); + const int i3 = MakeIndex(f, 3); + const SVec3 T0 = GetTexCoord(pContext, i0); + const SVec3 T1 = GetTexCoord(pContext, i1); + const SVec3 T2 = GetTexCoord(pContext, i2); + const SVec3 T3 = GetTexCoord(pContext, i3); + const float distSQ_02 = LengthSquared(vsub(T2,T0)); + const float distSQ_13 = LengthSquared(vsub(T3,T1)); + tbool bQuadDiagIs_02; + if (distSQ_02m_pInterface->m_getPosition(pContext, pos, iF, iI); + res.x=pos[0]; res.y=pos[1]; res.z=pos[2]; + return res; +} + +static SVec3 GetNormal(const SMikkTSpaceContext * pContext, const int index) +{ + int iF, iI; + SVec3 res; float norm[3]; + IndexToData(&iF, &iI, index); + pContext->m_pInterface->m_getNormal(pContext, norm, iF, iI); + res.x=norm[0]; res.y=norm[1]; res.z=norm[2]; + return res; +} + +static SVec3 GetTexCoord(const SMikkTSpaceContext * pContext, const int index) +{ + int iF, iI; + SVec3 res; float texc[2]; + IndexToData(&iF, &iI, index); + pContext->m_pInterface->m_getTexCoord(pContext, texc, iF, iI); + res.x=texc[0]; res.y=texc[1]; res.z=1.0f; + return res; +} + +///////////////////////////////////////////////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////////////////////////////////////////////// + +typedef union { + struct + { + int i0, i1, f; + }; + int array[3]; +} SEdge; + +static void BuildNeighborsFast(STriInfo pTriInfos[], SEdge * pEdges, const int piTriListIn[], const int iNrTrianglesIn); +static void BuildNeighborsSlow(STriInfo pTriInfos[], const int piTriListIn[], const int iNrTrianglesIn); + +// returns the texture area times 2 +static float CalcTexArea(const SMikkTSpaceContext * pContext, const int indices[]) +{ + const SVec3 t1 = GetTexCoord(pContext, indices[0]); + const SVec3 t2 = GetTexCoord(pContext, indices[1]); + const SVec3 t3 = GetTexCoord(pContext, indices[2]); + + const float t21x = t2.x-t1.x; + const 
float t21y = t2.y-t1.y; + const float t31x = t3.x-t1.x; + const float t31y = t3.y-t1.y; + + const float fSignedAreaSTx2 = t21x*t31y - t21y*t31x; + + return fSignedAreaSTx2<0 ? (-fSignedAreaSTx2) : fSignedAreaSTx2; +} + +static void InitTriInfo(STriInfo pTriInfos[], const int piTriListIn[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn) +{ + int f=0, i=0, t=0; + // pTriInfos[f].iFlag is cleared in GenerateInitialVerticesIndexList() which is called before this function. + + // generate neighbor info list + for (f=0; f0 ? ORIENT_PRESERVING : 0); + + if ( NotZero(fSignedAreaSTx2) ) + { + const float fAbsArea = fabsf(fSignedAreaSTx2); + const float fLenOs = Length(vOs); + const float fLenOt = Length(vOt); + const float fS = (pTriInfos[f].iFlag&ORIENT_PRESERVING)==0 ? (-1.0f) : 1.0f; + if ( NotZero(fLenOs) ) pTriInfos[f].vOs = vscale(fS/fLenOs, vOs); + if ( NotZero(fLenOt) ) pTriInfos[f].vOt = vscale(fS/fLenOt, vOt); + + // evaluate magnitudes prior to normalization of vOs and vOt + pTriInfos[f].fMagS = fLenOs / fAbsArea; + pTriInfos[f].fMagT = fLenOt / fAbsArea; + + // if this is a good triangle + if ( NotZero(pTriInfos[f].fMagS) && NotZero(pTriInfos[f].fMagT)) + pTriInfos[f].iFlag &= (~GROUP_WITH_ANY); + } + } + + // force otherwise healthy quads to a fixed orientation + while (t<(iNrTrianglesIn-1)) + { + const int iFO_a = pTriInfos[t].iOrgFaceNumber; + const int iFO_b = pTriInfos[t+1].iOrgFaceNumber; + if (iFO_a==iFO_b) // this is a quad + { + const tbool bIsDeg_a = (pTriInfos[t].iFlag&MARK_DEGENERATE)!=0 ? TTRUE : TFALSE; + const tbool bIsDeg_b = (pTriInfos[t+1].iFlag&MARK_DEGENERATE)!=0 ? TTRUE : TFALSE; + + // bad triangles should already have been removed by + // DegenPrologue(), but just in case check bIsDeg_a and bIsDeg_a are false + if ((bIsDeg_a||bIsDeg_b)==TFALSE) + { + const tbool bOrientA = (pTriInfos[t].iFlag&ORIENT_PRESERVING)!=0 ? TTRUE : TFALSE; + const tbool bOrientB = (pTriInfos[t+1].iFlag&ORIENT_PRESERVING)!=0 ? 
TTRUE : TFALSE; + // if this happens the quad has extremely bad mapping!! + if (bOrientA!=bOrientB) + { + //printf("found quad with bad mapping\n"); + tbool bChooseOrientFirstTri = TFALSE; + if ((pTriInfos[t+1].iFlag&GROUP_WITH_ANY)!=0) bChooseOrientFirstTri = TTRUE; + else if ( CalcTexArea(pContext, &piTriListIn[t*3+0]) >= CalcTexArea(pContext, &piTriListIn[(t+1)*3+0]) ) + bChooseOrientFirstTri = TTRUE; + + // force match + { + const int t0 = bChooseOrientFirstTri ? t : (t+1); + const int t1 = bChooseOrientFirstTri ? (t+1) : t; + pTriInfos[t1].iFlag &= (~ORIENT_PRESERVING); // clear first + pTriInfos[t1].iFlag |= (pTriInfos[t0].iFlag&ORIENT_PRESERVING); // copy bit + } + } + } + t += 2; + } + else + ++t; + } + + // match up edge pairs + { + SEdge * pEdges = (SEdge *) malloc(sizeof(SEdge)*iNrTrianglesIn*3); + if (pEdges==NULL) + BuildNeighborsSlow(pTriInfos, piTriListIn, iNrTrianglesIn); + else + { + BuildNeighborsFast(pTriInfos, pEdges, piTriListIn, iNrTrianglesIn); + + free(pEdges); + } + } +} + +///////////////////////////////////////////////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////////////////////////////////////////////// + +static tbool AssignRecur(const int piTriListIn[], STriInfo psTriInfos[], const int iMyTriIndex, SGroup * pGroup); +static void AddTriToGroup(SGroup * pGroup, const int iTriIndex); + +static int Build4RuleGroups(STriInfo pTriInfos[], SGroup pGroups[], int piGroupTrianglesBuffer[], const int piTriListIn[], const int iNrTrianglesIn) +{ + const int iNrMaxGroups = iNrTrianglesIn*3; + int iNrActiveGroups = 0; + int iOffset = 0, f=0, i=0; + (void)iNrMaxGroups; /* quiet warnings in non debug mode */ + for (f=0; fiVertexRepresentitive = vert_index; + pTriInfos[f].AssignedGroup[i]->bOrientPreservering = (pTriInfos[f].iFlag&ORIENT_PRESERVING)!=0; + pTriInfos[f].AssignedGroup[i]->iNrFaces = 0; + pTriInfos[f].AssignedGroup[i]->pFaceIndices = &piGroupTrianglesBuffer[iOffset]; + 
++iNrActiveGroups; + + AddTriToGroup(pTriInfos[f].AssignedGroup[i], f); + bOrPre = (pTriInfos[f].iFlag&ORIENT_PRESERVING)!=0 ? TTRUE : TFALSE; + neigh_indexL = pTriInfos[f].FaceNeighbors[i]; + neigh_indexR = pTriInfos[f].FaceNeighbors[i>0?(i-1):2]; + if (neigh_indexL>=0) // neighbor + { + const tbool bAnswer = + AssignRecur(piTriListIn, pTriInfos, neigh_indexL, + pTriInfos[f].AssignedGroup[i] ); + + const tbool bOrPre2 = (pTriInfos[neigh_indexL].iFlag&ORIENT_PRESERVING)!=0 ? TTRUE : TFALSE; + const tbool bDiff = bOrPre!=bOrPre2 ? TTRUE : TFALSE; + assert(bAnswer || bDiff); + (void)bAnswer, (void)bDiff; /* quiet warnings in non debug mode */ + } + if (neigh_indexR>=0) // neighbor + { + const tbool bAnswer = + AssignRecur(piTriListIn, pTriInfos, neigh_indexR, + pTriInfos[f].AssignedGroup[i] ); + + const tbool bOrPre2 = (pTriInfos[neigh_indexR].iFlag&ORIENT_PRESERVING)!=0 ? TTRUE : TFALSE; + const tbool bDiff = bOrPre!=bOrPre2 ? TTRUE : TFALSE; + assert(bAnswer || bDiff); + (void)bAnswer, (void)bDiff; /* quiet warnings in non debug mode */ + } + + // update offset + iOffset += pTriInfos[f].AssignedGroup[i]->iNrFaces; + // since the groups are disjoint a triangle can never + // belong to more than 3 groups. Subsequently something + // is completely screwed if this assertion ever hits. 
+ assert(iOffset <= iNrMaxGroups); + } + } + } + + return iNrActiveGroups; +} + +static void AddTriToGroup(SGroup * pGroup, const int iTriIndex) +{ + pGroup->pFaceIndices[pGroup->iNrFaces] = iTriIndex; + ++pGroup->iNrFaces; +} + +static tbool AssignRecur(const int piTriListIn[], STriInfo psTriInfos[], + const int iMyTriIndex, SGroup * pGroup) +{ + STriInfo * pMyTriInfo = &psTriInfos[iMyTriIndex]; + + // track down vertex + const int iVertRep = pGroup->iVertexRepresentitive; + const int * pVerts = &piTriListIn[3*iMyTriIndex+0]; + int i=-1; + if (pVerts[0]==iVertRep) i=0; + else if (pVerts[1]==iVertRep) i=1; + else if (pVerts[2]==iVertRep) i=2; + assert(i>=0 && i<3); + + // early out + if (pMyTriInfo->AssignedGroup[i] == pGroup) return TTRUE; + else if (pMyTriInfo->AssignedGroup[i]!=NULL) return TFALSE; + if ((pMyTriInfo->iFlag&GROUP_WITH_ANY)!=0) + { + // first to group with a group-with-anything triangle + // determines it's orientation. + // This is the only existing order dependency in the code!! + if ( pMyTriInfo->AssignedGroup[0] == NULL && + pMyTriInfo->AssignedGroup[1] == NULL && + pMyTriInfo->AssignedGroup[2] == NULL ) + { + pMyTriInfo->iFlag &= (~ORIENT_PRESERVING); + pMyTriInfo->iFlag |= (pGroup->bOrientPreservering ? ORIENT_PRESERVING : 0); + } + } + { + const tbool bOrient = (pMyTriInfo->iFlag&ORIENT_PRESERVING)!=0 ? 
TTRUE : TFALSE; + if (bOrient != pGroup->bOrientPreservering) return TFALSE; + } + + AddTriToGroup(pGroup, iMyTriIndex); + pMyTriInfo->AssignedGroup[i] = pGroup; + + { + const int neigh_indexL = pMyTriInfo->FaceNeighbors[i]; + const int neigh_indexR = pMyTriInfo->FaceNeighbors[i>0?(i-1):2]; + if (neigh_indexL>=0) + AssignRecur(piTriListIn, psTriInfos, neigh_indexL, pGroup); + if (neigh_indexR>=0) + AssignRecur(piTriListIn, psTriInfos, neigh_indexR, pGroup); + } + + + + return TTRUE; +} + +///////////////////////////////////////////////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////////////////////////////////////////////// + +static tbool CompareSubGroups(const SSubGroup * pg1, const SSubGroup * pg2); +static void QuickSort(int* pSortBuffer, int iLeft, int iRight, unsigned int uSeed); +static STSpace EvalTspace(int face_indices[], const int iFaces, const int piTriListIn[], const STriInfo pTriInfos[], const SMikkTSpaceContext * pContext, const int iVertexRepresentitive); + +static tbool GenerateTSpaces(STSpace psTspace[], const STriInfo pTriInfos[], const SGroup pGroups[], + const int iNrActiveGroups, const int piTriListIn[], const float fThresCos, + const SMikkTSpaceContext * pContext) +{ + STSpace * pSubGroupTspace = NULL; + SSubGroup * pUniSubGroups = NULL; + int * pTmpMembers = NULL; + int iMaxNrFaces=0, iUniqueTspaces=0, g=0, i=0; + for (g=0; giNrFaces; i++) // triangles + { + const int f = pGroup->pFaceIndices[i]; // triangle number + int index=-1, iVertIndex=-1, iOF_1=-1, iMembers=0, j=0, l=0; + SSubGroup tmp_group; + tbool bFound; + SVec3 n, vOs, vOt; + if (pTriInfos[f].AssignedGroup[0]==pGroup) index=0; + else if (pTriInfos[f].AssignedGroup[1]==pGroup) index=1; + else if (pTriInfos[f].AssignedGroup[2]==pGroup) index=2; + assert(index>=0 && index<3); + + iVertIndex = piTriListIn[f*3+index]; + assert(iVertIndex==pGroup->iVertexRepresentitive); + + // is normalized already + n = 
GetNormal(pContext, iVertIndex); + + // project + vOs = vsub(pTriInfos[f].vOs, vscale(vdot(n,pTriInfos[f].vOs), n)); + vOt = vsub(pTriInfos[f].vOt, vscale(vdot(n,pTriInfos[f].vOt), n)); + if ( VNotZero(vOs) ) vOs = Normalize(vOs); + if ( VNotZero(vOt) ) vOt = Normalize(vOt); + + // original face number + iOF_1 = pTriInfos[f].iOrgFaceNumber; + + iMembers = 0; + for (j=0; jiNrFaces; j++) + { + const int t = pGroup->pFaceIndices[j]; // triangle number + const int iOF_2 = pTriInfos[t].iOrgFaceNumber; + + // project + SVec3 vOs2 = vsub(pTriInfos[t].vOs, vscale(vdot(n,pTriInfos[t].vOs), n)); + SVec3 vOt2 = vsub(pTriInfos[t].vOt, vscale(vdot(n,pTriInfos[t].vOt), n)); + if ( VNotZero(vOs2) ) vOs2 = Normalize(vOs2); + if ( VNotZero(vOt2) ) vOt2 = Normalize(vOt2); + + { + const tbool bAny = ( (pTriInfos[f].iFlag | pTriInfos[t].iFlag) & GROUP_WITH_ANY )!=0 ? TTRUE : TFALSE; + // make sure triangles which belong to the same quad are joined. + const tbool bSameOrgFace = iOF_1==iOF_2 ? TTRUE : TFALSE; + + const float fCosS = vdot(vOs,vOs2); + const float fCosT = vdot(vOt,vOt2); + + assert(f!=t || bSameOrgFace); // sanity check + if (bAny || bSameOrgFace || (fCosS>fThresCos && fCosT>fThresCos)) + pTmpMembers[iMembers++] = t; + } + } + + // sort pTmpMembers + tmp_group.iNrFaces = iMembers; + tmp_group.pTriMembers = pTmpMembers; + if (iMembers>1) + { + unsigned int uSeed = INTERNAL_RND_SORT_SEED; // could replace with a random seed? 
+ QuickSort(pTmpMembers, 0, iMembers-1, uSeed); + } + + // look for an existing match + bFound = TFALSE; + l=0; + while (liVertexRepresentitive); + ++iUniqueSubGroups; + } + + // output tspace + { + const int iOffs = pTriInfos[f].iTSpacesOffs; + const int iVert = pTriInfos[f].vert_num[index]; + STSpace * pTS_out = &psTspace[iOffs+iVert]; + assert(pTS_out->iCounter<2); + assert(((pTriInfos[f].iFlag&ORIENT_PRESERVING)!=0) == pGroup->bOrientPreservering); + if (pTS_out->iCounter==1) + { + *pTS_out = AvgTSpace(pTS_out, &pSubGroupTspace[l]); + pTS_out->iCounter = 2; // update counter + pTS_out->bOrient = pGroup->bOrientPreservering; + } + else + { + assert(pTS_out->iCounter==0); + *pTS_out = pSubGroupTspace[l]; + pTS_out->iCounter = 1; // update counter + pTS_out->bOrient = pGroup->bOrientPreservering; + } + } + } + + // clean up and offset iUniqueTspaces + for (s=0; s=0 && i<3); + + // project + index = piTriListIn[3*f+i]; + n = GetNormal(pContext, index); + vOs = vsub(pTriInfos[f].vOs, vscale(vdot(n,pTriInfos[f].vOs), n)); + vOt = vsub(pTriInfos[f].vOt, vscale(vdot(n,pTriInfos[f].vOt), n)); + if ( VNotZero(vOs) ) vOs = Normalize(vOs); + if ( VNotZero(vOt) ) vOt = Normalize(vOt); + + i2 = piTriListIn[3*f + (i<2?(i+1):0)]; + i1 = piTriListIn[3*f + i]; + i0 = piTriListIn[3*f + (i>0?(i-1):2)]; + + p0 = GetPosition(pContext, i0); + p1 = GetPosition(pContext, i1); + p2 = GetPosition(pContext, i2); + v1 = vsub(p0,p1); + v2 = vsub(p2,p1); + + // project + v1 = vsub(v1, vscale(vdot(n,v1),n)); if ( VNotZero(v1) ) v1 = Normalize(v1); + v2 = vsub(v2, vscale(vdot(n,v2),n)); if ( VNotZero(v2) ) v2 = Normalize(v2); + + // weight contribution by the angle + // between the two edge vectors + fCos = vdot(v1,v2); fCos=fCos>1?1:(fCos<(-1) ? 
(-1) : fCos); + fAngle = (float) acos(fCos); + fMagS = pTriInfos[f].fMagS; + fMagT = pTriInfos[f].fMagT; + + res.vOs=vadd(res.vOs, vscale(fAngle,vOs)); + res.vOt=vadd(res.vOt,vscale(fAngle,vOt)); + res.fMagS+=(fAngle*fMagS); + res.fMagT+=(fAngle*fMagT); + fAngleSum += fAngle; + } + } + + // normalize + if ( VNotZero(res.vOs) ) res.vOs = Normalize(res.vOs); + if ( VNotZero(res.vOt) ) res.vOt = Normalize(res.vOt); + if (fAngleSum>0) + { + res.fMagS /= fAngleSum; + res.fMagT /= fAngleSum; + } + + return res; +} + +static tbool CompareSubGroups(const SSubGroup * pg1, const SSubGroup * pg2) +{ + tbool bStillSame=TTRUE; + int i=0; + if (pg1->iNrFaces!=pg2->iNrFaces) return TFALSE; + while (iiNrFaces && bStillSame) + { + bStillSame = pg1->pTriMembers[i]==pg2->pTriMembers[i] ? TTRUE : TFALSE; + if (bStillSame) ++i; + } + return bStillSame; +} + +static void QuickSort(int* pSortBuffer, int iLeft, int iRight, unsigned int uSeed) +{ + int iL, iR, n, index, iMid, iTmp; + + // Random + unsigned int t=uSeed&31; + t=(uSeed<>(32-t)); + uSeed=uSeed+t+3; + // Random end + + iL=iLeft; iR=iRight; + n = (iR-iL)+1; + assert(n>=0); + index = (int) (uSeed%n); + + iMid=pSortBuffer[index + iL]; + + + do + { + while (pSortBuffer[iL] < iMid) + ++iL; + while (pSortBuffer[iR] > iMid) + --iR; + + if (iL <= iR) + { + iTmp = pSortBuffer[iL]; + pSortBuffer[iL] = pSortBuffer[iR]; + pSortBuffer[iR] = iTmp; + ++iL; --iR; + } + } + while (iL <= iR); + + if (iLeft < iR) + QuickSort(pSortBuffer, iLeft, iR, uSeed); + if (iL < iRight) + QuickSort(pSortBuffer, iL, iRight, uSeed); +} + +///////////////////////////////////////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////////////////////////////////////// + +static void QuickSortEdges(SEdge * pSortBuffer, int iLeft, int iRight, const int channel, unsigned int uSeed); +static void GetEdge(int * i0_out, int * i1_out, int * edgenum_out, const int indices[], const int i0_in, const int i1_in); + 
+static void BuildNeighborsFast(STriInfo pTriInfos[], SEdge * pEdges, const int piTriListIn[], const int iNrTrianglesIn) +{ + // build array of edges + unsigned int uSeed = INTERNAL_RND_SORT_SEED; // could replace with a random seed? + int iEntries=0, iCurStartIndex=-1, f=0, i=0; + for (f=0; f pSortBuffer[iRight].array[channel]) + { + sTmp = pSortBuffer[iLeft]; + pSortBuffer[iLeft] = pSortBuffer[iRight]; + pSortBuffer[iRight] = sTmp; + } + return; + } + + // Random + t=uSeed&31; + t=(uSeed<>(32-t)); + uSeed=uSeed+t+3; + // Random end + + iL = iLeft; + iR = iRight; + n = (iR-iL)+1; + assert(n>=0); + index = (int) (uSeed%n); + + iMid=pSortBuffer[index + iL].array[channel]; + + do + { + while (pSortBuffer[iL].array[channel] < iMid) + ++iL; + while (pSortBuffer[iR].array[channel] > iMid) + --iR; + + if (iL <= iR) + { + sTmp = pSortBuffer[iL]; + pSortBuffer[iL] = pSortBuffer[iR]; + pSortBuffer[iR] = sTmp; + ++iL; --iR; + } + } + while (iL <= iR); + + if (iLeft < iR) + QuickSortEdges(pSortBuffer, iLeft, iR, channel, uSeed); + if (iL < iRight) + QuickSortEdges(pSortBuffer, iL, iRight, channel, uSeed); +} + +// resolve ordering and edge number +static void GetEdge(int * i0_out, int * i1_out, int * edgenum_out, const int indices[], const int i0_in, const int i1_in) +{ + *edgenum_out = -1; + + // test if first index is on the edge + if (indices[0]==i0_in || indices[0]==i1_in) + { + // test if second index is on the edge + if (indices[1]==i0_in || indices[1]==i1_in) + { + edgenum_out[0]=0; // first edge + i0_out[0]=indices[0]; + i1_out[0]=indices[1]; + } + else + { + edgenum_out[0]=2; // third edge + i0_out[0]=indices[2]; + i1_out[0]=indices[0]; + } + } + else + { + // only second and third index is on the edge + edgenum_out[0]=1; // second edge + i0_out[0]=indices[1]; + i1_out[0]=indices[2]; + } +} + + +///////////////////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////// Degenerate triangles 
//////////////////////////////////// + +static void DegenPrologue(STriInfo pTriInfos[], int piTriList_out[], const int iNrTrianglesIn, const int iTotTris) +{ + int iNextGoodTriangleSearchIndex=-1; + tbool bStillFindingGoodOnes; + + // locate quads with only one good triangle + int t=0; + while (t<(iTotTris-1)) + { + const int iFO_a = pTriInfos[t].iOrgFaceNumber; + const int iFO_b = pTriInfos[t+1].iOrgFaceNumber; + if (iFO_a==iFO_b) // this is a quad + { + const tbool bIsDeg_a = (pTriInfos[t].iFlag&MARK_DEGENERATE)!=0 ? TTRUE : TFALSE; + const tbool bIsDeg_b = (pTriInfos[t+1].iFlag&MARK_DEGENERATE)!=0 ? TTRUE : TFALSE; + if ((bIsDeg_a^bIsDeg_b)!=0) + { + pTriInfos[t].iFlag |= QUAD_ONE_DEGEN_TRI; + pTriInfos[t+1].iFlag |= QUAD_ONE_DEGEN_TRI; + } + t += 2; + } + else + ++t; + } + + // reorder list so all degen triangles are moved to the back + // without reordering the good triangles + iNextGoodTriangleSearchIndex = 1; + t=0; + bStillFindingGoodOnes = TTRUE; + while (t (t+1)); + + // swap triangle t0 and t1 + if (!bJustADegenerate) + { + int i=0; + for (i=0; i<3; i++) + { + const int index = piTriList_out[t0*3+i]; + piTriList_out[t0*3+i] = piTriList_out[t1*3+i]; + piTriList_out[t1*3+i] = index; + } + { + const STriInfo tri_info = pTriInfos[t0]; + pTriInfos[t0] = pTriInfos[t1]; + pTriInfos[t1] = tri_info; + } + } + else + bStillFindingGoodOnes = TFALSE; // this is not supposed to happen + } + + if (bStillFindingGoodOnes) ++t; + } + + assert(bStillFindingGoodOnes); // code will still work. + assert(iNrTrianglesIn == t); +} + +static void DegenEpilogue(STSpace psTspace[], STriInfo pTriInfos[], int piTriListIn[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn, const int iTotTris) +{ + int t=0, i=0; + // deal with degenerate triangles + // punishment for degenerate triangles is O(N^2) + for (t=iNrTrianglesIn; t