diff --git a/AE/CMakeLists.txt b/AE/CMakeLists.txt index c05a373f..69b03dba 100644 --- a/AE/CMakeLists.txt +++ b/AE/CMakeLists.txt @@ -2,9 +2,26 @@ cmake_minimum_required( VERSION 3.10 FATAL_ERROR ) +# CMAKE_BUILD_TYPE is not defined if used IDE +if (DEFINED CMAKE_BUILD_TYPE) + if ((${CMAKE_BUILD_TYPE} STREQUAL "Debug") OR + (${CMAKE_BUILD_TYPE} STREQUAL "Develop") OR + (${CMAKE_BUILD_TYPE} STREQUAL "Profile") OR + (${CMAKE_BUILD_TYPE} STREQUAL "Release") ) + # ok + else() + message( STATUS "override unsupported CMAKE_BUILD_TYPE '${CMAKE_BUILD_TYPE}' by 'Release', see CMAKE_CONFIGURATION_TYPES in 'compilers.cmake'" ) + set( CMAKE_BUILD_TYPE "Release" CACHE INTERNAL "" FORCE ) + endif() +endif() + +#---------------------------------------------------------- + project( "AE" - VERSION 23.10.199 # year, month, version - LANGUAGES C CXX ) + VERSION 23.12.214 # year, month, version + LANGUAGES C CXX + DESCRIPTION "async game engine" + ) if (APPLE) enable_language(OBJC) endif() diff --git a/AE/android/.gitignore b/AE/android/.gitignore index 2ff1daf9..598240fe 100644 --- a/AE/android/.gitignore +++ b/AE/android/.gitignore @@ -4,4 +4,3 @@ /local.properties /build /captures -/keystore diff --git a/AE/android/build.gradle b/AE/android/build.gradle index 019d0b1b..bb62a26b 100644 --- a/AE/android/build.gradle +++ b/AE/android/build.gradle @@ -5,7 +5,7 @@ buildscript { mavenCentral() } dependencies { - classpath 'com.android.tools.build:gradle:8.0.0' + classpath 'com.android.tools.build:gradle:8.1.4' } } diff --git a/AE/android/demo/build.gradle b/AE/android/demo/build.gradle index fb687d90..287bd6ac 100644 --- a/AE/android/demo/build.gradle +++ b/AE/android/demo/build.gradle @@ -1,7 +1,7 @@ apply plugin: 'com.android.application' android { - compileSdkVersion 33 + compileSdk 34 compileOptions { sourceCompatibility JavaVersion.VERSION_1_8 @@ -11,14 +11,14 @@ android { defaultConfig { applicationId 'AE.Demo' minSdkVersion 24 // for vulkan - targetSdkVersion 33 + 
targetSdkVersion 34 ndk { abiFilters "arm64-v8a", "armeabi-v7a" //, "x86", "x86_64" } externalNativeBuild { cmake { - arguments '-DANDROID_STL=c++_static', + arguments '-DANDROID_STL=c++_static', // see https://developer.android.com/ndk/guides/cpp-support#static_runtimes '-DANDROID_ARM_NEON=ON', '-DAE_ENABLE_VULKAN=ON' } @@ -30,6 +30,7 @@ android { //debuggable true // to enable logcat in release minifyEnabled = false proguardFiles getDefaultProguardFile('proguard-android.txt'), 'proguard-rules.pro' + signingConfig signingConfigs.debug } } diff --git a/AE/android/demo/src/main/AndroidManifest.xml b/AE/android/demo/src/main/AndroidManifest.xml index e5720991..c06e753c 100644 --- a/AE/android/demo/src/main/AndroidManifest.xml +++ b/AE/android/demo/src/main/AndroidManifest.xml @@ -5,37 +5,34 @@ - + android:name=".DemoApplication" + android:label="AE Demo" + android:hardwareAccelerated="true" + android:isGame="true"> - - - - - + + + + + - - - - + + + + diff --git a/AE/android/demo/src/main/java/AE/demo/DemoActivity.java b/AE/android/demo/src/main/java/AE/demo/DemoActivity.java index 8e5e3c8f..3b327acc 100644 --- a/AE/android/demo/src/main/java/AE/demo/DemoActivity.java +++ b/AE/android/demo/src/main/java/AE/demo/DemoActivity.java @@ -39,4 +39,9 @@ public final class DemoActivity Log.i(TAG, "onStop"); super.onStop(); } + + @Override public void onBackPressed() { + super.onBackPressed(); + this.finish(); + } } diff --git a/AE/android/engine/build.gradle b/AE/android/engine/build.gradle index 1efaaff7..9c1cc539 100644 --- a/AE/android/engine/build.gradle +++ b/AE/android/engine/build.gradle @@ -1,17 +1,18 @@ apply plugin: 'com.android.library' android { - compileSdkVersion 33 + compileSdk 34 defaultConfig { minSdkVersion 24 // 24+ for vulkan - targetSdkVersion 33 + targetSdkVersion 34 } buildTypes { release { minifyEnabled false proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro' + signingConfig signingConfigs.debug } } @@ -20,7 
+21,7 @@ android { } dependencies { - implementation 'androidx.core:core:1.10.1' + implementation 'androidx.core:core:1.12.0' implementation 'androidx.legacy:legacy-support-core-ui:1.0.0' implementation 'androidx.legacy:legacy-support-v4:1.0.0' } diff --git a/AE/android/engine/src/main/java/AE/engine/BaseActivity.java b/AE/android/engine/src/main/java/AE/engine/BaseActivity.java index cc170472..6998379f 100644 --- a/AE/android/engine/src/main/java/AE/engine/BaseActivity.java +++ b/AE/android/engine/src/main/java/AE/engine/BaseActivity.java @@ -38,7 +38,7 @@ public class BaseActivity View.OnTouchListener, SensorEventListener { - public static final String TAG = "AE"; + public static final String TAG = "<<<< AE >>>>"; private int _wndID = 0; @@ -291,7 +291,7 @@ public void Close () //----------------------------------------------------------------------------- // native - private static native int native_OnCreate (Object wnd); + private static native int native_OnCreate (Object wnd); private static native void native_OnDestroy (int id); private static native void native_OnStart (int id); private static native void native_OnStop (int id); @@ -299,7 +299,7 @@ public void Close () private static native void native_OnEnterBackground (int id); private static native void native_SurfaceChanged (int id, Object surface); private static native void native_SurfaceDestroyed (int id); - private static native int native_Update (int id); + private static native int native_Update (int id); private static native void native_OnKey (int id, int keycode, int action, int repeatCount); private static native void native_OnTouch (int id, int action, int index, int count, float[] data); private static native void native_OnOrientationChanged (int id, int newOrientation); diff --git a/AE/android/engine/src/main/java/AE/engine/BaseApplication.java b/AE/android/engine/src/main/java/AE/engine/BaseApplication.java index 5e098ab0..84704b86 100644 --- 
a/AE/android/engine/src/main/java/AE/engine/BaseApplication.java +++ b/AE/android/engine/src/main/java/AE/engine/BaseApplication.java @@ -43,14 +43,12 @@ private void _SendDirectories () File internalCache = getCacheDir(); File externalPath = getExternalFilesDir(null); File externalCache = getExternalCacheDir(); - File externalStorage = Environment.getExternalStorageDirectory(); native_SetDirectories( (internalPath == null ? "" : internalPath.getAbsolutePath() ), (internalCache == null ? "" : internalCache.getAbsolutePath() ), (externalPath == null ? "" : externalPath.getAbsolutePath() ), - (externalCache == null ? "" : externalCache.getAbsolutePath() ), - (externalStorage == null ? "" : externalStorage.getAbsolutePath()) ); + (externalCache == null ? "" : externalCache.getAbsolutePath() )); } private void _SendSystemInfo () @@ -111,7 +109,7 @@ public final boolean IsNetworkConnected () // native private static native void native_OnCreate (Object app, Object assetMngr); - private static native void native_SetDirectories (String internal, String internalCache, String external, String externalCache, String externalStorage); + private static native void native_SetDirectories (String internal, String internalCache, String external, String externalCache); private static native void native_SetSystemInfo (String iso3Lang0, String iso3Lang1); private static native void native_SetDisplayInfo (int width, int height, float xdpi, float ydpi, int orientation); } diff --git a/AE/android/test/build.gradle b/AE/android/test/build.gradle index cbaee62b..3aba35e8 100644 --- a/AE/android/test/build.gradle +++ b/AE/android/test/build.gradle @@ -1,19 +1,19 @@ apply plugin: 'com.android.application' android { - compileSdkVersion 33 + compileSdk 34 defaultConfig { applicationId 'AE.Test' minSdkVersion 24 // for vulkan - targetSdkVersion 33 + targetSdkVersion 34 ndk { abiFilters "arm64-v8a", "armeabi-v7a" //, "x86", "x86_64" } externalNativeBuild { cmake { - arguments 
'-DANDROID_STL=c++_static', + arguments '-DANDROID_STL=c++_static', // see https://developer.android.com/ndk/guides/cpp-support#static_runtimes '-DANDROID_ARM_NEON=ON', '-DAE_ENABLE_VULKAN=ON' } @@ -22,9 +22,10 @@ android { buildTypes { release { - //debuggable true // to enable logcat in release + debuggable true // to enable logcat in release minifyEnabled = false proguardFiles getDefaultProguardFile('proguard-android.txt'), 'proguard-rules.pro' + signingConfig signingConfigs.debug } } @@ -36,6 +37,11 @@ android { path '../../CMakeLists.txt' } } + sourceSets { + main { + assets.srcDirs = ['../../../AE-Temp/engine/graphics/vk'] + } + } packagingOptions { jniLibs { excludes += ['lib/**/*.so'] diff --git a/AE/android/test/src/main/AndroidManifest.xml b/AE/android/test/src/main/AndroidManifest.xml index bd107caf..1370cfc5 100644 --- a/AE/android/test/src/main/AndroidManifest.xml +++ b/AE/android/test/src/main/AndroidManifest.xml @@ -5,37 +5,34 @@ - + android:name=".TestApplication" + android:label="AE tests" + android:hardwareAccelerated="true" + android:isGame="true"> - - - - - + + + + + - - - - + + + + diff --git a/AE/build_scripts/android/_build-debug.bat b/AE/build_scripts/android/_build-debug.bat index 6292e022..92f2f953 100644 --- a/AE/build_scripts/android/_build-debug.bat +++ b/AE/build_scripts/android/_build-debug.bat @@ -1,12 +1,5 @@ cd ../../android -rmdir /Q /S ".gradle" -rmdir /Q /S ".idea" -rmdir /Q /S "build" -rmdir /Q /S "engine/build" -rmdir /Q /S "test/.cxx" -rmdir /Q /S "test/build" -rmdir /Q /S "demo/.cxx" -rmdir /Q /S "demo/build" -gradlew buildCMakeDebug[arm64-v8a] +gradlew assembleDebug +::gradlew buildCMakeDebug[arm64-v8a] ::gradlew buildCMakeDebug[armeabi-v7a] diff --git a/AE/build_scripts/android/_build-release.bat b/AE/build_scripts/android/_build-release.bat new file mode 100644 index 00000000..318403d4 --- /dev/null +++ b/AE/build_scripts/android/_build-release.bat @@ -0,0 +1,5 @@ +cd ../../android + +gradlew assembleRelease +::gradlew 
buildCMakeRelWithDebInfo[arm64-v8a] +::gradlew buildCMakeRelWithDebInfo[armeabi-v7a] diff --git a/AE/build_scripts/android/build-debug.bat b/AE/build_scripts/android/build-debug.bat index 58d66c68..8915b659 100644 --- a/AE/build_scripts/android/build-debug.bat +++ b/AE/build_scripts/android/build-debug.bat @@ -1 +1 @@ -"_build-debug.bat" && pause +"cleanup.bat" && "_build-debug.bat" && pause diff --git a/AE/build_scripts/android/build-release.bat b/AE/build_scripts/android/build-release.bat new file mode 100644 index 00000000..605a3817 --- /dev/null +++ b/AE/build_scripts/android/build-release.bat @@ -0,0 +1 @@ +"cleanup.bat" && "_build-release.bat" && pause diff --git a/AE/build_scripts/android/cleanup.bat b/AE/build_scripts/android/cleanup.bat index 7c8c150f..d6c30883 100644 --- a/AE/build_scripts/android/cleanup.bat +++ b/AE/build_scripts/android/cleanup.bat @@ -11,3 +11,5 @@ rmdir /Q /S "engine\build" rmdir /Q /S "test\.cxx" rmdir /Q /S "test\build" + +cd ../build_scripts/android diff --git a/AE/build_scripts/android/copy-graphics-test-result.bat b/AE/build_scripts/android/copy-graphics-test-result.bat new file mode 100644 index 00000000..29544709 --- /dev/null +++ b/AE/build_scripts/android/copy-graphics-test-result.bat @@ -0,0 +1,3 @@ +adb pull "sdcard/Android/data/AE.Test/cache/tests_graphics_vulkan_refdump/Mali-G57 MC2" "C:\Projects\AllinOne\AE\engine\tests\graphics\Vulkan\ref" +adb pull "sdcard/Android/data/AE.Test/cache/tests_graphics_vulkan_refimg/Mali-G57 MC2" "C:\Projects\AllinOne\AE-Data\tests\graphics\vulkan" +pause diff --git a/AE/build_scripts/clone_3party.bat b/AE/build_scripts/clone_3party.bat index 9a469ff4..2929adf6 100644 --- a/AE/build_scripts/clone_3party.bat +++ b/AE/build_scripts/clone_3party.bat @@ -63,6 +63,9 @@ git clone --bare "https://github.com/google/hardware-perfcounter.git" "hardware- rmdir /Q /S "hardware-perfcounter-envytools" git clone --bare "https://github.com/freedreno/envytools.git" "hardware-perfcounter-envytools" +rmdir 
/Q /S "xxHash" +git clone --bare "https://github.com/Cyan4973/xxHash.git" "xxHash" + rmdir /Q /S "Abseil" git clone --bare "https://github.com/abseil/abseil-cpp.git" "Abseil" diff --git a/AE/build_scripts/mac_arm64/init-vk.sh b/AE/build_scripts/mac_arm64/init-vk.sh new file mode 100644 index 00000000..ee03762a --- /dev/null +++ b/AE/build_scripts/mac_arm64/init-vk.sh @@ -0,0 +1,9 @@ +cd ../../.. +rm -rf _build_arm64 + +mkdir _build_arm64 +cd _build_arm64 +cmake -G "Xcode" -DCMAKE_OSX_DEPLOYMENT_TARGET=10.15 -DCMAKE_OSX_ARCHITECTURES=arm64 -DAE_ENABLE_VULKAN=ON "../AE" + +read -p "open project?" +open ./AE.xcodeproj diff --git a/AE/build_scripts/mac_arm64/init.sh b/AE/build_scripts/mac_arm64/init.sh index 68b616c7..c6b38dd1 100644 --- a/AE/build_scripts/mac_arm64/init.sh +++ b/AE/build_scripts/mac_arm64/init.sh @@ -3,4 +3,7 @@ rm -rf _build_arm64 mkdir _build_arm64 cd _build_arm64 -cmake -G "Xcode" -DCMAKE_OSX_DEPLOYMENT_TARGET=10.15 -DCMAKE_OSX_ARCHITECTURES=arm64 "../AE" +cmake -G "Xcode" -DCMAKE_OSX_DEPLOYMENT_TARGET=10.15 -DCMAKE_OSX_ARCHITECTURES=arm64 -DAE_ENABLE_METAL=ON "../AE" + +read -p "open project?" +open ./AE.xcodeproj diff --git a/AE/build_scripts/mac_x64/init-vk.sh b/AE/build_scripts/mac_x64/init-vk.sh new file mode 100644 index 00000000..4780a3c8 --- /dev/null +++ b/AE/build_scripts/mac_x64/init-vk.sh @@ -0,0 +1,11 @@ +# warning: on ARM CPU use 'init_on_arm-vk.sh' + +cd ../../.. +rm -rf _build_x64_vk + +mkdir _build_x64_vk +cd _build_x64_vk +cmake -G "Xcode" -DCMAKE_OSX_DEPLOYMENT_TARGET=10.15 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DAE_SIMD_SSE=20 -DAE_ENABLE_VULKAN=ON "../AE" + +read -p "open project?" 
+open ./AE.xcodeproj diff --git a/AE/build_scripts/mac_x64/init.sh b/AE/build_scripts/mac_x64/init.sh index f201d357..fb89e7fe 100644 --- a/AE/build_scripts/mac_x64/init.sh +++ b/AE/build_scripts/mac_x64/init.sh @@ -6,3 +6,6 @@ rm -rf _build_x64 mkdir _build_x64 cd _build_x64 cmake -G "Xcode" -DCMAKE_OSX_DEPLOYMENT_TARGET=10.15 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DAE_SIMD_AVX=2 "../AE" + +read -p "open project?" +open ./AE.xcodeproj diff --git a/AE/build_scripts/mac_x64/init_on_arm-vk.sh b/AE/build_scripts/mac_x64/init_on_arm-vk.sh new file mode 100644 index 00000000..f28d75a5 --- /dev/null +++ b/AE/build_scripts/mac_x64/init_on_arm-vk.sh @@ -0,0 +1,15 @@ +# how to compile on ARM64 +# https://stackoverflow.com/questions/67386941/using-x86-libraries-and-openmp-on-macos-arm64-architecture/67418208#67418208 + +export CC=/usr/local/opt/llvm/bin/clang +export CXX=/usr/local/opt/llvm/bin/clang++ + +cd ../../.. +rm -rf _build_x64_vk + +mkdir _build_x64_vk +cd _build_x64_vk +arch -x86_64 /usr/local/bin/cmake -G "Xcode" -DCMAKE_OSX_DEPLOYMENT_TARGET=10.15 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DAE_SIMD_SSE=20 -DAE_ENABLE_VULKAN=ON "../AE" + +read -p "open project?" +open ./AE.xcodeproj diff --git a/AE/build_scripts/mac_x64/init_on_arm.sh b/AE/build_scripts/mac_x64/init_on_arm.sh index 1754c794..9af5678a 100644 --- a/AE/build_scripts/mac_x64/init_on_arm.sh +++ b/AE/build_scripts/mac_x64/init_on_arm.sh @@ -10,3 +10,6 @@ rm -rf _build_x64 mkdir _build_x64 cd _build_x64 arch -x86_64 /usr/local/bin/cmake -G "Xcode" -DCMAKE_OSX_DEPLOYMENT_TARGET=10.15 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DAE_SIMD_SSE=20 "../AE" + +read -p "open project?" +open ./AE.xcodeproj diff --git a/AE/docs/CMakeLists.txt b/AE/docs/CMakeLists.txt new file mode 100644 index 00000000..8a6d8ad7 --- /dev/null +++ b/AE/docs/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright (c) Zhirnov Andrey. 
For more information see 'LICENSE' + +file( GLOB_RECURSE DOCS "*.*" ) +add_library( "Docs" STATIC EXCLUDE_FROM_ALL ${DOCS} "../engine/Changelog.md" "../Readme.md" ) +set_target_properties( "Docs" PROPERTIES LINKER_LANGUAGE CXX ) +set_property( TARGET "Docs" PROPERTY FOLDER "Engine" ) +source_group( TREE ${CMAKE_CURRENT_SOURCE_DIR} FILES ${DOCS} ) +source_group( "engine" FILES "../engine/Changelog.md" ) diff --git a/AE/docs/EngineDocs.md b/AE/docs/EngineDocs.md new file mode 100644 index 00000000..f7923403 --- /dev/null +++ b/AE/docs/EngineDocs.md @@ -0,0 +1,18 @@ + +* [Build instructions](https://github.com/azhirnov/as-en/blob/dev/AE/docs/engine/Build.md) +* [Licensing information](https://github.com/azhirnov/as-en/blob/dev/AE/LICENSE) +* [Engine Concept](https://github.com/azhirnov/as-en/blob/dev/AE/docs/engine/Concept.md) +* Graphics: [DeviceProperties](https://github.com/azhirnov/as-en/blob/dev/AE/docs/engine/DeviceProperties.md) +* Graphics: [FeatureSet](https://github.com/azhirnov/as-en/blob/dev/AE/docs/engine/FeatureSet.md) +* Low-level Graphics features: [Vulkan](https://github.com/azhirnov/as-en/blob/dev/AE/docs/engine/VulkanFeatures.md), [Metal](https://github.com/azhirnov/as-en/blob/dev/AE/docs/engine/MetalFeatures.md) +* [Graphics Resources](https://github.com/azhirnov/as-en/blob/dev/AE/docs/engine/GraphicsResources.md) +* [Supported platforms](https://github.com/azhirnov/as-en/blob/dev/AE/docs/engine/Platforms.md) +* [Profiling](https://github.com/azhirnov/as-en/blob/dev/AE/docs/engine/Profiling.md) +* [Resource Compilation](https://github.com/azhirnov/as-en/blob/dev/AE/docs/engine/ResourceCompilation.md) + +## rus + +* [Планировщик асинхронных задач](https://github.com/azhirnov/as-en/blob/dev/AE/docs/engine/TaskScheduler-ru.md) +* [Планировщик графических задач](https://github.com/azhirnov/as-en/blob/dev/AE/docs/engine/RenderTaskScheduler-ru.md) +* [Рендер граф](https://github.com/azhirnov/as-en/blob/dev/AE/docs/engine/RenderGraph-ru.md) +* [Файловая 
система](https://github.com/azhirnov/as-en/blob/dev/AE/docs/engine/VirtualFileSystem-ru.md) diff --git a/AE/docs/Papers.md b/AE/docs/Papers.md new file mode 100644 index 00000000..fc5099b3 --- /dev/null +++ b/AE/docs/Papers.md @@ -0,0 +1,9 @@ + +* [HDR Display](https://github.com/azhirnov/as-en/blob/dev/AE/docs/papers/HDR_Display.md) + +## rus + +* [HDR дисплеи](https://github.com/azhirnov/as-en/blob/dev/AE/docs/papers/HDR_Display-ru.md) +* [Асинхронное программирование](https://github.com/azhirnov/as-en/blob/dev/AE/docs/papers/AsyncProgramming-ru.md) +* [Заметки по архитектуре](https://github.com/azhirnov/as-en/blob/dev/AE/docs/papers/ArchitectureNotes-ru.md) +* [Сферический куб](https://github.com/azhirnov/as-en/blob/dev/AE/docs/papers/SphericalCube-ru.md) diff --git a/AE/docs/Tools.md b/AE/docs/Tools.md new file mode 100644 index 00000000..13ebbf58 --- /dev/null +++ b/AE/docs/Tools.md @@ -0,0 +1,7 @@ + +* [Lock-Free Algorithms Sandbox](https://github.com/azhirnov/as-en/blob/dev/AE/engine/tools/lfas/Readme.md) +* [GLSL-Trace](https://github.com/azhirnov/as-en/blob/dev/AE/engine/tools/res_pack/shader_trace/Readme.md) +* [Detect Image Z-Curve](https://github.com/azhirnov/as-en/blob/dev/AE/engine/tools/vulkan_image_zcurve/Readme.md) +* [Vulkan Header Generator](https://github.com/azhirnov/as-en/blob/dev/AE/engine/tools/vulkan_header_gen/Readme.md) +* [Vulkan Synchronizations Logger](https://github.com/azhirnov/as-en/blob/dev/AE/engine/tools/vulkan_sync_log/Readme.md) +* [FeatureSet Generator](https://github.com/azhirnov/as-en/blob/dev/AE/engine/tools/feature_set_gen/Readme.md) diff --git a/AE/engine/docs/Build.md b/AE/docs/engine/Build.md similarity index 89% rename from AE/engine/docs/Build.md rename to AE/docs/engine/Build.md index b47b3959..7498a602 100644 --- a/AE/engine/docs/Build.md +++ b/AE/docs/engine/Build.md @@ -55,11 +55,11 @@ From [stackoverflow](https://stackoverflow.com/questions/67386941/using-x86-libr ``` # launch x86_64 shell -arch -x86_64 zsh -# 
install x86_64 variant of brew +arch -x86_64 zsh +# install x86_64 variant of brew arch -x86_64 /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install.sh)" # install x86_64 variant of clang -arch -x86_64 /usr/local/bin/brew install llvm +arch -x86_64 /usr/local/bin/brew install llvm # compile using x86_64 variant of clang /usr/local/opt/llvm/bin/clang++ -arch x86_64 omp_ex.cpp ``` @@ -89,7 +89,7 @@ Projects which is require precompiled resources has cmake targets with `.PackRes You should run it first to compile resources. From console: `cmake --build . --config --target .PackRes`.
-From IDE: build target `.PackRes`. +From IDE: build target `.PackRes`. Compiled resources will be stored in `AE/../AE-Temp` folder. They can be used on all platforms. @@ -97,10 +97,12 @@ Compiled resources will be stored in `AE/../AE-Temp` folder. They can be used on ## Update file paths (optional) Visual Studio and some other IDEs and programs (Notepad++) allows to open file in URL format like a `file://absolute/path/to/a/file.txt`, but it requires absolute paths. Source code may contain links to other source files or docs which is written as URLs.
-Use IDE or another tool to replace path `[](https://github.com/azhirnov/as-en/blob/dev/AE/...)` to an absolute path like a `file://C:/Projects/AE/...`. +Use IDE or another tool to replace path `[](https://github.com/azhirnov/as-en/blob/dev/AE/)` to an absolute path like a `file://C:/Projects/AE/`. -## Setup IDE (optional) +## Setup IDE and environment (optional) + +Associate file extensions `.as`, `.glsl` with Visual Studio, Notepad++ or other IDE or text editor. This text editor will be used to open files by links like `file://path`. ### Visual Studio diff --git a/AE/engine/docs/Concept.md b/AE/docs/engine/Concept.md similarity index 98% rename from AE/engine/docs/Concept.md rename to AE/docs/engine/Concept.md index 3a7c02af..5e68a136 100644 --- a/AE/engine/docs/Concept.md +++ b/AE/docs/engine/Concept.md @@ -3,7 +3,7 @@ Most of opensource frameworks have problems: * Vulkan backend is copy-pasted from vulkan tutorials and, as a result, has poor performance. -* Vulkan backend has synchronization problems (*use vkconfig with synchronization validation preset to see them*). +* Vulkan backend has synchronization problems (*use vkconfig with synchronization validation preset to see them*). * Vulkan backend has many memory allocations, many unnecessary stalls, uses VMA without understanding how it implemented internally. * Vulkan backend has small number of compatibility checks. Without validation layers Vulkan driver can create resource which description is not supported by hardware, driver will crash somewhen later. [Vulkan Profiles](https://github.com/KhronosGroup/Vulkan-Profiles) is just a first step to get guarantees that all used features are supported, but it doesn't cover limits which is not in Vulkan properties. * Vulkan backend doesn't support queue ownership transfer for resources, this may cause performance problems on some GPUs. 
diff --git a/AE/engine/docs/DeviceProperties.md b/AE/docs/engine/DeviceProperties.md similarity index 98% rename from AE/engine/docs/DeviceProperties.md rename to AE/docs/engine/DeviceProperties.md index ca385985..c75ae283 100644 --- a/AE/engine/docs/DeviceProperties.md +++ b/AE/docs/engine/DeviceProperties.md @@ -1,6 +1,6 @@ Source: [DeviceProperties.h](https://github.com/azhirnov/as-en/blob/dev/AE/engine/src/graphics/Public/DeviceProperties.h) - -Use **DeviceProperties** from `DeviceProperties const& RenderTaskScheduler().GetDeviceProperties();` to get runtime limits for the current GPU. + +Use **DeviceProperties** from `DeviceProperties const& GraphicsScheduler().GetDeviceProperties();` to get runtime limits for the current GPU. Use **DeviceLimits** from `static constexpr DeviceProperties DeviceLimits;` to get compile time limits which is compatible with most GPUs. @@ -32,7 +32,7 @@ __Mali GPU__: from [ARM blog](https://community.arm.com/arm-community-blogs/b/gr this is not documented in Vulkan API and should be manually updated.*"
__Vulkan__: *not defined*
__Metal__: *not defined* - + * `minVertexBufferOffsetAlign`
__Vulkan__: from [specs](https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#fxvertex-input-address-calculation): "*If format is a packed format, attribAddress must be a multiple of the size in bytes of the whole attribute data type as described in Packed Formats. Otherwise, attribAddress must be a multiple of the size in bytes of the component type indicated by format.*"
__Metal__: from [specs](https://developer.apple.com/documentation/metal/mtlvertexattributedescriptor/1515785-offset?language=objc): "*must be a multiple of 4 bytes*" diff --git a/AE/engine/docs/FeatureSet.md b/AE/docs/engine/FeatureSet.md similarity index 98% rename from AE/engine/docs/FeatureSet.md rename to AE/docs/engine/FeatureSet.md index 4486ddc2..d6e76a1f 100644 --- a/AE/engine/docs/FeatureSet.md +++ b/AE/docs/engine/FeatureSet.md @@ -5,7 +5,7 @@ This is some kind of [Vulkan Profiles](https://github.com/KhronosGroup/Vulkan-Pr Use [FeatureSetGen](https://github.com/azhirnov/as-en/blob/dev/AE/engine/tools/feature_set_gen/Readme.md) application to generate feature sets from Vulkan device info JSON and from Metal feature set table.
Predefined FeatureSet's located in the [feature_set folder](https://github.com/azhirnov/as-en/blob/dev/AE/engine/shared_data/feature_set), use them to get GPU limits and features in resource compilation stage. -Use `FeatureSet const& RenderTaskScheduler().GetFeatureSet()` to get cross-platform limits and features of the current GPU in runtime. +Use `FeatureSet const& GraphicsScheduler().GetFeatureSet()` to get cross-platform limits and features of the current GPU in runtime. ## Render States @@ -275,7 +275,7 @@ Same as `VkPhysicalDevicePortabilitySubsetFeaturesKHR::tessellationPointMode`. * maxTotalTileMemory * maxVertAmplification -### Compute shader +### Compute shader * __computeShader__
Vulkan: always supported.
diff --git a/AE/engine/docs/GraphicsResources.md b/AE/docs/engine/GraphicsResources.md similarity index 100% rename from AE/engine/docs/GraphicsResources.md rename to AE/docs/engine/GraphicsResources.md diff --git a/AE/engine/docs/MetalFeatures.md b/AE/docs/engine/MetalFeatures.md similarity index 93% rename from AE/engine/docs/MetalFeatures.md rename to AE/docs/engine/MetalFeatures.md index f1bdf9f7..92c27129 100644 --- a/AE/engine/docs/MetalFeatures.md +++ b/AE/docs/engine/MetalFeatures.md @@ -6,3 +6,4 @@ Supported features: - [x] Push constant emulation - [x] GPU address - [x] Metal 3 +- [x] Async draw diff --git a/AE/engine/docs/Platforms.md b/AE/docs/engine/Platforms.md similarity index 87% rename from AE/engine/docs/Platforms.md rename to AE/docs/engine/Platforms.md index bd387fca..e995efd0 100644 --- a/AE/engine/docs/Platforms.md +++ b/AE/docs/engine/Platforms.md @@ -8,7 +8,7 @@ Supported platforms - [x] Vulkan (by default requires timeline semaphore) - [x] Vulkan (without timeline semaphore, compile with `-DAE_VK_TIMELINE_SEMAPHORE=OFF`) - [ ] Internal resources -- [x] Architecture: x64 +- [x] Architecture: x64 - [ ] Architecture: ARMv8 - [ ] Input: gamepad - [x] HDR display (Win10+) @@ -22,7 +22,7 @@ Supported platforms - [ ] Flatpack - [x] GCC12 - [x] Clang15 -- [x] Architecture: x64 +- [x] Architecture: x64 - [ ] Architecture: ARMv8 - [ ] HDR display @@ -43,11 +43,14 @@ Supported platforms ## MacOS -- [x] Metal API 2+ +- [x] Metal API 2.x +- [x] Metal API 3.x - [ ] Async file read/write - [x] Offline resource packing (x64 only) - [ ] Scripting (ARM64) -- [ ] HDR display +- [x] HDR display (Vulkan) +- [ ] HDR display (Metal) +- [x] MoltenVk ## iOS diff --git a/AE/engine/docs/Profiling.md b/AE/docs/engine/Profiling.md similarity index 99% rename from AE/engine/docs/Profiling.md rename to AE/docs/engine/Profiling.md index 70cd70d0..70e93713 100644 --- a/AE/engine/docs/Profiling.md +++ b/AE/docs/engine/Profiling.md @@ -110,11 +110,11 @@ no source * Don't use 
for profiling! * [RenderDocApi](https://github.com/azhirnov/as-en/blob/dev/AE/engine/src/graphics/Vulkan/Utils/RenderDocApi.h) class for interaction * `IBaseContext::DebugMarker()`, `IBaseContext::PushDebugGroup()`, `IBaseContext::PopDebugGroup()` methods for interaction - + #### Intel VTune / AMD uProf * CPU profiling - * CPU cache profiling + * CPU cache profiling #### Visual Studio diff --git a/AE/engine/docs/ru/RenderGraph.md b/AE/docs/engine/RenderGraph-ru.md similarity index 93% rename from AE/engine/docs/ru/RenderGraph.md rename to AE/docs/engine/RenderGraph-ru.md index 955f1767..f888e4f3 100644 --- a/AE/engine/docs/ru/RenderGraph.md +++ b/AE/docs/engine/RenderGraph-ru.md @@ -33,7 +33,12 @@ __Синхронизации между очередями.__
Для каждого рендер таска также можно указать начальное и конечное состояние ресурса, это позволит оптимизировать синхронизации между тасками. ```cpp -AsyncTask begin = rg.BeginFrame(); +// Ожидаем когда кадр -1 отправится на GPU и когда кадр -2 завершит выполнение на GPU. +// Пока идет ожидание внутри выполняются задачи из переданного списка очередей. +rg.WaitNextFrame(...); + +// начинаем новый кадр +rg.BeginFrame(); // создаем батч, в нем будем использовать 'image' для чтения в фрагментном шейдере auto batch_gfx = rg.CmdBatch( EQueueType::Graphics, {"graphics batch"} ) @@ -47,7 +52,7 @@ auto batch_ac = rg.CmdBatch( EQueueType::AsyncCompute, {"compute batch"} ) // при вызове 'UseResource()' неявно устанавливается зависимость 'batch_gfx -> batch_ac' -AsyncTask gfx_task = batch_gfx.Task( Tuple{...}, {"graphics task"} ).SubmitBatch().Run( Tuple{begin} ); +AsyncTask gfx_task = batch_gfx.Task( Tuple{...}, {"graphics task"} ).SubmitBatch().Run(); AsyncTask comp_task = batch_ac .Task( Tuple{...}, {"async compute task"} ).SubmitBatch().Run( Tuple{gfx_task} ); // 'SubmitBatch()' помечает задачу как последнюю, тогда вызов 'RenderTask::Execute(cmdbuf)' также добавит батч в очередь на отправку на ГП (submit), @@ -73,7 +78,7 @@ AsyncTask end = rg.EndFrame( Tuple{ gfx_task, comp_task }); ## Проверка на корректность синхронизаций -Для этого в движке есть логирование команд (проект `VulkanSyncLog`), который выдает читаемый лог вызовов Vulkan комманд и его результат не меняется в зависимости от запусков, что позволяет следить за изменениями. +Для этого в движке есть логирование команд (проект `VulkanSyncLog`), который выдает читаемый лог вызовов Vulkan команд и его результат не меняется в зависимости от запусков, что позволяет следить за изменениями. Но все синхронизации придется один раз вручную проверить на корректность.
[Пример лога](https://github.com/azhirnov/as-en/blob/dev/AE/engine/tests/graphics/Vulkan/ref/NVIDIA%20GeForce%20RTX%202080/Test_AsyncCompute2.txt) diff --git a/AE/engine/docs/ru/RenderTaskScheduler.md b/AE/docs/engine/RenderTaskScheduler-ru.md similarity index 92% rename from AE/engine/docs/ru/RenderTaskScheduler.md rename to AE/docs/engine/RenderTaskScheduler-ru.md index 24afbf00..db0bb9b3 100644 --- a/AE/engine/docs/ru/RenderTaskScheduler.md +++ b/AE/docs/engine/RenderTaskScheduler-ru.md @@ -21,7 +21,9 @@ ## Планировщик графических задач (RenderTaskScheduler) #### Начало кадра -Метод `BeginFrame()` создает задачу, которая ожидает завершения кадра минус N, при двойной буферизации N=1, при тройной N=2. +Метод `WaitNextFrame()` ожидает пока предыдущий кадр отправится на выполнение и кадр минус N завершит выполнение на ГП, при двойной буферизации N=1, при тройной N=2. Пока идет ожидание внутри выполняются другие задачи. При ошибке или слишком долгом ожидании метод вернет `false`. + +Метод `BeginFrame()` начинает новый кадр, сбрасывает покадровый аллокатор, сбрасывает счетчики, создает задачу на отложенное удаление графических ресурсов и тд. Для каждого кадра передаются параметры в `BeginFrameConfig`, где можно установить лимиты на использование промежуточной памяти. (TODO: ссылка на управление памятью) @@ -55,7 +57,7 @@ Запись команда в Vulkan И Metal имеют отличия - в Metal используются энкодеры, они нужны для лучшего распараллеливания команд и сортировки их встроенным рендер графом. Переключение между graphics и compute дорогое, также и graphics и transfer, поэтому подход из Metal заставляет писать более оптимизированный код за счет своей архитектуры. -На Vulkan каждый контекст имеет свой набор этапов (pipeline stages), для лучшей производительности внутри контекста нужно минимизировать синхронизации. +На Vulkan каждый контекст имеет свой набор этапов (pipeline stages), для лучшей производительности внутри контекста нужно минимизировать синхронизации.
Вместо энкодеров в движке используются контексты: Draw, Graphics, Transfer, ASBuild, RayTracing и тд. diff --git a/AE/engine/docs/ResourceCompilation.md b/AE/docs/engine/ResourceCompilation.md similarity index 99% rename from AE/engine/docs/ResourceCompilation.md rename to AE/docs/engine/ResourceCompilation.md index 295b4db7..b21a840b 100644 --- a/AE/engine/docs/ResourceCompilation.md +++ b/AE/docs/engine/ResourceCompilation.md @@ -3,8 +3,8 @@ Engine specific: * All shaders must be precompiled and validated. * Engine uses only binary data, scripts and configs are allowed only at the resource compilation stage. -* All graphics resources must have at least one [FeatureSet](https://github.com/azhirnov/as-en/blob/dev/AE/engine/docs/FeatureSet.md), which is used for validation. -* Engine guarantees that if a resource is compiled with FeatureSet and this FeatureSet is supported by the GPU then the resource is compatible with the GPU. +* All graphics resources must have at least one [FeatureSet](https://github.com/azhirnov/as-en/blob/dev/AE/docs/engine/FeatureSet.md), which is used for validation. +* Engine guarantees that if a resource is compiled with FeatureSet and this FeatureSet is supported by the GPU then the resource is compatible with the GPU. # Pipeline Compiler @@ -155,7 +155,7 @@ In scrips use: ```cpp GlobalConfig cfg; cfg.SetPreprocessor( EShaderPreprocessor::AEStyle ); -``` +``` New scalar types: bool, byte, ubyte, short, ushort, int, uint, long, ulong, float, double, half.
New vector types: same as scalar with 2, 3, 4 suffix, example: uint3.
diff --git a/AE/engine/docs/ru/TaskScheduler.md b/AE/docs/engine/TaskScheduler-ru.md similarity index 65% rename from AE/engine/docs/ru/TaskScheduler.md rename to AE/docs/engine/TaskScheduler-ru.md index 7faf7c35..9638d6e2 100644 --- a/AE/engine/docs/ru/TaskScheduler.md +++ b/AE/docs/engine/TaskScheduler-ru.md @@ -56,7 +56,7 @@ ETaskQueue co_await Coro_TaskQueue - Доступ к файлам. - Работа с сетью. * __FileIO__ - не является очередью, нужен для передачи управления в ОС, чтобы обработать завершенные асинхронные команды чтения/записи в файл. - + Исходник: [EThread.h](https://github.com/azhirnov/as-en/blob/dev/AE/engine/src/threading/TaskSystem/EThread.h) @@ -92,9 +92,55 @@ ETaskQueue co_await Coro_TaskQueue ## Управление потоками (ThreadManager) -Создает потоки, привязывает их к ядрам ЦП и к очередям в планировщике задач. +Встроенная в движок реализация потоков для обработки задач, пользователи могут сделать свою реализацию `IThread` и добавить ее в планировщик (`TaskScheduler::AddThread`). + +Стандартная реализация потоков принимает список типов очередей, поток будет выполнять задачи только из этого списка и в том порядке, в котором они расположены в списке. +Например поток с `EThreadArray{ ETaskQueue::PerFrame, ETaskQueue::Renderer }` будет выполнять `PerFrame` задачи с большим приоритетом. +ThreadManager также распределяет потоки по ядрам ЦП. Если ЦП содержит энергоэффективные ядра, то потоки с `Background, FileIO` будут привязаны к этим ядрам, а потоки с `Main, PerFrame, Renderer` будут привязаны к производительным ядрам ЦП. +Параметр `bindThreadToPhysicalCore` определяет будет ли привязка к физическим ядрам или к логическим (2 потока на ядро), потоки с `Background, FileIO` всегда привязываются к логическим ядрам, так как могут дольше простаивать. + +Потоки сами решают когда засыпать, если нет задач на выполнение. В стандартной реализации потоки постепенно увеличивают время сна, если задачи не поступают. 
Исходник: [ThreadManager.h](https://github.com/azhirnov/as-en/blob/dev/AE/engine/src/threading/TaskSystem/ThreadManager.h) + +## Примитивы синхронизации + +Блокирующие примитивы синхронизации не должны использоваться, за исключением коротких блокировок. + +### AsyncMutex + +Используется по аналогии с `Mutex`, но эксклюзивная блокировка достигается за счет зависимостей между задачами. +Недостатки:
+* Разблокируется только после выполнения всего метода `IAsyncTask::Run()`. +* Планировщик потоков решает когда запустить следующую задачу, поэтому между ними может быть большой интервал бездействия. + +Исходник: [AsyncMutex.h](https://github.com/azhirnov/as-en/blob/dev/AE/engine/src/threading/TaskSystem/AsyncMutex.h) + + +## Производительность + +Производительность планировщика зависит от скорости добавления и поиска задачи в очереди. +Очередь работает без блокировок (lock-free), используется несколько внутренних очередей и распределение поиска по ним в зависимости от потока (EThreadSeed). + +Производительность не зависит от количества потоков, алгоритм отлично масштабируется и выдает стабильный результат на слабых мобилках и на мощных ПК. + +**Пример худшего случая 1**
+Всего задач: 350k
+Максимальное количество задач в очереди: 25k
+Время выполнения задачи: 7мкс
+Потеря времени в планировщике: 30% + +**Пример худшего случая 2**
+Всего задач: 350k
+Максимальное количество задач в очереди: 32k
+Время выполнения задачи: 22мкс
+Потеря времени в планировщике: 12% + +**Пример хорошего случая**
+Всего задач: 350k
+Максимальное количество задач в очереди: 32k
+Время выполнения задачи: 110мкс
+Потеря времени в планировщике: 2.7% diff --git a/AE/engine/docs/ru/VirtualFileSystem.md b/AE/docs/engine/VirtualFileSystem-ru.md similarity index 88% rename from AE/engine/docs/ru/VirtualFileSystem.md rename to AE/docs/engine/VirtualFileSystem-ru.md index 1f2ab89e..25ea30ad 100644 --- a/AE/engine/docs/ru/VirtualFileSystem.md +++ b/AE/docs/engine/VirtualFileSystem-ru.md @@ -26,6 +26,9 @@ Используется хэш от строки, чтобы не было динамических выделений памяти. +Уникальность хэша гарантирована для архива.
+Остальные типы хранилищ проверяют уникальность хэша только в режиме отладки. + ## Файловая система (VirtualFileSystem) diff --git a/AE/engine/docs/VulkanFeatures.md b/AE/docs/engine/VulkanFeatures.md similarity index 63% rename from AE/engine/docs/VulkanFeatures.md rename to AE/docs/engine/VulkanFeatures.md index 9fcecdf6..e46b5c79 100644 --- a/AE/engine/docs/VulkanFeatures.md +++ b/AE/docs/engine/VulkanFeatures.md @@ -1,6 +1,6 @@ Source folder: [Vulkan backend](https://github.com/azhirnov/as-en/blob/dev/AE/engine/src/graphics/Vulkan/) -Supported extensions: +**Supported extensions:** - [x] VK_EXT_swapchain_colorspace - [x] VK_EXT_validation_features - [x] VK_KHR_descriptor_update_template @@ -34,41 +34,15 @@ Supported extensions: - [x] VK_EXT_texture_compression_astc_hdr - [x] VK_EXT_texel_buffer_alignment - [x] VK_KHR_portability_subset -- [x] VK_ARM_shader_core_builtins - [x] VK_AMD_shader_core_properties, VK_AMD_shader_core_properties2 - [x] VK_KHR_video_queue - [x] VK_KHR_video_encode_queue - [x] VK_KHR_video_decode_queue -- [ ] VK_KHR_video_decode_h264 -- [ ] VK_KHR_video_decode_h265 -- [ ] VK_EXT_video_encode_h264 -- [ ] VK_EXT_video_encode_h265 -- [ ] VK_KHR_multiview -- [ ] VK_KHR_uniform_buffer_standard_layout -- [ ] VK_KHR_format_feature_flags2 -- [ ] VK_EXT_inline_uniform_block -- [ ] VK_EXT_subgroup_size_control -- [ ] VK_EXT_depth_range_unrestricted -- [ ] VK_EXT_pageable_device_local_memory -- [ ] VK_EXT_sample_locations -- [ ] VK_EXT_rasterization_order_attachment_access -- [ ] VK_KHR_performance_query -- [ ] VK_EXT_device_memory_report -- [ ] VK_KHR_pipeline_library -- [ ] VK_EXT_astc_decode_mode -- [ ] VK_EXT_image_compression_control, VK_EXT_image_compression_control_swapchain -- [ ] VK_NV_shader_image_footprint -- [ ] VK_NV_device_generated_commands -- [ ] VK_ARM_rasterization_order_attachment_access -- [ ] VK_KHR_incremental_present -- [ ] VK_KHR_present_id -- [ ] VK_KHR_present_wait -- [ ] VK_EXT_full_screen_exclusive -- [ ] 
VK_EXT_host_image_copy -- [ ] VK_KHR_maintenance5 +- [x] VK_KHR_pipeline_executable_properties -Supported shader extensions: -- [ ] VK_NV_cooperative_matrix, GL_NV_cooperative_matrix, GL_NV_integer_cooperative_matrix - removed + + +**Supported shader extensions:** - [x] VK_KHR_shader_clock, GL_EXT_shader_realtime_clock, GL_ARB_shader_clock - [x] GL_EXT_control_flow_attributes - [x] GL_EXT_debug_printf @@ -91,30 +65,24 @@ Supported shader extensions: - [x] VK_EXT_shader_viewport_index_layer, GL_ARB_shader_viewport_layer_array - [x] GL_ARB_sparse_texture_clamp - [x] VK_NV_shader_sm_builtins, GL_NV_shader_sm_builtins -- [x] GL_ARM_shader_core_builtins +- [x] VK_ARM_shader_core_builtins, GL_ARM_shader_core_builtins - [x] GL_EXT_nonuniform_qualifier - [x] VK_KHR_vulkan_memory_model, GL_KHR_memory_scope_semantics -- [x] GL_EXT_demote_to_helper_invocation +- [x] VK_EXT_shader_demote_to_helper_invocation, GL_EXT_demote_to_helper_invocation - [x] GL_EXT_fragment_shading_rate - [x] VK_EXT_fragment_shader_interlock, GL_ARB_fragment_shader_interlock - [x] VK_KHR_shader_draw_parameters, GL_ARB_shader_draw_parameters - [x] VK_KHR_fragment_shader_barycentric, GL_EXT_fragment_shader_barycentric -- [x] VK_KHR_cooperative_matrix, GL_KHR_cooperative_matrix -- [ ] GL_EXT_multiview -- [ ] GL_EXT_null_initializer -- [ ] VK_KHR_zero_initialize_workgroup_memory -- [ ] VK_EXT_scalar_block_layout, GL_EXT_scalar_block_layout -- [ ] VK_HUAWEI_subpass_shading, GL_HUAWEI_subpass_shading ??? 
-- [ ] VK_KHR_shader_terminate_invocation -- [ ] VK_KHR_workgroup_memory_explicit_layout -- [ ] VK_EXT_shader_image_atomic_int64 -- [ ] VK_HUAWEI_cluster_culling_shader, GL_HUAWEI_cluster_culling_shader +- [x] VK_KHR_cooperative_matrix, GL_KHR_cooperative_matrix (deprecated: VK_NV_cooperative_matrix, GL_NV_cooperative_matrix, GL_NV_integer_cooperative_matrix) +- [x] VK_KHR_shader_subgroup_uniform_control_flow, GL_EXT_subgroupuniform_qualifier + -Features: +**Features:** - [x] Async compute and transfer queues - [x] Async queues with concurrent sharing mode -- [x] Async queues with exclusive sharing mode +- [x] Async queues with exclusive sharing mode - [x] Push constants (compatible with Metal) - [x] Custom allocator for GPU memory - [x] Shader subgroup +- [x] Secondary command buffers (async draw) diff --git a/AE/engine/docs/img/ARM-HWCounters.jpg b/AE/docs/engine/img/ARM-HWCounters.jpg similarity index 100% rename from AE/engine/docs/img/ARM-HWCounters.jpg rename to AE/docs/engine/img/ARM-HWCounters.jpg diff --git a/AE/engine/docs/img/GraphicsProfiler.jpg b/AE/docs/engine/img/GraphicsProfiler.jpg similarity index 100% rename from AE/engine/docs/img/GraphicsProfiler.jpg rename to AE/docs/engine/img/GraphicsProfiler.jpg diff --git a/AE/engine/docs/ru/img/RenderTaskScheduler-1.png b/AE/docs/engine/img/RenderTaskScheduler-1.png similarity index 100% rename from AE/engine/docs/ru/img/RenderTaskScheduler-1.png rename to AE/docs/engine/img/RenderTaskScheduler-1.png diff --git a/AE/engine/docs/ru/img/RenderTaskScheduler-2.png b/AE/docs/engine/img/RenderTaskScheduler-2.png similarity index 100% rename from AE/engine/docs/ru/img/RenderTaskScheduler-2.png rename to AE/docs/engine/img/RenderTaskScheduler-2.png diff --git a/AE/engine/docs/img/ShaderProfilerHeatmap.jpg b/AE/docs/engine/img/ShaderProfilerHeatmap.jpg similarity index 100% rename from AE/engine/docs/img/ShaderProfilerHeatmap.jpg rename to AE/docs/engine/img/ShaderProfilerHeatmap.jpg diff --git 
a/AE/engine/docs/img/TaskProfiler.jpg b/AE/docs/engine/img/TaskProfiler.jpg similarity index 100% rename from AE/engine/docs/img/TaskProfiler.jpg rename to AE/docs/engine/img/TaskProfiler.jpg diff --git a/AE/docs/papers/ArchitectureNotes-ru.md b/AE/docs/papers/ArchitectureNotes-ru.md new file mode 100644 index 00000000..889230fd --- /dev/null +++ b/AE/docs/papers/ArchitectureNotes-ru.md @@ -0,0 +1,75 @@ +Заметки по планированию архитектуры. + +## ООП + +В больших классах частая проблема - множество методов, которые имеют доступ ко всем полям класса.
+В чем тут проблема: +* Читаемость - непонятно что где меняется. +* Синхронизация - если поля класса защищены разными примитивами синхронизации, то очень сложно отследить где что используется и была ли синхронизация перед использованием. + +Возможные решения: +1. Использовать статические функции. У них остается доступ к приватным типам класса, но нет доступа к полям, для этого их нужно явно передавать, что сразу же улучшает читаемость кода. +2. Константные методы. Они могут читать все поля, но меняют только те, что передаются в виде параметров. +3. Приватные классы. В них хранятся только нужные данные и только их логика, но это работает только в редких случаях, так как часто данные нужны везде. + +```cpp +class Obj +{ + ReadOnly r; + Mutable m; + + static void StaticFn (const ReadOnly &, Mutable &); + void ConstMethod (Mutable &) const; +}; +``` + +## Синхронизации + +Если в классе используется mutex или другой примитив синхронизации, то он должен использоваться для всех полей, кроме константных. Иначе это выглядит как ошибка, когда часть методов не используют синхронизацию. + +Вместо нескольких примитивов синхронизации внутри одного класса лучше использовать [Synchronized](https://github.com/azhirnov/as-en/blob/dev/AE/engine/src/threading/Primitives/Synchronized.h) тип. + + +## Асинхронная архитектура + +Главное - алгоритмы и архитектура должны изначально разрабатываться под асинхронность. + +### Алгоритмы выполняются частями + +Например загрузка данных с диска на видеокарту.
+Скорость чтения с диска HDD: ~100 МБ/с, SSD: ~500 МБ/с, скорость передачи данных по PCI-E: Gen3 x16: 15.75 ГБ/с, Gen4 x16: 31.5 ГБ/с. +Если данные занимают несколько ГБ, то мы не можем загрузить все с диска в ОЗУ и затем уже передать на видеокарту, это отберет слишком много памяти у других операций. Тогда нужно использовать фиксированный объем ОЗУ, чтобы читать часть с диска, загружать ее на видеокарту и запускать заново. При этом другие операции могут занимать всю фиксированную часть ОЗУ, поэтому наш алгоритм должен сначала пытаться захватить нужный блок памяти. +Чтобы не блокировать шину PCI-E передачей больших объемов данных, требуется передавать их малыми частями каждый кадр. Также захватывается блок видимой для видеокарты ОЗУ, в нее копируются данные (ОЗУ -> ОЗУ), потом на видеокарте идет копирование из видимой ОЗУ в память видеокарты.
+Поэтому асинхронный алгоритм сильно отличается от синхронного, но правильно написанный алгоритм позволяет распараллелить работу, тогда как синхронный алгоритм упрется в объем ОЗУ, пропускную способность диска или шины PCI-E. + +**Требуется следить за выделением памяти.**
+Всегда может возникнуть ситуация, когда один алгоритм запущен множество раз параллельно и требуется выделить слишком много памяти. + + +## Исключения + +Текущая реализация C++ не позволяет писать безопасный код с использованием исключений. + +Проблемы: +* Приходится вручную отслеживать какая функция бросает исключения. Компилятор выдает ошибку, если функция с `noexcept` бросает исключение, которое не перехватывается, но это работает только на явный вызов `throw` внутри функции, а используемые функции могут кидать исключения и компилятор никак это не проверяет. Пока не будет предупреждений на использование не-`noexcept` функций внутри `noexcept` функций исключения будут опасны. +* Концепция исключений предполагает, что после бросания исключений объект возвращается в первоначальное состояние, никаких частичных изменений, либо все, либо ничего, это часто требует выделение дополнительной памяти. +* Бросание исключений в конструкторах, особенно в move-конструкторах, приводит к выделению дополнительной памяти, как описано выше. +* В ObjC бросание исключения внутри `autoreleasepool` приводит к утечке памяти. +* Сложно писать код, который требует соблюдения правил и компилятор в этом никак не помогает, поэтому поддержка исключений усложняет код и увеличивает время разработки. + +Преимущества: +* Хорошо подходят для функций, вызываемых из скриптов, тогда в случае бросания исключения скрипт завершается и выдает ошибку из исключения. +* Неплохо подходит для десериализации, где из-за порчи данных может произойти попытка выделения большого объема памяти. + +В большинстве случаев удобнее использовать коды ошибок и атрибут `[[nodiscard]]`, что не позволит пользователю проигнорировать возвращаемое значение. + + +## Коды ошибок + +Обычно достаточно возвращать `bool` - успешно отработала функция или нет. Ошибки должны обрабатываться внутри функции, а пользователь может передать флаги и функторы (std::function), которые будут вызваны для обработки или исправления ошибки. 
+ +Почему плохо возвращать `enum` с кодами ошибок - на каждый вызов функции от пользователя требуется обработать все возможные коды, это сильно увеличивает объем кода, это требует заново читать документацию и тд, тогда как чаще всего пользователю нужно получить ответ успешно ли отработала функция или нет. + +Как возвращать данные и код ошибки: +* Как `out` аргументы функции. Обязательные аргументы передаются по ссылке, опциональные - по указателю.
Это требует от пользователя передавать все аргументы, а внутри функции требуется проверять на `null` опциональные параметры. Все это не очень оптимально как для производительности так и для написания кода. +* Вместе с кодом ошибки. Для этого можно использовать `pair`, `tuple`, `optional<>` и тд.
При этом `pair/tuple` требуют чтобы все возвращаемые данные были валидны, так как данные могут быть прочитаны без проверки кода ошибки. А `optional` и аналоги не дают доступ к данным до проверки ошибки. diff --git a/AE/engine/docs/ru/AsyncProgramming.md b/AE/docs/papers/AsyncProgramming-ru.md similarity index 97% rename from AE/engine/docs/ru/AsyncProgramming.md rename to AE/docs/papers/AsyncProgramming-ru.md index 771be1ae..e5b8637b 100644 --- a/AE/engine/docs/ru/AsyncProgramming.md +++ b/AE/docs/papers/AsyncProgramming-ru.md @@ -1,5 +1,5 @@ -Сборник нюансов по асинхронному программированию. - +Сборник нюансов по асинхронному программированию.
+Про асинхронную архитектуру написано отдельно [тут](https://github.com/azhirnov/as-en/blob/dev/AE/docs/papers/ArchitectureNotes-ru.md). ## Атомарные операции (atomics) diff --git a/AE/engine/docs/ru/HDR_Display.md b/AE/docs/papers/HDR_Display-ru.md similarity index 100% rename from AE/engine/docs/ru/HDR_Display.md rename to AE/docs/papers/HDR_Display-ru.md diff --git a/AE/engine/docs/HDR_Display.md b/AE/docs/papers/HDR_Display.md similarity index 100% rename from AE/engine/docs/HDR_Display.md rename to AE/docs/papers/HDR_Display.md diff --git a/AE/docs/papers/SphericalCube-ru.md b/AE/docs/papers/SphericalCube-ru.md new file mode 100644 index 00000000..36d461ae --- /dev/null +++ b/AE/docs/papers/SphericalCube-ru.md @@ -0,0 +1,63 @@ +**Сферический куб** + +## Где используется + +* Рисование планет. +* Вывод кубических текстур (skybox) и видео 360/VR. + + +## Виды проекций + +В статье [Cube-to-sphere Projections for Procedural Texturing and Beyond](https://www.jcgt.org/published/0007/02/01/paper.pdf) исследованы различные виды проекций: + +![](img/SC_Proj.png) + +На картинке цветом обозначена площадь треугольника, более равномерный цвет характеризует равномерную площадь треугольников на сфере.
+[Исходник теста](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/scripts/sphere/SphericalCube-1.as). + +Проекции 5th Poly и COBE в шейдере дают наименьшую точность расчетов. В тесте берется развертка сферы и применяются обратная и прямая проекции, сравниваются вектора и выводится погрешность.
+[Исходник теста](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/scripts/tests/CubeMapTest-1.as). + + +## Проекция текстуры + +![](img/SC_TexProj.png) + +В tangential проекции текстурные координаты такие же как в identity, но за счет трансформации вершин текстура накладывается более равномерно.
+Если использовать координаты вершин как текстурные координаты, то искажения текстуры соответствуют identity проекции. + +При применении проекции, кроме identity, текстурные координаты перестают соответствовать 3D вектору, тогда при записи в текстуру требуется применять проекцию.
+Для всех проекций UV координаты грани куба перестают быть прямоугольными, то есть все формы искажаются.
+UV куба дает распределение, близкое к равномерному. При этом чем лучше проекция, тем равномернее результат.
+[Исходник теста](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/scripts/sphere/SphericalCube-2.as). + + +## Топология + +![](img/SC_Topology.png) + +Обычная сетка после проецирования дает сильно искаженные треугольники, поэтому сетка должна быть симметричной относительно центра грани. + + +## Проекция из 3D + +### Искажения + +![](img/SC_Distortion.png) + +Пример проекции сферы (круга) на сферический куб, ближе к грани куба начинаются небольшие искажения, для круга это не критично.
+Проекция прямоугольника на сферический куб не имеет искажений только в центре грани куба, на краях начинаются искажения, но радиус вписанной окружности не меняется.
+[Исходник теста](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/scripts/sphere/SphericalCube-3.as) + + +### Погрешность интерполяции + +![](img/SC_ProjError.png) + +Тест показывает погрешность, когда проекция применяется для вершин, а между ними происходит линейная интерполяция. За счет расхождения между проекцией и интерполяцией появляется ошибка. На картинке размер ошибки показан белым цветом, красный - когда ошибка более 1 после масштабирования. + +Улучшить точность можно повторив линейную интерполяцию между контрольными точками. На картинке это вариант справа. + +Исходники:
+[коррекция в вычислительном шейдере](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/scripts/sphere/SphericalCube-4.as).
+[коррекция во фрагментном шейдере](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/scripts/sphere/SphericalCube-5.as). diff --git a/AE/engine/docs/ru/img/HDR-monitor.jpg b/AE/docs/papers/img/HDR-monitor.jpg similarity index 100% rename from AE/engine/docs/ru/img/HDR-monitor.jpg rename to AE/docs/papers/img/HDR-monitor.jpg diff --git a/AE/engine/docs/ru/img/HDR-smartphone.jpg b/AE/docs/papers/img/HDR-smartphone.jpg similarity index 100% rename from AE/engine/docs/ru/img/HDR-smartphone.jpg rename to AE/docs/papers/img/HDR-smartphone.jpg diff --git a/AE/docs/papers/img/SC_Distortion.png b/AE/docs/papers/img/SC_Distortion.png new file mode 100644 index 00000000..e6af786e Binary files /dev/null and b/AE/docs/papers/img/SC_Distortion.png differ diff --git a/AE/docs/papers/img/SC_Proj.png b/AE/docs/papers/img/SC_Proj.png new file mode 100644 index 00000000..dc881666 Binary files /dev/null and b/AE/docs/papers/img/SC_Proj.png differ diff --git a/AE/docs/papers/img/SC_ProjError.png b/AE/docs/papers/img/SC_ProjError.png new file mode 100644 index 00000000..d38c95aa Binary files /dev/null and b/AE/docs/papers/img/SC_ProjError.png differ diff --git a/AE/docs/papers/img/SC_TexProj.png b/AE/docs/papers/img/SC_TexProj.png new file mode 100644 index 00000000..703a2a32 Binary files /dev/null and b/AE/docs/papers/img/SC_TexProj.png differ diff --git a/AE/docs/papers/img/SC_Topology.png b/AE/docs/papers/img/SC_Topology.png new file mode 100644 index 00000000..fc60e489 Binary files /dev/null and b/AE/docs/papers/img/SC_Topology.png differ diff --git a/AE/engine/CMakeLists.txt b/AE/engine/CMakeLists.txt index 5226be04..146f5a20 100644 --- a/AE/engine/CMakeLists.txt +++ b/AE/engine/CMakeLists.txt @@ -48,7 +48,6 @@ if (DEFINED ENGINE_LIBS_PATH) set( PREBUILD_CPP_FILE "${DUMMY_CPP_FILE}" ) endif() -add_subdirectory( "docs" ) add_subdirectory( "src" ) add_subdirectory( "tools" ) diff --git a/AE/engine/Changelog.md b/AE/engine/Changelog.md index 12b188b1..4c40506a 
100644 --- a/AE/engine/Changelog.md +++ b/AE/engine/Changelog.md @@ -1,4 +1,29 @@ +## 23.12.214 + +- Graphics: remote graphics server +- CMake: added AE_ENABLE_EXCEPTIONS to switch on/off exceptions and RTTI +- CMake: added AE_ENABLE_LOGS +- Graphics: added VK_KHR_pipeline_executable_properties +- Graphics: added MoltenVK (macOS) +- Graphics: BeginFrame() refactoring, now it completes synchronously +- Android: bugfix, run unit and performance tests +- Threading: progressive sleep if no tasks to process +- Networking: AsyncConnect() for TcpSocket + + +## 23.11.207 + +- Networking: TCP channel +- Networking: IPv6 support +- LfIndexedPool2 replaced by LfIndexedPool3 +- Removed: LfIndexedPool2, LfFixedBlockAllocator +- Rename LfIndexedPool3 -> LfIndexedPool, LfFixedBlockAllocator2 -> LfFixedBlockAllocator +- Added: ChunkList, MemChunkList, LfChunkList +- VFS: network storage client/server +- VFS: network storage server application + + ## 23.10.199 - CMake: remove AE_NO_EXCEPTIONS diff --git a/AE/engine/cmake/compiler_tests.cmake b/AE/engine/cmake/compiler_tests.cmake index 70f1f27e..fa4c2f6d 100644 --- a/AE/engine/cmake/compiler_tests.cmake +++ b/AE/engine/cmake/compiler_tests.cmake @@ -20,7 +20,7 @@ message( STATUS "Run compiler tests with flags: ${CMAKE_REQUIRED_FLAGS}" ) set( AE_COMPILER_DEFINITIONS "" ) #------------------------------------------------------------------------------ -set( STD_FILESYSTEM_SUPPORTED_SRC +set( STD_FILESYSTEM_SUPPORTED_SRC "#include int main () { (void)(std::filesystem::current_path()); @@ -52,7 +52,7 @@ else() endif() #------------------------------------------------------------------------------ -set( STD_CACHELINESIZE_SUPPORTED_SRC +set( STD_CACHELINESIZE_SUPPORTED_SRC "#include static constexpr size_t Align = std::hardware_destructive_interference_size; int main () { @@ -87,12 +87,12 @@ if (STD_CACHELINESIZE_SUPPORTED) elseif (APPLE) string( TOUPPER ${CMAKE_SYSTEM_PROCESSOR} PLATFORM_NAME ) if (${PLATFORM_NAME} STREQUAL 
"X86_64") - 
set( AE_COMPILER_DEFINITIONS "${AE_COMPILER_DEFINITIONS}" "AE_CACHE_LINE=64" ) + set( AE_COMPILER_DEFINITIONS "${AE_COMPILER_DEFINITIONS}" "AE_CACHE_LINE=std::size_t{64}" ) else() - set( AE_COMPILER_DEFINITIONS "${AE_COMPILER_DEFINITIONS}" "AE_CACHE_LINE=128" ) + set( AE_COMPILER_DEFINITIONS "${AE_COMPILER_DEFINITIONS}" "AE_CACHE_LINE=std::size_t{128}" ) endif() else () - set( AE_COMPILER_DEFINITIONS "${AE_COMPILER_DEFINITIONS}" "AE_CACHE_LINE=64" ) # TODO: check + set( AE_COMPILER_DEFINITIONS "${AE_COMPILER_DEFINITIONS}" "AE_CACHE_LINE=std::size_t{64}" ) # TODO: check endif() #------------------------------------------------------------------------------ @@ -160,7 +160,7 @@ if (NOT (HAS_HASHFN_HashArrayRepresentation OR HAS_HASHFN_Murmur2OrCityhash OR H endif() #============================================================================== -set( CPP_COROUTINE_SUPPORTED_SRC +set( CPP_COROUTINE_SUPPORTED_SRC "#include #ifndef __cpp_impl_coroutine # error coroutines are not supported by compiler diff --git a/AE/engine/cmake/compilers.cmake b/AE/engine/cmake/compilers.cmake index de2f86ec..1d7e7a2b 100644 --- a/AE/engine/cmake/compilers.cmake +++ b/AE/engine/cmake/compilers.cmake @@ -12,9 +12,14 @@ cmake_minimum_required( VERSION 3.10 FATAL_ERROR ) # options: -# AE_ENABLE_COMPILER_WARNINGS BOOL : TRUE - for engine, FALSE - for external projects +# AE_ENABLE_COMPILER_WARNINGS BOOL : TRUE - for engine, FALSE - for external projects # AE_USE_SANITIZER BOOL -# AE_DISABLE_THREADS BOOL : for emscripten +# AE_DISABLE_THREADS BOOL : for emscripten +# AE_ENABLE_EXCEPTIONS BOOL : TRUE - enable, FALSE - disable exception and RTTI +# AE_ENABLE_LOGS BOOL +# AE_SIMD_AVX STRING : 0, 1, 2, 3(AVX512) +# AE_SIMD_SSE STRING : 0, 20, 30, 31, 41, 42 +# AE_SIMD_AES STRING : 0, 1 # detect target platform @@ -101,7 +106,14 @@ set( "AE_CPU_ARCH_${TARGET_CPU_ARCH}" ON CACHE INTERNAL "" FORCE ) # default compiler flags set( PROJECTS_SHARED_DEFINES "AE_${TARGET_PLATFORM}" 
"AE_PLATFORM_NAME=\"${CMAKE_SYSTEM_NAME}\"" "AE_PLATFORM_BITS=${PLATFORM_BITS}" - "AE_CPU_ARCH_${TARGET_CPU_ARCH}" "AE_CPU_ARCH_NAME=\"${TARGET_CPU_ARCH}\"" "AE_ENABLE_RTTI" ) + "AE_CPU_ARCH_${TARGET_CPU_ARCH}" "AE_CPU_ARCH_NAME=\"${TARGET_CPU_ARCH}\"" ) + +if (${AE_ENABLE_EXCEPTIONS}) + set( PROJECTS_SHARED_DEFINES ${PROJECTS_SHARED_DEFINES} "AE_ENABLE_RTTI" "AE_ENABLE_EXCEPTIONS" ) +endif() +if (${AE_ENABLE_LOGS}) + set( PROJECTS_SHARED_DEFINES ${PROJECTS_SHARED_DEFINES} "AE_ENABLE_LOGS" ) +endif() set( CMAKE_CONFIGURATION_TYPES Release Profile Develop Debug ) set( CMAKE_CONFIGURATION_TYPES "${CMAKE_CONFIGURATION_TYPES}" CACHE STRING "Configurations" FORCE ) @@ -200,28 +212,41 @@ if ( COMPILER_MSVC ) message( FATAL_ERROR "multiple compiler types detected, previous: '${DETECTED_COMPILER}'" ) endif() set( DETECTED_COMPILER "COMPILER_MSVC" ) - set( CURRENT_C_FLAGS ${CMAKE_C_FLAGS} CACHE STRING "" FORCE ) - set( CURRENT_CXX_FLAGS ${CMAKE_CXX_FLAGS} CACHE STRING "" FORCE ) - set( CURRENT_EXE_LINKER_FLAGS ${CMAKE_EXE_LINKER_FLAGS} CACHE STRING "" FORCE ) - set( CURRENT_STATIC_LINKER_FLAGS ${CMAKE_STATIC_LINKER_FLAGS} CACHE STRING "" FORCE ) - set( CURRENT_SHARED_LINKER_FLAGS ${CMAKE_SHARED_LINKER_FLAGS} CACHE STRING "" FORCE ) - #[[ disable c++ exceptions - if (${AE_NO_EXCEPTIONS}) + set( TEMP_CXX_FLAGS ${CMAKE_CXX_FLAGS} ) + string( REPLACE "/GR-" " " TEMP_CXX_FLAGS "${TEMP_CXX_FLAGS}" ) + string( REPLACE "/GR" " " TEMP_CXX_FLAGS "${TEMP_CXX_FLAGS}" ) # RTTI + string( REPLACE "/EHa-" " " TEMP_CXX_FLAGS "${TEMP_CXX_FLAGS}" ) + string( REPLACE "/EHa" " " TEMP_CXX_FLAGS "${TEMP_CXX_FLAGS}" ) # catches both structured (asynchronous) and standard C++ (synchronous) exceptions + string( REPLACE "/EHsc-" " " TEMP_CXX_FLAGS "${TEMP_CXX_FLAGS}" ) + string( REPLACE "/EHsc" " " TEMP_CXX_FLAGS "${TEMP_CXX_FLAGS}" ) # functions declared as extern "C" never throw a C++ exception. 
+ string( REPLACE "/EHc-" " " TEMP_CXX_FLAGS "${TEMP_CXX_FLAGS}" ) + string( REPLACE "/EHc" " " TEMP_CXX_FLAGS "${TEMP_CXX_FLAGS}" ) # ignored without /EHs + string( REPLACE "/EHs-" " " TEMP_CXX_FLAGS "${TEMP_CXX_FLAGS}" ) + string( REPLACE "/EHs" " " TEMP_CXX_FLAGS "${TEMP_CXX_FLAGS}" ) # catches only standard C++ exceptions, functions declared as extern "C" may throw a C++ exception. + string( REPLACE "/EHr-" " " TEMP_CXX_FLAGS "${TEMP_CXX_FLAGS}" ) + string( REPLACE "/EHr" " " TEMP_CXX_FLAGS "${TEMP_CXX_FLAGS}" ) # forcing compiler to generate runtime checks for unhandled exceptions that escape a noexcept function + string( REPLACE "/D_HAS_EXCEPTIONS=0" " " TEMP_CXX_FLAGS "${TEMP_CXX_FLAGS}" ) + string( REPLACE "/D_HAS_EXCEPTIONS=1" " " TEMP_CXX_FLAGS "${TEMP_CXX_FLAGS}" ) + string( REPLACE " " " " TEMP_CXX_FLAGS "${TEMP_CXX_FLAGS}" ) + + if (${AE_ENABLE_EXCEPTIONS}) + set( TEMP_CXX_FLAGS "${TEMP_CXX_FLAGS} /D_HAS_EXCEPTIONS=1 /EHsc /GR" ) + else () + # disable c++ exceptions # _HAS_EXCEPTIONS=0 means 'there are no exceptions' and that the standard library can assume no exceptions. # The standard library team does not test any _HAS_EXCEPTIONS=0 configuration. 
- string( REPLACE " /EHa" " " CURRENT_CXX_FLAGS "${CURRENT_CXX_FLAGS}" ) - string( REPLACE " /EHsc" " " CURRENT_CXX_FLAGS "${CURRENT_CXX_FLAGS}" ) - string( REPLACE " /EHs" " " CURRENT_CXX_FLAGS "${CURRENT_CXX_FLAGS}" ) - string( REPLACE " /EHr" " " CURRENT_CXX_FLAGS "${CURRENT_CXX_FLAGS}" ) - string( REPLACE " /D_HAS_EXCEPTIONS=0" " " CURRENT_CXX_FLAGS "${CURRENT_CXX_FLAGS}" ) - string( REPLACE " /D_HAS_EXCEPTIONS=1" " " CURRENT_CXX_FLAGS "${CURRENT_CXX_FLAGS}" ) - set( CURRENT_CXX_FLAGS "${CURRENT_CXX_FLAGS} /D_HAS_EXCEPTIONS=0 /EHsc" ) - else () - set( CURRENT_CXX_FLAGS "${CURRENT_CXX_FLAGS} /D_HAS_EXCEPTIONS=1 /EHsc" ) - endif()]] + set( TEMP_CXX_FLAGS "${TEMP_CXX_FLAGS} /D_HAS_EXCEPTIONS=0 /EHs- /GR-" ) # /EHc- /EHr- are not needed + endif() #-------------------------------------------- + set( CURRENT_C_FLAGS ${CMAKE_C_FLAGS} CACHE STRING "" FORCE ) + set( CURRENT_CXX_FLAGS "${TEMP_CXX_FLAGS}" CACHE STRING "" FORCE ) + set( CMAKE_CXX_FLAGS "${TEMP_CXX_FLAGS}" CACHE STRING "" FORCE ) + set( CURRENT_EXE_LINKER_FLAGS ${CMAKE_EXE_LINKER_FLAGS} CACHE STRING "" FORCE ) + set( CURRENT_STATIC_LINKER_FLAGS ${CMAKE_STATIC_LINKER_FLAGS} CACHE STRING "" FORCE ) + set( CURRENT_SHARED_LINKER_FLAGS ${CMAKE_SHARED_LINKER_FLAGS} CACHE STRING "" FORCE ) + set( CONFIGURATION_DEPENDENT_PATH ON CACHE INTERNAL "" FORCE ) #-------------------------------------------- @@ -283,8 +308,6 @@ if ( COMPILER_MSVC ) list( APPEND MSVC_SHARED_OPTS_DBG /w14100 ) endif() - set( CMAKE_CXX_FLAGS "${CURRENT_CXX_FLAGS}" CACHE STRING "" FORCE ) - # Release set( CMAKE_C_FLAGS_RELEASE "${CURRENT_C_FLAGS} /D_NDEBUG /DNDEBUG /MT /Ox /MP " CACHE STRING "" FORCE ) set( CMAKE_CXX_FLAGS_RELEASE "${CURRENT_CXX_FLAGS} /D_NDEBUG /DNDEBUG /MT /Ox /MP " CACHE STRING "" FORCE ) @@ -299,7 +322,7 @@ if ( COMPILER_MSVC ) set( CMAKE_EXE_LINKER_FLAGS_PROFILE "${CURRENT_EXE_LINKER_FLAGS} /DEBUG /PROFILE " CACHE STRING "" FORCE ) set( CMAKE_STATIC_LINKER_FLAGS_PROFILE "${CURRENT_STATIC_LINKER_FLAGS} /DEBUG /PROFILE " 
CACHE STRING "" FORCE ) set( CMAKE_SHARED_LINKER_FLAGS_PROFILE "${CURRENT_SHARED_LINKER_FLAGS} /DEBUG /PROFILE " CACHE STRING "" FORCE ) - set( PROJECTS_SHARED_CXX_FLAGS_PROFILE ${MSVC_SHARED_OPTS_DBG} /Ob2 /Oi /Ot /Oy /GT /GL /GF /GS- /Ox /analyze- /Zi /GR CACHE INTERNAL "" FORCE ) + set( PROJECTS_SHARED_CXX_FLAGS_PROFILE ${MSVC_SHARED_OPTS_DBG} /Ob2 /Oi /Ot /Oy /GT /GL /GF /GS- /Ox /analyze- /Zi CACHE INTERNAL "" FORCE ) set( PROJECTS_SHARED_LINKER_FLAGS_PROFILE " /OPT:REF /OPT:ICF /INCREMENTAL:NO /DEBUG /PROFILE" CACHE INTERNAL "" FORCE ) # Develop set( CMAKE_C_FLAGS_DEVELOP "${CURRENT_C_FLAGS} /D_NDEBUG /DNDEBUG /D_ITERATOR_DEBUG_LEVEL=0 /MT /Od /MP " CACHE STRING "" FORCE ) @@ -307,7 +330,7 @@ if ( COMPILER_MSVC ) set( CMAKE_EXE_LINKER_FLAGS_DEVELOP "${CURRENT_EXE_LINKER_FLAGS} /DEBUG " CACHE STRING "" FORCE ) set( CMAKE_STATIC_LINKER_FLAGS_DEVELOP "${CURRENT_STATIC_LINKER_FLAGS} /DEBUG " CACHE STRING "" FORCE ) set( CMAKE_SHARED_LINKER_FLAGS_DEVELOP "${CURRENT_SHARED_LINKER_FLAGS} /DEBUG " CACHE STRING "" FORCE ) - set( PROJECTS_SHARED_CXX_FLAGS_DEVELOP ${MSVC_SHARED_OPTS_DBG} /Ob2 /Oi /Ot /Oy /GT /GL /GF /GS- /Od /analyze- /Zi /GR CACHE INTERNAL "" FORCE ) + set( PROJECTS_SHARED_CXX_FLAGS_DEVELOP ${MSVC_SHARED_OPTS_DBG} /Ob2 /Oi /Ot /Oy /GT /GL /GF /GS- /Od /analyze- /Zi CACHE INTERNAL "" FORCE ) set( PROJECTS_SHARED_LINKER_FLAGS_DEVELOP " /OPT:REF /OPT:ICF /INCREMENTAL:NO /DEBUG" CACHE INTERNAL "" FORCE ) # Debug set( CMAKE_C_FLAGS_DEBUG "${CURRENT_C_FLAGS} /D_DEBUG /D_ITERATOR_DEBUG_LEVEL=${AE_ITERATOR_DEBUG_LEVEL} /MTd /Od /MP " CACHE STRING "" FORCE ) @@ -315,7 +338,7 @@ if ( COMPILER_MSVC ) set( CMAKE_EXE_LINKER_FLAGS_DEBUG "${CURRENT_EXE_LINKER_FLAGS} /DEBUG:FULL " CACHE STRING "" FORCE ) set( CMAKE_STATIC_LINKER_FLAGS_DEBUG "${CURRENT_STATIC_LINKER_FLAGS} /DEBUG:FULL " CACHE STRING "" FORCE ) set( CMAKE_SHARED_LINKER_FLAGS_DEBUG "${CURRENT_SHARED_LINKER_FLAGS} /DEBUG:FULL " CACHE STRING "" FORCE ) - set( PROJECTS_SHARED_CXX_FLAGS_DEBUG 
${MSVC_SHARED_OPTS_DBG} /sdl /Od /Ob0 /Oy- /GF- /GS /GR /analyze- /Zi /RTC1 CACHE INTERNAL "" FORCE ) + set( PROJECTS_SHARED_CXX_FLAGS_DEBUG ${MSVC_SHARED_OPTS_DBG} /sdl /Od /Ob0 /Oy- /GF- /GS /analyze- /Zi /RTC1 CACHE INTERNAL "" FORCE ) set( PROJECTS_SHARED_LINKER_FLAGS_DEBUG " /OPT:REF /OPT:ICF /INCREMENTAL:NO /DEBUG:FULL" CACHE INTERNAL "" FORCE ) endif() @@ -324,15 +347,23 @@ endif() # GCC/Clang shared settings #================================================================================================== set( GCC_CLANG_SHARED_GLOBAL_WARNING_LIST "-Wno-unused -Wno-switch -Wno-undef -Wno-comment" ) -set( GCC_CLANG_SHARED_LOCAL_WARNING_LIST -Wdouble-promotion -Wchar-subscripts -Wformat -Wmain -Wmissing-braces -Werror=uninitialized -Wmissing-include-dirs -Wunknown-pragmas -Wpragmas -Wstrict-overflow -Wstrict-aliasing -Wendif-labels -Wpointer-arith -Wwrite-strings -Wconversion-null -Wenum-compare -Wsign-compare -Wno-unused -Wsizeof-pointer-memaccess -Wno-zero-as-null-pointer-constant -Wundef -Werror=init-self -Werror=parentheses -Werror=return-type -Werror=array-bounds -Werror=div-by-zero -Werror=missing-field-initializers -Werror=cast-qual -Werror=cast-align -Wno-switch -Werror=invalid-pch -Werror=defaulted-function-deleted ) +set( GCC_CLANG_SHARED_LOCAL_WARNING_LIST -Wdouble-promotion -Wchar-subscripts -Wformat -Wmain -Wmissing-braces -Werror=uninitialized -Wmissing-include-dirs -Wunknown-pragmas -Wpragmas -Wstrict-overflow -Wstrict-aliasing -Wendif-labels -Wpointer-arith -Wwrite-strings -Wconversion-null -Wenum-compare -Wsign-compare -Wno-unused -Wsizeof-pointer-memaccess -Wno-zero-as-null-pointer-constant -Wundef -Werror=init-self -Werror=parentheses -Werror=return-type -Werror=array-bounds -Werror=div-by-zero -Werror=missing-field-initializers -Werror=cast-qual -Werror=cast-align -Wno-switch -Werror=invalid-pch -Werror=defaulted-function-deleted -Werror=format-security ) -#if (${AE_NO_EXCEPTIONS}) -# set( GCC_CLANG_SHARED_GLOBAL_WARNING_LIST 
"${GCC_CLANG_SHARED_GLOBAL_WARNING_LIST} -fno-exceptions" ) -# set( GCC_CLANG_SHARED_LOCAL_WARNING_LIST ${GCC_CLANG_SHARED_LOCAL_WARNING_LIST} -fno-exceptions ) -#else() - set( GCC_CLANG_SHARED_GLOBAL_WARNING_LIST "${GCC_CLANG_SHARED_GLOBAL_WARNING_LIST} -fexceptions" ) - set( GCC_CLANG_SHARED_LOCAL_WARNING_LIST ${GCC_CLANG_SHARED_LOCAL_WARNING_LIST} -fexceptions ) -#endif() +if (${AE_ENABLE_EXCEPTIONS}) + set( GCC_CLANG_SHARED_GLOBAL_WARNING_LIST "${GCC_CLANG_SHARED_GLOBAL_WARNING_LIST} -frtti -fexceptions" ) + set( GCC_CLANG_SHARED_LOCAL_WARNING_LIST ${GCC_CLANG_SHARED_LOCAL_WARNING_LIST} -frtti -fexceptions ) +else() + # TODO: -fno-unwind-tables -fno-asynchronous-unwind-tables + set( GCC_CLANG_SHARED_GLOBAL_WARNING_LIST "${GCC_CLANG_SHARED_GLOBAL_WARNING_LIST} -fno-rtti -fno-exceptions" ) + set( GCC_CLANG_SHARED_LOCAL_WARNING_LIST ${GCC_CLANG_SHARED_LOCAL_WARNING_LIST} -fno-rtti -fno-exceptions ) +endif() + +# TODO: +# -ffast-math +# -ffp-contract=fast +# -fvisibility-inlines-hidden -fvisibility=hidden +# -ffunction-sections -fdata-sections +# -no-canonical-prefixes #================================================================================================== @@ -352,7 +383,7 @@ if ( COMPILER_GCC ) #-------------------------------------------- set( CONFIGURATION_DEPENDENT_PATH OFF CACHE INTERNAL "" FORCE ) - # -Wno-shadow -Wno-enum-compare -Wno-narrowing -Wno-attributes + # -Wno-shadow -Wno-enum-compare -Wno-narrowing -Wno-attributes set( GCC_SHARED_OPTS ${COMPILER_FLAGS} -Wmaybe-uninitialized -Wfree-nonheap-object -Wcast-align -Wlogical-op -Waddress -Wno-non-template-friend -Werror=return-local-addr -Werror=placement-new -Werror=sign-compare -Werror=literal-suffix -Werror=shadow=local -Werror=delete-incomplete -Werror=odr -Werror=subobject-linkage -Werror=multichar -Winvalid-offsetof ${GCC_CLANG_SHARED_LOCAL_WARNING_LIST} ) set( PROJECTS_SHARED_DEFINES ${PROJECTS_SHARED_DEFINES} "AE_COMPILER_GCC" ) @@ -399,9 +430,8 @@ endif() # Clang shared settings # 
https://clang.llvm.org/docs/DiagnosticsReference.html #================================================================================================== -set( CLANG_SHARED_GLOBAL_WARNING_LIST "${GCC_CLANG_SHARED_GLOBAL_WARNING_LIST} -Wnarrowing -stdlib=libc++" ) # -Wno-deprecated-builtins -set( CLANG_SHARED_LOCAL_WARNING_LIST ${GCC_CLANG_SHARED_LOCAL_WARNING_LIST} -Wnarrowing -Wlogical-op-parentheses -frtti -Wunused -Werror=conditional-uninitialized -Wloop-analysis -Wincrement-bool -Wno-undefined-inline -Wc++14-extensions -Wc++17-extensions -Wno-comment -Wunused-private-field -Werror=return-stack-address -Werror=address -Werror=unsupported-friend -Werror=unknown-warning-option -Werror=user-defined-literals -Werror=instantiation-after-specialization -Werror=keyword-macro -Werror=large-by-value-copy -Werror=method-signatures -Werror=self-assign -Werror=self-move -Werror=infinite-recursion -Werror=pessimizing-move -Werror=dangling-else -Werror=return-std-move -Werror=deprecated-increment-bool -Werror=abstract-final-class -Wno-ambiguous-reversed-operator -Wno-unneeded-internal-declaration -Wno-unused-function -Wno-unused-const-variable -Wno-unused-local-typedef -Wdelete-non-virtual-dtor -Wrange-loop-analysis -Wundefined-bool-conversion -Winconsistent-missing-override -Wincrement-bool -Wunused-lambda-capture -fno-short-enums -Werror=implicit-exception-spec-mismatch -Werror=range-loop-bind-reference ) - +set( CLANG_SHARED_GLOBAL_WARNING_LIST "${GCC_CLANG_SHARED_GLOBAL_WARNING_LIST} -Wnarrowing -stdlib=libc++" ) # -Wno-deprecated-builtins +set( CLANG_SHARED_LOCAL_WARNING_LIST ${GCC_CLANG_SHARED_LOCAL_WARNING_LIST} -Wnarrowing -Wlogical-op-parentheses -Wunused -Werror=conditional-uninitialized -Wloop-analysis -Wincrement-bool -Wno-undefined-inline -Wc++14-extensions -Wc++17-extensions -Wno-comment -Wunused-private-field -Werror=return-stack-address -Werror=address -Werror=unsupported-friend -Werror=unknown-warning-option -Werror=user-defined-literals 
-Werror=instantiation-after-specialization -Werror=keyword-macro -Werror=large-by-value-copy -Werror=method-signatures -Werror=self-assign -Werror=self-move -Werror=infinite-recursion -Werror=pessimizing-move -Werror=dangling-else -Werror=return-std-move -Werror=deprecated-increment-bool -Werror=abstract-final-class -Wno-ambiguous-reversed-operator -Wno-unneeded-internal-declaration -Wno-unused-function -Wno-unused-const-variable -Wno-unused-local-typedef -Wdelete-non-virtual-dtor -Wrange-loop-analysis -Wundefined-bool-conversion -Winconsistent-missing-override -Wincrement-bool -Wunused-lambda-capture -fno-short-enums -Werror=implicit-exception-spec-mismatch -Werror=range-loop-bind-reference ) #================================================================================================== # Clang Compilation settings @@ -645,7 +675,7 @@ if ( COMPILER_CLANG_ANDROID ) set( CONFIGURATION_DEPENDENT_PATH OFF CACHE INTERNAL "" FORCE ) #-------------------------------------------- - set( CLANG_SHARED_OPTS ${COMPILER_FLAGS} ${CLANG_SHARED_LOCAL_WARNING_LIST} -fstack-protector-strong -fPIC ) + set( CLANG_SHARED_OPTS ${COMPILER_FLAGS} ${CLANG_SHARED_LOCAL_WARNING_LIST} -fstack-protector-strong -fPIC -fcoroutines-ts ) # -mfloat-abi=hard set( PROJECTS_SHARED_DEFINES ${PROJECTS_SHARED_DEFINES} "AE_COMPILER_CLANG" ) diff --git a/AE/engine/cmake/config.cmake b/AE/engine/cmake/config.cmake index 93fb20fb..a5fd28b5 100644 --- a/AE/engine/cmake/config.cmake +++ b/AE/engine/cmake/config.cmake @@ -8,7 +8,10 @@ set( AE_ENABLE_MEMLEAK_CHECKS ON CACHE BOOL "enable memory leak checks" ) set( AE_USE_SANITIZER OFF CACHE BOOL "enable (address/...) 
sanitizer" ) set( AE_CI_BUILD_NO_GRAPHICS OFF CACHE BOOL "CI settings (without graphics)" ) set( AE_CI_BUILD OFF CACHE BOOL "CI settings" ) -mark_as_advanced( AE_NO_EXCEPTIONS AE_CI_BUILD_NO_GRAPHICS AE_CI_BUILD ) +set( AE_ENABLE_EXCEPTIONS ON CACHE BOOL "enable exception and RTTI" ) +set( AE_ENABLE_LOGS ON CACHE BOOL "enable logging, disable to remove a lot of strings" ) + +mark_as_advanced( AE_CI_BUILD_NO_GRAPHICS AE_CI_BUILD AE_ENABLE_LOGS ) #---------------------------------------------------------- # internal constants diff --git a/AE/engine/deprecated/threading/LfFixedBlockAllocator1.h b/AE/engine/deprecated/threading/LfFixedBlockAllocator1.h new file mode 100644 index 00000000..69c7ede0 --- /dev/null +++ b/AE/engine/deprecated/threading/LfFixedBlockAllocator1.h @@ -0,0 +1,139 @@ +// Copyright (c) Zhirnov Andrey. For more information see 'LICENSE' + +#pragma once + +#ifndef AE_LFAS_ENABLED +# include "threading/Primitives/SpinLock.h" +# include "threading/Primitives/SyncEvent.h" +#endif + +#ifdef AE_DEBUG +# define AE_LFFIXEDBLOCKALLOC_DEBUG 1 +#else +# define AE_LFFIXEDBLOCKALLOC_DEBUG 0 +#endif + +namespace AE::Threading +{ + + // + // Lock-free Fixed block Allocator + // + + template + class LfFixedBlockAllocator1 final : public IAllocatorTS + { + StaticAssert( ChunkSize_v > 0 ); + StaticAssert( IsMultipleOf( ChunkSize_v, 32 ) or IsMultipleOf( ChunkSize_v, 64 )); + StaticAssert( ChunkSize_v <= 64*64 ); + StaticAssert( MaxChunks_v > 0 ); + StaticAssert( IsPowerOfTwo( ChunkSize_v )); // must be power of 2 to increase performance + StaticAssert( BlockAllocatorType::IsThreadSafe ); + StaticAssert( GeneralAllocatorType::IsThreadSafe ); + + // types + public: + using Self = LfFixedBlockAllocator1< ChunkSize_v, MaxChunks_v, BlockAllocatorType, GeneralAllocatorType >; + using BlockAllocator_t = BlockAllocatorType; + using GenAllocator_t = GeneralAllocatorType; + using Ptr_t = RstPtr; + + private: + static constexpr uint ChunkSize = ChunkSize_v; + static constexpr 
uint MaxChunks = MaxChunks_v; + static constexpr uint LowLevel_Count = (ChunkSize <= 32 ? 32 : 64); + static constexpr uint HiLevel_Count = Max( 1u, (ChunkSize + LowLevel_Count - 1) / LowLevel_Count ); + + using LowLevelBits_t = Conditional< (LowLevel_Count <= 32), uint, ulong >; + using LowLevels_t = StaticArray< Atomic< LowLevelBits_t >, HiLevel_Count >; + using HiLevelBits_t = Conditional< (HiLevel_Count <= 32), uint, ulong >; + + StaticAssert( HiLevel_Count <= 64 ); + + struct alignas(AE_CACHE_LINE) ChunkInfo + { + Atomic< HiLevelBits_t > hiLevel; // 0 - is unassigned bit, 1 - assigned bit + SpinLockRelaxed hiLevelGuard; // only for 'hiLevel' modification + + LowLevels_t lowLevel; // 0 - is unassigned bit, 1 - assigned bit + Atomic< void * > memBlock; + + #if AE_LFFIXEDBLOCKALLOC_DEBUG + Atomic< SourceLoc *> dbgInfo; + SyncEvent dbgInfoEvent {SyncEvent::EFlags::ManualReset}; + #endif + }; + + using ChunkInfos_t = StaticArray< ChunkInfo, MaxChunks >; + + static constexpr HiLevelBits_t MaxHighLevel = ToBitMask( HiLevel_Count ); + static constexpr HiLevelBits_t InitialHighLevel = ~MaxHighLevel; + static constexpr uint HighWaitCount = 2; + static constexpr usize ThreadToChunkMask = MaxChunks < 5 ? 
0 : 15; + + + // variables + private: + ChunkInfos_t _chunkInfo; + const POTBytes _blockSize; + const POTBytes _blockAlign; + + NO_UNIQUE_ADDRESS BlockAllocator_t _blockAlloc; + NO_UNIQUE_ADDRESS GenAllocator_t _genAlloc; + + Atomic _dbgCounter {0}; + Atomic _dbgLockCounter {0}; + + + // methods + public: + LfFixedBlockAllocator1 (Bytes blockSize, + Bytes blockAlign, + const BlockAllocator_t& blockAlloc = Default, + const GenAllocator_t& genAlloc = Default) __NE___; + + ~LfFixedBlockAllocator1 () __NE___ { Release( True{"checkMemLeak"} ); } + + void Release (Bool checkMemLeak) __NE___; + + + + ND_ Ptr_t AllocBlock () __NE___ { return AllocBlock( Default ); } + ND_ Ptr_t AllocBlock (const SourceLoc &loc) __NE___; + bool DeallocBlock (void *) __NE___; + + ND_ Bytes BlockSize () C_NE___ { return Bytes{ _blockSize }; } + ND_ Bytes BlockAlign () C_NE___ { return Bytes{ _blockAlign }; } + ND_ POTBytes BlockSizePOT () C_NE___ { return _blockSize; } + ND_ POTBytes BlockAlignPOT () C_NE___ { return _blockAlign; } + + ND_ Bytes LargeBlockSize () C_NE___ { return BlockSize() * ChunkSize; } + ND_ Bytes MaxMemorySize () C_NE___ { return LargeBlockSize() * MaxChunks; } + ND_ usize MaxBlockCount () C_NE___ { return MaxChunks * ChunkSize; } + ND_ Bytes AllocatedSize () C_NE___; + + + // IAllocator // + ND_ void* Allocate (const SizeAndAlign sa) __NE_OV; + void Deallocate (void*, const SizeAndAlign) __NE_OV; + void Deallocate (void* ptr) __NE_OV { CHECK( DeallocBlock( ptr )); } + + using IAllocator::Allocate; + using IAllocator::Deallocate; + + private: + ND_ Ptr_t _Alloc (uint chunkIndex, const SourceLoc &loc, INOUT ulong& dbgCounter, INOUT ulong& lockCounter) __NE___; + + ND_ static constexpr Bytes _DbgInfoSize () __NE___ { return SizeOf * ChunkSize; } + ND_ static constexpr Bytes _DbgInfoAlign ()__NE___ { return AlignOf; } + }; + + +} // AE::Threading + +#include "LfFixedBlockAllocator1.inl.h" +#undef AE_LFFIXEDBLOCKALLOC_DEBUG diff --git 
a/AE/engine/src/threading/Memory/LfFixedBlockAllocator3.inl.h b/AE/engine/deprecated/threading/LfFixedBlockAllocator1.inl.h similarity index 67% rename from AE/engine/src/threading/Memory/LfFixedBlockAllocator3.inl.h rename to AE/engine/deprecated/threading/LfFixedBlockAllocator1.inl.h index 7ce39ed0..289f788d 100644 --- a/AE/engine/src/threading/Memory/LfFixedBlockAllocator3.inl.h +++ b/AE/engine/deprecated/threading/LfFixedBlockAllocator1.inl.h @@ -9,10 +9,10 @@ namespace AE::Threading ================================================= */ template - LfFixedBlockAllocator3::LfFixedBlockAllocator3 (Bytes blockSize, - Bytes blockAlign, - const BlockAllocator_t& blockAlloc, - const GenAllocator_t& genAlloc) __NE___ : + LfFixedBlockAllocator1::LfFixedBlockAllocator1 (Bytes blockSize, + Bytes blockAlign, + const BlockAllocator_t& blockAlloc, + const GenAllocator_t& genAlloc) __NE___ : _blockSize{ blockSize }, _blockAlign{ blockAlign }, _blockAlloc{ blockAlloc }, @@ -21,33 +21,23 @@ namespace AE::Threading ASSERT( BlockSize() == blockSize ); ASSERT( BlockAlign() == blockAlign ); - StaticArray< TopLevelBits_t, TopLevel_Count > top_bits = {}; - - for (usize i = 0; i < _bottomChunks.size(); ++i) + for (usize i = 0; i < MaxChunks; ++i) { - auto& chunk = _bottomChunks[i]; - auto& top = top_bits[ i / CT_SizeOfInBits ]; + auto& chunk = _chunkInfo[i]; for (usize j = 0; j < HiLevel_Count; ++j) { chunk.lowLevel[j].store( 0 ); } - chunk.hiLevel.store( InitialHighLevel ); // set 0 bit for working range, 1 bit for unused bits + chunk.hiLevel.store( InitialHighLevel ); // set 0 bits for working range, 1 bit for unused bits chunk.memBlock.store( null ); - top |= TopLevelBits_t{1} << (i % CT_SizeOfInBits); - #if AE_LFFIXEDBLOCKALLOC_DEBUG chunk.dbgInfo.store( null ); #endif } - for (usize i = 0; i < _topChunks.size(); ++i) - { - _topChunks[i].assigned.store( ~top_bits[i] ); // set 0 bit for working range, 1 bit for unused bits - } - MemoryBarrier( EMemoryOrder::Release ); } @@ -59,15 
+49,14 @@ namespace AE::Threading ================================================= */ template - void LfFixedBlockAllocator3::Release (bool checkMemLeak) __NE___ + void LfFixedBlockAllocator1::Release (Bool checkMemLeak) __NE___ { MemoryBarrier( EMemoryOrder::Acquire ); for (usize i = 0; i < MaxChunks; ++i) { - auto& chunk = _bottomChunks[i]; - void* ptr = chunk.memBlock.exchange( null ); - auto& top_chunk = _topChunks[ i / CT_SizeOfInBits ]; + auto& chunk = _chunkInfo[i]; + void* ptr = chunk.memBlock.exchange( null ); if ( ptr != null ) _blockAlloc.Deallocate( ptr, SizeAndAlign{ LargeBlockSize(), BlockAlign() }); @@ -77,14 +66,8 @@ namespace AE::Threading HiLevelBits_t old_hi_level = chunk.hiLevel.exchange( InitialHighLevel ); if ( checkMemLeak ) - { CHECK( old_hi_level == InitialHighLevel ); // some blocks is still allocated - TopLevelBits_t top_bits = top_chunk.assigned.load(); - TopLevelBits_t top_lvl_bit = TopLevelBits_t{1} << (i % CT_SizeOfInBits); - CHECK( not (top_bits & top_lvl_bit) ); - } - #if AE_LFFIXEDBLOCKALLOC_DEBUG SourceLoc* dbg_info = chunk.dbgInfo.exchange( null ); #endif @@ -109,8 +92,9 @@ namespace AE::Threading AE_LOGI( "Leaked memory block: "s << ToString( BlockSize() ), dbg.file, dbg.line ); - old_low_level &= (LowLevelBits_t{1} << low_idx); // 1 -> 0 - low_idx = BitScanForward( old_low_level ); // first 1 bit + old_low_level &= (LowLevelBits_t{1} << low_idx); // 1 -> 0 + + low_idx = BitScanForward( old_low_level ); // first 1 bit } } #endif @@ -134,12 +118,12 @@ namespace AE::Threading /* ================================================= - Alloc + AllocBlock ================================================= */ template - typename LfFixedBlockAllocator3::Ptr_t - LfFixedBlockAllocator3::Alloc (const SourceLoc &loc) __NE___ + typename LfFixedBlockAllocator1::Ptr_t + LfFixedBlockAllocator1::AllocBlock (const SourceLoc &loc) __NE___ { struct Dbg { @@ -157,22 +141,32 @@ namespace AE::Threading } } dbg{ *this }; - for (usize i = 0; i < 
_topChunks.size(); ++i, ++dbg.counter) + for (uint i = ThreadUtils::GetIntID() & ThreadToChunkMask; + i < MaxChunks; i += (ThreadToChunkMask >> 1)) { - auto& top_chunk = _topChunks[i]; - TopLevelBits_t available = ~top_chunk.assigned.load(); // 1 - unassigned - int idx = BitScanForward( available ); // first 1 bit + auto& chunk = _chunkInfo[i]; - for (; idx >= 0; ++dbg.counter) - { - Ptr_t ptr = _Alloc( uint(idx + i * CT_SizeOfInBits), loc, INOUT dbg.counter, INOUT dbg.locks ); + if ( chunk.hiLevel.load() == UMax ) + continue; - if_likely( ptr != null ) - return ptr; + Ptr_t ptr = _Alloc( i, loc, INOUT dbg.counter, INOUT dbg.locks ); - available &= ~(TopLevelBits_t{1} << idx); // 1 -> 0 - idx = BitScanForward( available ); // first 1 bit - } + if_likely( ptr != null ) + return ptr; + } + + // TODO + for (uint i = 0; i < MaxChunks; ++i) + { + auto& chunk = _chunkInfo[i]; + + if ( chunk.hiLevel.load() == UMax ) + continue; + + Ptr_t ptr = _Alloc( i, loc, INOUT dbg.counter, INOUT dbg.locks ); + + if_likely( ptr != null ) + return ptr; } return null; @@ -184,14 +178,18 @@ namespace AE::Threading ================================================= */ template - typename LfFixedBlockAllocator3::Ptr_t - LfFixedBlockAllocator3::_Alloc (const uint chunkIndex, const SourceLoc &loc, INOUT ulong& dbgCounter, INOUT ulong& lockCounter) __NE___ + typename LfFixedBlockAllocator1::Ptr_t + LfFixedBlockAllocator1::_Alloc (const uint chunkIndex, const SourceLoc &loc, INOUT ulong& dbgCounter, INOUT ulong& lockCounter) __NE___ { - BottomChunk& chunk = _bottomChunks[ chunkIndex ]; - auto* ptr = chunk.memBlock.load( EMemoryOrder::Acquire ); + Unused( loc ); + + ChunkInfo& chunk = _chunkInfo[ chunkIndex ]; + void* ptr = chunk.memBlock.load( EMemoryOrder::Acquire ); + ++dbgCounter; #if AE_LFFIXEDBLOCKALLOC_DEBUG SourceLoc* dbg_info = chunk.dbgInfo.load( EMemoryOrder::Acquire ); + ++dbgCounter; #endif // allocate new block @@ -257,7 +255,7 @@ namespace AE::Threading // find available index in 
high level for (uint j = 0; j < HighWaitCount; ++j, ++dbgCounter) { - HiLevelBits_t hi_available = ~chunk.hiLevel.load(); // 1 - unassigned + HiLevelBits_t hi_available = ~chunk.hiLevel.load(); // 1 - unassigned bit int hi_lvl_idx = BitScanForward( hi_available ); // first 1 bit for (; hi_lvl_idx >= 0; ++dbgCounter) @@ -266,47 +264,28 @@ namespace AE::Threading // find available index in low level auto& level = chunk.lowLevel[ hi_lvl_idx ]; - LowLevelBits_t low_available = level.load(); // 0 - unassigned + LowLevelBits_t low_available = level.load(); // 0 - unassigned bit int low_lvl_idx = BitScanForward( ~low_available ); // first 0 bit for (; low_lvl_idx >= 0; ++dbgCounter) { const LowLevelBits_t low_lvl_bit = (LowLevelBits_t{1} << low_lvl_idx); - if ( level.CAS( INOUT low_available, low_available | low_lvl_bit )) // 0 -> 1 + if ( level.CAS( INOUT low_available, low_available | low_lvl_bit )) // 0 -> 1 { // update high level if_unlikely( low_available == ~low_lvl_bit ) { - bool update_top = false; - { - EXLOCK( chunk.hiLevelGuard ); - ++lockCounter; + EXLOCK( chunk.hiLevelGuard ); + ++lockCounter; - // low level value may be changed at any time so check it inside spinlock - if ( level.load() == UMax ) - { - const auto hi_lvl_bit = (HiLevelBits_t{1} << hi_lvl_idx); - - update_top = (chunk.hiLevel.Or( hi_lvl_bit ) == MaxHighLevel); // 0 -> 1 - ++dbgCounter; - } - } - - if_unlikely( update_top ) + // low level value may be changed at any time so check it inside spinlock + if ( level.load() == UMax ) { - auto& top_chunk = _topChunks[ chunkIndex / CT_SizeOfInBits ]; - - EXLOCK( top_chunk.assignedGuard ); - ++lockCounter; - - if ( chunk.hiLevel.load() == MaxHighLevel ) - { - const auto top_lvl_bit = TopLevelBits_t{1} << (chunkIndex % CT_SizeOfInBits); + const auto hi_lvl_bit = (HiLevelBits_t{1} << hi_lvl_idx); - top_chunk.assigned.fetch_or( top_lvl_bit ); // 0 -> 1 - ++dbgCounter; - } + chunk.hiLevel.fetch_or( hi_lvl_bit ); // 0 -> 1 + ++dbgCounter; } } @@ -320,7 
+299,6 @@ namespace AE::Threading MemoryBarrier( EMemoryOrder::Release ); } #endif - Unused( loc ); void* result = ptr + BlockSize() * idx_in_chunk; @@ -345,17 +323,17 @@ namespace AE::Threading /* ================================================= - Dealloc + DeallocBlock ================================================= */ template - bool LfFixedBlockAllocator3::Dealloc (void* ptr) __NE___ + bool LfFixedBlockAllocator1::DeallocBlock (void* ptr) __NE___ { const Bytes block_size = LargeBlockSize(); for (uint i = 0; i < MaxChunks; ++i) { - auto& chunk = _bottomChunks[i]; + auto& chunk = _chunkInfo[i]; void* mem = chunk.memBlock.load(); if ( ptr < mem or ptr >= mem + block_size ) @@ -384,31 +362,14 @@ namespace AE::Threading // update high level bits if_unlikely( old_bits == UMax ) { - bool update_top = false; - { - EXLOCK( chunk.hiLevelGuard ); - - // low level value may be changed at any time so check it inside spinlock - if ( level.load() != UMax ) - { - const auto hi_lvl_bit = (HiLevelBits_t{1} << hi_lvl_idx); + EXLOCK( chunk.hiLevelGuard ); - update_top = (chunk.hiLevel.fetch_and( ~hi_lvl_bit ) == MaxHighLevel); // 1 -> 0 - } - } - - if_unlikely( update_top ) + // low level value may be changed at any time so check it inside spinlock + if ( level.load() != UMax ) { - auto& top_chunk = _topChunks[ i / CT_SizeOfInBits ]; - - EXLOCK( top_chunk.assignedGuard ); + const auto hi_bit = (HiLevelBits_t{1} << hi_lvl_idx); - if ( chunk.hiLevel.load() != MaxHighLevel ) - { - const auto top_lvl_bit = TopLevelBits_t{1} << (i % CT_SizeOfInBits); - - top_chunk.assigned.fetch_and( ~top_lvl_bit ); // 1 -> 0 - } + chunk.hiLevel.fetch_and( ~hi_bit ); // 1 -> 0 } } return true; @@ -424,14 +385,14 @@ namespace AE::Threading ================================================= */ template - Bytes LfFixedBlockAllocator3::AllocatedSize () C_NE___ + Bytes LfFixedBlockAllocator1::AllocatedSize () C_NE___ { const Bytes block_size = LargeBlockSize(); Bytes result; for (uint i = 0; i < 
MaxChunks; ++i) { - auto& chunk = _bottomChunks[i]; + auto& chunk = _chunkInfo[i]; if ( chunk.memBlock.load() != null ) result += block_size; @@ -440,4 +401,33 @@ namespace AE::Threading } +/* +================================================= + Allocate +================================================= +*/ + template + void* LfFixedBlockAllocator1::Allocate (const SizeAndAlign sizeAndAlign) __NE___ + { + CHECK_ERR( sizeAndAlign.size <= BlockSize() and + sizeAndAlign.align <= BlockAlign() ); + return AllocBlock(); + } + + +/* +================================================= + Deallocate +================================================= +*/ + template + void LfFixedBlockAllocator1::Deallocate (void* ptr, const SizeAndAlign sizeAndAlign) __NE___ + { + ASSERT( sizeAndAlign.size <= BlockSize() and + sizeAndAlign.align <= BlockAlign() ); + Unused( sizeAndAlign ); + CHECK( DeallocBlock( ptr )); + } + + } // AE::Threading diff --git a/AE/engine/src/threading/Containers/LfIndexedPool2.h b/AE/engine/deprecated/threading/LfIndexedPool2.h similarity index 89% rename from AE/engine/src/threading/Containers/LfIndexedPool2.h rename to AE/engine/deprecated/threading/LfIndexedPool2.h index 487b255f..269a24e1 100644 --- a/AE/engine/src/threading/Containers/LfIndexedPool2.h +++ b/AE/engine/deprecated/threading/LfIndexedPool2.h @@ -20,24 +20,24 @@ namespace AE::Threading { // - // Lock-Free Indexed Pool + // Lock-Free Indexed Pool v2 // template class LfIndexedPool2 final : public Noncopyable { - STATIC_ASSERT( ChunkSize_v > 0 ); - STATIC_ASSERT( IsMultipleOf( ChunkSize_v, 32 ) or IsMultipleOf( ChunkSize_v, 64 )); - STATIC_ASSERT( ChunkSize_v <= 64*64 ); - STATIC_ASSERT( MaxChunks_v > 0 and MaxChunks_v <= 64 ); - STATIC_ASSERT( IsPowerOfTwo( ChunkSize_v )); // must be power of 2 to increase performance - STATIC_ASSERT( AllocatorType::IsThreadSafe ); - STATIC_ASSERT( MaxValue() >= (ChunkSize_v * MaxChunks_v) ); + StaticAssert( ChunkSize_v > 0 ); + StaticAssert( IsMultipleOf( 
ChunkSize_v, 32 ) or IsMultipleOf( ChunkSize_v, 64 )); + StaticAssert( ChunkSize_v <= 64*64 ); + StaticAssert( MaxChunks_v > 0 and MaxChunks_v <= 64 ); + StaticAssert( IsPowerOfTwo( ChunkSize_v )); // must be power of 2 to increase performance + StaticAssert( AllocatorType::IsThreadSafe ); + StaticAssert( MaxValue() >= (ChunkSize_v * MaxChunks_v) ); // types public: @@ -131,4 +131,4 @@ namespace AE::Threading } // AE::Threading -#include "threading/Containers/LfIndexedPool2.inl.h" +#include "LfIndexedPool2.inl.h" diff --git a/AE/engine/src/threading/Containers/LfIndexedPool2.inl.h b/AE/engine/deprecated/threading/LfIndexedPool2.inl.h similarity index 99% rename from AE/engine/src/threading/Containers/LfIndexedPool2.inl.h rename to AE/engine/deprecated/threading/LfIndexedPool2.inl.h index dbc22b0e..b44e3d62 100644 --- a/AE/engine/src/threading/Containers/LfIndexedPool2.inl.h +++ b/AE/engine/deprecated/threading/LfIndexedPool2.inl.h @@ -306,7 +306,7 @@ namespace AE::Threading if_unlikely( i > ThreadUtils::SpinBeforeLock() ) { i = 0; - ThreadUtils::YieldOrSleep(); + ThreadUtils::YieldOrMicroSleep(); } ThreadUtils::Pause(); } diff --git a/AE/engine/tests/threading/UnitTest_LfIndexedPool2.cpp b/AE/engine/deprecated/threading/UnitTest_LfIndexedPool2.cpp similarity index 100% rename from AE/engine/tests/threading/UnitTest_LfIndexedPool2.cpp rename to AE/engine/deprecated/threading/UnitTest_LfIndexedPool2.cpp diff --git a/AE/engine/docs/CMakeLists.txt b/AE/engine/docs/CMakeLists.txt deleted file mode 100644 index 9ea66f32..00000000 --- a/AE/engine/docs/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Zhirnov Andrey. 
For more information see 'LICENSE' - -file( GLOB_RECURSE SOURCES "*.*" ) -add_library( "Docs" STATIC EXCLUDE_FROM_ALL ${SOURCES} "../Changelog.md" "../../../Readme.md" ) -set_target_properties( "Docs" PROPERTIES LINKER_LANGUAGE CXX ) -set_property( TARGET "Docs" PROPERTY FOLDER "Engine" ) -source_group( TREE ${CMAKE_CURRENT_SOURCE_DIR} FILES ${SOURCES} ) diff --git a/AE/engine/docs/ru/ArchitectureNotes.md b/AE/engine/docs/ru/ArchitectureNotes.md deleted file mode 100644 index e19243ea..00000000 --- a/AE/engine/docs/ru/ArchitectureNotes.md +++ /dev/null @@ -1,47 +0,0 @@ -Заметки по планированию архитектуры. - -## ООП - -В больших классах частая проблема - множество методов, которые имеют доступ ко всем полям класса. -В чем тут проблема: -* Проблемы с читаемостью - непонятно что где меняется. -* Проблемы с синхронизацией - если поля класса защищены разными примитивами синхронизации, то очень сложно отследить где что используется и была ли синхронизация перед использованием. - -Решается это использованием статичных функций, у них остается доступ к приватным типам класса, но нет доступа к полям, для этого их нужно явно передавать, что сразу же улучшает читаемость кода. - -Другой вариант это константные методы, они могут читать все поля, но меняют только те, что передаются в виде параметров. -```cpp -class Obj -{ - ReadOnly r; - Mutable m; - - static void StaticFn (const ReadOnly &, Mutable &); - void ConstMethod (Mutable &) const; -}; -``` - -## Синхронизации - -Если в классе используется mutex или другой примитив синхронизации, то он должен использоваться для всех полей, кроме константных. Иначе это выглядит как ошибка, когда часть методов не используют синхронизацию. - -Вместо нескольких примитивов синхронизации внутри одного класса лучше использовать [Synchronized](https://github.com/azhirnov/as-en/blob/dev/AE/engine/src/threading/Primitives/Synchronized.h) тип. 
- - -## Исключения - -Текущая реализация C++ не позволяет писать безопасный код с использованием исключений. - -Проблемы: -* Приходится вручную отслеживать какая функция бросает исключения. Компилятор выдает ошибку, если функция с `noexcept` бросает исключение, которое не перехватывается, но это работает только на явный вызов `throw` внутри функции, а используемые функции могут кидать исключения и компилятор никак это не проверяет. Пока не будет предупреждений на использование не-`noexcept` функций внутри `noexcept` функций исключения будут опасны. -* Концепция исключений предполагает, что после бросания исключений объект возвращается в первоначальное состояние, никаких частичных изменений, либо все, либо ничего, это часто требует выделение дополнительной памяти. -* Бросание исключений в конструкторах, особенно в move-конструкторах, приводит к выделению дополнительной памяти, как описано выше. -* В ObjC бросание исключения внутри `autoreleasepool` приводит к утечке памяти. -* Сложно писать код, который требует соблюдения правил и компилятор в этом никак не помогает, поэтому поддержка исключений усложняет код и увеличивает время разработки. - -Преимущества: -* Хорошо подходят для функций, вызываемых из скриптов, тогда в случае бросания исключения скрипт завершается и выдает ошибку из исключения. -* Неплохо подходит для десериализации, где из-за порчи данных может произойти попытка выделить большого объема памяти. - -В большинстве случаев достаточно использовать атрибут `[[nodiscard]]`, что позволяет использовать возвращаемые коды ошибок, которые не будут проигнорированы. 
- diff --git a/AE/engine/external/shared/Abseil/CMakeLists.txt b/AE/engine/external/shared/Abseil/CMakeLists.txt index 9d62fec8..ca226dc6 100644 --- a/AE/engine/external/shared/Abseil/CMakeLists.txt +++ b/AE/engine/external/shared/Abseil/CMakeLists.txt @@ -27,7 +27,7 @@ if (${AE_ENABLE_EXTERNAL_TESTS}) target_include_directories( "Tests.Abseil" PRIVATE "${AE_TEST_DIR}" ) else() add_executable( "Tests.Abseil" "tests/main.cpp" ) - add_test( NAME "Tests.Abseil" COMMAND "Tests.Abseil" ) + #add_test( NAME "Tests.Abseil" COMMAND "Tests.Abseil" ) endif() set_target_properties( "Tests.Abseil" PROPERTIES CXX_STANDARD 17 CXX_STANDARD_REQUIRED YES ) target_compile_features( "Tests.Abseil" PUBLIC cxx_std_17 ) diff --git a/AE/engine/external/shared/AngelScript/CMakeLists.txt b/AE/engine/external/shared/AngelScript/CMakeLists.txt index 77d9fb51..42b32bf9 100644 --- a/AE/engine/external/shared/AngelScript/CMakeLists.txt +++ b/AE/engine/external/shared/AngelScript/CMakeLists.txt @@ -41,7 +41,7 @@ if (${AE_ENABLE_EXTERNAL_TESTS}) target_include_directories( "Tests.AngelScript" PRIVATE "${AE_TEST_DIR}" ) else() add_executable( "Tests.AngelScript" "tests/main.cpp" ) - add_test( NAME "Tests.AngelScript" COMMAND "Tests.AngelScript" ) + #add_test( NAME "Tests.AngelScript" COMMAND "Tests.AngelScript" ) endif() set_property( TARGET "Tests.AngelScript" PROPERTY FOLDER "Engine/External/Tests" ) target_link_libraries( "Tests.AngelScript" PUBLIC "AngelScript-lib" ) diff --git a/AE/engine/external/shared/Assimp/CMakeLists.txt b/AE/engine/external/shared/Assimp/CMakeLists.txt index 61ff5741..b78f9803 100644 --- a/AE/engine/external/shared/Assimp/CMakeLists.txt +++ b/AE/engine/external/shared/Assimp/CMakeLists.txt @@ -40,7 +40,7 @@ endif() if (${AE_ENABLE_EXTERNAL_TESTS}) add_executable( "Tests.Assimp" "tests/main.cpp" ) - add_test( NAME "Tests.Assimp" COMMAND "Tests.Assimp" ) + #add_test( NAME "Tests.Assimp" COMMAND "Tests.Assimp" ) set_target_properties( "Tests.Assimp" PROPERTIES CXX_STANDARD 
17 CXX_STANDARD_REQUIRED YES ) target_compile_features( "Tests.Assimp" PUBLIC cxx_std_17 ) set_property( TARGET "Tests.Assimp" PROPERTY FOLDER "Engine/External/Tests" ) diff --git a/AE/engine/external/shared/Brotli/CMakeLists.txt b/AE/engine/external/shared/Brotli/CMakeLists.txt index 36dc8b52..ea7131b6 100644 --- a/AE/engine/external/shared/Brotli/CMakeLists.txt +++ b/AE/engine/external/shared/Brotli/CMakeLists.txt @@ -28,7 +28,7 @@ if (${AE_ENABLE_EXTERNAL_TESTS}) target_include_directories( "Tests.Brotli" PRIVATE "${AE_TEST_DIR}" ) else() add_executable( "Tests.Brotli" "tests/main.cpp" ) - add_test( NAME "Tests.Brotli" COMMAND "Tests.Brotli" ) + #add_test( NAME "Tests.Brotli" COMMAND "Tests.Brotli" ) endif() set_property( TARGET "Tests.Brotli" PROPERTY FOLDER "Engine/External/Tests" ) target_link_libraries( "Tests.Brotli" PUBLIC "Brotli-lib" ) diff --git a/AE/engine/external/shared/CDT/CMakeLists.txt b/AE/engine/external/shared/CDT/CMakeLists.txt index ef8c55b0..3ff81e43 100644 --- a/AE/engine/external/shared/CDT/CMakeLists.txt +++ b/AE/engine/external/shared/CDT/CMakeLists.txt @@ -18,7 +18,7 @@ set_property( TARGET "CDT-lib" PROPERTY INTERFACE_LINK_LIBRARIES if (${AE_ENABLE_EXTERNAL_TESTS}) add_executable( "Tests.CDT" "tests/main.cpp" ) - add_test( NAME "Tests.CDT" COMMAND "Tests.CDT" ) + #add_test( NAME "Tests.CDT" COMMAND "Tests.CDT" ) set_property( TARGET "Tests.CDT" PROPERTY FOLDER "Engine/External/Tests" ) target_link_libraries( "Tests.CDT" PUBLIC "CDT-lib" ) endif () diff --git a/AE/engine/external/shared/CMakeLists.txt b/AE/engine/external/shared/CMakeLists.txt index 4ad50416..f5ca990a 100644 --- a/AE/engine/external/shared/CMakeLists.txt +++ b/AE/engine/external/shared/CMakeLists.txt @@ -26,8 +26,6 @@ set( AE_ENABLE_KTX ON CACHE BOOL "use KTX" ) set( AE_ENABLE_MSDFGEN ON CACHE BOOL "use msdfgen" ) set( AE_ENABLE_METAL OFF CACHE BOOL "enable Metal API" ) -set( AE_ENABLE_OPENGL OFF CACHE BOOL "enable OpenGL (ES) API" ) - set( AE_ENABLE_REMOTE_GRAPHICS OFF 
CACHE BOOL "enable Remote Graphics" ) set( AE_ENABLE_RGRAPHICS_SERVER ON CACHE BOOL "enable Remote Graphics server" ) @@ -41,11 +39,7 @@ set( AE_ENABLE_VMA ON CACHE BOOL "use Vulkan Memory Allocator" ) set( AE_ENABLE_UTF8PROC ON CACHE BOOL "use Utf8Proc" ) -if (APPLE) - set( AE_ENABLE_METAL ON CACHE INTERNAL "" FORCE ) - set( AE_ENABLE_OPENGL OFF CACHE INTERNAL "" FORCE ) - set( AE_ENABLE_VULKAN OFF CACHE INTERNAL "" FORCE ) -endif() +set( AE_ENABLE_XXHASH ON CACHE BOOL "use xxHash" ) if (${AE_ENABLE_REMOTE_GRAPHICS} AND ${AE_ENABLE_RGRAPHICS_SERVER}) message( FATAL_ERROR "AE_ENABLE_REMOTE_GRAPHICS and AE_ENABLE_RGRAPHICS_SERVER are not compatible" ) @@ -102,6 +96,8 @@ if (${AE_ENABLE_FREETYPE}) add_subdirectory( "FreeType" ) endif() +add_subdirectory( "fameta-counter" ) + if (${AE_ENABLE_IMGUI}) add_subdirectory( "imgui" ) endif() @@ -114,10 +110,6 @@ if (${AE_ENABLE_MSDFGEN}) add_subdirectory( "msdfgen" ) endif() -if (${AE_ENABLE_OPENGL}) - add_subdirectory( "OpenGL" ) -endif() - if (${AE_ENABLE_SPIRV_CROSS}) add_subdirectory( "SPIRV-Cross" ) endif() @@ -130,7 +122,7 @@ if (${AE_ENABLE_VULKAN}) add_subdirectory( "Vulkan" ) endif() -if (${AE_ENABLE_VMA}) +if (${AE_ENABLE_VMA} AND ${AE_ENABLE_VULKAN}) add_subdirectory( "VMA" ) endif() @@ -142,3 +134,6 @@ if (${AE_ENABLE_TINYGLTF}) add_subdirectory( "tinygltf" ) endif() +if (${AE_ENABLE_XXHASH}) + add_subdirectory( "xxHash" ) +endif() diff --git a/AE/engine/external/shared/FreeType/CMakeLists.txt b/AE/engine/external/shared/FreeType/CMakeLists.txt index d2cdc9af..b2468992 100644 --- a/AE/engine/external/shared/FreeType/CMakeLists.txt +++ b/AE/engine/external/shared/FreeType/CMakeLists.txt @@ -29,7 +29,7 @@ if (${AE_ENABLE_EXTERNAL_TESTS}) target_include_directories( "Tests.FreeType" PRIVATE "${AE_TEST_DIR}" ) else() add_executable( "Tests.FreeType" "tests/main.cpp" ) - add_test( NAME "Tests.FreeType" COMMAND "Tests.FreeType" ) + #add_test( NAME "Tests.FreeType" COMMAND "Tests.FreeType" ) endif() set_property( TARGET 
"Tests.FreeType" PROPERTY FOLDER "Engine/External/Tests" ) target_link_libraries( "Tests.FreeType" PUBLIC "FreeType-lib" ) diff --git a/AE/engine/external/shared/GLFW/CMakeLists.txt b/AE/engine/external/shared/GLFW/CMakeLists.txt index 18851e9c..b117f170 100644 --- a/AE/engine/external/shared/GLFW/CMakeLists.txt +++ b/AE/engine/external/shared/GLFW/CMakeLists.txt @@ -31,5 +31,5 @@ if (${AE_ENABLE_EXTERNAL_TESTS}) add_executable( "Tests.GLFW" "tests/main.cpp" ) set_property( TARGET "Tests.GLFW" PROPERTY FOLDER "Engine/External/Tests" ) target_link_libraries( "Tests.GLFW" PUBLIC "GLFW-lib" ) - add_test( NAME "Tests.GLFW" COMMAND "Tests.GLFW" ) + #add_test( NAME "Tests.GLFW" COMMAND "Tests.GLFW" ) endif () diff --git a/AE/engine/external/shared/GLM/CMakeLists.txt b/AE/engine/external/shared/GLM/CMakeLists.txt index b87911c4..54c855af 100644 --- a/AE/engine/external/shared/GLM/CMakeLists.txt +++ b/AE/engine/external/shared/GLM/CMakeLists.txt @@ -16,7 +16,7 @@ if (${AE_ENABLE_EXTERNAL_TESTS}) target_include_directories( "Tests.GLM" PRIVATE "${AE_TEST_DIR}" ) else () add_executable( "Tests.GLM" "tests/main.cpp" ) - add_test( NAME "Tests.GLM" COMMAND "Tests.GLM" ) + #add_test( NAME "Tests.GLM" COMMAND "Tests.GLM" ) endif () set_target_properties( "Tests.GLM" PROPERTIES CXX_STANDARD 17 CXX_STANDARD_REQUIRED YES ) target_compile_features( "Tests.GLM" PUBLIC cxx_std_17 ) diff --git a/AE/engine/external/shared/GLM/update.bat b/AE/engine/external/shared/GLM/update.bat index 973b8f16..8ca4b8dc 100644 --- a/AE/engine/external/shared/GLM/update.bat +++ b/AE/engine/external/shared/GLM/update.bat @@ -1,6 +1,7 @@ rmdir /Q /S "..\..\..\..\..\AE-Data\external\source\GLM" rmdir /Q /S "temp" git clone --branch "AE-version" "..\..\..\..\..\3party\ae-glm" "temp" +mkdir "..\..\..\..\..\AE-Data\external\source\GLM" robocopy "temp\glm" "..\..\..\..\..\AE-Data\external\source\GLM" *.h /S robocopy "temp\glm" "..\..\..\..\..\AE-Data\external\source\GLM" *.hpp /S robocopy "temp\glm" 
"..\..\..\..\..\AE-Data\external\source\GLM" *.inl /S diff --git a/AE/engine/external/shared/KTX-Software/CMakeLists.txt b/AE/engine/external/shared/KTX-Software/CMakeLists.txt index b0ed260f..bf2f8cd8 100644 --- a/AE/engine/external/shared/KTX-Software/CMakeLists.txt +++ b/AE/engine/external/shared/KTX-Software/CMakeLists.txt @@ -18,7 +18,7 @@ set_property( TARGET "KTX-lib" PROPERTY INTERFACE_LINK_LIBRARIES if (${AE_ENABLE_EXTERNAL_TESTS}) add_executable( "Tests.KTX" "tests/main.cpp" ) - add_test( NAME "Tests.KTX" COMMAND "Tests.KTX" ) + #add_test( NAME "Tests.KTX" COMMAND "Tests.KTX" ) set_property( TARGET "Tests.KTX" PROPERTY FOLDER "Engine/External/Tests" ) target_link_libraries( "Tests.KTX" PUBLIC "KTX-lib" ) endif () diff --git a/AE/engine/external/shared/KTX-Software/ktx_CMakeLists.txt b/AE/engine/external/shared/KTX-Software/ktx_CMakeLists.txt index 4629e369..80892a28 100644 --- a/AE/engine/external/shared/KTX-Software/ktx_CMakeLists.txt +++ b/AE/engine/external/shared/KTX-Software/ktx_CMakeLists.txt @@ -6,26 +6,38 @@ set( AE_EXTERNAL_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../../.." 
) include( "${AE_EXTERNAL_PATH}/config.cmake" ) # SIMD -if (${AE_SIMD_AVX} GREATER 0) - set( ASTCENC_ISA_SSE41 ON CACHE BOOL "" FORCE ) - set( BASISU_SUPPORT_SSE ON CACHE BOOL "" FORCE ) - -elseif (${AE_SIMD_SSE} GREATER_EQUAL 41) - set( ASTCENC_ISA_SSE41 ON CACHE BOOL "" FORCE ) - set( BASISU_SUPPORT_SSE ON CACHE BOOL "" FORCE ) - -elseif (${AE_SIMD_SSE} GREATER_EQUAL 20) - set( ASTCENC_ISA_SSE41 OFF CACHE BOOL "" FORCE ) - set( ASTCENC_ISA_SSE2 ON CACHE BOOL "" FORCE ) - set( BASISU_SUPPORT_SSE ON CACHE BOOL "" FORCE ) +if (AE_CPU_ARCH_X64 OR AE_CPU_ARCH_X86) + if (${AE_SIMD_AVX} GREATER 0) + set( ASTCENC_ISA_SSE41 ON CACHE BOOL "" FORCE ) + set( BASISU_SUPPORT_SSE ON CACHE BOOL "" FORCE ) + + elseif (${AE_SIMD_SSE} GREATER_EQUAL 41) + set( ASTCENC_ISA_SSE41 ON CACHE BOOL "" FORCE ) + set( BASISU_SUPPORT_SSE ON CACHE BOOL "" FORCE ) + + elseif (${AE_SIMD_SSE} GREATER_EQUAL 20) + set( ASTCENC_ISA_SSE41 OFF CACHE BOOL "" FORCE ) + set( ASTCENC_ISA_SSE2 ON CACHE BOOL "" FORCE ) + set( BASISU_SUPPORT_SSE ON CACHE BOOL "" FORCE ) + + else() + set( ASTCENC_ISA_SSE41 OFF CACHE BOOL "" FORCE ) + set( ASTCENC_ISA_SSE2 OFF CACHE BOOL "" FORCE ) + set( ASTCENC_ISA_NEON OFF CACHE BOOL "" FORCE ) + set( ASTCENC_ISA_NONE ON CACHE BOOL "" FORCE ) + set( BASISU_SUPPORT_SSE OFF CACHE BOOL "" FORCE ) + endif() +endif() -else() +if (AE_CPU_ARCH_ARM32 OR AE_CPU_ARCH_ARM64) set( ASTCENC_ISA_SSE41 OFF CACHE BOOL "" FORCE ) set( ASTCENC_ISA_SSE2 OFF CACHE BOOL "" FORCE ) - set( ASTCENC_ISA_NEON OFF CACHE BOOL "" FORCE ) # TODO: Mac/iOS ARM - set( ASTCENC_ISA_NONE ON CACHE BOOL "" FORCE ) + set( ASTCENC_ISA_NONE OFF CACHE BOOL "" FORCE ) set( BASISU_SUPPORT_SSE OFF CACHE BOOL "" FORCE ) endif() +if (AE_CPU_ARCH_ARM64) + set( ASTCENC_ISA_NEON ON CACHE BOOL "" FORCE ) +endif() set( KTX_FEATURE_DOC OFF CACHE BOOL "" FORCE ) set( KTX_FEATURE_GL_UPLOAD OFF CACHE BOOL "" FORCE ) diff --git a/AE/engine/external/shared/SPIRV-Cross/CMakeLists.txt 
b/AE/engine/external/shared/SPIRV-Cross/CMakeLists.txt index 84ce7a38..09de6bd8 100644 --- a/AE/engine/external/shared/SPIRV-Cross/CMakeLists.txt +++ b/AE/engine/external/shared/SPIRV-Cross/CMakeLists.txt @@ -31,5 +31,5 @@ if (${AE_ENABLE_EXTERNAL_TESTS}) add_executable( "Tests.SPIRV-Cross" "tests/main.cpp" ) set_property( TARGET "Tests.SPIRV-Cross" PROPERTY FOLDER "Engine/External/Tests" ) target_link_libraries( "Tests.SPIRV-Cross" PUBLIC "SPIRV-Cross-lib" ) - add_test( NAME "Tests.SPIRV-Cross" COMMAND "Tests.SPIRV-Cross" ) + #add_test( NAME "Tests.SPIRV-Cross" COMMAND "Tests.SPIRV-Cross" ) endif () diff --git a/AE/engine/external/shared/Utf8Proc/CMakeLists.txt b/AE/engine/external/shared/Utf8Proc/CMakeLists.txt index ad02d5f6..6dac1df7 100644 --- a/AE/engine/external/shared/Utf8Proc/CMakeLists.txt +++ b/AE/engine/external/shared/Utf8Proc/CMakeLists.txt @@ -27,7 +27,7 @@ if (${AE_ENABLE_EXTERNAL_TESTS}) target_include_directories( "Tests.Utf8Proc" PRIVATE "${AE_TEST_DIR}" ) else() add_executable( "Tests.Utf8Proc" "tests/main.cpp" ) - add_test( NAME "Tests.Utf8Proc" COMMAND "Tests.Utf8Proc" ) + #add_test( NAME "Tests.Utf8Proc" COMMAND "Tests.Utf8Proc" ) endif() set_property( TARGET "Tests.Utf8Proc" PROPERTY FOLDER "Engine/External/Tests" ) target_link_libraries( "Tests.Utf8Proc" PUBLIC "Utf8Proc-lib" ) diff --git a/AE/engine/external/shared/VMA/CMakeLists.txt b/AE/engine/external/shared/VMA/CMakeLists.txt index 44ad1a48..c2a2076b 100644 --- a/AE/engine/external/shared/VMA/CMakeLists.txt +++ b/AE/engine/external/shared/VMA/CMakeLists.txt @@ -17,7 +17,7 @@ if (${AE_ENABLE_EXTERNAL_TESTS} AND ${AE_ENABLE_VULKAN}) target_include_directories( "Tests.VMA" PRIVATE "${AE_TEST_DIR}" ) else () add_executable( "Tests.VMA" "tests/main.cpp" ) - add_test( NAME "Tests.VMA" COMMAND "Tests.VMA" ) + #add_test( NAME "Tests.VMA" COMMAND "Tests.VMA" ) endif () set_property( TARGET "Tests.VMA" PROPERTY FOLDER "Engine/External/Tests" ) target_link_libraries( "Tests.VMA" PUBLIC "VMA-lib" 
) diff --git a/AE/engine/external/shared/Vulkan/CMakeLists.txt b/AE/engine/external/shared/Vulkan/CMakeLists.txt index 9bf6bf07..7773cbbf 100644 --- a/AE/engine/external/shared/Vulkan/CMakeLists.txt +++ b/AE/engine/external/shared/Vulkan/CMakeLists.txt @@ -16,7 +16,7 @@ if (${AE_ENABLE_EXTERNAL_TESTS}) target_include_directories( "Tests.Vulkan" PRIVATE "${AE_TEST_DIR}" ) else () add_executable( "Tests.Vulkan" "tests/main.cpp" ) - add_test( NAME "Tests.Vulkan" COMMAND "Tests.Vulkan" ) + #add_test( NAME "Tests.Vulkan" COMMAND "Tests.Vulkan" ) endif () set_property( TARGET "Tests.Vulkan" PROPERTY FOLDER "Engine/External/Tests" ) target_link_libraries( "Tests.Vulkan" PUBLIC "Vulkan-lib" ) diff --git a/AE/engine/external/shared/Vulkan/update.bat b/AE/engine/external/shared/Vulkan/update.bat index 8c98c679..5ad23f1a 100644 --- a/AE/engine/external/shared/Vulkan/update.bat +++ b/AE/engine/external/shared/Vulkan/update.bat @@ -1,6 +1,7 @@ rmdir /Q /S "..\..\..\..\..\AE-Data\external\source\Vulkan" rmdir /Q /S "temp" git clone --branch "sdk-1.3.261.1" "..\..\..\..\..\3party\Vulkan-Headers" "temp" +mkdir "..\..\..\..\..\AE-Data\external\source\Vulkan" robocopy "temp\include" "..\..\..\..\..\AE-Data\external\source\Vulkan" *.h /S copy /Y "temp\LICENSE.txt" "..\..\..\..\..\AE-Data\external\source\Vulkan\LICENSE.txt" rmdir /Q /S "temp" diff --git a/AE/engine/external/shared/fameta-counter/CMakeLists.txt b/AE/engine/external/shared/fameta-counter/CMakeLists.txt new file mode 100644 index 00000000..5bb2a6b2 --- /dev/null +++ b/AE/engine/external/shared/fameta-counter/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright (c) Zhirnov Andrey. 
For more information see 'LICENSE' +# +# fameta-counter source code (unlicense) + +add_library( "FametaCounter-lib" INTERFACE ) + +target_compile_definitions( "FametaCounter-lib" INTERFACE "AE_ENABLE_FAMETA_COUNTER" "AE_LICENSE_UNLICENSE" ) + +install( FILES "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE" DESTINATION "3party_license" RENAME "FametaCounter-LICENSE" ) diff --git a/AE/engine/external/shared/fameta-counter/LICENSE b/AE/engine/external/shared/fameta-counter/LICENSE new file mode 100644 index 00000000..fdddb29a --- /dev/null +++ b/AE/engine/external/shared/fameta-counter/LICENSE @@ -0,0 +1,24 @@ +This is free and unencumbered software released into the public domain. + +Anyone is free to copy, modify, publish, use, compile, sell, or +distribute this software, either in source code form or as a compiled +binary, for any purpose, commercial or non-commercial, and by any +means. + +In jurisdictions that recognize copyright laws, the author or authors +of this software dedicate any and all copyright interest in the +software to the public domain. We make this dedication for the benefit +of the public at large and to the detriment of our heirs and +successors. We intend this dedication to be an overt act of +relinquishment in perpetuity of all present and future rights to this +software under copyright law. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. 
+ +For more information, please refer to <https://unlicense.org> diff --git a/AE/engine/external/shared/fameta-counter/counter.hpp b/AE/engine/external/shared/fameta-counter/counter.hpp new file mode 100644 index 00000000..95f25eaa --- /dev/null +++ b/AE/engine/external/shared/fameta-counter/counter.hpp @@ -0,0 +1,178 @@ +// Author: Fabio Alemagna +// Source: https://github.com/falemagn/fameta-counter +// Inspired by Filip Roséen's work. See https://stackoverflow.com/questions/60082260/c-compile-time-counters-revisited + +#ifndef FAMETA_COUNTER_HPP +#define FAMETA_COUNTER_HPP + +#if !defined(__cpp_if_constexpr) || __cpp_if_constexpr < 201606L +# if defined(FAMETA_BINARY_LOOKUP) && FAMETA_BINARY_LOOKUP +# error "Binary lookup is only available when compiling with c++17 and above" +# endif +# +# undef FAMETA_BINARY_LOOKUP +# define FAMETA_BINARY_LOOKUP 0 +# define FAMETA_UNIQUE_VALUE_TYPE unsigned long long +#else +# if !defined(FAMETA_BINARY_LOOKUP) +# define FAMETA_BINARY_LOOKUP 1 +# endif +# define FAMETA_UNIQUE_VALUE_TYPE auto +#endif + +#if defined(__GNUC__) && !defined(__clang__) +// There appears to be a bug on gcc that makes it emit a diagnostic that cannot be turned off in certain conditions. This will silence it. 
+// See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112267 +# define FAMETA_FRIEND_RETURN_TYPE auto +#else +# define FAMETA_FRIEND_RETURN_TYPE bool +#endif + +#if !defined(FAMETA_FRIEND_INJECTION_PRAGMA_BEGIN) && !defined(FAMETA_FRIEND_INJECTION_PRAGMA_END) +# if defined(__INTEL_COMPILER) +# define FAMETA_FRIEND_INJECTION_PRAGMA_BEGIN _Pragma("warning push"); _Pragma("warning disable 1624"); +# define FAMETA_FRIEND_INJECTION_PRAGMA_END _Pragma("warning pop"); +# elif defined(__clang__) +# define FAMETA_FRIEND_INJECTION_PRAGMA_BEGIN _Pragma("GCC diagnostic push"); _Pragma("GCC diagnostic ignored \"-Wundefined-internal\""); +# define FAMETA_FRIEND_INJECTION_PRAGMA_END _Pragma("GCC diagnostic pop"); +# elif defined(__GNUC__) +# define FAMETA_FRIEND_INJECTION_PRAGMA_BEGIN _Pragma("GCC diagnostic push"); _Pragma("GCC diagnostic ignored \"-Wnon-template-friend\""); \ + _Pragma("GCC diagnostic ignored \"-Wunused-function\""); +# define FAMETA_FRIEND_INJECTION_PRAGMA_END _Pragma("GCC diagnostic pop"); +# else +# define FAMETA_FRIEND_INJECTION_PRAGMA_BEGIN +# define FAMETA_FRIEND_INJECTION_PRAGMA_END +# endif +#endif + +namespace fameta +{ + +// Anonymous namespace to avoid ODR violation +namespace { + +template +class counter +{ +public: + template + static constexpr int next() + { + return next(0)*Step+Start; + } + + template + static constexpr int next() + { + struct Unique{}; + return next(0)*Step+Start; + } + + template + static constexpr int current() + { + return current(0)*Step+Start; + } + + template + static constexpr int current() + { + struct Unique{}; + return current(0)*Step+Start; + } + +private: + template + struct slot + { + FAMETA_FRIEND_INJECTION_PRAGMA_BEGIN + + friend constexpr FAMETA_FRIEND_RETURN_TYPE slot_allocated(slot); + + FAMETA_FRIEND_INJECTION_PRAGMA_END + }; + + template + struct allocate_slot { + friend constexpr FAMETA_FRIEND_RETURN_TYPE slot_allocated(slot) { + return true; + } + + enum { value = I }; + }; + +#if FAMETA_BINARY_LOOKUP + // 
If slot_allocated(slot) has NOT been defined, then SFINAE will keep this function out of the overload set... + template ())> + static constexpr int next(int) + { + return next(0); + } + + // ...And this function will be used, instead, which will define slot_allocated(slot) via allocate_slot. + template + static constexpr int next(double) + { + if constexpr (P == 0) + return allocate_slot::value; + else + return next(0); + } + + // If slot_allocated(slot) has NOT been defined, then SFINAE will keep this function out of the overload set... + template ())> + static constexpr int current(int) + { + return current(0); + } + + // ...And this function will be used, instead, which will return the current counter, or assert in case next() hasn't been called yet. + template + static constexpr int current(double) + { + static_assert(I != 0 || P != 0, "You must invoke next() first"); + + if constexpr (P == 0) + return I-1; + else + return current(0); + } +#else // FAMETA_BINARY_LOOKUP + // If slot_allocated(slot) has NOT been defined, then SFINAE will keep this function out of the overload set... + template ())> + static constexpr int next(int) + { + return next(0); + } + + // ...And this function will be used, instead, which will define slot_allocated(slot) via allocate_slot. + template + static constexpr int next(double) + { + return allocate_slot::value; + } + + // If slot_allocated(slot) has NOT been defined, then SFINAE will keep this function out of the overload set... + template ())> + static constexpr int current(int) + { + return current(0); + } + + // ...And this function will be used, instead, which will return the current counter, or assert in case next() hasn't been called yet. 
+ template + static constexpr int current(double) + { + static_assert(I != 0, "You must invoke next() first"); + + return I-1; + } +#endif // !FAMETA_BINARY_LOOKUP + +}; + +} + +} + +#endif // FAMETA_COUNTER_HPP diff --git a/AE/engine/external/shared/glslang/CMakeLists.txt b/AE/engine/external/shared/glslang/CMakeLists.txt index 5f6034f5..cbb7345d 100644 --- a/AE/engine/external/shared/glslang/CMakeLists.txt +++ b/AE/engine/external/shared/glslang/CMakeLists.txt @@ -55,5 +55,5 @@ if (${AE_ENABLE_EXTERNAL_TESTS}) add_executable( "Tests.GLSLang" "tests/main.cpp" ) set_property( TARGET "Tests.GLSLang" PROPERTY FOLDER "Engine/External/Tests" ) target_link_libraries( "Tests.GLSLang" PUBLIC "GLSLang-lib" ) - add_test( NAME "Tests.GLSLang" COMMAND "Tests.GLSLang" ) + #add_test( NAME "Tests.GLSLang" COMMAND "Tests.GLSLang" ) endif () diff --git a/AE/engine/external/shared/imgui/CMakeLists.txt b/AE/engine/external/shared/imgui/CMakeLists.txt index fb65e7b6..d3bf34c9 100644 --- a/AE/engine/external/shared/imgui/CMakeLists.txt +++ b/AE/engine/external/shared/imgui/CMakeLists.txt @@ -30,7 +30,7 @@ if (${AE_ENABLE_EXTERNAL_TESTS}) target_include_directories( "Tests.ImGUI" PRIVATE "${AE_TEST_DIR}" ) else() add_executable( "Tests.ImGUI" "tests/main.cpp" ) - add_test( NAME "Tests.ImGUI" COMMAND "Tests.ImGUI" ) + #add_test( NAME "Tests.ImGUI" COMMAND "Tests.ImGUI" ) endif() set_property( TARGET "Tests.ImGUI" PROPERTY FOLDER "Engine/External/Tests" ) target_link_libraries( "Tests.ImGUI" PUBLIC "ImGUI-lib" ) diff --git a/AE/engine/external/shared/msdfgen/CMakeLists.txt b/AE/engine/external/shared/msdfgen/CMakeLists.txt index b830264c..bc949e40 100644 --- a/AE/engine/external/shared/msdfgen/CMakeLists.txt +++ b/AE/engine/external/shared/msdfgen/CMakeLists.txt @@ -25,7 +25,7 @@ set_property( TARGET "msdfgen-lib" PROPERTY INTERFACE_LINK_LIBRARIES if (${AE_ENABLE_EXTERNAL_TESTS}) add_executable( "Tests.msdfgen" "tests/main.cpp" ) - add_test( NAME "Tests.msdfgen" COMMAND "Tests.msdfgen" ) 
+ #add_test( NAME "Tests.msdfgen" COMMAND "Tests.msdfgen" ) set_property( TARGET "Tests.msdfgen" PROPERTY FOLDER "Engine/External/Tests" ) target_link_libraries( "Tests.msdfgen" PUBLIC "msdfgen-lib" ) endif () diff --git a/AE/engine/external/shared/stb/CMakeLists.txt b/AE/engine/external/shared/stb/CMakeLists.txt index 1ecc9f8e..127a140d 100644 --- a/AE/engine/external/shared/stb/CMakeLists.txt +++ b/AE/engine/external/shared/stb/CMakeLists.txt @@ -16,7 +16,7 @@ if (${AE_ENABLE_EXTERNAL_TESTS}) target_include_directories( "Tests.STB" PRIVATE "${AE_TEST_DIR}" ) else() add_executable( "Tests.STB" "tests/main.cpp" ) - add_test( NAME "Tests.STB" COMMAND "Tests.STB" ) + #add_test( NAME "Tests.STB" COMMAND "Tests.STB" ) endif() set_property( TARGET "Tests.STB" PROPERTY FOLDER "Engine/External/Tests" ) target_link_libraries( "Tests.STB" PUBLIC "STB-lib" ) diff --git a/AE/engine/external/shared/tinygltf/CMakeLists.txt b/AE/engine/external/shared/tinygltf/CMakeLists.txt index 4987daa7..2ba6e7ec 100644 --- a/AE/engine/external/shared/tinygltf/CMakeLists.txt +++ b/AE/engine/external/shared/tinygltf/CMakeLists.txt @@ -16,7 +16,7 @@ if (${AE_ENABLE_EXTERNAL_TESTS}) target_include_directories( "Tests.TinyglTF" PRIVATE "${AE_TEST_DIR}" ) else () add_executable( "Tests.TinyglTF" "tests/main.cpp" ) - add_test( NAME "Tests.TinyglTF" COMMAND "Tests.TinyglTF" ) + #add_test( NAME "Tests.TinyglTF" COMMAND "Tests.TinyglTF" ) endif () set_target_properties( "Tests.TinyglTF" PROPERTIES CXX_STANDARD 17 CXX_STANDARD_REQUIRED YES ) target_compile_features( "Tests.TinyglTF" PUBLIC cxx_std_17 ) diff --git a/AE/engine/external/shared/xxHash/CMakeLists.txt b/AE/engine/external/shared/xxHash/CMakeLists.txt new file mode 100644 index 00000000..dc5234e0 --- /dev/null +++ b/AE/engine/external/shared/xxHash/CMakeLists.txt @@ -0,0 +1,17 @@ +# Copyright (c) Zhirnov Andrey. 
For more information see 'LICENSE' +# +# download xxHash (BSD-2 license) + +set( XXHASH_PATH "${AE_EXTERNAL_SHARED_PATH}/xxHash" ) + +add_library( "xxHash-lib" INTERFACE ) +target_include_directories( "xxHash-lib" INTERFACE "${XXHASH_PATH}" ) +target_compile_definitions( "xxHash-lib" INTERFACE "AE_ENABLE_XXHASH" "AE_LICENSE_BSD_2" "XXH_INLINE_ALL" ) +install( FILES "${XXHASH_PATH}/LICENSE.txt" DESTINATION "3party_license" RENAME "xxHash-LICENSE.txt" ) + +if (${AE_ENABLE_EXTERNAL_TESTS}) + add_executable( "Tests.xxHash" "tests/main.cpp" ) + #add_test( NAME "Tests.xxHash" COMMAND "Tests.xxHash" ) + set_property( TARGET "Tests.xxHash" PROPERTY FOLDER "Engine/External/Tests" ) + target_link_libraries( "Tests.xxHash" PUBLIC "xxHash-lib" ) +endif () diff --git a/AE/engine/external/shared/xxHash/android/install.bat b/AE/engine/external/shared/xxHash/android/install.bat new file mode 100644 index 00000000..da8b6d25 --- /dev/null +++ b/AE/engine/external/shared/xxHash/android/install.bat @@ -0,0 +1,8 @@ +rmdir /Q /S "..\..\..\..\..\AE-Data\external\android-clang\xxHash" +rmdir /Q /S "temp" +git clone --branch "v0.8.2" "..\..\..\..\..\3party\xxHash" "temp" +copy /Y "temp\cmake_unofficial\CMakeLists.txt" "temp\cmake_unofficial\origin_CMakeLists.txt" +copy /Y "xxHash_CMakeLists.txt" "temp\cmake_unofficial\CMakeLists.txt" +cmake -S temp/cmake_unofficial -B build -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX="../../../../../AE-Data/external/android-clang/xxHash" +cmake --build build --config Release --target install -j 2 +rmdir /Q /S "temp" diff --git a/AE/engine/external/shared/xxHash/tests/main.cpp b/AE/engine/external/shared/xxHash/tests/main.cpp new file mode 100644 index 00000000..cbe3246b --- /dev/null +++ b/AE/engine/external/shared/xxHash/tests/main.cpp @@ -0,0 +1,21 @@ + +#ifndef AE_ENABLE_XXHASH +# error AE_ENABLE_XXHASH required +#endif + +#include "xxhash.h" + +#ifdef ANDROID +# include "TestHelper.h" + +extern void AE_xxHash_Test () +#else +int main () +#endif 
+{ + char buffer [128]; + XXH64_hash_t hash = XXH64( buffer, 128, 0 ); + (void)(hash); + + return 0; +} diff --git a/AE/engine/external/shared/xxHash/update.bat b/AE/engine/external/shared/xxHash/update.bat new file mode 100644 index 00000000..156073f4 --- /dev/null +++ b/AE/engine/external/shared/xxHash/update.bat @@ -0,0 +1,8 @@ +rmdir /Q /S "..\..\..\..\..\AE-Data\external\source\xxHash" +rmdir /Q /S "temp" +git clone --branch "v0.8.2" "..\..\..\..\..\3party\xxHash" "temp" +mkdir "..\..\..\..\..\AE-Data\external\source\xxHash" +copy /Y "temp\xxhash.h" "..\..\..\..\..\AE-Data\external\source\xxHash\xxhash.h" +copy /Y "temp\LICENSE" "..\..\..\..\..\AE-Data\external\source\xxHash\LICENSE.txt" +rmdir /Q /S "temp" +pause diff --git a/AE/engine/external/shared/xxHash/xxHash_CMakeLists.txt b/AE/engine/external/shared/xxHash/xxHash_CMakeLists.txt new file mode 100644 index 00000000..1fdf4c6f --- /dev/null +++ b/AE/engine/external/shared/xxHash/xxHash_CMakeLists.txt @@ -0,0 +1,15 @@ +cmake_minimum_required( VERSION 3.10 FATAL_ERROR ) + +project( "xxHash-main" LANGUAGES CXX ) + +set( AE_EXTERNAL_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../../../.." ) +include( "${AE_EXTERNAL_PATH}/config.cmake" ) + +set( XXHASH_BUILD_ENABLE_INLINE_API ON CACHE INTERNAL "" FORCE ) +set( XXHASH_BUILD_XXHSUM OFF CACHE INTERNAL "" FORCE ) +set( BUILD_SHARED_LIBS OFF CACHE INTERNAL "" FORCE ) +set( DISPATCH OFF CACHE INTERNAL "" FORCE ) + +include( "origin_CMakeLists.txt" ) + +install( FILES "${CMAKE_CURRENT_SOURCE_DIR}/../LICENSE" DESTINATION "." 
) diff --git a/AE/engine/pch/Base.h b/AE/engine/pch/Base.h index 16bea67e..283ff045 100644 --- a/AE/engine/pch/Base.h +++ b/AE/engine/pch/Base.h @@ -8,6 +8,7 @@ #include "base/CompileTime/Math.h" #include "base/CompileTime/TypeList.h" #include "base/CompileTime/FunctionInfo.h" +#include "base/CompileTime/Counter.h" // Algorithms #include "base/Algorithms/ArrayUtils.h" @@ -18,6 +19,7 @@ // Containers #include "base/Containers/AnyTypeRef.h" #include "base/Containers/ArrayView.h" +#include "base/Containers/ChunkList.h" #include "base/Containers/FixedArray.h" #include "base/Containers/FixedMap.h" #include "base/Containers/FixedSet.h" @@ -52,7 +54,7 @@ #include "base/Math/Bytes.h" #include "base/Math/Percent.h" #include "base/Math/Math.h" -#include "base/Math/Radians.h" +#include "base/Math/Radian.h" #include "base/Math/VecSwizzle.h" #include "base/Math/Fractional.h" #include "base/Math/Float8.h" @@ -86,14 +88,14 @@ // Memory #include "base/Memory/AllocatorFwdDecl.h" -#include "base/Memory/IAllocator.h" #include "base/Memory/UntypedAllocator.h" +#include "base/Memory/IAllocator.h" #include "base/Memory/SharedMem.h" #include "base/Memory/StackAllocator.h" #include "base/Memory/LinearAllocator.h" -#include "base/Memory/AllocatorRef.h" #include "base/Memory/MemUtils.h" #include "base/Memory/MemWriter.h" +#include "base/Memory/MemChunkList.h" // Platforms #include "base/Platforms/CPUInfo.h" @@ -117,5 +119,6 @@ #include "base/Utils/TypeId.h" #include "base/Utils/Version.h" #include "base/Utils/FrameUID.h" +#include "base/Utils/PackedPtr.h" diff --git a/AE/engine/pch/Networking.h b/AE/engine/pch/Networking.h index f7a9c361..8de751eb 100644 --- a/AE/engine/pch/Networking.h +++ b/AE/engine/pch/Networking.h @@ -2,12 +2,16 @@ #pragma once -// Raw -#include "networking/Raw/IpAddress.h" -#include "networking/Raw/TcpSocket.h" -#include "networking/Raw/UdpSocket.h" -#include "networking/Raw/SocketService.h" +// LowLevel +#include "networking/LowLevel/IpAddress.h" +#include 
"networking/LowLevel/TcpSocket.h" +#include "networking/LowLevel/UdpSocket.h" +#include "networking/LowLevel/SocketService.h" -// DataSource +// HighLevel +#include "networking/HighLevel/Client.h" +#include "networking/HighLevel/Server.h" +#include "networking/HighLevel/Messages.h" -// Channels +// Utils +#include "networking/HighLevel/AsyncCSMessageProducer.h" diff --git a/AE/engine/pch/Platform.h b/AE/engine/pch/Platform.h index a71b38af..5fc3e54f 100644 --- a/AE/engine/pch/Platform.h +++ b/AE/engine/pch/Platform.h @@ -12,6 +12,5 @@ #include "platform/Public/VRDevice.h" #include "platform/Public/IApplication.h" -#include "platform/DefaultV1/DefaultAppV1.h" - -#include "platform/DefaultV2/DefaultAppV2.h" +#include "platform/BaseAppV1/DefaultAppV1.h" +#include "platform/BaseAppV2/DefaultAppV2.h" diff --git a/AE/engine/pch/Serializing.h b/AE/engine/pch/Serializing.h index 5e2cd9c0..60f2a671 100644 --- a/AE/engine/pch/Serializing.h +++ b/AE/engine/pch/Serializing.h @@ -3,5 +3,5 @@ #pragma once #include "serializing/Common.h" -#include "serializing/ISerializable.h" -#include "serializing/ObjectFactory.h" +#include "serializing/Basic/ISerializable.h" +#include "serializing/Basic/ObjectFactory.h" diff --git a/AE/engine/pch/Threading.h b/AE/engine/pch/Threading.h index f8c43083..ebbfd371 100644 --- a/AE/engine/pch/Threading.h +++ b/AE/engine/pch/Threading.h @@ -5,8 +5,8 @@ #include "threading/Common.h" // Containers -#include "threading/Containers/LfIndexedPool2.h" -#include "threading/Containers/LfIndexedPool3.h" +#include "threading/Containers/LfChunkList.h" +#include "threading/Containers/LfIndexedPool.h" #include "threading/Containers/LfStaticIndexedPool.h" #include "threading/Containers/LfStaticPool.h" #include "threading/Containers/LfStaticQueue.h" @@ -15,19 +15,21 @@ #include "threading/DataSource/AsyncDataSource.h" #include "threading/DataSource/TsDataSource.h" #include "threading/DataSource/WinAsyncDataSource.h" +//#include 
"threading/DataSource/UnixAsyncDataSource.h" #include "threading/DataSource/AsyncDataSourceAsStream.h" +#include "threading/DataSource/SyncDataSource.h" // Memory #include "threading/Memory/FrameAllocator.h" #include "threading/Memory/GlobalLinearAllocator.h" #include "threading/Memory/LfFixedBlockAllocator.h" -#include "threading/Memory/LfFixedBlockAllocator3.h" #include "threading/Memory/LfLinearAllocator.h" #include "threading/Memory/MemoryManager.h" #include "threading/Memory/MemoryProfiler.h" #include "threading/Memory/TsLinearAllocator.h" #include "threading/Memory/TsSharedMem.h" #include "threading/Memory/TsStackAllocator.h" +#include "threading/Memory/TsIAllocator.h" // Primitives #include "threading/Primitives/Barrier.h" diff --git a/AE/engine/shared_data/3party_shaders/Blur-1.glsl b/AE/engine/shared_data/3party_shaders/Blur-1.glsl index 9fedbacb..b58c72c3 100644 --- a/AE/engine/shared_data/3party_shaders/Blur-1.glsl +++ b/AE/engine/shared_data/3party_shaders/Blur-1.glsl @@ -10,7 +10,7 @@ float4 Blur5 (gl::CombinedTex2D image, float2 uv, float2 invResolution, color += gl.texture.Sample( image, uv ) * 0.29411764705882354; color += gl.texture.Sample( image, uv + (off1 * invResolution) ) * 0.35294117647058826; color += gl.texture.Sample( image, uv - (off1 * invResolution) ) * 0.35294117647058826; - return color; + return color; } float4 Blur9 (gl::CombinedTex2D image, float2 uv, float2 invResolution, float2 direction) diff --git a/AE/engine/shared_data/3party_shaders/ColorSpaceUtility-1.glsl b/AE/engine/shared_data/3party_shaders/ColorSpaceUtility-1.glsl index 02fedaf5..934f5152 100644 --- a/AE/engine/shared_data/3party_shaders/ColorSpaceUtility-1.glsl +++ b/AE/engine/shared_data/3party_shaders/ColorSpaceUtility-1.glsl @@ -2,7 +2,7 @@ // Copyright (c) Microsoft. All rights reserved. // This code is licensed under the MIT License (MIT). -// +// // GLSL port by Zhirnov Andrey. 
#ifdef __cplusplus diff --git a/AE/engine/shared_data/3party_shaders/ColorUtils-1.glsl b/AE/engine/shared_data/3party_shaders/ColorUtils-1.glsl index 8c1397fa..f0570c57 100644 --- a/AE/engine/shared_data/3party_shaders/ColorUtils-1.glsl +++ b/AE/engine/shared_data/3party_shaders/ColorUtils-1.glsl @@ -83,7 +83,7 @@ float3 HSVtoRGB_v2 (float3 hsv) // if ( var_i == 0 ) { R = V ; G = TempRGB.z ; B = TempRGB.x } // else if ( var_i == 2 ) { R = TempRGB.x ; G = V ; B = TempRGB.z } // else if ( var_i == 4 ) { R = TempRGB.z ; G = TempRGB.x ; B = V } - // + // // else if ( var_i == 1 ) { R = TempRGB.y ; G = V ; B = TempRGB.x } // else if ( var_i == 3 ) { R = TempRGB.x ; G = TempRGB.y ; B = V } // else if ( var_i == 5 ) { R = V ; G = TempRGB.x ; B = TempRGB.y } @@ -208,7 +208,7 @@ float3 XYYtoRGB_v2 (float3 xyY) ); const float3x3 XYZ2RGB = float3x3( 3.2406, -1.5372, -0.4986, - -0.9689, 1.8758, 0.0415, + -0.9689, 1.8758, 0.0415, 0.0557, -0.2040, 1.0570 ); return XYZ2RGB * XYZ; @@ -267,3 +267,67 @@ float3 XYYtoRGB (const float3 xyY) xyY.z * (1.0 - xyY.x - xyY.y) / xyY.y ); return XYZtoRGB( xyz ); } + +/* +================================================= + RGBtoOklab / OklabToRGB +----- + from https://mini.gmshaders.com/p/oklab + originally from https://bottosson.github.io/posts/gamutclipping/#source-code (MIT license) +================================================= +*/ +float3 RGBtoOklab (float3 rgb) +{ + const float3x3 im1 = float3x3( 0.4121656120, 0.2118591070, 0.0883097947, + 0.5362752080, 0.6807189584, 0.2818474174, + 0.0514575653, 0.1074065790, 0.6302613616 ); + + const float3x3 im2 = float3x3( +0.2104542553, +1.9779984951, +0.0259040371, + +0.7936177850, -2.4285922050, +0.7827717662, + -0.0040720468, +0.4505937099, -0.8086757660 ); + + float3 lms = im1 * rgb; + return im2 * (Sign(lms) * Pow( Abs(lms), float3(1.0/3.0) )); +} + +float3 OklabToRGB (float3 oklab) +{ + const float3x3 m1 = float3x3( +1.000000000, +1.000000000, +1.000000000, + +0.396337777, -0.105561346, 
-0.089484178, + +0.215803757, -0.063854173, -1.291485548 ); + + const float3x3 m2 = float3x3( +4.076724529, -1.268143773, -0.004111989, + -3.307216883, +2.609332323, -0.703476310, + +0.230759054, -0.341134429, +1.706862569 ); + float3 lms = m1 * oklab; + return m2 * (lms * lms * lms); +} + +/* +================================================= + RGBLerpOklab +----- + By Inigo Quilez, under MIT license + https://www.shadertoy.com/view/ttcyRS +================================================= +*/ +float3 RGBLerpOklab (const float3 lin1, const float3 lin2, const float factor) +{ + const float3x3 kCONEtoLMS = float3x3( + 0.4121656120, 0.2118591070, 0.0883097947, + 0.5362752080, 0.6807189584, 0.2818474174, + 0.0514575653, 0.1074065790, 0.6302613616 ); + + const float3x3 kLMStoCONE = float3x3( + 4.0767245293, -1.2681437731, -0.0041119885, + -3.3072168827, 2.6093323231, -0.7034763098, + 0.2307590544, -0.3411344290, 1.7068625689 ); + + float3 lms1 = Pow( kCONEtoLMS * lin1, float3(1.0/3.0) ); + float3 lms2 = Pow( kCONEtoLMS * lin2, float3(1.0/3.0) ); + + float3 lms = Lerp( lms1, lms2, factor ); + lms *= 1.0 + 0.2 * factor * (1.0 - factor); + + return kLMStoCONE * (lms * lms * lms); +} diff --git a/AE/engine/shared_data/3party_shaders/Hash-1.glsl b/AE/engine/shared_data/3party_shaders/Hash-1.glsl index 8437e4dc..88430adb 100644 --- a/AE/engine/shared_data/3party_shaders/Hash-1.glsl +++ b/AE/engine/shared_data/3party_shaders/Hash-1.glsl @@ -5,7 +5,7 @@ #include "Math.glsl" -// DHash from https://www.shadertoy.com/view/4djSRW +// DHash from https://www.shadertoy.com/view/4djSRW // MIT License... // Copyright (c) 2014 David Hoskins. 
diff --git a/AE/engine/shared_data/3party_shaders/Hash-2.glsl b/AE/engine/shared_data/3party_shaders/Hash-2.glsl index d663bd19..d1e85dde 100644 --- a/AE/engine/shared_data/3party_shaders/Hash-2.glsl +++ b/AE/engine/shared_data/3party_shaders/Hash-2.glsl @@ -72,8 +72,8 @@ float HEHash12 (int2 uv) { return HEHash12( uint2(uv) ); } uint3 _IWeylConst () { return uint3( - 0x3504f333u, // W0 = 3*2309*128413 - 0xf1bbcdcbu, // W1 = 7*349*1660097 + 0x3504f333u, // W0 = 3*2309*128413 + 0xf1bbcdcbu, // W1 = 7*349*1660097 741103597u // M = 13*83*686843 ); } @@ -138,7 +138,7 @@ ND_ float Hash_Gaussianish (const float2 n, const float seed) { const float t = Fract( seed ); const float nrnd0 = nrand( n + 0.07*t ); - const float nrnd1 = nrand( n + 0.11*t ); + const float nrnd1 = nrand( n + 0.11*t ); const float nrnd2 = nrand( n + 0.13*t ); const float nrnd3 = nrand( n + 0.17*t ); return (nrnd0+nrnd1+nrnd2+nrnd3) / 4.0; @@ -148,7 +148,7 @@ ND_ float Hash_MoarGaussianish (const float2 n, const float seed) { const float t = Fract( seed ); const float nrnd0 = nrand( n + 0.07*t ); - const float nrnd1 = nrand( n + 0.11*t ); + const float nrnd1 = nrand( n + 0.11*t ); const float nrnd2 = nrand( n + 0.13*t ); const float nrnd3 = nrand( n + 0.17*t ); diff --git a/AE/engine/shared_data/3party_shaders/Noise-1.glsl b/AE/engine/shared_data/3party_shaders/Noise-1.glsl index 2c87097d..4f0b49cd 100644 --- a/AE/engine/shared_data/3party_shaders/Noise-1.glsl +++ b/AE/engine/shared_data/3party_shaders/Noise-1.glsl @@ -32,7 +32,7 @@ float GradientNoise (gl::CombinedTex2D rgbaNoise, const float3 pos) // cubic interpolant float3 u = w*w*(3.0-2.0*w); float3 du = 6.0*w*(1.0-w); - #endif + #endif // gradients float3 ga = hash( i+float3(0.0,0.0,0.0) ); @@ -76,7 +76,7 @@ float GradientNoise (const float3 pos) // cubic interpolant float3 u = w*w*(3.0-2.0*w); float3 du = 6.0*w*(1.0-w); - #endif + #endif // gradients float3 ga = hash( i+float3(0.0,0.0,0.0) ); @@ -193,11 +193,11 @@ float ValueNoise 
(gl::CombinedTex2D greyNoise, const float3 pos) return Lerp( Lerp( Lerp(hash(pi + float3(0, 0, 0)), hash(pi + float3(1, 0, 0)), w.x), - Lerp(hash(pi + float3(0, 0, 1)), hash(pi + float3(1, 0, 1)), w.x), + Lerp(hash(pi + float3(0, 0, 1)), hash(pi + float3(1, 0, 1)), w.x), w.z), Lerp( Lerp(hash(pi + float3(0, 1, 0)), hash(pi + float3(1, 1, 0)), w.x), - Lerp(hash(pi + float3(0, 1, 1)), hash(pi + float3(1, 1, 1)), w.x), + Lerp(hash(pi + float3(0, 1, 1)), hash(pi + float3(1, 1, 1)), w.x), w.z), w.y); #undef hash @@ -215,11 +215,11 @@ float ValueNoise (const float3 pos) return Lerp( Lerp( Lerp(hash(pi + float3(0, 0, 0)), hash(pi + float3(1, 0, 0)), w.x), - Lerp(hash(pi + float3(0, 0, 1)), hash(pi + float3(1, 0, 1)), w.x), + Lerp(hash(pi + float3(0, 0, 1)), hash(pi + float3(1, 0, 1)), w.x), w.z), Lerp( Lerp(hash(pi + float3(0, 1, 0)), hash(pi + float3(1, 1, 0)), w.x), - Lerp(hash(pi + float3(0, 1, 1)), hash(pi + float3(1, 1, 1)), w.x), + Lerp(hash(pi + float3(0, 1, 1)), hash(pi + float3(1, 1, 1)), w.x), w.z), w.y); #undef hash @@ -246,18 +246,18 @@ float PerlinNoise (gl::CombinedTex2D rgbaNoise, const float3 pos) return Lerp( Lerp( - Lerp(Dot(pf - float3(0, 0, 0), hash(pi + float3(0, 0, 0))), + Lerp(Dot(pf - float3(0, 0, 0), hash(pi + float3(0, 0, 0))), Dot(pf - float3(1, 0, 0), hash(pi + float3(1, 0, 0))), w.x), - Lerp(Dot(pf - float3(0, 0, 1), hash(pi + float3(0, 0, 1))), + Lerp(Dot(pf - float3(0, 0, 1), hash(pi + float3(0, 0, 1))), Dot(pf - float3(1, 0, 1), hash(pi + float3(1, 0, 1))), w.x), w.z), Lerp( - Lerp(Dot(pf - float3(0, 1, 0), hash(pi + float3(0, 1, 0))), + Lerp(Dot(pf - float3(0, 1, 0), hash(pi + float3(0, 1, 0))), Dot(pf - float3(1, 1, 0), hash(pi + float3(1, 1, 0))), w.x), - Lerp(Dot(pf - float3(0, 1, 1), hash(pi + float3(0, 1, 1))), + Lerp(Dot(pf - float3(0, 1, 1), hash(pi + float3(0, 1, 1))), Dot(pf - float3(1, 1, 1), hash(pi + float3(1, 1, 1))), w.x), w.z), @@ -276,18 +276,18 @@ float PerlinNoise (const float3 pos) return Lerp( Lerp( - Lerp(Dot(pf - 
float3(0, 0, 0), hash(pi + float3(0, 0, 0))), + Lerp(Dot(pf - float3(0, 0, 0), hash(pi + float3(0, 0, 0))), Dot(pf - float3(1, 0, 0), hash(pi + float3(1, 0, 0))), w.x), - Lerp(Dot(pf - float3(0, 0, 1), hash(pi + float3(0, 0, 1))), + Lerp(Dot(pf - float3(0, 0, 1), hash(pi + float3(0, 0, 1))), Dot(pf - float3(1, 0, 1), hash(pi + float3(1, 0, 1))), w.x), w.z), Lerp( - Lerp(Dot(pf - float3(0, 1, 0), hash(pi + float3(0, 1, 0))), + Lerp(Dot(pf - float3(0, 1, 0), hash(pi + float3(0, 1, 0))), Dot(pf - float3(1, 1, 0), hash(pi + float3(1, 1, 0))), w.x), - Lerp(Dot(pf - float3(0, 1, 1), hash(pi + float3(0, 1, 1))), + Lerp(Dot(pf - float3(0, 1, 1), hash(pi + float3(0, 1, 1))), Dot(pf - float3(1, 1, 1), hash(pi + float3(1, 1, 1))), w.x), w.z), diff --git a/AE/engine/shared_data/3party_shaders/SDF-1.glsl b/AE/engine/shared_data/3party_shaders/SDF-1.glsl index 5dd98014..303109ef 100644 --- a/AE/engine/shared_data/3party_shaders/SDF-1.glsl +++ b/AE/engine/shared_data/3party_shaders/SDF-1.glsl @@ -22,7 +22,7 @@ float SDF2_Line (const float2 position, const float2 point0, const float2 point float SDF2_Rect (const float2 position, const float2 hsize) { const float2 d = Abs( position ) - hsize; - return Length(Max( d, float2(0.0f) )) + Min(Max( d.x, d.y ), 0.0f ); + return Length( Max( d, float2(0.0f) )) + Min( Max( d.x, d.y ), 0.0f ); } @@ -37,30 +37,30 @@ float SDF2_Pentagon (const float2 position, const float radius) const float3 k = float3( 0.809016994f, 0.587785252f, 0.726542528f ); float2 p = position; p.x = Abs(p.x); - p -= 2.0f * Min(Dot( float2(-k.x,k.y), p ), 0.0f) * float2(-k.x,k.y); - p -= 2.0f * Min(Dot( float2( k.x,k.y), p ), 0.0f) * float2( k.x,k.y); + p -= 2.0f * Min( Dot( float2(-k.x, k.y ), p ), 0.0f ) * float2(-k.x, k.y); + p -= 2.0f * Min( Dot( float2( k.x, k.y ), p ), 0.0f ) * float2( k.x, k.y); p -= float2( Clamp( p.x, -radius * k.z, radius * k.z ), radius ); - return Length(p) * SignOrZero(p.y); + return Length( p ) * SignOrZero( p.y ); } float SDF2_Hexagon 
(const float2 position, const float radius) { const float3 k = float3( -0.866025404, 0.5, 0.577350269 ); - float2 p = Abs(position); + float2 p = Abs( position ); p -= 2.0 * Min(Dot( k.xy, p ), 0.0f ) * k.xy; - p -= float2(Clamp( p.x, -k.z * radius, k.z * radius ), radius ); - return Length(p) * SignOrZero(p.y); + p -= float2( Clamp( p.x, -k.z * radius, k.z * radius ), radius ); + return Length( p ) * SignOrZero( p.y ); } float SDF2_Octagon (const float2 position, const float radius) { - const float3 k = float3( -0.9238795325, 0.3826834323, 0.4142135623 ); - float2 p = Abs(position); - p -= 2.0 * Min(Dot( float2( k.x, k.y), p ), 0.0 ) * float2( k.x,k.y); - p -= 2.0 * Min(Dot( float2(-k.x, k.y), p ), 0.0 ) * float2(-k.x,k.y); - p -= float2(Clamp( p.x, -k.z * radius, k.z * radius ), radius ); + const float3 k = float3( -0.9238795325f, 0.3826834323f, 0.4142135623f ); + float2 p = Abs( position ); + p -= 2.0 * Min( Dot( float2( k.x, k.y), p ), 0.0f ) * float2( k.x,k.y); + p -= 2.0 * Min( Dot( float2(-k.x, k.y), p ), 0.0f ) * float2(-k.x,k.y); + p -= float2( Clamp( p.x, -k.z * radius, k.z * radius ), radius ); return Length(p) * SignOrZero(p.y); } @@ -69,8 +69,8 @@ float SDF2_RoundedRect (const float2 position, const float2 hsize, float4 radiu { radius.xy = (position.x > 0.0) ? radius.xy : radius.zw; radius.x = (position.y > 0.0) ? 
radius.x : radius.y; - float2 q = Abs(position) - hsize + radius.x; - return Min(Max( q.x, q.y ), 0.0f ) + Length(Max( q, 0.0f )) - radius.x; + float2 q = Abs( position ) - hsize + radius.x; + return Min( Max( q.x, q.y ), 0.0f ) + Length( Max( q, 0.0f )) - radius.x; } @@ -81,14 +81,14 @@ float SDF2_OrientedRect (const float2 position, const float2 a, const float2 b, float2 q = (position - (a + b) * 0.5f); q = float2x2( d.x, -d.y, d.y, d.x ) * q; q = Abs(q) - float2( l, angle ) * 0.5f; - return Length(Max( q, 0.0f )) + Min(Max( q.x, q.y ), 0.0f ); + return Length( Max( q, 0.0f )) + Min( Max( q.x, q.y ), 0.0f ); } float SDF2_EquilateralTriangle (float2 p, const float size) { const float k = Sqrt(3.0); // TODO - p.x = Abs(p.x) - size; + p.x = Abs( p.x ) - size; p.y = p.y + size / k; if ( p.x + k * p.y > 0.0 ) p = float2( p.x - k * p.y, -k * p.x - p.y ) / 2.0; p.x -= Clamp( p.x, -2.0 * size, 0.0 ); @@ -108,7 +108,7 @@ float SDF2_Triangle (const float2 position, const float2 p0, const float2 p1, c float2 pq1 = v1 - e1 * Saturate( Dot( v1, e1 ) / Dot( e1, e1 )); float2 pq2 = v2 - e2 * Saturate( Dot( v2, e2 ) / Dot( e2, e2 )); float s = SignOrZero( e0.x * e2.y - e0.y * e2.x ); - float2 d = Min( Min(float2( Dot( pq0, pq0 ), s * (v0.x * e0.y - v0.y * e0.x)), + float2 d = Min( Min( float2( Dot( pq0, pq0 ), s * (v0.x * e0.y - v0.y * e0.x)), float2( Dot( pq1, pq1 ), s * (v1.x * e1.y - v1.y * e1.x) )), float2( Dot( pq2, pq2 ), s * (v2.x * e2.y - v2.y * e2.x) )); return -Sqrt(d.x) * SignOrZero(d.y); @@ -241,8 +241,8 @@ float SDF_Octahedron (const float3 position, const float size) if ( 3.0 * p.z < m ) q = p.zxy; else return m * 0.57735027f; - const float k = Clamp( 0.5f * (q.z - q.y + size), 0.0f, size ); - return Length( float3( q.x, q.y - size + k, q.z - k )); + const float k = Clamp( 0.5f * (q.z - q.y + size), 0.0f, size ); + return Length( float3( q.x, q.y - size + k, q.z - k )); } @@ -410,16 +410,16 @@ float2 SDF_Rotate2D (const float2 p, const float angle) (_sdf_( 
(_pos_)/(_scale_) ) * _scale_) #define SDF_OpSymX( _pos_, _sdf_ )\ - _sdf_(float3( Abs((_pos_).x), (_pos_).yz )) + (_sdf_(float3( Abs((_pos_).x), (_pos_).yz ))) #define SDF_OpSymXZ( _pos_, _sdf_ )\ - _sdf_(float3( Abs((_pos_).x), (_pos_).y, Abs((_pos_).z) )) + (_sdf_(float3( Abs((_pos_).x), (_pos_).y, Abs((_pos_).z) ))) -#define SDF_InfRepetition( _pos_, _center_, _sdf_ )\ - _sdf_( Mod( (_pos_) + 0.5f * (_center_), (_center_) ) - 0.5f * (_center_) ) +#define SDF_InfRepetition( _pos_, _step_, _sdf_ )\ + (_sdf_( Mod( (_pos_) + 0.5f * (_step_), (_step_) ) - 0.5f * (_step_) )) -#define SDF_Repetition( _pos_, _c_, _l_, _sdf_ )\ - _sdf_( (_pos_) - (_c_) * Clamp( Round( (_pos_)/(_c_) ), -(_l_), (_l_) )) +#define SDF_Repetition( _pos_, _step_, _count_, _sdf_ )\ + (_sdf_( (_pos_) - (_step_) * Clamp( Round( (_pos_)/(_step_) ), -(_count_), (_count_) ))) //----------------------------------------------------------------------------- diff --git a/AE/engine/shared_data/3party_shaders/ToneMapping-1.glsl b/AE/engine/shared_data/3party_shaders/ToneMapping-1.glsl index 47b34d08..9acfc935 100644 --- a/AE/engine/shared_data/3party_shaders/ToneMapping-1.glsl +++ b/AE/engine/shared_data/3party_shaders/ToneMapping-1.glsl @@ -11,7 +11,7 @@ // // Reinhard -// +// // The Reinhard tone operator. Typically, the value of k is 1.0, but you can adjust exposure by 1/k. // I.e. 
ToneMap_Reinhard(x, 0.5) == ToneMap_Reinhard(x * 2.0, 1.0) diff --git a/AE/engine/shared_data/3party_shaders/ToneMapping-2.glsl b/AE/engine/shared_data/3party_shaders/ToneMapping-2.glsl index 12869ad8..a4f55ad2 100644 --- a/AE/engine/shared_data/3party_shaders/ToneMapping-2.glsl +++ b/AE/engine/shared_data/3party_shaders/ToneMapping-2.glsl @@ -5,7 +5,7 @@ // from https://www.shadertoy.com/view/XsGfWV float3 ToneMap_ACES_v2 (const float3 color) -{ +{ const float3x3 m1 = float3x3( 0.59719, 0.07600, 0.02840, 0.35458, 0.90834, 0.13383, @@ -16,14 +16,14 @@ float3 ToneMap_ACES_v2 (const float3 color) -0.53108, 1.10813, -0.07276, -0.07367, -0.00605, 1.07602 ); - float3 v = m1 * color; + float3 v = m1 * color; float3 a = v * (v + 0.0245786) - 0.000090537; float3 b = v * (0.983729 * v + 0.4329510) + 0.238081; return clamp( m2 * (a / b), 0.0, 1.0 ); } float3 ToneMap_ACES_v3 (const float3 color) -{ +{ const float3x3 m1 = float3x3( 0.59719, 0.35458, 0.04823, 0.07600, 0.90834, 0.01566, @@ -34,7 +34,7 @@ float3 ToneMap_ACES_v3 (const float3 color) -0.10208, 1.10813, -0.00605, -0.00327, -0.07276, 1.07602 ); - float3 v = m1 * color; + float3 v = m1 * color; float3 a = v * (v + 0.0245786) - 0.000090537; float3 b = v * (0.983729 * v + 0.4329510) + 0.238081; return clamp( m2 * (a / b), 0.0, 1.0 ); @@ -47,7 +47,7 @@ float3 ToneMap_ACES_v3 (const float3 color) // // Reinhard2 -// +// float3 ToneMap_Reinhard2 (const float3 hdr) { @@ -58,7 +58,7 @@ float3 ToneMap_Reinhard2 (const float3 hdr) // // Unreal -// +// // Unreal 3, Documentation: "Color Grading" // Adapted to be close to Tonemap_ACES, with similar range @@ -72,7 +72,7 @@ float3 ToneMap_Unreal (const float3 hdr) // // Uchimura -// +// // Uchimura 2017, "HDR theory and practice" // Math: https://www.desmos.com/calculator/gslcdxvipg @@ -118,7 +118,7 @@ float3 ToneMap_Uchimura (const float3 hdr) { // // Lottes -// +// // Lottes 2016, "Advanced Techniques and Optimization of HDR Color Pipelines" diff --git 
a/AE/engine/shared_data/feature_set/min_apple.as b/AE/engine/shared_data/feature_set/min_apple.as index a75a5a82..7b5ac159 100644 --- a/AE/engine/shared_data/feature_set/min_apple.as +++ b/AE/engine/shared_data/feature_set/min_apple.as @@ -7,6 +7,8 @@ void ASmain () // Apple M1 driver 0.2.1914 on Osx 12.0 // Apple A12 GPU driver 0.2.1915 on Ios 15.3 // Apple A15 GPU driver 0.2.1914 on Ios 15.3 + // Apple A17 Pro GPU driver 0.2.2014 on Ios 17.1 + // Apple M3 Max driver 0.2.2014 on Osx 14.2 // Apple8 // Apple8_Mac // Apple7_Metal3 @@ -95,7 +97,7 @@ void ASmain () fset.perDescrSet_maxStorageBuffers (155); fset.perDescrSet_maxStorageImages (40); fset.perDescrSet_maxUniformBuffers (155); - fset.perDescrSet_maxTotalResources (1024); + fset.perDescrSet_maxTotalResources (512); fset.perStage_maxInputAttachments (96); fset.perStage_maxSampledImages (96); fset.perStage_maxSamplers (16); diff --git a/AE/engine/shared_data/feature_set/min_desktop.as b/AE/engine/shared_data/feature_set/min_desktop.as index 68ec9dd3..0d9c6ec6 100644 --- a/AE/engine/shared_data/feature_set/min_desktop.as +++ b/AE/engine/shared_data/feature_set/min_desktop.as @@ -95,7 +95,7 @@ void ASmain () fset.perDescrSet_maxStorageBuffers (155); fset.perDescrSet_maxStorageImages (40); fset.perDescrSet_maxUniformBuffers (90); - fset.perDescrSet_maxTotalResources (1024); + fset.perDescrSet_maxTotalResources (512); fset.perStage_maxInputAttachments (8); fset.perStage_maxSampledImages (128); fset.perStage_maxSamplers (16); diff --git a/AE/engine/shared_data/feature_set/min_inline_rt.as b/AE/engine/shared_data/feature_set/min_inline_rt.as index 25c3fbae..5b26ea5a 100644 --- a/AE/engine/shared_data/feature_set/min_inline_rt.as +++ b/AE/engine/shared_data/feature_set/min_inline_rt.as @@ -11,7 +11,9 @@ void ASmain () // NVIDIA GeForce RTX 2080 driver 473.11.0.0 on Windows 10 // NVIDIA GeForce RTX 3090 driver 473.11.0.0 on Windows 10 // NVIDIA GeForce RTX 4090 driver 526.98.0.0 on Windows 10 + // AMD Radeon RX 7900 XTX 
(RADV GFX1100) driver 23.2.1 on Arch unknown // Samsung Xclipse 920 driver 2.0.0 on Android 12.0 + // vivo V2324A driver 44.1.0 on Android 14.0 // Apple8 // Apple8_Mac // Apple7_Metal3 @@ -27,8 +29,6 @@ void ASmain () RC fset = FeatureSet( "MinInlineRayTracing" ); fset.depthBiasClamp (True); - fset.dualSrcBlend (True); - fset.fillModeNonSolid (True); fset.independentBlend (True); fset.sampleRateShading (True); fset.constantAlphaColorBlendFactors (True); @@ -55,14 +55,13 @@ void ASmain () EShaderStages::Compute )); fset.subgroupQuadStages(EShaderStages( - EShaderStages::Vertex | EShaderStages::Fragment | EShaderStages::Compute )); fset.subgroup (True); fset.subgroupSizeControl (True); fset.minSubgroupSize (4); - fset.maxSubgroupSize (32); + fset.maxSubgroupSize (16); fset.shaderInt8 (True); fset.shaderInt16 (True); fset.shaderFloat16 (True); @@ -72,8 +71,6 @@ void ASmain () fset.scalarBlockLayout (True); fset.bufferDeviceAddress (True); fset.fragmentStoresAndAtomics (True); - fset.vertexPipelineStoresAndAtomics (True); - fset.shaderClipDistance (True); fset.runtimeDescriptorArray (True); fset.shaderSampledImageArrayDynamicIndexing (True); fset.shaderStorageBufferArrayDynamicIndexing (True); @@ -92,33 +89,31 @@ void ASmain () fset.minSpirvVersion (140); fset.minMetalVersion (240); fset.drawIndirectFirstInstance (True); - fset.multiViewport (True); - fset.maxViewports (16); + fset.maxViewports (1); fset.maxTexelBufferElements (64 << 20); fset.maxUniformBufferSize (64 << 10); fset.maxStorageBufferSize (64 << 10); - fset.perDescrSet_maxInputAttachments (1000000); - fset.perDescrSet_maxSampledImages (1000000); + fset.perDescrSet_maxInputAttachments (9); + fset.perDescrSet_maxSampledImages (500000); fset.perDescrSet_maxSamplers (1024); - fset.perDescrSet_maxStorageBuffers (1048576); - fset.perDescrSet_maxStorageImages (1000000); - fset.perDescrSet_maxUniformBuffers (1048576); - fset.perDescrSet_maxAccelStructures (1048576); - fset.perDescrSet_maxTotalResources (1024); - 
fset.perStage_maxInputAttachments (1048576); - fset.perStage_maxSampledImages (1048576); + fset.perDescrSet_maxStorageBuffers (500000); + fset.perDescrSet_maxStorageImages (500000); + fset.perDescrSet_maxUniformBuffers (216); + fset.perDescrSet_maxAccelStructures (500000); + fset.perDescrSet_maxTotalResources (512); + fset.perStage_maxInputAttachments (9); + fset.perStage_maxSampledImages (500000); fset.perStage_maxSamplers (8192); - fset.perStage_maxStorageBuffers (1048576); - fset.perStage_maxStorageImages (1048576); - fset.perStage_maxUniformBuffers (1048576); - fset.perStage_maxAccelStructures (1048576); - fset.perStage_maxTotalResources (8008184); - fset.maxDescriptorSets (8); + fset.perStage_maxStorageBuffers (500000); + fset.perStage_maxStorageImages (500000); + fset.perStage_maxUniformBuffers (36); + fset.perStage_maxAccelStructures (500000); + fset.perStage_maxTotalResources (500000); + fset.maxDescriptorSets (7); fset.maxTexelOffset (7); fset.maxTexelGatherOffset (7); fset.maxFragmentOutputAttachments (8); - fset.maxFragmentDualSrcAttachments (1); - fset.maxFragmentCombinedOutputResources (8008184); + fset.maxFragmentCombinedOutputResources (1000008); fset.maxPushConstantsSize (128); fset.maxComputeSharedMemorySize (32 << 10); fset.maxComputeWorkGroupInvocations (1 << 10); @@ -196,8 +191,7 @@ void ASmain () EPixelFormat::R16_SNorm, EPixelFormat::R8_SNorm, EPixelFormat::RGBA16_UNorm, EPixelFormat::RGBA8_UNorm, EPixelFormat::RG16_UNorm, EPixelFormat::RG8_UNorm, EPixelFormat::R16_UNorm, EPixelFormat::R8_UNorm, EPixelFormat::RGB10_A2_UNorm, EPixelFormat::BGRA8_UNorm, EPixelFormat::sRGB8_A8, EPixelFormat::sBGR8_A8, - EPixelFormat::R16F, EPixelFormat::RG16F, EPixelFormat::RGBA16F, EPixelFormat::R32F, - EPixelFormat::RG32F, EPixelFormat::RGB_11_11_10F + EPixelFormat::R16F, EPixelFormat::RG16F, EPixelFormat::RGBA16F, EPixelFormat::RGB_11_11_10F }); fset.AddTexelFormats( EFormatFeature::Attachment, { EPixelFormat::RGBA16_SNorm, EPixelFormat::RGBA8_SNorm, 
EPixelFormat::RG16_SNorm, EPixelFormat::RG8_SNorm, @@ -222,6 +216,6 @@ void ASmain () fset.samplerAnisotropy (True); fset.maxSamplerAnisotropy (16.00); fset.maxSamplerLodBias (4.00); - fset.maxFramebufferLayers (1 << 10); + fset.maxFramebufferLayers (256); fset.supportedQueues(EQueueMask( EQueueMask::Graphics )); } diff --git a/AE/engine/shared_data/feature_set/min_mesh_shader.as b/AE/engine/shared_data/feature_set/min_mesh_shader.as index b2392868..b4b7bbf8 100644 --- a/AE/engine/shared_data/feature_set/min_mesh_shader.as +++ b/AE/engine/shared_data/feature_set/min_mesh_shader.as @@ -9,6 +9,7 @@ void ASmain () // NVIDIA GeForce RTX 2080 driver 473.11.0.0 on Windows 10 // NVIDIA GeForce RTX 3090 driver 473.11.0.0 on Windows 10 // NVIDIA GeForce RTX 4090 driver 526.98.0.0 on Windows 10 + // AMD Radeon RX 7900 XTX (RADV GFX1100) driver 23.2.1 on Arch unknown // Apple8 // Apple8_Mac // Apple7_Metal3 diff --git a/AE/engine/shared_data/feature_set/min_mobile.as b/AE/engine/shared_data/feature_set/min_mobile.as index 641e5da0..eb6d44c5 100644 --- a/AE/engine/shared_data/feature_set/min_mobile.as +++ b/AE/engine/shared_data/feature_set/min_mobile.as @@ -4,26 +4,36 @@ void ASmain () { // include: + // AMD Radeon RX 7900 XTX (RADV GFX1100) driver 23.2.1 on Arch unknown // Apple A12 GPU driver 0.2.1915 on Ios 15.3 // Apple A15 GPU driver 0.2.1914 on Ios 15.3 + // Apple A17 Pro GPU driver 0.2.2014 on Ios 17.1 + // Apple M3 Max driver 0.2.2014 on Osx 14.2 // Adreno (TM) 660 driver 512.530.0 on Android 11.0 + // Google Pixel 8 Pro driver 44.0.0 on Android 14.0 // Intel(R) Haswell Desktop driver 19.0.0 on Android 9.0 // Mali-T830 driver 28.0.0 on Android 9.0 // Adreno (TM) 505 driver 512.454.0 on Android 9.0 + // nubia NX729J driver 512.746.0 on Android 13.0 // NVIDIA Tegra X1 (rev B) (nvgpu) driver 495.0.0.0 on Android 11.0 // Mali-G71 driver 575.795.1934 on Android 7.1 + // OPPO CPH1951 driver 1.386.1368 on Android 11.0 + // OPPO PDYT20 driver 32.1.0 on Android 12.0 + // OPPO 
PFFM20 driver 32.1.0 on Android 12.0 // V3D 4.2 driver 21.2.5 on Android 12.0 // Mali-G52 MC2 driver 26.0.0 on Android 11.0 // Adreno (TM) 730 driver 512.615.0 on Android 12.0 // rockchip orangepi5 driver 12.0.0 on Android 12.0 // Mali-G72 driver 26.0.0 on Android 11.0 + // samsung SM-G780F driver 38.1.0 on Android 13.0 // Mali-G76 driver 32.1.0 on Android 12.0 + // samsung SM-S901E driver 512.744.6 on Android 13.0 // Samsung Xclipse 920 driver 2.0.0 on Android 12.0 // PowerVR Rogue GE8320 driver 1.386.1368 on Android 11.0 // Adreno (TM) 610 driver 512.502.0 on Android 11.0 // PowerVR Rogue GE8300 driver 1.322.3448 on Android 10.0 // VeriSilicon driver 6.4.0 on Android 11.0 - // Mali-G57 driver 21.0.0 on Android 10.0 + // vivo V2324A driver 44.1.0 on Android 14.0 // Adreno (TM) 612 driver 512.502.0 on Android 12.0 // Apple8 // Apple7_Metal3 @@ -51,7 +61,7 @@ void ASmain () fset.perDescrSet_maxStorageBuffers (24); fset.perDescrSet_maxStorageImages (24); fset.perDescrSet_maxUniformBuffers (72); - fset.perDescrSet_maxTotalResources (1024); + fset.perDescrSet_maxTotalResources (512); fset.perStage_maxInputAttachments (4); fset.perStage_maxSampledImages (16); fset.perStage_maxSamplers (16); diff --git a/AE/engine/shared_data/feature_set/min_mobile_adreno.as b/AE/engine/shared_data/feature_set/min_mobile_adreno.as index e9ffb876..17c03d41 100644 --- a/AE/engine/shared_data/feature_set/min_mobile_adreno.as +++ b/AE/engine/shared_data/feature_set/min_mobile_adreno.as @@ -65,7 +65,7 @@ void ASmain () fset.perDescrSet_maxStorageBuffers (24); fset.perDescrSet_maxStorageImages (24); fset.perDescrSet_maxUniformBuffers (84); - fset.perDescrSet_maxTotalResources (1024); + fset.perDescrSet_maxTotalResources (512); fset.perStage_maxInputAttachments (8); fset.perStage_maxSampledImages (128); fset.perStage_maxSamplers (16); diff --git a/AE/engine/shared_data/feature_set/min_mobile_mali.as b/AE/engine/shared_data/feature_set/min_mobile_mali.as index 6c767047..ace33dff 100644 --- 
a/AE/engine/shared_data/feature_set/min_mobile_mali.as +++ b/AE/engine/shared_data/feature_set/min_mobile_mali.as @@ -18,7 +18,6 @@ void ASmain () // Mali-G52 MC2 driver 26.0.0 on Android 11.0 // Mali-G72 driver 26.0.0 on Android 11.0 // Mali-G76 driver 32.1.0 on Android 12.0 - // Mali-G57 driver 21.0.0 on Android 10.0 const EFeature True = EFeature::RequireTrue; @@ -51,7 +50,7 @@ void ASmain () fset.perDescrSet_maxStorageBuffers (24); fset.perDescrSet_maxStorageImages (24); fset.perDescrSet_maxUniformBuffers (72); - fset.perDescrSet_maxTotalResources (1024); + fset.perDescrSet_maxTotalResources (512); fset.perStage_maxInputAttachments (4); fset.perStage_maxSampledImages (16); fset.perStage_maxSamplers (128); @@ -171,9 +170,9 @@ void ASmain () EPixelFormat::ASTC_sRGB8_A8_8x6, EPixelFormat::ASTC_sRGB8_A8_8x8, EPixelFormat::ASTC_sRGB8_A8_10x5, EPixelFormat::ASTC_sRGB8_A8_10x6, EPixelFormat::ASTC_sRGB8_A8_10x8, EPixelFormat::ASTC_sRGB8_A8_10x10, EPixelFormat::ASTC_sRGB8_A8_12x10, EPixelFormat::ASTC_sRGB8_A8_12x12 }); - fset.AddTexelFormats( EFormatFeature::HWCompressedAttachment, { - EPixelFormat::RGBA8_UNorm - }); + //fset.AddTexelFormats( EFormatFeature::HWCompressedAttachment, { + // EPixelFormat::RGBA8_UNorm + //}); fset.samplerMipLodBias (True); fset.maxSamplerAnisotropy (1.00); fset.maxSamplerLodBias (2.00); diff --git a/AE/engine/shared_data/feature_set/min_mobile_pvr.as b/AE/engine/shared_data/feature_set/min_mobile_pvr.as index 4fe8451d..51e96053 100644 --- a/AE/engine/shared_data/feature_set/min_mobile_pvr.as +++ b/AE/engine/shared_data/feature_set/min_mobile_pvr.as @@ -77,7 +77,7 @@ void ASmain () fset.perDescrSet_maxStorageBuffers (256); fset.perDescrSet_maxStorageImages (256); fset.perDescrSet_maxUniformBuffers (256); - fset.perDescrSet_maxTotalResources (1024); + fset.perDescrSet_maxTotalResources (512); fset.perStage_maxInputAttachments (8); fset.perStage_maxSampledImages (128); fset.perStage_maxSamplers (128); diff --git 
a/AE/engine/shared_data/feature_set/min_recursive_rt.as b/AE/engine/shared_data/feature_set/min_recursive_rt.as index 2b62a820..a867d0ea 100644 --- a/AE/engine/shared_data/feature_set/min_recursive_rt.as +++ b/AE/engine/shared_data/feature_set/min_recursive_rt.as @@ -10,6 +10,7 @@ void ASmain () // NVIDIA GeForce RTX 2080 driver 473.11.0.0 on Windows 10 // NVIDIA GeForce RTX 3090 driver 473.11.0.0 on Windows 10 // NVIDIA GeForce RTX 4090 driver 526.98.0.0 on Windows 10 + // AMD Radeon RX 7900 XTX (RADV GFX1100) driver 23.2.1 on Arch unknown const EFeature True = EFeature::RequireTrue; diff --git a/AE/engine/shared_data/feature_set/minimal.as b/AE/engine/shared_data/feature_set/minimal.as index 6997cf5b..763054cd 100644 --- a/AE/engine/shared_data/feature_set/minimal.as +++ b/AE/engine/shared_data/feature_set/minimal.as @@ -23,7 +23,7 @@ void ASmain () fset.perDescrSet_maxStorageBuffers (24); fset.perDescrSet_maxStorageImages (24); fset.perDescrSet_maxUniformBuffers (72); - fset.perDescrSet_maxTotalResources (1024); + fset.perDescrSet_maxTotalResources (512); fset.perStage_maxInputAttachments (4); fset.perStage_maxSampledImages (16); fset.perStage_maxSamplers (16); diff --git a/AE/engine/shared_data/feature_set/parts/min_desc_indexing.as b/AE/engine/shared_data/feature_set/parts/min_desc_indexing.as index 043b0597..b45f48ad 100644 --- a/AE/engine/shared_data/feature_set/parts/min_desc_indexing.as +++ b/AE/engine/shared_data/feature_set/parts/min_desc_indexing.as @@ -22,24 +22,34 @@ void ASmain () // NVIDIA GeForce RTX 3090 driver 473.11.0.0 on Windows 10 // NVIDIA GeForce RTX 4090 driver 526.98.0.0 on Windows 10 // Radeon RX 580 Series driver 2.0.207 on Ubuntu 20.04 + // AMD Radeon RX 7900 XTX (RADV GFX1100) driver 23.2.1 on Arch unknown // Apple A12 GPU driver 0.2.1915 on Ios 15.3 // Apple A15 GPU driver 0.2.1914 on Ios 15.3 + // Apple A17 Pro GPU driver 0.2.2014 on Ios 17.1 + // Apple M3 Max driver 0.2.2014 on Osx 14.2 // Adreno (TM) 660 driver 512.530.0 on 
Android 11.0 + // Google Pixel 8 Pro driver 44.0.0 on Android 14.0 // Intel(R) Haswell Desktop driver 19.0.0 on Android 9.0 // Mali-T830 driver 28.0.0 on Android 9.0 // Adreno (TM) 505 driver 512.454.0 on Android 9.0 + // nubia NX729J driver 512.746.0 on Android 13.0 // NVIDIA Tegra X1 (rev B) (nvgpu) driver 495.0.0.0 on Android 11.0 // Mali-G71 driver 575.795.1934 on Android 7.1 + // OPPO CPH1951 driver 1.386.1368 on Android 11.0 + // OPPO PDYT20 driver 32.1.0 on Android 12.0 + // OPPO PFFM20 driver 32.1.0 on Android 12.0 // Mali-G52 MC2 driver 26.0.0 on Android 11.0 // Adreno (TM) 730 driver 512.615.0 on Android 12.0 // rockchip orangepi5 driver 12.0.0 on Android 12.0 // Mali-G72 driver 26.0.0 on Android 11.0 + // samsung SM-G780F driver 38.1.0 on Android 13.0 // Mali-G76 driver 32.1.0 on Android 12.0 + // samsung SM-S901E driver 512.744.6 on Android 13.0 // Samsung Xclipse 920 driver 2.0.0 on Android 12.0 // PowerVR Rogue GE8320 driver 1.386.1368 on Android 11.0 // Adreno (TM) 610 driver 512.502.0 on Android 11.0 // PowerVR Rogue GE8300 driver 1.322.3448 on Android 10.0 - // Mali-G57 driver 21.0.0 on Android 10.0 + // vivo V2324A driver 44.1.0 on Android 14.0 // Adreno (TM) 612 driver 512.502.0 on Android 12.0 // Apple8 // Apple8_Mac @@ -68,8 +78,8 @@ void ASmain () fset.perDescrSet_maxStorageBuffers (24); fset.perDescrSet_maxStorageImages (24); fset.perDescrSet_maxUniformBuffers (72); - fset.perDescrSet_maxTotalResources (1024); + fset.perDescrSet_maxTotalResources (512); fset.perStage_maxInputAttachments (4); fset.perStage_maxSampledImages (16); fset.perStage_maxSamplers (16); diff --git a/AE/engine/shared_data/feature_set/parts/min_nonuniform_desc_idx.as b/AE/engine/shared_data/feature_set/parts/min_nonuniform_desc_idx.as index c92b1215..6704ce6e 100644 --- a/AE/engine/shared_data/feature_set/parts/min_nonuniform_desc_idx.as +++ b/AE/engine/shared_data/feature_set/parts/min_nonuniform_desc_idx.as @@ -14,10 +14,16 @@ void ASmain () // NVIDIA GeForce RTX 4090 
driver 526.98.0.0 on Windows 10 // Apple A12 GPU driver 0.2.1915 on Ios 15.3 // Apple A15 GPU driver 0.2.1914 on Ios 15.3 + // Apple A17 Pro GPU driver 0.2.2014 on Ios 17.1 + // Apple M3 Max driver 0.2.2014 on Osx 14.2 // Adreno (TM) 660 driver 512.530.0 on Android 11.0 + // nubia NX729J driver 512.746.0 on Android 13.0 + // OPPO PDYT20 driver 32.1.0 on Android 12.0 + // OPPO PFFM20 driver 32.1.0 on Android 12.0 // Adreno (TM) 730 driver 512.615.0 on Android 12.0 + // samsung SM-G780F driver 38.1.0 on Android 13.0 + // samsung SM-S901E driver 512.744.6 on Android 13.0 // Samsung Xclipse 920 driver 2.0.0 on Android 12.0 - // Mali-G57 driver 21.0.0 on Android 10.0 // Apple8 // Apple8_Mac // Apple7_Metal3 @@ -48,24 +54,24 @@ void ASmain () fset.maxUniformBufferSize (64 << 10); fset.maxStorageBufferSize (64 << 10); - fset.perDescrSet_maxInputAttachments (4); - fset.perDescrSet_maxSampledImages (96); + fset.perDescrSet_maxInputAttachments (8); + fset.perDescrSet_maxSampledImages (480); fset.perDescrSet_maxSamplers (80); fset.perDescrSet_maxStorageBuffers (155); fset.perDescrSet_maxStorageImages (40); - fset.perDescrSet_maxUniformBuffers (72); - fset.perDescrSet_maxTotalResources (1024); - fset.perStage_maxInputAttachments (4); - fset.perStage_maxSampledImages (16); + fset.perDescrSet_maxUniformBuffers (90); + fset.perDescrSet_maxTotalResources (512); + fset.perStage_maxInputAttachments (8); + fset.perStage_maxSampledImages (96); fset.perStage_maxSamplers (16); fset.perStage_maxStorageBuffers (31); fset.perStage_maxStorageImages (8); - fset.perStage_maxUniformBuffers (12); - fset.perStage_maxTotalResources (83); + fset.perStage_maxUniformBuffers (15); + fset.perStage_maxTotalResources (127); fset.maxDescriptorSets (4); fset.maxFragmentOutputAttachments (8); - fset.maxFragmentCombinedOutputResources (51); + fset.maxFragmentCombinedOutputResources (72); fset.maxPushConstantsSize (128); } diff --git a/AE/engine/shared_data/scripts/asset_packer.as 
b/AE/engine/shared_data/scripts/asset_packer.as index c9014a3e..00335541 100644 --- a/AE/engine/shared_data/scripts/asset_packer.as +++ b/AE/engine/shared_data/scripts/asset_packer.as @@ -1,4 +1,4 @@ -//CB390BE2 +//bfe1cedf #include #include @@ -20,90 +20,90 @@ struct RC; template using array = std::vector; +struct EPipelineDynamicState; +struct EFilter; +struct MultiSamples; +struct EFeature; +struct ESamplerChromaLocation; +struct EDescSetUsage; +struct EVertexInputRate; +struct EBorderColor; +struct EAttachmentStoreOp; +struct EImageAspect; +struct EShaderIO; +struct EVendorID; struct EResourceState; +struct float3; struct EPipelineOpt; struct RasterFont; struct float2; -struct ESurfaceFormat; -struct MipmapLevel; -struct ESamplerYcbcrRange; -struct ESamplerUsage; -struct EReductionMode; -struct ubyte4; -struct ESubgroupOperation; -struct ubyte2; -struct EGraphicsDeviceID; -struct ubyte3; -struct Material; -struct Mesh; -struct EPrimitive; -struct DepthStencil; +struct float4; +struct RectU; +struct ECompareOp; +struct EIndex; +struct uint4; +struct uint3; +struct RectI; +struct EShaderStages; +struct uint2; +struct EShader; +struct RectF; +struct ESubgroupTypes; +struct RGBA32u; +struct ESamplerYcbcrModelConversion; +struct ImageAtlas; +struct EMipmapFilter; +struct short2; +struct Texture; +struct sbyte4; struct short4; -struct EPixelFormat; -struct EBlendOp; +struct sbyte3; +struct RGBA8u; +struct ushort4; +struct sbyte2; +struct short3; struct ushort3; struct ushort2; -struct sbyte2; +struct RGBA32f; struct HSVColor; -struct ELogicOp; +struct bool2; +struct int3; struct int4; +struct EPixelFormat; struct EImage; -struct RGBA32f; -struct int2; -struct EQueueMask; -struct bool3; struct RGBA32i; +struct bool3; +struct int2; struct bool4; -struct ImageLayer; -struct EVertexType; -struct bool2; -struct int3; struct ECullMode; -struct ECubeFace; struct EAddressMode; -struct EPolygonMode; -struct ERasterFontMode; +struct ECubeFace; +struct EQueueMask; struct 
EAttachmentLoadOp; +struct EVertexType; +struct EPolygonMode; +struct ImageLayer; struct EBlendFactor; -struct EStencilOp; -struct ERTInstanceOpt; +struct ERasterFontMode; +struct EBlendOp; +struct ELogicOp; struct Model; -struct float4; -struct ECompareOp; -struct uint4; -struct RectU; -struct uint2; -struct EIndex; -struct uint3; -struct RectI; -struct RectF; -struct ESubgroupTypes; -struct EShader; -struct EShaderStages; -struct RGBA32u; -struct short3; -struct ushort4; -struct short2; -struct sbyte3; -struct sbyte4; -struct ESamplerYcbcrModelConversion; -struct RGBA8u; -struct Texture; -struct ImageAtlas; -struct EFilter; -struct EMipmapFilter; -struct EPipelineDynamicState; -struct MultiSamples; -struct EFeature; -struct ESamplerChromaLocation; -struct EBorderColor; -struct EDescSetUsage; -struct EAttachmentStoreOp; -struct EVertexInputRate; -struct EImageAspect; -struct EVendorID; -struct EShaderIO; -struct float3; +struct ERTInstanceOpt; +struct EStencilOp; +struct ESurfaceFormat; +struct MipmapLevel; +struct ESamplerYcbcrRange; +struct ubyte4; +struct ESamplerUsage; +struct EReductionMode; +struct ubyte3; +struct EGraphicsDeviceID; +struct ubyte2; +struct ESubgroupOperation; +struct Mesh; +struct Material; +struct DepthStencil; +struct EPrimitive; using sbyte = int8; using ubyte = uint8; @@ -1991,57 +1991,60 @@ struct EGraphicsDeviceID static constexpr uint32 Adreno_600_QC5 = 4; static constexpr uint32 Adreno_700_SC3 = 5; static constexpr uint32 Adreno_700_DC4_SC5 = 6; - static constexpr uint32 AMD_GCN1 = 7; - static constexpr uint32 AMD_GCN2 = 8; - static constexpr uint32 AMD_GCN3 = 9; - static constexpr uint32 AMD_GCN4 = 10; - static constexpr uint32 AMD_GCN5 = 11; - static constexpr uint32 AMD_GCN5_APU = 12; - static constexpr uint32 AMD_RDNA1 = 13; - static constexpr uint32 AMD_RDNA2 = 14; - static constexpr uint32 AMD_RDNA2_APU = 15; - static constexpr uint32 AMD_RDNA3 = 16; - static constexpr uint32 AMD_RDNA3_APU = 17; - static constexpr uint32 
Apple_A8 = 18; - static constexpr uint32 Apple_A9_A10 = 19; - static constexpr uint32 Apple_A11 = 20; - static constexpr uint32 Apple_A12 = 21; - static constexpr uint32 Apple_A13 = 22; - static constexpr uint32 Apple_A14_M1 = 23; - static constexpr uint32 Apple_A15_M2 = 24; - static constexpr uint32 Apple_A16 = 25; - static constexpr uint32 Mali_Midgard_Gen2 = 26; - static constexpr uint32 Mali_Midgard_Gen3 = 27; - static constexpr uint32 Mali_Midgard_Gen4 = 28; - static constexpr uint32 Mali_Bifrost_Gen1 = 29; - static constexpr uint32 Mali_Bifrost_Gen2 = 30; - static constexpr uint32 Mali_Bifrost_Gen3 = 31; - static constexpr uint32 Mali_Valhall_Gen1 = 32; - static constexpr uint32 Mali_Valhall_Gen2 = 33; - static constexpr uint32 Mali_Valhall_Gen3 = 34; - static constexpr uint32 Mali_Valhall_Gen4 = 35; - static constexpr uint32 NV_Maxwell = 36; - static constexpr uint32 NV_Maxwell_Tegra = 37; - static constexpr uint32 NV_Pascal = 38; - static constexpr uint32 NV_Pascal_MX = 39; - static constexpr uint32 NV_Pascal_Tegra = 40; - static constexpr uint32 NV_Volta = 41; - static constexpr uint32 NV_Turing_16 = 42; - static constexpr uint32 NV_Turing = 43; - static constexpr uint32 NV_Turing_MX = 44; - static constexpr uint32 NV_Ampere = 45; - static constexpr uint32 NV_Ampere_Orin = 46; - static constexpr uint32 NV_Ada = 47; - static constexpr uint32 Intel_Gen9 = 48; - static constexpr uint32 Intel_Gen11 = 49; - static constexpr uint32 Intel_Gen12 = 50; - static constexpr uint32 PowerVR_Series8XE = 51; - static constexpr uint32 PowerVR_Series8XEP = 52; - static constexpr uint32 PowerVR_Series8XT = 53; - static constexpr uint32 PowerVR_Series9XE = 54; - static constexpr uint32 VeriSilicon = 55; - static constexpr uint32 V3D_4 = 56; - static constexpr uint32 SwiftShader = 57; + static constexpr uint32 Adreno_700_QC5X = 7; + static constexpr uint32 AMD_GCN1 = 8; + static constexpr uint32 AMD_GCN2 = 9; + static constexpr uint32 AMD_GCN3 = 10; + static constexpr uint32 
AMD_GCN4 = 11; + static constexpr uint32 AMD_GCN5 = 12; + static constexpr uint32 AMD_GCN5_APU = 13; + static constexpr uint32 AMD_RDNA1 = 14; + static constexpr uint32 AMD_RDNA2 = 15; + static constexpr uint32 AMD_RDNA2_APU = 16; + static constexpr uint32 AMD_RDNA3 = 17; + static constexpr uint32 AMD_RDNA3_APU = 18; + static constexpr uint32 Apple_A8 = 19; + static constexpr uint32 Apple_A9_A10 = 20; + static constexpr uint32 Apple_A11 = 21; + static constexpr uint32 Apple_A12 = 22; + static constexpr uint32 Apple_A13 = 23; + static constexpr uint32 Apple_A14_M1 = 24; + static constexpr uint32 Apple_A15_M2 = 25; + static constexpr uint32 Apple_A16 = 26; + static constexpr uint32 Apple_A17_M3 = 27; + static constexpr uint32 Mali_Midgard_Gen2 = 28; + static constexpr uint32 Mali_Midgard_Gen3 = 29; + static constexpr uint32 Mali_Midgard_Gen4 = 30; + static constexpr uint32 Mali_Bifrost_Gen1 = 31; + static constexpr uint32 Mali_Bifrost_Gen2 = 32; + static constexpr uint32 Mali_Bifrost_Gen3 = 33; + static constexpr uint32 Mali_Valhall_Gen1 = 34; + static constexpr uint32 Mali_Valhall_Gen2 = 35; + static constexpr uint32 Mali_Valhall_Gen3 = 36; + static constexpr uint32 Mali_Valhall_Gen4 = 37; + static constexpr uint32 Mali_Valhall_Gen5 = 38; + static constexpr uint32 NV_Maxwell = 39; + static constexpr uint32 NV_Maxwell_Tegra = 40; + static constexpr uint32 NV_Pascal = 41; + static constexpr uint32 NV_Pascal_MX = 42; + static constexpr uint32 NV_Pascal_Tegra = 43; + static constexpr uint32 NV_Volta = 44; + static constexpr uint32 NV_Turing_16 = 45; + static constexpr uint32 NV_Turing = 46; + static constexpr uint32 NV_Turing_MX = 47; + static constexpr uint32 NV_Ampere = 48; + static constexpr uint32 NV_Ampere_Orin = 49; + static constexpr uint32 NV_Ada = 50; + static constexpr uint32 Intel_Gen9 = 51; + static constexpr uint32 Intel_Gen11 = 52; + static constexpr uint32 Intel_Gen12 = 53; + static constexpr uint32 Intel_Gen12_7 = 54; + static constexpr uint32 
PowerVR_Series8XE = 55; + static constexpr uint32 PowerVR_Series8XEP = 56; + static constexpr uint32 PowerVR_Series8XT = 57; + static constexpr uint32 PowerVR_Series9XE = 58; + static constexpr uint32 VeriSilicon = 59; + static constexpr uint32 SwiftShader = 60; }; struct EFilter @@ -2106,8 +2109,7 @@ struct ESamplerUsage ESamplerUsage (uint8) {} operator uint8 () const; static constexpr uint8 Default = 0; - static constexpr uint8 Subsampled = 1; - static constexpr uint8 SubsampledCoarseReconstruction = 2; + static constexpr uint8 NonSeamlessCubeMap = 1; }; struct EVertexInputRate @@ -2133,16 +2135,19 @@ struct EDescSetUsage struct EPipelineOpt { EPipelineOpt () {} - EPipelineOpt (uint8) {} - operator uint8 () const; - static constexpr uint8 Optimize = 1; - static constexpr uint8 CS_DispatchBase = 2; - static constexpr uint8 RT_NoNullAnyHitShaders = 4; - static constexpr uint8 RT_NoNullClosestHitShaders = 8; - static constexpr uint8 RT_NoNullMissShaders = 16; - static constexpr uint8 RT_NoNullIntersectionShaders = 32; - static constexpr uint8 RT_SkipTriangles = 64; - static constexpr uint8 RT_SkipAABBs = 128; + EPipelineOpt (uint16) {} + operator uint16 () const; + static constexpr uint16 Optimize = 1; + static constexpr uint16 CS_DispatchBase = 2; + static constexpr uint16 RT_NoNullAnyHitShaders = 4; + static constexpr uint16 RT_NoNullClosestHitShaders = 8; + static constexpr uint16 RT_NoNullMissShaders = 16; + static constexpr uint16 RT_NoNullIntersectionShaders = 32; + static constexpr uint16 RT_SkipTriangles = 64; + static constexpr uint16 RT_SkipAABBs = 128; + static constexpr uint16 DontCompile = 256; + static constexpr uint16 CaptureStatistics = 512; + static constexpr uint16 CaptureInternalRepresentation = 1024; }; struct EQueueMask @@ -2343,15 +2348,15 @@ struct RC : RasterFont }; template <> -struct RC : Material +struct RC : ImageAtlas { - RC (const Material &); + RC (const ImageAtlas &); }; template <> -struct RC : Mesh +struct RC : Texture { - RC 
(const Mesh &); + RC (const Texture &); }; template <> @@ -2361,14 +2366,14 @@ struct RC : Model }; template <> -struct RC : Texture +struct RC : Mesh { - RC (const Texture &); + RC (const Mesh &); }; template <> -struct RC : ImageAtlas +struct RC : Material { - RC (const ImageAtlas &); + RC (const Material &); }; diff --git a/AE/engine/shared_data/scripts/input_actions.as b/AE/engine/shared_data/scripts/input_actions.as index bccf421c..6bb31ca6 100644 --- a/AE/engine/shared_data/scripts/input_actions.as +++ b/AE/engine/shared_data/scripts/input_actions.as @@ -1,4 +1,4 @@ -//2C45077E +//eda3b3b4 #include #include @@ -20,47 +20,47 @@ struct RC; template using array = std::vector; -struct ubyte2; -struct ubyte3; -struct EGestureType; -struct EGestureState; -struct WinAPI_ActionBindings; -struct OpenVR_Input; -struct ubyte4; -struct VecSwizzle; -struct Android_ActionBindings; -struct EValueType; +struct uint4; struct WinAPI_Input; -struct OpenVR_BindingsMode; +struct EValueType; +struct Android_ActionBindings; +struct VecSwizzle; +struct GLFW_Input; +struct int4; +struct int3; +struct bool2; +struct int2; +struct bool3; +struct bool4; +struct short4; struct ushort3; -struct sbyte2; -struct ushort2; struct ActionInfo; struct Android_BindingsMode; -struct short4; -struct bool4; -struct int2; -struct bool3; -struct int3; -struct bool2; -struct GLFW_Input; -struct int4; -struct GLFW_BindingsMode; +struct ushort2; +struct OpenVR_BindingsMode; +struct sbyte2; +struct EGestureType; +struct ubyte3; +struct ubyte2; +struct ubyte4; +struct WinAPI_ActionBindings; +struct EGestureState; +struct OpenVR_Input; +struct float3; +struct float2; +struct OpenVR_ActionBindings; +struct WinAPI_BindingsMode; +struct GLFW_ActionBindings; +struct sbyte3; +struct sbyte4; struct short3; struct short2; struct ushort4; -struct sbyte3; -struct sbyte4; -struct uint4; -struct float4; +struct GLFW_BindingsMode; struct uint3; struct uint2; struct Android_Input; -struct float2; -struct float3; -struct 
GLFW_ActionBindings; -struct WinAPI_BindingsMode; -struct OpenVR_ActionBindings; +struct float4; using sbyte = int8; using ubyte = uint8; @@ -1509,7 +1509,7 @@ struct Android_Input static constexpr uint16 Delete = 67; static constexpr uint16 Grave = 68; static constexpr uint16 Minus = 69; - static constexpr uint16 Equals = 70; + static constexpr uint16 Equal = 70; static constexpr uint16 LeftBracket = 71; static constexpr uint16 RightBracket = 72; static constexpr uint16 BackSlash = 73; @@ -1787,15 +1787,15 @@ struct OpenVR_ActionBindings }; template <> -struct RC : WinAPI_ActionBindings +struct RC : Android_ActionBindings { - RC (const WinAPI_ActionBindings &); + RC (const Android_ActionBindings &); }; template <> -struct RC : Android_ActionBindings +struct RC : Android_BindingsMode { - RC (const Android_ActionBindings &); + RC (const Android_BindingsMode &); }; template <> @@ -1805,32 +1805,32 @@ struct RC : OpenVR_BindingsMode }; template <> -struct RC : Android_BindingsMode +struct RC : WinAPI_ActionBindings { - RC (const Android_BindingsMode &); + RC (const WinAPI_ActionBindings &); }; template <> -struct RC : GLFW_BindingsMode +struct RC : OpenVR_ActionBindings { - RC (const GLFW_BindingsMode &); + RC (const OpenVR_ActionBindings &); }; template <> -struct RC : GLFW_ActionBindings +struct RC : WinAPI_BindingsMode { - RC (const GLFW_ActionBindings &); + RC (const WinAPI_BindingsMode &); }; template <> -struct RC : WinAPI_BindingsMode +struct RC : GLFW_ActionBindings { - RC (const WinAPI_BindingsMode &); + RC (const GLFW_ActionBindings &); }; template <> -struct RC : OpenVR_ActionBindings +struct RC : GLFW_BindingsMode { - RC (const OpenVR_ActionBindings &); + RC (const GLFW_BindingsMode &); }; diff --git a/AE/engine/shared_data/scripts/offline_packer.as b/AE/engine/shared_data/scripts/offline_packer.as index 1f98f5f1..1f380828 100644 --- a/AE/engine/shared_data/scripts/offline_packer.as +++ b/AE/engine/shared_data/scripts/offline_packer.as @@ -1,4 +1,4 @@ 
-//F104D754 +//457d401f #include #include @@ -20,37 +20,37 @@ struct RC; template using array = std::vector; -struct int3; +struct PipelineCompiler; +struct float2; +struct float3; +struct float4; +struct uint4; +struct uint2; +struct uint3; +struct AssetPacker; +struct EReflectionFlags; +struct short2; +struct ushort4; +struct short3; +struct sbyte4; +struct sbyte3; +struct sbyte2; +struct ushort3; +struct ushort2; +struct short4; +struct Archive; struct bool2; +struct int3; struct int2; struct bool3; -struct EPathParamsFlags; struct int4; +struct EPathParamsFlags; struct bool4; -struct ushort3; -struct sbyte2; -struct ushort2; -struct short4; -struct Archive; -struct InputActions; +struct ubyte4; struct ubyte3; struct EFileType; struct ubyte2; -struct ubyte4; -struct float2; -struct float3; -struct PipelineCompiler; -struct short2; -struct ushort4; -struct short3; -struct sbyte4; -struct sbyte3; -struct AssetPacker; -struct EReflectionFlags; -struct uint3; -struct uint2; -struct float4; -struct uint4; +struct InputActions; using sbyte = int8; using ubyte = uint8; @@ -1098,26 +1098,26 @@ struct Archive }; template <> -struct RC : Archive +struct RC : PipelineCompiler { - RC (const Archive &); + RC (const PipelineCompiler &); }; template <> -struct RC : InputActions +struct RC : AssetPacker { - RC (const InputActions &); + RC (const AssetPacker &); }; template <> -struct RC : PipelineCompiler +struct RC : Archive { - RC (const PipelineCompiler &); + RC (const Archive &); }; template <> -struct RC : AssetPacker +struct RC : InputActions { - RC (const AssetPacker &); + RC (const InputActions &); }; diff --git a/AE/engine/shared_data/scripts/pipeline_compiler.as b/AE/engine/shared_data/scripts/pipeline_compiler.as index 01b0119b..55a11f18 100644 --- a/AE/engine/shared_data/scripts/pipeline_compiler.as +++ b/AE/engine/shared_data/scripts/pipeline_compiler.as @@ -1,4 +1,4 @@ -//9F2C308D +//dda2b636 #include #include @@ -20,137 +20,137 @@ struct RC; template using array 
= std::vector; -struct EBlendFactor; +struct ComputePipelineSpec; +struct EPrimitive; +struct ubyte3; +struct EGraphicsDeviceID; +struct RenderState_InputAssemblyState; +struct ubyte2; +struct ESubgroupOperation; +struct RenderState_RasterizationState; +struct EFormatFeature; +struct ESamplerYcbcrRange; +struct ubyte4; +struct ESamplerUsage; +struct CompatibleRenderPass; +struct GlobalConfig; struct ERTInstanceOpt; struct Shader; struct ShaderStructType; +struct EBlendFactor; struct EStencilOp; -struct EAddressMode; struct ECompilationTarget; -struct RayIndex; +struct EAddressMode; struct RenderState_MultisampleState; -struct EImage; +struct RayIndex; struct bool2; struct bool3; +struct EImage; +struct bool4; struct RGBA32i; struct EVertexType; -struct bool4; struct EBlendOp; +struct ECullMode; struct ELogicOp; struct ETessPatch; -struct ECullMode; -struct EQueueMask; +struct EShaderPreprocessor; struct RGBA32f; -struct HSVColor; struct RenderState_ColorBuffer_ColorMask; +struct EQueueMask; struct ShaderStructTypeUsage; -struct ComputePipelineSpec; -struct EShaderPreprocessor; -struct EPrimitive; -struct ESubgroupOperation; -struct ubyte3; -struct EGraphicsDeviceID; -struct ubyte2; -struct RenderState_InputAssemblyState; -struct RenderState_RasterizationState; -struct EFormatFeature; -struct ESamplerYcbcrRange; -struct ESamplerUsage; -struct ubyte4; -struct CompatibleRenderPass; -struct GlobalConfig; -struct EShaderIO; +struct HSVColor; +struct ESamplerYcbcrModelConversion; +struct EAttachment; +struct short2; +struct ushort4; +struct GraphicsPass; +struct short3; +struct sbyte4; +struct sbyte3; +struct ESubgroupTypes; +struct Align; +struct EAccessType; +struct EIndex; +struct ComputePipeline; +struct float4; struct EImageAspect; -struct InstanceIndex; -struct EPipelineOpt; +struct EShaderIO; struct float2; struct float3; -struct EDescSetUsage; +struct InstanceIndex; +struct EPipelineOpt; struct EVertexInputRate; +struct EDescSetUsage; struct 
ESamplerChromaLocation; struct DescriptorSetLayout; +struct EFilter; struct EPipelineDynamicState; struct RenderState_StencilBufferState; -struct EFilter; -struct MultiSamples; struct RayTracingPipeline; -struct RayTracingShaderBinding; struct RenderTechnique; -struct EAttachment; -struct ESamplerYcbcrModelConversion; -struct sbyte4; -struct sbyte3; -struct ushort4; -struct short2; -struct short3; -struct ESubgroupTypes; -struct GraphicsPass; -struct Align; -struct EIndex; -struct ComputePipeline; -struct EAccessType; -struct float4; +struct MultiSamples; +struct RayTracingShaderBinding; +struct AttachmentSpec; +struct DepthStencil; +struct RenderState; +struct RenderState_ColorBuffersState; +struct RayTracingPipelineSpec; +struct EReductionMode; +struct ESurfaceFormat; +struct EShaderVersion; +struct MipmapLevel; struct EValueType; struct ETessSpacing; -struct TilePipeline; struct TilePipelineSpec; +struct TilePipeline; struct EAttachmentLoadOp; struct EPolygonMode; -struct ImageLayer; -struct int4; struct int3; struct ArraySize; struct int2; -struct PipelineLayout; +struct int4; +struct ImageLayer; struct GraphicsPipelineSpec; -struct short4; -struct EPixelFormat; +struct PipelineLayout; struct sbyte2; struct ushort3; struct ushort2; -struct RenderState_ColorBuffer; +struct short4; +struct EPixelFormat; struct RenderPass; -struct AttachmentSpec; -struct RenderState; -struct DepthStencil; -struct RenderState_ColorBuffersState; -struct RayTracingPipelineSpec; -struct EReductionMode; -struct ESurfaceFormat; -struct MipmapLevel; -struct EShaderVersion; -struct MeshPipeline; -struct EVendorID; -struct EResourceState; -struct ComputePass; -struct NamedRenderState; -struct FeatureSet; -struct VertexDivisor; -struct EAttachmentStoreOp; -struct EBorderColor; -struct EImageType; -struct Sampler; -struct MeshPipelineSpec; -struct GraphicsPipeline; -struct CallableIndex; -struct EShaderOpt; -struct EFeature; -struct EMipmapFilter; -struct VertexBufferInput; +struct 
RenderState_ColorBuffer; struct RGBA8u; +struct VertexBufferInput; +struct EMipmapFilter; struct EStructLayout; -struct RGBA32u; struct Attachment; +struct RGBA32u; struct uint2; -struct ShaderIO; -struct EMutableRenderState; struct uint3; +struct EMutableRenderState; +struct ShaderIO; +struct EShader; struct EShaderStages; struct RenderState_DepthBufferState; -struct EShader; -struct RenderState_StencilFaceState; struct ECompareOp; +struct RenderState_StencilFaceState; struct uint4; +struct EVendorID; +struct MeshPipeline; +struct FeatureSet; +struct EResourceState; +struct NamedRenderState; +struct ComputePass; +struct EAttachmentStoreOp; +struct EImageType; +struct VertexDivisor; +struct EBorderColor; +struct Sampler; +struct EShaderOpt; +struct GraphicsPipeline; +struct MeshPipelineSpec; +struct CallableIndex; +struct EFeature; using sbyte = int8; using ubyte = uint8; @@ -1987,57 +1987,60 @@ struct EGraphicsDeviceID static constexpr uint32 Adreno_600_QC5 = 4; static constexpr uint32 Adreno_700_SC3 = 5; static constexpr uint32 Adreno_700_DC4_SC5 = 6; - static constexpr uint32 AMD_GCN1 = 7; - static constexpr uint32 AMD_GCN2 = 8; - static constexpr uint32 AMD_GCN3 = 9; - static constexpr uint32 AMD_GCN4 = 10; - static constexpr uint32 AMD_GCN5 = 11; - static constexpr uint32 AMD_GCN5_APU = 12; - static constexpr uint32 AMD_RDNA1 = 13; - static constexpr uint32 AMD_RDNA2 = 14; - static constexpr uint32 AMD_RDNA2_APU = 15; - static constexpr uint32 AMD_RDNA3 = 16; - static constexpr uint32 AMD_RDNA3_APU = 17; - static constexpr uint32 Apple_A8 = 18; - static constexpr uint32 Apple_A9_A10 = 19; - static constexpr uint32 Apple_A11 = 20; - static constexpr uint32 Apple_A12 = 21; - static constexpr uint32 Apple_A13 = 22; - static constexpr uint32 Apple_A14_M1 = 23; - static constexpr uint32 Apple_A15_M2 = 24; - static constexpr uint32 Apple_A16 = 25; - static constexpr uint32 Mali_Midgard_Gen2 = 26; - static constexpr uint32 Mali_Midgard_Gen3 = 27; - static constexpr 
uint32 Mali_Midgard_Gen4 = 28; - static constexpr uint32 Mali_Bifrost_Gen1 = 29; - static constexpr uint32 Mali_Bifrost_Gen2 = 30; - static constexpr uint32 Mali_Bifrost_Gen3 = 31; - static constexpr uint32 Mali_Valhall_Gen1 = 32; - static constexpr uint32 Mali_Valhall_Gen2 = 33; - static constexpr uint32 Mali_Valhall_Gen3 = 34; - static constexpr uint32 Mali_Valhall_Gen4 = 35; - static constexpr uint32 NV_Maxwell = 36; - static constexpr uint32 NV_Maxwell_Tegra = 37; - static constexpr uint32 NV_Pascal = 38; - static constexpr uint32 NV_Pascal_MX = 39; - static constexpr uint32 NV_Pascal_Tegra = 40; - static constexpr uint32 NV_Volta = 41; - static constexpr uint32 NV_Turing_16 = 42; - static constexpr uint32 NV_Turing = 43; - static constexpr uint32 NV_Turing_MX = 44; - static constexpr uint32 NV_Ampere = 45; - static constexpr uint32 NV_Ampere_Orin = 46; - static constexpr uint32 NV_Ada = 47; - static constexpr uint32 Intel_Gen9 = 48; - static constexpr uint32 Intel_Gen11 = 49; - static constexpr uint32 Intel_Gen12 = 50; - static constexpr uint32 PowerVR_Series8XE = 51; - static constexpr uint32 PowerVR_Series8XEP = 52; - static constexpr uint32 PowerVR_Series8XT = 53; - static constexpr uint32 PowerVR_Series9XE = 54; - static constexpr uint32 VeriSilicon = 55; - static constexpr uint32 V3D_4 = 56; - static constexpr uint32 SwiftShader = 57; + static constexpr uint32 Adreno_700_QC5X = 7; + static constexpr uint32 AMD_GCN1 = 8; + static constexpr uint32 AMD_GCN2 = 9; + static constexpr uint32 AMD_GCN3 = 10; + static constexpr uint32 AMD_GCN4 = 11; + static constexpr uint32 AMD_GCN5 = 12; + static constexpr uint32 AMD_GCN5_APU = 13; + static constexpr uint32 AMD_RDNA1 = 14; + static constexpr uint32 AMD_RDNA2 = 15; + static constexpr uint32 AMD_RDNA2_APU = 16; + static constexpr uint32 AMD_RDNA3 = 17; + static constexpr uint32 AMD_RDNA3_APU = 18; + static constexpr uint32 Apple_A8 = 19; + static constexpr uint32 Apple_A9_A10 = 20; + static constexpr uint32 
Apple_A11 = 21; + static constexpr uint32 Apple_A12 = 22; + static constexpr uint32 Apple_A13 = 23; + static constexpr uint32 Apple_A14_M1 = 24; + static constexpr uint32 Apple_A15_M2 = 25; + static constexpr uint32 Apple_A16 = 26; + static constexpr uint32 Apple_A17_M3 = 27; + static constexpr uint32 Mali_Midgard_Gen2 = 28; + static constexpr uint32 Mali_Midgard_Gen3 = 29; + static constexpr uint32 Mali_Midgard_Gen4 = 30; + static constexpr uint32 Mali_Bifrost_Gen1 = 31; + static constexpr uint32 Mali_Bifrost_Gen2 = 32; + static constexpr uint32 Mali_Bifrost_Gen3 = 33; + static constexpr uint32 Mali_Valhall_Gen1 = 34; + static constexpr uint32 Mali_Valhall_Gen2 = 35; + static constexpr uint32 Mali_Valhall_Gen3 = 36; + static constexpr uint32 Mali_Valhall_Gen4 = 37; + static constexpr uint32 Mali_Valhall_Gen5 = 38; + static constexpr uint32 NV_Maxwell = 39; + static constexpr uint32 NV_Maxwell_Tegra = 40; + static constexpr uint32 NV_Pascal = 41; + static constexpr uint32 NV_Pascal_MX = 42; + static constexpr uint32 NV_Pascal_Tegra = 43; + static constexpr uint32 NV_Volta = 44; + static constexpr uint32 NV_Turing_16 = 45; + static constexpr uint32 NV_Turing = 46; + static constexpr uint32 NV_Turing_MX = 47; + static constexpr uint32 NV_Ampere = 48; + static constexpr uint32 NV_Ampere_Orin = 49; + static constexpr uint32 NV_Ada = 50; + static constexpr uint32 Intel_Gen9 = 51; + static constexpr uint32 Intel_Gen11 = 52; + static constexpr uint32 Intel_Gen12 = 53; + static constexpr uint32 Intel_Gen12_7 = 54; + static constexpr uint32 PowerVR_Series8XE = 55; + static constexpr uint32 PowerVR_Series8XEP = 56; + static constexpr uint32 PowerVR_Series8XT = 57; + static constexpr uint32 PowerVR_Series9XE = 58; + static constexpr uint32 VeriSilicon = 59; + static constexpr uint32 SwiftShader = 60; }; struct EFilter @@ -2102,8 +2105,7 @@ struct ESamplerUsage ESamplerUsage (uint8) {} operator uint8 () const; static constexpr uint8 Default = 0; - static constexpr uint8 
Subsampled = 1; - static constexpr uint8 SubsampledCoarseReconstruction = 2; + static constexpr uint8 NonSeamlessCubeMap = 1; }; struct EVertexInputRate @@ -2129,16 +2131,19 @@ struct EDescSetUsage struct EPipelineOpt { EPipelineOpt () {} - EPipelineOpt (uint8) {} - operator uint8 () const; - static constexpr uint8 Optimize = 1; - static constexpr uint8 CS_DispatchBase = 2; - static constexpr uint8 RT_NoNullAnyHitShaders = 4; - static constexpr uint8 RT_NoNullClosestHitShaders = 8; - static constexpr uint8 RT_NoNullMissShaders = 16; - static constexpr uint8 RT_NoNullIntersectionShaders = 32; - static constexpr uint8 RT_SkipTriangles = 64; - static constexpr uint8 RT_SkipAABBs = 128; + EPipelineOpt (uint16) {} + operator uint16 () const; + static constexpr uint16 Optimize = 1; + static constexpr uint16 CS_DispatchBase = 2; + static constexpr uint16 RT_NoNullAnyHitShaders = 4; + static constexpr uint16 RT_NoNullClosestHitShaders = 8; + static constexpr uint16 RT_NoNullMissShaders = 16; + static constexpr uint16 RT_NoNullIntersectionShaders = 32; + static constexpr uint16 RT_SkipTriangles = 64; + static constexpr uint16 RT_SkipAABBs = 128; + static constexpr uint16 DontCompile = 256; + static constexpr uint16 CaptureStatistics = 512; + static constexpr uint16 CaptureInternalRepresentation = 1024; }; struct EQueueMask @@ -2402,6 +2407,7 @@ struct EShaderVersion // Added mesh shading. static constexpr uint32 Metal_3_0 = 1073741872; + static constexpr uint32 Metal_3_1 = 1073741873; // Compile for iOS. static constexpr uint32 Metal_iOS_2_0 = 536870944; @@ -2410,6 +2416,7 @@ struct EShaderVersion static constexpr uint32 Metal_iOS_2_3 = 536870947; static constexpr uint32 Metal_iOS_2_4 = 536870948; static constexpr uint32 Metal_iOS_3_0 = 536870960; + static constexpr uint32 Metal_iOS_3_1 = 536870961; // Compile for MacOS. 
static constexpr uint32 Metal_Mac_2_0 = 805306400; @@ -2418,6 +2425,7 @@ struct EShaderVersion static constexpr uint32 Metal_Mac_2_3 = 805306403; static constexpr uint32 Metal_Mac_2_4 = 805306404; static constexpr uint32 Metal_Mac_3_0 = 805306416; + static constexpr uint32 Metal_Mac_3_1 = 805306417; }; struct EShaderOpt @@ -2774,6 +2782,7 @@ struct FeatureSet void shaderSubgroupClock (EFeature); void shaderDeviceClock (EFeature); void cooperativeMatrix (EFeature); + void cooperativeMatrixStages (uint); void shaderClipDistance (EFeature); void shaderCullDistance (EFeature); void shaderResourceMinLod (EFeature); @@ -2897,6 +2906,7 @@ struct FeatureSet void filterMinmaxImageComponentMapping (EFeature); void samplerMipLodBias (EFeature); void samplerYcbcrConversion (EFeature); + void nonSeamlessCubeMap (EFeature); void maxSamplerAnisotropy (float); void maxSamplerLodBias (float); void maxFramebufferLayers (uint); @@ -4123,6 +4133,18 @@ const string Sampler_Anisotropy8Clamp; const string Sampler_Anisotropy16Repeat; const string Sampler_Anisotropy16MirrorRepeat; const string Sampler_Anisotropy16Clamp; +template <> +struct RC : ComputePipelineSpec +{ + RC (const ComputePipelineSpec &); +}; + +template <> +struct RC : CompatibleRenderPass +{ + RC (const CompatibleRenderPass &); +}; + template <> struct RC : Shader { @@ -4136,15 +4158,15 @@ struct RC : ShaderStructType }; template <> -struct RC : ComputePipelineSpec +struct RC : GraphicsPass { - RC (const ComputePipelineSpec &); + RC (const GraphicsPass &); }; template <> -struct RC : CompatibleRenderPass +struct RC : ComputePipeline { - RC (const CompatibleRenderPass &); + RC (const ComputePipeline &); }; template <> @@ -4159,12 +4181,6 @@ struct RC : RayTracingPipeline RC (const RayTracingPipeline &); }; -template <> -struct RC : RayTracingShaderBinding -{ - RC (const RayTracingShaderBinding &); -}; - template <> struct RC : RenderTechnique { @@ -4172,21 +4188,21 @@ struct RC : RenderTechnique }; template <> -struct RC : 
GraphicsPass +struct RC : RayTracingShaderBinding { - RC (const GraphicsPass &); + RC (const RayTracingShaderBinding &); }; template <> -struct RC : ComputePipeline +struct RC : AttachmentSpec { - RC (const ComputePipeline &); + RC (const AttachmentSpec &); }; template <> -struct RC : TilePipeline +struct RC : RayTracingPipelineSpec { - RC (const TilePipeline &); + RC (const RayTracingPipelineSpec &); }; template <> @@ -4196,9 +4212,9 @@ struct RC : TilePipelineSpec }; template <> -struct RC : PipelineLayout +struct RC : TilePipeline { - RC (const PipelineLayout &); + RC (const TilePipeline &); }; template <> @@ -4207,6 +4223,12 @@ struct RC : GraphicsPipelineSpec RC (const GraphicsPipelineSpec &); }; +template <> +struct RC : PipelineLayout +{ + RC (const PipelineLayout &); +}; + template <> struct RC : RenderPass { @@ -4214,15 +4236,15 @@ struct RC : RenderPass }; template <> -struct RC : AttachmentSpec +struct RC : VertexBufferInput { - RC (const AttachmentSpec &); + RC (const VertexBufferInput &); }; template <> -struct RC : RayTracingPipelineSpec +struct RC : Attachment { - RC (const RayTracingPipelineSpec &); + RC (const Attachment &); }; template <> @@ -4232,9 +4254,9 @@ struct RC : MeshPipeline }; template <> -struct RC : ComputePass +struct RC : FeatureSet { - RC (const ComputePass &); + RC (const FeatureSet &); }; template <> @@ -4244,9 +4266,9 @@ struct RC : NamedRenderState }; template <> -struct RC : FeatureSet +struct RC : ComputePass { - RC (const FeatureSet &); + RC (const ComputePass &); }; template <> @@ -4255,12 +4277,6 @@ struct RC : Sampler RC (const Sampler &); }; -template <> -struct RC : MeshPipelineSpec -{ - RC (const MeshPipelineSpec &); -}; - template <> struct RC : GraphicsPipeline { @@ -4268,14 +4284,8 @@ struct RC : GraphicsPipeline }; template <> -struct RC : VertexBufferInput -{ - RC (const VertexBufferInput &); -}; - -template <> -struct RC : Attachment +struct RC : MeshPipelineSpec { - RC (const Attachment &); + RC (const 
MeshPipelineSpec &); }; diff --git a/AE/engine/shared_data/scripts/res_editor.as b/AE/engine/shared_data/scripts/res_editor.as index f0fae09f..e8ecf527 100644 --- a/AE/engine/shared_data/scripts/res_editor.as +++ b/AE/engine/shared_data/scripts/res_editor.as @@ -1,4 +1,4 @@ -//F2DCA70E +//e9eee8b5 #include #include @@ -20,163 +20,163 @@ struct RC; template using array = std::vector; +struct EAddressMode; +struct Collection; +struct RayIndex; +struct RTScene; +struct EPassFlags; +struct EBlendFactor; +struct ERTInstanceOpt; +struct EStencilOp; struct EBlendOp; -struct RGBA8u; +struct ECullMode; +struct RGBA32f; +struct EQueueMask; +struct DbgViewFlags; +struct HSVColor; struct ELogicOp; -struct EMipmapFilter; -struct OrbitalCamera; -struct UnifiedGeometry_DrawMeshTasksIndirectCount; -struct RGBA32u; -struct UnifiedGeometry_DrawMeshTasks; -struct RectF; -struct SphericalCube; -struct EShader; -struct DynamicFloat; -struct EShaderStages; -struct RTInstanceMask; -struct uint3; -struct uint2; -struct RectI; -struct RectU; -struct EColorSpace; -struct uint4; -struct FPSCamera; -struct ECompareOp; -struct DynamicDim; -struct RayTracingPass; -struct EResourceState; -struct UnifiedGeometry_DrawMeshTasksIndirect; -struct ComputePass; -struct EVendorID; -struct Postprocess; -struct SceneRayTracingPass; -struct EImageType; -struct EAttachmentStoreOp; -struct UnifiedGeometry; -struct EBorderColor; -struct UnifiedGeometry_DrawIndirectCount; -struct EFeature; -struct CallableIndex; -struct Buffer; -struct DepthStencil; -struct TopDownCamera; -struct ScaleBiasCamera; -struct EReductionMode; -struct BaseController; -struct MipmapLevel; -struct FlightCamera; -struct ESurfaceFormat; -struct float3x4; -struct Model; -struct float3x3; -struct float3x2; -struct float4x3; -struct EPolygonMode; -struct EAttachmentLoadOp; -struct DynamicUInt; -struct float4x2; -struct float4x4; -struct int2; -struct int3; -struct int4; -struct ImageLayer; -struct Scene; -struct ushort2; -struct sbyte2; 
-struct ushort3; -struct EPixelFormat; -struct short4; -struct UnifiedGeometry_DrawIndexed; -struct short3; -struct Random; -struct short2; -struct ushort4; -struct sbyte3; -struct sbyte4; -struct ESamplerYcbcrModelConversion; -struct ScriptFlags; -struct UnifiedGeometry_DrawIndexedIndirectCount; -struct Image; -struct ESubgroupTypes; -struct GeomSource; -struct UnifiedGeometry_DrawIndirect; -struct DynamicInt3; -struct DynamicInt2; -struct EIndex; -struct DynamicInt4; -struct RTShader; -struct DynamicUInt2; -struct DynamicUInt3; -struct DynamicFloat4; -struct EPostprocess; -struct float4; -struct DynamicFloat3; -struct ImageLoadOpFlags; -struct DynamicUInt4; -struct RTGeometry; -struct DynamicFloat2; -struct float3; +struct DynamicULong; +struct EImage; +struct bool2; +struct bool3; +struct RGBA32i; +struct EVertexType; +struct SceneGraphicsPass; +struct bool4; +struct VideoImage; +struct ESubgroupOperation; +struct ubyte3; +struct EGraphicsDeviceID; +struct FPVCamera; +struct ubyte2; +struct Random_Normal4; +struct Random_Normal3; +struct Random_Normal1; +struct UnifiedGeometry_Draw; +struct Random_Normal2; +struct EPrimitive; +struct RTInstanceSBTOffset; +struct ESamplerYcbcrRange; +struct ubyte4; +struct RTInstanceCustomIndex; +struct ESamplerUsage; +struct EDescSetUsage; +struct EVertexInputRate; +struct EShaderIO; +struct EImageAspect; +struct RTInstanceTransform; +struct ERenderLayer; +struct InstanceIndex; struct float2; struct EPipelineOpt; +struct float3; +struct EPipelineDynamicState; struct DynamicInt; -struct InstanceIndex; -struct ERenderLayer; -struct RTInstanceTransform; -struct EImageAspect; -struct EShaderIO; -struct EVertexInputRate; -struct EDescSetUsage; -struct float2x4; +struct EFilter; +struct Random_Binomial4; +struct Random_Binomial2; +struct MultiSamples; +struct IPass; +struct Random_Binomial1; +struct Random_Binomial3; +struct ESamplerChromaLocation; struct float2x2; struct float2x3; +struct float2x4; struct 
UnifiedGeometry_DrawIndexedIndirect; -struct ESamplerChromaLocation; -struct Random_Binomial3; -struct Random_Binomial1; -struct IPass; -struct MultiSamples; -struct Random_Binomial2; -struct Random_Binomial4; -struct EFilter; -struct EPipelineDynamicState; -struct EPrimitive; -struct Random_Normal1; -struct Random_Normal2; -struct UnifiedGeometry_Draw; -struct Random_Normal3; -struct Random_Normal4; -struct ubyte2; -struct ubyte3; -struct EGraphicsDeviceID; -struct FPVCamera; -struct ESubgroupOperation; -struct VideoImage; -struct RTInstanceCustomIndex; -struct ubyte4; -struct ESamplerUsage; -struct ESamplerYcbcrRange; -struct RTInstanceSBTOffset; -struct EStencilOp; -struct DbgViewFlags; -struct ERTInstanceOpt; -struct EBlendFactor; -struct RayIndex; -struct RTScene; -struct EPassFlags; -struct Collection; -struct EAddressMode; -struct bool4; -struct SceneGraphicsPass; -struct EVertexType; -struct RGBA32i; -struct bool3; -struct bool2; -struct EImage; -struct DynamicULong; -struct HSVColor; -struct RGBA32f; -struct EQueueMask; -struct ECullMode; +struct ESubgroupTypes; +struct UnifiedGeometry_DrawIndexedIndirectCount; +struct Image; +struct ScriptFlags; +struct ESamplerYcbcrModelConversion; +struct sbyte4; +struct sbyte3; +struct short2; +struct ushort4; +struct short3; +struct Random; +struct DynamicFloat2; +struct UnifiedGeometry_DrawIndexed; +struct RTGeometry; +struct DynamicUInt4; +struct DynamicFloat3; +struct float4; +struct ImageLoadOpFlags; +struct EPostprocess; +struct DynamicFloat4; +struct DynamicUInt3; +struct RTShader; +struct DynamicUInt2; +struct EIndex; +struct DynamicInt4; +struct DynamicInt2; +struct DynamicInt3; +struct UnifiedGeometry_DrawIndirect; +struct GeomSource; +struct float4x2; +struct float4x4; +struct DynamicUInt; +struct EAttachmentLoadOp; +struct EPolygonMode; +struct float4x3; +struct float3x2; +struct float3x3; +struct Model; +struct float3x4; +struct short4; +struct EPixelFormat; +struct ushort3; +struct sbyte2; +struct 
ushort2; +struct Scene; +struct ImageLayer; +struct int4; +struct int3; +struct int2; +struct DepthStencil; +struct ESurfaceFormat; +struct MipmapLevel; +struct FlightCamera; +struct BaseController; +struct EReductionMode; +struct ScaleBiasCamera; +struct TopDownCamera; +struct EBorderColor; +struct EAttachmentStoreOp; +struct UnifiedGeometry; +struct EImageType; +struct SceneRayTracingPass; +struct EVendorID; +struct Postprocess; +struct EResourceState; +struct UnifiedGeometry_DrawMeshTasksIndirect; +struct ComputePass; +struct Buffer; +struct CallableIndex; +struct EFeature; +struct UnifiedGeometry_DrawIndirectCount; +struct RectF; +struct SphericalCube; +struct RGBA32u; +struct UnifiedGeometry_DrawMeshTasks; +struct UnifiedGeometry_DrawMeshTasksIndirectCount; +struct OrbitalCamera; +struct EMipmapFilter; +struct RGBA8u; +struct DynamicDim; +struct ECompareOp; +struct FPSCamera; +struct RayTracingPass; +struct EColorSpace; +struct uint4; +struct RectU; +struct RectI; +struct uint3; +struct uint2; +struct EShaderStages; +struct RTInstanceMask; +struct EShader; +struct DynamicFloat; struct EImageType { @@ -2359,57 +2359,60 @@ struct EGraphicsDeviceID static constexpr uint32 Adreno_600_QC5 = 4; static constexpr uint32 Adreno_700_SC3 = 5; static constexpr uint32 Adreno_700_DC4_SC5 = 6; - static constexpr uint32 AMD_GCN1 = 7; - static constexpr uint32 AMD_GCN2 = 8; - static constexpr uint32 AMD_GCN3 = 9; - static constexpr uint32 AMD_GCN4 = 10; - static constexpr uint32 AMD_GCN5 = 11; - static constexpr uint32 AMD_GCN5_APU = 12; - static constexpr uint32 AMD_RDNA1 = 13; - static constexpr uint32 AMD_RDNA2 = 14; - static constexpr uint32 AMD_RDNA2_APU = 15; - static constexpr uint32 AMD_RDNA3 = 16; - static constexpr uint32 AMD_RDNA3_APU = 17; - static constexpr uint32 Apple_A8 = 18; - static constexpr uint32 Apple_A9_A10 = 19; - static constexpr uint32 Apple_A11 = 20; - static constexpr uint32 Apple_A12 = 21; - static constexpr uint32 Apple_A13 = 22; - static 
constexpr uint32 Apple_A14_M1 = 23; - static constexpr uint32 Apple_A15_M2 = 24; - static constexpr uint32 Apple_A16 = 25; - static constexpr uint32 Mali_Midgard_Gen2 = 26; - static constexpr uint32 Mali_Midgard_Gen3 = 27; - static constexpr uint32 Mali_Midgard_Gen4 = 28; - static constexpr uint32 Mali_Bifrost_Gen1 = 29; - static constexpr uint32 Mali_Bifrost_Gen2 = 30; - static constexpr uint32 Mali_Bifrost_Gen3 = 31; - static constexpr uint32 Mali_Valhall_Gen1 = 32; - static constexpr uint32 Mali_Valhall_Gen2 = 33; - static constexpr uint32 Mali_Valhall_Gen3 = 34; - static constexpr uint32 Mali_Valhall_Gen4 = 35; - static constexpr uint32 NV_Maxwell = 36; - static constexpr uint32 NV_Maxwell_Tegra = 37; - static constexpr uint32 NV_Pascal = 38; - static constexpr uint32 NV_Pascal_MX = 39; - static constexpr uint32 NV_Pascal_Tegra = 40; - static constexpr uint32 NV_Volta = 41; - static constexpr uint32 NV_Turing_16 = 42; - static constexpr uint32 NV_Turing = 43; - static constexpr uint32 NV_Turing_MX = 44; - static constexpr uint32 NV_Ampere = 45; - static constexpr uint32 NV_Ampere_Orin = 46; - static constexpr uint32 NV_Ada = 47; - static constexpr uint32 Intel_Gen9 = 48; - static constexpr uint32 Intel_Gen11 = 49; - static constexpr uint32 Intel_Gen12 = 50; - static constexpr uint32 PowerVR_Series8XE = 51; - static constexpr uint32 PowerVR_Series8XEP = 52; - static constexpr uint32 PowerVR_Series8XT = 53; - static constexpr uint32 PowerVR_Series9XE = 54; - static constexpr uint32 VeriSilicon = 55; - static constexpr uint32 V3D_4 = 56; - static constexpr uint32 SwiftShader = 57; + static constexpr uint32 Adreno_700_QC5X = 7; + static constexpr uint32 AMD_GCN1 = 8; + static constexpr uint32 AMD_GCN2 = 9; + static constexpr uint32 AMD_GCN3 = 10; + static constexpr uint32 AMD_GCN4 = 11; + static constexpr uint32 AMD_GCN5 = 12; + static constexpr uint32 AMD_GCN5_APU = 13; + static constexpr uint32 AMD_RDNA1 = 14; + static constexpr uint32 AMD_RDNA2 = 15; + static 
constexpr uint32 AMD_RDNA2_APU = 16; + static constexpr uint32 AMD_RDNA3 = 17; + static constexpr uint32 AMD_RDNA3_APU = 18; + static constexpr uint32 Apple_A8 = 19; + static constexpr uint32 Apple_A9_A10 = 20; + static constexpr uint32 Apple_A11 = 21; + static constexpr uint32 Apple_A12 = 22; + static constexpr uint32 Apple_A13 = 23; + static constexpr uint32 Apple_A14_M1 = 24; + static constexpr uint32 Apple_A15_M2 = 25; + static constexpr uint32 Apple_A16 = 26; + static constexpr uint32 Apple_A17_M3 = 27; + static constexpr uint32 Mali_Midgard_Gen2 = 28; + static constexpr uint32 Mali_Midgard_Gen3 = 29; + static constexpr uint32 Mali_Midgard_Gen4 = 30; + static constexpr uint32 Mali_Bifrost_Gen1 = 31; + static constexpr uint32 Mali_Bifrost_Gen2 = 32; + static constexpr uint32 Mali_Bifrost_Gen3 = 33; + static constexpr uint32 Mali_Valhall_Gen1 = 34; + static constexpr uint32 Mali_Valhall_Gen2 = 35; + static constexpr uint32 Mali_Valhall_Gen3 = 36; + static constexpr uint32 Mali_Valhall_Gen4 = 37; + static constexpr uint32 Mali_Valhall_Gen5 = 38; + static constexpr uint32 NV_Maxwell = 39; + static constexpr uint32 NV_Maxwell_Tegra = 40; + static constexpr uint32 NV_Pascal = 41; + static constexpr uint32 NV_Pascal_MX = 42; + static constexpr uint32 NV_Pascal_Tegra = 43; + static constexpr uint32 NV_Volta = 44; + static constexpr uint32 NV_Turing_16 = 45; + static constexpr uint32 NV_Turing = 46; + static constexpr uint32 NV_Turing_MX = 47; + static constexpr uint32 NV_Ampere = 48; + static constexpr uint32 NV_Ampere_Orin = 49; + static constexpr uint32 NV_Ada = 50; + static constexpr uint32 Intel_Gen9 = 51; + static constexpr uint32 Intel_Gen11 = 52; + static constexpr uint32 Intel_Gen12 = 53; + static constexpr uint32 Intel_Gen12_7 = 54; + static constexpr uint32 PowerVR_Series8XE = 55; + static constexpr uint32 PowerVR_Series8XEP = 56; + static constexpr uint32 PowerVR_Series8XT = 57; + static constexpr uint32 PowerVR_Series9XE = 58; + static constexpr uint32 
VeriSilicon = 59; + static constexpr uint32 SwiftShader = 60; }; struct EFilter @@ -2474,8 +2477,7 @@ struct ESamplerUsage ESamplerUsage (uint8) {} operator uint8 () const; static constexpr uint8 Default = 0; - static constexpr uint8 Subsampled = 1; - static constexpr uint8 SubsampledCoarseReconstruction = 2; + static constexpr uint8 NonSeamlessCubeMap = 1; }; struct EVertexInputRate @@ -2501,16 +2503,19 @@ struct EDescSetUsage struct EPipelineOpt { EPipelineOpt () {} - EPipelineOpt (uint8) {} - operator uint8 () const; - static constexpr uint8 Optimize = 1; - static constexpr uint8 CS_DispatchBase = 2; - static constexpr uint8 RT_NoNullAnyHitShaders = 4; - static constexpr uint8 RT_NoNullClosestHitShaders = 8; - static constexpr uint8 RT_NoNullMissShaders = 16; - static constexpr uint8 RT_NoNullIntersectionShaders = 32; - static constexpr uint8 RT_SkipTriangles = 64; - static constexpr uint8 RT_SkipAABBs = 128; + EPipelineOpt (uint16) {} + operator uint16 () const; + static constexpr uint16 Optimize = 1; + static constexpr uint16 CS_DispatchBase = 2; + static constexpr uint16 RT_NoNullAnyHitShaders = 4; + static constexpr uint16 RT_NoNullClosestHitShaders = 8; + static constexpr uint16 RT_NoNullMissShaders = 16; + static constexpr uint16 RT_NoNullIntersectionShaders = 32; + static constexpr uint16 RT_SkipTriangles = 64; + static constexpr uint16 RT_SkipAABBs = 128; + static constexpr uint16 DontCompile = 256; + static constexpr uint16 CaptureStatistics = 512; + static constexpr uint16 CaptureInternalRepresentation = 1024; }; struct EQueueMask @@ -2645,7 +2650,6 @@ struct ScriptFlags struct DynamicDim { - DynamicDim (); RC opMul (int) const; RC opDiv (int) const; RC Mul (int) const; @@ -2669,66 +2673,79 @@ struct DynamicDim struct DynamicUInt { DynamicUInt (); + DynamicUInt (uint); }; struct DynamicUInt2 { DynamicUInt2 (); + DynamicUInt2 (const uint2 &); }; struct DynamicUInt3 { DynamicUInt3 (); + DynamicUInt3 (const uint3 &); }; struct DynamicUInt4 { DynamicUInt4 
(); + DynamicUInt4 (const uint4 &); }; struct DynamicInt { DynamicInt (); + DynamicInt (int); }; struct DynamicInt2 { DynamicInt2 (); + DynamicInt2 (const int2 &); }; struct DynamicInt3 { DynamicInt3 (); + DynamicInt3 (const int3 &); }; struct DynamicInt4 { DynamicInt4 (); + DynamicInt4 (const int4 &); }; struct DynamicFloat { DynamicFloat (); + DynamicFloat (float); }; struct DynamicFloat2 { DynamicFloat2 (); + DynamicFloat2 (const float2 &); }; struct DynamicFloat3 { DynamicFloat3 (); + DynamicFloat3 (const float3 &); }; struct DynamicFloat4 { DynamicFloat4 (); + DynamicFloat4 (const float4 &); }; struct DynamicULong { DynamicULong (); + DynamicULong (uint64); }; struct ImageLoadOpFlags @@ -2743,7 +2760,6 @@ struct ImageLoadOpFlags struct Image { - Image (); // Create image from file. // File will be searched in VFS. @@ -3294,10 +3310,6 @@ struct SphericalCube // Vertex count: (lod+2)^2, index count: 6*(lod+1)^2. void DetailLevel (uint maxLOD); void DetailLevel (uint minLOD, uint maxLOD); - - // Set constant or dynamic tessellation level. - void TessLevel (float level); - void TessLevel (const RC & level); }; struct UnifiedGeometry_Draw @@ -3572,9 +3584,12 @@ struct EPostprocess // Entry point: 'void mainVR (out float4 fragColor, in float2 fragCoord, in float3 fragRayOri, in float3 fragRayDir)' static constexpr uint32 ShadertoyVR = 2; - static constexpr uint32 ShadertoyVR_180 = 4; - static constexpr uint32 ShadertoyVR_360 = 8; - static constexpr uint32 Shadertoy_360 = 16; + static constexpr uint32 ShadertoyVR_180 = 3; + static constexpr uint32 ShadertoyVR_360 = 4; + static constexpr uint32 Shadertoy_360 = 5; + static constexpr uint32 Curved_1000R = 6; + static constexpr uint32 Curved_1500R = 7; + static constexpr uint32 Curved_1800R = 8; }; struct EPassFlags @@ -3598,7 +3613,6 @@ struct EPassFlags struct Postprocess { - Postprocess (); // Set debug label and color. It is used in graphics profiler. 
void SetDebugLabel (const string & label); @@ -3628,8 +3642,18 @@ struct Postprocess void ColorSelector (const string & name, const RGBA8u & initial); // TODO - void Constant (const string &, const RC &); - void Constant (const string &, const RC &); + void Constant (const string & name, const RC & dynamicValue); + void Constant (const string & name, const RC & dynamicValue); + void Constant (const string & name, const RC & dynamicValue); + void Constant (const string & name, const RC & dynamicValue); + void Constant (const string & name, const RC & dynamicValue); + void Constant (const string & name, const RC & dynamicValue); + void Constant (const string & name, const RC & dynamicValue); + void Constant (const string & name, const RC & dynamicValue); + void Constant (const string & name, const RC & dynamicValue); + void Constant (const string & name, const RC & dynamicValue); + void Constant (const string & name, const RC & dynamicValue); + void Constant (const string & name, const RC & dynamicValue); // Returns dynamic dimension of the pass. // It is auto-detected when used render targets with dynamic dimension or dynamic size for compute dispatches. @@ -3639,7 +3663,7 @@ struct Postprocess void EnableIfGreater (const RC & dynamic, uint refValue); // Add resource to all shaders in the current pass. - // In - resource is used for read access. + // In - resource is used for read access. // Out - resource is used for write access. 
void ArgIn (const string & uniformName, const RC & resource); void ArgIn (const string & uniformName, const RC & resource); @@ -3650,7 +3674,7 @@ struct Postprocess void ArgInOut (const string & uniformName, const RC & resource); void ArgIn (const string & uniformName, const RC & resource, const string & samplerName); void ArgIn (const string & uniformName, const RC & resource, const string & samplerName); - void ArgIn (const RC & camera); + void Set (const RC & camera); void ArgIn (const string & uniformName, const array & resources); void ArgOut (const string & uniformName, const array & resources); void ArgInOut (const string & uniformName, const array & resources); @@ -3658,47 +3682,67 @@ struct Postprocess // Add color/depth render target. // Implicitly name will be 'out_Color' + index. void Output (const RC &); - void Output (const RC &, const MipmapLevel &); - void Output (const RC &, const ImageLayer &); - void Output (const RC &, const ImageLayer &, const MipmapLevel &); - void Output (const RC &, const RGBA32f &); - void Output (const RC &, const MipmapLevel &, const RGBA32f &); - void Output (const RC &, const ImageLayer &, const RGBA32f &); - void Output (const RC &, const ImageLayer &, const MipmapLevel &, const RGBA32f &); + void Output (const RC & image, const MipmapLevel & mipmap); + void Output (const RC & image, const ImageLayer & baseLayer); + void Output (const RC & image, const ImageLayer & baseLayer, const MipmapLevel & mipmap); + void Output (const RC & image, const ImageLayer & baseLayer, uint layerCount); + void Output (const RC & image, const ImageLayer & baseLayer, uint layerCount, const MipmapLevel & mipmap); + void Output (const RC & image, const RGBA32f & clearColor); + void Output (const RC & image, const MipmapLevel & mipmap, const RGBA32f & clearColor); + void Output (const RC & image, const ImageLayer & baseLayer, const RGBA32f & clearColor); + void Output (const RC & image, const ImageLayer & baseLayer, const MipmapLevel & mipmap, 
const RGBA32f & clearColor); + void Output (const RC & image, const ImageLayer & baseLayer, uint layerCount, const RGBA32f & clearColor); + void Output (const RC & image, const ImageLayer & baseLayer, uint layerCount, const MipmapLevel & mipmap, const RGBA32f & clearColor); void Output (const RC &, const RGBA32u &); void Output (const RC &, const MipmapLevel &, const RGBA32u &); void Output (const RC &, const ImageLayer &, const RGBA32u &); void Output (const RC &, const ImageLayer &, const MipmapLevel &, const RGBA32u &); + void Output (const RC &, const ImageLayer &, uint, const RGBA32u &); + void Output (const RC &, const ImageLayer &, uint, const MipmapLevel &, const RGBA32u &); void Output (const RC &, const RGBA32i &); void Output (const RC &, const MipmapLevel &, const RGBA32i &); void Output (const RC &, const ImageLayer &, const RGBA32i &); void Output (const RC &, const ImageLayer &, const MipmapLevel &, const RGBA32i &); + void Output (const RC &, const ImageLayer &, uint, const RGBA32i &); + void Output (const RC &, const ImageLayer &, uint, const MipmapLevel &, const RGBA32i &); void Output (const RC &, const DepthStencil &); void Output (const RC &, const MipmapLevel &, const DepthStencil &); void Output (const RC &, const ImageLayer &, const DepthStencil &); void Output (const RC &, const ImageLayer &, const MipmapLevel &, const DepthStencil &); + void Output (const RC &, const ImageLayer &, uint, const DepthStencil &); + void Output (const RC &, const ImageLayer &, uint, const MipmapLevel &, const DepthStencil &); // Add color/depth render target with explicit name. 
void Output (const string &, const RC &); void Output (const string &, const RC &, const MipmapLevel &); void Output (const string &, const RC &, const ImageLayer &); void Output (const string &, const RC &, const ImageLayer &, const MipmapLevel &); + void Output (const string &, const RC &, const ImageLayer &, uint); + void Output (const string &, const RC &, const ImageLayer &, uint, const MipmapLevel &); void Output (const string &, const RC &, const RGBA32f &); void Output (const string &, const RC &, const MipmapLevel &, const RGBA32f &); void Output (const string &, const RC &, const ImageLayer &, const RGBA32f &); void Output (const string &, const RC &, const ImageLayer &, const MipmapLevel &, const RGBA32f &); + void Output (const string &, const RC &, const ImageLayer &, uint, const RGBA32f &); + void Output (const string &, const RC &, const ImageLayer &, uint, const MipmapLevel &, const RGBA32f &); void Output (const string &, const RC &, const RGBA32u &); void Output (const string &, const RC &, const MipmapLevel &, const RGBA32u &); void Output (const string &, const RC &, const ImageLayer &, const RGBA32u &); void Output (const string &, const RC &, const ImageLayer &, const MipmapLevel &, const RGBA32u &); + void Output (const string &, const RC &, const ImageLayer &, uint, const RGBA32u &); + void Output (const string &, const RC &, const ImageLayer &, uint, const MipmapLevel &, const RGBA32u &); void Output (const string &, const RC &, const RGBA32i &); void Output (const string &, const RC &, const MipmapLevel &, const RGBA32i &); void Output (const string &, const RC &, const ImageLayer &, const RGBA32i &); void Output (const string &, const RC &, const ImageLayer &, const MipmapLevel &, const RGBA32i &); + void Output (const string &, const RC &, const ImageLayer &, uint, const RGBA32i &); + void Output (const string &, const RC &, const ImageLayer &, uint, const MipmapLevel &, const RGBA32i &); void Output (const string &, const RC &, const 
DepthStencil &); void Output (const string &, const RC &, const MipmapLevel &, const DepthStencil &); void Output (const string &, const RC &, const ImageLayer &, const DepthStencil &); void Output (const string &, const RC &, const ImageLayer &, const MipmapLevel &, const DepthStencil &); + void Output (const string &, const RC &, const ImageLayer &, uint, const DepthStencil &); + void Output (const string &, const RC &, const ImageLayer &, uint, const MipmapLevel &, const DepthStencil &); // Add color render target with blend operation. // Implicitly name will be 'out_Color' + index. @@ -3706,23 +3750,32 @@ struct Postprocess void OutputBlend (const RC &, const MipmapLevel &, EBlendFactor, EBlendFactor, EBlendOp); void OutputBlend (const RC &, const ImageLayer &, EBlendFactor, EBlendFactor, EBlendOp); void OutputBlend (const RC &, const ImageLayer &, const MipmapLevel &, EBlendFactor, EBlendFactor, EBlendOp); + void OutputBlend (const RC &, const ImageLayer &, uint, EBlendFactor, EBlendFactor, EBlendOp); + void OutputBlend (const RC &, const ImageLayer &, uint, const MipmapLevel &, EBlendFactor, EBlendFactor, EBlendOp); void OutputBlend (const RC &, EBlendFactor, EBlendFactor, EBlendOp, EBlendFactor, EBlendFactor, EBlendOp); void OutputBlend (const RC &, const MipmapLevel &, EBlendFactor, EBlendFactor, EBlendOp, EBlendFactor, EBlendFactor, EBlendOp); void OutputBlend (const RC &, const ImageLayer &, EBlendFactor, EBlendFactor, EBlendOp, EBlendFactor, EBlendFactor, EBlendOp); void OutputBlend (const RC &, const ImageLayer &, const MipmapLevel &, EBlendFactor, EBlendFactor, EBlendOp, EBlendFactor, EBlendFactor, EBlendOp); + void OutputBlend (const RC &, const ImageLayer &, uint, EBlendFactor, EBlendFactor, EBlendOp, EBlendFactor, EBlendFactor, EBlendOp); + void OutputBlend (const RC &, const ImageLayer &, uint, const MipmapLevel &, EBlendFactor, EBlendFactor, EBlendOp, EBlendFactor, EBlendFactor, EBlendOp); // Add color render target with blend operation and with 
explicit name. void OutputBlend (const string &, const RC &, EBlendFactor, EBlendFactor, EBlendOp); void OutputBlend (const string &, const RC &, const MipmapLevel &, EBlendFactor, EBlendFactor, EBlendOp); void OutputBlend (const string &, const RC &, const ImageLayer &, EBlendFactor, EBlendFactor, EBlendOp); void OutputBlend (const string &, const RC &, const ImageLayer &, const MipmapLevel &, EBlendFactor, EBlendFactor, EBlendOp); + void OutputBlend (const string &, const RC &, const ImageLayer &, uint, EBlendFactor, EBlendFactor, EBlendOp); + void OutputBlend (const string &, const RC &, const ImageLayer &, uint, const MipmapLevel &, EBlendFactor, EBlendFactor, EBlendOp); void OutputBlend (const string &, const RC &, EBlendFactor, EBlendFactor, EBlendOp, EBlendFactor, EBlendFactor, EBlendOp); void OutputBlend (const string &, const RC &, const MipmapLevel &, EBlendFactor, EBlendFactor, EBlendOp, EBlendFactor, EBlendFactor, EBlendOp); void OutputBlend (const string &, const RC &, const ImageLayer &, EBlendFactor, EBlendFactor, EBlendOp, EBlendFactor, EBlendFactor, EBlendOp); void OutputBlend (const string &, const RC &, const ImageLayer &, const MipmapLevel &, EBlendFactor, EBlendFactor, EBlendOp, EBlendFactor, EBlendFactor, EBlendOp); + void OutputBlend (const string &, const RC &, const ImageLayer &, uint, EBlendFactor, EBlendFactor, EBlendOp, EBlendFactor, EBlendFactor, EBlendOp); + void OutputBlend (const string &, const RC &, const ImageLayer &, uint, const MipmapLevel &, EBlendFactor, EBlendFactor, EBlendOp, EBlendFactor, EBlendFactor, EBlendOp); void DepthRange (float min, float max); // Set path to fragment shader, empty - load current file. 
+ Postprocess (); Postprocess (const string & shaderPath); Postprocess (const string & shaderPath, EPostprocess postprocessFlags); Postprocess (EPostprocess postprocessFlags); @@ -3766,8 +3819,18 @@ struct ComputePass void ColorSelector (const string & name, const RGBA8u & initial); // TODO - void Constant (const string &, const RC &); - void Constant (const string &, const RC &); + void Constant (const string & name, const RC & dynamicValue); + void Constant (const string & name, const RC & dynamicValue); + void Constant (const string & name, const RC & dynamicValue); + void Constant (const string & name, const RC & dynamicValue); + void Constant (const string & name, const RC & dynamicValue); + void Constant (const string & name, const RC & dynamicValue); + void Constant (const string & name, const RC & dynamicValue); + void Constant (const string & name, const RC & dynamicValue); + void Constant (const string & name, const RC & dynamicValue); + void Constant (const string & name, const RC & dynamicValue); + void Constant (const string & name, const RC & dynamicValue); + void Constant (const string & name, const RC & dynamicValue); // Returns dynamic dimension of the pass. // It is auto-detected when used render targets with dynamic dimension or dynamic size for compute dispatches. @@ -3777,7 +3840,7 @@ struct ComputePass void EnableIfGreater (const RC & dynamic, uint refValue); // Add resource to all shaders in the current pass. - // In - resource is used for read access. + // In - resource is used for read access. // Out - resource is used for write access. 
void ArgIn (const string & uniformName, const RC & resource); void ArgIn (const string & uniformName, const RC & resource); @@ -3788,7 +3851,7 @@ struct ComputePass void ArgInOut (const string & uniformName, const RC & resource); void ArgIn (const string & uniformName, const RC & resource, const string & samplerName); void ArgIn (const string & uniformName, const RC & resource, const string & samplerName); - void ArgIn (const RC & camera); + void Set (const RC & camera); void ArgIn (const string & uniformName, const array & resources); void ArgOut (const string & uniformName, const array & resources); void ArgInOut (const string & uniformName, const array & resources); @@ -3899,8 +3962,18 @@ struct RayTracingPass void ColorSelector (const string & name, const RGBA8u & initial); // TODO - void Constant (const string &, const RC &); - void Constant (const string &, const RC &); + void Constant (const string & name, const RC & dynamicValue); + void Constant (const string & name, const RC & dynamicValue); + void Constant (const string & name, const RC & dynamicValue); + void Constant (const string & name, const RC & dynamicValue); + void Constant (const string & name, const RC & dynamicValue); + void Constant (const string & name, const RC & dynamicValue); + void Constant (const string & name, const RC & dynamicValue); + void Constant (const string & name, const RC & dynamicValue); + void Constant (const string & name, const RC & dynamicValue); + void Constant (const string & name, const RC & dynamicValue); + void Constant (const string & name, const RC & dynamicValue); + void Constant (const string & name, const RC & dynamicValue); // Returns dynamic dimension of the pass. // It is auto-detected when used render targets with dynamic dimension or dynamic size for compute dispatches. @@ -3910,7 +3983,7 @@ struct RayTracingPass void EnableIfGreater (const RC & dynamic, uint refValue); // Add resource to all shaders in the current pass. 
- // In - resource is used for read access. + // In - resource is used for read access. // Out - resource is used for write access. void ArgIn (const string & uniformName, const RC & resource); void ArgIn (const string & uniformName, const RC & resource); @@ -3921,7 +3994,7 @@ struct RayTracingPass void ArgInOut (const string & uniformName, const RC & resource); void ArgIn (const string & uniformName, const RC & resource, const string & samplerName); void ArgIn (const string & uniformName, const RC & resource, const string & samplerName); - void ArgIn (const RC & camera); + void Set (const RC & camera); void ArgIn (const string & uniformName, const array & resources); void ArgOut (const string & uniformName, const array & resources); void ArgInOut (const string & uniformName, const array & resources); @@ -3969,7 +4042,6 @@ struct ERenderLayer struct SceneGraphicsPass { - SceneGraphicsPass (); // Set debug label and color. It is used in graphics profiler. void SetDebugLabel (const string & label); @@ -3999,8 +4071,18 @@ struct SceneGraphicsPass void ColorSelector (const string & name, const RGBA8u & initial); // TODO - void Constant (const string &, const RC &); - void Constant (const string &, const RC &); + void Constant (const string & name, const RC & dynamicValue); + void Constant (const string & name, const RC & dynamicValue); + void Constant (const string & name, const RC & dynamicValue); + void Constant (const string & name, const RC & dynamicValue); + void Constant (const string & name, const RC & dynamicValue); + void Constant (const string & name, const RC & dynamicValue); + void Constant (const string & name, const RC & dynamicValue); + void Constant (const string & name, const RC & dynamicValue); + void Constant (const string & name, const RC & dynamicValue); + void Constant (const string & name, const RC & dynamicValue); + void Constant (const string & name, const RC & dynamicValue); + void Constant (const string & name, const RC & dynamicValue); // 
Returns dynamic dimension of the pass. // It is auto-detected when used render targets with dynamic dimension or dynamic size for compute dispatches. @@ -4010,7 +4092,7 @@ struct SceneGraphicsPass void EnableIfGreater (const RC & dynamic, uint refValue); // Add resource to all shaders in the current pass. - // In - resource is used for read access. + // In - resource is used for read access. // Out - resource is used for write access. void ArgIn (const string & uniformName, const RC & resource); void ArgIn (const string & uniformName, const RC & resource); @@ -4021,7 +4103,7 @@ struct SceneGraphicsPass void ArgInOut (const string & uniformName, const RC & resource); void ArgIn (const string & uniformName, const RC & resource, const string & samplerName); void ArgIn (const string & uniformName, const RC & resource, const string & samplerName); - void ArgIn (const RC & camera); + void Set (const RC & camera); void ArgIn (const string & uniformName, const array & resources); void ArgOut (const string & uniformName, const array & resources); void ArgInOut (const string & uniformName, const array & resources); @@ -4029,52 +4111,69 @@ struct SceneGraphicsPass // Add color/depth render target. // Implicitly name will be 'out_Color' + index. 
void Output (const RC &); - void Output (const RC &, const MipmapLevel &); - void Output (const RC &, const ImageLayer &); - void Output (const RC &, const ImageLayer &, const MipmapLevel &); - void Output (const RC &, const RGBA32f &); - void Output (const RC &, const MipmapLevel &, const RGBA32f &); - void Output (const RC &, const ImageLayer &, const RGBA32f &); - void Output (const RC &, const ImageLayer &, const MipmapLevel &, const RGBA32f &); + void Output (const RC & image, const MipmapLevel & mipmap); + void Output (const RC & image, const ImageLayer & baseLayer); + void Output (const RC & image, const ImageLayer & baseLayer, const MipmapLevel & mipmap); + void Output (const RC & image, const ImageLayer & baseLayer, uint layerCount); + void Output (const RC & image, const ImageLayer & baseLayer, uint layerCount, const MipmapLevel & mipmap); + void Output (const RC & image, const RGBA32f & clearColor); + void Output (const RC & image, const MipmapLevel & mipmap, const RGBA32f & clearColor); + void Output (const RC & image, const ImageLayer & baseLayer, const RGBA32f & clearColor); + void Output (const RC & image, const ImageLayer & baseLayer, const MipmapLevel & mipmap, const RGBA32f & clearColor); + void Output (const RC & image, const ImageLayer & baseLayer, uint layerCount, const RGBA32f & clearColor); + void Output (const RC & image, const ImageLayer & baseLayer, uint layerCount, const MipmapLevel & mipmap, const RGBA32f & clearColor); void Output (const RC &, const RGBA32u &); void Output (const RC &, const MipmapLevel &, const RGBA32u &); void Output (const RC &, const ImageLayer &, const RGBA32u &); void Output (const RC &, const ImageLayer &, const MipmapLevel &, const RGBA32u &); + void Output (const RC &, const ImageLayer &, uint, const RGBA32u &); + void Output (const RC &, const ImageLayer &, uint, const MipmapLevel &, const RGBA32u &); void Output (const RC &, const RGBA32i &); void Output (const RC &, const MipmapLevel &, const RGBA32i &); 
void Output (const RC &, const ImageLayer &, const RGBA32i &); void Output (const RC &, const ImageLayer &, const MipmapLevel &, const RGBA32i &); + void Output (const RC &, const ImageLayer &, uint, const RGBA32i &); + void Output (const RC &, const ImageLayer &, uint, const MipmapLevel &, const RGBA32i &); void Output (const RC &, const DepthStencil &); void Output (const RC &, const MipmapLevel &, const DepthStencil &); void Output (const RC &, const ImageLayer &, const DepthStencil &); void Output (const RC &, const ImageLayer &, const MipmapLevel &, const DepthStencil &); + void Output (const RC &, const ImageLayer &, uint, const DepthStencil &); + void Output (const RC &, const ImageLayer &, uint, const MipmapLevel &, const DepthStencil &); // Add color/depth render target with explicit name. void Output (const string &, const RC &); void Output (const string &, const RC &, const MipmapLevel &); void Output (const string &, const RC &, const ImageLayer &); void Output (const string &, const RC &, const ImageLayer &, const MipmapLevel &); + void Output (const string &, const RC &, const ImageLayer &, uint); + void Output (const string &, const RC &, const ImageLayer &, uint, const MipmapLevel &); void Output (const string &, const RC &, const RGBA32f &); void Output (const string &, const RC &, const MipmapLevel &, const RGBA32f &); void Output (const string &, const RC &, const ImageLayer &, const RGBA32f &); void Output (const string &, const RC &, const ImageLayer &, const MipmapLevel &, const RGBA32f &); + void Output (const string &, const RC &, const ImageLayer &, uint, const RGBA32f &); + void Output (const string &, const RC &, const ImageLayer &, uint, const MipmapLevel &, const RGBA32f &); void Output (const string &, const RC &, const RGBA32u &); void Output (const string &, const RC &, const MipmapLevel &, const RGBA32u &); void Output (const string &, const RC &, const ImageLayer &, const RGBA32u &); void Output (const string &, const RC &, const 
ImageLayer &, const MipmapLevel &, const RGBA32u &); + void Output (const string &, const RC &, const ImageLayer &, uint, const RGBA32u &); + void Output (const string &, const RC &, const ImageLayer &, uint, const MipmapLevel &, const RGBA32u &); void Output (const string &, const RC &, const RGBA32i &); void Output (const string &, const RC &, const MipmapLevel &, const RGBA32i &); void Output (const string &, const RC &, const ImageLayer &, const RGBA32i &); void Output (const string &, const RC &, const ImageLayer &, const MipmapLevel &, const RGBA32i &); + void Output (const string &, const RC &, const ImageLayer &, uint, const RGBA32i &); + void Output (const string &, const RC &, const ImageLayer &, uint, const MipmapLevel &, const RGBA32i &); void Output (const string &, const RC &, const DepthStencil &); void Output (const string &, const RC &, const MipmapLevel &, const DepthStencil &); void Output (const string &, const RC &, const ImageLayer &, const DepthStencil &); void Output (const string &, const RC &, const ImageLayer &, const MipmapLevel &, const DepthStencil &); + void Output (const string &, const RC &, const ImageLayer &, uint, const DepthStencil &); + void Output (const string &, const RC &, const ImageLayer &, uint, const MipmapLevel &, const DepthStencil &); void DepthRange (float min, float max); - // Set input controller (camera), supported single controller per pass. - void Set (const RC &); - // Add path to single pipeline or folder with pipelines. // Scene geometry will be linked with compatible pipeline or error will be generated. void AddPipeline (const string & pplnFile); @@ -4084,7 +4183,6 @@ struct SceneGraphicsPass struct SceneRayTracingPass { - SceneRayTracingPass (); // Set debug label and color. It is used in graphics profiler. 
void SetDebugLabel (const string & label); @@ -4114,8 +4212,18 @@ struct SceneRayTracingPass void ColorSelector (const string & name, const RGBA8u & initial); // TODO - void Constant (const string &, const RC &); - void Constant (const string &, const RC &); + void Constant (const string & name, const RC & dynamicValue); + void Constant (const string & name, const RC & dynamicValue); + void Constant (const string & name, const RC & dynamicValue); + void Constant (const string & name, const RC & dynamicValue); + void Constant (const string & name, const RC & dynamicValue); + void Constant (const string & name, const RC & dynamicValue); + void Constant (const string & name, const RC & dynamicValue); + void Constant (const string & name, const RC & dynamicValue); + void Constant (const string & name, const RC & dynamicValue); + void Constant (const string & name, const RC & dynamicValue); + void Constant (const string & name, const RC & dynamicValue); + void Constant (const string & name, const RC & dynamicValue); // Returns dynamic dimension of the pass. // It is auto-detected when used render targets with dynamic dimension or dynamic size for compute dispatches. @@ -4125,7 +4233,7 @@ struct SceneRayTracingPass void EnableIfGreater (const RC & dynamic, uint refValue); // Add resource to all shaders in the current pass. - // In - resource is used for read access. + // In - resource is used for read access. // Out - resource is used for write access. 
void ArgIn (const string & uniformName, const RC & resource); void ArgIn (const string & uniformName, const RC & resource); @@ -4136,14 +4244,11 @@ struct SceneRayTracingPass void ArgInOut (const string & uniformName, const RC & resource); void ArgIn (const string & uniformName, const RC & resource, const string & samplerName); void ArgIn (const string & uniformName, const RC & resource, const string & samplerName); - void ArgIn (const RC & camera); + void Set (const RC & camera); void ArgIn (const string & uniformName, const array & resources); void ArgOut (const string & uniformName, const array & resources); void ArgInOut (const string & uniformName, const array & resources); - // Set input controller (camera), supported single controller per pass. - void Set (const RC &); - // Set path to single pipeline. // Scene geometry will be linked with compatible pipeline or error will be generated. void SetPipeline (const string & pplnFile); @@ -4182,7 +4287,7 @@ struct Scene }; -// Returns dynamic dimensions of the screen size. +// Returns dynamic dimensions of the screen surface. RC SurfaceSize (); // Present image to the screen. 
@@ -4266,6 +4371,9 @@ void GetCylinder (uint segmentCount, bool isInner, array & positions, a // Returns cylinder void GetCylinder (uint segmentCount, bool isInner, array & positions, array & normals, array & tangents, array & bitangents, array & texcoords, array & indices); +// Returns spherical cube without projection and rotation +void GetSphericalCube (uint lod, array & positions, array & indices); + // Helper function to convert array of indices to array of uint3 indices per triangle void IndicesToPrimitives (const array & indices, array & primitives); @@ -4324,120 +4432,123 @@ void WhiteColorSpectrum3 (array & wavelengthToRGB); void WhiteColorSpectrum7 (array & wavelengthToRGB, bool normalized); void WhiteColorSpectrumStep50nm (array & wavelengthToRGB, bool normalized); void WhiteColorSpectrumStep100nm (array & wavelengthToRGB, bool normalized); +float3 CM_CubeSC_Forward (const float3 &); +float3 CM_IdentitySC_Forward (const float3 &); +float3 CM_TangentialSC_Forward (const float3 &); #define SCRIPT template <> -struct RC : OrbitalCamera +struct RC : Collection { - RC (const OrbitalCamera &); + RC (const Collection &); }; template <> -struct RC : SphericalCube +struct RC : RTScene { - RC (const SphericalCube &); + RC (const RTScene &); }; template <> -struct RC : DynamicFloat +struct RC : DynamicULong { - RC (const DynamicFloat &); + RC (const DynamicULong &); }; template <> -struct RC : FPSCamera +struct RC : SceneGraphicsPass { - RC (const FPSCamera &); + RC (const SceneGraphicsPass &); }; template <> -struct RC : DynamicDim +struct RC : VideoImage { - RC (const DynamicDim &); + RC (const VideoImage &); }; template <> -struct RC : RayTracingPass +struct RC : FPVCamera { - RC (const RayTracingPass &); + RC (const FPVCamera &); }; template <> -struct RC : ComputePass +struct RC : DynamicInt { - RC (const ComputePass &); + RC (const DynamicInt &); }; template <> -struct RC : Postprocess +struct RC : IPass { - RC (const Postprocess &); + RC (const IPass &); }; 
template <> -struct RC : SceneRayTracingPass +struct RC : Image { - RC (const SceneRayTracingPass &); + RC (const Image &); }; template <> -struct RC : UnifiedGeometry +struct RC : DynamicFloat2 { - RC (const UnifiedGeometry &); + RC (const DynamicFloat2 &); }; template <> -struct RC : Buffer +struct RC : RTGeometry { - RC (const Buffer &); + RC (const RTGeometry &); }; template <> -struct RC : TopDownCamera +struct RC : DynamicUInt4 { - RC (const TopDownCamera &); + RC (const DynamicUInt4 &); }; template <> -struct RC : ScaleBiasCamera +struct RC : DynamicFloat3 { - RC (const ScaleBiasCamera &); + RC (const DynamicFloat3 &); }; template <> -struct RC : BaseController +struct RC : DynamicFloat4 { - RC (const BaseController &); + RC (const DynamicFloat4 &); }; template <> -struct RC : FlightCamera +struct RC : DynamicUInt3 { - RC (const FlightCamera &); + RC (const DynamicUInt3 &); }; template <> -struct RC : Model +struct RC : DynamicUInt2 { - RC (const Model &); + RC (const DynamicUInt2 &); }; template <> -struct RC : DynamicUInt +struct RC : DynamicInt4 { - RC (const DynamicUInt &); + RC (const DynamicInt4 &); }; template <> -struct RC : Scene +struct RC : DynamicInt2 { - RC (const Scene &); + RC (const DynamicInt2 &); }; template <> -struct RC : Image +struct RC : DynamicInt3 { - RC (const Image &); + RC (const DynamicInt3 &); }; template <> @@ -4447,110 +4558,110 @@ struct RC : GeomSource }; template <> -struct RC : DynamicInt3 +struct RC : DynamicUInt { - RC (const DynamicInt3 &); + RC (const DynamicUInt &); }; template <> -struct RC : DynamicInt2 +struct RC : Model { - RC (const DynamicInt2 &); + RC (const Model &); }; template <> -struct RC : DynamicInt4 +struct RC : Scene { - RC (const DynamicInt4 &); + RC (const Scene &); }; template <> -struct RC : DynamicUInt2 +struct RC : FlightCamera { - RC (const DynamicUInt2 &); + RC (const FlightCamera &); }; template <> -struct RC : DynamicUInt3 +struct RC : BaseController { - RC (const DynamicUInt3 &); + RC (const 
BaseController &); }; template <> -struct RC : DynamicFloat4 +struct RC : ScaleBiasCamera { - RC (const DynamicFloat4 &); + RC (const ScaleBiasCamera &); }; template <> -struct RC : DynamicFloat3 +struct RC : TopDownCamera { - RC (const DynamicFloat3 &); + RC (const TopDownCamera &); }; template <> -struct RC : DynamicUInt4 +struct RC : UnifiedGeometry { - RC (const DynamicUInt4 &); + RC (const UnifiedGeometry &); }; template <> -struct RC : RTGeometry +struct RC : SceneRayTracingPass { - RC (const RTGeometry &); + RC (const SceneRayTracingPass &); }; template <> -struct RC : DynamicFloat2 +struct RC : Postprocess { - RC (const DynamicFloat2 &); + RC (const Postprocess &); }; template <> -struct RC : DynamicInt +struct RC : ComputePass { - RC (const DynamicInt &); + RC (const ComputePass &); }; template <> -struct RC : IPass +struct RC : Buffer { - RC (const IPass &); + RC (const Buffer &); }; template <> -struct RC : FPVCamera +struct RC : SphericalCube { - RC (const FPVCamera &); + RC (const SphericalCube &); }; template <> -struct RC : VideoImage +struct RC : OrbitalCamera { - RC (const VideoImage &); + RC (const OrbitalCamera &); }; template <> -struct RC : RTScene +struct RC : DynamicDim { - RC (const RTScene &); + RC (const DynamicDim &); }; template <> -struct RC : Collection +struct RC : FPSCamera { - RC (const Collection &); + RC (const FPSCamera &); }; template <> -struct RC : SceneGraphicsPass +struct RC : RayTracingPass { - RC (const SceneGraphicsPass &); + RC (const RayTracingPass &); }; template <> -struct RC : DynamicULong +struct RC : DynamicFloat { - RC (const DynamicULong &); + RC (const DynamicFloat &); }; diff --git a/AE/engine/shared_data/shaders/CodeTemplates.glsl b/AE/engine/shared_data/shaders/CodeTemplates.glsl index b83acbfc..f77ae633 100644 --- a/AE/engine/shared_data/shaders/CodeTemplates.glsl +++ b/AE/engine/shared_data/shaders/CodeTemplates.glsl @@ -2,6 +2,7 @@ /* Code templates, snippets, default shaders, ... 
*/ +#include "Math.glsl" //----------------------------------------------------------------------------- @@ -21,3 +22,23 @@ ND_ float2 FullscreenQuadUV () { return float2( gl.VertexIndex>>1, gl.V #endif //----------------------------------------------------------------------------- + +#if defined(SH_FRAG) and defined(GL_EXT_fragment_shader_barycentric) +/* +================================================= + FSBarycentricWireframe +================================================= +*/ +ND_ float FSBarycentricWireframe (float thicknessPx, float falloffPx) +{ + const float3 dx_barycoord = gl.dFdx( gl.BaryCoord ); + const float3 dy_barycoord = gl.dFdy( gl.BaryCoord ); + const float3 d_barycoord = Sqrt( dx_barycoord * dx_barycoord + dy_barycoord * dy_barycoord ); + const float3 d_thickness = d_barycoord * thicknessPx; + const float3 d_falloff = d_barycoord * falloffPx; + + const float3 remap = SmoothStep( d_thickness, d_thickness + d_falloff, gl.BaryCoord); + const float wireframe = Min( remap.x, Min( remap.y, remap.z )); + return wireframe; +} +#endif diff --git a/AE/engine/shared_data/shaders/Color.glsl b/AE/engine/shared_data/shaders/Color.glsl index 22880671..f130160f 100644 --- a/AE/engine/shared_data/shaders/Color.glsl +++ b/AE/engine/shared_data/shaders/Color.glsl @@ -30,12 +30,19 @@ ND_ float3 XYYtoRGB (const float3 xyY); ND_ float3 RGBtoXYY_v2 (float3 rgb); ND_ float3 XYYtoRGB_v2 (float3 xyY); +ND_ float3 RGBtoOklab (float3 rgb); +ND_ float3 OklabToRGB (float3 oklab); + ND_ float RGBtoLuminance (const float3 linear); ND_ float RGBtoLogLuminance (const float3 linear, float gamma); ND_ float RGBtoLogLuminance (const float3 linear); ND_ float3 FromRGBM (const float4 rgbm); ND_ float4 ToRGBM (const float3 rgb); + +ND_ float3 LerpHSV (float3 a, float3 b, float factor); +ND_ float3 RGBLerpHSV (float3 a, float3 b, const float factor); +ND_ float3 RGBLerpOklab (const float3 a, const float3 b, const float factor); 
//----------------------------------------------------------------------------- #include "../3party_shaders/ColorUtils-1.glsl" @@ -54,7 +61,7 @@ float4 RainbowWrap (const float factor) //----------------------------------------------------------------------------- -float3 ColorLerpHSV (float3 a, float3 b, float factor) +float3 RGBLerpHSV (float3 a, float3 b, const float factor) { a = RGBtoHSV( a ); b = RGBtoHSV( b ); diff --git a/AE/engine/shared_data/shaders/CubeMap.glsl b/AE/engine/shared_data/shaders/CubeMap.glsl index bc5dd20e..6817373e 100644 --- a/AE/engine/shared_data/shaders/CubeMap.glsl +++ b/AE/engine/shared_data/shaders/CubeMap.glsl @@ -1,6 +1,13 @@ // Copyright (c) Zhirnov Andrey. For more information see 'LICENSE' /* - Cube map utilities + Cube map and spherical cube utilities. + + Also see [SphericalCubeMath.h](https://github.com/azhirnov/as-en/blob/dev/AE/engine/tools/geometry_tools/SphericalCube/SphericalCubeMath.h) + + 1. Forward projection converts snorm coordinate (UV) to the snorm coord with distortion correction. + 2. Forward projection converts snorm coordinate (UV) to the 3D coordinate of the cube face / sphere with applied distortion correction. + 3. Inverse projection invert distortion for UV coordinate. + 4. Inverse projection converts 3D coordinate to the snorm UV and cube face index. 
*/ #ifdef __cplusplus @@ -9,14 +16,48 @@ #include "Math.glsl" -ND_ float3 CM_RotateVec (const float3 c, const int face); -ND_ float4 CM_InverseRotation (const float3 c); // returns {xyz, face} +#define ECubeFace int +#define ECubeFace_XPos 0 // right +#define ECubeFace_XNeg 1 // left +#define ECubeFace_YPos 2 // down +#define ECubeFace_YNeg 3 // up +#define ECubeFace_ZPos 4 // front - inside, back - outside +#define ECubeFace_ZNeg 5 // back - inside, front - outside + + +ND_ float3 CM_RotateVec (const float3 snormCoord, const ECubeFace face); +ND_ float4 CM_InverseRotation (float3 dir); // returns {transformed 'dir', face} +ND_ float3 CM_InverseRotation (const ECubeFace face, float3 dir); // returns transformed 'dir' for specified 'face' -ND_ float3 CM_IdentitySC_Forward (const float2 snormCoord, const int face); +ND_ float3 CM_CubeSC_Forward (const float2 snormCoord, const ECubeFace face); + +ND_ float3 CM_IdentitySC_Forward (const float2 snormCoord, const ECubeFace face); ND_ float3 CM_IdentitySC_Inverse (const float3 coord); -ND_ float3 CM_TangentialSC_Forward (const float2 snormCoord, const int face); +ND_ float2 CM_TangentialSC_Forward (const float2 snormCoord); +ND_ float3 CM_TangentialSC_Forward (const float2 snormCoord, const ECubeFace face); +ND_ float2 CM_TangentialSC_Inverse (const float2 snormCoord); ND_ float3 CM_TangentialSC_Inverse (const float3 coord); + +ND_ float2 CM_EverittSC_Forward (const float2 snormCoord); +ND_ float3 CM_EverittSC_Forward (const float2 snormCoord, const ECubeFace face); +ND_ float2 CM_EverittSC_Inverse (const float2 snormCoord); +ND_ float3 CM_EverittSC_Inverse (const float3 coord); + +ND_ float2 CM_5thPolySC_Forward (const float2 snormCoord); +ND_ float3 CM_5thPolySC_Forward (const float2 snormCoord, const ECubeFace face); +ND_ float2 CM_5thPolySC_Inverse (const float2 snormCoord); +ND_ float3 CM_5thPolySC_Inverse (const float3 coord); + +ND_ float2 CM_COBE_SC_Forward (const float2 snormCoord); +ND_ float3 CM_COBE_SC_Forward 
(const float2 snormCoord, const ECubeFace face); +ND_ float2 CM_COBE_SC_Inverse (const float2 snormCoord); +ND_ float3 CM_COBE_SC_Inverse (const float3 coord); + +ND_ float2 CM_ArvoSC_Forward (const float2 snormCoord); +ND_ float3 CM_ArvoSC_Forward (const float2 snormCoord, const ECubeFace face); +ND_ float2 CM_ArvoSC_Inverse (const float2 snormCoord); +ND_ float3 CM_ArvoSC_Inverse (const float3 coord); //----------------------------------------------------------------------------- @@ -26,57 +67,63 @@ ND_ float3 CM_TangentialSC_Inverse (const float3 coord); CM_RotateVec / CM_InverseRotation ================================================= */ -float3 CM_RotateVec (const float3 c, const int face) -{ - #if 0 - switch ( face ) - { - case 0 : return float3( c.z, c.y, -c.x); // X+ - case 1 : return float3(-c.z, c.y, c.x); // X- - case 2 : return float3( c.x, -c.z, c.y); // Y+ - case 3 : return float3( c.x, c.z, -c.y); // Y- - case 4 : return float3( c.x, c.y, c.z); // Z+ - case 5 : return float3(-c.x, c.y, -c.z); // Z- - } - return float3(0.f); - #else - return float3( c.z, c.y, -c.x) * float(face == 0) + - float3(-c.z, c.y, c.x) * float(face == 1) + - float3( c.x, -c.z, c.y) * float(face == 2) + - float3( c.x, c.z, -c.y) * float(face == 3) + - float3( c.x, c.y, c.z) * float(face == 4) + - float3(-c.x, c.y, -c.z) * float(face == 5); - #endif -} - -float4 CM_InverseRotation (const float3 c) +float3 CM_RotateVec (const float3 c, const ECubeFace face) +{ + return float3( c.z, -c.y, -c.x) * float(face == ECubeFace_XPos) + + float3(-c.z, -c.y, c.x) * float(face == ECubeFace_XNeg) + + float3( c.x, c.z, c.y) * float(face == ECubeFace_YPos) + + float3( c.x, -c.z, -c.y) * float(face == ECubeFace_YNeg) + + float3( c.x, -c.y, c.z) * float(face == ECubeFace_ZPos) + + float3(-c.x, -c.y, -c.z) * float(face == ECubeFace_ZNeg); +} + +float4 CM_InverseRotation (float3 c) { + c.y = -c.y; + // front (xy space) if ( All(bool3( Abs(c.x) <= c.z, c.z > 0.f, Abs(c.y) <= c.z ))) - return 
float4( c.x, c.y, c.z, 4.f ); + return float4( c.x, c.y, c.z, ECubeFace_ZPos ); // right (zy space) if ( All(bool3( Abs(c.z) <= c.x, c.x > 0.f, Abs(c.y) <= c.x ))) - return float4( -c.z, c.y, c.x, 0.f ); + return float4( -c.z, c.y, c.x, ECubeFace_XPos ); // back (xy space) if ( All(bool3( Abs(c.x) <= -c.z, c.z < 0.f, Abs(c.y) <= -c.z ))) - return float4( -c.x, c.y, -c.z, 5.f ); + return float4( -c.x, c.y, -c.z, ECubeFace_ZNeg ); // left (zy space) if ( All(bool3( Abs(c.z) <= -c.x, c.x < 0.f, Abs(c.y) <= -c.x ))) - return float4( c.z, c.y, -c.x, 1.f ); + return float4( c.z, c.y, -c.x, ECubeFace_XNeg ); // up (xz space) if ( c.y > 0.f ) - return float4( c.x, c.z, c.y, 2.f ); + return float4( c.x, -c.z, c.y, ECubeFace_YNeg ); // down (xz space) - return float4( c.x, -c.z, -c.y, 3.f ); + return float4( c.x, c.z, -c.y, ECubeFace_YPos ); +} - // TODO: - // float2( 0.5f + 0.5f * ATan(dir.z, dir.x) / Pi(), - // 1.f - ACos(dir.y) / Pi()); +float3 CM_InverseRotation (const ECubeFace face, float3 c) +{ + c.y = -c.y; + return float3( -c.z, c.y, c.x ) * float(face == ECubeFace_XPos) + + float3( c.z, c.y, -c.x ) * float(face == ECubeFace_XNeg) + + float3( c.x, -c.z, c.y ) * float(face == ECubeFace_YNeg) + + float3( c.x, c.z, -c.y ) * float(face == ECubeFace_YPos) + + float3( c.x, c.y, c.z ) * float(face == ECubeFace_ZPos) + + float3( -c.x, c.y, -c.z ) * float(face == ECubeFace_ZNeg); +} + +/* +================================================= + CM_CubeSC_Forward +================================================= +*/ +float3 CM_CubeSC_Forward (const float2 snormCoord, const ECubeFace face) +{ + return CM_RotateVec( float3(snormCoord, 1.f), face ); } /* @@ -86,9 +133,9 @@ float4 CM_InverseRotation (const float3 c) identity spherical cube projection ================================================= */ -float3 CM_IdentitySC_Forward (const float2 snormCoord, const int face) +float3 CM_IdentitySC_Forward (const float2 snormCoord, const ECubeFace face) { - return Normalize( 
CM_RotateVec( float3(snormCoord, 1.f), face )); + return Normalize( CM_CubeSC_Forward( snormCoord, face )); } float3 CM_IdentitySC_Inverse (const float3 coord) @@ -105,23 +152,152 @@ float3 CM_IdentitySC_Inverse (const float3 coord) tangential spherical cube projection ================================================= */ -float3 CM_TangentialSC_Forward (const float2 snormCoord, const int face) +float2 CM_TangentialSC_Forward (const float2 snormCoord) { const float warp_theta = 0.868734829276f; const float tan_warp_theta = 1.182286685546f; //tan( warp_theta ); - float2 coord = Tan( warp_theta * snormCoord ) / tan_warp_theta; + return Tan( warp_theta * snormCoord ) / tan_warp_theta; +} +float3 CM_TangentialSC_Forward (const float2 snormCoord, const ECubeFace face) +{ + float2 coord = CM_TangentialSC_Forward( snormCoord ); return Normalize( CM_RotateVec( float3(coord.x, coord.y, 1.f), face )); } -float3 CM_TangentialSC_Inverse (const float3 coord) +float2 CM_TangentialSC_Inverse (const float2 snormCoord) { const float warp_theta = 0.868734829276f; const float tan_warp_theta = 1.182286685546f; //tan( warp_theta ); + return ATan( snormCoord * tan_warp_theta ) / warp_theta; +} +float3 CM_TangentialSC_Inverse (const float3 coord) +{ float4 coord_face = CM_InverseRotation( coord ); - coord_face.xy /= coord_face.z; + return float3( CM_TangentialSC_Inverse( coord_face.xy / coord_face.z ), coord_face.w ); +} - float2 c = ATan( coord_face.xy * tan_warp_theta ) / warp_theta; - return float3( c.xy, coord_face.w ); +/* +================================================= + CM_EverittSC_Forward / CM_EverittSC_Inverse +================================================= +*/ +float2 CM_EverittSC_Forward (const float2 snormCoord) +{ + const float e = 1.4511; + return Sign( snormCoord ) * (e - Sqrt( e*e - 4.0 * (e - 1.0) * Abs(snormCoord) )) / (2.0 * (e - 1.0)); +} + +float3 CM_EverittSC_Forward (const float2 snormCoord, const ECubeFace face) +{ + float2 coord = CM_EverittSC_Forward( 
snormCoord ); + return Normalize( CM_RotateVec( float3(coord.x, coord.y, 1.f), face )); } + +float2 CM_EverittSC_Inverse (const float2 snormCoord) +{ + const float e = 1.4511; + return snormCoord * (e + (1.0 - e) * Abs(snormCoord)); +} + +float3 CM_EverittSC_Inverse (const float3 coord) +{ + float4 coord_face = CM_InverseRotation( coord ); + return float3( CM_EverittSC_Inverse( coord_face.xy / coord_face.z ), coord_face.w ); +} + +/* +================================================= + CM_5thPolySC_Forward / CM_5thPolySC_Inverse +================================================= +*/ +float2 CM_5thPolySC_Forward (const float2 snormCoord) +{ + float2 sq = snormCoord * snormCoord; + return (0.745558715593 + (0.130546850193 + 0.123894434214 * sq) * sq) * snormCoord; +} + +float3 CM_5thPolySC_Forward (const float2 snormCoord, const ECubeFace face) +{ + float2 coord = CM_5thPolySC_Forward( snormCoord ); + return Normalize( CM_RotateVec( float3(coord.x, coord.y, 1.f), face )); +} + +float2 CM_5thPolySC_Inverse (const float2 snormCoord) +{ + float2 sq = snormCoord * snormCoord; + return (1.34318229552 + (-0.486514066449 + 0.143331770927 * sq) * sq) * snormCoord; +} + +float3 CM_5thPolySC_Inverse (const float3 coord) +{ + float4 coord_face = CM_InverseRotation( coord ); + return float3( CM_5thPolySC_Inverse( coord_face.xy / coord_face.z ), coord_face.w ); +} + +/* +================================================= + CM_COBE_SC_Forward / CM_COBE_SC_Inverse +================================================= +*/ +float2 CM_COBE_SC_Forward (const float2 snormCoord) +{ + float2 sq1 = snormCoord * snormCoord; + float2 sq2 = sq1.yx; + float2 sum = ((-0.0941180085824 + 0.0409125981187 * sq2 - 0.0623272690881 * sq1) * sq1 + (0.0275922480902 + 0.0342217026979 * sq2) * sq2); + return (0.723951234952 + 0.276048765048 * sq1 + (1.0 - sq1) * sum) * snormCoord; +} + +float3 CM_COBE_SC_Forward (const float2 snormCoord, const ECubeFace face) +{ + float2 coord = CM_COBE_SC_Forward( snormCoord 
); + return Normalize( CM_RotateVec( float3(coord.x, coord.y, 1.f), face )); +} + +float2 CM_COBE_SC_Inverse (const float2 snormCoord) +{ + float2 sq1 = snormCoord * snormCoord; + float2 sq2 = sq1.yx; + float2 sum = ((-0.212853382041 + 0.0941259684877 * sq2 + 0.0693532685333 * sq1) * sq1 + (-0.117847692949 + 0.0107989197181 * sq2) * sq2); + return (1.37738198385 - 0.377381983848 * sq1 + (1.0 - sq1) * sum) * snormCoord; +} + +float3 CM_COBE_SC_Inverse (const float3 coord) +{ + float4 coord_face = CM_InverseRotation( coord ); + return float3( CM_COBE_SC_Inverse( coord_face.xy / coord_face.z ), coord_face.w ); +} + +/* +================================================= + CM_ArvoSC_Forward / CM_ArvoSC_Inverse +================================================= +*/ +float2 CM_ArvoSC_Forward (const float2 snormCoord) +{ + float tan_a_term = Tan( snormCoord.x * 0.523598775598 ); + float cos_a_term = Cos( snormCoord.x * 1.0471975512 ); + return float2( 1.41421356237 * tan_a_term / Sqrt( 1.0 - tan_a_term * tan_a_term ), + snormCoord.y / Sqrt( 1.0 + (1.0 - snormCoord.y * snormCoord.y) * cos_a_term) ); +} + +float3 CM_ArvoSC_Forward (const float2 snormCoord, const ECubeFace face) +{ + float2 coord = CM_ArvoSC_Forward( snormCoord ); + return Normalize( CM_RotateVec( float3(coord.x, coord.y, 1.f), face )); +} + +float2 CM_ArvoSC_Inverse (const float2 snormCoord) +{ + float ss2 = Sqrt( snormCoord.x * snormCoord.x + 2.0 ); + return float2( ATan( snormCoord.x / ss2 ) * 1.9098593171, + snormCoord.y * ss2 / Sqrt( Dot( snormCoord, snormCoord ) + 1.0 )); +} + +float3 CM_ArvoSC_Inverse (const float3 coord) +{ + float4 coord_face = CM_InverseRotation( coord ); + return float3( CM_ArvoSC_Inverse( coord_face.xy / coord_face.z ), coord_face.w ); +} + diff --git a/AE/engine/shared_data/shaders/Geometry.glsl b/AE/engine/shared_data/shaders/Geometry.glsl index 7730223d..3dedf77d 100644 --- a/AE/engine/shared_data/shaders/Geometry.glsl +++ b/AE/engine/shared_data/shaders/Geometry.glsl @@ -22,6 
+22,9 @@ ND_ bool IsInsideRect (const float2 pos, const float2 minBound, const float2 ma ND_ bool IsInsideRect (const int2 pos, const int4 rect) { return IsInsideRect( pos, rect.xy, rect.zw ); } ND_ bool IsInsideRect (const float2 pos, const float4 rect) { return IsInsideRect( pos, rect.xy, rect.zw ); } +ND_ bool IsInsideRect (const int2 pos, const int2 halfSize) { return AllLess( Abs(pos), halfSize ); } +ND_ bool IsInsideRect (const float2 pos, const float2 halfSize) { return AllLess( Abs(pos), halfSize ); } + ND_ bool IsOutsideRect (const float2 pos, const float2 minBound, const float2 maxBound) { return Any(bool4( Less( pos, minBound ), Greater( pos, maxBound ))); } ND_ bool IsOutsideRect (const float2 pos, const float4 rect) { return IsOutsideRect( pos, rect.xy, rect.zw ); } @@ -34,14 +37,30 @@ ND_ float2 Rect_HalfSize (const float4 rect) //----------------------------------------------------------------------------- -ND_ int2 LeftVector (const int2 v) { return int2 ( -v.y, v.x ); } -ND_ float2 LeftVector (const float2 v) { return float2( -v.y, v.x ); } +ND_ int2 LeftVector (const int2 v) { return int2 ( -v.y, v.x ); } +ND_ float2 LeftVector (const float2 v) { return float2( -v.y, v.x ); } + +ND_ int2 RightVector (const int2 v) { return int2 ( v.y, -v.x ); } +ND_ float2 RightVector (const float2 v) { return float2( v.y, -v.x ); } + +ND_ float3 LeftVectorXZ (const float3 v) { return float3( -v.z, v.y, v.x ); } +ND_ float3 RightVectorXZ (const float3 v) { return float3( v.z, v.y, -v.x ); } +//----------------------------------------------------------------------------- + + +ND_ float TriangleArea (const float3 a, const float3 b, const float3 c) { return Length(Cross( b - a, c - a )) * 0.5; } +ND_ float TriangleArea (const float2 a, const float2 b, const float2 c) { return TriangleArea( float3(a, 0.f), float3(b, 0.f), float3(c, 0.f) ); } +//----------------------------------------------------------------------------- + -ND_ int2 RightVector (const int2 v) { return 
int2 ( v.y, -v.x ); } -ND_ float2 RightVector (const float2 v) { return float2( v.y, -v.x ); } +// spherical coordinates +ND_ float3 SphericalToCartesian (const float2 spherical); +ND_ float3 SphericalToCartesian (const float3 sphericalAndRadius); +ND_ float3 CartesianToSpherical (const float3 cartesian); + +ND_ float DistanceOnSphere (const float3 n0, const float3 n1) { return ACos( Dot( n0, n1 )); } +ND_ float DistanceSqOnSphereApprox (const float3 n0, const float3 n1) { return (2.0 - 2.0 * Dot( n0, n1 )); } -ND_ float3 LeftVectorXZ (const float3 v) { return float3( -v.z, v.y, v.x ); } -ND_ float3 RightVectorXZ (const float3 v) { return float3( v.z, v.y, -v.x ); } //----------------------------------------------------------------------------- @@ -67,3 +86,25 @@ float ToNonlinearDepth (const float linearDepth, const float zNear, const float { return ((zFar + zNear) - 2.0 * zNear / linearDepth) / (zFar - zNear); } + + +float3 SphericalToCartesian (const float2 spherical) +{ + float phi = spherical.x; + float theta = spherical.y; + float sin_t = Sin(theta); + return float3( sin_t * Cos(phi), Cos(theta), sin_t * Sin(phi) ); +} + +float3 SphericalToCartesian (const float3 sphericalAndRadius) +{ + return SphericalToCartesian( sphericalAndRadius.xy ) * sphericalAndRadius.z; +} + +float3 CartesianToSpherical (const float3 cartesian) +{ + float theta = ACos( cartesian.y ); + float phi = ATan( cartesian.z, cartesian.x ); + return float3( phi, theta, 1.0f ); +} + diff --git a/AE/engine/shared_data/shaders/HWRayTracing.glsl b/AE/engine/shared_data/shaders/HWRayTracing.glsl index d195ca97..fa353ad1 100644 --- a/AE/engine/shared_data/shaders/HWRayTracing.glsl +++ b/AE/engine/shared_data/shaders/HWRayTracing.glsl @@ -238,6 +238,8 @@ # define /*float3*/ GetCommittedIntersectionObjectRayOrigin(/*gl::RayQuery*/ _rquery_) gl.rayQuery.GetIntersectionObjectRayOrigin( _rquery_, true ) # define /*float4x3*/ GetCommittedIntersectionObjectToWorld(/*gl::RayQuery*/ _rquery_) 
gl.rayQuery.GetIntersectionObjectToWorld( _rquery_, true ) # define /*float4x3*/ GetCommittedIntersectionWorldToObject(/*gl::RayQuery*/ _rquery_) gl.rayQuery.GetIntersectionWorldToObject( _rquery_, true ) +# define /*float3x4*/ GetCommittedIntersectionObjectToWorld3x4(/*gl::RayQuery*/ _rquery_) MatTranspose(gl.rayQuery.GetIntersectionObjectToWorld( _rquery_, true )) +# define /*float3x4*/ GetCommittedIntersectionWorldToObject3x4(/*gl::RayQuery*/ _rquery_) MatTranspose(gl.rayQuery.GetIntersectionWorldToObject( _rquery_, true )) # define /*void*/ GetCommittedIntersectionTriangleVertexPositions(/*gl::RayQuery*/ _rquery_, /*float3[3]*/_pos_) gl.rayQuery.GetIntersectionTriangleVertexPositions( _rquery_, true, _pos_ ) // Candidate @@ -255,6 +257,8 @@ # define /*float3*/ GetCandidateIntersectionObjectRayOrigin(/*gl::RayQuery*/ _rquery_) gl.rayQuery.GetIntersectionObjectRayOrigin( _rquery_, false ) # define /*float4x3*/ GetCandidateIntersectionObjectToWorld(/*gl::RayQuery*/ _rquery_) gl.rayQuery.GetIntersectionObjectToWorld( _rquery_, false ) # define /*float4x3*/ GetCandidateIntersectionWorldToObject(/*gl::RayQuery*/ _rquery_) gl.rayQuery.GetIntersectionWorldToObject( _rquery_, false ) +# define /*float3x4*/ GetCandidateIntersectionObjectToWorld3x4(/*gl::RayQuery*/ _rquery_) MatTranspose(gl.rayQuery.GetIntersectionObjectToWorld( _rquery_, false )) +# define /*float3x4*/ GetCandidateIntersectionWorldToObject3x4(/*gl::RayQuery*/ _rquery_) MatTranspose(gl.rayQuery.GetIntersectionWorldToObject( _rquery_, false )) # define /*void*/ GetCandidateIntersectionTriangleVertexPositions(/*gl::RayQuery*/ _rquery_, /*float3[3]*/_pos_) gl.rayQuery.GetIntersectionTriangleVertexPositions( _rquery_, false, _pos_ ) #endif // AE_RAY_QUERY diff --git a/AE/engine/shared_data/shaders/Math.glsl b/AE/engine/shared_data/shaders/Math.glsl index 62e3f049..d03a5f3d 100644 --- a/AE/engine/shared_data/shaders/Math.glsl +++ b/AE/engine/shared_data/shaders/Math.glsl @@ -5,7 +5,33 @@ #ifdef __cplusplus # 
pragma once + +# ifdef __INTELLISENSE__ +# include +# endif +#endif + +#ifndef AE_ENABLE_BYTE_TYPE +# define AE_ENABLE_BYTE_TYPE 0 +#endif + +#ifndef AE_ENABLE_SHORT_TYPE +# define AE_ENABLE_SHORT_TYPE 0 +#endif + +#ifndef AE_ENABLE_HALF_TYPE +# define AE_ENABLE_HALF_TYPE 0 +#endif + +#ifndef AE_ENABLE_LONG_TYPE +# define AE_ENABLE_LONG_TYPE 0 +#endif + +#ifndef AE_ENABLE_DOUBLE_TYPE +# define AE_ENABLE_DOUBLE_TYPE 0 #endif +//----------------------------------------------------------------------------- + #define and && #define or || @@ -13,11 +39,11 @@ #define Any any #define All all #define Abs abs -#define ACos acos -#define ASin asin +#define ACos acos // result in range [0 .. Pi] +#define ASin asin // result in range [-Pi/2 ... Pi/2] #define ASinH asinh #define ACosH acosh -#define ATan atan // result in range [-Pi...+Pi] +#define ATan atan // for 2 arg overload: result in range [-Pi...+Pi], for 1 arg overload result in range [-Pi/2 ... Pi/2] #define BitScanReverse findMSB #define BitScanForward findLSB #define ATanH atanh @@ -32,7 +58,6 @@ #define Exp2 exp2 #define Fract fract // x - Floor( x ) #define Floor floor -#define FusedMulAdd fma // (a * b) + c #define IsNaN isnan #define IsInfinity isinf #define InvSqrt inversesqrt @@ -53,7 +78,6 @@ #define Refract refract #define Step step #define SmoothStep smoothstep -#define Saturate( x ) (clamp( (x), 0.0f, 1.0f )) #define Sqrt sqrt #define Sin sin #define SinH sinh @@ -61,15 +85,19 @@ #define Tan tan #define TanH tanh #define Trunc trunc -#define ToUNorm( x ) ((x) * 0.5f + 0.5f) -#define ToSNorm( x ) ((x) * 2.0f - 1.0f) #define BitCount bitCount #define ToDeg degrees #define ToRad radians +#define FusedMulAdd fma // (a * b) + c +//#define FusedMulAdd(_a_,_b_,_c_) ((_a_) * (_b_) + (_c_)) // different precision + #define MatInverse inverse #define MatTranspose transpose #define MatDeterminant determinant + +ND_ float2 SinCos (const float x) { return float2(sin(x), cos(x)); } + 
//----------------------------------------------------------------------------- @@ -80,36 +108,66 @@ //----------------------------------------------------------------------------- -#define Less lessThan // < -#define Greater greaterThan // > -#define LessEqual lessThanEqual // <= -#define GreaterEqual greaterThanEqual // >= -//#define not ! - -ND_ bool Equals (const float lhs, const float rhs) { return lhs == rhs; } -ND_ bool2 Equals (const float2 lhs, const float2 rhs) { return equal( lhs, rhs ); } -ND_ bool3 Equals (const float3 lhs, const float3 rhs) { return equal( lhs, rhs ); } -ND_ bool4 Equals (const float4 lhs, const float4 rhs) { return equal( lhs, rhs ); } - -ND_ bool Equals (const int lhs, const int rhs) { return lhs == rhs; } -ND_ bool2 Equals (const int2 lhs, const int2 rhs) { return equal( lhs, rhs ); } -ND_ bool3 Equals (const int3 lhs, const int3 rhs) { return equal( lhs, rhs ); } -ND_ bool4 Equals (const int4 lhs, const int4 rhs) { return equal( lhs, rhs ); } - -ND_ bool Equals (const uint lhs, const uint rhs) { return lhs == rhs; } -ND_ bool2 Equals (const uint2 lhs, const uint2 rhs) { return equal( lhs, rhs ); } -ND_ bool3 Equals (const uint3 lhs, const uint3 rhs) { return equal( lhs, rhs ); } -ND_ bool4 Equals (const uint4 lhs, const uint4 rhs) { return equal( lhs, rhs ); } - -ND_ bool Equals (const double lhs, const double rhs) { return lhs == rhs; } -ND_ bool2 Equals (const double2 lhs, const double2 rhs) { return equal( lhs, rhs ); } -ND_ bool3 Equals (const double3 lhs, const double3 rhs) { return equal( lhs, rhs ); } -ND_ bool4 Equals (const double4 lhs, const double4 rhs) { return equal( lhs, rhs ); } - -ND_ bool Not (const bool value) { return !value; } -ND_ bool2 Not (const bool2 value) { return not(value); } -ND_ bool3 Not (const bool3 value) { return not(value); } -ND_ bool4 Not (const bool4 value) { return not(value); } + +/* +================================================= + Equal / NotEqual / + Less / Greater / LessEqual / 
GreaterEqual +---- + boolType Equal (T lhs, T rhs) + boolType NotEqual (T lhs, T rhs) + boolType Less (T lhs, T rhs) + boolType Greater (T lhs, T rhs) + boolType LessEqual (T lhs, T rhs) + boolType GreaterEqual (T lhs, T rhs) +---- + per component comparator +================================================= +*/ +#define Gen_CMP1( _stype_, _vtype_, _name_, _opS_, _opV_ ) \ + ND_ bool _name_ (const _stype_ lhs, const _stype_ rhs) { return lhs _opS_ rhs; } \ + ND_ bool2 _name_ (const _vtype_##2 lhs, const _vtype_##2 rhs) { return _opV_( lhs, rhs ); } \ + ND_ bool3 _name_ (const _vtype_##3 lhs, const _vtype_##3 rhs) { return _opV_( lhs, rhs ); } \ + ND_ bool4 _name_ (const _vtype_##4 lhs, const _vtype_##4 rhs) { return _opV_( lhs, rhs ); } + +#define Gen_CMP( _stype_, _vtype_ ) \ + Gen_CMP1( _stype_, _vtype_, Equal, ==, equal ) \ + Gen_CMP1( _stype_, _vtype_, NotEqual, !=, notEqual ) \ + Gen_CMP1( _stype_, _vtype_, Less, <, lessThan ) \ + Gen_CMP1( _stype_, _vtype_, Greater, >, greaterThan ) \ + Gen_CMP1( _stype_, _vtype_, LessEqual, <=, lessThanEqual ) \ + Gen_CMP1( _stype_, _vtype_, GreaterEqual, >=, greaterThanEqual ) + +Gen_CMP( float, float_vec_t ) +Gen_CMP( int, int_vec_t ) +Gen_CMP( uint, uint_vec_t ) + +#if AE_ENABLE_BYTE_TYPE + Gen_CMP( sbyte, sbyte_vec_t ) + Gen_CMP( ubyte, ubyte_vec_t ) +#endif +#if AE_ENABLE_SHORT_TYPE + Gen_CMP( sshort, sshort_vec_t ) + Gen_CMP( ushort, ushort_vec_t ) +#endif +#if AE_ENABLE_LONG_TYPE + Gen_CMP( slong, slong_vec_t ) + Gen_CMP( ulong, ulong_vec_t ) +#endif +#if AE_ENABLE_HALF_TYPE + Gen_CMP( half, half_vec_t ) +#endif +#if AE_ENABLE_DOUBLE_TYPE + Gen_CMP( double, double_vec_t ) +#endif + +#undef Gen_CMP1 +#undef Gen_CMP + +ND_ bool Not (const bool value) { return !value; } +ND_ bool2 Not (const bool2 value) { return not(value); } +ND_ bool3 Not (const bool3 value) { return not(value); } +ND_ bool4 Not (const bool4 value) { return not(value); } #define AllLess( a, b ) All( Less( (a), (b) )) #define AllLessEqual( a, b ) All( 
LessEqual( (a), (b) )) @@ -123,117 +181,302 @@ ND_ bool4 Not (const bool4 value) { return not(value); #define AnyGreater( a, b ) Any( Greater( (a), (b) )) #define AnyGreaterEqual( a, b ) Any( GreaterEqual( (a), (b) )) -#define AllEqual( a, b ) All( Equals( (a), (b) )) -#define AnyEqual( a, b ) Any( Equals( (a), (b) )) +#define AllEqual( a, b ) All( Equal( (a), (b) )) +#define AnyEqual( a, b ) Any( Equal( (a), (b) )) -#define AllNotEqual( a, b ) All( Not( Equals( (a), (b) ))) -#define AnyNotEqual( a, b ) Any( Not( Equals( (a), (b) ))) +#define AllNotEqual( a, b ) All( Not( Equal( (a), (b) ))) +#define AnyNotEqual( a, b ) Any( Not( Equal( (a), (b) ))) -#define NotAllEqual( a, b ) Not( All( Equals( (a), (b) ))) -#define NotAnyEqual( a, b ) Not( Any( Equals( (a), (b) ))) -//----------------------------------------------------------------------------- +#define NotAllEqual( a, b ) Not( All( Equal( (a), (b) ))) +#define NotAnyEqual( a, b ) Not( Any( Equal( (a), (b) ))) +/* +================================================= + Saturate +---- + T Saturate (T x) +================================================= +*/ +#define Gen_SATURATE1( _stype_, _type_ ) \ + ND_ _type_ Saturate (const _type_ x) { \ + return clamp( x, _stype_(0.0), _stype_(1.0) ); \ + } -#define Min3( a, b, c ) Min( Min( (a), (b) ), (c) ) -#define Min4( a, b, c, d ) Min( Min( (a), (b) ), Min( (c), (d) )) -#define Max3( a, b, c ) Max( Max( (a), (b) ), (c) ) -#define Max4( a, b, c, d ) Max( Max( (a), (b) ), Max( (c), (d) )) +#define Gen_SATURATE( _stype_, _vtype_ )\ + Gen_SATURATE1( _stype_, _stype_ )\ + Gen_SATURATE1( _stype_, _vtype_##2 )\ + Gen_SATURATE1( _stype_, _vtype_##3 )\ + Gen_SATURATE1( _stype_, _vtype_##4 ) +Gen_SATURATE( float, float_vec_t ) -//----------------------------------------------------------------------------- -// Constants +#if AE_ENABLE_HALF_TYPE + Gen_SATURATE( half, half_vec_t ) +#endif +#if AE_ENABLE_DOUBLE_TYPE + Gen_SATURATE( double, double_vec_t ) +#endif -ND_ float Epsilon () 
{ return 1.e-5f; } -ND_ float Pi () { return 3.14159265358979323846f; } -ND_ float Pi2 () { return Pi() * 2.0f; } +#undef Gen_SATURATE1 +#undef Gen_SATURATE -ND_ float ReciprocalPi () { return 0.31830988618379067153f; } -ND_ float SqrtOf2 () { return 1.41421356237309504880f; } +/* +================================================= + ToUNorm / ToSNorm +---- + T ToUNorm (T x) [-1, +1] to [ 0, 1] + T ToSNorm (T x) [ 0, 1] to [-1, +1] +================================================= +*/ +#define Gen_TOUSNORM1( _type_ ) \ + ND_ _type_ ToUNorm (const _type_ x) { \ + return FusedMulAdd( x, _type_(0.5), _type_(0.5) ); \ + } \ + ND_ _type_ ToSNorm (const _type_ x) { \ + return FusedMulAdd( x, _type_(2.0), _type_(-1.0) ); \ + } +#define Gen_TOUSNORM( _stype_, _vtype_ )\ + Gen_TOUSNORM1( _stype_ ) \ + Gen_TOUSNORM1( _vtype_##2 ) \ + Gen_TOUSNORM1( _vtype_##3 ) \ + Gen_TOUSNORM1( _vtype_##4 ) -//----------------------------------------------------------------------------- -// Square +Gen_TOUSNORM( float, float_vec_t ) -ND_ float Square (const float x) { return x * x; } -ND_ float2 Square (const float2 x) { return x * x; } -ND_ float3 Square (const float3 x) { return x * x; } -ND_ float4 Square (const float4 x) { return x * x; } +#if AE_ENABLE_HALF_TYPE + Gen_TOUSNORM( half, half_vec_t ) +#endif +#if AE_ENABLE_DOUBLE_TYPE + Gen_TOUSNORM( double, double_vec_t ) +#endif -ND_ int Square (const int x) { return x * x; } -ND_ int2 Square (const int2 x) { return x * x; } -ND_ int3 Square (const int3 x) { return x * x; } -ND_ int4 Square (const int4 x) { return x * x; } +#undef Gen_TOUSNORM1 +#undef Gen_TOUSNORM -ND_ uint Square (const uint x) { return x * x; } -ND_ uint2 Square (const uint2 x) { return x * x; } -ND_ uint3 Square (const uint3 x) { return x * x; } -ND_ uint4 Square (const uint4 x) { return x * x; } +//----------------------------------------------------------------------------- +// Constants +ND_ float Epsilon () { return 1.e-5f; } +ND_ float Pi () { return 
3.14159265358979323846f; } +ND_ float Pi2 () { return 6.28318530717958647692f; } +ND_ float HalfPi () { return 1.57079632679489661923f; } -//----------------------------------------------------------------------------- -// Same as 'condition ? ifTrue : ifFalse' +ND_ float ReciprocalPi () { return 0.31830988618379067153f; } +ND_ float SqrtOf2 () { return 1.41421356237309504880f; } -#define Gen_SELECT( _vtype_, _btype_ )\ - ND_ _vtype_ Select (const _btype_ condition, const _vtype_ ifTrue, const _vtype_ ifFalse) { return (ifFalse * _vtype_(Not(condition))) + (ifTrue * _vtype_(condition)); } -Gen_SELECT( float, bool ) -Gen_SELECT( float2, bool2 ) -Gen_SELECT( float3, bool3 ) -Gen_SELECT( float4, bool4 ) +/* +================================================= + Square +---- + T Square (T x) +================================================= +*/ +#define Gen_SQUARE( _stype_, _vtype_ )\ + ND_ _stype_ Square (const _stype_ x) { return x * x; }\ + ND_ _vtype_##2 Square (const _vtype_##2 x) { return x * x; }\ + ND_ _vtype_##3 Square (const _vtype_##3 x) { return x * x; }\ + ND_ _vtype_##4 Square (const _vtype_##4 x) { return x * x; } + +Gen_SQUARE( float, float_vec_t ) +Gen_SQUARE( int, int_vec_t ) +Gen_SQUARE( uint, uint_vec_t ) + +#if AE_ENABLE_BYTE_TYPE + Gen_SQUARE( sbyte, sbyte_vec_t ) + Gen_SQUARE( ubyte, ubyte_vec_t ) +#endif +#if AE_ENABLE_SHORT_TYPE + Gen_SQUARE( sshort, sshort_vec_t ) + Gen_SQUARE( ushort, ushort_vec_t ) +#endif +#if AE_ENABLE_LONG_TYPE + Gen_SQUARE( slong, slong_vec_t ) + Gen_SQUARE( ulong, ulong_vec_t ) +#endif +#if AE_ENABLE_HALF_TYPE + Gen_SQUARE( half, half_vec_t ) +#endif +#if AE_ENABLE_DOUBLE_TYPE + Gen_SQUARE( double, double_vec_t ) +#endif -Gen_SELECT( int, bool ) -Gen_SELECT( int2, bool2 ) -Gen_SELECT( int3, bool3 ) -Gen_SELECT( int4, bool4 ) +#undef Gen_SQUARE -#undef Gen_SELECT +/* +================================================= + Select +---- + T Select (boolType condition, T ifTrue, T ifFalse) +---- + same as per component 
'condition ? ifTrue : ifFalse' +================================================= +*/ +#define Gen_SELECT1( _vtype_, _btype_ )\ + ND_ _vtype_ Select (const _btype_ condition, const _vtype_ ifTrue, const _vtype_ ifFalse) { return (ifFalse * _vtype_(Not(condition))) + (ifTrue * _vtype_(condition)); } +#define Gen_SELECT( _stype_, _vtype_ )\ + Gen_SELECT1( _stype_, bool )\ + Gen_SELECT1( _vtype_##2, bool2 )\ + Gen_SELECT1( _vtype_##3, bool3 )\ + Gen_SELECT1( _vtype_##4, bool4 ) -//----------------------------------------------------------------------------- -// BranchLess -// same as 'condition ? ifTrue : ifFalse' +Gen_SELECT( float, float_vec_t ) +Gen_SELECT( int, int_vec_t ) +Gen_SELECT( uint, uint_vec_t ) -#define Gen_BRANCHLESS( _vtype_ )\ - ND_ _vtype_ BranchLess (bool condition, const _vtype_ ifTrue, const _vtype_ ifFalse) { _vtype_ tmp[2] = {ifTrue, ifFalse}; return tmp[int(condition)]; } +#if AE_ENABLE_BYTE_TYPE + Gen_SELECT( sbyte, sbyte_vec_t ) + Gen_SELECT( ubyte, ubyte_vec_t ) +#endif +#if AE_ENABLE_SHORT_TYPE + Gen_SELECT( sshort, sshort_vec_t ) + Gen_SELECT( ushort, ushort_vec_t ) +#endif +#if AE_ENABLE_LONG_TYPE + Gen_SELECT( slong, slong_vec_t ) + Gen_SELECT( ulong, ulong_vec_t ) +#endif +#if AE_ENABLE_HALF_TYPE + Gen_SELECT( half, half_vec_t ) +#endif +#if AE_ENABLE_DOUBLE_TYPE + Gen_SELECT( double, double_vec_t ) +#endif -Gen_BRANCHLESS( float ) -Gen_BRANCHLESS( float2 ) -Gen_BRANCHLESS( float3 ) -Gen_BRANCHLESS( float4 ) +#undef Gen_SELECT1 +#undef Gen_SELECT -Gen_BRANCHLESS( int ) -Gen_BRANCHLESS( int2 ) -Gen_BRANCHLESS( int3 ) -Gen_BRANCHLESS( int4 ) +/* +================================================= + BranchLess +---- + T BranchLess (bool condition, T ifTrue, T ifFalse) +---- + same as 'condition ? 
ifTrue : ifFalse' +================================================= +*/ +#define Gen_BRANCHLESS1( _vtype_ )\ + ND_ _vtype_ BranchLess (const bool condition, const _vtype_ ifTrue, const _vtype_ ifFalse) { _vtype_ tmp[2] = {ifTrue, ifFalse}; return tmp[int(condition)]; } + +#define Gen_BRANCHLESS( _stype_, _vtype_ )\ + Gen_BRANCHLESS1( _stype_ )\ + Gen_BRANCHLESS1( _vtype_##2 )\ + Gen_BRANCHLESS1( _vtype_##3 )\ + Gen_BRANCHLESS1( _vtype_##4 ) + +Gen_BRANCHLESS( float, float_vec_t ) +Gen_BRANCHLESS( int, int_vec_t ) +Gen_BRANCHLESS( uint, uint_vec_t ) + +#if AE_ENABLE_BYTE_TYPE + Gen_BRANCHLESS( sbyte, sbyte_vec_t ) + Gen_BRANCHLESS( ubyte, ubyte_vec_t ) +#endif +#if AE_ENABLE_SHORT_TYPE + Gen_BRANCHLESS( sshort, sshort_vec_t ) + Gen_BRANCHLESS( ushort, ushort_vec_t ) +#endif +#if AE_ENABLE_LONG_TYPE + Gen_BRANCHLESS( slong, slong_vec_t ) + Gen_BRANCHLESS( ulong, ulong_vec_t ) +#endif +#if AE_ENABLE_HALF_TYPE + Gen_BRANCHLESS( half, half_vec_t ) +#endif +#if AE_ENABLE_DOUBLE_TYPE + Gen_BRANCHLESS( double, double_vec_t ) +#endif +#undef Gen_BRANCHLESS1 #undef Gen_BRANCHLESS -//----------------------------------------------------------------------------- -// square length and distance +/* +================================================= + Min* / Max* +================================================= +*/ +#define Min3( a, b, c ) Min( Min( (a), (b) ), (c) ) +#define Min4( a, b, c, d ) Min( Min( (a), (b) ), Min( (c), (d) )) +#define Max3( a, b, c ) Max( Max( (a), (b) ), (c) ) +#define Max4( a, b, c, d ) Max( Max( (a), (b) ), Max( (c), (d) )) -ND_ float LengthSq (const float2 x) { return Dot( x, x ); } -ND_ float LengthSq (const float3 x) { return Dot( x, x ); } +#define MinAbs( _a_, _b_ ) Select( Less(Abs(_a_), Abs(_b_)), (_a_), (_b_) ) +#define MaxAbs( _a_, _b_ ) Select( Greater(Abs(_a_), Abs(_b_)), (_a_), (_b_) ) -ND_ float DistanceSq (const float2 x, const float2 y) { float2 r = x - y; return Dot( r, r ); } -ND_ float DistanceSq (const float3 x, const float3 y) { 
float3 r = x - y; return Dot( r, r ); } +/* +================================================= + LengthSq / DistanceSq +---- + Scalar LengthSq (Vec x) + Scalar DistanceSq (Vec x, Vec y) +================================================= +*/ +#define Gen_LENGTHSQ_DISTANCESQ1( _stype_, _vtype_ ) \ + ND_ _stype_ LengthSq (const _vtype_ x) { return Dot( x, x ); } \ + ND_ _stype_ DistanceSq (const _vtype_ x, const _vtype_ y) { _vtype_ r = x - y; return Dot( r, r ); } +#define Gen_LENGTHSQ_DISTANCESQ( _stype_, _vtype_ )\ + Gen_LENGTHSQ_DISTANCESQ1( _stype_, _vtype_##2 )\ + Gen_LENGTHSQ_DISTANCESQ1( _stype_, _vtype_##3 ) -//----------------------------------------------------------------------------- -// Other functions +Gen_LENGTHSQ_DISTANCESQ( float, float_vec_t ) -// -1 or +1, nan = +1 -ND_ float Sign (const float x) { return x < 0.0f ? -1.0f : 1.0f; } -ND_ float2 Sign (const float2 v) { return Select( Less( v, float2(0.f) ), float2(-1.0f), float2(1.0f) ); } -ND_ float3 Sign (const float3 v) { return Select( Less( v, float3(0.f) ), float3(-1.0f), float3(1.0f) ); } -ND_ float4 Sign (const float4 v) { return Select( Less( v, float4(0.f) ), float4(-1.0f), float4(1.0f) ); } +#if AE_ENABLE_HALF_TYPE + Gen_LENGTHSQ_DISTANCESQ( half, half_vec_t ) +#endif +#if AE_ENABLE_DOUBLE_TYPE + Gen_LENGTHSQ_DISTANCESQ( double, double_vec_t ) +#endif -ND_ int Sign (const int x) { return x < 0 ? 
-1 : 1; } -ND_ int2 Sign (const int2 v) { return Select( Less( v, int2(0) ), int2(-1), int2(1) ); } -ND_ int3 Sign (const int3 v) { return Select( Less( v, int3(0) ), int3(-1), int3(1) ); } -ND_ int4 Sign (const int4 v) { return Select( Less( v, int4(0) ), int4(-1), int4(1) ); } +#undef Gen_LENGTHSQ_DISTANCESQ1 +#undef Gen_LENGTHSQ_DISTANCESQ -ND_ float2 SinCos (const float x) { return float2(sin(x), cos(x)); } +/* +================================================= + Sign +---- + T Sign (T x) +---- + returns -1 or +1, +1 on nan +================================================= +*/ +#define Gen_SIGN1( _vtype_ )\ + ND_ _vtype_ Sign (const _vtype_ v) { return Select( Less( v, _vtype_(0) ), _vtype_(-1), _vtype_(1) ); } + +#define Gen_SIGN( _stype_, _vtype_ )\ + ND_ _stype_ Sign (const _stype_ x) { return x < _stype_(0) ? _stype_(-1) : _stype_(1); }\ + Gen_SIGN1( _vtype_##2 )\ + Gen_SIGN1( _vtype_##3 )\ + Gen_SIGN1( _vtype_##4 ) + +Gen_SIGN( float, float_vec_t ) +Gen_SIGN( int, int_vec_t ) +Gen_SIGN( uint, uint_vec_t ) + +#if AE_ENABLE_BYTE_TYPE + Gen_SIGN( sbyte, sbyte_vec_t ) + Gen_SIGN( ubyte, ubyte_vec_t ) +#endif +#if AE_ENABLE_SHORT_TYPE + Gen_SIGN( sshort, sshort_vec_t ) + Gen_SIGN( ushort, ushort_vec_t ) +#endif +#if AE_ENABLE_LONG_TYPE + Gen_SIGN( slong, slong_vec_t ) + Gen_SIGN( ulong, ulong_vec_t ) +#endif +#if AE_ENABLE_HALF_TYPE + Gen_SIGN( half, half_vec_t ) +#endif +#if AE_ENABLE_DOUBLE_TYPE + Gen_SIGN( double, double_vec_t ) +#endif + +#undef Gen_SIGN1 +#undef Gen_SIGN //----------------------------------------------------------------------------- @@ -398,54 +641,214 @@ ND_ bool HasBit (const uint value, const uint index) return (value & (1u << index)) != 0; } +ND_ uint ExtractBit (inout uint bits) +{ + uint result = bits & ~(bits - 1); + bits = bits & ~result; + return result; +} + +ND_ uint ExtractBitLog2 (inout uint bits) +{ + return uint(IntLog2( ExtractBit( INOUT bits ))); +} + 
//----------------------------------------------------------------------------- // interpolation -ND_ float BaryLerp (const float v0, const float v1, const float v2, const float3 barycentrics) { return v0 * barycentrics.x + v1 * barycentrics.y + v2 * barycentrics.z; } -ND_ float2 BaryLerp (const float2 v0, const float2 v1, const float2 v2, const float3 barycentrics) { return v0 * barycentrics.x + v1 * barycentrics.y + v2 * barycentrics.z; } -ND_ float3 BaryLerp (const float3 v0, const float3 v1, const float3 v2, const float3 barycentrics) { return v0 * barycentrics.x + v1 * barycentrics.y + v2 * barycentrics.z; } -ND_ float4 BaryLerp (const float4 v0, const float4 v1, const float4 v2, const float3 barycentrics) { return v0 * barycentrics.x + v1 * barycentrics.y + v2 * barycentrics.z; } +/* +================================================= + BaryLerp +---- + T BaryLerp (T v0, T v1, T v2, Vec3 barycentrics) -- barycentric interpolation + T BaryLerp (T v0, T v1, T v2, Vec2 barycentrics) -- barycentric interpolation with much better precision +================================================= +*/ +#define Gen_BARYLERP1( _type_, _bary3_, _bary2_ )\ + ND_ _type_ BaryLerp (const _type_ v0, const _type_ v1, const _type_ v2, const _bary3_ barycentrics) { return v0 * barycentrics.x + v1 * barycentrics.y + v2 * barycentrics.z; }\ + ND_ _type_ BaryLerp (const _type_ v0, const _type_ v1, const _type_ v2, const _bary2_ barycentrics) { return v0 + FusedMulAdd( _type_(barycentrics.x), (v1 - v0), barycentrics.y * (v2 - v0) ); } -// has much precision -ND_ float BaryLerp (const float v0, const float v1, const float v2, const float2 barycentrics) { return v0 + FusedMulAdd( barycentrics.x, (v1 - v0), barycentrics.y * (v2 - v0)); } -ND_ float2 BaryLerp (const float2 v0, const float2 v1, const float2 v2, const float2 barycentrics) { return v0 + FusedMulAdd( barycentrics.xx, (v1 - v0), barycentrics.y * (v2 - v0)); } -ND_ float3 BaryLerp (const float3 v0, const float3 v1, const float3 
v2, const float2 barycentrics) { return v0 + FusedMulAdd( barycentrics.xxx, (v1 - v0), barycentrics.y * (v2 - v0)); } -ND_ float4 BaryLerp (const float4 v0, const float4 v1, const float4 v2, const float2 barycentrics) { return v0 + FusedMulAdd( barycentrics.xxxx, (v1 - v0), barycentrics.y * (v2 - v0)); } +#define Gen_BARYLERP( _stype_, _vtype_ )\ + Gen_BARYLERP1( _stype_, _vtype_##3, _vtype_##2 )\ + Gen_BARYLERP1( _vtype_##2, _vtype_##3, _vtype_##2 )\ + Gen_BARYLERP1( _vtype_##3, _vtype_##3, _vtype_##2 )\ + Gen_BARYLERP1( _vtype_##4, _vtype_##3, _vtype_##2 ) -ND_ float BiLerp (const float x1y1, const float x2y1, const float x1y2, const float x2y2, const float2 factor) { return Lerp( Lerp( x1y1, x2y1, factor.x ), Lerp( x1y2, x2y2, factor.x ), factor.y ); } -ND_ float2 BiLerp (const float2 x1y1, const float2 x2y1, const float2 x1y2, const float2 x2y2, const float2 factor) { return Lerp( Lerp( x1y1, x2y1, factor.x ), Lerp( x1y2, x2y2, factor.x ), factor.y ); } -ND_ float3 BiLerp (const float3 x1y1, const float3 x2y1, const float3 x1y2, const float3 x2y2, const float2 factor) { return Lerp( Lerp( x1y1, x2y1, factor.x ), Lerp( x1y2, x2y2, factor.x ), factor.y ); } -ND_ float4 BiLerp (const float4 x1y1, const float4 x2y1, const float4 x1y2, const float4 x2y2, const float2 factor) { return Lerp( Lerp( x1y1, x2y1, factor.x ), Lerp( x1y2, x2y2, factor.x ), factor.y ); } +Gen_BARYLERP( float, float_vec_t ) +#if AE_ENABLE_HALF_TYPE + Gen_BARYLERP( half, half_vec_t ) +#endif +#if AE_ENABLE_DOUBLE_TYPE + Gen_BARYLERP( double, double_vec_t ) +#endif + +#undef Gen_BARYLERP1 +#undef Gen_BARYLERP + +/* +================================================= + BiLerp +---- + T BiLerp (T x1y1, T x2y1, T x1y2, T x2y2, Vec2 factor) +---- + bilinear interpolation +================================================= +*/ +#define Gen_BILERP1( _type_, _factor_ )\ + ND_ _type_ BiLerp (const _type_ x1y1, const _type_ x2y1, const _type_ x1y2, const _type_ x2y2, const _factor_ factor) { return Lerp( 
Lerp( x1y1, x2y1, factor.x ), Lerp( x1y2, x2y2, factor.x ), factor.y ); } -// map 'v' in 'src' interval to 'dst' interval. -// only for scalar range. +#define Gen_BILERP( _stype_, _vtype_ )\ + Gen_BILERP1( _stype_, _vtype_##2 )\ + Gen_BILERP1( _vtype_##2, _vtype_##2 )\ + Gen_BILERP1( _vtype_##3, _vtype_##2 )\ + Gen_BILERP1( _vtype_##4, _vtype_##2 ) + +Gen_BILERP( float, float_vec_t ) + +#if AE_ENABLE_HALF_TYPE + Gen_BILERP( half, half_vec_t ) +#endif +#if AE_ENABLE_DOUBLE_TYPE + Gen_BILERP( double, double_vec_t ) +#endif + +#undef Gen_BILERP1 +#undef Gen_BILERP + +/* +================================================= + Remap / RemapClamped +---- + T Remap (Vec2 src, Vec2 dst, T v) + T RemapClamped (Vec2 src, Vec2 dst, T v) +---- + Map 'v' in 'src' interval to 'dst' interval. + Map 'v' in 'src' interval to 'dst' interval and clamp. + Interval is a scalar range which specified for all components. +================================================= +*/ ND_ float Remap (const float2 src, const float2 dst, const float v) { return (v - src.x) / (src.y - src.x) * (dst.y - dst.x) + dst.x; } ND_ float2 Remap (const float2 src, const float2 dst, const float2 v) { return (v - src.x) / (src.y - src.x) * (dst.y - dst.x) + dst.x; } ND_ float3 Remap (const float2 src, const float2 dst, const float3 v) { return (v - src.x) / (src.y - src.x) * (dst.y - dst.x) + dst.x; } ND_ float4 Remap (const float2 src, const float2 dst, const float4 v) { return (v - src.x) / (src.y - src.x) * (dst.y - dst.x) + dst.x; } -// map 'v' in 'src' interval to 'dst' interval. 
-ND_ float2 Remap (const float2 src0, const float2 src1, const float2 dst0, const float2 dst1, const float2 v) { return (v - src0) / (src1 - src0) * (dst1 - dst0) + dst0; } -ND_ float3 Remap (const float3 src0, const float3 src1, const float3 dst0, const float3 dst1, const float3 v) { return (v - src0) / (src1 - src0) * (dst1 - dst0) + dst0; } -ND_ float4 Remap (const float4 src0, const float4 src1, const float4 dst0, const float4 dst1, const float4 v) { return (v - src0) / (src1 - src0) * (dst1 - dst0) + dst0; } - - -// map 'v' in 'src' interval to 'dst' interval and clamp. -// only for scalar range. ND_ float RemapClamped (const float2 src, const float2 dst, const float v) { return Clamp( Remap( src, dst, v ), dst.x, dst.y ); } ND_ float2 RemapClamped (const float2 src, const float2 dst, const float2 v) { return Clamp( Remap( src, dst, v ), dst.x, dst.y ); } ND_ float3 RemapClamped (const float2 src, const float2 dst, const float3 v) { return Clamp( Remap( src, dst, v ), dst.x, dst.y ); } ND_ float4 RemapClamped (const float2 src, const float2 dst, const float4 v) { return Clamp( Remap( src, dst, v ), dst.x, dst.y ); } -// map 'v' in 'src' interval to 'dst' interval and clamp. +/* +================================================= + Remap / RemapClamped +---- + Map 'v' in 'src' interval to 'dst' interval. + Map 'v' in 'src' interval to 'dst' interval and clamp. + Interval is specified per-component. 
+================================================= +*/ +ND_ float2 Remap (const float2 src0, const float2 src1, const float2 dst0, const float2 dst1, const float2 v) { return (v - src0) / (src1 - src0) * (dst1 - dst0) + dst0; } +ND_ float3 Remap (const float3 src0, const float3 src1, const float3 dst0, const float3 dst1, const float3 v) { return (v - src0) / (src1 - src0) * (dst1 - dst0) + dst0; } +ND_ float4 Remap (const float4 src0, const float4 src1, const float4 dst0, const float4 dst1, const float4 v) { return (v - src0) / (src1 - src0) * (dst1 - dst0) + dst0; } + ND_ float2 RemapClamped (const float2 src0, const float2 src1, const float2 dst0, const float2 dst1, const float2 v) { return Clamp( Remap( src0, src1, dst0, dst1, v ), dst0, dst1 ); } ND_ float3 RemapClamped (const float3 src0, const float3 src1, const float3 dst0, const float3 dst1, const float3 v) { return Clamp( Remap( src0, src1, dst0, dst1, v ), dst0, dst1 ); } ND_ float4 RemapClamped (const float4 src0, const float4 src1, const float4 dst0, const float4 dst1, const float4 v) { return Clamp( Remap( src0, src1, dst0, dst1, v ), dst0, dst1 ); } -//----------------------------------------------------------------------------- +/* +================================================= + UIndexToUNormFloor / UIndexToSNormFloor +---- + map coordinate 'index' in N dimension with size 'count' + to unorm value with rounding downwards +================================================= +*/ +ND_ float UIndexToUNormFloor (const int index, const int count) { return float( index) / float( count - 1); } +ND_ float2 UIndexToUNormFloor (const int2 index, const int2 count) { return float2(index) / float2(count - 1); } +ND_ float3 UIndexToUNormFloor (const int3 index, const int3 count) { return float3(index) / float3(count - 1); } +ND_ float4 UIndexToUNormFloor (const int4 index, const int4 count) { return float4(index) / float4(count - 1); } + +ND_ float UIndexToUNormFloor (const uint index, const uint count) { return 
float( index) / float( count - 1); } +ND_ float2 UIndexToUNormFloor (const uint2 index, const uint2 count) { return float2(index) / float2(count - 1); } +ND_ float3 UIndexToUNormFloor (const uint3 index, const uint3 count) { return float3(index) / float3(count - 1); } +ND_ float4 UIndexToUNormFloor (const uint4 index, const uint4 count) { return float4(index) / float4(count - 1); } + +ND_ float UIndexToUNormFloor (const float index, const float count) { return index / (count - 1.f); } +ND_ float2 UIndexToUNormFloor (const float2 index, const float2 count) { return index / (count - 1.f); } +ND_ float3 UIndexToUNormFloor (const float3 index, const float3 count) { return index / (count - 1.f); } +ND_ float4 UIndexToUNormFloor (const float4 index, const float4 count) { return index / (count - 1.f); } + +#define UIndexToSNormFloor( _index_, _count_ ) ToSNorm( UIndexToUNormFloor( (_index_), (_count_) )) + +/* +================================================= + UIndexToUNormRound / UIndexToSNormRound +---- + map coordinate 'index' in N dimension with size 'count' + to unorm value with rounding +================================================= +*/ +ND_ float UIndexToUNormRound (const int index, const int count) { return (float( index) + 0.5f) / float( count); } +ND_ float2 UIndexToUNormRound (const int2 index, const int2 count) { return (float2(index) + 0.5f) / float2(count); } +ND_ float3 UIndexToUNormRound (const int3 index, const int3 count) { return (float3(index) + 0.5f) / float3(count); } +ND_ float4 UIndexToUNormRound (const int4 index, const int4 count) { return (float4(index) + 0.5f) / float4(count); } + +ND_ float UIndexToUNormRound (const uint index, const uint count) { return (float( index) + 0.5f) / float( count); } +ND_ float2 UIndexToUNormRound (const uint2 index, const uint2 count) { return (float2(index) + 0.5f) / float2(count); } +ND_ float3 UIndexToUNormRound (const uint3 index, const uint3 count) { return (float3(index) + 0.5f) / float3(count); } +ND_ 
float4 UIndexToUNormRound (const uint4 index, const uint4 count) { return (float4(index) + 0.5f) / float4(count); } + +ND_ float UIndexToUNormRound (const float index, const float count) { return (index + 0.5f) / count; } +ND_ float2 UIndexToUNormRound (const float2 index, const float2 count) { return (index + 0.5f) / count; } +ND_ float3 UIndexToUNormRound (const float3 index, const float3 count) { return (index + 0.5f) / count; } +ND_ float4 UIndexToUNormRound (const float4 index, const float4 count) { return (index + 0.5f) / count; } + +#define UIndexToSNormRound( _index_, _count_ ) ToSNorm( UIndexToUNormRound( (_index_), (_count_) )) +/* +================================================= + SLerp / BiSLerp +---- + float3 SLerp (float3 x, float3 y, float factor) + float3 BiSLerp (float3 x1y1, float3 x2y1, float3 x1y2, float3 x2y2, float2 factor) +================================================= +*/ +#define Gen_SLERP( _stype_, _vtype_ )\ + ND_ _vtype_##3 SLerp (const _vtype_##3 x, const _vtype_##3 y, const _stype_ factor)\ + { \ + /* from GLM (MIT license) https://github.com/g-truc/glm */ \ + _stype_ cos_a = Dot( x, y ); \ + _stype_ alpha = ACos( cos_a ); \ + _stype_ sin_a = Sin( alpha ); \ + _stype_ t1 = Sin( (_stype_(1) - factor) * alpha ) / sin_a; \ + _stype_ t2 = Sin( factor * alpha ) / sin_a; \ + return x * t1 + y * t2; \ + } \ + \ + ND_ _vtype_##3 BiSLerp (const _vtype_##3 x1y1, const _vtype_##3 x2y1, \ + const _vtype_##3 x1y2, const _vtype_##3 x2y2, \ + const _vtype_##2 factor) \ + { \ + return SLerp( SLerp( x1y1, x2y1, factor.x ), \ + SLerp( x1y2, x2y2, factor.x ), factor.y ); \ + } + +Gen_SLERP( float, float_vec_t ) + +#if AE_ENABLE_HALF_TYPE + Gen_SLERP( half, half_vec_t ) +#endif + +#undef Gen_SLERP + +/* +================================================= + NearestSampleArray / LinearSampleArray +---- + used to get array element from unorm float +================================================= +*/ #define NearestSampleArray( _result_, _array_, 
_factor_ ) \ { \ int lll = (_array_).length() - 1; \ @@ -465,21 +868,26 @@ ND_ float4 RemapClamped (const float4 src0, const float4 src1, const float4 dst #define LinearSampleArray( _result_, _array_, _factor_ )\ LinearSampleArray2( (_result_), (_array_), (_factor_), Lerp ) -//----------------------------------------------------------------------------- +/* +================================================= + InterpolateQuad / InterpolateTriangle +---- + can be used in TessEval shader +================================================= +*/ +#define InterpolateQuad( _arr_, _field_, _factor2_ )\ + BiLerp( _arr_[0] _field_, _arr_[1] _field_, _arr_[3] _field_, _arr_[2] _field_, _factor2_.xy ) -#define InterpolateQuad( _arr_, _field_, _factor2_ ) \ - (Lerp( Lerp( _arr_[0] _field_, _arr_[1] _field_, _factor2_.x ), \ - Lerp( _arr_[3] _field_, _arr_[2] _field_, _factor2_.x ), \ - _factor2_.y )) - -#define InterpolateTriangle( _arr_, _field_, _factor3_ ) \ - ( _factor3_.x * _arr_[0] _field_ + \ - _factor3_.y * _arr_[1] _field_ + \ +#define InterpolateTriangle( _arr_, _field_, _factor3_ )\ + ( _factor3_.x * _arr_[0] _field_ + \ + _factor3_.y * _arr_[1] _field_ + \ _factor3_.z * _arr_[2] _field_ ) -//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +// IsZero / IsNotZero / IsNormalized + ND_ bool IsZero (const float x) { return Abs(x) <= Epsilon(); } ND_ bool2 IsZero (const float2 v) { return LessEqual( Abs(v), float2(Epsilon()) ); } ND_ bool3 IsZero (const float3 v) { return LessEqual( Abs(v), float3(Epsilon()) ); } @@ -493,18 +901,24 @@ ND_ bool4 IsNotZero (const float4 v) { return Greater( Abs(v), float4(Epsilon #define AllZeros( v ) All( IsZero( v )) #define AnyNotZero( v ) Any( IsNotZero( v )) -#define IsFinite( v ) All(Equals( (v), (v) )) +#define IsFinite( v ) All(Equal( (v), (v) )) ND_ bool IsNormalized (const float2 v, const float err) { float d = Dot( v, v ) 
- 1.f; return Abs(d) < err; } ND_ bool IsNormalized (const float3 v, const float err) { float d = Dot( v, v ) - 1.f; return Abs(d) < err; } ND_ bool IsNormalized (const float2 v) { return IsNormalized( v, Epsilon() ); } ND_ bool IsNormalized (const float3 v) { return IsNormalized( v, Epsilon() ); } -//----------------------------------------------------------------------------- - -#define SWAP_Impl( _type_ ) void Swap (inout _type_ lhs, inout _type_ rhs) { _type_ tmp = lhs; lhs = rhs; rhs = tmp; } +/* +================================================= + Swap +---- + void Swap (T& lhs, T& rhs) +================================================= +*/ +#define Gen_SWAP( _type_ )\ + void Swap (inout _type_ lhs, inout _type_ rhs) { _type_ tmp = lhs; lhs = rhs; rhs = tmp; } -SWAP_Impl( float ) +Gen_SWAP( float ) -#undef SWAP_Impl +#undef Gen_SWAP //----------------------------------------------------------------------------- diff --git a/AE/engine/shared_data/shaders/Matrix.glsl b/AE/engine/shared_data/shaders/Matrix.glsl index 0b955041..f734eb95 100644 --- a/AE/engine/shared_data/shaders/Matrix.glsl +++ b/AE/engine/shared_data/shaders/Matrix.glsl @@ -205,11 +205,10 @@ float3x3 f3x3_Rotate (const float angle, const float3 inAxis) return result; } -// TODO: set [3][3] = 1 -float4x4 f4x4_RotateX (const float angle) { return float4x4(f3x3_RotateX( angle )); } -float4x4 f4x4_RotateY (const float angle) { return float4x4(f3x3_RotateY( angle )); } -float4x4 f4x4_RotateZ (const float angle) { return float4x4(f3x3_RotateZ( angle )); } -float4x4 f4x4_Rotate (const float angle, const float3 axis) { return float4x4(f3x3_Rotate( angle, axis )); } +float4x4 f4x4_RotateX (const float angle) { float4x4 m = float4x4(f3x3_RotateX( angle )); m[3][3] = 1.f; return m; } +float4x4 f4x4_RotateY (const float angle) { float4x4 m = float4x4(f3x3_RotateY( angle )); m[3][3] = 1.f; return m; } +float4x4 f4x4_RotateZ (const float angle) { float4x4 m = float4x4(f3x3_RotateZ( angle )); m[3][3] = 1.f; 
return m; } +float4x4 f4x4_Rotate (const float angle, const float3 axis) { float4x4 m = float4x4(f3x3_Rotate( angle, axis )); m[3][3] = 1.f; return m; } //----------------------------------------------------------------------------- @@ -253,7 +252,7 @@ float3 UnProject (const float4x4 invMat, const float3 pos, const float4 viewpor float3 ViewDir (const float4x4 invMat, const float2 unormPos) { - const float4 world_pos = invMat * float4(ToSNorm(unormPos), 1.0f, 1.0f); + const float4 world_pos = invMat * float4(ToSNorm(unormPos), -1.0f, 1.0f); return Normalize( world_pos.xyz / world_pos.w ); } diff --git a/AE/engine/shared_data/shaders/Normal.glsl b/AE/engine/shared_data/shaders/Normal.glsl index 51cc4e3c..23307dd1 100644 --- a/AE/engine/shared_data/shaders/Normal.glsl +++ b/AE/engine/shared_data/shaders/Normal.glsl @@ -1,6 +1,10 @@ // Copyright (c) Zhirnov Andrey. For more information see 'LICENSE' /* - Normal calculation functions + Normal calculation functions. + + TBN matrix: + tan_view_dir = Normalize( MatTranspose(TBN) * viewDir ); + world_normal = Normalize( TBN * normalMap ); */ #ifdef __cplusplus @@ -58,9 +62,9 @@ ND_ float3 ComputeNormal (const float3 position0, const float3 position1, const */ #define _impl_SmoothNormal2x1( _outNormalInWS_, _getPos_, _coord_ ) \ \ - const float3 v0 = _getPos_( _coord_, offset.xx ); \ - const float3 v1 = _getPos_( _coord_, offset.yx ); \ - const float3 v2 = _getPos_( _coord_, offset.xy ); \ + const float3 v0 = _getPos_( _coord_, offset.xx ).xyz; \ + const float3 v1 = _getPos_( _coord_, offset.yx ).xyz; \ + const float3 v2 = _getPos_( _coord_, offset.xy ).xyz; \ \ _outNormalInWS_ = Cross( v1 - v0, v2 - v0 ); /* 1-0, 2-0 */ \ _outNormalInWS_ = Normalize( _outNormalInWS_ ); \ @@ -88,10 +92,10 @@ ND_ float3 ComputeNormal (const float3 position0, const float3 position1, const */ #define _impl_SmoothNormal2x2( _outNormalInWS_, _getPos_, _coord_ ) \ \ - const float3 v0 = _getPos_( _coord_, offset.xx ); \ - const float3 v1 = 
_getPos_( _coord_, offset.yx ); \ - const float3 v2 = _getPos_( _coord_, offset.xy ); \ - const float3 v3 = _getPos_( _coord_, offset.yy ); \ + const float3 v0 = _getPos_( _coord_, offset.xx ).xyz; \ + const float3 v1 = _getPos_( _coord_, offset.yx ).xyz; \ + const float3 v2 = _getPos_( _coord_, offset.xy ).xyz; \ + const float3 v3 = _getPos_( _coord_, offset.yy ).xyz; \ \ _outNormalInWS_ = Cross( v1 - v0, v3 - v0 ); /* 1-0, 3-0 */ \ _outNormalInWS_ += Cross( v3 - v0, v2 - v0 ); /* 3-0, 2-0 */ \ @@ -121,15 +125,15 @@ ND_ float3 ComputeNormal (const float3 position0, const float3 position1, const */ #define _impl_SmoothNormal3x3( _outNormalInWS_, _getPos_, _coord_ ) \ \ - const float3 v0 = _getPos_( _coord_, offset.xx ); \ - const float3 v1 = _getPos_( _coord_, offset.yx ); \ - const float3 v2 = _getPos_( _coord_, offset.zx ); \ - const float3 v3 = _getPos_( _coord_, offset.xy ); \ - const float3 v4 = _getPos_( _coord_, offset.yy ); \ - const float3 v5 = _getPos_( _coord_, offset.zy ); \ - const float3 v6 = _getPos_( _coord_, offset.xz ); \ - const float3 v7 = _getPos_( _coord_, offset.yz ); \ - const float3 v8 = _getPos_( _coord_, offset.zz ); \ + const float3 v0 = _getPos_( _coord_, offset.xx ).xyz; \ + const float3 v1 = _getPos_( _coord_, offset.yx ).xyz; \ + const float3 v2 = _getPos_( _coord_, offset.zx ).xyz; \ + const float3 v3 = _getPos_( _coord_, offset.xy ).xyz; \ + const float3 v4 = _getPos_( _coord_, offset.yy ).xyz; \ + const float3 v5 = _getPos_( _coord_, offset.zy ).xyz; \ + const float3 v6 = _getPos_( _coord_, offset.xz ).xyz; \ + const float3 v7 = _getPos_( _coord_, offset.yz ).xyz; \ + const float3 v8 = _getPos_( _coord_, offset.zz ).xyz; \ \ _outNormalInWS_ = Cross( v1 - v4, v2 - v4 ); /* 1-4, 2-4 */ \ _outNormalInWS_ += Cross( v2 - v4, v5 - v4 ); /* 2-4, 5-4 */ \ @@ -174,7 +178,7 @@ ND_ float3 ComputeNormal (const float3 position0, const float3 position1, const // Calc normal using quad subgroup float3 ComputeNormalInWS_quadSg (const float3 
worldPos) - { + { float3 p0 = gl.subgroup.QuadBroadcast( worldPos, 0 ); float3 p1 = gl.subgroup.QuadBroadcast( worldPos, 1 ); float3 p2 = gl.subgroup.QuadBroadcast( worldPos, 2 ); @@ -285,10 +289,10 @@ ND_ float3 ComputeNormal (const float3 position0, const float3 position1, const */ #define _impl_SmoothTBN2x2( _outTBNinWS_, _getPos_, _getUV_, _coord_ ) \ \ - const float3 pos0 = _getPos_( _coord_, offset.xx ); \ - const float3 pos1 = _getPos_( _coord_, offset.yx ); \ - const float3 pos2 = _getPos_( _coord_, offset.zx ); \ - const float3 pos3 = _getPos_( _coord_, offset.xy ); \ + const float3 pos0 = _getPos_( _coord_, offset.xx ).xyz; \ + const float3 pos1 = _getPos_( _coord_, offset.yx ).xyz; \ + const float3 pos2 = _getPos_( _coord_, offset.zx ).xyz; \ + const float3 pos3 = _getPos_( _coord_, offset.xy ).xyz; \ \ const float2 uv0 = _getUV_( _coord_, offset.xx ); \ const float2 uv1 = _getUV_( _coord_, offset.yx ); \ @@ -328,15 +332,15 @@ ND_ float3 ComputeNormal (const float3 position0, const float3 position1, const */ #define _impl_SmoothTBN3x3( _outTBNinWS_, _getPos_, _getUV_, _coord_ ) \ \ - const float3 pos0 = _getPos_( _coord_, offset.xx ); \ - const float3 pos1 = _getPos_( _coord_, offset.yx ); \ - const float3 pos2 = _getPos_( _coord_, offset.zx ); \ - const float3 pos3 = _getPos_( _coord_, offset.xy ); \ - const float3 pos4 = _getPos_( _coord_, offset.yy ); \ - const float3 pos5 = _getPos_( _coord_, offset.zy ); \ - const float3 pos6 = _getPos_( _coord_, offset.xz ); \ - const float3 pos7 = _getPos_( _coord_, offset.yz ); \ - const float3 pos8 = _getPos_( _coord_, offset.zz ); \ + const float3 pos0 = _getPos_( _coord_, offset.xx ).xyz; \ + const float3 pos1 = _getPos_( _coord_, offset.yx ).xyz; \ + const float3 pos2 = _getPos_( _coord_, offset.zx ).xyz; \ + const float3 pos3 = _getPos_( _coord_, offset.xy ).xyz; \ + const float3 pos4 = _getPos_( _coord_, offset.yy ).xyz; \ + const float3 pos5 = _getPos_( _coord_, offset.zy ).xyz; \ + const float3 pos6 = 
_getPos_( _coord_, offset.xz ).xyz; \ + const float3 pos7 = _getPos_( _coord_, offset.yz ).xyz; \ + const float3 pos8 = _getPos_( _coord_, offset.zz ).xyz; \ \ const float2 uv0 = _getUV_( _coord_, offset.xx ); \ const float2 uv1 = _getUV_( _coord_, offset.yx ); \ diff --git a/AE/engine/shared_data/shaders/PBR.glsl b/AE/engine/shared_data/shaders/PBR.glsl index 9c3df8e9..537fa424 100644 --- a/AE/engine/shared_data/shaders/PBR.glsl +++ b/AE/engine/shared_data/shaders/PBR.glsl @@ -84,7 +84,7 @@ float3 SpecularBRDF (const float3 albedo, const float3 lightColor, const float3 float alpha = rough * rough; float alpha2 = alpha * alpha; float denom = n_dot_h * n_dot_h * (alpha2 - 1.0f) + 1.0f; - D = (alpha2) / (Pi() * denom*denom); + D = (alpha2) / (Pi() * denom*denom); } float G; @@ -99,7 +99,7 @@ float3 SpecularBRDF (const float3 albedo, const float3 lightColor, const float3 float3 F; { float3 F0 = Lerp( float3(0.04f), albedo, metallic ); - F = F0 + (1.0f - F0) * Pow( 1.0f - n_dot_v, 5.0f ); + F = F0 + (1.0f - F0) * Pow( 1.0f - n_dot_v, 5.0f ); } float3 spec = D * F * G / (4.0f * n_dot_l * n_dot_v); diff --git a/AE/engine/shared_data/shaders/Quaternion.glsl b/AE/engine/shared_data/shaders/Quaternion.glsl index 4ffc8f9a..da9090b6 100644 --- a/AE/engine/shared_data/shaders/Quaternion.glsl +++ b/AE/engine/shared_data/shaders/Quaternion.glsl @@ -119,7 +119,7 @@ quat QMul (const quat left, const quat right) { quat ret; - ret.data.xyz = left.data.w * right.data.xyz + + ret.data.xyz = left.data.w * right.data.xyz + left.data.xyz * right.data.w + Cross( left.data.xyz, right.data.xyz ); diff --git a/AE/engine/shared_data/shaders/Ray.glsl b/AE/engine/shared_data/shaders/Ray.glsl index 8d9963ec..475fabd8 100644 --- a/AE/engine/shared_data/shaders/Ray.glsl +++ b/AE/engine/shared_data/shaders/Ray.glsl @@ -20,14 +20,34 @@ struct Ray }; ND_ Ray Ray_Create (const float3 origin, const float3 direction, const float tmin); -ND_ Ray Ray_FromScreen (const float3 origin, const quat rotation, 
const float fovX, const float nearPlane, const int2 screenSize, const int2 screenCoord); -ND_ Ray Ray_From (const float3 leftBottom, const float3 rightBottom, const float3 leftTop, const float3 rightTop, const float3 origin, const float nearPlane, const float2 unormCoord); + +ND_ Ray Ray_FromScreen (const float3 origin, const float fovX, const float nearPlane, + const float2 screenSizePx, const float2 screenCoordPx); + +ND_ Ray Ray_From (const float3 leftBottom, const float3 rightBottom, const float3 leftTop, const float3 rightTop, + const float3 origin, const float nearPlane, const float2 unormCoord); ND_ Ray Ray_From (const float4x4 invViewProj, const float3 origin, const float nearPlane, const float2 unormCoord); + +ND_ Ray Ray_FromFlatScreen (const float3 origin, const float distanceToEye, float2 screenSize, const float nearPlane, const float2 snormCoord); +ND_ Ray Ray_FromCurvedScreen (const float3 origin, const float distanceToEye, const float screenRadius, float2 screenSize, const float nearPlane, const float2 snormCoord); + +ND_ Ray Ray_PlaneToVR180 (const float ipd, const float3 origin, const float nearPlane, float2 uv); +ND_ Ray Ray_PlaneToVR360 (const float ipd, const float3 origin, const float nearPlane, float2 uv); +ND_ Ray Ray_PlaneTo360 (const float3 origin, const float nearPlane, const float2 uv); +ND_ Ray Ray_PlaneToSphere (float2 fov, const float3 origin, const float nearPlane, float2 uv); + +ND_ float2 Inverted_PlaneToVR180 (const float3 rayDir, const uint eye); +ND_ float2 Inverted_PlaneToVR360 (const float3 rayDir, const uint eye); +ND_ float2 Inverted_PlaneTo360 (const float3 rayDir); +ND_ float2 Inverted_PlaneToCubemap360 (const float3 rayDir); +ND_ float2 Inverted_PlaneToCubemapVR360 (const float3 rayDir, const uint eye); + ND_ float3 Ray_CalcX (const Ray ray, const float2 pointYZ); ND_ float3 Ray_CalcY (const Ray ray, const float2 pointXZ); ND_ float3 Ray_CalcZ (const Ray ray, const float2 pointXY); ND_ bool Ray_Contains (const Ray ray, 
const float3 point); void Ray_Rotate (inout Ray ray, const quat rotation); + void Ray_Rotate (inout Ray ray, const float3x3 rotation); void Ray_Move (inout Ray ray, const float delta); void Ray_SetLength (inout Ray ray, const float length); void Ray_SetOrigin (inout Ray ray, const float3 origin); @@ -43,9 +63,9 @@ Ray Ray_Create (const float3 origin, const float3 direction, const float tmin) { Ray ray; ray.origin = origin; - ray.t = tmin; ray.dir = direction; - ray.pos = FusedMulAdd( ray.dir, float3(ray.t), ray.origin ); + + Ray_SetLength( INOUT ray, tmin ); // set 't' and 'pos' return ray; } @@ -56,23 +76,19 @@ Ray Ray_Create (const float3 origin, const float3 direction, const float tmin) create ray for raytracing, raymarching, ... ================================================= */ -Ray Ray_FromScreen (const float3 origin, const quat rotation, const float fovX, const float nearPlane, - const int2 screenSize, const int2 screenCoord) +Ray Ray_FromScreen (const float3 origin, const float fovX, const float nearPlane, + const float2 screenSize, const float2 screenCoord) { - float2 scr_size = float2(screenSize); - float2 coord = float2(screenCoord); - - float ratio = scr_size.y / scr_size.x; - float fovY = fovX * ratio; - float2 scale = nearPlane / Cos( float2(fovX, fovY) * 0.5 ); - float2 uv = (coord - scr_size * 0.5) / (scr_size.x * 0.5) * scale; + float ratio = screenSize.y / screenSize.x; + float fovY = fovX * ratio; + float2 scale = nearPlane / Cos( float2(fovX, fovY) * 0.5 ); + float2 uv = (screenCoord - screenSize * 0.5) / (screenSize.x * 0.5) * scale; Ray ray; ray.origin = origin; - ray.dir = Normalize( QMul( rotation, Normalize( float3(uv.x, -uv.y, -0.5) ))); - ray.t = nearPlane; - ray.pos = FusedMulAdd( ray.dir, float3(ray.t), ray.origin ); + ray.dir = Normalize( float3( uv.x, -uv.y, -0.5 )); + Ray_SetLength( INOUT ray, nearPlane ); // set 't' and 'pos' return ray; } @@ -93,9 +109,8 @@ Ray Ray_From (const float3 leftBottom, const float3 rightBottom, const 
float3 l Ray ray; ray.origin = origin; ray.dir = Normalize( vec ); - ray.t = nearPlane; - ray.pos = FusedMulAdd( ray.dir, float3(ray.t), ray.origin ); + Ray_SetLength( INOUT ray, nearPlane ); // set 't' and 'pos' return ray; } @@ -114,9 +129,262 @@ Ray Ray_From (const float4x4 invViewProj, const float3 origin, const float near Ray ray; ray.origin = origin; ray.dir = dir; - ray.pos = origin + dir * nearPlane; - ray.t = nearPlane; + Ray_SetLength( INOUT ray, nearPlane ); // set 't' and 'pos' + return ray; +} + +/* +================================================= + Ray_FromFlatScreen +---- + _______ -- screen + + * -- eye +================================================= +*/ +Ray Ray_FromFlatScreen (const float3 origin, const float distanceToEye, float2 screenSize, const float nearPlane, const float2 snormCoord) +{ + screenSize *= 0.5f; + + Ray ray; + ray.origin = origin; + ray.dir = Normalize(float3( screenSize * snormCoord, distanceToEye )); + + Ray_SetLength( INOUT ray, nearPlane ); // set 't' and 'pos' + return ray; +} + +/* +================================================= + Ray_FromCurvedScreen +---- + Field of view on Y-axis is larger because of screen curvature. 
+ _____ -- curved screen + / \ + * --- eye +================================================= +*/ +Ray Ray_FromCurvedScreen (const float3 origin, const float distanceToEye, const float screenRadius, float2 screenSize, const float nearPlane, const float2 snormCoord) +{ + screenSize *= 0.5f; + + float2 corner; + corner.y = screenSize.x / screenRadius; + corner.x = Sqrt( 1.f - corner.y * corner.y ); + + Ray ray; + ray.origin = origin; + ray.dir = Normalize(float3( corner.y * snormCoord.x, + screenSize.y * snormCoord.y, + (screenRadius - distanceToEye) - corner.x )); + + Ray_SetLength( INOUT ray, nearPlane ); // set 't' and 'pos' + return ray; +} + +/* +================================================= + Ray_PlaneToVR180 +---- + VR180 left-right + Z+ - forward, X+ - right, Y+ - down +================================================= +*/ +Ray Ray_PlaneToVR180 (const float ipd, const float3 origin, const float nearPlane, float2 uv) +{ + // from https://developers.google.com/vr/jump/rendering-ods-content.pdf + float scale = ipd * 0.5 * (uv.x < 0.5 ? -1.0 : 1.0); + uv = float2( (uv.x < 0.5 ? uv.x : uv.x - 0.5) * 0.5 + 0.375, uv.y ); // map [0, 1] to [0.375, 0.875] + float theta = (-uv.x) * 2.0 * Pi(); + float phi = uv.y * Pi() - Pi() * 0.5; + float sin_t = Sin( theta ); + float cos_t = Cos( theta ); + float cos_p = Cos( phi ); + + Ray ray; + ray.origin = origin + float3( cos_t, 0.0, sin_t ) * scale; + ray.dir = float3( sin_t * cos_p, Sin(phi), -cos_t * cos_p ); + + Ray_SetLength( INOUT ray, nearPlane ); // set 't' and 'pos' + return ray; +} + +float2 Inverted_PlaneToVR180 (const float3 rayDir, const uint eye) +{ + float theta = ASin( rayDir.y ); + float phi = ATan( rayDir.z, rayDir.x ); + + theta = (theta + Pi() * 0.5f) / Pi(); + phi = (Pi() - phi) / Pi2(); + phi = Fract( phi - 0.125f ) * 2.f + (eye == 0 ? 
0.f : 0.5f); + + return float2( phi, theta ); +} + +/* +================================================= + Ray_PlaneToVR360 +---- + VR360 top-bottom + Z+ - forward, X+ - right, Y+ - down +================================================= +*/ +Ray Ray_PlaneToVR360 (const float ipd, const float3 origin, const float nearPlane, float2 uv) +{ + // from https://developers.google.com/vr/jump/rendering-ods-content.pdf + float scale = ipd * 0.5 * (uv.y < 0.5 ? -1.0 : 1.0); + uv = float2( uv.x, (uv.y < 0.5 ? uv.y : uv.y - 0.5) * 2.0 ); + float theta = (-uv.x) * 2.0 * Pi(); + float phi = uv.y * Pi() - Pi() * 0.5; + float sin_t = Sin( theta ); + float cos_t = Cos( theta ); + float cos_p = Cos( phi ); + + Ray ray; + ray.origin = origin + float3( cos_t, 0.0, sin_t ) * scale; + ray.dir = float3( sin_t * cos_p, Sin(phi), -cos_t * cos_p ); + + Ray_SetLength( INOUT ray, nearPlane ); // set 't' and 'pos' + return ray; +} + +float2 Inverted_PlaneToVR360 (const float3 rayDir, const uint eye) +{ + float theta = ASin( rayDir.y ); + float phi = ATan( rayDir.z, rayDir.x ); + + theta = (theta + Pi() * 0.5f) * 0.5f / Pi(); + theta += (eye == 0 ? 
0.f : 0.5f); + phi = (Pi() - phi) / Pi2(); + + return float2( Fract( phi - 0.75f ), theta ); +} + +/* +================================================= + Ray_PlaneTo360 +---- + Z+ - forward, X+ - right, Y+ - down +================================================= +*/ +Ray Ray_PlaneTo360 (const float3 origin, const float nearPlane, const float2 uv) +{ + float theta = (-uv.x) * 2.0 * Pi(); + float phi = uv.y * Pi() - Pi() * 0.5; + float cos_p = Cos( phi ); + + Ray ray; + ray.origin = origin; + ray.dir = float3( Sin(theta) * cos_p, Sin(phi), -Cos(theta) * cos_p ); + + Ray_SetLength( INOUT ray, nearPlane ); // set 't' and 'pos' + return ray; +} + +float2 Inverted_PlaneTo360 (const float3 rayDir) +{ + float theta = ASin( rayDir.y ); + float phi = ATan( rayDir.z, rayDir.x ); + + theta = (theta + Pi() * 0.5) / Pi(); + phi = (Pi() - phi) / Pi2(); + + return float2( Fract( phi - 0.75 ), theta ); +} + +/* +================================================= + Inverted_PlaneToCubemap360 +---- + for webm 360; top plane (horizontal): left, front, right; bottom plane (vertical): down, back, up. 
+================================================= +*/ +float2 Inverted_PlaneToCubemap360 (const float3 c) +{ + // front (xy space) + if ( All(bool3( Abs(c.x) <= c.z, c.z > 0.f, Abs(c.y) <= c.z ))) + return Lerp( float2(1.0/3.0, 0.0), float2(2.0/3.0, 0.5), ToUNorm(c.xy / c.z) ); + + // right (zy space) + if ( All(bool3( Abs(c.z) <= c.x, c.x > 0.f, Abs(c.y) <= c.x ))) + return Lerp( float2(2.0/3.0, 0.0), float2(1.0, 0.5), ToUNorm(float2( -c.z, c.y ) / c.x) ); + + // back (xy space) + if ( All(bool3( Abs(c.x) <= -c.z, c.z < 0.f, Abs(c.y) <= -c.z ))) + return Lerp( float2(2.0/3.0, 1.0), float2(1.0/3.0, 0.5), ToUNorm(c.yx / -c.z) ); + + // left (zy space) + if ( All(bool3( Abs(c.z) <= -c.x, c.x < 0.f, Abs(c.y) <= -c.x ))) + return Lerp( float2(0.0, 0.0), float2(1.0/3.0, 0.5), ToUNorm(c.zy / -c.x) ); + + // down (xz space) + if ( c.y > 0.f ) + return Lerp( float2(1.0/3.0, 1.0), float2(0.0, 0.5), ToUNorm(c.zx / c.y) ); + + // up (xz space) + return Lerp( float2(1.0, 1.0), float2(2.0/3.0, 0.5), ToUNorm(float2( c.z, -c.x ) / c.y) ); +} + +/* +================================================= + Inverted_PlaneToCubemapVR360 +---- + for webm VR360; left plane (vertical): left, front, right; bottom plane (vertical): down, back, up. 
+================================================= +*/ +float2 Inverted_PlaneToCubemapVR360 (const float3 c, const uint eye) +{ + float2 uv; + + // front (xy space) + if ( All(bool3( Abs(c.x) <= c.z, c.z > 0.f, Abs(c.y) <= c.z ))) + uv = Lerp( float2(1.0/3.0, 0.0), float2(2.0/3.0, 0.5), ToUNorm(c.xy / c.z) ); + else + // right (zy space) + if ( All(bool3( Abs(c.z) <= c.x, c.x > 0.f, Abs(c.y) <= c.x ))) + uv = Lerp( float2(2.0/3.0, 0.0), float2(1.0, 0.5), ToUNorm(float2( -c.z, c.y ) / c.x) ); + else + // back (xy space) + if ( All(bool3( Abs(c.x) <= -c.z, c.z < 0.f, Abs(c.y) <= -c.z ))) + uv = Lerp( float2(2.0/3.0, 1.0), float2(1.0/3.0, 0.5), ToUNorm(c.yx / c.z) ); + else + // left (zy space) + if ( All(bool3( Abs(c.z) <= -c.x, c.x < 0.f, Abs(c.y) <= -c.x ))) + uv = Lerp( float2(0.0, 0.0), float2(1.0/3.0, 0.5), ToUNorm(c.zy / -c.x) ); + else + // down (xz space) + if ( c.y > 0.f ) + uv = Lerp( float2(1.0, 1.0), float2(2.0/3.0, 0.5), ToUNorm(c.zx / -c.y) ); + else + // up (xz space) + uv = Lerp( float2(1.0/3.0, 1.0), float2(0.0, 0.5), ToUNorm(float2( -c.z, c.x ) / c.y) ); + + uv = uv.yx; + uv.x = uv.x * 0.5f + (eye == 0 ? 
0.f : 0.5f); + return uv; +} + +/* +================================================= + Ray_PlaneToSphere +---- + Z+ - forward, X+ - right, Y+ - down +================================================= +*/ +Ray Ray_PlaneToSphere (float2 fov, const float3 origin, const float nearPlane, float2 uv) +{ + uv = ToSNorm( uv ); + fov *= 0.5; + float theta = fov.x * -uv.x + Pi(); + float phi = fov.y * uv.y; + float cos_p = Cos( phi ); + + Ray ray; + ray.origin = origin; + ray.dir = float3( Sin(theta) * cos_p, Sin(phi), -Cos(theta) * cos_p ); + + Ray_SetLength( INOUT ray, nearPlane ); // set 't' and 'pos' return ray; } @@ -159,20 +427,28 @@ bool Ray_Contains (const Ray ray, const float3 point) const float2 z = ray.pos.zz + ray.dir.zz * (point.xy - ray.pos.xy) / ray.dir.xy; // z(x) == z(y) and z(x) == point.z - return Equals( z.x, z.y ) and Equals( z.x, point.z ); + return Equal( z.x, z.y ) and Equal( z.x, point.z ); } /* ================================================= Ray_Rotate +---- + view matrix must be transposed ================================================= */ void Ray_Rotate (inout Ray ray, const quat rotation) { // ray.origin - const ray.dir = Normalize( QMul( rotation, ray.dir )); - ray.t = Distance( ray.origin, ray.pos ); - ray.pos = ray.t * ray.dir; + ray.pos = FusedMulAdd( ray.dir, float3(ray.t), ray.origin ); +} + +void Ray_Rotate (inout Ray ray, const float3x3 rotation) +{ + // ray.origin - const + ray.dir = Normalize( rotation * ray.dir ); + ray.pos = FusedMulAdd( ray.dir, float3(ray.t), ray.origin ); } /* diff --git a/AE/engine/shared_data/shaders/ReliefMapping.glsl b/AE/engine/shared_data/shaders/ReliefMapping.glsl index dc545527..ff978fcf 100644 --- a/AE/engine/shared_data/shaders/ReliefMapping.glsl +++ b/AE/engine/shared_data/shaders/ReliefMapping.glsl @@ -16,14 +16,23 @@ ND_ float2 ParallaxMapping (gl::CombinedTex2D heightMap, const float2 uv, const float3 viewDir, const float heightScale, const float parallaxBias); +ND_ float3 ParallaxMapping 
(gl::CombinedTexCube heightMap, + const float3 uv, const float3 viewDir, + const float heightScale, const float parallaxBias); ND_ float2 SteepParallaxMapping (gl::CombinedTex2D heightMap, const float2 uv, const float3 viewDir, const int numLayers, const float heightScale); +ND_ float3 SteepParallaxMapping (gl::CombinedTexCube heightMap, + const float3 uv, const float3 viewDir, + const int numLayers, const float heightScale); ND_ float2 ParallaxOcclusionMapping (gl::CombinedTex2D heightMap, const float2 uv, const float3 viewDir, const int numLayers, const float heightScale); +ND_ float3 ParallaxOcclusionMapping (gl::CombinedTexCube heightMap, + const float3 uv, const float3 viewDir, + const int numLayers, const float heightScale); // returns shading factor @@ -54,6 +63,13 @@ float2 ParallaxMapping (gl::CombinedTex2D heightMap, const float2 uv, co return uv - p; } +float3 ParallaxMapping (gl::CombinedTexCube heightMap, const float3 uv, const float3 viewDir, const float heightScale, const float parallaxBias) +{ + float h = ExtractDepth( heightMap, uv ); + float3 p = viewDir * (h * (heightScale * 0.5f) + parallaxBias); + return uv - p; +} + /* ================================================= SteepParallaxMapping @@ -79,6 +95,26 @@ float2 SteepParallaxMapping (gl::CombinedTex2D heightMap, const float2 u return cur_uv; } +float3 SteepParallaxMapping (gl::CombinedTexCube heightMap, const float3 uv, const float3 viewDir, const int numLayers, const float heightScale) +{ + const float layer_depth = 1.0f / numLayers; + const float3 delta_uv = viewDir * heightScale / numLayers; + float cur_layer_depth = 0.0f; + float3 cur_uv = uv; + float height = ExtractDepth( heightMap, cur_uv ); + + for (int i = 0; i < numLayers; ++i) + { + cur_layer_depth += layer_depth; + cur_uv -= delta_uv; + height = ExtractDepth( heightMap, cur_uv ); + + if ( height < cur_layer_depth ) + break; + } + return cur_uv; +} + /* ================================================= ParallaxOcclusionMapping @@ 
-109,6 +145,31 @@ float2 ParallaxOcclusionMapping (gl::CombinedTex2D heightMap, const floa return Lerp( cur_uv, prev_uv, next_depth / (next_depth - prev_depth) ); } +float3 ParallaxOcclusionMapping (gl::CombinedTexCube heightMap, const float3 uv, const float3 viewDir, const int numLayers, const float heightScale) +{ + const float layer_depth = 1.0f / numLayers; + const float3 delta_uv = viewDir * heightScale / numLayers; + float cur_layer_depth = 0.0f; + float3 cur_uv = uv; + float height = ExtractDepth( heightMap, cur_uv ); + + for (int i = 0; i < numLayers; ++i) + { + cur_layer_depth += layer_depth; + cur_uv -= delta_uv; + height = ExtractDepth( heightMap, cur_uv ); + + if ( height < cur_layer_depth ) + break; + } + + const float3 prev_uv = cur_uv + delta_uv; + const float next_depth = height - cur_layer_depth; + const float prev_depth = ExtractDepth( heightMap, prev_uv ) - cur_layer_depth + layer_depth; + + return Lerp( cur_uv, prev_uv, next_depth / (next_depth - prev_depth) ); +} + /* ================================================= ParallaxOcclusionSoftShadow diff --git a/AE/engine/shared_data/shaders/SDF.glsl b/AE/engine/shared_data/shaders/SDF.glsl index 14eca785..de94b3ac 100644 --- a/AE/engine/shared_data/shaders/SDF.glsl +++ b/AE/engine/shared_data/shaders/SDF.glsl @@ -82,11 +82,15 @@ ND_ float SDF_OpSymX (const float3 position, float (*sdf)(float3)); ND_ float SDF_OpSymXZ (const float2 position, float (*sdf)(float2)); ND_ float SDF_OpSymXZ (const float3 position, float (*sdf)(float3)); -ND_ float SDF_InfRepetition (const float2 position, const float2 center, float (*sdf)(float2)); -ND_ float SDF_InfRepetition (const float3 position, const float3 center, float (*sdf)(float3)); - -ND_ float SDF_Repetition (const float2 position, const float step, const float2 count, float (*sdf)(float2)); -ND_ float SDF_Repetition (const float3 position, const float step, const float3 count, float (*sdf)(float3)); +ND_ float SDF_InfRepetition (const float2 position, const 
float step, float (*sdf)(float2)); +ND_ float SDF_InfRepetition (const float2 position, const float2 step, float (*sdf)(float2)); +ND_ float SDF_InfRepetition (const float3 position, const float step, float (*sdf)(float3)); +ND_ float SDF_InfRepetition (const float3 position, const float3 step, float (*sdf)(float3)); + +ND_ float SDF_Repetition (const float2 position, const float step, const float2 count, float (*sdf)(float2)); +ND_ float SDF_Repetition (const float2 position, const float2 step, const float2 count, float (*sdf)(float2)); +ND_ float SDF_Repetition (const float3 position, const float step, const float3 count, float (*sdf)(float3)); +ND_ float SDF_Repetition (const float3 position, const float3 step, const float3 count, float (*sdf)(float3)); #endif diff --git a/AE/engine/shared_data/shaders/Sphere.glsl b/AE/engine/shared_data/shaders/Sphere.glsl index 7c249ecf..f4b4c277 100644 --- a/AE/engine/shared_data/shaders/Sphere.glsl +++ b/AE/engine/shared_data/shaders/Sphere.glsl @@ -14,5 +14,16 @@ struct Sphere float3 center; float radius; }; + +ND_ Sphere Sphere_Create (const float3 center, const float radius); //----------------------------------------------------------------------------- + + +Sphere Sphere_Create (const float3 center, const float radius) +{ + Sphere result; + result.center = center; + result.radius = radius; + return result; +} diff --git a/AE/engine/shared_data/shaders/Spline.glsl b/AE/engine/shared_data/shaders/Spline.glsl new file mode 100644 index 00000000..a7cb73d9 --- /dev/null +++ b/AE/engine/shared_data/shaders/Spline.glsl @@ -0,0 +1,212 @@ +// Copyright (c) Zhirnov Andrey. For more information see 'LICENSE' +/* + Spline interpolation. +*/ + +#ifdef __cplusplus +# pragma once +#endif + +#include "Math.glsl" + +/* +================================================= + CatmullRom +---- + T CatmullRom (T p0, T p1, T p2, T p3, Scalar t) +---- + Result will be in range [p1, p2], + points p0 and p3 used as control points. 
+---- + Multiple calls: + CatmullRom( p4, p5, p6, p7, a ); result in range: [p5, p6] + CatmullRom( p5, p6, p7, p8, b ); result in range: [p6, p7] +================================================= +*/ +#define Gen_CATMULLROM1( _stype_, _type_ ) \ + ND_ _type_ CatmullRom (_type_ p0, _type_ p1, _type_ p2, _type_ p3, _stype_ t) \ + { \ + _type_ a0 = Lerp( p0, p1, t + _stype_(1.0) ); \ + _type_ a1 = Lerp( p1, p2, t ); \ + _type_ a2 = Lerp( p2, p3, t - _stype_(1.0) ); \ + _type_ b0 = Lerp( a0, a1, ToUNorm(t) ); \ + _type_ b1 = Lerp( a1, a2, t * _stype_(0.5) ); \ + return Lerp( b0, b1, t ); \ + } + +#define Gen_CATMULLROM( _stype_, _vtype_ ) \ + Gen_CATMULLROM1( _stype_, _stype_ ) \ + Gen_CATMULLROM1( _stype_, _vtype_##2 ) \ + Gen_CATMULLROM1( _stype_, _vtype_##3 ) \ + Gen_CATMULLROM1( _stype_, _vtype_##4 ) + +Gen_CATMULLROM( float, float_vec_t ) + +#if AE_ENABLE_HALF_TYPE + Gen_CATMULLROM( half, half_vec_t ) +#endif +#if AE_ENABLE_DOUBLE_TYPE + Gen_CATMULLROM( double, double_vec_t ) +#endif + +#undef Gen_CATMULLROM1 +#undef Gen_CATMULLROM + +/* +================================================= + QLerp +---- + T QLerp (T p0, T p1, Scalar t) +================================================= +*/ +#define Gen_QLERP1( _stype_, _type_ ) \ + ND_ _type_ QLerp (_type_ p0, _type_ p1, _stype_ t) \ + { \ + /* SmoothStep() - Hermite interpolation */ \ + return Lerp( p0, p1, SmoothStep( _stype_(0.0), _stype_(1.0), t )); \ + } + +#define Gen_QLERP( _stype_, _vtype_ ) \ + Gen_QLERP1( _stype_, _stype_ ) \ + Gen_QLERP1( _stype_, _vtype_##2 ) \ + Gen_QLERP1( _stype_, _vtype_##3 ) \ + Gen_QLERP1( _stype_, _vtype_##4 ) + +Gen_QLERP( float, float_vec_t ) + +#if AE_ENABLE_HALF_TYPE + Gen_QLERP( half, half_vec_t ) +#endif +#if AE_ENABLE_DOUBLE_TYPE + Gen_QLERP( double, double_vec_t ) +#endif + +#undef Gen_QLERP1 +#undef Gen_QLERP + +/* +================================================= + Bezier3 +---- + T Bezier3 (T p0, T p1, T p2, Scalar t) +---- + Result is interpolation between all 3 
points (p0, p1, p2), but: + * may not intersect the specified points. + * may walk outside of points bounding box. +---- + Multiple calls: + Bezier3( p0, p1, p2, a ); + Bezier3( p2, p3, p4, b ); +================================================= +*/ +#define Gen_BEZIER3A( _stype_, _type_ ) \ + ND_ _type_ Bezier3 (_type_ p0, _type_ p1, _type_ p2, _stype_ t) \ + { \ + _type_ a = Lerp( p0, p1, t ); \ + _type_ b = Lerp( p1, p2, t ); \ + return Lerp( a, b, t ); \ + } + +#define Gen_BEZIER3( _stype_, _vtype_ ) \ + Gen_BEZIER3A( _stype_, _stype_ ) \ + Gen_BEZIER3A( _stype_, _vtype_##2 ) \ + Gen_BEZIER3A( _stype_, _vtype_##3 ) \ + Gen_BEZIER3A( _stype_, _vtype_##4 ) + +Gen_BEZIER3( float, float_vec_t ) + +#if AE_ENABLE_HALF_TYPE + Gen_BEZIER3( half, half_vec_t ) +#endif +#if AE_ENABLE_DOUBLE_TYPE + Gen_BEZIER3( double, double_vec_t ) +#endif + +#undef Gen_BEZIER3A +#undef Gen_BEZIER3 + +/* +================================================= + Bezier4 +---- + T Bezier4 (T p0, T p1, T p2, T p3, Scalar t) +---- + Result is interpolation between all 4 points (p0, p1, p2, p3), but: + * may not intersect the specified points. + * may walk outside of points bounding box. 
+---- + Multiple calls: + Bezier4( p0, p1, p2, p3, a ); + Bezier4( p3, p4, p5, p6, b ); +================================================= +*/ +#define Gen_BEZIER4A( _stype_, _type_ ) \ + ND_ _type_ Bezier4 (_type_ p0, _type_ p1, _type_ p2, _type_ p3, _stype_ t) \ + { \ + _type_ a = Lerp( p0, p1, t ); \ + _type_ b = Lerp( p1, p2, t ); \ + _type_ c = Lerp( p2, p3, t ); \ + _type_ m = Lerp( a, b, t ); \ + _type_ n = Lerp( b, c, t ); \ + return Lerp( m, n, t ); \ + } + +#define Gen_BEZIER4( _stype_, _vtype_ ) \ + Gen_BEZIER4A( _stype_, _stype_ ) \ + Gen_BEZIER4A( _stype_, _vtype_##2 ) \ + Gen_BEZIER4A( _stype_, _vtype_##3 ) \ + Gen_BEZIER4A( _stype_, _vtype_##4 ) + +Gen_BEZIER4( float, float_vec_t ) + +#if AE_ENABLE_HALF_TYPE + Gen_BEZIER4( half, half_vec_t ) +#endif +#if AE_ENABLE_DOUBLE_TYPE + Gen_BEZIER4( double, double_vec_t ) +#endif + +#undef Gen_BEZIER4A +#undef Gen_BEZIER4 + +/* +================================================= + BSpline +---- + T BSpline (T p0, T p1, T p2, T p3, Scalar t) +---- + Result will be in range [p1, p2], + points p0 and p3 used as control points. 
+---- + Multiple calls: + BSpline( p4, p5, p6, p7, a ); result in range: [p5, p6] + BSpline( p5, p6, p7, p8, b ); result in range: [p6, p7] +================================================= +*/ +#define Gen_BSPLINE1( _stype_, _type_ ) \ + ND_ _type_ BSpline (_type_ p0, _type_ p1, _type_ p2, _type_ p3, _stype_ t) \ + { \ + _type_ c0 = (p0 + _stype_(4.0) * p1 + p2) / _stype_(6.0); \ + _type_ c1 = (p2 - p0) * _stype_(0.5); \ + _type_ c2 = (p0 + p2) * _stype_(0.5) - p1; \ + _type_ c3 = (p3 - p0) / _stype_(6.0) + (p1 - p2) * _stype_(0.5); \ + return c0 + (c1 * t) + (c2 * t*t) + (c3 * t*t*t); \ + } + +#define Gen_BSPLINE( _stype_, _vtype_ ) \ + Gen_BSPLINE1( _stype_, _stype_ ) \ + Gen_BSPLINE1( _stype_, _vtype_##2 ) \ + Gen_BSPLINE1( _stype_, _vtype_##3 ) \ + Gen_BSPLINE1( _stype_, _vtype_##4 ) + +Gen_BSPLINE( float, float_vec_t ) + +#if AE_ENABLE_HALF_TYPE + Gen_BSPLINE( half, half_vec_t ) +#endif +#if AE_ENABLE_DOUBLE_TYPE + Gen_BSPLINE( double, double_vec_t ) +#endif + +#undef Gen_BSPLINE1 +#undef Gen_BSPLINE diff --git a/AE/engine/shared_data/shaders/aestyle.glsl.h b/AE/engine/shared_data/shaders/aestyle.glsl.h index f36e9e92..17bddae6 100644 --- a/AE/engine/shared_data/shaders/aestyle.glsl.h +++ b/AE/engine/shared_data/shaders/aestyle.glsl.h @@ -29,27 +29,49 @@ #define INOUT #define OUT +// used for vec/mat type building (templates) +#define float_vec_t float +#define float_mat_t float +#define double_vec_t double +#define double_mat_t double +#define int_vec_t int +#define uint_vec_t uint +#define bool_vec_t bool +#define sbyte_vec_t sbyte +#define ubyte_vec_t ubyte +#define sshort_vec_t sshort +#define ushort_vec_t ushort +#define half_vec_t half +#define half_mat_t half +#define slong_vec_t slong +#define ulong_vec_t ulong + // Math types #if 1 #include "aestyle_shared.h" template ND_ T abs (const T); -template ND_ T acos (const T); -template ND_ T acosh (const T); +template ND_ T acos (const T x); // result in range [0, Pi], undefined if Abs(x) > 1 +template 
ND_ T acosh (const T x); // result is non-negative inverse of cosh, undefined if x < 1 ND_ bool all (const bool2); ND_ bool all (const bool3); ND_ bool all (const bool4); ND_ bool any (const bool2); ND_ bool any (const bool3); ND_ bool any (const bool4); -template ND_ T asin (const T); -template ND_ T asinh (const T); -template ND_ T atan (const T y, const T x); -template ND_ T atan (const T y_over_x); -template ND_ T atan (const T); +template ND_ T asin (const T); // result in range [-Pi/2, Pi/2], undefined if Abs(x) > 1 +template ND_ T asinh (const T); // result is inverse of sinh +template ND_ T atan (const T y, const T x); // result in range [-Pi, Pi], undefined if x and y are both 0 +template ND_ T atan (const T y_over_x); // result in range [-Pi/2, Pi/2] +template ND_ T atanh (const T); // result is inverse of tanh, undefined if Abs(x) >= 1 template ND_ T bitCount (const T); + +// For signed data types, the most significant bits will be set to the value of bit offset + bits - 1 (i.e., it is sign extended to the width of the return type). +// The result will be undefined if offset or bits is negative, or if the sum of offset and bits is greater than the number of bits used to store the operand. template ND_ T bitfieldExtract (const T value, int offset, int bits); + +// The result will be undefined if offset or bits is negative, or if the sum of offset and bits is greater than the number of bits used to store the operand. 
template ND_ T bitfieldInsert (const T base, const T insert, int offset, int bits); template ND_ T bitfieldReverse (const T); template ND_ T ceil (const T); @@ -61,28 +83,18 @@ template ND_ T cosh (const T); ND_ float3 cross (const float3 x, const float3 y); ND_ double3 cross (const double3 x, const double3 y); template ND_ T degrees (const T); - ND_ float distance (const float2, const float2); - ND_ float distance (const float3, const float3); - ND_ float distance (const float4, const float4); - ND_ double distance (const double2, const double2); - ND_ double distance (const double3, const double3); - ND_ double distance (const double4, const double4); - ND_ float dot (const float2, const float2); - ND_ float dot (const float3, const float3); - ND_ float dot (const float4, const float4); - ND_ double dot (const double2, const double2); - ND_ double dot (const double3, const double3); - ND_ double dot (const double4, const double4); +template ND_ T distance (const _Vec, const _Vec); +template ND_ T dot (const _Vec, const _Vec); template ND_ _Vec equal (const _Vec x, const _Vec y); template ND_ T exp (const T); template ND_ T exp2 (const T); template ND_ T faceforward (const T n, const T i, const T Nref); -template ND_ T findLSB (const T); -template ND_ T findMSB (const T); - ND_ int floatBitsToInt (const float); // intBitsToFloat -template ND_ _Vec floatBitsToInt (const _Vec); // intBitsToFloat - ND_ uint floatBitsToUint (const float); // uintBitsToFloat -template ND_ _Vec floatBitsToUint (const _Vec); // uintBitsToFloat +template ND_ T findLSB (const T); // -1 on incorrect input +template ND_ T findMSB (const T); // -1 on incorrect input + ND_ int floatBitsToInt (const float); // inverse intBitsToFloat +template ND_ _Vec floatBitsToInt (const _Vec); // inverse intBitsToFloat + ND_ uint floatBitsToUint (const float); // inverse uintBitsToFloat +template ND_ _Vec floatBitsToUint (const _Vec); // inverse uintBitsToFloat template ND_ T floor (const T); template ND_ T 
fma (const T a, const T b, const T c); template ND_ T fract (const T); @@ -91,10 +103,10 @@ template ND_ _Vec greaterThan (const _Vec template ND_ _Vec greaterThanEqual (const _Vec x, const _Vec y); template ND_ void umulExtended (const T x, const T y, OUT T &msb, OUT T &lsb); template ND_ void imulExtended (const T x, const T y, OUT T &msb, OUT T &lsb); - ND_ float intBitsToFloat (const int); // floatBitsToInt -template ND_ _Vec intBitsToFloat (const _Vec); // floatBitsToInt - ND_ float uintBitsToFloat (const uint); // floatBitsToUint -template ND_ _Vec uintBitsToFloat (const _Vec); // floatBitsToUint + ND_ float intBitsToFloat (const int); // inverse floatBitsToInt +template ND_ _Vec intBitsToFloat (const _Vec); // inverse floatBitsToInt + ND_ float uintBitsToFloat (const uint); // inverse floatBitsToUint +template ND_ _Vec uintBitsToFloat (const _Vec); // inverse floatBitsToUint template ND_ T inversesqrt (const T); template ND_ bool isinf (const T); template ND_ _Vec isinf (const _Vec); @@ -107,8 +119,8 @@ template ND_ float length (const _Vec) template ND_ double length (const _Vec); template ND_ _Vec lessThan (const _Vec x, const _Vec y); template ND_ _Vec lessThanEqual (const _Vec x, const _Vec y); -template ND_ T log (const T); -template ND_ T log2 (const T); +template ND_ T log (const T); // result is undefined if x <= 0 +template ND_ T log2 (const T); // result is undefined if x <= 0 template ND_ _Vec max (const _Vec x, const _Vec y); template ND_ _Vec max (const _Vec x, const T y); template ND_ T max (const T x, const T y); @@ -211,35 +223,35 @@ template ND_ _Matrix transpose (const _Matrix ND_ ushort2 unpackUint2x16 (const uint v); ND_ ushort4 unpackUint4x16 (const ulong v); - ND_ short halfBitsToInt16 (const half); // int16BitsToHalf -template ND_ _Vec halfBitsToInt16 (const _Vec); // int16BitsToHalf - ND_ ushort halfBitsToUint16 (const half); // uint16BitsToHalf -template ND_ _Vec halfBitsToUint16 (const _Vec); // uint16BitsToHalf - - ND_ short 
float16BitsToInt16 (const half); // int16BitsToFloat16 -template ND_ _Vec float16BitsToInt16 (const _Vec); // int16BitsToFloat16 - ND_ ushort float16BitsToUint16 (const half); // uint16BitsToFloat16 -template ND_ _Vec float16BitsToUint16 (const _Vec); // uint16BitsToFloat16 - - ND_ slong doubleBitsToInt64 (const double); // int64BitsToDouble -template ND_ _Vec doubleBitsToInt64 (const _Vec); // int64BitsToDouble - ND_ ulong doubleBitsToUint64 (const double); // uint64BitsToDouble -template ND_ _Vec doubleBitsToUint64 (const _Vec); // uint64BitsToDouble - - ND_ half int16BitsToHalf (const short); // halfBitsToInt16 -template ND_ _Vec int16BitsToHalf (const _Vec); // halfBitsToInt16 - ND_ half uint16BitsToHalf (const ushort); // halfBitsToUint16 -template ND_ _Vec uint16BitsToHalf (const _Vec); // halfBitsToUint16 - - ND_ half int16BitsToFloat16 (const short); // float16BitsToInt16 -template ND_ _Vec int16BitsToFloat16 (const _Vec); // float16BitsToInt16 - ND_ half uint16BitsToFloat16 (const ushort); // float16BitsToUint16 -template ND_ _Vec uint16BitsToFloat16 (const _Vec); // float16BitsToUint16 - - ND_ double int64BitsToDouble (const slong); // doubleBitsToInt64 -template ND_ _Vec int64BitsToDouble (const _Vec); // doubleBitsToInt64 - ND_ double uint64BitsToDouble (const ulong); // doubleBitsToUint64 -template ND_ _Vec uint64BitsToDouble (const _Vec); // doubleBitsToUint64 + ND_ short halfBitsToInt16 (const half); // inverse int16BitsToHalf +template ND_ _Vec halfBitsToInt16 (const _Vec); // inverse int16BitsToHalf + ND_ ushort halfBitsToUint16 (const half); // inverse uint16BitsToHalf +template ND_ _Vec halfBitsToUint16 (const _Vec); // inverse uint16BitsToHalf + + ND_ short float16BitsToInt16 (const half); // inverse int16BitsToFloat16 +template ND_ _Vec float16BitsToInt16 (const _Vec); // inverse int16BitsToFloat16 + ND_ ushort float16BitsToUint16 (const half); // inverse uint16BitsToFloat16 +template ND_ _Vec float16BitsToUint16 (const _Vec); // inverse 
uint16BitsToFloat16 + + ND_ slong doubleBitsToInt64 (const double); // inverse int64BitsToDouble +template ND_ _Vec doubleBitsToInt64 (const _Vec); // inverse int64BitsToDouble + ND_ ulong doubleBitsToUint64 (const double); // inverse uint64BitsToDouble +template ND_ _Vec doubleBitsToUint64 (const _Vec); // inverse uint64BitsToDouble + + ND_ half int16BitsToHalf (const short); // inverse halfBitsToInt16 +template ND_ _Vec int16BitsToHalf (const _Vec); // inverse halfBitsToInt16 + ND_ half uint16BitsToHalf (const ushort); // inverse halfBitsToUint16 +template ND_ _Vec uint16BitsToHalf (const _Vec); // inverse halfBitsToUint16 + + ND_ half int16BitsToFloat16 (const short); // inverse float16BitsToInt16 +template ND_ _Vec int16BitsToFloat16 (const _Vec); // inverse float16BitsToInt16 + ND_ half uint16BitsToFloat16 (const ushort); // inverse float16BitsToUint16 +template ND_ _Vec uint16BitsToFloat16 (const _Vec); // inverse float16BitsToUint16 + + ND_ double int64BitsToDouble (const slong); // inverse doubleBitsToInt64 +template ND_ _Vec int64BitsToDouble (const _Vec); // inverse doubleBitsToInt64 + ND_ double uint64BitsToDouble (const ulong); // inverse doubleBitsToUint64 +template ND_ _Vec uint64BitsToDouble (const _Vec); // inverse doubleBitsToUint64 #endif @@ -253,6 +265,7 @@ struct gl enum class _Uns_ {}; + // used some random constants because unsized arrays are not supported static constexpr uint _MaxClipDistance = 8; static constexpr uint _MaxCullDistance = 8; static constexpr uint _MaxSampleMask = 1; @@ -311,7 +324,7 @@ struct gl // - Data from uniform buffer, for arrays it must be constant or dynamically uniform indexing. // - Data from push constants. // - 'DrawID'. - // + // // Non-dynamically uniform: // - 'VertexIndex', 'PrimitiveID', ... // - 'InstanceIndex' - non-uniform on TBDR. 
@@ -1116,7 +1129,7 @@ struct gl #else - // sync + // sync #ifdef AE_MEM_SCOPE void MemoryBarrier (gl::Scope execution, gl::Scope memory, gl::StorageSemantics storage, gl::Semantics sem); #endif @@ -1384,7 +1397,9 @@ struct gl { A, // 'a' argument in CoopMatMulAdd() B, // 'b' argument in CoopMatMulAdd() - Accumulator // 'c' argument in CoopMatMulAdd() + Accumulator, // 'c' argument in CoopMatMulAdd() + + C = Accumulator }; enum class MatrixOperands @@ -1402,7 +1417,15 @@ struct gl template struct CoopMat { - ND_ constexpr uint length() { return Rows * Columns; } + explicit CoopMat (T scalar); + + template + explicit CoopMat (const CoopMat &); + + ND_ uint length() const { return Rows * Columns; } + + ND_ T & operator [] (int i); + ND_ T operator [] (int i) const; }; template @@ -1411,8 +1434,9 @@ struct gl template void CoopMatStore (CoopMat m, OUT B* buf, uint element, uint stride, CooperativeMatrixLayout layout); - template - ND_ CoopMat CoopMatMulAdd (CoopMat a, CoopMat b, CoopMat c, MatrixOperands matrixOperands = MatrixOperands::None); + // a * b + c + template + ND_ CoopMat CoopMatMulAdd (CoopMat a, CoopMat b, CoopMat c, MatrixOperands matrixOperands = MatrixOperands::None); #endif // AE_COOP_MATRIX and AE_MEM_SCOPE diff --git a/AE/engine/src/base/Algorithms/ArrayUtils.h b/AE/engine/src/base/Algorithms/ArrayUtils.h index 9b4cbab3..683300a6 100644 --- a/AE/engine/src/base/Algorithms/ArrayUtils.h +++ b/AE/engine/src/base/Algorithms/ArrayUtils.h @@ -110,19 +110,19 @@ namespace AE::Base ================================================= */ template - ND_ constexpr ssize Distance (T *lhs, T *rhs) __NE___ + ND_ constexpr ssize Distance (T* lhs, T* rhs) __NE___ { return std::distance< T *>( lhs, rhs ); } template - ND_ constexpr ssize Distance (const T *lhs, T *rhs) __NE___ + ND_ constexpr ssize Distance (const T* lhs, T* rhs) __NE___ { return std::distance< T const *>( lhs, rhs ); } template - ND_ constexpr ssize Distance (T *lhs, const T *rhs) __NE___ + ND_ 
constexpr ssize Distance (T* lhs, const T* rhs) __NE___ { return std::distance< T const *>( lhs, rhs ); } @@ -238,9 +238,9 @@ namespace AE::Base ================================================= */ template - ND_ bool IsSorted (Iter begin, Iter end, Cmp && fn) __NE___ + ND_ bool IsSorted (Iter begin, Iter end, Cmp &&fn) __NE___ { - //STATIC_ASSERT( IsNothrowInvocable ); + CheckNothrow( IsNoExcept( fn( begin, end ))); if ( begin == end ) return true; @@ -297,12 +297,23 @@ namespace AE::Base template void RemoveDuplicates (INOUT Array &arr, Compare comp) __NE___ { - STATIC_ASSERT( IsNothrowInvocable ); + CheckNothrow( IsNothrowInvocable ); std::sort( arr.begin(), arr.end(), comp ); arr.erase( std::unique( arr.begin(), arr.end() ), arr.end() ); } +/* +================================================= + EraseIfEqual +================================================= +*/ + template + void EraseIfEqual (INOUT Array &arr, const B &value) __NE___ + { + arr.erase( std::remove( arr.begin(), arr.end(), value ), arr.end() ); + } + /* ================================================= ArrayContains @@ -461,9 +472,7 @@ namespace AE::Base } // _hidden_ - template > - > + template )> ND_ constexpr auto IndicesOnly (const Container& container) __NE___ { return Base::_hidden_::IndicesOnlyRange{ 0, container.size() }; @@ -480,9 +489,7 @@ namespace AE::Base return Base::_hidden_::IndicesOnlyRange{ 0, count }; } - template > - > + template )> ND_ constexpr inline auto IndicesOnly () __NE___ { return Base::_hidden_::IndicesOnlyRange{ 0, usize(T::_Count) }; @@ -540,9 +547,7 @@ namespace AE::Base } // _hidden_ - template > - > + template )> ND_ constexpr auto ReverseIndices (const Container& container) __NE___ { return Base::_hidden_::ReverseIndicesRange{ container.size()-1, container.size() }; @@ -662,10 +667,20 @@ namespace AE::Base template ND_ constexpr auto BitfieldIterate (const T &bits) __NE___ { - STATIC_ASSERT( IsEnum or IsUnsignedInteger ); + StaticAssert( IsEnum or 
IsUnsignedInteger ); return Base::_hidden_::BitfieldIterateView{ bits }; } + template + ND_ constexpr auto BitfieldIterate (const BitSet &bits) __NE___ + { + if constexpr( C <= 32 ) + return Base::_hidden_::BitfieldIterateView{ uint(bits.to_ulong()) }; + else + if constexpr( C <= 64 ) + return Base::_hidden_::BitfieldIterateView{ bits.to_ullong() }; + } + /* ================================================= BitIndexIterate @@ -721,16 +736,25 @@ namespace AE::Base template ND_ constexpr auto BitIndexIterate (const T &bits) __NE___ { - STATIC_ASSERT( IsEnum or IsUnsignedInteger ); + StaticAssert( IsEnum or IsUnsignedInteger ); return Base::_hidden_::BitIndexIterateView< uint, T >{ bits }; } template ND_ constexpr auto BitIndexIterate (const T &bits) __NE___ { - STATIC_ASSERT( IsEnum or IsUnsignedInteger ); + StaticAssert( IsEnum or IsUnsignedInteger ); return Base::_hidden_::BitIndexIterateView< R, T >{ bits }; } + template + ND_ constexpr auto BitIndexIterate (const BitSet &bits) __NE___ + { + if constexpr( C <= 32 ) + return Base::_hidden_::BitIndexIterateView< uint, uint >{ uint(bits.to_ulong()) }; + else + if constexpr( C <= 64 ) + return Base::_hidden_::BitIndexIterateView< uint, ulong >{ bits.to_ullong() }; + } } // AE::Base diff --git a/AE/engine/src/base/Algorithms/Cast.h b/AE/engine/src/base/Algorithms/Cast.h index 9ba1d4b8..2b956ce8 100644 --- a/AE/engine/src/base/Algorithms/Cast.h +++ b/AE/engine/src/base/Algorithms/Cast.h @@ -5,7 +5,6 @@ #include "base/Common.h" #include "base/Containers/Ptr.h" - namespace AE::Base { @@ -26,7 +25,7 @@ namespace AE::Base { constexpr usize align = alignof(R); - STATIC_ASSERT( ((align & (align - 1)) == 0), "Align must be power of 2" ); + StaticAssert( ((align & (align - 1)) == 0), "Align must be power of 2" ); return (usize(ptr) & (align-1)) == 0; } @@ -45,7 +44,7 @@ namespace AE::Base if ( not CheckPointerAlignment( ptr )) { std::stringstream str; - str << "Failed to cast pointer from '" << typeid(T).name() << "' to '" << 
typeid(R).name() + str << "Failed to cast pointer from '" << TypeNameOf() << "' to '" << TypeNameOf() << "': memory address " << std::hex << usize(ptr) << " is not aligned to " << std::dec << alignof(R) << ", it may cause undefined behavior"; AE_LOGE( str.str() ); @@ -64,7 +63,7 @@ namespace AE::Base template ND_ constexpr R const volatile* Cast (T const volatile* value) __NE___ { - STATIC_ASSERT( sizeof(R*) == sizeof(T*) and sizeof(T*) == sizeof(void*) ); + StaticAssert( sizeof(R*) == sizeof(T*) and sizeof(T*) == sizeof(void*) ); CheckPointerCast( value ); return static_cast< R const volatile *>( static_cast< void const volatile *>(value) ); } @@ -72,7 +71,7 @@ namespace AE::Base template ND_ constexpr R volatile* Cast (T volatile* value) __NE___ { - STATIC_ASSERT( sizeof(R*) == sizeof(T*) and sizeof(T*) == sizeof(void*) ); + StaticAssert( sizeof(R*) == sizeof(T*) and sizeof(T*) == sizeof(void*) ); CheckPointerCast( value ); return static_cast< R volatile *>( static_cast< void volatile *>(value) ); } @@ -80,7 +79,7 @@ namespace AE::Base template ND_ constexpr R const* Cast (T const* value) __NE___ { - STATIC_ASSERT( sizeof(R*) == sizeof(T*) and sizeof(T*) == sizeof(void*) ); + StaticAssert( sizeof(R*) == sizeof(T*) and sizeof(T*) == sizeof(void*) ); CheckPointerCast( value ); return static_cast< R const *>( static_cast< void const *>(value) ); } @@ -88,7 +87,7 @@ namespace AE::Base template ND_ constexpr R* Cast (T* value) __NE___ { - STATIC_ASSERT( sizeof(R*) == sizeof(T*) and sizeof(T*) == sizeof(void*) ); + StaticAssert( sizeof(R*) == sizeof(T*) and sizeof(T*) == sizeof(void*) ); CheckPointerCast( value ); return static_cast< R *>( static_cast< void *>(value) ); } @@ -157,13 +156,13 @@ namespace AE::Base TimeCast (chrono) ================================================= */ - template , int> = 0> + template )> ND_ constexpr To TimeCast (const std::chrono::duration &value) __NE___ { return std::chrono::duration_cast( value ); } - template , int> = 0> + template 
)> ND_ constexpr std::chrono::time_point TimeCast (const std::chrono::time_point &value) __NE___ { return std::chrono::time_point_cast( value ); @@ -204,6 +203,27 @@ namespace AE::Base { return std::dynamic_pointer_cast( other ); } + + template + ND_ bool CastAllowed (T const* value) __NE___ + { + return (dynamic_cast( value ) != null) == (value != null); + } + + template + ND_ bool CastNotAllowed (T const* value) __NE___ + { + return not CastAllowed( value ); + } + +#else + + template + ND_ bool CastAllowed (T const* value) __NE___ { return true; } + + template + ND_ bool CastNotAllowed (T const* value) __NE___ { return true; } + #endif /* @@ -214,9 +234,9 @@ namespace AE::Base template ND_ constexpr To BitCast (const From& src) __NE___ { - STATIC_ASSERT( sizeof(To) == sizeof(From), "must be same size!" ); - STATIC_ASSERT( IsMemCopyAvailable and IsMemCopyAvailable, "must be trivial types!" ); - //STATIC_ASSERT( not IsSameTypes< To, From >); // to find unnecessary cast + StaticAssert( sizeof(To) == sizeof(From), "must be same size!" ); + StaticAssert( IsMemCopyAvailable and IsMemCopyAvailable, "must be trivial types!" ); + //StaticAssert( not IsSameTypes< To, From >); // to find unnecessary cast #ifdef __cpp_lib_bit_cast if constexpr( std::is_trivially_copyable_v and std::is_trivially_copyable_v ){ @@ -241,9 +261,9 @@ namespace AE::Base template ND_ constexpr To UnsafeBitCast (const From& src) __NE___ { - //STATIC_ASSERT( sizeof(From) <= sizeof(To), "cast will lost data!" ); - STATIC_ASSERT( IsMemCopyAvailable and IsMemCopyAvailable, "must be trivial types!" ); - //STATIC_ASSERT( not IsSameTypes< To, From >); // to find unnecessary cast + //StaticAssert( sizeof(From) <= sizeof(To), "cast will lost data!" ); + StaticAssert( IsMemCopyAvailable and IsMemCopyAvailable, "must be trivial types!" 
); + //StaticAssert( not IsSameTypes< To, From >); // to find unnecessary cast To dst = {}; std::memcpy( OUT &dst, &src, std::min( sizeof(From), sizeof(To) )); @@ -298,7 +318,7 @@ namespace AE::Base template ND_ constexpr To LimitCast (const From& src) __NE___ { - STATIC_ASSERT( MaxValue() >= MaxValue() ); + StaticAssert( MaxValue() >= MaxValue() ); if constexpr( IsSigned and IsUnsigned ) { @@ -308,7 +328,7 @@ namespace AE::Base } else { - STATIC_ASSERT( MinValue() <= MinValue() ); + StaticAssert( MinValue() <= MinValue() ); return src > static_cast(MaxValue()) ? MaxValue() : src < static_cast(MinValue()) ? MinValue() : diff --git a/AE/engine/src/base/Algorithms/Hash.h b/AE/engine/src/base/Algorithms/Hash.h index 53a01422..bcc07823 100644 --- a/AE/engine/src/base/Algorithms/Hash.h +++ b/AE/engine/src/base/Algorithms/Hash.h @@ -2,6 +2,10 @@ #pragma once +#ifdef AE_ENABLE_XXHASH +# include "xxhash.h" +#endif + namespace AE::Base { @@ -139,6 +143,37 @@ namespace AE::Base dst &= ~((1 << ignoreMantissaBits)-1); return HashVal( std::hash()( dst )); } +//----------------------------------------------------------------------------- + + +#ifdef AE_ENABLE_XXHASH +/* +================================================= + XXHash32 (buffer) +================================================= +*/ + ND_ inline HashVal32 XXHash32 (const void* ptr, const usize sizeInBytes) __NE___ + { + return HashVal32{ XXH32( ptr, sizeInBytes, 0 )}; + } + +/* +================================================= + XXHash64 (buffer) +================================================= +*/ + ND_ inline HashVal64 XXHash64 (const void* ptr, const usize sizeInBytes) __NE___ + { + #if AE_HAS_SIMD + return HashVal64{ XXH3_64bits( ptr, sizeInBytes )}; + #else + return HashVal64{ XXH64( ptr, sizeInBytes, 0 )}; + #endif + } + +#endif // AE_ENABLE_XXHASH +//----------------------------------------------------------------------------- + /* ================================================= @@ -147,11 +182,18 @@ 
namespace AE::Base use private api to calculate hash of buffer ================================================= */ - ND_ inline HashVal HashOf (const void *ptr, usize sizeInBytes) __NE___ + ND_ inline HashVal HashOf (const void* ptr, const usize sizeInBytes) __NE___ { - ASSERT( ptr != null and sizeInBytes ); + ASSERT( ptr != null and sizeInBytes > 0 ); + + #if defined(AE_ENABLE_XXHASH) + if constexpr( sizeof(HashVal) == sizeof(HashVal64) ) + return HashVal{XXHash64( ptr, sizeInBytes )}; + else + if constexpr( sizeof(HashVal) == sizeof(HashVal32) ) + return HashVal{XXHash32( ptr, sizeInBytes )}; - # if defined(AE_HAS_HASHFN_HashArrayRepresentation) + #elif defined(AE_HAS_HASHFN_HashArrayRepresentation) return HashVal{std::_Hash_array_representation( static_cast(ptr), sizeInBytes )}; #elif defined(AE_HAS_HASHFN_Murmur2OrCityhash) diff --git a/AE/engine/src/base/Algorithms/Parser.cpp b/AE/engine/src/base/Algorithms/Parser.cpp index 438ea6b7..3de6b3b2 100644 --- a/AE/engine/src/base/Algorithms/Parser.cpp +++ b/AE/engine/src/base/Algorithms/Parser.cpp @@ -147,7 +147,7 @@ namespace AE::Base // windows style "\r\n" if_unlikely( (c == '\r') & (n == '\n') ) { - if ( lines == 0 and pos > 0 ) ++lines; + if ( lines == 0 and pos > 0 ) ++lines; ++lines; ++pos; continue; @@ -565,7 +565,7 @@ namespace { //if ( not parser.OnUnknown( c )) // RETURN_ERR( "invalid char '"s << c << "'" ); - } + } if ( is_word ) tokens.push_back( StringView{ str.data() + begin, str.length() - begin }); @@ -675,7 +675,7 @@ namespace { if ( not parser.OnUnknown( c )) { RETURN_ERR( "invalid char '"s << c << "'" ); } - } + } if ( is_word ) tokens.push_back( StringView{ str.data() + begin, str.length() - begin }); @@ -852,7 +852,7 @@ namespace */ void Parser::ValidateVarName_CPP (StringView name, OUT String &result) __NE___ { - CATCH_ERRV( result.resize( name.size() )); + NOTHROW_ERRV( result.resize( name.size() )); for (usize i = 0; i < name.size(); ++i) { diff --git 
a/AE/engine/src/base/Algorithms/StringUtils.h b/AE/engine/src/base/Algorithms/StringUtils.h index 01cacdba..666fe6a0 100644 --- a/AE/engine/src/base/Algorithms/StringUtils.h +++ b/AE/engine/src/base/Algorithms/StringUtils.h @@ -22,6 +22,7 @@ namespace AE::Base { using namespace std::string_literals; + using namespace std::string_view_literals; /* ================================================= @@ -510,7 +511,7 @@ namespace AE::Base ================================================= */ template - ND_ constexpr bool IsAnsiString (const T *ptr, usize length) __NE___ + ND_ constexpr bool IsAnsiString (const T* ptr, usize length) __NE___ { for (usize i = 0; i < length; ++i) { @@ -575,7 +576,7 @@ namespace AE::Base ================================================= */ template - ND_ EnableIf< IsEnum or IsInteger, String> ToString (const T &value) __Th___ + ND_ EnableIf or IsInteger, String> ToString (const T &value) __Th___ { if constexpr( Radix == 10 ) { @@ -703,7 +704,7 @@ namespace AE::Base ================================================= */ template - ND_ String ToString (const TBytes &value) __Th___ + ND_ String ToString (const TByte &value) __Th___ { const T kb = SafeLeftBitShift( T{1}, 12 ); const T mb = SafeLeftBitShift( T{1}, 22 ); @@ -748,6 +749,8 @@ namespace AE::Base const double abs_time = Abs( time ); String str; + if ( not IsFinite( time )) {} + else if ( abs_time > 59.0 * 60.0 ) str << ToString( time * (1.0/3600.0), precission ) << " h"; else @@ -788,7 +791,7 @@ namespace AE::Base return ToAnsiString( str ); } - ND_ inline String ToString (const wchar_t *str) __Th___ + ND_ inline String ToString (const wchar_t* str) __Th___ { return ToAnsiString( WStringView{str} ); } @@ -1087,7 +1090,7 @@ namespace AE::Base int val = 0; auto err = std::from_chars( str.data(), str.data() + str.size(), OUT val, base ); - + Unused( err ); ASSERT( err.ec == std::errc() ); return val; } @@ -1099,7 +1102,7 @@ namespace AE::Base uint val = 0; auto err = std::from_chars( 
str.data(), str.data() + str.size(), OUT val, base ); - + Unused( err ); ASSERT( err.ec == std::errc() ); return val; } @@ -1111,7 +1114,7 @@ namespace AE::Base ulong val = 0; auto err = std::from_chars( str.data(), str.data() + str.size(), OUT val, base ); - + Unused( err ); ASSERT( err.ec == std::errc() ); return val; } diff --git a/AE/engine/src/base/Algorithms/Utf8.h b/AE/engine/src/base/Algorithms/Utf8.h index d38d75b8..6f780c90 100644 --- a/AE/engine/src/base/Algorithms/Utf8.h +++ b/AE/engine/src/base/Algorithms/Utf8.h @@ -43,7 +43,7 @@ namespace AE::Base ND_ forceinline CharUtf32 Utf8Decode (const CharUtf8 *str, const usize length, INOUT usize &pos) __NE___ { ASSERT( pos < length ); - STATIC_ASSERT( sizeof(utf8proc_uint8_t) == sizeof(*str) ); + StaticAssert( sizeof(utf8proc_uint8_t) == sizeof(*str) ); utf8proc_int32_t symb; utf8proc_ssize_t res = utf8proc_iterate( Cast(str + pos), length - pos, OUT &symb ); @@ -68,7 +68,7 @@ namespace AE::Base { Unused( size ); ASSERT( pos+4 <= size ); - STATIC_ASSERT( sizeof(utf8proc_uint8_t) == sizeof(*dst) ); + StaticAssert( sizeof(utf8proc_uint8_t) == sizeof(*dst) ); ssize cnt = utf8proc_encode_char( symb, Cast(dst + pos) ); ASSERT( cnt > 0 and cnt <= 4 ); diff --git a/AE/engine/src/base/CMakeLists.txt b/AE/engine/src/base/CMakeLists.txt index 86446b1f..55b6bad6 100644 --- a/AE/engine/src/base/CMakeLists.txt +++ b/AE/engine/src/base/CMakeLists.txt @@ -33,6 +33,12 @@ if (TARGET "Abseil-lib") target_link_libraries( "Base" PUBLIC "Abseil-lib" ) endif() +if (TARGET "xxHash-lib") + target_link_libraries( "Base" PUBLIC "xxHash-lib" ) +endif() + +target_link_libraries( "Base" PUBLIC "FametaCounter-lib" ) + if (${CMAKE_SYSTEM_NAME} STREQUAL "Windows") # for sockets target_link_libraries( "Base" PUBLIC "ws2_32" ) diff --git a/AE/engine/src/base/Common.h b/AE/engine/src/base/Common.h index 0bd2489e..bd7c9636 100644 --- a/AE/engine/src/base/Common.h +++ b/AE/engine/src/base/Common.h @@ -18,7 +18,7 @@ namespace AE using ssize = 
intptr_t; using usize = size_t; -#if defined(__cpp_char8_t) and not defined(AE_PLATFORM_APPLE) +#if defined(__cpp_char8_t) using CharUtf8 = char8_t; // C++20 #else enum class CharUtf8 : char {}; @@ -66,13 +66,13 @@ namespace AE::Base template > - using BasicString = std::basic_string< T, std::char_traits, A >; + using BasicString = std::basic_string< T, std::char_traits, A >; - using String = BasicString< CharAnsi >; - using WString = BasicString< wchar_t >; - using U8String = BasicString< CharUtf8 >; - using U16String = BasicString< CharUtf16 >; - using U32String = BasicString< CharUtf32 >; + using String = BasicString< CharAnsi >; + using WString = BasicString< wchar_t >; + using U8String = BasicString< CharUtf8 >; + using U16String = BasicString< CharUtf16 >; + using U32String = BasicString< CharUtf32 >; template @@ -84,10 +84,6 @@ namespace AE::Base using U32StringView = BasicStringView< CharUtf32 >; - template > - using Array = std::vector< T, A >; - template using SharedPtr = std::shared_ptr< T >; template using WeakPtr = std::weak_ptr< T >; @@ -102,6 +98,10 @@ namespace AE::Base template using Function = std::function< T >; + template > + using Array = std::vector< T, A >; + template > using Unique = std::unique_ptr< T, Deleter >; @@ -117,7 +117,6 @@ namespace AE::Base - // Uppercase names reserved by physical quantity wrappers using seconds = std::chrono::seconds; using milliseconds = std::chrono::milliseconds; @@ -134,10 +133,10 @@ namespace AE::Base MakeShared ================================================= */ - template - ND_ SharedPtr MakeShared (Types&&... args) __Th___ + template + ND_ SharedPtr MakeShared (Args&&... args) __Th___ { - return std::make_shared( FwdArg( args )... ); + return std::make_shared( FwdArg( args )... ); } /* @@ -145,10 +144,13 @@ namespace AE::Base MakeUnique ================================================= */ - template - ND_ Unique MakeUnique (Types&&... args) __Th___ + template + ND_ Unique MakeUnique (Args&&... 
args) __NE___ { - return std::make_unique( FwdArg( args )... ); + CheckNothrow( IsNothrowCtor< T, Args... >); + //CheckNothrow( IsNoExcept( new T{ FwdArg(args)... })); + + return Unique( new T{ FwdArg( args )... }); } /* @@ -176,6 +178,37 @@ namespace AE::Base #endif } +/* +================================================= + TypeNameOf +================================================= +*/ +#ifdef AE_ENABLE_RTTI + template + ND_ constexpr StringView TypeNameOf () __NE___ + { + return StringView{ typeid(T).name() }; + } + +# if defined(__cpp_char8_t) and defined(AE_PLATFORM_APPLE) + // bugfix: link error in MacOS clang14-15 + template <> + ND_ constexpr StringView TypeNameOf () __NE___ + { + return StringView{"char8_t"}; + } +# endif +#endif +//----------------------------------------------------------------------------- + + + template struct TMemCopyAvailable< BasicStringView > { static constexpr bool value = true; }; + template struct TZeroMemAvailable< BasicStringView > { static constexpr bool value = false; }; + template struct TTriviallySerializable< BasicStringView > { static constexpr bool value = false; }; + template struct TTriviallyDestructible< BasicStringView > { static constexpr bool value = true; }; + + template struct TTriviallyDestructible< BitSet > { static constexpr bool value = true; }; + } // AE::Base @@ -189,7 +222,7 @@ namespace AE // methods public: - explicit Exception (const char *str) __NE___ : _what{str} {} + explicit Exception (const char* str) __NE___ : _what{str} {} explicit Exception (const std::string &str) __NE___ : _what{str.c_str()} {} ND_ const char* what () C_NE___ { return _what; } diff --git a/AE/engine/src/base/CompileTime/Constants.h b/AE/engine/src/base/CompileTime/Constants.h index cb6f00e2..1fa2929c 100644 --- a/AE/engine/src/base/CompileTime/Constants.h +++ b/AE/engine/src/base/CompileTime/Constants.h @@ -18,7 +18,7 @@ namespace AE::Base template ND_ constexpr operator const T () C_NE___ { - STATIC_ASSERT( T(~T{0}) > T{0} 
); + StaticAssert( T(~T{0}) > T{0} ); return T(~T{0}); } @@ -49,7 +49,7 @@ namespace AE::Base template ND_ constexpr operator const T () C_NE___ { - //STATIC_ASSERT( std::is_integral_v or std::is_enum_v ); + //StaticAssert( std::is_integral_v or std::is_enum_v ); return T(0); } @@ -106,10 +106,10 @@ namespace AE::Base public: - constexpr AnyFloatConst (double val) __NE___ : _d{val}, _f{float(val)} {} + explicit constexpr AnyFloatConst (double val) __NE___ : _d{val}, _f{float(val)} {} template - ND_ constexpr operator const T () C_NE___ + ND_ constexpr operator const T () C_NE___ { if constexpr( std::is_same_v< T, double >) return _d; diff --git a/AE/engine/src/base/CompileTime/Counter.h b/AE/engine/src/base/CompileTime/Counter.h new file mode 100644 index 00000000..37587981 --- /dev/null +++ b/AE/engine/src/base/CompileTime/Counter.h @@ -0,0 +1,18 @@ +// Copyright (c) Zhirnov Andrey. For more information see 'LICENSE' + +#pragma once + +#ifdef AE_ENABLE_FAMETA_COUNTER +# include "external/shared/fameta-counter/counter.hpp" +# include "base/Common.h" + +namespace AE::Base +{ + + template + using CT_Counter = fameta::counter< UniqueType >; + + +} // AE::Base + +#endif // AE_ENABLE_FAMETA_COUNTER diff --git a/AE/engine/src/base/CompileTime/Hash.h b/AE/engine/src/base/CompileTime/Hash.h index 3c971629..972e8838 100644 --- a/AE/engine/src/base/CompileTime/Hash.h +++ b/AE/engine/src/base/CompileTime/Hash.h @@ -53,7 +53,7 @@ namespace AE::Base::_hidden_ 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d }; - ND_ inline constexpr uint crc32_hash (char const *str, usize len, uint prev_crc) __NE___ + ND_ inline constexpr uint crc32_hash (char const* str, usize len, uint prev_crc) __NE___ { for (; (*str != '\0') & (len != 0); ++str, --len) { @@ -73,7 +73,7 @@ namespace AE::Base CT_Hash (string) ================================================= */ - ND_ inline constexpr HashVal32 CT_Hash (const char *str, usize len, uint seed) __NE___ + ND_ inline constexpr HashVal32 CT_Hash 
(const char* str, usize len, uint seed) __NE___ { return HashVal32{ Base::_hidden_::crc32_hash( str, len, seed )}; } diff --git a/AE/engine/src/base/CompileTime/Math.h b/AE/engine/src/base/CompileTime/Math.h index 869ec9af..7b839194 100644 --- a/AE/engine/src/base/CompileTime/Math.h +++ b/AE/engine/src/base/CompileTime/Math.h @@ -88,7 +88,7 @@ namespace _hidden_ template inline constexpr T CT_Pow (const T &base) { - STATIC_ASSERT( IsInteger and IsInteger and Power >= 0 ); + StaticAssert( IsInteger and IsInteger and Power >= 0 ); if constexpr( Power == 0 ) { @@ -145,7 +145,7 @@ namespace _hidden_ { template struct _ToBitMask { - STATIC_ASSERT( IsUnsignedInteger ); + StaticAssert( IsUnsignedInteger ); static constexpr R mask = Count >= CT_SizeOfInBits ? ~R{0} : Count < 0 ? R{0} : (R{1} << Count) - 1; diff --git a/AE/engine/src/base/CompileTime/TypeList.h b/AE/engine/src/base/CompileTime/TypeList.h index d5124101..a73ac320 100644 --- a/AE/engine/src/base/CompileTime/TypeList.h +++ b/AE/engine/src/base/CompileTime/TypeList.h @@ -15,47 +15,47 @@ namespace AE::Base struct TypeList { public: - struct AsTuple { using type = Tuple< Types... >; }; + struct AsTuple { using type = Tuple< Types... >; }; template - inline static constexpr usize FirstIndex = Base::_hidden_::TL_GetFirstIndex< T, 0, Types... >::value; + inline static constexpr usize FirstIndex = Base::_hidden_::TL_GetFirstIndex< T, 0, Types... >::value; template - inline static constexpr usize LastIndex = Base::_hidden_::TL_GetLastIndex< T, 0, Types... >::value; + inline static constexpr usize LastIndex = Base::_hidden_::TL_GetLastIndex< T, 0, Types... >::value; template