From bfbdf5006d21c3edaa5e7611a4933d18e67a534c Mon Sep 17 00:00:00 2001 From: azhirnov Date: Mon, 30 Sep 2024 14:51:01 +0300 Subject: [PATCH] v24.9.258 RC1 - added PerformanceStat instead of CpuPerformance class - CMake: unity build - Profiler: add GeneralProfiler and RemoteGeneralProfiler with cpu usage, mem usage, etc - Serializing: Bit packing moved to separate BitSerializer - Vulkan: add VK_NV_clip_space_w_scaling - Vulkan: add VK_EXT_subgroup_size_control - add AE_LICENSE_* to shader code, allow to disable code with unsupported license --- .github/workflows/android.yml | 2 +- .github/workflows/linux.yml | 6 +- .github/workflows/macos.yml | 2 +- .github/workflows/windows.yml | 6 +- AE/CMakeLists.txt | 18 +- AE/android/cicd/build.gradle | 1 + AE/android/demo/build.gradle | 1 + AE/android/demo/src/main/AndroidManifest.xml | 4 +- .../src/main/java/AE/engine/BaseActivity.java | 72 +- .../main/java/AE/engine/BaseApplication.java | 5 +- AE/android/rg-device/build.gradle | 1 + .../main/java/AE/RmG/Device/RmGDActivity.java | 2 + AE/android/test/build.gradle | 1 + AE/build_scripts/win_arm64/init.bat | 10 + .../{win_vc2022 => win_x64}/init.bat | 0 .../{win_vc2022 => win_x64}/init_no_pch.bat | 0 .../{win_vc2022 => win_x64}/init_noexc.bat | 0 .../init_precompiled.bat | 0 .../{win_vc2022 => win_x64}/init_rem.bat | 0 .../precompile_engine.bat | 16 + .../init_clang.bat | 0 .../win_x64_clang/init_clang_rem.bat | 10 + AE/docs/Papers.md | 1 + AE/docs/engine/Build.md | 12 +- AE/docs/engine/Platforms.md | 11 +- AE/docs/engine/Profiling.md | 11 +- AE/docs/engine/VulkanFeatures.md | 11 +- AE/docs/papers/ArchitectureNotes-ru.md | 2 + AE/docs/papers/GPU_Benchmarks.md | 83 + AE/docs/papers/GraphicsNotes-ru.md | 42 +- AE/docs/papers/bench/AMD_RX570.md | 100 + AE/docs/papers/bench/ARM_Mali_G57.md | 179 + AE/docs/papers/bench/ARM_Mali_T830.md | 60 + AE/docs/papers/bench/Adreno_505.md | 37 + AE/docs/papers/bench/Adreno_660.md | 154 + AE/docs/papers/bench/Apple_M1.md | 17 + 
AE/docs/papers/bench/Intel_UHD620.md | 108 + AE/docs/papers/bench/NVidia_RTX2080.md | 219 + AE/docs/papers/bench/PowerVR_BXM.md | 134 + .../img/compute-subgroups/adreno-600.png | Bin 0 -> 2770 bytes .../bench/img/compute-subgroups/amd-gcn4.png | Bin 0 -> 1612 bytes .../img/compute-subgroups/intel-gen9_5.png | Bin 0 -> 1178 bytes .../bench/img/compute-subgroups/mac-m1.png | Bin 0 -> 1490 bytes .../bench/img/compute-subgroups/nv-turing.png | Bin 0 -> 1492 bytes .../compute-subgroups/powervr-bxm-16x16.png | Bin 0 -> 1785 bytes .../img/compute-subgroups/powervr-bxm-8x8.png | Bin 0 -> 1266 bytes .../bench/img/compute-subgroups/valhall-1.png | Bin 0 -> 847 bytes .../bench/img/full-quad/adreno-660-tex-ht.png | Bin 0 -> 3982 bytes .../bench/img/full-quad/valhall-1-qd.png | Bin 0 -> 1027 bytes .../bench/img/full-quad/valhall-1-tex-ht.png | Bin 0 -> 1021 bytes .../img/full-subgroup/adreno-660-large.png | Bin 0 -> 11534 bytes .../img/full-subgroup/adreno-660-tex.png | Bin 0 -> 1182 bytes .../bench/img/full-subgroup/adreno-660.png | Bin 0 -> 1173 bytes .../img/full-subgroup/valhall-1-large.png | Bin 0 -> 16554 bytes .../bench/img/full-subgroup/valhall-1-tex.png | Bin 0 -> 856 bytes .../img/graphics-subgroups/adreno-600.png | Bin 0 -> 6647 bytes .../bench/img/graphics-subgroups/amd-gcn4.png | Bin 0 -> 1877 bytes .../img/graphics-subgroups/intel-gen9_5.png | Bin 0 -> 1620 bytes .../bench/img/graphics-subgroups/mac-m1.png | Bin 0 -> 1556 bytes .../img/graphics-subgroups/nv-turing.png | Bin 0 -> 2101 bytes .../img/graphics-subgroups/powervr-bxm.png | Bin 0 -> 2168 bytes .../img/graphics-subgroups/valhall-1.png | Bin 0 -> 16159 bytes .../bench/img/nv-turing-smid-compute.png | Bin 0 -> 8296 bytes .../bench/img/nv-turing-smid-graphics.png | Bin 0 -> 2583 bytes .../bench/img/valhall-1-unique-subgroups.png | Bin 0 -> 3718 bytes AE/engine/Changelog.md | 15 +- AE/engine/cmake/utils.cmake | 12 + .../AndroidTempl/app/build.gradle | 2 +- .../AndroidTest/app/build.gradle | 2 +- 
.../HWCPipe/android/app/build.gradle | 2 +- .../shared/Abseil/android/app/build.gradle | 2 +- .../AngelScript/android/app/build.gradle | 2 +- .../shared/Brotli/android/app/build.gradle | 2 +- AE/engine/external/shared/GLM/update.bat | 2 +- AE/engine/external/shared/GLM/update.sh | 2 +- .../shared/Utf8Proc/android/app/build.gradle | 2 +- .../shared/imgui/android/app/build.gradle | 2 +- .../shared/lz4/android/app/build.gradle | 2 +- .../shared/xxHash/android/install.bat | 8 - .../shared/zstd/android/app/build.gradle | 2 +- .../external/win-arm64-msvc143/CMakeLists.txt | 29 + AE/engine/pch/Base.h | 2 + AE/engine/pch/Graphics.h | 9 +- AE/engine/pch/Serializing.h | 6 +- .../performance/base/Perf_FindSubString.cpp | 4 +- AE/engine/performance/base/main.cpp | 8 + AE/engine/performance/threading/main.cpp | 8 + .../shared_data/3party_shaders/Blur-1.glsl | 4 + .../3party_shaders/ColorSpaceUtility-1.glsl | 29 +- .../3party_shaders/ColorUtils-1.glsl | 5 + .../shared_data/3party_shaders/Easing-1.glsl | 80 + .../3party_shaders/FastMath-1.glsl | 205 + .../shared_data/3party_shaders/GBuffer-1.glsl | 11 + .../shared_data/3party_shaders/Hash-1.glsl | 13 +- .../shared_data/3party_shaders/Hash-2.glsl | 69 +- .../shared_data/3party_shaders/Hash-3.glsl | 14 +- .../shared_data/3party_shaders/Hash-4.glsl | 489 +-- .../3party_shaders/Intersectors-1.glsl | 3 + .../3party_shaders/Intersectors-2.glsl | 3 + .../3party_shaders/LightModels.glsl | 35 +- .../shared_data/3party_shaders/Noise-1.glsl | 31 + .../shared_data/3party_shaders/Noise-2.glsl | 3 + .../shared_data/3party_shaders/PBR-1.glsl | 3 + .../shared_data/3party_shaders/PBR-2.glsl | 16 + .../shared_data/3party_shaders/SDF-1.glsl | 8 +- .../shared_data/3party_shaders/SDF-2.glsl | 4 + .../3party_shaders/TileableNoise.glsl | 69 + .../3party_shaders/ToneMapping-1.glsl | 5 + .../3party_shaders/ToneMapping-2.glsl | 10 + .../feature_set/min_mobile_mali.as | 17 +- AE/engine/shared_data/scripts/asset_packer.as | 2685 +++++++------ 
.../shared_data/scripts/input_actions.as | 1481 ++++--- .../shared_data/scripts/offline_packer.as | 112 +- .../shared_data/scripts/pipeline_compiler.as | 3516 +++++++++-------- AE/engine/shared_data/scripts/res_editor.as | 3188 ++++++++------- .../shared_data/shaders/CodeTemplates.glsl | 106 +- AE/engine/shared_data/shaders/Color.glsl | 6 +- AE/engine/shared_data/shaders/Easing.glsl | 113 +- AE/engine/shared_data/shaders/FastMath.glsl | 25 + AE/engine/shared_data/shaders/Geometry.glsl | 40 +- .../shared_data/shaders/GlobalIndex.glsl | 14 +- .../shared_data/shaders/HWRayTracing.glsl | 7 +- AE/engine/shared_data/shaders/Math.glsl | 755 ++-- AE/engine/shared_data/shaders/Matrix.glsl | 146 +- AE/engine/shared_data/shaders/Noise.glsl | 5 + AE/engine/shared_data/shaders/Quaternion.glsl | 96 +- AE/engine/shared_data/shaders/Ray.glsl | 10 +- AE/engine/shared_data/shaders/SDF.glsl | 10 +- .../shared_data/shaders/TexSampling.glsl | 26 + .../shared_data/shaders/TilableNoise.glsl | 28 +- AE/engine/shared_data/shaders/aestyle.glsl.h | 96 +- .../shared_data/shaders/aestyle_shared.h | 99 +- AE/engine/src/audio/BASS/AudioInputBASS.cpp | 2 +- AE/engine/src/audio/BASS/AudioSystemBASS.cpp | 4 +- AE/engine/src/audio/BASS/UtilsBASS.cpp | 138 + AE/engine/src/audio/BASS/UtilsBASS.cpp.h | 2 + AE/engine/src/audio/CMakeLists.txt | 1 + AE/engine/src/base/Algorithms/Cast.h | 18 +- AE/engine/src/base/Algorithms/Parser.cpp | 54 +- AE/engine/src/base/Algorithms/Parser.h | 41 + AE/engine/src/base/Algorithms/StringUtils.h | 11 +- AE/engine/src/base/CMakeLists.txt | 6 + AE/engine/src/base/Common.h | 13 +- AE/engine/src/base/Containers/FixedArray.h | 41 +- AE/engine/src/base/Containers/StructView.h | 10 +- AE/engine/src/base/Containers/Tuple.h | 122 +- AE/engine/src/base/Containers/Union.h | 4 +- .../src/base/Containers/UntypedStorage.h | 16 +- AE/engine/src/base/DataSource/DataStream.h | 32 +- .../src/base/DataSource/StdFileStream.cpp | 2 +- .../src/base/DataSource/UnixFileHelper.cpp.h | 2 + 
.../base/DataSource/WindowsFileHelper.cpp.h | 4 +- AE/engine/src/base/Defines/Attribs.h | 146 +- .../src/base/Defines/DetectLicense.inl.h | 32 +- AE/engine/src/base/Defines/StdInclude.h | 35 +- AE/engine/src/base/Log/Logger.cpp | 8 +- AE/engine/src/base/Math/BitMath.h | 41 +- AE/engine/src/base/Math/Byte.h | 227 +- AE/engine/src/base/Math/GLM.h | 31 +- AE/engine/src/base/Math/MatrixImpl.h | 47 +- AE/engine/src/base/Math/Percent.h | 25 +- AE/engine/src/base/Math/PhysicalDimension.h | 2 +- AE/engine/src/base/Math/PhysicalQuantity.h | 72 +- AE/engine/src/base/Math/PhysicalQuantityVec.h | 24 +- AE/engine/src/base/Math/Quat.h | 2 +- AE/engine/src/base/Math/Transformation.h | 20 +- AE/engine/src/base/Math/Vec.h | 118 +- AE/engine/src/base/Math/sRGB.h | 4 +- AE/engine/src/base/Memory/MemUtils.h | 10 +- AE/engine/src/base/Platforms/AndroidUtils.cpp | 17 + AE/engine/src/base/Platforms/AndroidUtils.h | 11 - AE/engine/src/base/Platforms/CPUInfo.h | 40 +- .../src/base/Platforms/CPUInfo_Apple.cpp | 42 - .../src/base/Platforms/CPUInfo_Linux.cpp | 131 - .../src/base/Platforms/CPUInfo_LinuxARM.cpp | 56 +- .../src/base/Platforms/CPUInfo_Windows.cpp | 8 +- AE/engine/src/base/Platforms/Perf_Apple.cpp | 72 + AE/engine/src/base/Platforms/Perf_Linux.cpp | 462 +++ ...{CPUInfo_WIndows2.cpp => Perf_WIndows.cpp} | 176 +- .../src/base/Platforms/PerformanceStat.h | 96 + .../src/base/Platforms/WindowsHeader.cpp.h | 8 + AE/engine/src/base/Platforms/WindowsUtils.cpp | 36 - AE/engine/src/base/Platforms/WindowsUtils.h | 8 - AE/engine/src/base/Pointers/Ptr.h | 34 +- AE/engine/src/base/Pointers/Ref.h | 59 + AE/engine/src/base/Time/Timer.h | 9 +- AE/engine/src/ecs-st/CMakeLists.txt | 1 + AE/engine/src/ecs-st/Core/Registry.h | 2 +- AE/engine/src/ecs-st/Core/Registry.inl.h | 4 +- AE/engine/src/graphics/CMakeLists.txt | 7 + .../{BufferDesc.cpp => BufferDesc.cpp.h} | 6 +- .../src/graphics/Private/CommandBatch.cpp.h | 2 + .../graphics/Private/ContextValidation.cpp | 67 +- 
.../src/graphics/Private/ContextValidation.h | 1 + AE/engine/src/graphics/Private/Defines.h | 2 - .../src/graphics/Private/DeviceProperties.cpp | 10 +- .../graphics/Private/DrawCommandBatch.cpp.h | 2 + AE/engine/src/graphics/Private/EnumToString.h | 2 + .../{EnumUtils.cpp => EnumUtils.cpp.h} | 201 +- AE/engine/src/graphics/Private/EnumUtils.h | 29 +- .../{FeatureSet.cpp => FeatureSet.cpp.h} | 60 +- .../{ImageDesc.cpp => ImageDesc.cpp.h} | 111 +- .../src/graphics/Private/ImageMemView.cpp | 2 +- .../src/graphics/Private/PipelinePack.cpp.h | 2 + .../{RenderState.cpp => RenderState.cpp.h} | 1 + .../Private/RenderTaskScheduler.cpp.h | 12 +- .../graphics/Private/ResourceManager.cpp.h | 2 + .../src/graphics/Private/ResourceValidation.h | 92 +- AE/engine/src/graphics/Private/Shared.cpp | 15 + .../Private/StagingBufferManager.cpp.h | 6 +- AE/engine/src/graphics/Private/Undef.h | 9 + AE/engine/src/graphics/Private/Video.cpp | 8 +- AE/engine/src/graphics/Public/BufferMemView.h | 68 +- AE/engine/src/graphics/Public/CommandBuffer.h | 3 +- .../src/graphics/Public/CommandBufferTypes.h | 36 +- AE/engine/src/graphics/Public/Common.h | 9 +- AE/engine/src/graphics/Public/FeatureSet.h | 5 +- AE/engine/src/graphics/Public/ImageDesc.h | 28 +- AE/engine/src/graphics/Public/ImageMemView.h | 4 +- AE/engine/src/graphics/Public/PipelineDesc.h | 3 +- AE/engine/src/graphics/Public/RenderState.h | 6 +- .../src/graphics/Public/RenderStateEnums.h | 85 +- AE/engine/src/graphics/Public/ResourceEnums.h | 3 +- AE/engine/src/graphics/Public/Video.h | 5 +- AE/engine/src/graphics/Public/VulkanTypes.h | 75 +- .../Remote/Commands/RBarrierManager.h | 2 +- .../Remote/Commands/RDrawBarrierManager.h | 2 +- .../graphics/Remote/Commands/RDrawContext.cpp | 15 + .../graphics/Remote/Commands/RDrawContext.h | 3 +- AE/engine/src/graphics/Remote/RConnection.cpp | 8 +- AE/engine/src/graphics/Remote/RConnection.h | 4 +- AE/engine/src/graphics/Remote/RDevice.cpp | 20 +- AE/engine/src/graphics/Remote/RDevice.h | 77 +- 
AE/engine/src/graphics/Remote/RMessages.cpp | 20 +- AE/engine/src/graphics/Remote/RMessages.cpp.h | 13 +- AE/engine/src/graphics/Remote/RMessages.h | 51 +- AE/engine/src/graphics/Remote/RSwapchain.cpp | 6 +- AE/engine/src/graphics/Remote/RSwapchain.h | 2 +- .../src/graphics/Remote/Resources/RImage.cpp | 2 +- .../src/graphics/Remote/Resources/RImage.h | 2 +- .../Remote/Resources/RPipelineHelper.cpp.h | 2 + .../graphics/RenderGraph/RGCommandContext.h | 82 +- .../src/graphics/RenderGraph/RenderGraph.cpp | 3 +- .../graphics/RenderGraph/ResStateTracker.cpp | 12 +- ...icsBindings.cpp => GraphicsBindings.cpp.h} | 170 +- .../Vulkan/Allocators/VBlockMemAllocator.cpp | 1 + .../Allocators/VGfxMemAllocatorUtils.cpp.h | 4 +- .../Vulkan/Allocators/VLinearMemAllocator.cpp | 1 + .../Vulkan/Commands/VBarrierManager.cpp | 6 +- .../Commands/VBarrierManagerUtils.cpp.h | 2 + .../Vulkan/Commands/VBaseIndirectContext.cpp | 6 + .../Vulkan/Commands/VBaseIndirectContext.h | 8 + .../graphics/Vulkan/Commands/VDrawContext.cpp | 15 + .../graphics/Vulkan/Commands/VDrawContext.h | 29 +- .../Vulkan/Commands/VGraphicsContext.cpp | 14 +- .../Vulkan/Commands/VGraphicsContext.h | 2 +- .../Vulkan/Commands/VTransferContext.h | 8 +- .../Vulkan/Descriptors/VDescriptorUpdater.cpp | 7 +- .../src/graphics/Vulkan/Resources/VBuffer.cpp | 4 +- .../Vulkan/Resources/VComputePipeline.cpp | 31 +- .../Vulkan/Resources/VFramebuffer.cpp | 1 - .../Vulkan/Resources/VGraphicsPipeline.cpp | 31 +- .../src/graphics/Vulkan/Resources/VImage.cpp | 18 +- .../src/graphics/Vulkan/Resources/VImage.h | 2 +- .../graphics/Vulkan/Resources/VImageView.cpp | 1 + .../Vulkan/Resources/VMeshPipeline.cpp | 29 +- .../Vulkan/Resources/VPipelineHelper.cpp.h | 10 +- .../Vulkan/Resources/VQueryManager.cpp | 4 +- .../graphics/Vulkan/Resources/VRTScene.cpp | 14 +- .../graphics/Vulkan/Utils/RenderDocApi.cpp | 8 +- .../Vulkan/Utils/VAMDPerfProfiler.cpp | 6 +- .../graphics/Vulkan/Utils/VAMDPerfProfiler.h | 2 + 
.../graphics/Vulkan/Utils/VNvPerfProfiler.h | 2 + AE/engine/src/graphics/Vulkan/VDevice.cpp | 112 +- AE/engine/src/graphics/Vulkan/VDevice.h | 8 +- AE/engine/src/graphics/Vulkan/VDeviceFS.cpp | 16 +- .../Vulkan/{VEnumCast.cpp => VEnumCast.cpp.h} | 0 AE/engine/src/graphics/Vulkan/VEnumCast.h | 10 +- AE/engine/src/graphics/Vulkan/VEnumToString.h | 2 +- .../graphics/Vulkan/VRenderTaskScheduler.cpp | 2 +- .../src/graphics/Vulkan/VResourceManager.cpp | 2 +- AE/engine/src/graphics/Vulkan/VSwapchain.cpp | 23 +- AE/engine/src/graphics/Vulkan/VSwapchain.h | 2 +- .../src/graphics/Vulkan/Video/VVideoImage.cpp | 20 +- .../graphics/Vulkan/Video/VVideoUtils.cpp.h | 2 + AE/engine/src/graphics_hl/CMakeLists.txt | 1 + .../src/graphics_hl/Canvas/VertexTypes.h | 4 +- .../src/graphics_hl/ImGui/ImGuiRenderer.cpp | 2 +- AE/engine/src/graphics_hl/UI/Layout.cpp | 6 +- AE/engine/src/networking/CMakeLists.txt | 1 + .../src/networking/HighLevel/DataEncoder.h | 2 +- .../src/networking/HighLevel/TcpChannel.cpp | 12 +- .../src/networking/HighLevel/TcpChannel.h | 2 +- .../networking/HighLevel/UdpUnreliable.cpp | 4 +- .../LowLevel/PlatformSpecific.cpp.h | 4 +- .../src/networking/LowLevel/TcpSocket.cpp | 2 +- AE/engine/src/networking/Utils/MsgAndSync.h | 60 +- .../platform/Android/ApplicationAndroid.cpp | 12 +- .../src/platform/Android/ApplicationAndroid.h | 17 +- .../src/platform/Android/WindowAndroid.cpp | 17 + .../src/platform/Android/WindowAndroid.h | 3 + AE/engine/src/platform/CMakeLists.txt | 5 +- AE/engine/src/platform/GLFW/WindowGLFW.cpp | 2 +- .../src/platform/Private/VRDeviceEmulator.cpp | 16 +- AE/engine/src/platform/Private/VRSurface.cpp | 2 +- AE/engine/src/platform/Private/WindowBase.cpp | 4 +- AE/engine/src/platform/Public/OutputSurface.h | 12 +- AE/engine/src/platform/Public/VRDevice.h | 3 +- .../src/platform/WinAPI/WindowWinAPI.cpp | 2 + AE/engine/src/profiler/CMakeLists.txt | 1 + .../profiler/ImGui/ImColumnHistoryDiagram.cpp | 10 +- AE/engine/src/profiler/ImGui/ImLineGraph.cpp | 
17 +- .../src/profiler/Impl/GraphicsProfiler.cpp | 68 +- .../src/profiler/Impl/GraphicsProfiler.h | 2 +- AE/engine/src/profiler/Impl/HwpcProfiler.cpp | 176 +- AE/engine/src/profiler/Impl/HwpcProfiler.h | 52 +- .../src/profiler/Impl/HwpcProfiler_ImGui.cpp | 1371 ++++--- AE/engine/src/profiler/ProfilerUI.cpp | 20 +- .../{Utils => Profilers}/AdrenoProfiler.cpp | 191 +- .../src/profiler/Profilers/AdrenoProfiler.h | 161 + .../{Utils => Profilers}/ArmProfiler.cpp | 28 +- .../{Utils => Profilers}/ArmProfiler.h | 2 +- .../profiler/Profilers/GeneralProfiler.cpp | 428 ++ .../src/profiler/Profilers/GeneralProfiler.h | 102 + .../{Utils => Profilers}/MaliProfiler.cpp | 166 +- .../src/profiler/Profilers/MaliProfiler.h | 510 +++ .../{Utils => Profilers}/NVidiaProfiler.cpp | 30 +- .../{Utils => Profilers}/NVidiaProfiler.h | 2 +- .../{Utils => Profilers}/PowerVRProfiler.cpp | 71 +- .../{Utils => Profilers}/PowerVRProfiler.h | 12 +- AE/engine/src/profiler/Remote/Messages.h | 96 +- .../profiler/Remote/RemoteAdrenoProfiler.cpp | 25 +- .../profiler/Remote/RemoteAdrenoProfiler.h | 4 +- .../src/profiler/Remote/RemoteArmProfiler.cpp | 33 +- .../src/profiler/Remote/RemoteArmProfiler.h | 5 +- .../profiler/Remote/RemoteGeneralProfiler.cpp | 454 +++ .../profiler/Remote/RemoteGeneralProfiler.h | 155 + .../profiler/Remote/RemoteMaliProfiler.cpp | 23 +- .../src/profiler/Remote/RemoteMaliProfiler.h | 4 +- .../profiler/Remote/RemoteNVidiaProfiler.cpp | 25 +- .../profiler/Remote/RemoteNVidiaProfiler.h | 4 +- .../profiler/Remote/RemotePowerVRProfiler.cpp | 92 +- .../profiler/Remote/RemotePowerVRProfiler.h | 18 +- AE/engine/src/profiler/Utils/AdrenoProfiler.h | 141 - AE/engine/src/profiler/Utils/MaliProfiler.h | 450 --- .../src/scripting/Bindings/CoreBindings.h | 5 + .../Bindings/CoreBindings_BindQuaternion.cpp | 176 + .../CoreBindings_BindVectorMath.inl.h | 10 +- AE/engine/src/scripting/CMakeLists.txt | 9 + AE/engine/src/scripting/Impl/EnumBinder.h | 53 +- 
AE/engine/src/scripting/Impl/ScriptArgList.h | 2 +- .../src/scripting/Impl/ScriptArgList.inl.h | 2 +- AE/engine/src/scripting/Impl/ScriptEngine.cpp | 59 +- AE/engine/src/scripting/Impl/ScriptTypes.h | 1 - AE/engine/src/serializing/CMakeLists.txt | 1 + .../serializing/Private/BitDeserializer.inl.h | 59 + .../serializing/Private/BitSerializer.inl.h | 68 + .../{ => Private}/Deserializer.inl.h | 71 +- .../{ => Private}/Serializer.inl.h | 99 +- .../serializing/{ => Private}/Serializing.cpp | 2 +- .../src/serializing/Public/BitDeserializer.h | 51 + .../src/serializing/Public/BitSerializer.h | 60 + .../src/serializing/{ => Public}/Common.h | 4 +- .../serializing/{ => Public}/Deserializer.h | 68 +- .../serializing/{ => Public}/ISerializable.h | 2 +- .../serializing/{ => Public}/ObjectFactory.h | 15 +- .../src/serializing/{ => Public}/PackedBits.h | 3 +- .../src/serializing/{ => Public}/Serializer.h | 51 +- AE/engine/src/threading/CMakeLists.txt | 1 + .../DataSource/UnixAsyncDataSource_LinuxAIO.h | 8 +- .../src/threading/Primitives/Synchronized.h | 31 +- AE/engine/src/threading/TaskSystem/Promise.h | 2 +- .../threading/TaskSystem/ThreadManager.cpp | 4 +- AE/engine/src/vfs/CMakeLists.txt | 1 + .../src/vfs/Network/NetworkStorageClient.cpp | 72 +- .../src/vfs/Network/NetworkStorageServer.cpp | 36 +- AE/engine/src/video/CMakeLists.txt | 1 + .../src/video/FFmpeg/FFmpegVideoDecoder.cpp | 6 +- AE/engine/tests/asset_packer/CMakeLists.txt | 18 +- .../asset_packer/pipeline_test/test1_ref.txt | 38 +- .../asset_packer/pipeline_test/vk_types.h | 14 +- AE/engine/tests/base/UnitTest_CPUInfo.cpp | 21 +- AE/engine/tests/base/UnitTest_Math.cpp | 15 + AE/engine/tests/base/UnitTest_Math_Vec.cpp | 4 +- AE/engine/tests/base/UnitTest_TypeTraits.cpp | 29 + AE/engine/tests/ecs-st/UnitTest_Registry.cpp | 4 +- .../UnitTest_SphericalCubeMath.cpp | 2 +- .../RenderGraph/Test_RG_Debugger4.cpp | 1 + .../RenderGraph/Test_RG_Debugger5.cpp | 1 + .../RenderGraph/Test_RG_ImageFormat.cpp | 197 +- 
.../RenderGraph/Test_RG_RayQuery1.cpp | 1 + .../RenderGraph/Test_RG_RayTracing1.cpp | 1 + .../RenderGraph/Test_RG_RayTracing2.cpp | 1 + .../RenderGraph/Test_RG_RayTracing3.cpp | 1 + .../graphics/RenderGraph/cpp/mtl_types.h | 4 +- .../tests/graphics/RenderGraph/cpp/vk_types.h | 6 +- AE/engine/tests/graphics/Test_Buffer.cpp | 2 +- AE/engine/tests/graphics/Test_Image.cpp | 8 +- .../graphics/UnitTest_EResourceState.cpp | 2 +- .../tests/graphics/UnitTest_ImageDesc.cpp | 18 +- .../graphics_hl/DrawTests/DrawTestCore.h | 1 - .../graphics_hl/DrawTests/cpp/mtl_types.h | 6 +- .../graphics_hl/DrawTests/cpp/vk_types.h | 6 +- .../UnitTest_DSLayout_GLSL.cpp | 2 +- .../UnitTest_PipelineLayout_GLSL.cpp | 2 +- .../pipeline_compiler/UnitTest_StructType.cpp | 83 +- .../tests/res_loaders/UnitTest_AEImage.cpp | 2 +- .../serializing/UnitTest_Serialization.cpp | 95 +- .../tests/shader_trace/ShaderTrace_Test6.cpp | 1 + AE/engine/tests/shader_trace/TestDevice.cpp | 5 +- AE/engine/tools/atlas_tools/CMakeLists.txt | 1 + AE/engine/tools/atlas_tools/RectPacker.cpp | 10 +- AE/engine/tools/atlas_tools/RectPacker.h | 3 + AE/engine/tools/atlas_tools/RectPackerSTB.cpp | 10 +- AE/engine/tools/atlas_tools/RectPackerSTB.h | 3 + AE/engine/tools/cicd/BaseMachine.cpp | 2 + AE/engine/tools/cicd/BaseMachine_Vulkan.cpp | 2 - AE/engine/tools/cicd/CMakeLists.txt | 14 +- AE/engine/tools/cicd/NetBase.cpp | 2 +- AE/engine/tools/cicd/NetBase.h | 4 +- AE/engine/tools/cicd/Server.cpp | 1 + .../tools/feature_set_gen/CMakeLists.txt | 2 +- .../tools/feature_set_gen/FeatureSetUtils.cpp | 4 +- AE/engine/tools/feature_set_gen/main.cpp | 8 +- AE/engine/tools/geometry_tools/CMakeLists.txt | 1 + .../SphericalCube/SphericalCubeGen.cpp | 2 +- AE/engine/tools/graphics_lib/GraphicsLib.h | 1 + .../remote_graphics_device/RemoteDevice.cpp | 40 +- .../remote_graphics_device/RemoteDevice.h | 13 +- .../RemoteDevice_Msg.cpp | 126 +- ...ImageLoader.cpp => AEImageLoaderSaver.cpp} | 29 + .../tools/res_loaders/AE/AEImageSaver.cpp | 44 - 
AE/engine/tools/res_loaders/CMakeLists.txt | 1 + .../tools/res_loaders/DDS/DDSUtils.cpp.h | 2 + .../tools/res_loaders/KTX/KTXImageLoader.cpp | 2 +- .../tools/res_loaders/STB/STBImageLoader.cpp | 2 +- .../tools/res_loaders/WAV/WaveUtils.cpp.h | 2 + .../res_pack/asset_packer/CMakeLists.txt | 12 +- .../asset_packer/Packer/AssetPacker.cpp | 2 +- .../Packer/ImageAtlasPacker.cpp.h | 2 + .../asset_packer/Packer/ImagePacker.cpp.h | 2 + .../asset_packer/Packer/ImagePacker.h | 6 +- .../Packer/RasterFontPacker.cpp.h | 2 + .../ScriptObjects/ScriptImageAtlas.cpp | 4 +- .../ScriptObjects/ScriptRasterFont.cpp | 4 +- .../ScriptObjects/ScriptTexture.cpp | 63 + .../ScriptObjects/ScriptTexture.h | 1 + .../ScriptUIWidget_Controller.cpp.h | 2 + .../ScriptUIWidget_Drawable.cpp.h | 2 + .../ScriptObjects/ScriptUIWidget_Layout.cpp.h | 2 + .../asset_packer/Utils/AstcEncoder.cpp.h | 2 + .../asset_packer/Utils/Compressonator.cpp.h | 18 +- .../res_pack/input_actions/CMakeLists.txt | 1 + .../res_pack/pipeline_compiler/CMakeLists.txt | 17 +- .../Compiler/AEStyleGLSLPreprocessor.cpp | 16 +- .../Compiler/AEStyleMSLPreprocessor.cpp | 11 +- .../Compiler/SpirvCompiler.cpp | 6 +- .../pipeline_compiler/Packer/HashToName.h | 2 +- .../Packer/PipelineCompiler.cpp | 4 +- .../pipeline_compiler/Packer/PipelinePack.cpp | 11 +- .../pipeline_compiler/Packer/PipelinePack.h | 2 +- .../Packer/PipelinePackDeserializer.cpp | 10 +- .../Packer/RenderPassPack.cpp | 5 +- .../ScriptObjects/BasePipeline.cpp | 32 +- .../ScriptObjects/BasePipeline.h | 6 +- .../pipeline_compiler/ScriptObjects/Common.h | 2 +- .../ScriptObjects/ComputePipeline.cpp | 64 +- .../ScriptObjects/ComputePipeline.h | 2 + .../ScriptObjects/DescriptorSetLayout.cpp | 100 +- .../ScriptObjects/DescriptorSetLayout.h | 3 +- .../ScriptObjects/GraphicsPipeline.cpp | 11 +- .../ScriptObjects/MeshPipeline.cpp | 8 +- .../ScriptObjects/ObjectStorage.cpp | 12 +- .../ScriptObjects/ObjectStorage.h | 5 +- .../ScriptObjects/ObjectStorage_GLSL.cpp | 4 +- 
.../ScriptObjects/ScriptConfig.cpp | 20 + .../ScriptObjects/ScriptConfig.h | 1 + .../ScriptObjects/ScriptFeatureSet.h | 2 - .../ScriptObjects/ScriptRenderPass.cpp | 30 +- .../ScriptObjects/ScriptRenderPass.h | 2 +- .../ScriptObjects/ShaderStructType.cpp | 18 +- .../shader_trace/Impl/ShaderTrace.cpp | 2 +- .../shader_trace/Impl/TraceRecording.cpp.h | 2 + .../shader_trace/Public/ShaderTrace.h | 4 +- .../vulkan_header_gen/GenVulkanLoaders.cpp | 1 + .../vulkan_image_zcurve/VulkanImageZCurve.cpp | 6 +- AE/samples/demo/_data/cpp/mac_types.h | 78 +- AE/samples/demo/_data/cpp/vk_types.h | 78 +- AE/samples/res_editor/CMakeLists.txt | 8 + AE/samples/res_editor/Changelog.md | 6 + AE/samples/res_editor/Core/EditorCore.cpp | 22 +- AE/samples/res_editor/Core/EditorCore.h | 2 + AE/samples/res_editor/Core/EditorUI.cpp | 33 +- AE/samples/res_editor/Core/EditorUI.h | 4 +- .../res_editor/Dynamic/DynamicDimension.h | 232 +- AE/samples/res_editor/Dynamic/DynamicScalar.h | 28 +- AE/samples/res_editor/Dynamic/DynamicVec.h | 44 + .../res_editor/GeomSource/ModelGeomSource.cpp | 79 +- .../res_editor/GeomSource/ModelGeomSource.h | 11 +- AE/samples/res_editor/Passes/ComputePass.cpp | 57 +- AE/samples/res_editor/Passes/IPass.cpp | 13 + AE/samples/res_editor/Passes/IPass.h | 3 + .../res_editor/Passes/ImageCompression.cpp | 122 +- .../res_editor/Passes/ImageCompression.h | 12 +- AE/samples/res_editor/Passes/OtherPasses.cpp | 214 +- AE/samples/res_editor/Passes/OtherPasses.h | 54 +- AE/samples/res_editor/Passes/PassGroup.cpp | 2 +- AE/samples/res_editor/Passes/Postprocess.cpp | 71 +- AE/samples/res_editor/Passes/Postprocess.h | 6 +- .../res_editor/Passes/RayTracingPass.cpp | 69 +- AE/samples/res_editor/Passes/Renderer.cpp | 2 +- AE/samples/res_editor/Passes/Scene.cpp | 165 +- AE/samples/res_editor/Passes/Scene.h | 10 +- AE/samples/res_editor/Readme.md | 5 +- .../res_editor/Resources/DefaultResources.cpp | 35 +- .../res_editor/Resources/DefaultResources.h | 11 +- 
AE/samples/res_editor/Resources/Image.cpp | 33 +- AE/samples/res_editor/Resources/Image.h | 2 - AE/samples/res_editor/Resources/RTScene.cpp | 2 + .../res_editor/Resources/VideoImage.cpp | 6 +- .../res_editor/Resources/VideoImage2.cpp | 4 +- AE/samples/res_editor/Resources/VideoImage2.h | 2 +- .../Scripting/PipelineCompiler.inl.h | 17 +- .../res_editor/Scripting/ScriptBasePass.cpp | 47 +- .../res_editor/Scripting/ScriptBasePass.cpp.h | 21 +- .../res_editor/Scripting/ScriptBasePass.h | 7 +- .../Scripting/ScriptBaseRenderPass.cpp | 76 + .../Scripting/ScriptBaseRenderPass.h | 33 +- .../res_editor/Scripting/ScriptBuffer.cpp | 18 +- .../res_editor/Scripting/ScriptBuffer.h | 7 +- .../res_editor/Scripting/ScriptCommon.h | 4 + .../Scripting/ScriptComputePass.cpp | 10 +- .../res_editor/Scripting/ScriptComputePass.h | 5 +- .../Scripting/ScriptDynamicVars.cpp | 158 +- .../res_editor/Scripting/ScriptDynamicVars.h | 7 + AE/samples/res_editor/Scripting/ScriptExe.cpp | 157 +- AE/samples/res_editor/Scripting/ScriptExe.h | 5 + .../Scripting/ScriptExe_MeshGen.cpp | 2 +- .../res_editor/Scripting/ScriptGeomSource.cpp | 3 + .../res_editor/Scripting/ScriptImage.cpp | 71 +- AE/samples/res_editor/Scripting/ScriptImage.h | 14 +- .../res_editor/Scripting/ScriptPassArgs.h | 30 +- .../Scripting/ScriptPostprocess.cpp | 85 +- .../Scripting/ScriptRayTracingPass.cpp | 4 +- .../res_editor/Scripting/ScriptScene.cpp | 21 +- .../res_editor/Scripting/ScriptVideoImage.cpp | 5 +- .../res_editor/Scripting/ScriptVideoImage.h | 2 +- AE/samples/res_editor/_data/CMakeLists.txt | 15 +- AE/samples/res_editor/_data/cpp/vk_types.h | 20 +- .../pipeline_inc/{Model.as => ModelTypes.as} | 16 +- .../_data/pipelines/ModelReflection.as | 2 +- .../res_editor/_data/pipelines/ModelShared.as | 2 +- .../_data/pipelines/perf/Subgroups-2a.as | 140 + .../_data/pipelines/perf/Subgroups-2b.as | 146 + .../_data/pipelines/perf/Subgroups-2c.as | 133 + .../_data/pipelines/perf/TexLookup-1a.as | 84 + 
.../_data/pipelines/perf/TexLookup-1b.as | 84 + .../_data/pipelines/samples/Cubemap.as | 2 +- .../samples/DeferredTexturing-pass1.as | 2 +- .../samples/DeferredTexturing-pass2.as | 2 +- .../pipelines/samples/Dispersion2D-area.as | 2 +- .../_data/pipelines/samples/FSBarycentric.as | 2 +- .../MaterialDepthBuffer-HiPerf/Mtr-1.as | 2 +- .../MaterialDepthBuffer-HiPerf/Mtr-2.as | 2 +- .../MaterialDepthBuffer-HiPerf/Mtr-3.as | 2 +- .../MaterialDepthBuffer-LowPerf/Mtr-1.as | 2 +- .../MaterialDepthBuffer-LowPerf/Mtr-2.as | 2 +- .../MaterialDepthBuffer-LowPerf/Mtr-3.as | 2 +- .../MaterialDepthBuffer-MedPerf/Mtr-1.as | 2 +- .../MaterialDepthBuffer-MedPerf/Mtr-2.as | 2 +- .../MaterialDepthBuffer-MedPerf/Mtr-3.as | 2 +- .../pipelines/samples/MeshShader-Cubes.as | 2 +- .../_data/pipelines/samples/Model-Cubemap.as | 10 +- .../samples/{Model-RT-1.as => Model-RT.as} | 0 .../samples/{Model-1.as => Model.as} | 2 +- .../samples/VisibilityBuffer-pass1.as | 95 + .../samples/VisibilityBuffer-pass2.as | 300 ++ .../_data/pipelines/sphere/SphericalCube-1.as | 2 +- .../_data/pipelines/sphere/SphericalCube-2.as | 2 +- .../_data/pipelines/sphere/SphericalCube-3.as | 2 +- .../_data/pipelines/sphere/SphericalCube-4.as | 2 +- .../pipelines/sphere/SphericalCube-5a.as | 4 +- .../pipelines/sphere/SphericalCube-5b.as | 2 +- .../_data/pipelines/tests/ProceduralGrid.as | 4 +- .../pipelines/tests/TriangleBarycentrics.as | 275 ++ .../_data/pipelines/tests/Triangulation.as | 2 +- .../_data/scripts/callable/GenPlanet.as | 6 +- .../_data/scripts/perf/BufferStorage.as | 185 + .../_data/scripts/perf/ImageStorage-1.as | 141 + .../_data/scripts/perf/ImageStorage-2.as | 101 + .../_data/scripts/perf/ImageStorage-3.as | 78 + .../scripts/perf/ImageStorage-Reorder.as | 131 + .../_data/scripts/perf/Inst-fp16.as | 402 ++ .../_data/scripts/perf/Inst-fp32.as | 352 ++ .../_data/scripts/perf/RTCompression.as | 269 ++ .../res_editor/_data/scripts/perf/Storage.as | 170 + .../Subgroups.as => perf/Subgroups-1.as} | 3 - 
.../_data/scripts/perf/Subgroups-2.as | 158 + .../res_editor/_data/scripts/perf/TexCache.as | 103 + .../_data/scripts/perf/TexLookup.as | 123 + .../_data/scripts/samples-2d/AA-Grid.as | 2 +- .../scripts/samples-2d/MaterialDepthBuffer.as | 16 +- .../scripts/samples-3d/DeferredTexturing.as | 14 + .../samples-3d/{Model-1.as => Model.as} | 4 +- .../_data/scripts/samples-3d/Particles-1.as | 42 +- .../_data/scripts/samples-3d/Particles-2.as | 42 +- .../_data/scripts/samples-3d/Planet-1.as | 2 +- .../_data/scripts/samples-3d/Planet-2.as | 2 +- .../scripts/samples-3d/RenderToCubemap.as | 7 +- .../scripts/samples-3d/VisibilityBuffer.as | 66 + .../{Volumetric-1.as => Volumetric.as} | 0 .../_data/scripts/samples-rt/Dispersion2D.as | 9 +- .../scripts/samples-rt/Dispersion2DLayered.as | 9 +- .../samples-rt/{RT-Model-1.as => RT-Model.as} | 4 +- ...{RT-MultiBounce-1.as => RT-MultiBounce.as} | 4 +- .../_data/scripts/sphere/SphericalCube-3.as | 6 +- .../_data/scripts/sphere/SphericalCube-5.as | 2 +- .../_data/scripts/sphere/UVSphere-1.as | 150 + .../_data/scripts/tests/CubeMapTest-1.as | 7 +- .../_data/scripts/tests/CubeMapTest-2.as | 106 - .../_data/scripts/tests/Derivatives.as | 3 +- .../scripts/tests/ImageExport-CubeMap.as | 4 +- .../_data/scripts/tests/LightModels.as | 13 +- .../res_editor/_data/scripts/tests/NaN.as | 6 + .../_data/scripts/tests/NormalPacking.as | 58 +- .../_data/scripts/tests/RG8toFloat.as | 187 + .../_data/scripts/tests/RGBA8toFloat.as | 227 ++ ...eenProjection-1.as => ScreenProjection.as} | 0 .../_data/scripts/tests/SmoothNormal.as | 3 +- .../_data/scripts/tests/SmoothTBN.as | 3 +- .../scripts/tests/TriangleBarycentrics.as | 63 + .../_data/scripts/tools/2d/Easing.as | 2 +- .../_data/scripts/tools/2d/Graph1.as | 48 +- .../scripts/tools/2d/ImageCompression2D.as | 88 + .../scripts/tools/2d/ImageCompressionCube.as | 90 + .../res_editor/_data/scripts/tools/2d/Wave.as | 133 + .../_data/scripts/tools/noise/HashFp32.as | 32 +- 
.../res_editor/_data/shaders/FragHelper.glsl | 33 - .../_data/shaders/ModelMaterial.glsl | 21 +- AE/samples/res_editor/_data/shaders/glsl.h | 86 + .../res_editor/_ui_data/controls/glfw.as | 18 +- .../res_editor/_ui_data/controls/winapi.as | 18 +- AE/samples/res_editor/_ui_data/cpp/ia_names.h | 8 +- AE/samples/res_editor/_ui_data/cpp/vk_types.h | 63 +- AE/samples/res_editor/docs/Samples.md | 19 +- 645 files changed, 24071 insertions(+), 10956 deletions(-) create mode 100644 AE/build_scripts/win_arm64/init.bat rename AE/build_scripts/{win_vc2022 => win_x64}/init.bat (100%) rename AE/build_scripts/{win_vc2022 => win_x64}/init_no_pch.bat (100%) rename AE/build_scripts/{win_vc2022 => win_x64}/init_noexc.bat (100%) rename AE/build_scripts/{win_vc2022 => win_x64}/init_precompiled.bat (100%) rename AE/build_scripts/{win_vc2022 => win_x64}/init_rem.bat (100%) rename AE/build_scripts/{win_vc2022 => win_x64}/precompile_engine.bat (95%) rename AE/build_scripts/{win_vc2022 => win_x64_clang}/init_clang.bat (100%) create mode 100644 AE/build_scripts/win_x64_clang/init_clang_rem.bat create mode 100644 AE/docs/papers/GPU_Benchmarks.md create mode 100644 AE/docs/papers/bench/AMD_RX570.md create mode 100644 AE/docs/papers/bench/ARM_Mali_G57.md create mode 100644 AE/docs/papers/bench/ARM_Mali_T830.md create mode 100644 AE/docs/papers/bench/Adreno_505.md create mode 100644 AE/docs/papers/bench/Adreno_660.md create mode 100644 AE/docs/papers/bench/Apple_M1.md create mode 100644 AE/docs/papers/bench/Intel_UHD620.md create mode 100644 AE/docs/papers/bench/NVidia_RTX2080.md create mode 100644 AE/docs/papers/bench/PowerVR_BXM.md create mode 100644 AE/docs/papers/bench/img/compute-subgroups/adreno-600.png create mode 100644 AE/docs/papers/bench/img/compute-subgroups/amd-gcn4.png create mode 100644 AE/docs/papers/bench/img/compute-subgroups/intel-gen9_5.png create mode 100644 AE/docs/papers/bench/img/compute-subgroups/mac-m1.png create mode 100644 
AE/docs/papers/bench/img/compute-subgroups/nv-turing.png create mode 100644 AE/docs/papers/bench/img/compute-subgroups/powervr-bxm-16x16.png create mode 100644 AE/docs/papers/bench/img/compute-subgroups/powervr-bxm-8x8.png create mode 100644 AE/docs/papers/bench/img/compute-subgroups/valhall-1.png create mode 100644 AE/docs/papers/bench/img/full-quad/adreno-660-tex-ht.png create mode 100644 AE/docs/papers/bench/img/full-quad/valhall-1-qd.png create mode 100644 AE/docs/papers/bench/img/full-quad/valhall-1-tex-ht.png create mode 100644 AE/docs/papers/bench/img/full-subgroup/adreno-660-large.png create mode 100644 AE/docs/papers/bench/img/full-subgroup/adreno-660-tex.png create mode 100644 AE/docs/papers/bench/img/full-subgroup/adreno-660.png create mode 100644 AE/docs/papers/bench/img/full-subgroup/valhall-1-large.png create mode 100644 AE/docs/papers/bench/img/full-subgroup/valhall-1-tex.png create mode 100644 AE/docs/papers/bench/img/graphics-subgroups/adreno-600.png create mode 100644 AE/docs/papers/bench/img/graphics-subgroups/amd-gcn4.png create mode 100644 AE/docs/papers/bench/img/graphics-subgroups/intel-gen9_5.png create mode 100644 AE/docs/papers/bench/img/graphics-subgroups/mac-m1.png create mode 100644 AE/docs/papers/bench/img/graphics-subgroups/nv-turing.png create mode 100644 AE/docs/papers/bench/img/graphics-subgroups/powervr-bxm.png create mode 100644 AE/docs/papers/bench/img/graphics-subgroups/valhall-1.png create mode 100644 AE/docs/papers/bench/img/nv-turing-smid-compute.png create mode 100644 AE/docs/papers/bench/img/nv-turing-smid-graphics.png create mode 100644 AE/docs/papers/bench/img/valhall-1-unique-subgroups.png delete mode 100644 AE/engine/external/shared/xxHash/android/install.bat create mode 100644 AE/engine/external/win-arm64-msvc143/CMakeLists.txt create mode 100644 AE/engine/shared_data/3party_shaders/FastMath-1.glsl create mode 100644 AE/engine/shared_data/3party_shaders/TileableNoise.glsl create mode 100644 
AE/engine/shared_data/shaders/FastMath.glsl delete mode 100644 AE/engine/src/base/Platforms/CPUInfo_Linux.cpp create mode 100644 AE/engine/src/base/Platforms/Perf_Apple.cpp create mode 100644 AE/engine/src/base/Platforms/Perf_Linux.cpp rename AE/engine/src/base/Platforms/{CPUInfo_WIndows2.cpp => Perf_WIndows.cpp} (77%) create mode 100644 AE/engine/src/base/Platforms/PerformanceStat.h create mode 100644 AE/engine/src/base/Pointers/Ref.h rename AE/engine/src/graphics/Private/{BufferDesc.cpp => BufferDesc.cpp.h} (91%) rename AE/engine/src/graphics/Private/{EnumUtils.cpp => EnumUtils.cpp.h} (92%) rename AE/engine/src/graphics/Private/{FeatureSet.cpp => FeatureSet.cpp.h} (98%) rename AE/engine/src/graphics/Private/{ImageDesc.cpp => ImageDesc.cpp.h} (79%) rename AE/engine/src/graphics/Private/{RenderState.cpp => RenderState.cpp.h} (99%) create mode 100644 AE/engine/src/graphics/Private/Shared.cpp create mode 100644 AE/engine/src/graphics/Private/Undef.h rename AE/engine/src/graphics/Scripting/{GraphicsBindings.cpp => GraphicsBindings.cpp.h} (92%) rename AE/engine/src/graphics/Vulkan/{VEnumCast.cpp => VEnumCast.cpp.h} (100%) rename AE/engine/src/profiler/{Utils => Profilers}/AdrenoProfiler.cpp (94%) create mode 100644 AE/engine/src/profiler/Profilers/AdrenoProfiler.h rename AE/engine/src/profiler/{Utils => Profilers}/ArmProfiler.cpp (91%) rename AE/engine/src/profiler/{Utils => Profilers}/ArmProfiler.h (95%) create mode 100644 AE/engine/src/profiler/Profilers/GeneralProfiler.cpp create mode 100644 AE/engine/src/profiler/Profilers/GeneralProfiler.h rename AE/engine/src/profiler/{Utils => Profilers}/MaliProfiler.cpp (61%) create mode 100644 AE/engine/src/profiler/Profilers/MaliProfiler.h rename AE/engine/src/profiler/{Utils => Profilers}/NVidiaProfiler.cpp (91%) rename AE/engine/src/profiler/{Utils => Profilers}/NVidiaProfiler.h (96%) rename AE/engine/src/profiler/{Utils => Profilers}/PowerVRProfiler.cpp (89%) rename AE/engine/src/profiler/{Utils => 
Profilers}/PowerVRProfiler.h (93%) create mode 100644 AE/engine/src/profiler/Remote/RemoteGeneralProfiler.cpp create mode 100644 AE/engine/src/profiler/Remote/RemoteGeneralProfiler.h delete mode 100644 AE/engine/src/profiler/Utils/AdrenoProfiler.h delete mode 100644 AE/engine/src/profiler/Utils/MaliProfiler.h create mode 100644 AE/engine/src/scripting/Bindings/CoreBindings_BindQuaternion.cpp create mode 100644 AE/engine/src/serializing/Private/BitDeserializer.inl.h create mode 100644 AE/engine/src/serializing/Private/BitSerializer.inl.h rename AE/engine/src/serializing/{ => Private}/Deserializer.inl.h (91%) rename AE/engine/src/serializing/{ => Private}/Serializer.inl.h (73%) rename AE/engine/src/serializing/{ => Private}/Serializing.cpp (60%) create mode 100644 AE/engine/src/serializing/Public/BitDeserializer.h create mode 100644 AE/engine/src/serializing/Public/BitSerializer.h rename AE/engine/src/serializing/{ => Public}/Common.h (84%) rename AE/engine/src/serializing/{ => Public}/Deserializer.h (63%) rename AE/engine/src/serializing/{ => Public}/ISerializable.h (96%) rename AE/engine/src/serializing/{ => Public}/ObjectFactory.h (93%) rename AE/engine/src/serializing/{ => Public}/PackedBits.h (96%) rename AE/engine/src/serializing/{ => Public}/Serializer.h (70%) rename AE/engine/tools/res_loaders/AE/{AEImageLoader.cpp => AEImageLoaderSaver.cpp} (69%) delete mode 100644 AE/engine/tools/res_loaders/AE/AEImageSaver.cpp rename AE/samples/res_editor/_data/pipeline_inc/{Model.as => ModelTypes.as} (93%) create mode 100644 AE/samples/res_editor/_data/pipelines/perf/Subgroups-2a.as create mode 100644 AE/samples/res_editor/_data/pipelines/perf/Subgroups-2b.as create mode 100644 AE/samples/res_editor/_data/pipelines/perf/Subgroups-2c.as create mode 100644 AE/samples/res_editor/_data/pipelines/perf/TexLookup-1a.as create mode 100644 AE/samples/res_editor/_data/pipelines/perf/TexLookup-1b.as rename AE/samples/res_editor/_data/pipelines/samples/{Model-RT-1.as => Model-RT.as} 
(100%) rename AE/samples/res_editor/_data/pipelines/samples/{Model-1.as => Model.as} (99%) create mode 100644 AE/samples/res_editor/_data/pipelines/samples/VisibilityBuffer-pass1.as create mode 100644 AE/samples/res_editor/_data/pipelines/samples/VisibilityBuffer-pass2.as create mode 100644 AE/samples/res_editor/_data/pipelines/tests/TriangleBarycentrics.as create mode 100644 AE/samples/res_editor/_data/scripts/perf/BufferStorage.as create mode 100644 AE/samples/res_editor/_data/scripts/perf/ImageStorage-1.as create mode 100644 AE/samples/res_editor/_data/scripts/perf/ImageStorage-2.as create mode 100644 AE/samples/res_editor/_data/scripts/perf/ImageStorage-3.as create mode 100644 AE/samples/res_editor/_data/scripts/perf/ImageStorage-Reorder.as create mode 100644 AE/samples/res_editor/_data/scripts/perf/Inst-fp16.as create mode 100644 AE/samples/res_editor/_data/scripts/perf/Inst-fp32.as create mode 100644 AE/samples/res_editor/_data/scripts/perf/RTCompression.as create mode 100644 AE/samples/res_editor/_data/scripts/perf/Storage.as rename AE/samples/res_editor/_data/scripts/{samples-2d/Subgroups.as => perf/Subgroups-1.as} (99%) create mode 100644 AE/samples/res_editor/_data/scripts/perf/Subgroups-2.as create mode 100644 AE/samples/res_editor/_data/scripts/perf/TexCache.as create mode 100644 AE/samples/res_editor/_data/scripts/perf/TexLookup.as rename AE/samples/res_editor/_data/scripts/samples-3d/{Model-1.as => Model.as} (83%) create mode 100644 AE/samples/res_editor/_data/scripts/samples-3d/VisibilityBuffer.as rename AE/samples/res_editor/_data/scripts/samples-3d/{Volumetric-1.as => Volumetric.as} (100%) rename AE/samples/res_editor/_data/scripts/samples-rt/{RT-Model-1.as => RT-Model.as} (87%) rename AE/samples/res_editor/_data/scripts/samples-rt/{RT-MultiBounce-1.as => RT-MultiBounce.as} (99%) create mode 100644 AE/samples/res_editor/_data/scripts/sphere/UVSphere-1.as delete mode 100644 AE/samples/res_editor/_data/scripts/tests/CubeMapTest-2.as create mode 
100644 AE/samples/res_editor/_data/scripts/tests/RG8toFloat.as create mode 100644 AE/samples/res_editor/_data/scripts/tests/RGBA8toFloat.as rename AE/samples/res_editor/_data/scripts/tests/{ScreenProjection-1.as => ScreenProjection.as} (100%) create mode 100644 AE/samples/res_editor/_data/scripts/tests/TriangleBarycentrics.as create mode 100644 AE/samples/res_editor/_data/scripts/tools/2d/ImageCompression2D.as create mode 100644 AE/samples/res_editor/_data/scripts/tools/2d/ImageCompressionCube.as create mode 100644 AE/samples/res_editor/_data/scripts/tools/2d/Wave.as delete mode 100644 AE/samples/res_editor/_data/shaders/FragHelper.glsl create mode 100644 AE/samples/res_editor/_data/shaders/glsl.h diff --git a/.github/workflows/android.yml b/.github/workflows/android.yml index 2382f249..75f70fad 100644 --- a/.github/workflows/android.yml +++ b/.github/workflows/android.yml @@ -35,7 +35,7 @@ jobs: working-directory: ${{github.workspace}} run: | cd AE-Bin - wget -O external.zip "https://getfile.dokpub.com/yandex/get/https://disk.yandex.ru/d/J96d_RPphPyYBg" + wget -O external.zip "https://getfile.dokpub.com/yandex/get/https://disk.yandex.ru/d/O1s2phbgeQ0qYQ" unzip external.zip rm external.zip cd ../AE/android diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index c8120ae0..48ba8e0a 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -26,7 +26,7 @@ jobs: run: | mkdir _build cd AE-Bin - wget -O external.zip "https://getfile.dokpub.com/yandex/get/https://disk.yandex.ru/d/ItFRNcXTSg8jJw" + wget -O external.zip "https://getfile.dokpub.com/yandex/get/https://disk.yandex.ru/d/kOFxBTUcF_tPSA" unzip external.zip - name: Configure CMake @@ -125,7 +125,7 @@ jobs: run: | mkdir _build cd AE-Bin - wget -O external.zip "https://getfile.dokpub.com/yandex/get/https://disk.yandex.ru/d/7FIqOv5RO-12oA" + wget -O external.zip "https://getfile.dokpub.com/yandex/get/https://disk.yandex.ru/d/EFVy6uci9CWp8Q" unzip external.zip - name: Configure 
CMake @@ -225,7 +225,7 @@ jobs: run: | mkdir _build cd AE-Bin - wget -O external.zip "https://getfile.dokpub.com/yandex/get/https://disk.yandex.ru/d/7FIqOv5RO-12oA" + wget -O external.zip "https://getfile.dokpub.com/yandex/get/https://disk.yandex.ru/d/EFVy6uci9CWp8Q" unzip external.zip - name: Configure CMake diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index 29a1be8d..5d8c8e8b 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -19,7 +19,7 @@ jobs: run: | mkdir _build cd AE-Bin - wget -O external.zip "https://getfile.dokpub.com/yandex/get/https://disk.yandex.ru/d/d3yxp6smqL7wEg" + wget -O external.zip "https://getfile.dokpub.com/yandex/get/https://disk.yandex.ru/d/D8CH8QhYzxYB6w" unzip external.zip - name: Configure CMake diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index d10d8f0e..37f9c6f9 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -19,7 +19,7 @@ jobs: run: | mkdir _build cd AE-Bin - powershell -Command "(New-Object Net.WebClient).DownloadFile('https://getfile.dokpub.com/yandex/get/https://disk.yandex.ru/d/jrEo9X0ToUJC0g', 'external.zip' )" + powershell -Command "(New-Object Net.WebClient).DownloadFile('https://getfile.dokpub.com/yandex/get/https://disk.yandex.ru/d/rp0poS4J38WM3A', 'external.zip' )" powershell Expand-Archive external.zip -DestinationPath "." - name: Configure CMake @@ -111,7 +111,7 @@ jobs: run: | mkdir _build cd AE-Bin - powershell -Command "(New-Object Net.WebClient).DownloadFile('https://getfile.dokpub.com/yandex/get/https://disk.yandex.ru/d/jrEo9X0ToUJC0g', 'external.zip' )" + powershell -Command "(New-Object Net.WebClient).DownloadFile('https://getfile.dokpub.com/yandex/get/https://disk.yandex.ru/d/rp0poS4J38WM3A', 'external.zip' )" powershell Expand-Archive external.zip -DestinationPath "." 
- name: Configure CMake @@ -145,7 +145,7 @@ jobs: run: | mkdir _build cd AE-Bin - powershell -Command "(New-Object Net.WebClient).DownloadFile('https://getfile.dokpub.com/yandex/get/https://disk.yandex.ru/d/tzvkvWlNsKtxcA', 'external.zip' )" + powershell -Command "(New-Object Net.WebClient).DownloadFile('https://getfile.dokpub.com/yandex/get/https://disk.yandex.ru/d/y8QLb9g37nBpJA', 'external.zip' )" powershell Expand-Archive external.zip -DestinationPath "." - name: Configure CMake diff --git a/AE/CMakeLists.txt b/AE/CMakeLists.txt index 3f3b4d6b..3d3eb4a4 100644 --- a/AE/CMakeLists.txt +++ b/AE/CMakeLists.txt @@ -18,7 +18,7 @@ endif() #---------------------------------------------------------- project( "AE" - VERSION 24.8.254 # year, month, version + VERSION 24.9.258 # year, month, version LANGUAGES C CXX DESCRIPTION "async game engine" ) @@ -39,11 +39,14 @@ message( STATUS "target processor: ${CMAKE_SYSTEM_PROCESSOR}" ) #---------------------------------------------------------- if (${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.16.0") - set( AE_USE_PCH ON CACHE BOOL "use precompiled headers" ) + set( AE_USE_PCH ON CACHE BOOL "use precompiled headers" ) + set( AE_USE_UNITY_BUILD OFF CACHE BOOL "use unity build" ) else() - set( AE_USE_PCH OFF CACHE INTERNAL "" FORCE ) + set( AE_USE_PCH OFF CACHE INTERNAL "" FORCE ) + set( AE_USE_UNITY_BUILD OFF CACHE INTERNAL "" FORCE ) endif() message( STATUS "Precompiled headers: ${AE_USE_PCH}" ) +message( STATUS "Unity build: ${AE_USE_UNITY_BUILD}" ) #---------------------------------------------------------- @@ -54,6 +57,15 @@ set( CMAKE_RELEASE_POSTFIX "" CACHE INTERNAL "" FORCE ) set( MAIN_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}" CACHE INTERNAL "" FORCE ) set( MAIN_BINARY_DIR "${CMAKE_BINARY_DIR}/bin" CACHE INTERNAL "" FORCE ) +# for IDE +set( ANDROID_SDK_ROOT "$ENV{ANDROID_SDK_ROOT}" ) +set( ANDROID_NDK_INCLUDE "${ANDROID_SDK_ROOT}/ndk/26.3.11579264/toolchains/llvm/prebuilt/windows-x86_64" CACHE INTERNAL "" FORCE ) +if (WIN32) 
+ if (NOT EXISTS ${ANDROID_NDK_INCLUDE}) + message( STATUS "Android NDK is not found in '${ANDROID_NDK_INCLUDE}'" ) + endif() +endif() + # this settings breaks native debugger on android if (NOT ANDROID) set( CMAKE_INSTALL_PREFIX "${CMAKE_BINARY_DIR}/install" CACHE PATH "installation directory" FORCE ) diff --git a/AE/android/cicd/build.gradle b/AE/android/cicd/build.gradle index 506005e1..c8b7308e 100644 --- a/AE/android/cicd/build.gradle +++ b/AE/android/cicd/build.gradle @@ -17,6 +17,7 @@ android { arguments '-DANDROID_STL=c++_static', // see https://developer.android.com/ndk/guides/cpp-support#static_runtimes '-DANDROID_ARM_NEON=ON', '-DAE_ENABLE_VULKAN=ON' + targets 'CICD' } } } diff --git a/AE/android/demo/build.gradle b/AE/android/demo/build.gradle index 1213317e..7b855553 100644 --- a/AE/android/demo/build.gradle +++ b/AE/android/demo/build.gradle @@ -22,6 +22,7 @@ android { arguments '-DANDROID_STL=c++_static', // see https://developer.android.com/ndk/guides/cpp-support#static_runtimes '-DANDROID_ARM_NEON=ON', '-DAE_ENABLE_VULKAN=ON' + targets 'SampleDemo' } } } diff --git a/AE/android/demo/src/main/AndroidManifest.xml b/AE/android/demo/src/main/AndroidManifest.xml index 535e6289..498d9950 100644 --- a/AE/android/demo/src/main/AndroidManifest.xml +++ b/AE/android/demo/src/main/AndroidManifest.xml @@ -9,8 +9,8 @@ - - + + +Download prebuild binaries which is used in CI and extract it to the `AE-Bin/external` folder:
+[Android](https://disk.yandex.ru/d/O1s2phbgeQ0qYQ)
+[Linux x64 GCC-13](https://disk.yandex.ru/d/kOFxBTUcF_tPSA)
+[Linux x64 Clang-16](https://disk.yandex.ru/d/EFVy6uci9CWp8Q)
+[MacOS arm64](https://disk.yandex.ru/d/D8CH8QhYzxYB6w)
+[Windows x64 MSVC](https://disk.yandex.ru/d/rp0poS4J38WM3A)
+[Windows x64 Clang](https://disk.yandex.ru/d/y8QLb9g37nBpJA)
+Then [Build engine and samples](#Build-engine-and-samples) + ## Build third party dependencies @@ -134,7 +144,7 @@ Compiled resources will be stored in `AE/../AE-Temp` folder. They can be used on ## Update file paths (optional) Visual Studio and some other IDEs and programs (Notepad++) allows to open file in URL format like a `file://absolute/path/to/a/file.txt`, but it requires absolute paths. Source code may contain links to other source files or docs which is written as URLs.
-Use IDE or another tool to replace path `[](file://C:/Projects/AllinOne/AE/)` to an absolute path like a `file://C:/Projects/AE/`. +Use IDE or another tool to replace path `[](https://github.com/azhirnov/as-en/blob/dev/AE/)` to an absolute path like a `file://C:/Projects/AE/`. ## Setup IDE and environment (optional) diff --git a/AE/docs/engine/Platforms.md b/AE/docs/engine/Platforms.md index baf0ccf8..c27ddd4c 100644 --- a/AE/docs/engine/Platforms.md +++ b/AE/docs/engine/Platforms.md @@ -7,7 +7,7 @@ Supported platforms - [ ] OS: Win11 (not tested) - [ ] Architecture: x86 (not tested) - [x] Architecture: x64 -- [ ] Architecture: ARM64 +- [ ] Architecture: ARM64 (compiled but not tested) - [x] Compiler: VisualStudio 2022 - [x] Compiler: VisualStudio 2022 with Clang - [x] CPU info @@ -38,10 +38,14 @@ Supported platforms - [ ] RGP (not tested) - [ ] Crash reporting +**ARM64**: +- [ ] Scripting +- [ ] SIMD + ## Linux -- [x] OS: Ubuntu 22, 23 +- [x] OS: Ubuntu 23, 24 - [x] Architecture: x64 - [ ] Architecture: ARM64 - [ ] Architecture: RISC-V 64 @@ -65,6 +69,9 @@ Supported platforms - [x] Graphics: - Vulkan (by default requires timeline semaphore) - Vulkan (without timeline semaphore, compile with `-DAE_VK_TIMELINE_SEMAPHORE=OFF`) +- Window system: + - [x] X11 + - [ ] Wayland - [ ] HDR display (not tested) - [ ] Profiling - [ ] Distribution: Flatpack diff --git a/AE/docs/engine/Profiling.md b/AE/docs/engine/Profiling.md index 0c7af1a5..71504768 100644 --- a/AE/docs/engine/Profiling.md +++ b/AE/docs/engine/Profiling.md @@ -11,6 +11,8 @@ The profiler measures the start and end time of the task and builds a diagram fo The profiler measures the start and end time of a render pass or a group of commands (compute, ray tracing, transfer), then builds a graph. +Time measurements are not accurate and depend on GPU frequency, which depends on the power saving mode. 
To get more accurate measurements, create the device with `EDeviceFlags::SetStableClock`; it is supported on NV and AMD GPUs. + ![](img/GraphicsProfiler.jpg) @@ -18,6 +20,11 @@ The profiler measures the start and end time of a render pass or a group of comm Used hardware performance counters for GPUs: Mali, Adreno, PowerVR, NVidia, AMD, Intel. +For Mali and PowerVR: +1. Look at GPU frequency. A frequency near 900MHz indicates maximum GPU workload; less than 900MHz shows that the GPU isn't fully utilized and the driver decreases the frequency to minimize power consumption. Low frequency may happen because of thermal throttling, stalling on synchronizations/memory access, stalling on present. +2. Look at GPU units utilization (cache hit, texture, ALU). 100% means this unit may be a bottleneck, but only if GPU frequency is high. Low % and low GPU frequency may mean that this unit is not fully utilized because of stalls. +3. Look at external memory traffic and memory access stalls. Try to decrease it and check GPU frequency/FPS/frame time; if frequency and FPS increase, then this is a bottleneck and should be optimized. + ![](img/ARM-HWCounters.png) @@ -93,6 +100,8 @@ no source ## External tools +Overview of profiling/debugging tools which are tested for compatibility and used to optimize the engine. 
+ #### NSigh Graphics * Mesh shader debug/profile @@ -101,7 +110,7 @@ no source * Graphics debug/profile * Async compute debug/profile * Synchronizations debug/profile - * [VNvPerfProfiler](https://github.com/azhirnov/as-en/blob/dev/AE/engine/src/graphics/Vulkan/Utils/VNvPerfProfiler.h) class for interaction + * [VNvPerfProfiler](https://github.com/azhirnov/as-en/blob/dev/AE/engine/src/graphics/Vulkan/Utils/VNvPerfProfiler.h) class for interaction #### RenderDoc diff --git a/AE/docs/engine/VulkanFeatures.md b/AE/docs/engine/VulkanFeatures.md index 41d74a5f..d8683c55 100644 --- a/AE/docs/engine/VulkanFeatures.md +++ b/AE/docs/engine/VulkanFeatures.md @@ -38,6 +38,8 @@ Source folder: [Vulkan backend](https://github.com/azhirnov/as-en/blob/dev/AE/en - [x] VK_KHR_pipeline_executable_properties - [x] VK_EXT_device_memory_report - [x] VK_NV_ray_tracing_validation +- [x] VK_NV_clip_space_w_scaling +- [x] VK_EXT_subgroup_size_control **In progress:** - [ ] VK_KHR_video_queue @@ -60,7 +62,6 @@ Source folder: [Vulkan backend](https://github.com/azhirnov/as-en/blob/dev/AE/en - [ ] VK_KHR_uniform_buffer_standard_layout - [ ] VK_KHR_format_feature_flags2 - [ ] VK_EXT_inline_uniform_block -- [ ] VK_EXT_subgroup_size_control - [ ] VK_EXT_depth_range_unrestricted - [ ] VK_EXT_sample_locations - [ ] VK_EXT_rasterization_order_attachment_access @@ -125,6 +126,7 @@ Source folder: [Vulkan backend](https://github.com/azhirnov/as-en/blob/dev/AE/en - [ ] VK_NV_displacement_micromap, GL_NV_displacement_micromap - [ ] VK_KHR_push_descriptor - [ ] VK_KHR_shader_integer_dot_product - ML +- [ ] VK_KHR_pipeline_binary **Pending for debugging:** - [ ] VK_EXT_device_fault @@ -150,7 +152,6 @@ Source folder: [Vulkan backend](https://github.com/azhirnov/as-en/blob/dev/AE/en - VK_NV_raw_access_chains - for HLSL - VK_KHR_index_type_uint8 - VK_NV_copy_memory_indirect -- VK_NV_clip_space_w_scaling - for VR, replaced by VRS **Supported shader extensions:** - [x] VK_KHR_shader_clock, 
GL_EXT_shader_realtime_clock, GL_ARB_shader_clock @@ -199,10 +200,10 @@ Source folder: [Vulkan backend](https://github.com/azhirnov/as-en/blob/dev/AE/en - [ ] VK_KHR_workgroup_memory_explicit_layout - [ ] VK_EXT_shader_image_atomic_int64 - [ ] VK_HUAWEI_cluster_culling_shader, GL_HUAWEI_cluster_culling_shader -- [ ] GL_EXT_expect_assume +- [ ] GL_EXT_expect_assume - depends on compiler? - [ ] VK_NV_shader_atomic_float16_vector -- [ ] VK_KHR_shader_float_controls, SPV_KHR_float_controls -- [ ] VK_KHR_shader_float_controls2, SPV_KHR_float_controls2 +- [ ] VK_KHR_shader_float_controls, SPV_KHR_float_controls - not supported by glslang +- [ ] VK_KHR_shader_float_controls2, SPV_KHR_float_controls2 - not supported by glslang - [ ] GL_EXT_spirv_intrinsics - [ ] GL_EXT_shared_memory_block - [ ] GL_EXT_fragment_invocation_density diff --git a/AE/docs/papers/ArchitectureNotes-ru.md b/AE/docs/papers/ArchitectureNotes-ru.md index 51d57b0a..13dfa788 100644 --- a/AE/docs/papers/ArchitectureNotes-ru.md +++ b/AE/docs/papers/ArchitectureNotes-ru.md @@ -99,3 +99,5 @@ In a multithreaded environment, calling SendMessage from a thread that is not th Почему плохо возвращать `enum` с кодами ошибок - на каждый вызов функции от пользователя требуется обработать все возможные ошибки, это сильно увеличивает объем кода, это требует заново читать документацию и тд, тогда как чаще всего пользователю нужно получить ответ успешно ли отработала функция или нет. Кроме сложностей для пользователей есть и сложность для разработчиков, так как надо сопоставить каждую ошибку с определенным кодом, при этом, чем чаще используется один и тот же аргумент, тем сложнее пользователю найти причину ошибки. Например коды E_INVALIDARG и GL_INVALID_VALUE возвращаются во множестве случаев. + +Например в SDL3 перешли с `int`, где отрицательные значения содержат код ошибки, на `SDL_bool`. 
diff --git a/AE/docs/papers/GPU_Benchmarks.md b/AE/docs/papers/GPU_Benchmarks.md new file mode 100644 index 00000000..2dd1a6fa --- /dev/null +++ b/AE/docs/papers/GPU_Benchmarks.md @@ -0,0 +1,83 @@ +GPUs: +* [Adreno 660](bench/Adreno_660.md) +* [Adreno 505](bench/Adreno_505.md) +* [AMD RX 570](bench/AMD_RX570.md) +* [Apple M1](bench/Apple_M1.md) +* [Intel UHD 620](bench/Intel_UHD620.md) +* [Mali G57](bench/ARM_Mali_G57.md) +* [Mali T830](bench/ARM_Mali_T830.md) +* [NVidia RTX 2080](bench/NVidia_RTX2080.md) +* [PowerVR BXM-8-256](bench/PowerVR_BXM.md) + +Other: +* [Comparison of Results](#Comparison-of-Results) +* [Test Sources](#Test-Sources) + + +# Comparison of Results + +## Render target compression + +**block** - compare compression between 1x1 noise and block size (4x4 or 8x8) noise.
+**max** - compare compression between 1x1 noise and solid color.
+ +| GPU | block size | block RGBA8_UNorm | max RGBA8_UNorm | block RGBA16_UNorm | max RGBA16_UNorm | method | comments | +|---|---|---|---|---|---|---|---| +| NV RTX 20xx | 4x4 | 3 | 3.2 | 4.1 | 4.1 | exec time | +| Adreno 6xx | 16x16 | 1.9 | 6.9 | ? | 3.3 | exec time | +| Adreno 5xx | 4x4 | 2.5 | 2.7 | ? | ? | exec time | +| AMD GCN4 | 4x4 | 2.3 | 3 | 2.3 | 3 | exec time | +| Intel UHD 6xx 9.5gen | 8x8 | 1.6 | 1.8 | ? | ? | exec time | +| ARM Mali Valhall | 4x4 | 6.9 | 60 | - | - | mem traffic | only 32bit formats | +| PowerVR B-Series | 8x8 | 23 | 134 | 24 | 134 | mem traffic | + + +# Test Sources + +### 1. fp16 instruction performance +[code](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/scripts/perf/Inst-fp16.as) + +### 2. fp32 instruction performance +[code](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/scripts/perf/Inst-fp32.as) + +### 3. Render target compression +[code](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/scripts/perf/RTCompression.as) + +### 4. Shader instruction benchmark + + +### 5. Texture lookup performance + +* sequential access - UV coordinates multiplied by scale and added bias. + - scale < 1 has better texture cache usage. + - scale > 1 has high cache misses. + - scale > 1 in practice used for noise texture in procedural generation. +* 'noise NxN' - screen divided into blocks with NxN size, each block has unique offset for texture lookup, each pixel in block has 1px offset from nearest pixels. + - offset with 1px used to find case where nearest warp can not use cached texel. + - in practice this method is used for packed 2D sprites and textures for meshes. + +[code](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/scripts/perf/TexLookup.as) + +### 6. 
Subgroups + +* [Subgroups in fullscreen triangle](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/scripts/perf/Subgroups-1.as) +* [Subgroups with multiple triangles](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/scripts/perf/Subgroups-2.as) + +### 7. Buffer/Image storage access + +* [Image/Buffer common cases](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/scripts/perf/Storage.as) +* [Buffer with variable data size](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/scripts/perf/BufferStorage.as) +* [Image with thread/group reorder](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/scripts/perf/ImageStorage-Reorder.as) +* [Image with RT compression, 4xRGBA8](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/scripts/perf/ImageStorage-1.as) +* [Image with RT compression, 2xRGBA16](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/scripts/perf/ImageStorage-2.as) +* [Image with RT compression, 1xR32](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/scripts/perf/ImageStorage-3.as) + +### 9. Texture cache + +Find texture size where performance has near to 2x degradation this indicates a lot of cache misses and bottleneck in high level cache or external memory (RAM/VRAM).
+Expected hierarchy: +* texture cache (L1) +* L2 cache +* RAM / VRAM + +[code](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/scripts/perf/TexCache.as) diff --git a/AE/docs/papers/GraphicsNotes-ru.md b/AE/docs/papers/GraphicsNotes-ru.md index 5f79031d..a8f1cdb4 100644 --- a/AE/docs/papers/GraphicsNotes-ru.md +++ b/AE/docs/papers/GraphicsNotes-ru.md @@ -81,6 +81,19 @@ float3x3 ComputeTBNinFS (float2 uv, float3 worldPos) ### Фильтрация * Фильтрация R16F текстуры с включенным `mediump float` работает по-разному на NVidia и других ГП. На NV появляются артефакты фильтрации. +* На мобилках требуется `highp sampler2D` для 16-битных форматов иначе теряется точность даже без фильтрации (texelFetch). + +* Фильтрация текстур происходит с 8-битной точностью. [ref](https://iquilezles.org/articles/hwinterpolation/) + - Актуально для 8 и 16 битных форматов. + - 32 битные форматы (R32F) фильтруются с большей точностью. + - Проявляется при расчете попиксельных нормалей для карты высот через деривативы. + + +## Эффекты + +### Bloom + +Эффект рассеивания света на линзе. Чем больше яркость, тем больше рассеивается. Результат прибавляется к цвету сцены. ## Разное @@ -96,12 +109,11 @@ float3x3 ComputeTBNinFS (float2 uv, float3 worldPos) **Multiview** - позволяет рисовать в массив 2д текстур с разными проекциями на view. Задается через `gl_ViewIndex`. Используется в VR для рисования в оба глаза за один проход. -**Viewport array** - позволяет рисовать в 2д текстуру с разными проекциями на виюпорт. Задается через `gl_ViewportIndex`. -Требует геометрический шейдер чтобы дублировать геометрию. +**Viewport array** - позволяет рисовать в 2д текстуру с разными проекциями на виюпорт. Задается через `gl_ViewportIndex` в геометричеком шейдере. Расширение `VK_EXT_shader_viewport_index_layer` позволяет выбирать виюпорт в вершинном шейдере, дублирование геометрии делается через инстансинг. Используется для ??? -**Layered rendering** - позволяет рисовать в массив 2д текстур. 
Задается через `gl_Layer`. +**Layered rendering** - позволяет рисовать в массив 2д текстур. Задается через `gl_Layer` в геометрическом шейдере. Расширение `VK_EXT_shader_viewport_index_layer` позволяет выбирать слой в вершинном шейдере, дублирование геометрии делается через инстансинг. Используется для рисования кубических карт за один проход. @@ -113,8 +125,8 @@ float3x3 ComputeTBNinFS (float2 uv, float3 worldPos) ## Размер воркгруппы -* На старых мобилках максимальный размер 64, поддерживается и 128, но с вдвое меньшим количеством регистров. -* На NV Turing нужно минимум 128 потоков чтобы максимально загрузить SM. +* На старых мобилках максимальный размер 64 (8х8), поддерживается и 128, но с вдвое меньшим количеством регистров. +* На NV Turing нужно минимум 128 (16х8) потоков чтобы максимально загрузить SM. ## Ветвление в шейдерах @@ -123,10 +135,13 @@ float3x3 ComputeTBNinFS (float2 uv, float3 worldPos) * Компилятор заменяет повторяющиеся деления на одно переворачивание (1/x) и умножения. * Реализация `Sign` через `Step`, который возвращает -1 или 1, намного быстрее чем `SignOrZero` (`sign` из GLSL), а `copysign` из MSL - быстрее `Step`. -* `FMA` на мобильных работает через `fp32 FMA`, а на NV использует `fp16 FMA x2` что в 2 раза быстрее fp32 для half2, half4. +* `FMA` на мобильных работает через `fp32 FMA`, а на NV и Intel использует `fp16 FMA x2` что в 2 раза быстрее fp32 для half2, half4. * `[[unroll]]` сильно замедляет компиляцию пайплайна, вплоть до повисания (PowerVR), на производительность влияет слабо. * На NV mediump может работать медленнее чем highp, на мобильных аналогично fp16. * Для uint `FindMSB` в 2 раза быстрее `FindLSB`, для int `FindLSB` может быть быстрее. +* На NV/AMD/Intel FP32ADD работает в 2 раза быстрее чем FP32FMA, FP32MUL. +* На мобилках FP32ADD, FP32MUL, FP32FMA работают примерно одинаково. +* В спецификациях считают FMA за 2 инструкции и указывают в 2 раза большую производительность. 
**SFU** pipe (special function unit) - на нем выполняются более редкие операции типа переворачивания (1/x), sqrt, sin, cos, exp, log, fract, ceil, round, sign и тд. Чаще всего на 4 потока варпа приходится 1-2 SFU, поэтому все перечисленные операции относительно медленные, но некоторые выполняются за одну инструкцию, а другие эмулируются и занимают еще больше времени. @@ -142,11 +157,22 @@ Normalize часто сделан через 1/Length, а Distance через Le Clamp(x,-1,1) работает в 2 раза медленнее, а без констант еще медленнее. **Конвертация типов**
-BitCast типа uintBitsToFloat работает быстрее всего.
+Битовый каст типа uintBitsToFloat работает быстрее всего.
В среднем 4x fma занимает конвертация между int и float.
**Integer типы**
-Битовые операции и сложение работает за 1x fma. На некоторых архитектурах может работать параллельно с float, на других - часть float блоков отключается и теряется производительность.
+Битовые операции и сложение работают за 1x fma. На мобильных архитектурах может быть медленнее и доходить до 2-4x fma. На некоторых архитектурах может работать параллельно с float, на других - часть float блоков отключается и теряется производительность.
Около 4x fma: mul, FindMSB(uint), BitCount.
От 8x fma: FindLSB, FindMSB(int), uaddCarry, usubBorrow.
От 16x fma: div, mod, umulExtended.
+ +Подробные результаты микробенчмарков: [GPU_Benchmarks](GPU_Benchmarks.md) + + +## Профилирование на мобилках + +Чаще всего доступны счетчики производительности, часть из них в количестве (циклы, байты, транзакции и тд), часть в процентах. Для количественных значений нужно запускать микробенчмарки чтобы найти максимальное значение за кадр или за секунду, например GB/s для памяти. Значения из спецификаций не всегда совпадают с измеряемыми, и сами спецификации на смартфоны содержат ошибки. + +Главный показатель это частота ГП. Если на ГП нет нагрузки, то для экономии энергии частота понижается. Многие счетчики в процентах могут при этом показывать до 100% нагрузки, но пока частота низкая это не имеет особого значения. + +Второй показатель это нагрузка на внешнюю память (RAM, external memory). Если нагрузка приближается к максимальной, значит кэши не используются. diff --git a/AE/docs/papers/bench/AMD_RX570.md b/AE/docs/papers/bench/AMD_RX570.md new file mode 100644 index 00000000..10e35604 --- /dev/null +++ b/AE/docs/papers/bench/AMD_RX570.md @@ -0,0 +1,100 @@ + +# AMD RX 570 (GCN4) + +## Specs + +* FP16: **5.095** TFLOPS +* FP32: **5.095** TFLOPS +* FP64: **318.5** GFLOPS +* Memory: 4GB, GDDR5, 256 bit, **224.0** GB/s (86 GB/s from tests) +* Driver: ??? + + +## Shader + +### Quads + +* Test `subgroupQuadBroadcast( gl_HelperInvocation )` with/without texturing - helper invocations are executed. [[6](../GPU_Benchmarks.md#6-Subgroups)] +* Test `subgroupQuadBroadcast( constant )` with/without texturing - helper invocations are executed. [[6](../GPU_Benchmarks.md#6-Subgroups)] + + +### Subgroups + +* Subgroups in fragment shader can fill multiple triangles, but only with the same `gl_InstanceIndex`. [[6](../GPU_Benchmarks.md#6-Subgroups)] +* Subgroups in fragment shader reserve threads for helper invocations, even if they are not executed. 
[[6](../GPU_Benchmarks.md#6-Subgroups)] + + +### Subgroup threads order + +Result of `Rainbow( gl_SubgroupInvocationID / gl_SubgroupSize )` in fragment shader, gl_SubgroupSize: 64. [[6](../GPU_Benchmarks.md#6-Subgroups)] + +![](img/graphics-subgroups/amd-gcn4.png) + +Result of `Rainbow( gl_SubgroupInvocationID / gl_SubgroupSize )` in compute shader, gl_SubgroupSize: 64, workgroup size: 8x8. [[6](../GPU_Benchmarks.md#6-Subgroups)] + +![](img/compute-subgroups/amd-gcn4.png) + +## Render target compression + +* RGBA8 268.4MPix downsample 1/2, compressed/uncompressed access rate: [[3](../GPU_Benchmarks.md#3-Render-target-compression)] + - expected read: 1.07GB, write: 268.4MB, total: 1.34GB per frame. + - 8x8 noise: linear: 4.1ms, fetch/nearest: 4.7ms. + - image storage: load: 15.5ms, fetch/linear: 12.5ms. Texture cache is faster than L2 access. + - graphics to compute: 15.5ms. Compression disabled when used storage usage flag. + + | diff | exec time (ms) | approx traffic (GB/s) | name | comments | + |---|---|---|------|----| + | 1 | 15.5 | 86 | image storage | | + | 1.3 | 12 | 112 | time for 1x1 noise | | + | 1.3 | 11.5 | 116 | time for 2x2 noise | | + | 3.0 | 5.2 | 258 | time for 4x4 noise | **same as block size** | + | 3.8 | 4.1 | 326 | time for 8x8 noise | | + | 3.9 | 4.0 | 335 | time for 16x16 noise | | + | 3.9 | 4.0 | 335 | time for gradient | | + | 3.9 | 4.0 | 335 | time for solid color | | + + +* RGBA16_UNorm 151MPix downsample 1/2, compressed/uncompressed access rate: [[3](../GPU_Benchmarks.md#3-Render-target-compression)] + - expected read: 1.2GB, write: 302MB, total: 1.51GB per frame. 
+ + | diff | exec time (ms) | approx traffic (GB/s) | name | comments | + |---|---|---|------|----| + | 1 | 18 | 86 | image storage | | + | 1.3 | 13.6 | 114 | time for 1x1 noise | | + | 1.4 | 12.9 | 120 | time for 2x2 noise | | + | 2.5 | 7.1 | 218 | time for gradient | | + | 3.1 | 5.8 | 266 | time for 4x4 noise | **same as block size** | + | 3.9 | 4.6 | 336 | time for 8x8 noise | | + | 4.0 | 4.5 | 344 | time for solid color | | + | 4.0 | 4.5 | 344 | time for 16x16 noise | | + + +* RGBA16F 151MPix downsample 1/2, compressed/uncompressed access rate: [[3](../GPU_Benchmarks.md#3-Render-target-compression)] + - expected read: 1.2GB, write: 302MB, total: 1.51GB per frame. + + | diff | exec time (ms) | approx traffic (GB/s) | name | comments | + |---|---|---|------|----| + | 1 | 18 | 86 | image storage | | + | 1.3 | 13.6 | 114 | time for 1x1 noise | | + | 1.4 | 12.6 | 123 | time for 2x2 noise | | + | 3.1 | 5.8 | 266 | time for 4x4 noise | **same as block size** | + | 3.7 | 4.8 | 322 | time for gradient | | + | 3.9 | 4.6 | 336 | time for 8x8 noise | | + | 3.9 | 4.6 | 336 | time for 16x16 noise | | + | 4.0 | 4.5 | 344 | time for solid color | | + + +* RGBA32F 67.1MPix downsample 1/2, compressed/uncompressed access rate: [[3](../GPU_Benchmarks.md#3-Render-target-compression)] + - expected read: 1.07GB, write: 268.4MB, total: 1.34GB per frame. 
+ + | diff | exec time (ms) | approx traffic (GB/s) | name | comments | + |---|---|---|------|----| + | 1 | 16.2 | 85 | image storage | | + | 1.1 | 14.7 | 93 | time for 1x1 noise | | + | 1.2 | 13.7 | 100 | time for 2x2 noise | | + | 1.2 | 13.2 | 104 | time for 4x4 noise | **same as block size** | + | 1.2 | 13.2 | 104 | time for 8x8 noise | | + | 1.2 | 13.2 | 104 | time for 16x16 noise | | + | 1.2 | 14.0 | 98 | time for gradient | | + | 2.8 | 5.8 | 236 | time for solid color | | + diff --git a/AE/docs/papers/bench/ARM_Mali_G57.md b/AE/docs/papers/bench/ARM_Mali_G57.md new file mode 100644 index 00000000..fca81011 --- /dev/null +++ b/AE/docs/papers/bench/ARM_Mali_G57.md @@ -0,0 +1,179 @@ + +# ARM Mali G57 MC2 (Valhall gen1) + +## Specs + +* Cores: 2 +* ALU: 2 +* L2 cache: 512 Kb +* Tile bits/pixel: 256 *(32 bytes/pixel, 2xRGBA32)* +* Texture cache: 32 Kb +* Warp width: 16 +* FP16 GFLOPS: **242** (121 GOp/s on MulAdd from tests) +* FP32 GFLOPS: **121** (60.7 GOp/s on FMA from tests) +* Clock: 950 MHz +* Texture cache: 32 Kb +* Max work registers (32b): 64 +* Memory: 4GB, LPDDR4X, DC 16bit, 2133 MHz, **17.07** GB/s (14.2 GB/s from tests) +* Device: Realme 8I (Android 13, Driver 32.1.0) + + +## Shader + +### Quads + +* Quads on edge between 2 triangles are not merged, so 2 near pixels may execute up to 6 helper invocations. + +* Test `subgroupQuadBroadcast( gl_HelperInvocation )` without texturing - helper invocations are **not** executed. [[6](../GPU_Benchmarks.md#6-Subgroups)] +* Test `subgroupQuadBroadcast( gl_HelperInvocation )` with texturing - helper invocations are executed, even if `Nearest` immutable sampler is used. [[6](../GPU_Benchmarks.md#6-Subgroups)]
+Red - no helper invocations, violet - 3 helper invocations per quad.
+![](img/full-quad/valhall-1-tex-ht.png) + +* Test `subgroupQuadBroadcast( constant )` without texturing - helper invocations are **not** executed. [[6](../GPU_Benchmarks.md#6-Subgroups)]
+Red - full quad, blue - only 1 thread per quad.
+![](img/full-quad/valhall-1-qd.png) + +* Test `subgroupQuadBroadcast( constant )` with texturing - helper invocations are executed, even if `Nearest` immutable sampler is used. [[6](../GPU_Benchmarks.md#6-Subgroups)] + + +### Subgroups + +* Subgroups in fragment shader can fill multiple triangles, but only with the same `gl_InstanceIndex`. [[6](../GPU_Benchmarks.md#6-Subgroups)] +* Subgroups in fragment shader reserve threads for helper invocations, even if they are not executed. [[6](../GPU_Benchmarks.md#6-Subgroups)] + +* Subgroup occupancy with texturing. Helper invocations are executed and included as active thread. Red color - full subgroup. [[6](../GPU_Benchmarks.md#6-Subgroups)]
+![](img/full-subgroup/valhall-1-tex.png) + +* Subgroup occupancy without texturing. Helper invocations are not executed but threads are reserved, so occupancy is low. Red color - full subgroup. [[6](../GPU_Benchmarks.md#6-Subgroups)]
+![](img/full-subgroup/valhall-1.png) + +* Subgroup occupancy for too small triangles. Red color - full subgroup. [[6](../GPU_Benchmarks.md#6-Subgroups)]
+![](img/full-subgroup/valhall-1-large.png) + + +### Subgroup threads order + +Result of `Rainbow( gl_SubgroupInvocationID / gl_SubgroupSize )` in fragment shader, gl_SubgroupSize: 16, image size: 16x16. [[6](../GPU_Benchmarks.md#6-Subgroups)] + +![](img/graphics-subgroups/valhall-1.png) + +Unique subgroups, image size: 32x32, gl_SubgroupSize: 16. Each subgroup in tile scheduled by quads (2x2 pixels), each quad may have any position inside 32x32 pixel tile, but often they are placed inside 8x8 region. [[6](../GPU_Benchmarks.md#6-Subgroups)] + +![](img/valhall-1-unique-subgroups.png) + +Result of `Rainbow( gl_SubgroupInvocationID / gl_SubgroupSize )` in compute shader, gl_SubgroupSize: 16, workgroup size: 8x8. [[6](../GPU_Benchmarks.md#6-Subgroups)] + +![](img/compute-subgroups/valhall-1.png) + + +### Instruction cost + +* [[4](../GPU_Benchmarks.md#4-Shader-instruction-benchmark)]: + - Only fp32 FMA - *(fp16 and mediump use same fp32 FMA)*. + - Fp32 FMA is preferred over FMul or FMulAdd. + - Fp32 and i32 datapaths can execute in parallel in 2:1 rate. + - Fp16 and mediump are 2x faster than fp32 in FMul, FAdd. + - Length is a bit faster than Distance and Normalize. + - ClampUNorm and ClampSNorm are fast. + +* Fp32 performance: [[2](../GPU_Benchmarks.md#2-fp32-instruction-performance)]: + - Loop unrolling doesn't change performance. + - Manual loop unrolling doesn't change performance either. + - Loop index with `int` is faster than `float`. + - Graphics and compute have same performance. + - Compute dispatch on 128 - 2K grid is faster. + - Compiler can optimize only addition, so test combines Add and Sub. + - **60.7** GOp/s at 950 MHz on Add, Mul, MulAdd, FMA. + - Equal to **120** GFLOPS on MulAdd and FMA. + +* Fp16 (half float) performance: [[1](../GPU_Benchmarks.md#1-fp16-instruction-performance)]: + - **60** GOp/s at 950 MHz on FMA - equal to F32FMA. + - **121** GOp/s at 950 MHz on Add, Mul, MulAdd. + - Equal to **240** GFLOPS on MulAdd. 
+ + +### Noise performance + +| name | thread count | exec time (ms) | ALU (%) | per thread (ns) | +|---|---|---|---|---| +| ValueNoise | 1.05M | 2.4 | 89 | 2.3 | +| PerlinNoise | 1.05M | 3.6 | 93 | 3.4 | +| Voronoi, 2D | 1.05M | 3.6 | 91 | 3.4 | +| SimplexNoise | 1.05M | 3.7 | 93 | 3.5 | +| GradientNoise | 1.05M | 3.9 | 93 | 3.7 | +| WaveletNoise | 1.05M | 3.9 | 91 | 3.7 | +| ValueNoiseFBM, octaves=4 | 1.05M | 9.6 | 94 | 9.1 | +| Voronoi, 3D | 1.05M | 10.9 | 94 | 10.4 | +| WarleyNoise | 1.05M | 10.9 | 94 | 10.4 | +| VoronoiCircles | 1.05M | 12.5 | 95 | 11.9 | +| SimplexNoiseFBM, octaves=4 | 1.05M | 14.8 | 96 | 14.1 | +| PerlinNoiseFBM, octaves=4 | 1.05M | 15.1 | 95 | 14.4 | +| GradientNoiseFBM, octaves=4 | 1.05M | 16.5 | 96 | 15.7 | +| VoronoiContour2, 2D | 1.05M | 21.2 | 94 | 20.2 | +| VoronoiContour3, 2D | 262K | 5.4 | 92 | 20.6 | +| WarleyNoiseFBM, octaves=4 | 262K | 12.1 | 95 | 46.2 | +| IQNoise | 262K | 18 | 95 | 68.7 | +| VoronoiContour2, 3D | 262K | 28.5 | 95 | 109 | +| VoronoiContour3, 3D | 65K | 17.5 | **40** | 269 | +| IQNoiseFBM, octaves=4 | 65K | 20.5 | 95 | 315 | +| VoronoiContourFBM, octaves=4 | 65K | 29.3 | 94 | 451 | +| VoronoiContour3FBM, octaves=4 | 16K | 21.5 | **34** | 1344 | + + +## Resource access + + +* Buffer/Image storage RGBA32F 4.19MPix 2x67.1MB [[7](../GPU_Benchmarks.md#7-BufferImage-storage-access)] + - 1.07MPix lost 2x of performance (350MHz, 5GB/s). + + | diff | exec time (ms) | traffic (GB/s) | L2 read miss (%) | name | comments | + |---|---|---|---|-----------|------| + | 1 | 9.0 | 13.2 | 50 | Image fetch/sample in FS with double buffering | + | 1.01 | 9.1 | 13.0 | 15 | Image read/write attachment RGBA32F | low L2 read miss because of prefetch (?), used 128bits/pixel in tile | + | 1.03 | 9.3 | 13.7 | 15 | Image read/write attachment 2xRGBA16 | + | 1.08 | 9.7 | 13.6 | 50 | Image load/store different order | image access should be reordered to match Z-curve (?) 
| + | 1.11 | 10 | 12.7 | 50 | Image load/store | + | 1.23 | 11.1 | 11.9 | 40 | Buffer load/store | 16byte per load/store, which is less than cache line (32/64?) | + | 1.35 | 12.2 | 10.7 | 34 | Buffer load/store in FS | + | 1.82 | 16.4 | 3.5 | **90** | Image read/write attachment 4xRGBA8 | RT compression doesn't increase performance, because of 4 attachments (?) | + + +## Render target compression + +* RGBA8 67.1MPix downsample 1/2, compressed/uncompressed access rate: [[3](../GPU_Benchmarks.md#3-Render-target-compression)] + - related specs: AFBC v1.3 with 4x4 block size; 16x16 tile size. + - linear: 18.3ms, fetch: 33ms, nearest: 33ms. Linear filter minimize L2 cache misses on high compression rate. + - graphics to compute r/w: 268MB / 66MB. Compression disabled when used storage usage flag. + + | diff (read) | read (MB) | write (MB) | name | comments | + |---|---|---|------|----| + | - | 268 | 67 | expected | | + | 1 | 270 | 70 | image storage | | + | 1.04 | 258 | 59 | 1x1 noise | | + | 1.2 | 220 | 65 | 2x2 noise | | + | 6.9 | 39 | 55 | 4x4 noise | **same as block size** | + | 10 | 26 | 9.5 | gradient | | + | 12.2 | 22 | 10 | 8x8 noise | | + | 56 | 4.8 | 6 | 16x16 noise | | + | 60 | 4.5 | 1.2 | solid color | | + + +* RGBA16F 16.8MPix downsample 1/2, compressed/uncompressed access rate: [[3](../GPU_Benchmarks.md#3-Render-target-compression)] + - expected read: 134.2MB, write: 33.5MB, total: 167.7MB per frame. + - solid color (r/w: 135MB / 35MB) + - **no compression** + +## Texture cache + +* RGBA8_UNorm texture with random access [[9](../GPU_Benchmarks.md#9-Texture-cache)] + - Measured cache size: 16 KB, 256 KB, 1 MB. 
+ + | size (KB) | dimension (px) | L2 bandwidth (GB/s) | external bandwidth (GB/s) | comment | + |---|---|---|---|---| + | 16 | 64x64 | 0.009 | 0.004 | **used only texture cache** | + | 32 | 128x64 | 0.38 | 0.004 | | + | 64 | 128x128 | 45 | 0.004 | **used L2 cache** | + | 128 | 256x128 | 45 | 0.004 | | + | 256 | 256x256 | 49 | 4 | | + | 512 | 512x256 | 49 | 7.6 | **L2 cache with 15% miss** | + | 1024 | 512x512 | 24 | 12.5 | **30% L2 miss, bottleneck on external memory** | diff --git a/AE/docs/papers/bench/ARM_Mali_T830.md b/AE/docs/papers/bench/ARM_Mali_T830.md new file mode 100644 index 00000000..f37f11ee --- /dev/null +++ b/AE/docs/papers/bench/ARM_Mali_T830.md @@ -0,0 +1,60 @@ + +# ARM Mali T830 (Midgard gen4) + +## Specs + +* Cores: 1 +* ALU: 2 +* L2: 64 Kb +* Clock: 1000 MHz +* Bus width: 128 bits +* Memory: 2GB, LPDDR3, DC 32bit, 933MHz, **14.9**GB/s (4GB/s from tests) +* FP16 GFLOPS: **56** +* FP32 GFLOPS: **32** +* Device: Samsung J7 Neo (Android 9, Driver 28.0.0) + +## Shader + +Doesn't support quad and subgroups. 
+ +### Noise performance + +| name | thread count | exec time (ms) | per thread (ns) | +|---|---|---|---| +| SimplexNoise | 262K | 14.6 | 55.7 | +| ValueNoise | 262K | 15.4 | 58.8 | +| WaveletNoise | 262K | 18.1 | 69.1 | +| GradientNoise | 262K | 21.9 | 83.6 | +| Voronoi, 2D | 262K | 23.6 | 90.1 | +| PerlinNoise | 262K | 26.2 | 100 | +| SimplexNoiseFBM, octaves=4 | 65K | 10.8 | 166 | +| ValueNoiseFBM, octaves=4 | 65K | 13.5 | 208 | +| Voronoi, 3D | 65K | 15.6 | 240 | +| WarleyNoise | 65K | 16.7 | 257 | +| GradientNoiseFBM, octaves=4 | 65K | 17.5 | 269 | +| PerlinNoiseFBM, octaves=4 | 65K | 18.2 | 280 | +| VoronoiCircles | 65K | 19.8 | 305 | +| VoronoiContour3, 2D | 65K | 21.8 | 335 | +| VoronoiContour2, 2D | 65K | 22.2 | 341 | +| WarleyNoiseFBM, octaves=4 | 65K | 58 | 892 | +| IQNoise | 65K | 75.6 | 1 160 | +| VoronoiContour3, 3D | 16K | 23.1 | 1 444 | +| VoronoiContour2, 3D | 65K | 97.2 | 1 495 | +| IQNoiseFBM, octaves=4 | 16K | 76.2 | 4 762 | +| VoronoiContour3FBM, octaves=4 | 4K | 23.5 | 5 875 | +| VoronoiContourFBM, octaves=4 | 4K | 23.9 | 5 975 | + + +## Render target compression + +* RGBA8 16.8MPix downsample 1/2, compressed/uncompressed access rate: [[3](../GPU_Benchmarks.md#3-Render-target-compression)] + - **no compression** + + | diff (read) | read (MB) | write (MB) | name | + |---|---|---|------| + | - | 67.1 | 16.7 | expected | + | 0.9 | 75 | 17 | image storage | + | 1 | 68.5 | 17 | solid color | + | 1 | 68.5 | 17 | gradient | + + diff --git a/AE/docs/papers/bench/Adreno_505.md b/AE/docs/papers/bench/Adreno_505.md new file mode 100644 index 00000000..9dda53ac --- /dev/null +++ b/AE/docs/papers/bench/Adreno_505.md @@ -0,0 +1,37 @@ + +# Qualcomm Adreno 505 + +## Specs + +* gmem: 128 Kb + 8 Kb (128x256 tile for RGBA8) +* L2 cache: ? +* texture cache: ? +* F16 GFLOPS: **86.4** +* F32 GFLOPS: **43.2** +* F64 GFLOPS: **10.8** +* Execution units: 1 +* ALUs: 48 ? 
+* Clock: 450 MHz +* Memory v1: 2GB, LPDDR3, 933 MHz, 7.4GB/s (5GB/s from tests) +* Memory v2: 2GB, LPDDR3-1600, SC 32bit, 800MHz, 6.4 GB/s +* Device: Redmi 7A (Android ?, driver ?) + +## Render target compression + +* RGBA8 67.1MPix downsample 1/2, compressed/uncompressed access rate: [[3](../GPU_Benchmarks.md#3-Render-target-compression)] + - expected read: 268MB, write: 67MB, total: 335MB per frame. + - 8x8 noise: linear: 25ms, nearest: 40ms, fetch: 46ms. + - image storage: load: 350ms, fetch/linear: 150ms. + - graphics to compute: ??? ms. + + | diff | exec time (ms) | approx traffic (GB/s) | name | comments | + |---|---|---|------|----| + | 0.19 | 350 | 0.95 | image storage | | + | 1 | 66 | 5.0 | 1x1 noise | | + | 1.6 | 42 | 8.0 | 2x2 noise | | + | 2.5 | 26 | 12.9 | 4x4 noise | **same as block size** | + | 2.6 | 25 | 13.4 | 8x8 noise | | + | 2.6 | 25 | 13.4 | 16x16 noise | | + | 2.6 | 25 | 13.4 | gradient | | + | 2.7 | 24 | 14 | solid color | | + diff --git a/AE/docs/papers/bench/Adreno_660.md b/AE/docs/papers/bench/Adreno_660.md new file mode 100644 index 00000000..759537ed --- /dev/null +++ b/AE/docs/papers/bench/Adreno_660.md @@ -0,0 +1,154 @@ + +# Qualcomm Adreno 660 + +## Specs + +* Clock: 840 MHz (790?) +* F16 GFLOPS: **3244** (680 GOp/s on MulAdd from tests) +* F32 GFLOPS: **1622** (364 GOp/s on FMA from tests) +* F64 GFLOPS: **405** +* GMem size: 1.5 Mb (bandwidth?) +* L2: ? (bandwidth?) +* ALUs: 1024 +* Memory: 8 GB, LPDDR5-6400, QC 16bit, 3200MHz, **51.2** GB/s (34 GB/s from tests) +* Device: Asus ROG Phone 5 (Android 13, Driver 512.530.0) + + +## Shader + +### Quads + +* Test `subgroupQuadBroadcast( gl_HelperInvocation )` without texturing - helper invocations are **not** executed (or not detected). [[6](../GPU_Benchmarks.md#6-Subgroups)] +* Test `subgroupQuadBroadcast( gl_HelperInvocation )` with texturing - helper invocations are executed, even if `Nearest` immutable sampler is used. [[6](../GPU_Benchmarks.md#6-Subgroups)]
+Red - no helper invocations, violet - 3 helper invocations per quad.
+![](img/full-quad/adreno-660-tex-ht.png) + +* Test `subgroupQuadBroadcast( constant )` with/without texturing - helper invocations are executed. [[6](../GPU_Benchmarks.md#6-Subgroups)] + + +### Subgroups + +* Subgroups in fragment shader can fill multiple triangles, even if they have different `gl_InstanceIndex`. [[6](../GPU_Benchmarks.md#6-Subgroups)] +* Subgroups in fragment shader reserve threads for helper invocations, even if they are not executed. [[6](../GPU_Benchmarks.md#6-Subgroups)] + +* Subgroup occupancy with texturing. Helper invocations are executed and included as active thread. Red color - full subgroup. [[6](../GPU_Benchmarks.md#6-Subgroups)]
+![](img/full-subgroup/adreno-660-tex.png) + +* Subgroup occupancy without texturing. Helper invocations are not executed but threads are reserved, so occupancy is low. Red color - full subgroup. [[6](../GPU_Benchmarks.md#6-Subgroups)]
+![](img/full-subgroup/adreno-660.png) + +* Subgroup occupancy for too small triangles. Red color - full subgroup. [[6](../GPU_Benchmarks.md#6-Subgroups)]
+![](img/full-subgroup/adreno-660-large.png) + +### Subgroup threads order + +Result of `Rainbow( gl_SubgroupInvocationID / gl_SubgroupSize )` in fragment shader, gl_SubgroupSize: 64. [[6](../GPU_Benchmarks.md#6-Subgroups)] + +![](img/graphics-subgroups/adreno-600.png) + +Result of `Rainbow( gl_SubgroupInvocationID / gl_SubgroupSize )` in compute shader, gl_SubgroupSize: 64, workgroup size: 8x8. [[6](../GPU_Benchmarks.md#6-Subgroups)] + +![](img/compute-subgroups/adreno-600.png) + + +### Instruction cost + +* FP32 instruction benchmark [[2](../GPU_Benchmarks.md#2-fp32-instruction-performance)]: + - Loop unrolling is fast during pipeline creation if loop < 256. + - Loop unrolling is 1x - 1.4x faster, 2x slower on 1024, 1.1x slower on 256. + - Loop index with `int` and `float` has same performance. + - Compute dispatch on 2Kx2K grid is faster, 256x256 grid is a bit slower. + - Graphics render area 2Kx2K is faster. + - 128 subgroup size (wave128) has no effect on performance. + + | GOp/s | exec time (ms) | ops | max GFLOPS | + |---|---|---|---| + | **420** | 10.2 | F32Add, F32Mul | 420 | + | **364** | 11.8 | F32FMA, F32MulAdd | **728** | + +* FP16 instruction benchmark [[1](../GPU_Benchmarks.md#1-fp16-instruction-performance)]: + + | GOp/s | exec time (ms) | ops | max GFLOPS | + |---|---|---|---| + | **830** | 5.16 | F16Add, F16Mul | 830 | + | **707** | 6.06 | F16MulAdd | **1414** | + | **117** | 36.5 | F16FMA | 234 | + +## Resource access + +* Texture access 67.1MPix: [[5](../GPU_Benchmarks.md#5-Texture-lookup-performance)] + - expected read: 268MB per frame. + - UV bias has no effect. 
+ + | diff | exec time (ms) | approx traffic (GB/s) | name | comments | + |---|---|---|-------|------| + | 0.5 | 5 | 53 | sequential access, scale x0.5 | too high calculated bandwidth because of texture cache | + | 1 | 9.6 | 28 | sequential access, scale x1 | near to external memory bandwidth | + | 1.3 | 12.6 | 21 | random access, noise 16x16 | | + | 2 | 19 | 14 | sequential access, scale x1.5 | | + | 2.1 | 20.5 | 13 | random access, noise 8x8 | | + | 3.2 | 31 | 8.6 | sequential access, scale x2 | | + | 5.4 | 52.5 | 5.1 | random access, noise 4x4 | | + | 5.6 | 53.7 | 5 | random access, noise 4x4, off 1 | | + | 15.6 | 150 | 1.78 | random access, noise 2x2 | | + | 15.8 | 152 | 1.76 | random access, noise 2x2, off 1 | | + | 60 | 580 | 0.46 | random access, noise 1x1 | | + | 60 | 578 | 0.46 | random access, noise 1x1, off 1 | | + +* Buffer/Image storage 16bpp 9.4MPix 2x151MB [[7](../GPU_Benchmarks.md#7-BufferImage-storage-access)] + + | diff | exec time (ms) | approx traffic (GB/s) | name | comments | + |---|---|---|------|----| + | 1 | 8.7 | 34 | Image load/store | near to external memory bandwidth | + | 1 | 8.7 | 34 | Buffer load/store | near to external memory bandwidth | + | 1.06 | 9.2 | 33 | Image load/store with different order | access pattern cause some cache misses | + | 1.3 | 11.3 | 27 | Buffer load/store in FS | access pattern cause some cache misses - subgroup order in FS is differ than in CS | + | 1.6 | 13.2 | 23 | Image read/write input attachment RGBA32F | RT compression is not supported, attachments stored into GMem which may be slower than L2 (?) | + | 1.7 | 15.3 | 20 | Image read/write input attachment 4xRGBA8 | best 10.3ms, depends on RT compression | + + +## Render target compression + +* RGBA8 268MPix downsample 1/2, compressed/uncompressed access: [[3](../GPU_Benchmarks.md#3-Render-target-compression)] + - expected read: 1.07GB, write: 268MB, total: 1.34GB per frame. + - 8x8 noise linear/fetch/nearest has same perf. 
+ - 16x16 noise linear is faster. Linear filter minimizes L2 cache misses on high compression rate. + - graphics to compute: solid color: 68ms, gradient: 72ms, 1x1 noise: 80ms. + - with storage usage flag - perf same as for graphics. + + | diff | exec time (ms) | approx traffic (GB/s) | name | comments | + |---|---|---|------|----| + | 1 | 72 | 18.6 | image storage | | + | 1.7 | 42 | 31.9 | 1x1 noise | near to external memory bandwidth | + | 1.6 | 44 | 30.4 | 2x2 noise | | + | 2.2 | 32 | 41.9 | 4x4 noise | | + | 2.5 | 29 | 46.2 | 8x8 noise | | + | 3.3 | 22 | 60.9 | 16x16 noise | **same as block size** | + | 3.4 | 21 | 63.8 | gradient | | + | 6.9 | 10.4 | 128.8 | solid color | | + +* RGBA16_UNorm, RGBA16F - RT compression is supported. [[3](../GPU_Benchmarks.md#3-Render-target-compression)] + + | diff | exec time (ms) | name | + |---|---|---| + | 1 | 150 | image storage | + | 2 | 75 | 1x1 noise | + | 3.3 | 45 | solid color | + +* RGBA32F - RT compression is **not** supported. [[3](../GPU_Benchmarks.md#3-Render-target-compression)] + + +## Texture cache + +* RGBA8_UNorm texture with random access [[9](../GPU_Benchmarks.md#9-Texture-cache)] + - Measured cache size: 2 KB, 128 KB. + - 8 texels per pixel, dim ??? + + | size (KB) | dimension (px) | exec time (ms) | diff | approx bandwidth (GB/s) | + |---|---|---|---|---| + | 1 | 16x16 | - | | | + | 2 | 32x16 | 2.3 | | | + | 4 | 32x32 | 7 | 3 | | + | 16 | 64x64 | 12.4 | 1.8 | | + | 128 | 256x128 | 14 | | | + | 256 | 256x256 | 44 | 3 | | diff --git a/AE/docs/papers/bench/Apple_M1.md b/AE/docs/papers/bench/Apple_M1.md new file mode 100644 index 00000000..3dda3e19 --- /dev/null +++ b/AE/docs/papers/bench/Apple_M1.md @@ -0,0 +1,17 @@ + +# Apple-M1 + +## Specs + +## Shader + +### Subgroup threads order + +Result of `Rainbow( gl_SubgroupInvocationID / gl_SubgroupSize )` in fragment shader, gl_SubgroupSize: 32. 
[[6](../GPU_Benchmarks.md#6-Subgroups)] + +![](img/graphics-subgroups/mac-m1.png) + +Result of `Rainbow( gl_SubgroupInvocationID / gl_SubgroupSize )` in compute shader, gl_SubgroupSize: 32, workgroup size: 8x8. [[6](../GPU_Benchmarks.md#6-Subgroups)] + +![](img/compute-subgroups/mac-m1.png) + diff --git a/AE/docs/papers/bench/Intel_UHD620.md b/AE/docs/papers/bench/Intel_UHD620.md new file mode 100644 index 00000000..da4aa08c --- /dev/null +++ b/AE/docs/papers/bench/Intel_UHD620.md @@ -0,0 +1,108 @@ + +# Intel UHD 620 (Gen9) + +## Specs + +* Arch: Kaby Lake-R +* Clock: 300 / 1000 MHz +* Pixel Rate: 3.000 GPixel/s +* Texture Rate: 24.00 GTexel/s +* FP16 GFLOPS: **768.0** (397 GOp/s on FMA from tests) +* FP32 GFLOPS: **384.0** (208 GOp/s on FMA from tests) +* FP64 GFLOPS: **96.00** +* Execution Units: 24 +* warp size: 16 +* Memory: 8GB LPDDR3, 1867MHz, **29.8** GB/s (21 GB/s from tests) +* Driver: ??? + + +## Shader + +### Quads + +* Test `subgroupQuadBroadcast( gl_HelperInvocation )` with/without texturing - helper invocations are executed. [[6](../GPU_Benchmarks.md#6-Subgroups)] +* Test `subgroupQuadBroadcast( constant )` with/without texturing - helper invocations are executed. [[6](../GPU_Benchmarks.md#6-Subgroups)] + + +### Subgroups + +* Subgroups in fragment shader can not fill multiple triangles. This leads to unused threads in subgroup. [[6](../GPU_Benchmarks.md#6-Subgroups)] + + +### Subgroup threads order for Gen9.5 + +Result of `Rainbow( gl_SubgroupInvocationID / gl_SubgroupSize )` in fragment shader, gl_SubgroupSize: 16. [[6](../GPU_Benchmarks.md#6-Subgroups)] + +![](img/graphics-subgroups/intel-gen9_5.png) + +Result of `Rainbow( gl_SubgroupInvocationID / gl_SubgroupSize )` in compute shader, gl_SubgroupSize: 16, workgroup size: 8x8. [[6](../GPU_Benchmarks.md#6-Subgroups)] + +![](img/compute-subgroups/intel-gen9_5.png) + + +### Instruction cost + +* [[2](../GPU_Benchmarks.md#2-fp32-instruction-performance)]: + - Better loop unrolling if count <= 128. 
+ - Compute is 1.07x faster than graphics. + - Compute dispatch on 512x512 grid is faster. + - **208** GOp/s on F32FMA, F32MulAdd F32Mul. + - **397** GOp/s on F32Add. + +* [[1](../GPU_Benchmarks.md#1-fp16-instruction-performance)]: + - **835** GOp/s on F16Add. + - **397** GOp/s on F16Mul, F16MulAdd, F16FMA. + +## Resource access + +* Buffer/Image storage RGBA32F 9.4MPix 2x151MB [[7](../GPU_Benchmarks.md#7-BufferImage-storage-access)] + + | diff | exec time (ms) | approx traffic (GB/s) | name | comments | + |---|---|---|------|----| + | 1 | 13.3 | 22.7 | Image load/store | | + | 1 | 13.3 | 22.7 | Buffer load/store | | + | 1.03 | 13.7 | 22 | Image fetch/sample from another | **1.5x faster with high RT compression rate** | + | 1.07 | 14.2 | 21.3 | Image read/write input attachment | **RT compression is not enabled** | + | 1.17 | 15.6 | 19.3 | Buffer load/store in FS | | + + +## Render target compression + +* RGBA8 67.1MPix downsample 1/2, compressed/uncompressed access rate: [[3](../GPU_Benchmarks.md#3-Render-target-compression)] + - expected read: 268.4MB, write: 67.1MB, total: 335MB per frame. + - linear/nearest/fetch has same perf. + - image storage linear/fetch/load has same perf. + - graphics to compute 1x1 noise: load 20ms. + - graphics to compute 8x8 noise: load 30ms. Decompression overhead? + - graphics to compute 8x8 noise: linear sample 13ms (2.3x). 
+ + | diff | exec time (ms) | approx traffic (GB/s) | name | comments | + |---|---|---|------|----| + | 1 | 20 | 16.8 | image storage | | + | 1.4 | 14.5 | 23.1 | 1x1 noise | | + | 1.4 | 14.5 | 23.1 | 2x2 noise | | + | 1.6 | 12.5 | 26.8 | 4x4 noise | | + | 2.2 | 9.0 | 37.2 | 8x8 noise | **same as block size** | + | 2.3 | 8.5 | 39.4 | 16x16 noise | | + | 2.4 | 8.2 | 40.9 | gradient | | + | 2.4 | 8.2 | 40.9 | solid color | | + + +* RGBA16_UNorm 16.8MPix downsample 1/2, compressed/uncompressed access rate: [[3](../GPU_Benchmarks.md#3-Render-target-compression)] + - expected read: 134MB, write: 33.5MB, total: 168MB per frame. + + | diff | exec time (ms) | approx traffic (GB/s) | name | comments | + |---|---|---|------|----| + | 1 | 8 | 21 | image storage | + | 1.07 | 7.5 | 22.4 | 1x1 noise | + | 2 | 4 | 42 | 16x16 noise | + +* RGBA16F 16.7MPix downsample 1/2, compressed/uncompressed access rate: [[3](../GPU_Benchmarks.md#3-Render-target-compression)] + - expected read: 134MB, write: 33.5MB, total: 168MB per frame. + + | diff | exec time (ms) | approx traffic (GB/s) | name | comments | + |---|---|---|------|----| + | 1 | 8 | 21 | image storage | + | 1.07 | 7.5 | 22.4 | 1x1 noise | + | 2 | 4 | 42 | 16x16 noise | + diff --git a/AE/docs/papers/bench/NVidia_RTX2080.md b/AE/docs/papers/bench/NVidia_RTX2080.md new file mode 100644 index 00000000..f3eb1036 --- /dev/null +++ b/AE/docs/papers/bench/NVidia_RTX2080.md @@ -0,0 +1,219 @@ + +# NVidia RTX 2080 (Turing) + +## Specs + +* Pixel Rate: **109.4** GPixel/s +* Texture Rate: **314.6** GTexel/s +* Clock stable: **1515** MHz, boost from specs: 1710 MHz, boost measured: 1900+ MHz +* shaderSMCount: **46** [vk/specs] +* Driver: ? 
+ +### Memory + +* Memory: 8GB, GDDR6, 256 bit, 1750 MHz, 448.0 GB/s (403 GB/s from tests (at 1515 MHz ?)) +* Memory max power consumption: 25W (7.5 pJ/bit, 0.06 J/GB) [calc] +* L2 cache: 4MB +* L1 Cache: 64 KB (per SM) + +### Float point performance + +* FP16: **20.14** TFLOPS at 1710 MHz +* FP32: **10.07** TFLOPS at 1710 MHz +* FP64: **314.6** GFLOPS at 1710 MHz +* ops per clock per SM: **64** fp32 FMA [compute capability 7.5] +* ops per clock per SM: **128** fp16 FMA [compute capability 7.5] +* FP32 FMA perf: **4.46** TOp/s at 1515 MHz (4.4 TOp/S from tests) +* FP16 FMA perf: **8.9** TOp/s at 1515 MHz (8.9 TOp/S from tests) + + +## Shader + +### Quads + +* Test `subgroupQuadBroadcast( gl_HelperInvocation )` with/without texturing - helper invocations are executed. [[6](../GPU_Benchmarks.md#6-Subgroups)] +* Test `subgroupQuadBroadcast( constant )` with/without texturing - helper invocations are executed. [[6](../GPU_Benchmarks.md#6-Subgroups)] + + +### Subgroups + +* Subgroups in fragment shader can fill multiple triangles, but only with the same `gl_InstanceIndex`. [[6](../GPU_Benchmarks.md#6-Subgroups)] + + +### Subgroup threads order + +Result of `Rainbow( gl_SubgroupInvocationID / gl_SubgroupSize )` in fragment shader, gl_SubgroupSize: 32. [[6](../GPU_Benchmarks.md#6-Subgroups)] + +![](img/graphics-subgroups/nv-turing.png) + +Result of `Rainbow( gl_SubgroupInvocationID / gl_SubgroupSize )` in compute shader, gl_SubgroupSize: 32, workgroup size: 8x8. [[6](../GPU_Benchmarks.md#6-Subgroups)] + +![](img/compute-subgroups/nv-turing.png) + + +### SM order + +Result of `Rainbow( gl_SMIDNV / gl_SMCountNV )` in fragment shader.
+Tile size is 16x16, image size: 102x53, gl_SMCountNV: 46, gl_SMIDNV: 0 and 1 are bound to the first tile and changed every frame, same for other tiles. [[6](../GPU_Benchmarks.md#6-Subgroups)] + +![](img/nv-turing-smid-graphics.png) + +Result of `Rainbow( gl_SMIDNV / gl_SMCountNV )` in compute shader.
+Workgroup size is 8x8, image size: 102x53, gl_SMCountNV: 46. First set (from red to violet) has gl_SMIDNV = 0,2,4..., next set has gl_SMIDNV = 1,3,5... and next - again 0,2,4... [[6](../GPU_Benchmarks.md#6-Subgroups)] + +![](img/nv-turing-smid-compute.png) + + +### Instruction cost + +* [[4](../GPU_Benchmarks.md#4-Shader-instruction-benchmark)]: + - Fp16 is 2x faster in FAdd, FMul, FMA, but only for `half2`, `half4` types. Performance of these 3 operations is equal. + - Fp32 FMul is slower than FAdd. FMA has the same perf as FMul. + - fp32 & i32 datapaths can execute in parallel in 2:1 rate. + +* [[2](../GPU_Benchmarks.md#2-fp32-instruction-performance)]: + - Loop unrolling can double performance. + - Loop unrolling works for less than 1536 count, at 2048 it loses 2.5x of performance. + - Loop unrolling is too slow at pipeline creation stage. + - Benchmarking in compute shader is only 1% faster. + - Minimal dispatch size: 256x276 (1.5 of total thread count), a lower size will lose performance. + - Measured with fixed clock at 1515 MHz. + + | TOp/s | exec time (ms) | ops | max TFLOPS | comments | + |---|---|---|---|---| + | **8.8** | | F32Add | **8.8** | + | **4.4** | | F32Mul, F32MulAdd, F32FMA | **8.8** | + +* [[1](../GPU_Benchmarks.md#1-fp16-instruction-performance)]: + - Measured with fixed clock at 1515 MHz. + + | TOp/s | exec time (ms) | ops | max TFLOPS | comments | + |---|---|---|---|---| + | **17.8** | | F16Add | **17.8** | | + | **8.9** | | F16Mul, F16MulAdd, F16FMA, F16Add with deps | **17.8** | | + | **4.4** | | F16MulAdd with deps | 8.8 | 2x slower than F16x2FMA (TODO: check) | + +## Resource access + +* Texture access 105MPix: [[5](../GPU_Benchmarks.md#5-Texture-lookup-performance)] + - expected read: 419MB per frame. + - UV bias has no effect. 
+ + | diff | exec time (ms) | approx traffic (GB/s) | name | comments | + |---|---|---|-------|------| + | 0.43 | 0.55 | 761 | sequential access, scale x0.5 | used texture cache | + | 1 | 1.28 | 327 | sequential access, scale x1 | near to VRAM bandwidth | + | 1.15 | 1.47 | 285 | random access, noise 16x16 | | + | 1.19 | 1.52 | 276 | random access, noise 16x16, off 1 | 1px offset has effect only for 16x16 block size | + | 1.52 | 1.94 | 216 | random access, noise 8x8 | | + | 2.1 | 2.64 | 159 | sequential access, scale x1.5 | | + | 2.2 | 2.83 | 148 | random access, noise 4x4 | | + | 3.5 | 4.44 | 94 | sequential access, scale x2 | | + | 5 | 6.4 | 65 | random access, noise 2x2 | | + | 12.5 | 16 | 26 | random access, noise 1x1 | | + + +* Buffer/Image storage 16bpp 67.1MPix 2x1.073GB [[7](../GPU_Benchmarks.md#7-BufferImage-storage-access)] + - image with 1GB size doesn't have RT compression. *Because metadata is too large?* + - image input attachment is preferred because you don't need to reorder threads and RT compression is used to minimize bandwidth. + + | diff (%) | exec time (ms) | approx traffic (GB/s) | name | comments | + |---|---|---|------|----| + | 26 | 6.7 | 320 | Buffer load/store in FS, 16 bytes | cache misses because of non-sequential read/write (?) 
| + | 7.5 | 5.7 | 376 | Image load/store, workgroup size 8x8, row major | | + | 7 | 5.66 | 379 | Image load/store, workgroup size 8x8, column major | | + | 3 | 5.45 | 394 | Buffer load/store, 16 bytes | | + | 3 | 5.45 | 394 | Image load/store, workgroup size 16x16, column major | | + | 2 | 5.4 | 397 | Image load/store, workgroup size 16x16, row major | | + | 2 | 5.4 | 397 | Image read/write input attachment RGBA32F, 1x1 noise | RT compression is not enabled because of > 1GB size | + | 2 | 2.7 | 397 | Image read/write input attachment 2xRGBA8, 1x1 noise | has RT compression, but performance is low because of 8bpp | + | 1 | 5.35 | 401 | Image load/store, group reorder, row major | | + | 1 | 5.35 | 401 | Buffer load/store, 32 bytes | | + | 0 | 5.3 | 405 | Buffer load/store, 64 bytes | 64 byte L2 cache line | + | -10 | 4.8 | 447 | Image read/write input attachment 2xRG32F, 1x1 noise | | + | -23 | 4.3 | 499 | Image read/write input attachment 4xRGBA8, 1x1 noise | better compression for RGBA8 ? | + | -72 | 2.35 | 699 | Image read/write input attachment RGBA32F, 2x2 noise, 7K | speedup on RT compression | + | -77 | 3.0 | 715 | Image read/write input attachment 2xRG32F, 2x2 noise | speedup on RT compression | + | -77 | 3.0 | 715 | Image read/write input attachment 4xRGBA8, 2x2 noise | speedup on RT compression | + + +## Render target compression + +* RGBA8 205MPix downsample 1/2, compressed/uncompressed access rate: [[3](../GPU_Benchmarks.md#3-Render-target-compression)] + - read: 822MB, write: 205MB, total: 1027MB per frame. + - linear: 6.5ms, fetch: 6.6ms, nearest: 7.3ms. + - image storage: load: 8ms, linear/fetch: 7.2ms. 
**Texture sampling is a bit faster because of texture cache.** + - **Compression disabled when used storage usage flag.** + + | diff | exec time (ms) | approx traffic (GB/s) | name | comments | + |---|---|---|------|----| + | 0.97 | 2.79 | 368 | image storage 1x1 noise | + | 1 | 2.72 | 377 | image storage (other modes) | + | 1.07 | 2.53 | 405 | 1x1 noise | + | 1.78 | 1.53 | 671 | 2x2 noise | + | 3.2 | 0.84 | 1223 | 4x4 noise | **same as block size** | + | 3.3 | 0.81 | 1268 | gradient | + | 3.4 | 0.79 | 1300 | 8x8 noise | better compression for output (4x4 block) | + | 3.4 | 0.79 | 1300 | 16x16 noise | + | 3.4 | 0.79 | 1300 | solid color | + +* RGBA16_UNorm 104.8MPix downsample 1/2, compressed/uncompressed access rate: [[3](../GPU_Benchmarks.md#3-Render-target-compression)] + - read: 838MB, write: 209MB, total: 1048MB per frame. + + | diff | exec time (ms) | approx traffic (GB/s) | name | comments | + |---|---|---|------|----| + | 0.97 | 2.71 | 387 | image storage 1x1 noise, gradient | ??? | + | 1 | 2.63 | 398 | image storage 8x8 noise, solid color | ??? | + | 1.02 | 2.57 | 408 | 1x1 noise | + | 1.79 | 1.47 | 713 | 2x2 noise | + | 2.0 | 1.30 | 806 | gradient | less compression rate than in RGBA16F because of higher precision | + | 4.2 | 0.62 | 1690 | 4x4 noise | **same as block size** | + | 4.2 | 0.62 | 1690 | 8x8 noise | + | 4.2 | 0.62 | 1690 | 16x16 noise | + | 4.2 | 0.62 | 1690 | solid color | + +* RGBA16F 104.8MPix downsample 1/2, compressed/uncompressed access rate: [[3](../GPU_Benchmarks.md#3-Render-target-compression)] + - read: 838MB, write: 209MB, total: 1048MB per frame. + + | diff | exec time (ms) | approx traffic (GB/s) | name | comments | + |---|---|---|------|----| + | 0.95 | 2.75 | 381 | image storage 1x1 noise | ??? | + | 1 | 2.63 | 398 | image storage 4x4 noise, gradient, solid color | ??? 
| + | 1.03 | 2.55 | 411 | 1x1 noise | + | 1.8 | 1.46 | 718 | 2x2 noise | + | 3.4 | 0.77 | 1361 | gradient | + | 4.2 | 0.62 | 1690 | 4x4 noise | **same as block size** | + | 4.2 | 0.62 | 1690 | 8x8 noise | + | 4.2 | 0.62 | 1690 | 16x16 noise | + | 4.2 | 0.62 | 1690 | solid color | + +* RGBA32F 37.7MPix downsample 1/2, compressed/uncompressed access rate: [[3](../GPU_Benchmarks.md#3-Render-target-compression)] + - read: 604MB, write: 151MB, total: 755MB per frame. + + | diff | exec time (ms) | approx traffic (GB/s) | name | comments | + |---|---|---|------|----| + | 1 | 1.89 | 399 | image storage 1x1 noise | + | 1.03 | 1.84 | 410 | gradient | low compression rate because of high precision | + | 1.03 | 1.84 | 410 | 1x1 noise | + | 2.4 | 0.79 | 956 | 2x2 noise | + | 3.7 | 0.51 | 1480 | 4x4 noise | **same as block size** | + | 3.7 | 0.51 | 1480 | 8x8 noise | + | 3.7 | 0.51 | 1480 | 16x16 noise | + | 3.7 | 0.51 | 1480 | solid color | + + +## Texture cache + +* RGBA8_UNorm texture with random access [[9](../GPU_Benchmarks.md#9-Texture-cache)] + - Measured cache size: 32 KB, 1 MB, 4MB. + - 8 texels per pixel, dim ??? + - from specs: only 32KB of L1 cache is reserved for texture cache. 
+ + | size (B) | dimension (px) | exec time (ms) | diff | approx bandwidth (GB/s) | + |---|---|---|---|---| + | 32K | 128x64 | 0.6 | - | TODO | + | 64K | 128x128 | 1.5 | 2.5 | | + | 1M | 512x512 | 1.9 | - | | + | 4M | 1024x1024 | 4 | 2.1 | | + | 8M | 2048x1024 | 10 | 2.5 | | + diff --git a/AE/docs/papers/bench/PowerVR_BXM.md b/AE/docs/papers/bench/PowerVR_BXM.md new file mode 100644 index 00000000..49bd9244 --- /dev/null +++ b/AE/docs/papers/bench/PowerVR_BXM.md @@ -0,0 +1,134 @@ + +# Imagination Technologies PowerVR BXM-8-256 + +## Specs + +* Clock: 950 MHz +* F16 GFLOPS: **460** (90 GOp/s on MulAdd from tests) +* F32 GFLOPS: **230** (95 GOp/s on FMA from tests) +* Memory: 8 GB, LPDDR5, QC 16bit, 3200 MHz, **51.2** GB/s (14.2 GB/s from tests) +* Device: Motorola G54 5G (Android 13, Driver 6133109) + + +## Shader + +### Quads + +* Quads on edge between 2 triangles are not merged, so 2 near pixels may execute up to 6 helper invocations. +* Test `subgroupQuadBroadcast( gl_HelperInvocation )` with/without texturing - helper invocations are executed. [[6](../GPU_Benchmarks.md#6-Subgroups)] +* Test `subgroupQuadBroadcast( constant )` with/without texturing - helper invocations are executed. [[6](../GPU_Benchmarks.md#6-Subgroups)] + + +### Subgroups + +* Subgroups in fragment shader can fill multiple triangles, but only with the same `gl_InstanceIndex`. [[6](../GPU_Benchmarks.md#6-Subgroups)] +* Subgroups in fragment shader always execute all threads. *It causes 128 threads to be executed, which is bad for energy efficiency.* [[6](../GPU_Benchmarks.md#6-Subgroups)] + + +### Subgroup threads order + +Result of `Rainbow( gl_SubgroupInvocationID / gl_SubgroupSize )` in fragment shader, gl_SubgroupSize: 128, tile size: 32x32. [[6](../GPU_Benchmarks.md#6-Subgroups)] + +![](img/graphics-subgroups/powervr-bxm.png) + +Result of `Rainbow( gl_SubgroupInvocationID / gl_SubgroupSize )` in compute shader, gl_SubgroupSize: 128, workgroup size: 8x8. 
[[6](../GPU_Benchmarks.md#6-Subgroups)] + +![](img/compute-subgroups/powervr-bxm-8x8.png) + +Result of `Rainbow( gl_SubgroupInvocationID / gl_SubgroupSize )` in compute shader, gl_SubgroupSize: 128, workgroup size: 16x16. [[6](../GPU_Benchmarks.md#6-Subgroups)] + +![](img/compute-subgroups/powervr-bxm-16x16.png) + + +### Instruction cost + +* [[4](../GPU_Benchmarks.md#4-Shader-instruction-benchmark)]: + - Loop unrolling is too slow at pipeline creation stage. + - InvSqrt is much (2x) faster than Sqrt. + - ClampUNorm is much faster than ClampSNorm. + - Only fp32 FMA *(fp16 and mediump use same fp32 FMA)*. + - Fp32 FMA is preferred over FMul or FMulAdd. + - Length is a bit faster than Distance and Normalize. + - Int32 FindMSB is much slower than FindLSB. + +* [[2](../GPU_Benchmarks.md#2-fp32-instruction-performance)]: + - Loop unrolling doesn't increase performance. + - Loop unrolling is too slow at pipeline creation stage. + - Manual unrolling is slow too and performance is less than with unrolling attribute. + - Compute and graphics have the same performance. + - Dispatch on 1024x1024 grid is much faster (1.3x). + - Loop index with `int` is faster than `float`. + - **105** GOp/s at 950 MHz on F32Add, 87% shader load. + - **95** GOp/s at 950 MHz on F32Mul, F32MulAdd, F32FMA. + - mediump has no effect. + +* [[1](../GPU_Benchmarks.md#1-fp16-instruction-performance)]: + - Loop index with `int`, `short`, `half` have the same performance. + - **110** GOp/s at 950 MHz on F16Add. + - **90** GOp/s at 950 MHz on F16Mul, F16MulAdd. + - **58** GOp/s at 950 MHz on F16FMA *(actually it is FP32FMA)*. 
+ +## Resource access + +* Buffer/Image storage 16bpp 2.59MPix 2x41.4MB [[7](../GPU_Benchmarks.md#7-BufferImage-storage-access)] + + | diff | exec time (ms) | approx traffic (GB/s) | name | comments | + |---|---|---|------|----| + | 1.09 | 6.18 | 13.2 | Image load/store | | + | 1 | 5.6 | 14.2 | Image read/write input attachment RGBA32F | a bit faster because of RT compression | + | 2.7 | 15 | 3.8 | Buffer load/store | ??? | + | 3.2 | 18 | 3.3 | Buffer load/store in FS | | + + +## Render target compression + +* RGBA8 67.1MPix downsample 1/2, compressed/uncompressed access rate: [[3](../GPU_Benchmarks.md#3-Render-target-compression)] + - expected read: 268MB, write: 67MB, total: 335MB per frame. + - with solid color: linear: 19.4ms, fetch: 17.7ms, nearest: 17.7ms. Fetch/Nearest minimize bus load. + - with gradient: linear/fetch/nearest have the same perf. + - graphics to compute r/w: 268MB / 66MB. Compression is disabled when the storage usage flag is used. + + | diff (read) | read (MB) | write (MB) | name | comments | + |---|---|---|------|----| + | 1 | 268 | 66 | image storage | + | 1.33 | 202 | 50 | 1x1 noise | + | 1.35 | 198 | 51 | 2x2 noise | + | 2.4 | 112 | 50 | 4x4 noise | + | 13 | 21 | 7 | gradient | + | 23 | 11.5 | 27 | 8x8 noise | **same as block size** | + | 23 | 11.5 | 3.5 | 16x16 noise | less write traffic because output to 8x8 block | + | 134 | 2 | 1 | solid color | has metadata for large region or small metadata for block | + + +* RGBA16F 67.1MPix downsample 1/2, compressed/uncompressed access rate: [[3](../GPU_Benchmarks.md#3-Render-target-compression)] + - expected read: 536.8MB, write: 134.2MB, total: 671MB per frame. + - graphics to compute r/w: 530MB / 130MB. Compression is disabled when the storage usage flag is used. + - image storage read with linear filter: 45ms, nearest/load: 61ms. + - 1x1 noise gradient: linear: 35ms, fetch/nearest: 50ms. 
+ + | diff (read) | read (MB) | write (MB) | name | comments | + |---|---|---|------|----| + | 1 | 530 | 130 | image storage | + | 1.3 | 410 | 90 | 1x1 noise | + | 1.4 | 390 | 105 | 2x2 noise | + | 2.6 | 205 | 92 | 4x4 noise | + | 9.8 | 55 | 17 | gradient | + | 24 | 22 | 55 | 8x8 noise | **same as block size** | + | 27 | 20 | 5.5 | 16x16 noise | less write traffic because output to 8x8 block | + | 134 | 4 | 1.5 | solid color | has metadata for large region or small metadata for block | + + +* RGBA16_UNorm - same as RGBA16F. +* RGBA32F - has compression, but without linear filtering. + + +## Texture cache + +* RGBA8_UNorm texture with random access [[9](../GPU_Benchmarks.md#9-Texture-cache)] + - Measured cache size: 256 KB, 1 MB. + + | size (KB) | dimension (px) | external bandwidth (GB/s) | comment | + |---|---|---|---| + | 256 | 256x256 | 0.009 | **used only texture cache** | + | 1024 | 512x512 | 13.9 | bottleneck on external memory | + diff --git a/AE/docs/papers/bench/img/compute-subgroups/adreno-600.png b/AE/docs/papers/bench/img/compute-subgroups/adreno-600.png new file mode 100644 index 0000000000000000000000000000000000000000..88e4956049a1dbe0b394d1c05674cae95f4b8193 GIT binary patch literal 2770 zcmd6pT}V@57{_1j13${lh@>QiqAo@#h($4Hilq|SC&_ZT3gTRNp=%lQW1g@G3}g{l zjn-CIQf+6Lk*&0IqYhMd=z}SKY*0+}9i^F?fcrSkM|M~x) z_h~;`S++K1a|%LeZAE$MafA}_Zz4)sjhAnR_ffng=#Q6`Aj*^V1#eao#fOU#>Ncf{ z=T_l;a(%g4k5Jk`{7dLmq-{Ycg{UYkK53}=I^ONrejxMUo2|+$i#a`S&ziRJygY5v zy%n{$Qg^iXtzVsycJS)4y)8q9W!t1c=i{zJFX!CGGXU4E1l8x!=wgoOLXk%kg1Wun z{UH`vlF_v!gpx84O5cu9PB}vRZz6OhCjp^iOy0H<6DBX;0h7?ns@RS(vx3R5vXFC5 zT_VkB78V4x-2lB)@F*hjSe50*_}fh%xU=~NTVy~O?Zk}USzhe*&cfxH8+_eJ*nisQ z-?V)4{Eh2RiMDEkSi^P<=OZYwS-Nye;mohX1%JjsAS&5+Pmw(N#Mr8pp}vbR(4^s= zyxOE>G-?j^=?AfR9O!=OI1%Jhm)B~GQI@p3iRA?BMc%Bi5tNZgSM@1`mqhdBe8Q{4R9FYat`E=SbpdZef<4LiQPg-N9{3;QuJ#>MJ&XuVb z{;qgo&`$~WJv9V{qoRvFqKA@2l|C@b;rdGci_qoeL4B=K>p`B5_!!ueKgJt z<2bXDn*lfJIsXawohB%vt%ElNIuJYJ;N=I+9W&xNB0SoO#~n7@9IIx-q{a0?nw*c1 
e6ms)f*yuVY^=`+MR@s5)P*icCvXm-0+x8QRS8S93 literal 0 HcmV?d00001 diff --git a/AE/docs/papers/bench/img/compute-subgroups/amd-gcn4.png b/AE/docs/papers/bench/img/compute-subgroups/amd-gcn4.png new file mode 100644 index 0000000000000000000000000000000000000000..9151dc678c7c9e021f24cd32481362b9c2f77a8f GIT binary patch literal 1612 zcmeAS@N?(olHy`uVBq!ia0y~yU<5K5{&TPaNx6d@azKi)ILO_JVcj{Imp~3nx}&cn z1H;CC?mvmFKt5-IM`SSr1K$x4W}K?cC(XdX`q0zGF{C2y?cIZ`JhCN@KitZr!Nzv{ zO2Z@`i5+aJOXo7GZeOrOMHNK82?{FW?rMzn>RN5EU;?8W+v_<8t6ATh|FHTye|hX{ z*>`*Q{o1Pbe*5*@$bY{jKlt0uzE{6`;mc~<-FkZF*{`H(7{lLRyNX6yeupTFlO#w>&-ueTY0w`#X4SnvvzPzC&-1qR@pznuFb?kK=CqnV156-J+XGz zub6J*uPd<)2tNP7?ANE4-|qZonDhV6=kKgU1^~pC4W+kb_wXkaDE|DiRBD#n_k;WY zNd5loUf)=JJS#&g=6HPEJHC7D5B{{Du_N8hdv|_kd}AN#eEI&<0$*SG`fC@y?49R( z{rvR{CcF6mCFH-WWjMFLP3=1)LoVE>1fmV->KnUi=T^%f*!8}`H?~>&qQ2dg3nn}G z|9x=d^r6 z{dj+I`u5Y$XYD%n?O^{p_N-TuciiIb?#`>0Z}>dF`rkehA`X~hw&m?^2F9J9%KUvH z&X-@<{g@t~T>tNde!;s4Uf<@|KTe;%dk~a8Cfp<=N1Fq4^xx%K8NYVRtXui=@p-9P z`>t<(`RgY*yU4#eOJ;K0z8;vPBfiV7UzuH?{Of4D)GW5|2j%arSeRKN|L4|qa1cw) zy2H+}g>Yeph|1Ufxo_+m4wk-ndZcLKvULZ0U;I7~ER1Rx*UJHm%U%3+HPX95*<*9& hH&PSZwM~DxL;qf#y#3?Z%fK3d!PC{xWt~$(695+?KW+d3 literal 0 HcmV?d00001 diff --git a/AE/docs/papers/bench/img/compute-subgroups/intel-gen9_5.png b/AE/docs/papers/bench/img/compute-subgroups/intel-gen9_5.png new file mode 100644 index 0000000000000000000000000000000000000000..f2ef2ba343dbebfd48dd9d4c3ebbcecf8f89eb54 GIT binary patch literal 1178 zcmeAS@N?(olHy`uVBq!ia0y~yU<5K58911L)MWvCLm zu=seoIEGZ*dVASXkjYWR`CvYipf;0mhXV@`sR?XUGB{|t;>4Mys~0J(uQCe0tA5uv z?yl_0bN{RV%L8q202(U*#4JEeeJ;a+{|pQd>KPb5*fTKvkY`}{v!Cn0+TGQ=X4kB? 
zR=fJM{yk8hp#g{$fS3b_fxc}3QVKv!B`!n5e@0Yy)Jry$mrpM-PhVB-Q~vM%Z=l<# zX(1N3qlN)L1H->~wg=bmKDl+*`0CutbN_$;2Na{0hq2fX3Q9=$gY0H!VEE4r43_DP zGv3*~%=`4}ok!`w1;e$wws(E^U6q~u>d*e~^+0zz05M&YB+r4}yS;bKHoj`R%=-WP zzrbWlL;FEKLu3b7bm%jGc%4`D_T9`^=U#%cCNPR*8o|0J>k`J4qFk;M!Qd`Cc-ajG_-Gy?-`x~Gd{NJZS+8yhoa5(OGA###qx80=`< z%@op-wW+&6UCSx-#eoMG8oML3XUM7jxc4|gO?HR8-M2k`k3VmJVOD&)_Lf=YQhFK?paA{5{?os5-TUvk=^K79 z2g0N1`Nyi*&F?GD#C-aaK23G|%;!E(Ud_ty*0?t5fdd>WWCzy7Y5k59i{G=Jw)v*P2wKbqa*8o|0J>k`J4qFk;M!Qd`Cc-ajG_-Gy?-`rl*TzNJZS+8yhoa5(OGA###qx80=`< z%@op-wW+&6UCSx-#eoMG8oML3XUM7jxaVvzS=K(|LGt<67Iy!|?xyAKJb&lW-`&&h z|9^j5$9v!W%{d?c??3%JJoo~E^uXGXs>-mbI%|NGB6-g3sGWx_p1#xlKp^t}pt(d)oW^;`9qU ztgb%y`0K8;`yXfQeDdpClOoJ{pwzzL^IxlV zH^Xhx@_x?Px%A(^`^T@}+y)HEooDZStlM*T+u^_Oi~bJ=XYQzr*{lzARnh#NsXzbL z&rgs1G-GG#@9*|NmwyEY=)3x7IiCMy2JiGrxn?2tjxNhCxy#+_n}K6<4!7mQ z`{&_(dH(#K-}5{9-ogDf)my3o05wgG4Tk_&hpr~DVLdv2^+z2g%XMh~KEOol)5xfB zSZ!7SCQfZEwm*jKs_w?aE&w+DQd*|3UfZ-4fc3*o4c7O2TmMQ-_>w!G8EAi{+qLt{ z&$j+N-#hU1&6k}-?ea^uskIlo&;D?wqF8P1r7J7_zP*F6s`u=PR8KvGHFJ!UU)XZo z##5?sS~x($wxx>;GAat%BKo|4Nn5_#0vmyYXo4gAAo^vCIGDW z`>^OHaRw8_J=f|qb0ODm+o89JjDkr41}6>i6t;X-ADMugWjOT4!}z@?@M+2(xv+9n z3UT?g@}_W!O6|EMO#hSXJwETO_L?U+O#D9mhkV|PN3qMf$*_H# zh-0K7>F=;7r$r9CEb5BE?ILxDEYvT%|@s zu%A~D6&|e{r6}KL^UCa!hvP7*uT9*Z;o|z7Ky}U$ZNR8lSG@nT*R6qJwFw`9uvk|*vC~V z>oP2{5@>vk2@hdDwL&UHfX#--p;FCE+)ibY0`#ViB{~ z%4~`1Z6QJta?x@Vzn+;7e4@^T?J}a5ol=VtGcmcKSvuhJNmy5A5pcFd;y6jK#pI4q zKa6&*q%}AdwtHb68bO`Qqd~s!F#jhocgn$H;n=f$pzBA$p?kk0rZOMO%e;56NFBBPaum)jG%rH(5jd4r>tK8j_K z4AazMkqPop%LZ2aR5-3so%uPm&4$WEj2_M$$-dsqHqUs?Hxj8-(J^`pv@x27Y+VX> mDQf6JMEy2O|5AOws~(!p1r1+p_zUy|K-0Si8<>3`4*m@y`#zrl literal 0 HcmV?d00001 diff --git a/AE/docs/papers/bench/img/compute-subgroups/powervr-bxm-8x8.png b/AE/docs/papers/bench/img/compute-subgroups/powervr-bxm-8x8.png new file mode 100644 index 0000000000000000000000000000000000000000..c7b6709c2ec3999752ba28b3d4f6f1207ba03c61 GIT binary patch literal 1266 
zcmeAS@N?(olHy`uVBq!ia0y~yU<5K5893O0R7}x|G!U;i$lZxy-8q?;Kn_c~qpu?a z!^VE@KZ&eBK4*bPWHAE+-w_aIoT|+y4OG+a>Eakt5%>1a#qhE- zcfa@hg&A5niAJ&a=WqVb;CDZL-|yNtmtX(i0rt@s5`FaRjcvok-RtjHGZ>f0ljwl_ z!(jXE?Ei1wSGIRe;q}*2-}l(otzB>X?f<>s?^zfUnTST$AOEcTB6r~B?=#Pz@BjU4 z|JuUof9G57u2W~=5GES!p4Wf;jV;6Vo#}q(?Q8iC{5vl*?>;BPAr_+1y65We8GG*3 z{*AxB<1|ov9Em}4UjH7m1Tc;C{GBiPk5sGs55H&R`Tzg>-CsApRQK-w2Mp1Ye=q;r z|A~kp1)?VSIrH8VZM{`8Bv&!)XTqkH=P|DPtOC;dL7 z+dnh9u7CT@vN_Y&-<-DNf9SjV>)-zVpUuh8!ok46A;iESpu)hQ;K9J)FahXj1x5x2 zN1*$gSb*;2q@9B4k8AAH4{nK>2JT|&-dTl26pS! zZ@>RC($87geEy(JlQ2%mfUcu6{1-oD!M<)Bqvf4(PCGUVlo+ICltoM9?7u}hLJId4w2<#%{q#SnXzOWjmef! zw$U)wK@zgdK8C#WoaemHInQ}}|M)H6&+qs7-1qmoulxF5cibHlJ>Db2M*sleHPF{K z0{|8@^S$CHPUijmYJVd0!4hbucMB-*6`5x)*xfaaH36V9nP=aHgSqC0>DvSXz_EA# zx>(ZXj-3Gjz6%E0nie6B%i}!1=eeOezD=%-RPPvV^IIBa|IEgsdz*Nj&~_rmmbHen z?0ws>mFF#5C>B|=nBIH3u>r^B3M~wsf7UKa`u*)A>%@3%^F{WII2{>w)^m?Ewoj%s ztmfxJ4mTQeMsiVzksXKa5BX}7x;Zn*ke+YN!cItW+*L z&>eap2n`LRk4!7PI- zukr#OJv6g=19D1)KGi?B;6OGS5*15G4ROud#Q2!~2JSr^N$Ob+K+E-@jC%G$*1LK# z`9>AkfDdjS+0a&NLP`0)=Qz)E?7uL3#0NBk z-*N%7&&(DlYEBBvG3Fy2c@u6Ayqh+SBe++`W7x~a z0bNBKNPV9-jJ)freqC+EpaBVbBFNVs`n|qab+&&x6rNC~Yqy!ZIU^9|&tGvlwR}=C zoix1<7Rf=DC9bhVMk$qWc=ngu?|yNj^_Op7qE&2x&je_ub@rU?z15MSmb1UME(i%5 z;{}e~j(ft#lNBe6S!vz#T<;2n$-~#9JEWLte-fQy1J;t(Sbz|{T>yAOD|TW-{wC!bEj7!9&dU;Mhk63Nbq!^nk3H~4tZ2Z#_7h##nV zmpRPn$oUi8dtrb$3(2lzHB*@8nu1%+CX(`rQGZYDFRDJiV>8>SX4xhW?&xj%V(rNb zJlMmV?e%RpQ8bJVu_eZSl8U)0!r++AHOkhC{o*?r$R-`oU{w>O;uv=;Eao@|gM?y9 zTi6gSRhVNIW#qH>=5sO^z%MQ1FO;&P9+sMW8k)D#sm3L#`zI=Y1&>59YyPE@N@X*K zrYw7<7Sx}~2^bS~{)Gw86*->L*3YTVS}rN$Y6%+9E~at9UIJDOM-|29QX-| zWF)T$(W&=dI@o=qSL09bcNE;7oHJ77Ooo z;J&&d5fr*spe%xD@(hh_wb|81^F>BMs}=E$lYv~QmXJ!#|>{ zj-OphcD2HTW_o4h#4NcJA^(5yq?dHFC*a4!3htDExrA%QL3bp4BS+#fgTQ&USL^)C z=pk{P+Rl3z*PJGNyoIUnVQ!`F(6fVY2WC2cApK9HO-(v#ytN;lBCg8p;6kMf?GCp# zaKdMgN=*r%AOJs}LC;yZfpuKqmeRGUf31r6#*ZkhbhTGbSZN5JJCC@qZV1x2%PghF zFJxGj>{;DeT0{B`wQj 
z*|Wa>%i*Eadumy+m7gk!dQP4ebBc_Bv1#pjgj-AMA%yi2AF~y%Jg*rIJQY<^oUZYrt61k%iw>aLFNY&=vjqTU@k+5LdI7rnmB{PcWPO&o8B|8PX#YH;I{ulh*? zw-(yDjLR!WA_hPA*J${3uCmlVY3dmdud3;D0Dwv6?ve`Xk6}NkDN9O^@fExIAzz_; z*{L*@6h_e9xv?SR#9$gj&gUO&aiduwGAX=_CDYQEL@O$@2*+P zjH6l_LRS?KkCcFy@er{SwSm49rN($meTOM+5o1aOYALKNY&~|0Jrc{*ES<8dQoE|U zXO$7l{#|*QvWX-PnMmyUA2wK^rMp(@z``+a)4bO+y=U~TUb6v84>5z!6ipqZ)ezYh zLP^uF26NR>8n$aqEuc8D+yj>e7_G7;wQVN6KlA2ZLDERIQZ8@>x=H`^xE|H8`JOBA zFzuJ>anMlst-Vwv6N$H)XuhNp;Y{)V5!2!~lPKVd)L+)-Zv~4MGQZJt05@AIX6v?* zuhspGjgJU+06?odg3dbD4yzaatHE(gt=G2u zW#o+42ibumEI;JEzPC27a6t0i>z{GegD6lZ)F>)oZYxu2B!NZaoHKSygz&waXLHxa z-RLe#S;)h`OUu@pWYan|V7(nfk!w^CB}tst8>y%sH*d1T!fs}?wF`7%GELxI+&ld@ zcHGqmC=Lpb1b`zPD2UEVe8e7NJm?^uG6DNV9)YZa&z}m4@ilV-UAN4{d#NkrZPYp+ z1;n#4gQZT3&Vr7NWRozJX zcWAOmoq64&n*%1ExtsC+{dF&|%>i|qB(m_K^q{?`V6BVv6q~rrUfAsGv}0espZ8I! z+h<~ZmCKx{a_SXA?y)Q*NdydK1M&Z`V`ZF$s5-T)Upg%-w+4qw#?(eTYhSyqda7A3 zLxAR^OvEpS@3>QDjfyS~IU}{c&YZ!R4{=Rr_2Jbmk~4I+>BF9N<6RMD+dDT?!AV`L zOYbgGz=sN_*_tgr&M6ruJ4m4pII%ZGI$TI6!`5>sn21veECtqm%x?sA;6;PE@NMT0Pg-*y@7Dz}DSkeQPH|go zEcTn!F60AtEtmn_Fl_xa(^y?G;924>_NRYa`2UDG5TfG0o2EabWw6a3!YCriVG_Wn zP429`@#bU91Cf5U4w@A++Nf+n?1v27Oar}!Z}s25W;w+QC?@Kc@itM`5wxP`Fxhg4 zGM)j#l7&QQRDj3F-aJ)0-Dip&aDGYr+UF(s1inM^HOG<~{C5;%WrF6F*1kMReMHu@ zB8ZceRpQAUx>}3LK`#BBw02(uYfqE83Abj!CuJvnFAqXHFNWDU{-BW;J2XT%o!Rm&@s_2zvUeD EKh&&~4gdfE literal 0 HcmV?d00001 diff --git a/AE/docs/papers/bench/img/full-quad/valhall-1-qd.png b/AE/docs/papers/bench/img/full-quad/valhall-1-qd.png new file mode 100644 index 0000000000000000000000000000000000000000..71962af2c7cc0a8d2a5058e69c2d030b64749267 GIT binary patch literal 1027 zcmeAS@N?(olHy`uVBq!ia0y~yV4MMDZ{T19l17^=b^$5I;vjb?hIQv;UIIBR>5jgR z3=A9lx&I`x0{NT;9+AZi417mGm~pB$pELsl^9)ZH$B>G+w|Bk!W(Np31fH07E88er zDZ6NWr;o%g#ilzy%FivCpVA*SW3|+jdloU%Q*Rjgzh-RcH#+R^ z9AKCrAi*HS#O&adz~I5b!_Zk6v2J#?!Ra5>wu~R_drz0z%6@+FeLE*MwVaKN3W^2{ zDp(ByNj|ibXprCYt5&B!t%-nA?B?v?XFE}ozc2P%EZg^udyIs67_i%UNRDU1&D+g8 
zeXQeWEdN^2^Zk1AOo)H5sQ;Gmpm*-R{?v5;r)K@HU+!D~w!gAi4J8zib=gW(B&WYm zzInU&W-|Y?&G^h!FwwjIwd8`y`u5*Vy*Du$`-61kMwBtKs zu3vV%yhrX`u{`&Obv`(gSG9rlxpxQev=#riuQoWa+6ZS@&#U8q_D1g1^7kJZY`Sn- zjXnJVGX%&lhJ4pkKikbnoOg|x;g2oOU`dep$1tC(>w8h3>1JTYW$<+Mb6Mw<&;$TN Cq&dC- literal 0 HcmV?d00001 diff --git a/AE/docs/papers/bench/img/full-quad/valhall-1-tex-ht.png b/AE/docs/papers/bench/img/full-quad/valhall-1-tex-ht.png new file mode 100644 index 0000000000000000000000000000000000000000..c5cf4f21743fae36a893e4876d25a5102a675274 GIT binary patch literal 1021 zcmeAS@N?(olHy`uVBq!ia0y~yV4MMDZ{T19l17^=b^$5I;vjb?hIQv;UIIBR>5jgR z3=A9lx&I`x0{NT;9+AZi417mGm~pB$pELsl^CV9f$B>G+w|A|J76%A81kTW%X?iky z)3uiCOFTW!Y9GFH@%@}5EL}RYqh(9J?(y4xdrNZUc6Nq)r*`G=v;VzW$8^A6)qp{T zg{`6G0K)_U2?il1W(TJP29GnBckO2{j(+x*v*CYcTzUL?=kIm(vw;#EJPe$Tj0%ca zbzzr$caY)FgPZR+-kHsbVGnj4P)9%|AIkAe_-Rw_m%6KN_wi3>d~jM_ZQzi$g}r#@ zcl!-%ZXb5M7ymuA371oEG&hQueUm$#;s3PicD;Q~W$hkQVO)MYB*(L7!{+Z1Zq<6q z-xBWO_D3Q=o6~2j2p7qS>2DvhzuWlX1U}aaSnXyn{%P_3e>#6#wb4P82tW#$HwQml zI`?kjkKgT&ahrxWPQEAH_*(gH&Z&~WW&N!8rs9l-ME*J5*6XX!RC4E+Anuf-tQWZ`%MmfGCSp*%}mR2*?(ab{1NS zfJ+quflTX67(t*EkR_7S88K0oq&Bk1c3Og@fN&Zk9FlO7`+h;a?fks=xfAD)-yb|7 zd7eBu@Aq9k%lpmS7qEBLudIJ%VPUcAt^a)UcNP{u|A&Re&wl;OXW$bjesmMOJjMRq z-Zw02<+hXX=I8OR`@e2sQD3xT@yIXW{d4dC=OMO*#q%BL@2PWJp0}~E*m~iuH(w9V zI6O1_%Bhg}%O2l0@m_q%<>XVp$bIdowd?+OfBkfoOC$f}bI$}zS3a}7BO*AO<`A*l zChXPRbz#4FI&R#MIp2o@HveP_~{emOms;voIOZJxGDo7PB0wWYBfETabs z)A_lrw_EU7A0hR#b+fw#a<@XC(Kze4+dA&xg28(>NiFEipD-A@7*~(Wsu}LPuG7y` zrwPKTBHlW|RqYlgEUK=Z(~VqfVnk<_H;5>yfjF+>;*3ROniIDxSUIw=S; zHdyDpeDbgV3!(k7R(ON+=lm{ypHex{mFk_d;iJB^m`K5mVC9J`U5%wl9<%9uMLhOF zr%%Tuqxxb|Ge7sx35hUZ(SVgVR9W?qWHCSYc3~)Aqe@j$Bq%GfsZ? 
zC`Hp1f(cF0jo(+^|v6kra{sy9SO^b6^tBW(*b<@R_LWabOsd(_A^Eh@!O zenQ)tGQRYt>@4GIDr=ka;9<2O8K-`DE8a#yqI4O(9JM3sbw-KjJ)`L?`}E~XnLhhw ziz&opnBvH**3R@>J+`zSG|l5v_YYIni}P;tKDyNw77M}#`#SiqM9#A|J+)Vz|1Z?< zgCF^9I-Z8=`m4cvKW*8^|N1}m#Mg7K{MD(NSO<|NTdvbDlrT%e8&);6U5>Z0Q>y2E zFRo_2qU7W(dMUR@Ms&I1HwJn-UK;hRc+k9#(ch~QMXclQqqx^u!jvYTyT#=RZ@8@` zO_iJtQL&{2Q%g}VD}%W=Zg}T-cexej7m(@I#hM_8iAfc7R4Qwua)(?h@Cq zO^*7%^zJ_$aHQ{9-EXa;;=)2PwRgqCC_?9PdT6iVcCPHcuESt@!HTvaRLw5DP~^zZ zorLbx1xS+|L|*Ba71cQccFVA-h^*9Hq~b$JE2QtG6p6;(!r})L;ivLl5LW(le*M!KJbv`GaUHZDI!U@yBd7}Yt~rg1a(i=} zjgQ>urEbge9r~f9(4EsosJ%X*ti7oUJd*`%UA+46EI0Wn#wkq~U-RCqOP!ryrZiRI!f}fywH*QgG&*2D}B~ErRrJc_7%K8_xVc6xvq<@%n z(VnF7OBd&eX|^Si&JUtjhowr)AKe^2b1SBf`Pj!N&U&Kn_|+giA?PN#m%no<;7LM9 zbOIP~|3j*kEp`O0rcwq&IJ`hXl((FkY|#hGm#2KH<~8LGNg)y!WxPi{WX4>0qhAr@^Yyhn zIP923{$xg$JsRFZg-3TPnrvEh;5%TD1%;^DwIZL27;91fG5&O>ajT%L<}63556p=n&#Zdpv*b}u_%8J6~oSey+!MX<6?69tW+GC&t9Dy#{$6P-nN=(D{``MbMVb<2vsq&v)?Eec|>B+J*@;O8MV0qF8jx>c17m^Pd@< zrhRe7X~WJ!ld5gU>Aa9zG8q6fPBhc`Z`k_+?TEkp-P-VaexA#H&40jwK}nZT13hj9 zY)!Uk?v_l4!riGnm^2>qaUV%!qj}_2M`MiluULQJR)52)qRIX|(lpm$NbZeXM?QN? 
zhN1Q^hEn#k)!P5|g4GInvl-aDOgnzSYQ^t=ch>ao%E$NXbL#Dh9t?5}nl3Kw@RhG&qZV@oIDkUGz)tzwN5M*VoAgh@;*0utRx87@-%4f)e zr@6)a3=`=T^KPvdeDB&Cz(KCON^{seT=yR)%W-;Tv^p8Dpqv<>g8xO zZBH{LTr!GZ(h;Z9jV5|wBNOrbiJ>B2SC@|e&dth(HN~`^epv%TOS*E0J~W%ZZu||E zj_W>a=&!S)ZT(@cdS!Qi8y;&-;*kfx{Pxw^T>GyohnXC1RD=ern+5*;2SHF~+q;;= zYzVP~sYi|Cu4^6@oiJsyqcY>gL__4noimhcZN4T_1PlTYCUW??TNGxfV=#_3XgCB{ zzj&go3twoBo$D&?4$1Y77s6S;fz$#qV4!qe;58NJz3$XlPDD9!O?rLE zGZV*#30fpfdh7J7tG^AJb@RcT12~f6Rv~-y>Xd*xlfS3*|0~OXJW77A=XWDb@Wo}$ zzd)~lq8#uyi}PHD74%zvwqgCJ(=@T7i-OLFB*#*iJY;ID>`@!u6zL9kKy?Bs*zD{e z!RkN(b!+#^KMPX;0uk|;G@ie1xBXJD{zU)ixUL#Qf9nF>QhT5EDv%`5KA!b}3xgkz zFjVj5_?GF3El;Fyl#?Mn2)9f90zG=f60p;u?QXsDcBhhxJ6dEkS#65(>HjkWHY7RZ#w4zmBYR7=H;k#%eh8xg&?MJz#ChAH`|E5Hm$ zw}gZoUvWze=ByI(MfETG##Aw%K*K>C%v~dj%!tGe=($6{14}z$fGDA3R0m0I&hY;j zi9c`LyLKrNBw4391iB4@$pb0iT`}({!Q>tl#Ini}y1E3~htdjE!yN{mWf|xsNW}ET zIqL4%8A@`J7gs9vTRe49w8t=KE)nNgMv9ojiW?l8Ws!4-Q9YS;H@0e$1tN#9=a{U8 zu}3Qq-x1|oLPLFwCPi1vDFprHFdZe`ZryVk=a?nFDKuQU2E#_M&>sn2SuB4Dq4tv{1vv@tChE)_ zGS52s73YQA-}rpZ^!3>_I014W03zcI(>7dA{^i72DvZpGGDscx4ie{-fD1rzK%AtA zj>x5lMWv%#>AV_LpfhSiLO^cjr)`U*3$V)w}2@FWgq)^dB+H3a8bzwq9SECO$ zlp}1YB2wi2`bA^h_&f(O&Bbux_}2r_9kKh{V~qtc!G*$XxFmTk2>QMB`i*MMt%u09Jsx}(&HcD>ZGY9=qLY##C~a$^X~uZHwG@TLpveA_1-fz{ekUTZMnNPyj1n{j z`@q~21|%vyEDhuz2zpFFAsmwzYo`*#Q6j+gjfwEtHlTG~3%^ljO(AtX3=%Xw+7%E4QQ^ylI^8Jb zr!VNCqOG3F7l4r4iW&+U^!7lPTY;xM`7D~{Q^E3j@6JO(%h2Qu>3uO2rcM6s+;E18 zW8AFX#~Vj^P{)c)hGXqQtmfYtg)#$*3`U)#$E~|PkIeS;K+`vnRuFw`jA|bINfgn# zjgGtT|0Ig;GI!tQ#zC&dt&4(u@6H=9H+wj*OI)U-dorP)nAW=AIQ0s}<<8ennekcRi z>NcqlN)*69B5t@9I&p6_UQeg<|1m(|j=-d9xq7QPM>7L)MMUY+HggI7SMA>e!N=Kl zp+zbrSIXRRf>xPQTh#|g{KbN?a{guYMni$Yw>btgWUpU$P5#%FD}cWf0V>@t;n^dH z)e(P5!@3%0O>YO}ww`96?o*R}*A;35pjD=Pk4S=rCtjNbR~g*EiSRjtxK}~1ythXl zl2*wwf%Vx~dY&@&kC^pgQ-n<*F$bN@PyM1W?{!c}OoXZo9R(i3UV87Gs&z z=kKQToS$aTM09RHGhJX9VR~a~m`^*^2g6A4P>glsm!|yW^)R2C{kT53;!v+vu-iX} z*F5-%lAn-eOVE0nr$CMh*#UR*1=Ujl0LCzM+_xF1I`I>j3hz6&F;ok?;zNo^+R7#V 
ze*D-tRoh0eYpV+08zN|{^9j@I@{lIH#l(JRUW{j%7{}GW%Z;AOQgPF*6<|oqdZz`| zRS)tYuSy!X?NUjpB8q#$H$Ftce%^hK8cEm^5ML@)fqCv;qldS>LSc|t*rZ@X)mR3YIxyIwuGjfhB~jz+i8n?t+4?P}?RlV>eVK#Ok;0G;5@RGqLZh=~qp# zLHU`P{Pqc{)QQi*KYo_Jeqzs38~WVi>Kd^-5iw#G)|JX~Kn@ma)Sc>m#9kj9SI3lS z=^wR?w%~ngZ8GSWb;4Ndy#Qwg7mjiMuyeiXx?92OO(w#(sgFuW!rD%D*dh15c1$)( zq=csQHMWl%#b~c(=i!5ek9_hDr0=%Wkce`Gs7G?4&L}f~P4liJG-bxAhN^0KNXsRw zujmS{noXuxemMfa>NIF_Y?sr|k(-1B!9D;UZ7`HW^PFxmTz=8}qymO2qJn$-7oJI@ z8Ba5j$KQZT@n6NGSXHG)KgAee5%_YKSDX_XG%+)`J{V9gLdgNbJCa zw+rpJA&2xF<2H4$9SC4xBtK2dyy-2(ND5{3#d)uU5kx&H9zG{f4pb}{4^5zrPv*eF zVp7@`C&0nGrMUeA{2L=%H}P>0=0RwR!0AyPHu_^-&yW?M`)7 zmhU~C(MP>e&88JTK&o)izqw$mNVt>F9aCQW17r6HtT{vJvYQY2`;QxhHUueJT9ICr zs>vEyVgh^Y>F_pKV#0zNy^?3Po{vLx<6yX%WJUWSu&$$PW@N~HQ(F?`?ugEg<;{~* zQvA_1b1Yxw_Ab%@228-6n2$AN*QW@Z4aT~On3Xw;?!bh-6>e*ZsBO!pMgXibLh1bY zMUv>^i@rJGTO&sN3-Y)PycRX~}8W9YzTwwvJ=KDi@izN4H+{3!Cv2=7P8S>*-&m~^_E7FSbd?~Ab!5Ae?N3sh%dl6!}ndR(V zBPZJdX~i0&q&I_WFZBg5i&;Fy+=Ud=By6HTb%WE31hE?|Q$jk)c1%`Bl_kBG!$u zv{ViqnVrr-{>bH=urW{|?hJZRF6BGQF9PbT;UjU#w83CSuYQuSo{&}9`GI>%3A8O) z)`*0JACmwHL4IRvtZ7T4m}Tmk7q^#vxc|y<_ZMIJskl)d6*!PE*#Jp`K1o)`*cxDq zT4N`jWUNHS&(rN?^VBh!-gi=uoPNma9TLpgED2G-;Wf9Nf%Pb1E&<)HO=*~cjrbS( z|8-CH_?r()`_&)(Q$cC9l^qF5v_l#KEV-&|TeQPCPt`TfNGazbV!jGbeh0#RpONS` zOU9~nIj6KOZKdpxNVpF(${aS$fHr$uu1dHIis9;xa;7yUPKWiC5_U`@uw(L4*7EKY z*WawFwSu=>&GpD*6xPkD5PNm+bCcBR%g&EQe5|;}(f!NJS*wHoZNR zG;I2kN($td+K0RM-FlR;$`p5XTbs{4svQ*B5X}2m)os=WW76Dq5N&>@*+^0JR4VM0 zJZB93^2X_?Ob`%=f}O+J0ji`+^EwQ^myI&rN9n}fpOfYbA+UzS0t3@sZ9~g`RB??n z>HP3{?3>FrBz7EwcEh`RR<8W=W(yAb5jNUaz?-V3_xH}h(z0A?Fr1KIjJVNSD+T?( z>!cD)Q`|qB$C!N>L%@)}e{|qc0V-R(GzJ+GbG+nnNL!mPulUe?quEC2fjF4|mE!*|pUG#5h`>&Ip29 z&#)t*PJH3Tj#z;t$5lmnR)Q|Dk($BF0;b3ql?81ef z75M5$i+N!6Vz@H&He4B!!8NqWu{TAm;ge{gfe_>vERV2pLFWi~nebf@cyK8rI0-&U zf?FD%KEk$zSCv1DciVPm(|XcrCtO|r1lx;Dm5zF$3@z@!lDsfyl-Ph!Cx`M=!t`QT&I3ipXmzCUn8+=`^zTYCcDtbOC%-~Sis Ci9@>p literal 0 HcmV?d00001 diff --git a/AE/docs/papers/bench/img/full-subgroup/adreno-660-tex.png 
b/AE/docs/papers/bench/img/full-subgroup/adreno-660-tex.png new file mode 100644 index 0000000000000000000000000000000000000000..89faff6d0b361b8eacc87fcd8814dbc3daca7b80 GIT binary patch literal 1182 zcmeAS@N?(olHy`uVBq!ia0y~yU|axXZ{T19lAhCdl>sTn;vjb?hIQv;UIIBR>5jgR z3=A9lx&I`x0{NT;9+AZi417mGm~pB$pEOX7v8Rh;NJZS+yBm8&GDX}j=GEvFMLdd- zH*5RIkx(4b&#*y(?SOi3tIP(qhDgP~Y{$%W16S?3{Q2C(sP($XBo2mp)~xz``2ORL5AYs1x5QlRX)OQ z7%=o6);gHJXP&jTPX5A|?;o%1;m7G44uLl>kDq%UE`Q_l|GmjCp5qDJzrP#aU7P#r z@~&XpZ{Hc;?!EfaZr)8iMg!ycO~bdZH}3CDW;dBFE5IP*k2T^pFt75x&v^IO`})`C zGJvF&wE~01Jk<0856+O?Uu92xxj$c4Ao|T-W~=qpp1;@c2S%$DG-08HQp1s-5B2`d ze{lc$+*en3O+Ekm(u{q~ReRrs#9x=iozM>Pt=Lw|o^rhSHZYbqGcz?j=ER!V${ehB z|KG7M=EI-$Ut4FslD+Un)^F`S#<|z`raZT*V8W4xf$1l2gozofW6@4 zed5jD!sTn;vjb?hIQv;UIIBR>5jgR z3=A9lx&I`x0{NT;9+AZi417mGm~pB$pEOX7mZytjNJZS+yBl-oxJ$TRjD6lEvy;#4 z0b2-zbb*WL1Fo2c85Pn6j6r(oeK*>S)YBNXIc9il*wb)qwf84$c7=Z@MLLcgW#KQj zskfQKpYpCgs{O>>Oxa~sd-qK|{^S4tZ`XhMeSHkXhkr30C|7l0P+{R{XklQSAfUh? z#Khv@)WG0zgYoLUweLed@-0teH;LZMysPv7zdJy24gm&EMkWPCteUV({;YQ3KYZVL zr{&tiA@`W2-Y(tG{^k3xy0dRT+^;uB*odLI=IC?T^XK^g`{K0r9&=YEe@1Qls{72d z{>k~Dt#)7#K=K2Mi6?4))}8zR_wZ#kTu$$NZ2$b5?SfmYA6tdkt$81!|JQLA_q+Po z_VibI-)(1qe|_2`50_p}YX$2C(_{m(s*i6z&6i<<+h4L1WCMUIy!So6naXZ5OBz>r z9^yN}x8m!ea)X-{<4e zQ08E2yI(PzAMOW?YFx=6rh@d&&HNulMf0f9;{PZQi?rxVhI7SG31pQ!FyBW&iT~m(e6V z!J@HkH}kF9{^!^DG*a2O?CW`g$3?p~nYYis{__4@|N3>8eZGPcGa(%hE`b8%f)x(| z!!7pNZns{Ly&D{UK;@;yQS1^Ycrh{(G@vHFZZ6EMVi18Kx1(f+SR+Kagi8oVhQbj72@sNyoSbvs?}_c~?mRpD>?`l{ zynnRTOVxb&UOu?YdMW(KtEnrd;G2bM z`*(k^;8y#ZG5F4};@{u<{(=R!^OrkfmcaL4PW|vu+JXhHe`5YwSm5RAv0%Y_oISsL z|G=5(N!?KL@T01Y1Eb>XoiYDArZ?e1OoXdq*>Gmd_bscuRzy#_k@&su`kyY2uC*Ro zzWvBTbnW)@9N+q6UUOn~URuTvPr}*br!ERJ%Aafr`;p3JI&gwL3qR`)?ptLdU&Gu@MUczhs&V=BGf z%+a4JpPim;F>{WZv-YWcw&lG1q29X6B8k0wo6o~ySE9WDWX?{e_1Hi`?q1Qv9Xi%pk-2~3%VX? 
zGl%?_4YIj<5fttFz?}bB%N3?SzgYla>Jmn{G#>dlC)UiBXKGz(&+b(4Qu1{=!!m4qR0=e znZdqeQ?b3sxjf3Wk|Ly}zmAjdVa!*X>krh6+Vd!_vpUy5`{BtJn@Q#VY;(m^c_k@t zXNC2bz;8V3*wTvQ%Gb}V=sBq3&Z*J(k%mkpWsjO3v(=oUqO^h^SKh14YNYTPS+L3> zN9mL-yE^$Wnm}F8Q}eheUAxb%^RmNU`g}tEP67T-Gv(NRXvJ2Hgyz*UC5co*Nqh@; z<<)M^P2Uu~wRpQ&?e4v`KKg^w_`D0y99xb`IFI`5K`Bvad83Hc6MsQRzeE^^jhuq8yp#|keteY??V~P^TGtp*8Pq7U zi7%8}uiyUm-ug#Vzlndxyj?M7^9rKZdXS~-r1Y%Au~uW$3~uy|IJP~eF`+_yd{+ z-8Vv6=K8J(SKt2DHG}uhw59%~G+LZ_t4HdP_LQC*xf86vpm=-RHbW}*;Hk~-vi71+ zpS`cYGE0DXPm{S4Ue1ImH-L*O>lPpPl3_~o>QDU#k^DT;e-cfgKlU6Z{k5DbO#Nr}VT3%s76Kih*)v@@@u@x%oKqhi`o1I%P{0hfsN&nYT z&R01B+_Pb4zDT*uGBnZiT1GX&^0uezW{!#;KUFTDo%S?bH|%ar%6T=+ zzTg9+#5dwuL>2OM)vdF!00WZKYzME#4SLvISiFlJ=7t&j>eEnEmgw{KUXr9Q?%2J> z6U)DDaChv-gURYOs)O*6y45G8PyZA!p-%Ep4U=2D`;o8{^fD_pQZ!=H#HXpTeN=HoJo2fs2LUkRx z2&jn59ygaLZ{C@VDuGSdZDGDH=X$f}0JR zp-D(!j#+Gw3{Con=JR97T6|Q)5L%ciYCdWXoT&1+;FI^#g_RE;&X}-z z93RAxPR~?!92vj;xR&muqH;E%v2)Y-s1<2Mj$cOCKjI~s2Ln08iyyelIqnt1f*bcg zfP>R@@`wAuty5IlXPi@G;XA z^XgQ3ulit~C2(6acKdd7)!>9LB( z+YlneWu^OR2^`GVEV7g2sErJ}8fO`~8LYadqgS!f^$se5PMKs8FvU{)UP7%=^OLwn zG(R1q&qQIcV01}Z!{sEiOb%U0W@t@H!9$Ij7#@xZ%e0nPbL47uABxtFz2y1q+iF^J z(D2)|x|T>Iit0A+yx)|K)23TXmyh5#3zcMG5Jx^J<64FYZsMegZnD1h!gZPVDKtgr z03^Q?ExxqRcdj&Nm*rQZKu5Co=||;pJG+it*+whgbS%0phI=5#;RlRR$}Gj&4NW~8MmH%P)ua>`KnRD%MoXaDvYX<4pEb`k&&08f^ zW^oU{P}1;H8Ec13(45#;j~Eoxzuuw??+ReyU1`)jtDO~mTOyGcj`G>r%jbR*o{Hl) zuNS=Wc&cyQ?U1k_SSS@XUxID^I-4e$9`{fAEuQ+)UMp57QsA747E|ZCD5SxC!}$_A z(u~X-exB6c769dBj(^-}*c{2zpYpOYOapUB^6~K*|R`w{{p|r$3kp+_v)B z?P)aP4tBOaot0vgIxs>uk5+Mlm2!N0RehS~_N^L4-aRD36!yI} z7EI$BfF5t4a>^!b;(VpqH#aaUiEb->yYkV_<5?7IwBjt@N_C&?s}*qv(|zlP8Sw#r zDvHi?l-VX}(qDzWR>HCj&9GWbPu3LjGeyx24U%D#EjFx9WF4}}3g|4l`^6c{XwwS7 zblUQz%UyDr>bUWI`yT+8J5|S^?1xEtg>e607ToHoIf^x(Umt~(TX(q_&Uht=XlVWN z#;#)?&f+{ovaDQV{xvh#-Ns32WJh+8Z}~Q}+p+Vd)=QXEcBvwhKI-(K$lhm&H~tE> z_)5UQJ>`_3JJZ1ziutSA9QfpdNZV3>Hm~MUS>?i8g!xiVfbQO%S3iwJN51PeR;mp2 zDhn;2GN)YVq>41gVd~paxz&mR*+1o~-c3xA?*OG~&wJzi`U*@Kh4z_>Li^s>?%%I$ 
znlH=MY?i>Tc$9huD3$3Op=bhb+%LE7?GBmb=X>WjBf4?llfO=740vShw}{<06co@F zgM3hO7e)o^ek#t?)`^roN?TFOY_tEk^=`DdAMH9)K4nw}(@6p1Db}xHdM+5yuuEv3 zi$ntFku7yBXfgfarYL`CX<>dz*SwKLl9M(Kzu2O7ttfx^+(%|sn? z&F1+KbC_7R{pVVN{T znVT8BP%K-$?-!!1z3?z3uW+@88fo4sYE~FN0p^jK-I!}j&Is&2a%E_0j_NtRq0eQF zf^wpXe{ADB$k0Qyhhi2dl*2dzBh2lUa0&jqI`ld$;6yW8Q)-rsf`%3ryGMf5B84-N zvZRb#Jy~ldM>?p1AF%ZmXxE$yaFi)Tt&WXA$84MeoO3h3P&jO>tylLGMn14Q{o98* zW47CCYITTphzQN3_Ff*g8a33V%RJ-ce2!b}>cr}Ej6mr(+6iPMfH`j%FkvZEn?_kN zR)~XCxdD1(eA<7zUy1tm(d9=NO|90=qpdn%$?Hk0+RAS!Caf@2yWa?8?)eU}(J(j(ArYbHurfGLXRo zof0H8Mtq4Jjo1hOS|y^o<{ZKcSomo?B#_rTn2LO3!7+;By5lNawSn|4eW~(rAyzdFO5miIkk-{N zuD16yQ4{l!OaP%J$3hoK8NtmQCFAyq8_J1qr!NitDZt@a?{G*<^<^X8g62tko8bs( zRL3q_54)_)Z0I`jR_q*Ewh1-HkZTlbPL2dUMMpKzWn{)AX|0=~`e!V^2n*GmGIP-m z=8mRqI7B1M#TWtPvBrQnl{FxN&q_`O9=X}vNp97l@`gr8PRW^{b z(h>kc#Al350@5UN<7-8t@{?qBW40~1$2c5#!6{)908N_?CsuyD*+GR}z!=f2_7o}8 z0ktePphPQGWHEMWTX|2(J4=_Bk{Nn?(DO?4QBy0|x?yg<5jRfJPBkG1q$BH5@< zvcHzXCN`o>6?C+6rtWj~(A(8|f4y6&%_#F?W4T291XRIJWoF$UDfJ9F)yQEGdb_^o)Zlf3m|eWb+N;;1Y=x)et;A)nPKNIyhxK z?W%vPCzEb!+BGl1DKsyo2wKN}IY)OTI7HZ`Pc{KD`RFub4M-ie*K7y|H%iXHeuY{R zPB{QDd&*XeesZcKw;cvMC&$W>v=E%5Ht(_uw7An^MF)B#IL;=zMDXjW4-{|Sl$e8h z;I|$-a5cNbewYwEPIa}SPTF$Kb0&WR%|D3SYtKq(>K^!coInsY8k~d_(*fOk2eGvXskXioZBSW#wF&~){ zB-1vr%$?llJk{r50<|E)_dYbFjEk-nV1?UgSfd-xv*{QsFq960ys6 z_`EK?&1`+m!LVWS2QyVwJ4?PJqFvNHu-vXs&24G^RL!?X+@*B^Jll)QNrypBitP=8 z)oTObqit_+Kbg2;h-3)qJCBT6a{>-5s?O4-9tlz!IC@4g2K+XA5Ejd`3%BTCQnvB% zy<9?S-M#M+>6*Lg^DD7Gu6(K?hp$E(`*>d4Po#A5>yh&_Q8GO!Wsd7F3(9-4J529i z!mPm}?zwe0v1)2RpOM};k!pTQ_+0ER8yE0YNbT89bD*CA!6I0Fop&iBhEjsgRVh;a zri8Jo^hkdl)k!At)=KCO$93{<+@vYwCVSYb4m(4+T8&j)tDvTK))ty)#;UMdhs?;z z_d3#?#-4}e*091iLdk4oXnnq~les-F6W2MGO->Gn8Lv9h;y)h$|k18 zL*D??6oBRt%t!D*0{GVPAoT8`p?sGSJ~Qlni}v-v4?ZkuuMxF4@`uRK7_#uqog9C5 z^B-7aWSK3bl;~;RIfy#zv`$BLIR1?1hMf`p`ftm0nn0V8owv@~+P_lHV?jGbS>8Th zdudLCoO%o2<>I#m0bPhuT?3$Opt;F_*MXB=GntEkyp};R;{7WkR5k1<1T1uH`|+p+ zSIBQ(f1Dv2U~xxZS()mEx+pfPc>T0ma){1rI%@NhTSXwNt<0Yo49cdvCn> 
zJ~Qsy<)YcV29V^Z?8}MOk3*VLj`jvM)3(wRh=fs9#Gp3KG=rtW2dcL%MI3Yg8uL#7bY;*cFi@n8|_V7hj9Q%W4mfZ}TR4YTrS zV}ik_A(a;XIBpGUq@|^VbsWzHaF%D+*(t;}lMvYeAS^KFXAoa(LPb8~KCJ*#5^h4J=mUJpX7q1I66-(Rz8Qen zg%(`gzk>$t8aDU91LN>G*t`c5pO<1_bDkd&jhD?Oza~`LLS-E`z4)rPsOi39wSr_jhrM84_W@0Z5*Y@^pT~T1Z?W8ZC%va zF@hg~mRD1qGeLgsyS>AH)bR9<%=tTW7Pxb9*C*7fGvF0;GWVX7RQX9JJ;Hc%GorTC z|ItWxUoOI?rtK*gA2JS-(0-uGQ9GW#Oz)qSc^7oHM2Uvi%XyfhwF_^-oht~uC)mT9 zidAeJc<&*W+4S7I?tNgqyOTK|>-0QhjJ?w=b`L3}118=BYR4c~K=g)VLNsmI$hGth zdnxFQ0R*2F^giJC>uTA^-XW7)e(1X62-trFKVuK;UTuy#m?i`Rm}LlzVtJuZAad@9 zu==z9EB(Gno3j!p>1ArQlyYcm+H;A@h5SFfI3At$-<83*1dK5qX2I_=>t_YcZl)DX z+=TE|{Bg=s)P56b88+*-Fng=hJ`0d3x!d@fhHshu$abSgI?-v*0tA0rs{v_lqp#n1 zTJtrxV}}{IQ3?|Gb2qOw7_61@VAUg^L$o{l4lcGxx+1EKoLtWhQVj4|_!dKAs`51v zb$8k*pSYtum_wWD==K;Qv76uT+1dFd;4r-_0zE{mk0BZc->any#i;K2gtr~vXk`M$&uSlnn*%lsd9hHK7NneE#v*S zGC6;veaByrB|3C*H|{47!zJA7;~Vj=gP8v0%WTG4u~+nzvW;biw_9hnqe%pSqiDEx zvnEFRJ@tq|>8ssCkMTN|6z|dFYcxhM9iy~p3n?@==sa*J_@nqE2TqeCN| z4ltkwGM-oE*z3We&w%N~g7YR$rIQEf%B+Fk;;LGb(~iu|<=mYXw7mL(UQnP3^+LT( zzZvBuv9t*Nl8nuHMPpo5LU&rrW=){wvOk6ile<|l#vj@-8diAES|Rw0sWV^#gU)in z5XNO^)EDijxN<=YhjseU;tMT{59mp9&^8Z&z_6w3Y3+%FP%7BWuT!LRFw2*Q2!YKo ztNT?3UDW_&-w)Ova2v)6-r*DhR^+{1?K8Hh{hB$=RiBQO-w!{1hhW;8{lD|0IVyHV7UXHF6qwn8T z=WiRApK9#4ljYsKNT4YHp=ilO>f$|C+0SK*LjSF7a!(Hz!vzZlz4;Zwo~T;7DO-=4 zDUs3f|LD8xCk0X?^6t-MXbtC%S`uBQ4rK^=q!r~4V0UkS!I)W)t%Gm_ zUnRcmC- z{w>NpYkq8kkPb%@Cs7uVPZWG}{o4g`cqAu=Jm+P_Yc4A&$)`uK8|t(Rk_j0dhBDqf zB{m`n6bjTVSM4e8S;yn%97-`12-($q#Q?aIz zX;%{iI+VdLU1AaztKB_kRvEtUldJNaOAq*0@=>t`=P&Ptbf^8QKLYAdfn_YFPLaRTMUk zdXEuFnbe2+biI|~P^eo&vI#j}K{Mha{nI?OD<*^pvW#>M7%^begn_`|#k36*ubYtr z^Q-8|YHy?SH3)K?HELw&PA-VtNq|9GjFL>pWPhYumY%?vE<#4n0y;o?dIbK?PGaf6 zoqC_K7;kXPuB*FhOF}~|lb+q6aop>o>|AYkUKM? 
z6v}F@87Nm;oh&^lSMxWZK?WC(4gmVl47{db=9A zKB%CynWD5ii`FJhqU9Q@i9aFZ;yyKIio5TSNLfiAPhKXrZi;vNzJ@M!8V%-zWkkAw z`jJg#vVVY+B$r|bpi3ZOnB*;ySBiQD5iV#uRMDjtD-K+Y-pIHkPVlz1;Fjzp)$!T9 z09fmmu4lE3Tppx`Fa!wYe-)m50TMlS2A-VCF#jj{$SU@I+wA_U$;H5_k0C$Yk97Ta z#Q_vHJ}plQ;c(h`jUvBWSeJ&}YF=x>!s((S0*jTB@{xf#5%HZFlbRSM*c&*qVsdK} z-SyfawJF}E&RLTdQ*whM$Lv7~1=OB=WQgK2H5r23N9OTm7zA5-G?6OYy8;u?Z8HtE zC&>xd4F8k%cj>)4&P5LJ`?#o03x|NVaXd`#0{U>d#8*>cVx#>XRn)o>WA z2CT~2Ku=XjUP@MoNqc4|A-)oP{7orlg(O+WcmO?P51I5Z7g=%CxBhKCRtlL9KM`Sj zv(30lW4ya2vHEM1c9`NKp>VbkOmoFhIkSG0R%Amy%5ykee?DE06ty-V z@Dv_T-JP^2JlCHd8k=v6xBxC3`-kIQ4BHXEaj!NWJ}`3LtEY5lPuOSknY} zEXJaWSj#yZ;P}c^^9Ae2(~|vzEhDD2K2MeB@|Feo!OGTS{6F&PSN`$0uWO8RhrR?E zG$b#NL|h3b8V-UP+EuA)$4qgsoAT1@dO_QYc*Gjy;XDT|yW z26}(XgsS~C?fj4d)St~MBdo7oM!Q8qTp94!Io-fH24`tl_!%X zNqLmJZLi*cLZV2~nbKA4sC!b=`<)vfrP@X3SH3q#edgMO4eMl+YX< z#zvSuB=cD9nhW=s@fx>P6k&xiDI@f4=GDNK{DVFA)}*DZ3 zU#S|8e>aBy^`v423Ym`HOwy=Oay}jN!PS*5(a}k%+yRLPVaz&1dqbo;$S+%O1(U2` zkmoqrLHZxZfL(cH0IBStAj-ns2qehtVNu73FQfa3)P-euZq@Rw#m%hjCEsZHb%6!S z2A(!Ru&~J8ZBr2MvVtrxH9Pv%Z>#Q|PxYWnYgNI_N8a-JJX{L1SE} z!ZHqinfEoI&uq40Q6?h@cUwhmQ*SBZ4M!-Hee0SQoYgO$Aj))E4ngt#fGC#KA%dxd zAiCFFZyj3WsW3`lW1pa6%en$|@3T8!ovZHvX@=@*NXrzt5^MctjW1q|Lh=!a#r zyzsr6x6K7)5fm=NonX}(xMzj1osYaFB_yGFMBqTEF0@!xZ%giz0GM#*Kb)Zj20is; zHpN5KM;R>YG)fY6NJu!zT)9YCERiUYtU|~|iK|9|1Z>Xr-+rDsgINnq>-5O&3ZXbH zoE?Yt(_H1h0=qWjg=upO@Xl77`WzBBw}SpWn0)>fG~*}f5N&)KnV!-cWWba&b7yTU z4QSl2_5PE*A&Oip61^)OEUZzh!{!{yDO;HYwJ)rTIlkd+8Vv84?Ogz4#q`vYV{=hf zM^U|)H;?TGTl>_C1~r3qAv!z?&bjococC}DlEZ(A(zNka6qpsvfhg<)24YTlUK-B$ zDKCa%Kd(&?0xwsp1)VTq4d4`*maN+NeN4N3chZu!As5%S*DnN}wN;&Y~ON>omwB$p9VQVY!Rh9okJ3YjNwU5^S)Al&^>( zb664sW&}79iOUMZ$qJ#Uqy^{Xs5zu#`$9NVH}A)Lya(w8z5nYtN3hVH?FP?!=9LmE zW_T}?_}K<2if_|w6`%}_l?KZ%1JZm)D2hu5B`_@$!W)|#Y0NM^{-6nLFQ@-lJIOZ7ZaqY;E;CgSU>3%;c~{IXM0%XsJ`fVPn&A%dF}yh_cju zhOot_tZE3iI5yST(|Xa|wD1L{-7bZIRf~L9xeQGzTVz-?%Ug<~Ceb`-UfRZ8c_~`n z&zRhaEB_jB`NZyUT7;vs+fnaW*wa2xkNBpPQZu=qiBo+-LEJ3C9C&7;1}BFH+jr4Q*>!F0PKaSJ=@hUv?Oz&w((i7>upcC(%^q8cZ 
zo?s=wp5`2{X7YXZ?jkQ6}P$a4po zNpZ|hqo87e!>%4-G(U)b>HMRVc=*8hO?K`x>j!;c&oxuCdUC)rhgPM~*dr4v#$`DtpX3Umb5+ zFIe1Hhg^>&>1f0f%<&w=`4kZ zXb^P_Y4DHc5S!33iuk<@?H=yd6&hGlvv}0n^ev{L(Ata#<8n9xAH-%ZfRM;tu)L?H z3xsy+yNzh<M9%rJhOI7w&w<+=x+65&fvo5dMi94ObN^(@HFtj zHiR-S0NALo_X^bddIv5k!6J$Hje%E6&9D^{nCxt?H1uhoNefTa>gn1+L*$HE8;Rtb z(vDBS%ZJ}Li0)7q4OqE+&e1$++pI(6x3EbyDs1SH%aGhbQ|GK<7hqgC{`&L>&+&Jv*#;*XBovo!XS6@)2@C zwMd?H^#&oQ=8;dnTB*PvN|3VoypdrZfu;l&f?ES`VbE@{8L}bRu%ViHAiLEE{Ah-W zfb8yR%%cbA)I~)t4p*m{&g=Nq(e{`{(M;w6lzGo8M~Q4T)z(_K=ng9R=+ZqUgfS~H zI9O?QLI+XGXV@(QOfDqc+l_PYX-8+C0%HXI zEn#z1_aMTOsb@)kGCu}M8G&cP8Z{Uck`8O`=ABfjaW^P@3G~kEq;Xv!?J9w+W2$Uk z+w+x|KY>WErxu{Mvc%s_856qy=6+k6H&JBuy9+VkhN(4CCp1 z$PckQEFL)LRJ4srUnmQ#;#o>=K)Kp_QP`L*o}EmPVG20-@uBz|#j@x-=0JorIK=7= z3MS>wZi6gRee$m%W@hJirH2rPTB*@?eiFs=SCjTnpNvw>rrYzs_WkbG2S>j6 EKL$bctN;K2 literal 0 HcmV?d00001 diff --git a/AE/docs/papers/bench/img/full-subgroup/valhall-1-tex.png b/AE/docs/papers/bench/img/full-subgroup/valhall-1-tex.png new file mode 100644 index 0000000000000000000000000000000000000000..a2cb9d0b30047c077acaf60b765fda609bf8436a GIT binary patch literal 856 zcmeAS@N?(olHy`uVBq!ia0y~yV4MMDZ{T19l17^=b^$5I;vjb?hIQv;UIIBR>5jgR z3=A9lx&I`x0{NT;9+AZi417mGm~pB$pELsl(_2p$$B>G+w|5=8m<$CRE~czADgP93 z@1${R%$J}R!#b%iwo+%@wuH}*YMGVY^P8FB{nz3erUTKOJPe$Tj0%bd3@R*a4J`*4 zCJ0C{2r)4`I3+N6aExfl;@-mg2gYYhE&bL<*o|7`|&q4Vt^ z!vq5$CL@gBG&4R}csotjhrF2j4RkR1j{a~VcQc*8!Q?MG2^Q4@NDR*VO+<}~~9;J$@=v5^M9ntYn$JCzPTq}qqb-)L* zT#tud^@On;8l=RzBHj!=IZYd+AZtx|G-OpqF(r~Bq&W`4f!n|tHF z;Ljv4GW*@%@Av(GzP~@eowRjv;7#vq9N087n+qZ1mm3_c% zyLNT|#DnJEIMrtRUCWn8?5`9DNivTm4W3>&8u7d9m)di-tocRm&;EAzXh^`J^?~!t z?0;PTiFV7uLm^AoFRkX1ClVa@m<{oJtL@E)-(p;`X-pxr>{z*8vl@+qk)u1L?qlx;dNWWZzg zeaxsdz*Xsxtx{KIgVd*-u|x+XI15GH0@69JAV_ZDi#-d|LAD{Te=X{K7fYC&a^Sz` zRfN~7EWzg*D`a#srpyG&VO*XQr#Pj(5cS&hg2^d&YW!7oRRfATyWAbNolsnu8XI!x zV*P>-QJ|SnK*3|2u|MP;sP>l&MR10h&NCZNHVfrUxlQ8WUSxQSULJy`z#^__D354j zqwBy|&PcK01k2TWS!u8jT{hRI^Cz^Jsq1%H5v!6c)wl&m_69hg}3Ybf+5b4742 
zVN9$?bvF}hcUnqD#sEVZ^Y)X8$KODmA7Jf{CC17dAc7`J8$mMOrej;coC-@KEF8Pl zm{&>Yd@p1}cI@m68C#|cfKw_c<@!X_x}N%O*t#=H`=Xy$O7xV?^e)ckcm@^Rg6!Y$ z{w0->!RI({-37g-Os}D)A_eoRf#y||B{k;zJgJoPj*F?NYF_I5SbGy+RAM=|^5bW| z8f&#YQh+0-Ae!js$hIEf`3A=%brt%jhp^Uv>@nswe7D4ypGT_e%k{xJ&OA6YSj0E6 zy3zm&o;lJFVk&Ef2~KLKrP$!8i0FrtxE@W>J?<%wa%!;lOstu6pEu_7O^hdbxLO*- zDq9~|io2sRi3C4YD;r*cEs_|>9SUIHW9yO$#rm$vw>3>}qYUgAg)QQ*B~U)jn4NyLrS~a?E&xKsrH_yo zf8IXWt>{rpV>=Sz@WEcB9Ty_{KfqeuA<%njTrKRUww-oxj!`A3NU0+X))55gbSF!T z2laBwk~nqjq}Eh&M=x*DhcdM)>#`@@c{_-;F~le(?ZF>5fXJzNBH#xfy%f5gjbue; z9O`W2XV!tQRklK}f&<)XDOnJ$-YzgiR1V1UnA1WB#&DlQU+FG1Ubc@Xzv-DWq!WfH zGh0&!w6{rc=cF;u?@c-|Dk5*{9?LPr^})-UzIhTXXVed$9i!Q017ct+FY1B<@48FHgdj*ik^t_v2k*9Ex0q{hQi9tWvP`~)ka zQEf~o*pLt5sK)Zw2;i8=@Gdl^!kSoDrhi&%p1nspRo7Kul*nC)K0~U38iVhJNDo+N zUr0l~D%Y=+0FxqWU3S<6TX#n3O#9mB?le!k|ch#x|LaTXYW66+G!gKGuZ|`f7dWYm?RdCsz7~Dzjykl zwTZ&PA50||xtJgwC+)cy(O-ZA`-yeR*26RoVP6B(0xy>|;$qSk=is$rRU0ny+6s%* zv)X2fme+R3R5EV(z5pz40L_u%y(HzoYqtvOGJ(slZiSfCvF|(7B!5#pBUrDy=GA|{ zjzR#0d0bj!bWy%JV?Z^d3iO8;61ufzL-9vK(qU@G`ERu~Z)=L53HqCn|U+eellT z{}2XZrjvvAKz#ph*itGu4ENaYFh6btn8GTqi-3r{1>44$2tCN@kj=wK2(n0{oWmzQK`&RZV|=Gj<=pV(GJ-VY{Q% zzUUQ36Hj<&DpTbKAjsce6~TG*(Sc!RD3q5uv*+;@62`FOBbw5s{NAAJ2~FT@_qFc; z%NHuZ0=UG}mXU19ZfQ`;K0Pb@r$_0tc376O3+!3*dniNAq^! 
literal 0 HcmV?d00001 diff --git a/AE/docs/papers/bench/img/graphics-subgroups/amd-gcn4.png b/AE/docs/papers/bench/img/graphics-subgroups/amd-gcn4.png new file mode 100644 index 0000000000000000000000000000000000000000..5eee27edc66dc695f90056bd3b4fbdcea319ad3d GIT binary patch literal 1877 zcmeAS@N?(olHy`uVBq!ia0y~yU<5K5893O0R7}x|G$6%U;1OBOz`%C|gc+x5^GP!> zu(Nx*IEGZ*dV57M{EMT+v4;~=R6QoCcus0n@eG@!0%S&dHl?U~#zk_1L{k(!CrwoW zGnW{@|8ITA==mAe^EC}W?>^VsIz{SkR0aFPxhda+Hdph!SK4#=!kSzgUhp$8 zGc^2TFrW(az8*vLVRx17()TJW!f)Qa_U2z}xx~G=ieKOF$}=!rkY@n;`7dJvRhat| z8S>itJ>DtqS^weRrrYaou5Ay0l~!SW@#X#UdIkoEdIkvwhA-?5jMQNMZ)2>Gvs2o+ z{ms7YviA?ywuHaRs`&j-)_r;X{lCEYq?V^3A#|H{!IjOk?xt4wKir#E9E`<3-nOc#(u_Lk{`vCYZa zmU8iXHoHAfzSG!KSy4_at2W5Ydm{Gm?4j9d=DIh}9*S<+&g1|8+;3pUqE?!KB(VH# z{~a89^K|z#fAHH>z4E4QTe;x9Oj@P*ZNLUgi%r zH7-T@H*2rIxpz34bN!X8AHE&7?(_S95`JOv4%?c5G4(@l4u{xI)Q_1SkT(#$#Y zr`%nUvb_HJUtVh4WzKV;^z1vkJ?B5vZI-_N#%$AVjhnrPX_Ap|vpz_#nDm?Dz22V6 zvdNR~23CCj@a_yr@mHTeoxMOV0Z@_q#b&Y7wL=>3-J#|LT#l V5~&{pP6F#m22WQ%mvv4FO#oO4RhIw& literal 0 HcmV?d00001 diff --git a/AE/docs/papers/bench/img/graphics-subgroups/intel-gen9_5.png b/AE/docs/papers/bench/img/graphics-subgroups/intel-gen9_5.png new file mode 100644 index 0000000000000000000000000000000000000000..2edff215909f70ed977566b0489c1b8f71c854e2 GIT binary patch literal 1620 zcmeAS@N?(olHy`uVBq!ia0y~yU<5K5893O0R7}x|G$6%U;1OBOz`%C|gc+x5^GP!> zu(5f%IEGZ*dV9q&mx)oJ^`T*7hiD_Givkl62}@-7&)&kfOU;7c@lc!EdxQIl#}54e zboKiAcz@8z#_sR)Z|n>VU)UQM864^vBnCC}KIehGo4%j9w{UZK zZO^=a&A*u$7=AGyU|?8a&%iUNneUkv{1V%zTkY+hzw*sX`|b8az|G)*{qM(c?c6=n z>glKMNe!*E!tOAd$Z3@~{-3VJmi7KiFs#adcze_Kk}uOv&HGo3o*8o|0J>k`J4qFk;M!Qd`Cc-ajG_-Gy?3NZkczmsHx{l9$rWgLxY?e`(@9u+ zgT^d|)+N2Z4vVfSu4Hm$*Laa2ETqyTb>gSs<-;A{D>t3&`CIf}CI8%0o4)$}uGeQq z=NWI;+5i9c>d0v}X?Z)(-}&^p!svG2=TGI^RNvK4{e6FH?f?I0Ss0vHfDYqiVBip9 zU=UDYU{LU2U~rhgz|f$;$iUzT^idP-6f8LX^Or^X{ld9BpPCh)od0~Y4#=G|c0PUm zF=?CbZuWwGKaFk+ecylT@Amii#RtZX@L 
zm3s11{r`_2{}|jB1G#nmt$p?J^bJV3BVlp1OXm6a;%}^QCX1Z?|DXO$-lqGmeljo_{H+^s$Idza@%{D}R>h}tpG|xEy}sJ?^rYiwbidDx zu9NdmyImOf@7%T{f7ciN-@R=RB!lO#AJ3^hUixg>?K7(N^UiO20!#+U&!(+E-hFf1 z8@U5NpPkO}zVlz}`~5+Y3VuUULEV0EDwy{BjPB|8^}n;Hsh`)7L3s{>`N`2Rc3-r4l`-}LAGH@EHh m@B6O)`nTT$?bzoQ|JZwfr>a^MIz0fEPnv;&{gkJRV@O5Z+q;UVrum9AJWQ`MS76(% zAYrnLwb^s>)hApzP22^XVzXz&G@gCN_s5}x@eyCkPu|V#!tY)`YPI}Y%a*rn?$-6M z@8@4=F6N88e}7KqsWCxgYQ6I80!0m_ZwKwby@3#ZIu>EC28Gth@aGAIu4!_oj52YE`W%QfzTUF&U#drVb7}M6fZ_}^aeeQYr_rv>rbHN42 z`TY67jAZ|uN1uU1m}V&Y_u;MQZIY|(|LncFzy9u=;uHV2o-;Y}xBnAUg8?J0(1xx5 zV-|B?s=se}c*5@Y?cWdA+XG9EvbmXUKo1Mh*2D4NO?N5$eKsdJ&Q|swzx^M^eg9(8 zXFdFP-tq`-6V~h6UOkWb!0eNH_Fa2@!8uL;Z=e#({xof^{R^`8f8kxL%RS<+-_K0* z0T!2AuFp){@jsHbCFQ!m1@kLUylMN=|N1l7+WI~6b2Gbs&woP8x*_uVSNnajeT|3H Sx2^$J^$eb_elF{r5}E)(2rc&j literal 0 HcmV?d00001 diff --git a/AE/docs/papers/bench/img/graphics-subgroups/powervr-bxm.png b/AE/docs/papers/bench/img/graphics-subgroups/powervr-bxm.png new file mode 100644 index 0000000000000000000000000000000000000000..91f8d8797fe99619ee5ce49aabba22261b098a81 GIT binary patch literal 2168 zcmb_eYfM{Z7(UzvgB8RX$I~&$y&*p$l*2%(E|wX!Vp?FXFFKI1q%i^6h?$C zj&+!@O92t1MMQ2xAp_e3f^5dHZYk{02o&co^hVjB*VCTQ@4z(KKY{%?m+$@aywCeQ z?{m)Cj5IzgI5HSP5LRkRawdZK!dC{Ci7u(LgSTl6uf-t^$=&g$@5zRZ*!_+T% z_xIf&93Iv?PFm+4n@#l+$xe~UTy{`Ic1qv$y1uDZUXqsNI#spZe^Ds!P?A<4Em0j1 zk*h9pur%KoN&8KNypb~^bzN5|%;1c-wGZ?MRD73)M4ds9s(47(cROtPRRoE;@c#=I zlSMK;Tf9t-$lTXzl?HsOh=$mNC|_Kb0l~dJQ#!6n?)6$~6hr56wsXiX!7#gQoYu?$ zov$0hR{O(gK93;Jr65Rrl+T6=VnOWSfnv=n@7_Do63iOi8V44vz%?DIP&%qXrikol zMOE?&+M`l_Wt%_gwa8j>*TC)ih>eqdYCe=(+>{$sX{37FHN;^NIjRSLlLca)4WHo? 
zVIG$=kV8>YlU$()rKv%{<>icS+mfPk|$hy<=f54*X2O**klRJRSq37o_K}gF~A=1Mt!{@EZ{(254|Em$Tawx9pvZ zRGhD5m%teek`As^2f``zC=gZnY<`;Eu^@SBx0o=pm>vw| zFI~y2tLT7+!tZGxvOQjuIFh7YjawEN%{<#myQ4wVN|00i^G#iZx2J#2<~o|%s@()^ zNq@*_I?lGQx}fMhCnfWgv9NY^7`5oAM%ke1V;NECeJ@KpDg$<+b`OOX8@&$r8Q4sO z{>%K9x~rTq8a|~cifmSMP1G2g0TjIFxD*88v>XJKOky^}=e)aiF3p4jdf;BVP> zMW)+7Tr(^wR98jjzb#Nxoo&k6v10L==$^d~9 zNV7&Cs3vlfw70EeX>bO1w_5o+v)6nxIJbrDZkAB&!sxE>bjS0y;yV>~LLd{+Qnyy!8&sF+vvU8=^JCBNJU@8Q$?Edn9__UFBJ>dy_%Xyno)Es zm!Ae=dGG1_qDnfqYHvDkZn*pY*xTJa*WWDZuBlor^yz@e;y`wg3id7i9@LNH4K=@Q4f=Q5F5dQ!0n^j5IBRh}oW`d9ekYn2##naxv@4EuY z2Eij(Het4r)zGu)zws7tt>d}zLb?oV0e?|RWUch3*4O`gwx9C7*9Z;Hs3z9C3Brv< zCcJlkSZ$)Z(_Lxvq_2@8P8oBf_5wYO7OJ3 zr+6AG0V}?XEYG2xkL-|}BK&nW`5&v6fHzSPZYAt&YQ7h{C`%f!MAXVx zqY73({15iDy|;lE5PyT6RZ1%Rw@~b87~1AN8{_-%#iOG@T-AVQPzH~?^-gzkahg`UkxWNWjVBvdA{b-cCpDq9(PLYFgX*(={N)F*Zs#rKa1j#E8YeE%N z8<_!?dGK<>b6anP{>V1l@@7j1bl@T^4U6v7<42F0DYy9_KmfjJQ^&em^2{rrW0b}e zBb*0~lMFSy+&VMNP`_@+KhZf(^@SncLEWiR5!Er0mxeCZrG&}N2!$%MbEZ1eP$(l^ zBsb>5ijD(*-1eeg<$m!m{KiEIJ+lZfIyx)esvst5L|XY z-R%M_aws+2X+}RE*iE+{{Vg^6+bNj^f3PLZ(59Z3Favo&0bf`_7ool&f&I-xK*i^0Cmgs{sQ%CG7Gpn zf;dHr1yk^B6B@DS-yaggXTyz1i=3-AkkY&M*$tA;Y7?VIP>IEXwSsdH8TKBThIheI zZ6ltGd}vzorgp^6BA+JSBDAB#BcO>F$EkCzhV#?Uqq^^213t+uM>+~fS_7C}PzJoP zU4eT_cp^j>`1!iGy1OzE#!mS}g};C0+33Y{O|t}u=Olbpwt|#ya~sKD7qun=xd~k8 zVnR6+7 zpOh)#Xu70~-!5koyd`6Mj*bCxY%-Dyug~S#h_>@IcO}fmD(LIpLxIhPr~bZg^T>y8 z1aD|yJU%|lh!g}SdC>b$mM0|G^0;K3{w=qj%1l(sqpX6r>W z|2S9S^r;f@{EsBs#apT7*i5(Cog1qG7f6Z1uCiRw`6CzCv{AXT{2OMlyZha{Eapt( z&$*`*o-of~w|2ZvUE>s|+t5&A>DXc#96YCSBzHas(ZbVn2cEnhWsdLc;916RVqB*A zuSoux#f7~e;jD0_mb1l?u=P|7!2J1|*RL-IzpXfv+>HSi3oVu6*l%Up#-0&S29NS? 
z8QC%vaG`a)ani9WmCyELDi(XR7q)Ew$a~fE7N^!sqsoa}N5YrKeRQvDv0L_rUN)Uf zkVdKxbw`=!Y^_TMUM{@vfBZtGfUHuZvJO)J_Nvsz0^!k}rzyuS2q_VBjIAz}Gsmf` zFI2~axz`tmnVLn_@Qi?i(jF|A06|ZbK3whBz)z$QY<3|JT&-cSHmu*Jy&u67zN+dq z>GX9b@pvJu7+ki|kAm7e=j(%%5)*HS8?`Z%EB$W)Hngywk+agpAe1=Hqo<#T>bD$%rhlzM1aupzNawu zi_?wFs{s}AH8^2AJ$ICu`CX5|I-V0!W4v2yT6m+qlcN)GjD#E;NAPO1vfB5A24xL$ zH2X$T`n2WR$VG#;>7Cp=U zlF(2{&L0~~eY-E%OAn%GUO~Isn&PeRKYmbBbx9yvBCK}w_0U?q8_H1Pl0zd?^O@*4 z#=C|0eG=H;r)(7ftAfLp2d>0BhW$K0h2ULnGz}Jm1?vK>d?G&g9pmE8)7|fa*=<-o zBx2u}4sI8ZgXtyHd*!P`TX^TrJLu0egp8pMm>0P9h;=hGJ@uyAM z4a_DXN5g~P*Rmjc(&suc9vRSdYA{c>cSKM~}MfVxk&&MfTc* zrgt0avy2ktBk&1!#tEN+^{{Yu`vvyznO@ zq^UeR$74hFq50$PNaK8*>^W1|m~=JD6lbuhZ34!H%vs*y0!dWbA$keE9pB;I z<|1S+hqXW+v72>Wb7i<<3(=MR9@;9mmSX5NP{tl5hRM?|;ie!kJ;mSgT;rV?$KK0V z;rPs7-q_vYT9^z6Ps*@pNI7#QGMNCJk+ImK&8qQ2-gk0ri!dA91!3^PG$(iSj?aKI zpZ=(Scsc9M@Vj+R_w(~s&9ByASOIW2H=}obh(GrehUbT$RLe+3Jw(OQhoFL?&U8U*nSliD_=F(#wzavA>NNkG@L9T@dmv zZis($Hq9MBDn5C(N{27s3i6!Gs6@wMGEeNQu`(=9`;C17KNV$hG3V;jx5ucO{8Q+{nN;du6gH4yUxEe^smn5X!Ec*z-d9$8rGcJoavU zCT9sr8oL>wU(~PU2HmhSo%HeZihCy(3C){dD3q3)4PKzCjS>u$*%s{Am@?!}YR^ps{hN}{DyB5xeY zlVwnc;!i1s>H>s3wVS^3HGx;qw&DwblTk`SumSwZyNb;;YstC|5Y~4ZL9-a?**mNb z^C6*tMi1{nX!SImKgx;XdMhnaGYy0LC>eZ>vXk`qOw3rexY(q+6}OU=vmmh%S~xO59}fs6S%4g4+r2(EOn9$%mxguVbTU?OC}HO66m^6*+yg4>V<=u3P+ zc+)nD;H3$i6-eS^qFcomG{0ZYWEKTvfThHO64vr*s zBmoR;GF{Jzv`x?0vOUHWl9^#%$3(1*C9M`&1n)=@EB*67W=8e$Q5`;crJ}7jG$!4_ z_L9E2a6kMv6gCVz5~NsC-Y?)|l3DUy%D$H}7b^7m{+s0BDn?+{0H|B^GD;6w_r$kr zAc=Whr~+dpd%HPL?kVU5>PW z%EGQ7Olo_R@E#+n4Zniri?#>RnMPTXI+UTJ#P9=NuCv7!w$X{7i`&hMCr&+Lh5b-F zV-vxVfHDY~6ZP7Ykm|G|u)X6NT-Hn@$H7^(-D(bw7FFPrlph8n=`!njZeaXAc~^9a zFQdl3+;_^kLZ;YEZh`jx-QFfix?XS#%r z^X$PVYQ2AP`YKKKCNDY#oNPW+JY8BSR!)uU0*lT;+s|KYbzd`~AHWXfP){XiUlDZ- z+M6#(yrTr+S$#7Bd0G8&MUr(%T2m)!tQ|*!&*5`#BJ!QP*(p@ z%#gy+CK__flsxF{3||s2p%+}3=$r9zIxE~n2#C5;33DpfCVr=u5KeR zC6)r>g7&<)V_I@y?{8?0Wo2%wcuk7H-Z)3xc8FXeTRL@02x)I!t(qb42|fsu$Ewj| zZGK+(0YBnv)o<@PhfH3y`21RdelZG0gJFvljXa**Gn?A6b`#2&$!wml#kVFwZ5W>B 
z8R3^#4QqRkflx%BogcE+cta0}Kh+!bzrk>(KZ7ih%+fibuu^RuVAxy)2#ytr8|0%f z;gfIjzowh3l@>*DiY17l7RVCk#t$QB4#uKo=P#@WQ}4=BE*^J{65tIu`rhVl8XhU>djxQ~E9wCu)BZX@wf#V&dKkOf@gv1y+}YkB#f7(jH?E$*o^p2PB536>m=Vga69 z+#~Oyg!x7R2lf5YpM`Pv2sc)lj8e1VfbLRhBic;t-s^5gszBGZD8|gEm9Hy?U<9{W z$k)Aw3!^Set!lvojM&=9N)jpb<(X?uDxow>tRK6(rL>>3raJlMCCGH*||$NlF4Gyxrk_ioYg;Rr~^}P5WN|4 zag&A1WxyZ9;Z)lZuvGZb=gocwuAA1bEw|57H46u=g(=re6)@jGq(1T^W^{p`yyha_ z&TBrtP|INz61P-pt-fCp&H;vh;RTafsD~DWM7=^32G);}vFMo&fRVM6n{5H;+F-iX z$D_PR0kDz$q}v(YWBRm0Yb4`xtZAM%ns|fs^V?!z*nUX2)3d-^GP*+)k&Ob>t*)B6 zC2~IL#jaBzxba$-avQv#xqvzwQZ~Zo1m$*eP9I}gw9SB05i5Q9jXCM+wu}WBudBxm z1agh}t_#@Z!rvILmUnVcgZ^~_EgMO3NpP+A^C-TSJ6aO%DRY$`BKUZoLJ@fmU+pzh zZ%f(*)Jc#L>jrLkC;4jld$etRo7fl}+{nXL#0f!)Pi?}$fz2XBp32xM^t|3i)o>qn z;jSDwi6@f-Wm|7%c5=cpO7}!2J;kW56K-~H?_g|j)xX;}bxEBhwk=xX9&(5KVAa4i z{#8ejKb|A==f5I&nhj@zlZl-FFe74Rz>)D7v8gvD=3Q-kho0yxH6y|~<(Hb&ks87B zaSz1()CFXxAUf*2yX(F5Z2S__vxWN{_)sCRVe)Ccmp{1ZsWfnrE|1RW*qhj~B1hzJxjbTBcb~#*w+5`u6kY;lJb`y0n}P@+kY7EdPYSB#-~n7wag%R8+EA0 zXi_)GL#I-C1yb>V5MMq>MSIp4(972Hy0XLX)&&a2nb+^K`yyK3;;QzvB5jHA`|f6$ zo7#6yTn$Il-&?g6T73#KFoQspd$+v+l@ID( z)`tg4BLWWay{2|u&1*(*NxP!#C^5qgmV+d>c9AIg5!6jrBMD3w3unOLGi5N$pd-Mw~Y`kRD%LfJoN$m8(0 zFq#}TBI?Z%O82p$bxyQ@K;t8RkO3C8Gjxx8k3$QC*fNCP0CaISRyUA~EL?F34f)R( zG7_YqFoUgM7rjQZ2e(BU*5tP*h-X8M7JwERmQ^9MfKt3p~wSANuPf|lLpsO^rUfb1vt`0}q* z#-(qk!rMEy*bNM4aggaWDfMbf41N1Qyb1hB#ZSdWmFJ-#(1D&1No)~R?S#M98pS#M zM_maV6x8anPSgQ&QzUU@Vk~CqiRb-t3s7^ zKd-z++{Aic4jRpLr`27dYQMgAsVvTWxRBEL#o&9&0ck{Zb=9U}oe40)nV2ux_9FPJ zjP;w|xkGe{^Ir5i1Rm!J=*|*pOhpxIzoC@fnj9TE6*n z?|LrGjhgi?KH{HzsL;}($nhG)Q#r3!$dB;}!~WlNtOmOUDdOb}I9h9C{JVGJ)A~Hp z8Bq7>kI&`$V?o+L8dY<$7PU7nOUuX=zVhZ2&JS@e?=JQ;f)|Dmh@FhylN1jsfsYd& zUB2TXume8}i{8901Q;`4=f*oV)?fp@sr>T#eEeii&$sA%Nl(pch) zqXLiKYxg)Nv$8n}V>4agyomDod+eQ{ZQt&sZ%`_51Q%}ZYYvxKVd%(Mx3$XClT@Cc zLMP*Ps??1?7=w>EINTlCGA4}nzAo$~PUtG{aQ|EDc25y4CNj~ap~IuPAXNyo*~zuoc{Rzt@H)iTb1 zT;a{aM+3P;ad$f3F=t@iKInic*8Cy9hRH2T>Jqpox2Nqmo>N{1iePFk`oLaBK-Ep@R 
z;BV2s-86MeTep|OfiLa8<^yhJ&0?d1?ysdA2e@=45H_5FtSj$y)6@A~$SPTjp*71ryppmT4eRTxUm@!RpQrfO zS(o_F;)@DQ3d6#AcALvz) zGn2m|Qd#YwQ6l}Sh%u6#5j={D7%rIzqC9D=m<5MPT(-r>1C1cuP?@?4H=Mp#l8s3z z4;LGS&+um<1#TnCM_9GPKljdvK3W$$If~ZIy=0_hDh=OP!D*{|2FX)a%5do&bnd)5 z`>xlE{iMIRD-9@*k9(I5d2m#6LigI&3JIs)ipuX^w@ye{N=_QnE!I`r9!<$&PXzxh zblOaemXcX%H3eqYmTsPV)7`!U#p_?oKV=v6oZZ1!{7nx;LFTvCE$iG^h5vBHD3a4$OteGvtoh!^K0MM zBRIEJLF2|g*hh10Omgm1=2pcSO$>6%jr) zT64@5u%I95W}+6tC_&<5@W&p>X0Op{&jE)W$t;zTYkoq(t(}~_av7PmJ>UB*)&8=w zYkrO2Td`!z6$60>gc~He%@Xar9q2do#B!!BA;pdR!?_Bu&Bk}z&}z^9S9`NG#HQ<& z8r`tvX^0<9Kse&3`fbaLtHQuIIQ1}ODc5>%AGoF9F#noo0?s|KYSb$nA5^Xy^;jjb zl1Xh`T#Ii64QES(a7SrFJ-M~o^=-dh#RF>&Z7>>?2;^^s-?lsfqZ^ojfmcG6XIFHx zR4B$)GpucwMwq}wDT1cMIW!Iawb^V&h-C-BVCfM@HZl$@R=r;56x zASN9@eBzyR72+J@Ww6)lxo-TSr}b<0>yI-62DIn(gw+qV2yFF%giVj~twAix=0;5yYT?@KI$vg)m`$D0J_hbDN0 ziHidrYbz_Q3sV3S{)!hUK$x(mXX`d@T zATr>3Y$8K@jQ1BYqp5`R#iPGdY0-fXo*tmp4!?VsbasfWNtYSR6W=juP5(un^m}Qo zqi+Lz^A4!XAeo}udOR2QcY}6>SkOSQX#cMrA=3lYSjLcP>~1YHL1WO0k_Xbk2iv|Q zcsdzb_K21ra;1Q;!LA6vvw=bmV?XMdgZQNBhjz0|v)hZ9SAJbV-g<ps2Qq&RBij53c?X3r6|5;8BFonQO&&e$L zi;_%?9%hQWr+lpDNXKmOh+u=LkG`$KPtPP-dZ|&`JM0X!oRNQv3HfZ zC2jhF)#0d9^QAcTUtzYh+TgRlG(AAD#XZ}S^<%lS6L2y)pTVZ6jP50SpZ0qXB12&4}r=b@WYb+@$P_Gi0OB* zjQfp#o2zr}5sqN)Eh`sz-*6s1vxzb2^4{73Ned(ANKgM6Q?Q* zN~7vMaud+HG;i#>$Q;jB7IU$VX;T%p1J@|Q{!X~A0;0vDsa@=f!ZuEc%x#`ZfKJ^a zooYtt}*#5vEgr0o~R=13BWH3ly&8RNp!BU2wc@Z%rzZ z(bG8pMA(`n2w6LJ zCaMnISK=Ci8=hy;_k3v^sCy~_rWB-N(kI53yCskpS6d4(KtZ|hG~~cq-s^8B9bbOm zSPfYxo_&`Bi9ZkVXHESxkhsdGqW0|`uY-XEffVwDwb8|o{asJm3D?BjOlOXMoW5(` zl4UO4!KXO2x5Y=1_hcd`{-XTFx8z|5!%)>M2mbf++y0659*uS(I1L;s8TnUO&@Wj7 zKNPBP^5zAQbNeCT;tQw)?8nyOw_kR0W;*AuUCqrJ{`0K(OSaeoaEMooH|0KIya#zN zW4sb6>?2B@~^AyW=SoC3~*5X6bjTqraqMgG2n4zxg!ijn_XCTnad^KkzaiFGe%pmPy$$QFIp>8xK z@+0~97JYSrihAWUCyzCqIB+YOfKoR&!To&g49~dLbtnpyK)XheM>a$M68W$JASuc1 zJ`?{=Fd+J1|B}com3nxTz~r#ppO0`ZFZ3}aGnBnl>fj_ya1g}=vzY?fzY7wu?x_Wi z6!%K@V4aZcG`tq2R{zn6_@w^2VOuiOOMrhvH7C+k_m27B*35-~eQy2h|L%S>UD~K? 
zTjbZQ`xcBNasV5M>uyLJ%fa8m2N-_~mh|J%#Jo;!Um8;kq&uFB+y`9 z>c_tV8zz*~b0~7mr4P7KCmkMLaCY{Bi7u zT^Rykv=eOV+c=rK8rG~1gr#(T+dSZJ0C%V}<*9+v-*cZ)|Lpos(6FCnS~%hzBsPY- zJF*`#bsX}H3eWha0U0zQ!s|fk5QhC6KI>HdVEYa5&w6+~XsF!QP9T%Q?&HUKV8?}@ zA|>DtH*$cdT6A(M|?Ce<%t+J7S{NA@LC6-B)H^;6j z*-0oAXhw<-&Hjm1TM({EviFN__?ZtKk$NbIl86tD=*8Lo` z1uYB<3T{a=5G_A*Fa*#1y*umTMyxD*GsrRiyKBEKOgaMOdBw0k7L=#e$?VQ;@#cSd)DGw`xKoB)n*b%y62cN;wvs7IT zwPQ$^qXLmJ^e=MJhZmKwWAOsmC&};g1l)GE?nefZCH;`uyYu4?QT*vU(P(WBN77e4W>TI91rgS6{x6%*PcO^L z2RY*bkvV3z6NQ1pA?1%NBsL*zJ!~+G|)+b-MQbwF)MK-d(IS`Yd`IcLvJ$md0hzfdpuh-u_jK>=-u()9h0st z_5Uc}4`(1(>Bs4C!(d^?pSrhH!^Lm6U<$_Ig^8zDLCPB|gH~X3%%08}C4A5LE>v!6 z$Gt;9?F^mc%-j`>@0llnj>tb@Sdpz_1`xW*f)B*qP|0qO)uN)pK!GG?@1aT~pNrY( z$p@e*JCaAODL$@frpWio5&8FJ&6BdzWM{+s_VFe^2PN#_wN+Z!7Zb#E=Y zlL$t~QD36?@e~V1?2f(wxoBrnU!tVqr<+5vCS$6%vkl%t+a_^)JXNqLv*3l%7f#Cx z*eFmO82T=7iBiq2+yurvtIM;PmxwaQH<1pmn3bg(o=YcgLRl|k`zti-vd4NE>E{Z?P z^GYbN|L>BTpU@b;3&u~yzf!3N{x0aCo*T@C5w~?gojynSX*JkyoLpiiaFNMGXE?G7 zGQBoH>9)&H=!m{EoNqHh;r`pNpy%NE`$mS#HOlB**v%V8`sO!Nzk`roHcHX42x0(& z%iAvOT&&sFpY=Ae1?si+z3kq9Rb~HM&9;4Wyv*e{iy-g~p+7mS7b*2-7m#yYCqYCibBB zcDOX1Pv2hC%5YXC!8qyX3d1$qAYWy<>s0CDZa= zS{K2=;L#D|&V=_mmX*)874r?#BD@w4h?DqdfG^`bfYLO*e+}(S>UKE5N1OI3a2`^ zt%ckau~YPKHG&9BROU$VP0Co^3Q0Zqp2~|Haes$Kq2MRPEnGl3$phrDY#=t$ zWs6Hyn=%_*y!~n$#v{>L5{#tDMt#_Pp{zG((Y1wPiNP{%Aa6QBir%&xtQwQIaoxF5 zA0P0BS1AU(h1Tg0maKM=atkn;4&-h~C8hn71Kdzv1ufMMe}RaG&cOey(DDDfug%lp as%(|T)vV`czJ-7vXRXhlt~rUg^S=Px&R+EZ literal 0 HcmV?d00001 diff --git a/AE/docs/papers/bench/img/nv-turing-smid-compute.png b/AE/docs/papers/bench/img/nv-turing-smid-compute.png new file mode 100644 index 0000000000000000000000000000000000000000..e472e25869c016564fb86bc5a3b6a5abdeb37ee4 GIT binary patch literal 8296 zcmeAS@N?(olHy`uVBq!ia0y~yU^ZZ2U|i0@1{BHovgss{;wb| z85k58JY5_^DsH{KJ~2!+L89%VyxS!O&ZZI-k0uijHXs{Fb2h2)PD)4siRCG9PRav{ zy_sJu=zgxs%;x>CcMqbg&lsldyIZmRb+@c3lC; zF{5ZS1V%$(Gz12D2yE^-Rx{&Q#&^5he|yX2^6l?M?Vi8*ZsDHW6~)C{ev7`(DrJ^v 
zU>rrGAwVDmDn=8@Xb22&2+VlOkY=aXS2h3by|mr-AJ08{d-S(p{+YQI>L1@d+PmfV zytnhhEk;YT(GVC7fzc2c0wECT^*GJ&Yu}^X?a}gY`5wg{`7NIRk@fc5W7fUp^6$6a zy&iLyfh}PajfTKz2#kinU=M*b%ZAD*pS`au_CL-&DqXC6FLBT53jU9FxAvYdtA9NA z`0e8QyZyJb zTr5Aob#&xxfX4#}3|S3<9>8b-j)nj|LI5~&_Uw4ltKOZ$?^W)_?wMY8fAihqJ+mv^ zKW@9dw!e)1qh8qPz}aYVGE72XLtp$^-~s_Y&{_a+B`||6;U9C1;l&LP()6bTWf(kN L{an^LB{Ts53u&od literal 0 HcmV?d00001 diff --git a/AE/docs/papers/bench/img/nv-turing-smid-graphics.png b/AE/docs/papers/bench/img/nv-turing-smid-graphics.png new file mode 100644 index 0000000000000000000000000000000000000000..8e952fe3d71e1af6bc482aee71639b8e12150336 GIT binary patch literal 2583 zcmeAS@N?(olHy`uVBq!ia0y~yU^ZZ2U|hk$1{9gOh(QKOF%}28J29*~C-V}>VM%xN zb!1@J*w6hZkrl}2EbxddW?X?_wfUrhVrx8I978JN-rn5E)t4x7{NeMd2Q^mv zo?4VrAl7v;Vd#E&x=d*Shh%+!e*sq?zz|g?M$jrcy!Xd%H zpx}G}=q5=fpzBWv06pT+*1*WXV8p`1z%W5^ROx643`htZEBqMqJjd)y`rUa)`}VEZ z-+bd@@7FY7Lf$BMHpzYd^G(aY&%agwfA0Z?(X=}n0z)ALWM03lJeTIKzyDl<_x}8| z3GU|lXA`{d-vbio<<0UkM?HNtaqaiRfLO|yAb6|RQUcU~Qfc?+y1{Tn_za|0; t=!(0~>(j&QfGK*v-Ed6NE^EGu_g_$HOrE@aHn8$x@O1TaS?83{1OQv}&xHU0 literal 0 HcmV?d00001 diff --git a/AE/docs/papers/bench/img/valhall-1-unique-subgroups.png b/AE/docs/papers/bench/img/valhall-1-unique-subgroups.png new file mode 100644 index 0000000000000000000000000000000000000000..5c9f798a11d09b6cfebd17fbbf1f22001ae07037 GIT binary patch literal 3718 zcmeAS@N?(olHy`uVBq!ia0y~yU~B+k4mP03lH*(V11Zh|kH}&M2EHR8%s5q>Pa4Ru z@N{tuskrs_n&BzgP?`3JmpPIgHi{k83M{n5APxA)!NeEa>D zwSV*Pq%Z#c)MnYHuT^(8Z~J>(>Mj$|Jwp{7@Mc)XU&0%~d*DZ7ALDDr8MYU`He6%y zk-zYZcm3ZmV5kmV5EsV(;$P;{tIV&{%i(&% zRZylo{#@txGn>VmUNT43JM1aaOC7?#W7xqE!2e?3-b=3<*R5tRc{_Vc)q>Zr825^Y zgR@`uec#pWQbXB)*BKYcz1a1#;VSc8bLkD$;Sw)5No|m70QxfPz)<#Afh@zdy7J|< zTiw5Z|0$YUe$HFt_uo6oMcMUpw!L4!#r}M1(fRYiTG77)Z@B}r<*oVKhjJD_RdHnf* z8m`^FnLC6%2P|3y_)G2;9FSXg)0ce>I2S%;4j=Nh-iPy+hwjB~{{41O+orFt{i+Mz z8Sb3DWBuc6|LoqK-uA!jmh|e}TEpG!t9gfVM3`5*2`U3Uij1D_*Ld8<~jT&e;bE#SD}HMLI2l8KWR{dv-C1!wbh=N l4PTjWF45B86IXSCc_+am%>ex9y=F6*2UngHQWR;~a5 
literal 0 HcmV?d00001 diff --git a/AE/engine/Changelog.md b/AE/engine/Changelog.md index 869776c1..833bc520 100644 --- a/AE/engine/Changelog.md +++ b/AE/engine/Changelog.md @@ -1,4 +1,15 @@ -## 24.08.xxx +## 24.09.258 + +- added PerformanceStat instead of CpuPerformance class +- CMake: unity build +- Profiler: add GeneralProfiler and RemoteGeneralProfiler with cpu usage, mem usage, etc +- Serializing: Bit packing moved to separate BitSerializer +- Vulkan: add VK_NV_clip_space_w_scaling +- Vulkan: add VK_EXT_subgroup_size_control +- add AE_LICENSE_* to shader code, allow to disable code with unsupported license + + +## 24.08.254 - Profiling: NVML runtime profiler - Profiling: used AMD GPUPerfAPI tp set stable clock @@ -6,7 +17,7 @@ - Vulkan: add VK_KHR_shader_quad_control, GL_EXT_shader_quad - Vulkan: add VK_KHR_shader_maximal_reconvergence, GL_EXT_maximal_reconvergence - Vulkan: add VK_NV_ray_tracing_validation -- update to cmake 3.18 +- CMake: update to 3.18 ## 24.07.249 diff --git a/AE/engine/cmake/utils.cmake b/AE/engine/cmake/utils.cmake index ab39f23d..dbc226bf 100644 --- a/AE/engine/cmake/utils.cmake +++ b/AE/engine/cmake/utils.cmake @@ -48,3 +48,15 @@ if (DEFINED ENGINE_LIBS_PATH) endif() endif() endfunction() + +#---------------------------------------------------------- + +function( EnableUnitBuild projName ) +if (${AE_USE_UNITY_BUILD}) + set_target_properties( "${projName}" PROPERTIES + UNITY_BUILD ON + UNITY_BUILD_MODE BATCH # BATCH or GROUP, default is BATCH + UNITY_BUILD_BATCH_SIZE 8 # defult 8 + ) +endif() +endfunction() diff --git a/AE/engine/external/android-clang/AndroidTempl/app/build.gradle b/AE/engine/external/android-clang/AndroidTempl/app/build.gradle index 71bd4c4d..294193bb 100644 --- a/AE/engine/external/android-clang/AndroidTempl/app/build.gradle +++ b/AE/engine/external/android-clang/AndroidTempl/app/build.gradle @@ -26,7 +26,7 @@ android { } externalNativeBuild { cmake { - version '3.26.4' + version '3.22.1+' path 
'src/main/cpp/CMakeLists.txt' } } diff --git a/AE/engine/external/android-clang/AndroidTest/app/build.gradle b/AE/engine/external/android-clang/AndroidTest/app/build.gradle index d938b9a6..cf943ff2 100644 --- a/AE/engine/external/android-clang/AndroidTest/app/build.gradle +++ b/AE/engine/external/android-clang/AndroidTest/app/build.gradle @@ -28,7 +28,7 @@ android { } externalNativeBuild { cmake { - version '3.26.4' + version '3.22.1+' path '../CMakeLists.txt' } } diff --git a/AE/engine/external/android-clang/HWCPipe/android/app/build.gradle b/AE/engine/external/android-clang/HWCPipe/android/app/build.gradle index 71bd4c4d..294193bb 100644 --- a/AE/engine/external/android-clang/HWCPipe/android/app/build.gradle +++ b/AE/engine/external/android-clang/HWCPipe/android/app/build.gradle @@ -26,7 +26,7 @@ android { } externalNativeBuild { cmake { - version '3.26.4' + version '3.22.1+' path 'src/main/cpp/CMakeLists.txt' } } diff --git a/AE/engine/external/shared/Abseil/android/app/build.gradle b/AE/engine/external/shared/Abseil/android/app/build.gradle index 71bd4c4d..294193bb 100644 --- a/AE/engine/external/shared/Abseil/android/app/build.gradle +++ b/AE/engine/external/shared/Abseil/android/app/build.gradle @@ -26,7 +26,7 @@ android { } externalNativeBuild { cmake { - version '3.26.4' + version '3.22.1+' path 'src/main/cpp/CMakeLists.txt' } } diff --git a/AE/engine/external/shared/AngelScript/android/app/build.gradle b/AE/engine/external/shared/AngelScript/android/app/build.gradle index 71bd4c4d..294193bb 100644 --- a/AE/engine/external/shared/AngelScript/android/app/build.gradle +++ b/AE/engine/external/shared/AngelScript/android/app/build.gradle @@ -26,7 +26,7 @@ android { } externalNativeBuild { cmake { - version '3.26.4' + version '3.22.1+' path 'src/main/cpp/CMakeLists.txt' } } diff --git a/AE/engine/external/shared/Brotli/android/app/build.gradle b/AE/engine/external/shared/Brotli/android/app/build.gradle index 71bd4c4d..294193bb 100644 --- 
a/AE/engine/external/shared/Brotli/android/app/build.gradle +++ b/AE/engine/external/shared/Brotli/android/app/build.gradle @@ -26,7 +26,7 @@ android { } externalNativeBuild { cmake { - version '3.26.4' + version '3.22.1+' path 'src/main/cpp/CMakeLists.txt' } } diff --git a/AE/engine/external/shared/GLM/update.bat b/AE/engine/external/shared/GLM/update.bat index 91db600a..8cb5d050 100644 --- a/AE/engine/external/shared/GLM/update.bat +++ b/AE/engine/external/shared/GLM/update.bat @@ -1,6 +1,6 @@ rmdir /Q /S "..\..\..\..\..\AE-Bin\external\source\GLM" rmdir /Q /S "temp" -git clone --branch "AE-version" "..\..\..\..\..\3party\ae-glm" "temp" +git clone --branch "ae-24.08" "..\..\..\..\..\3party\ae-glm" "temp" mkdir "..\..\..\..\..\AE-Bin\external\source\GLM" robocopy "temp\glm" "..\..\..\..\..\AE-Bin\external\source\GLM" *.h /S robocopy "temp\glm" "..\..\..\..\..\AE-Bin\external\source\GLM" *.hpp /S diff --git a/AE/engine/external/shared/GLM/update.sh b/AE/engine/external/shared/GLM/update.sh index 8b6ebf85..84fa8efd 100644 --- a/AE/engine/external/shared/GLM/update.sh +++ b/AE/engine/external/shared/GLM/update.sh @@ -1,6 +1,6 @@ rm -rf "../../../../../AE-Bin/external/source/GLM" rm -rf "temp" -git clone --branch "AE-version" "../../../../../3party/ae-glm" "temp" +git clone --branch "ae-24.08" "../../../../../3party/ae-glm" "temp" mkdir "../../../../../AE-Bin/external/source/GLM" cp -TR "temp/glm/**/*.h" "../../../../../AE-Bin/external/source/GLM" cp -TR "temp/glm/**/*.hpp" "../../../../../AE-Bin/external/source/GLM" diff --git a/AE/engine/external/shared/Utf8Proc/android/app/build.gradle b/AE/engine/external/shared/Utf8Proc/android/app/build.gradle index 71bd4c4d..294193bb 100644 --- a/AE/engine/external/shared/Utf8Proc/android/app/build.gradle +++ b/AE/engine/external/shared/Utf8Proc/android/app/build.gradle @@ -26,7 +26,7 @@ android { } externalNativeBuild { cmake { - version '3.26.4' + version '3.22.1+' path 'src/main/cpp/CMakeLists.txt' } } diff --git 
a/AE/engine/external/shared/imgui/android/app/build.gradle b/AE/engine/external/shared/imgui/android/app/build.gradle index 71bd4c4d..294193bb 100644 --- a/AE/engine/external/shared/imgui/android/app/build.gradle +++ b/AE/engine/external/shared/imgui/android/app/build.gradle @@ -26,7 +26,7 @@ android { } externalNativeBuild { cmake { - version '3.26.4' + version '3.22.1+' path 'src/main/cpp/CMakeLists.txt' } } diff --git a/AE/engine/external/shared/lz4/android/app/build.gradle b/AE/engine/external/shared/lz4/android/app/build.gradle index 71bd4c4d..294193bb 100644 --- a/AE/engine/external/shared/lz4/android/app/build.gradle +++ b/AE/engine/external/shared/lz4/android/app/build.gradle @@ -26,7 +26,7 @@ android { } externalNativeBuild { cmake { - version '3.26.4' + version '3.22.1+' path 'src/main/cpp/CMakeLists.txt' } } diff --git a/AE/engine/external/shared/xxHash/android/install.bat b/AE/engine/external/shared/xxHash/android/install.bat deleted file mode 100644 index 947d8637..00000000 --- a/AE/engine/external/shared/xxHash/android/install.bat +++ /dev/null @@ -1,8 +0,0 @@ -rmdir /Q /S "..\..\..\..\..\AE-Bin\external\android-clang\xxHash" -rmdir /Q /S "temp" -git clone --branch "v0.8.2" "http://readonly:369@192.168.0.104/Bonobo.Git.Server/xxHash.git" "temp" -copy /Y "temp\cmake_unofficial\CMakeLists.txt" "temp\cmake_unofficial\origin_CMakeLists.txt" -copy /Y "xxHash_CMakeLists.txt" "temp\cmake_unofficial\CMakeLists.txt" -cmake -S temp/cmake_unofficial -B build -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX="../../../../../AE-Bin/external/android-clang/xxHash" -cmake --build build --config Release --target install -j 2 -rmdir /Q /S "temp" diff --git a/AE/engine/external/shared/zstd/android/app/build.gradle b/AE/engine/external/shared/zstd/android/app/build.gradle index 71bd4c4d..294193bb 100644 --- a/AE/engine/external/shared/zstd/android/app/build.gradle +++ b/AE/engine/external/shared/zstd/android/app/build.gradle @@ -26,7 +26,7 @@ android { } 
externalNativeBuild { cmake { - version '3.26.4' + version '3.22.1+' path 'src/main/cpp/CMakeLists.txt' } } diff --git a/AE/engine/external/win-arm64-msvc143/CMakeLists.txt b/AE/engine/external/win-arm64-msvc143/CMakeLists.txt new file mode 100644 index 00000000..93ec25f1 --- /dev/null +++ b/AE/engine/external/win-arm64-msvc143/CMakeLists.txt @@ -0,0 +1,29 @@ +# Copyright (c) Zhirnov Andrey. For more information see 'LICENSE' + +cmake_minimum_required( VERSION 3.10 FATAL_ERROR ) + +set( CMAKE_INSTALL_PREFIX "${CMAKE_BINARY_DIR}/install" CACHE INTERNAL "installation directory" FORCE ) +set( AE_DST_PATH "${AE_EXTERNAL_BIN_PATH}/external/win-arm64-msvc143" CACHE INTERNAL "" FORCE ) +set( AE_DST_SHARED_PATH "${AE_EXTERNAL_BIN_PATH}/external/win-arm64" CACHE INTERNAL "" FORCE ) + +set( AE_ENABLE_ANGELSCRIPT OFF CACHE INTERNAL "" FORCE ) +set( AE_ENABLE_ASSIMP OFF CACHE INTERNAL "" FORCE ) +set( AE_ENABLE_ASTC_ENCODER OFF CACHE INTERNAL "" FORCE ) +set( AE_ENABLE_AMD_GPU_PERF OFF CACHE INTERNAL "" FORCE ) +set( AE_ENABLE_BASS OFF CACHE INTERNAL "" FORCE ) +set( AE_ENABLE_CDT OFF CACHE INTERNAL "" FORCE ) +set( AE_ENABLE_GLSLANG OFF CACHE INTERNAL "" FORCE ) +set( AE_ENABLE_FREETYPE OFF CACHE INTERNAL "" FORCE ) +set( AE_ENABLE_KTX OFF CACHE INTERNAL "" FORCE ) +set( AE_ENABLE_MSDFGEN OFF CACHE INTERNAL "" FORCE ) +set( AE_ENABLE_MESH_OPTIMIZER OFF CACHE INTERNAL "" FORCE ) +set( AE_ENABLE_NVML OFF CACHE INTERNAL "" FORCE ) +set( AE_ENABLE_SPIRV_CROSS OFF CACHE INTERNAL "" FORCE ) + +set( AE_ENABLE_COMPRESSONATOR OFF CACHE INTERNAL "" FORCE ) +set( AE_ENABLE_DEVIL OFF CACHE INTERNAL "" FORCE ) +set( AE_ENABLE_FFMPEG OFF CACHE INTERNAL "" FORCE ) +set( AE_ENABLE_NVIDIA_API OFF CACHE INTERNAL "" FORCE ) +set( AE_ENABLE_OPENVR OFF CACHE INTERNAL "" FORCE ) + +add_subdirectory( "../windows" "windows" ) diff --git a/AE/engine/pch/Base.h b/AE/engine/pch/Base.h index 7592db51..741e05fc 100644 --- a/AE/engine/pch/Base.h +++ b/AE/engine/pch/Base.h @@ -35,6 +35,7 @@ #include 
"base/Containers/UntypedStorage.h" // Pointers +#include "base/Pointers/Ref.h" #include "base/Pointers/Ptr.h" #include "base/Pointers/PackedPtr.h" #include "base/Pointers/RefCounter.h" @@ -111,6 +112,7 @@ // Platforms #include "base/Platforms/CPUInfo.h" +#include "base/Platforms/PerformanceStat.h" #include "base/Platforms/Platform.h" // Time diff --git a/AE/engine/pch/Graphics.h b/AE/engine/pch/Graphics.h index 0b788b03..3dd9d2c1 100644 --- a/AE/engine/pch/Graphics.h +++ b/AE/engine/pch/Graphics.h @@ -4,11 +4,4 @@ #include "graphics/GraphicsImpl.h" #include "graphics/RenderGraphImpl.h" - -// cleanup defines -#undef AE_VALIDATE_GCTX -#undef AE_DBG_GRAPHICS -#undef DBG_GRAPHICS_ONLY -#undef GCTX_CHECK -#undef GCTX_CHECK_MSG -#undef GRES_CHECK +#include "graphics/Private/Undef.h" diff --git a/AE/engine/pch/Serializing.h b/AE/engine/pch/Serializing.h index 5e2cd9c0..882aaead 100644 --- a/AE/engine/pch/Serializing.h +++ b/AE/engine/pch/Serializing.h @@ -2,6 +2,6 @@ #pragma once -#include "serializing/Common.h" -#include "serializing/ISerializable.h" -#include "serializing/ObjectFactory.h" +#include "serializing/Public/Common.h" +#include "serializing/Public/ISerializable.h" +#include "serializing/Public/ObjectFactory.h" diff --git a/AE/engine/performance/base/Perf_FindSubString.cpp b/AE/engine/performance/base/Perf_FindSubString.cpp index de87bb14..2b1a40ec 100644 --- a/AE/engine/performance/base/Perf_FindSubString.cpp +++ b/AE/engine/performance/base/Perf_FindSubString.cpp @@ -55,14 +55,14 @@ namespace return ret; } - forceinline const char* forward_search (const char *, const char *end_ptr, const char *ptr, const char n) + forceinline const char* forward_search (const char *, const char* end_ptr, const char* ptr, const char n) { for_likely (; (ptr <= end_ptr) and (*ptr != n); ++ptr) {} return *ptr == n ? 
ptr : nullptr; } - forceinline const char* fast_memchr (const char *haystack, const char n1, size_t len) + forceinline const char* fast_memchr (const char* haystack, const char n1, size_t len) { uint64_t vn1 = repeat_byte( n1 ); size_t loop_size = Min( LOOP_SIZE, len ); diff --git a/AE/engine/performance/base/main.cpp b/AE/engine/performance/base/main.cpp index d65c1c65..8b489317 100644 --- a/AE/engine/performance/base/main.cpp +++ b/AE/engine/performance/base/main.cpp @@ -26,6 +26,14 @@ int main (const int argc, char* argv[]) //PerfTest_LogicOp(); AE_LOGI( "PerformanceTests.Base finished" ); + + #else + # ifdef AE_PLATFORM_ANDROID + Unused( path ); + # else + Unused( argc, argv ); + # endif #endif + return 0; } diff --git a/AE/engine/performance/threading/main.cpp b/AE/engine/performance/threading/main.cpp index 03ebbd37..14f24065 100644 --- a/AE/engine/performance/threading/main.cpp +++ b/AE/engine/performance/threading/main.cpp @@ -32,6 +32,14 @@ int main (const int argc, char* argv[]) //PerfTest_Raw_Atomic(); AE_LOGI( "PerformanceTests.Threading finished" ); + + #else + # ifdef AE_PLATFORM_ANDROID + Unused( path ); + # else + Unused( argc, argv ); + # endif #endif + return 0; } diff --git a/AE/engine/shared_data/3party_shaders/Blur-1.glsl b/AE/engine/shared_data/3party_shaders/Blur-1.glsl index 142c5870..c3de01fe 100644 --- a/AE/engine/shared_data/3party_shaders/Blur-1.glsl +++ b/AE/engine/shared_data/3party_shaders/Blur-1.glsl @@ -1,4 +1,6 @@ +#ifdef AE_LICENSE_MIT + // from https://github.com/Jam3/glsl-fast-gaussian-blur // MIT license @@ -41,3 +43,5 @@ float4 Blur13 (gl::CombinedTex2D image, float2 uv, float2 invResolution, color += gl.texture.Sample( image, uv - (off3 * invResolution) ) * 0.010381362401148057; return color; } + +#endif // AE_LICENSE_MIT diff --git a/AE/engine/shared_data/3party_shaders/ColorSpaceUtility-1.glsl b/AE/engine/shared_data/3party_shaders/ColorSpaceUtility-1.glsl index 93ab8f5a..518649e8 100644 --- 
a/AE/engine/shared_data/3party_shaders/ColorSpaceUtility-1.glsl +++ b/AE/engine/shared_data/3party_shaders/ColorSpaceUtility-1.glsl @@ -11,6 +11,8 @@ #include "Math.glsl" +#ifdef AE_LICENSE_MIT + // // Gamma ramps and encoding transfer functions // @@ -31,35 +33,35 @@ // Approximately pow(x, 1.0 / 2.2) float ApplySRGBCurve (float x) { return x < 0.0031308 ? 12.92 * x : 1.055 * Pow(x, 1.0 / 2.4) - 0.055; } -float3 ApplySRGBCurve (float3 v) { return float3( ApplySRGBCurve(v.r), ApplySRGBCurve(v.g), ApplySRGBCurve(v.b) ); } -float4 ApplySRGBCurve (float4 v) { return float4( ApplySRGBCurve(v.r), ApplySRGBCurve(v.g), ApplySRGBCurve(v.b), v.a ); } +float3 ApplySRGBCurve (float3 v) { return SelectFp( v, float3(0.0031308), (12.92 * v), (1.055 * Pow( v, float3(1.0 / 2.4) ) - 0.055) ); } +float4 ApplySRGBCurve (float4 v) { return float4( ApplySRGBCurve( v.rgb ), v.a ); } // Approximately pow(x, 2.2) float RemoveSRGBCurve (float x) { return x < 0.04045 ? x / 12.92 : Pow( (x + 0.055) / 1.055, 2.4 ); } -float3 RemoveSRGBCurve (float3 v) { return float3( RemoveSRGBCurve(v.r), RemoveSRGBCurve(v.g), RemoveSRGBCurve(v.b) ); } -float4 RemoveSRGBCurve (float4 v) { return float4( RemoveSRGBCurve(v.r), RemoveSRGBCurve(v.g), RemoveSRGBCurve(v.b), v.a ); } +float3 RemoveSRGBCurve (float3 v) { return SelectFp( v, float3(0.04045), (v / 12.92), Pow( (v + 0.055) / 1.055, float3(2.4) )); } +float4 RemoveSRGBCurve (float4 v) { return float4( RemoveSRGBCurve( v.rgb ), v.a ); } // These functions avoid pow() to efficiently approximate sRGB with an error < 0.4%. float ApplySRGBCurve_Fast (float x) { return x < 0.0031308 ? 
12.92 * x : 1.13005 * Sqrt(x - 0.00228) - 0.13448 * x + 0.005719; } -float3 ApplySRGBCurve_Fast (float3 v) { return float3( ApplySRGBCurve_Fast(v.r), ApplySRGBCurve_Fast(v.g), ApplySRGBCurve_Fast(v.b) ); } -float4 ApplySRGBCurve_Fast (float4 v) { return float4( ApplySRGBCurve_Fast(v.r), ApplySRGBCurve_Fast(v.g), ApplySRGBCurve_Fast(v.b), v.a ); } +float3 ApplySRGBCurve_Fast (float3 v) { return SelectFp( v, float3(0.0031308), (12.92 * v), (1.13005 * Sqrt(v - 0.00228) - 0.13448 * v + 0.005719) ); } +float4 ApplySRGBCurve_Fast (float4 v) { return float4( ApplySRGBCurve_Fast( v.rgb ), v.a ); } float RemoveSRGBCurve_Fast (float x) { return x < 0.04045 ? x / 12.92 : -7.43605 * x - 31.24297 * Sqrt(-0.53792 * x + 1.279924) + 35.34864; } -float3 RemoveSRGBCurve_Fast (float3 v) { return float3( RemoveSRGBCurve_Fast(v.r), RemoveSRGBCurve_Fast(v.g), RemoveSRGBCurve_Fast(v.b) ); } -float4 RemoveSRGBCurve_Fast (float4 v) { return float4( RemoveSRGBCurve_Fast(v.r), RemoveSRGBCurve_Fast(v.g), RemoveSRGBCurve_Fast(v.b), v.a ); } +float3 RemoveSRGBCurve_Fast (float3 v) { return SelectFp( v, float3(0.04045), (v / 12.92), (-7.43605 * v - 31.24297 * Sqrt(-0.53792 * v + 1.279924) + 35.34864) ); } +float4 RemoveSRGBCurve_Fast (float4 v) { return float4( RemoveSRGBCurve_Fast( v.rgb ), v.a ); } // The OETF recommended for content shown on HDTVs. This "gamma ramp" may increase contrast as // appropriate for viewing in a dark environment. Always use this curve with Limited RGB as it is // used in conjunction with HDTVs. float ApplyREC709Curve (float x) { return x < 0.0181 ? 
4.5 * x : 1.0993 * Pow(x, 0.45) - 0.0993; } -float3 ApplyREC709Curve (float3 v) { return float3( ApplyREC709Curve(v.r), ApplyREC709Curve(v.g), ApplyREC709Curve(v.b) ); } -float4 ApplyREC709Curve (float4 v) { return float4( ApplyREC709Curve(v.r), ApplyREC709Curve(v.g), ApplyREC709Curve(v.b), v.a ); } +float3 ApplyREC709Curve (float3 v) { return SelectFp( v, float3(0.0181), (4.5 * v), (1.0993 * Pow( v, float3(0.45) ) - 0.0993) ); } +float4 ApplyREC709Curve (float4 v) { return float4( ApplyREC709Curve( v.rgb ), v.a ); } float RemoveREC709Curve (float x) { return x < 0.08145 ? x / 4.5 : Pow((x + 0.0993) / 1.0993, 1.0 / 0.45); } -float3 RemoveREC709Curve (float3 v) { return float3( RemoveREC709Curve(v.r), RemoveREC709Curve(v.g), RemoveREC709Curve(v.b) ); } -float4 RemoveREC709Curve (float4 v) { return float4( RemoveREC709Curve(v.r), RemoveREC709Curve(v.g), RemoveREC709Curve(v.b), v.a ); } +float3 RemoveREC709Curve (float3 v) { return SelectFp( v, float3(0.08145), (v / 4.5), Pow( (v + 0.0993) / 1.0993, float3(1.0 / 0.45) )); } +float4 RemoveREC709Curve (float4 v) { return float4( RemoveREC709Curve( v.rgb ), v.a ); } // This is the new HDR transfer function, also called "PQ" for perceptual quantizer. 
Note that REC2084 @@ -176,5 +178,4 @@ float3 ApplyDisplayProfile (const float3 color, const int displayFormat) }; } - - +#endif // AE_LICENSE_MIT diff --git a/AE/engine/shared_data/3party_shaders/ColorUtils-1.glsl b/AE/engine/shared_data/3party_shaders/ColorUtils-1.glsl index 8549a9ce..4c380e48 100644 --- a/AE/engine/shared_data/3party_shaders/ColorUtils-1.glsl +++ b/AE/engine/shared_data/3party_shaders/ColorUtils-1.glsl @@ -1,4 +1,6 @@ +#ifdef AE_ENABLE_UNKNOWN_LICENSE + /* ================================================= RGBtoHSV / HSVtoRGB @@ -331,3 +333,6 @@ float3 RGBLerpOklab (const float3 lin1, const float3 lin2, const float factor) return kLMStoCONE * (lms * lms * lms); } + + +#endif // AE_ENABLE_UNKNOWN_LICENSE diff --git a/AE/engine/shared_data/3party_shaders/Easing-1.glsl b/AE/engine/shared_data/3party_shaders/Easing-1.glsl index 88c470f1..fde620eb 100644 --- a/AE/engine/shared_data/3party_shaders/Easing-1.glsl +++ b/AE/engine/shared_data/3party_shaders/Easing-1.glsl @@ -2,6 +2,9 @@ based on code from GLM (MIT license) https://github.com/g-truc/glm */ +#ifdef AE_LICENSE_MIT + + float QuadraticEaseIn (const float x) { return x * x; @@ -81,3 +84,80 @@ float QuinticEaseInOut (const float x) (16.f * x * x * x * x * x) : (0.5f * a * a * a * a * a + 1.f); } +//------------------------------------------------ + + +float SineEaseIn (const float x) +{ + return Sin( (x - 1.f) * float_HalfPi ) + 1.f; +} + +float SineEaseOut (const float x) +{ + return Sin( x * float_HalfPi ); +} + +float SineEaseInOut (const float x) +{ + return 0.5f * (1.f - Cos( x * float_Pi )); +} +//------------------------------------------------ + + +float CircularEaseIn (const float x) +{ + return 1.f - Sqrt( 1.f - (x*x) ); +} + +float CircularEaseOut (const float x) +{ + return Sqrt( (2.f - x) * x ); +} + +float CircularEaseInOut (const float x) +{ + return x < 0.5f ? 
+ 0.5f * (1.f - Sqrt( 1.f - 4.f * x*x )) : + 0.5f * (Sqrt( -(2.f * x - 3.f) * ((2.f * x) - 1.f)) + 1.f); +} +//------------------------------------------------ + + +float ExponentialEaseIn (const float x) +{ + return Exp2( (x - 1.f) * 10.f ); +} + +float ExponentialEaseOut (const float x) +{ + return 1.f - Exp2( -10.f * x ); +} + +float ExponentialEaseInOut (const float x) +{ + return x < 0.5f ? + 0.5f * Exp2( 20.f * x - 10.f ) : + -0.5f * Exp2( (-20.f * x) + 10.f ) + 1.f; +} +//------------------------------------------------ + + +float ElasticEaseIn (const float x) +{ + return Sin( 13.f * float_HalfPi * x ) * Exp2( 10.f * (x - 1.f) ); +} + +float ElasticEaseOut (const float x) +{ + return Sin( -13.f * float_HalfPi * (x + 1.f) ) * Exp2( -10.f * x ) + 1.f; +} + +float ElasticEaseInOut (const float x) +{ + return x < 0.5f ? + 0.5f * Sin( 13.f * float_Pi * x ) * Exp2( 10.f * (2.f * x - 1.f) ) : + 0.5f * (Sin( -13.f * float_Pi * x ) * Exp2( -10.f * (2.f * x - 1.f)) + 2.f); +} +//------------------------------------------------ + +#endif // AE_LICENSE_MIT diff --git a/AE/engine/shared_data/3party_shaders/FastMath-1.glsl b/AE/engine/shared_data/3party_shaders/FastMath-1.glsl new file mode 100644 index 00000000..d822b875 --- /dev/null +++ b/AE/engine/shared_data/3party_shaders/FastMath-1.glsl @@ -0,0 +1,205 @@ + +#ifdef AE_ENABLE_UNKNOWN_LICENSE + +/* +================================================= + FastACos +---- + from https://developer.download.nvidia.com/cg/acos.html +---- + Handbook of Mathematical Functions + M. Abramowitz and I.A. Stegun, Ed. 
+---- + Absolute error <= 6.7e-5 +================================================= +*/ +#define Gen_FAST_ACOS1( _stype_, _vtype_ )\ + ND_ _vtype_ FastACos (_vtype_ x) \ + { \ + _vtype_ negate = LessFp( x, _vtype_(_stype_(0.0)) ); \ + x = Abs( x ); \ + _vtype_ ret = _vtype_(_stype_(-0.0187293)); \ + ret *= x; \ + ret += _stype_(0.0742610); \ + ret *= x; \ + ret -= _stype_(0.2121144); \ + ret *= x; \ + ret += _stype_(float_HalfPi); \ + ret *= Sqrt( _stype_(1.0) - x ); \ + ret -= _stype_(2.0) * negate * ret; \ + return negate * _stype_(float_Pi) + ret; \ + } + +#define Gen_FAST_ACOS( _stype_, _vtype_ )\ + Gen_FAST_ACOS1( _stype_, _stype_ )\ + Gen_FAST_ACOS1( _stype_, UNITE( _vtype_, 2 ))\ + Gen_FAST_ACOS1( _stype_, UNITE( _vtype_, 3 ))\ + Gen_FAST_ACOS1( _stype_, UNITE( _vtype_, 4 )) + +Gen_FAST_ACOS( float, float_vec_t ) + +#if AE_ENABLE_HALF_TYPE + Gen_FAST_ACOS( half, half_vec_t ) +#endif +#if AE_ENABLE_DOUBLE_TYPE + Gen_FAST_ACOS( double, double_vec_t ) +#endif + +#undef Gen_FAST_ACOS1 +#undef Gen_FAST_ACOS + +/* +================================================= + FastASin +---- + from https://developer.download.nvidia.com/cg/asin.html +---- + Handbook of Mathematical Functions + M. Abramowitz and I.A. Stegun, Ed. 
+================================================= +*/ +#define Gen_FAST_ASIN1( _stype_, _vtype_ )\ + ND_ _vtype_ FastASin (_vtype_ x) \ + { \ + _vtype_ negate = LessFp( x, _vtype_(_stype_(0.0)) ); \ + x = Abs( x ); \ + _vtype_ ret = _vtype_(_stype_(-0.0187293)); \ + ret *= x; \ + ret += _stype_(0.0742610); \ + ret *= x; \ + ret -= _stype_(0.2121144); \ + ret *= x; \ + ret += _stype_(float_HalfPi); \ + ret = _stype_(float_HalfPi) - Sqrt( _stype_(1.0) - x ) * ret; \ + return ret - _stype_(2.0) * negate * ret; \ + } + +#define Gen_FAST_ASIN( _stype_, _vtype_ )\ + Gen_FAST_ASIN1( _stype_, _stype_ )\ + Gen_FAST_ASIN1( _stype_, UNITE( _vtype_, 2 ))\ + Gen_FAST_ASIN1( _stype_, UNITE( _vtype_, 3 ))\ + Gen_FAST_ASIN1( _stype_, UNITE( _vtype_, 4 )) + +Gen_FAST_ASIN( float, float_vec_t ) + +#if AE_ENABLE_HALF_TYPE + Gen_FAST_ASIN( half, half_vec_t ) +#endif +#if AE_ENABLE_DOUBLE_TYPE + Gen_FAST_ASIN( double, double_vec_t ) +#endif + +#undef Gen_FAST_ASIN1 +#undef Gen_FAST_ASIN + +/* +================================================= + FastATan, FastATan2 +---- + from https://developer.download.nvidia.com/cg/atan2.html +================================================= +*/ +#define Gen_FAST_ATAN1( _stype_, _vtype_ )\ + ND_ _vtype_ FastATan2 (const _vtype_ y, const _vtype_ x) \ + { \ + _vtype_ t0, t1, t2, t3, t4; \ + \ + t3 = Abs( x ); \ + t1 = Abs( y ); \ + t0 = Max( t3, t1 ); \ + t1 = Min( t3, t1 ); \ + t3 = _stype_(1.0) / t0; \ + t3 = t1 * t3; \ + \ + t4 = t3 * t3; \ + t0 = _vtype_( - _stype_(0.013480470)); \ + t0 = t0 * t4 + _stype_(0.057477314); \ + t0 = t0 * t4 - _stype_(0.121239071); \ + t0 = t0 * t4 + _stype_(0.195635925); \ + t0 = t0 * t4 - _stype_(0.332994597); \ + t0 = t0 * t4 + _stype_(0.999995630); \ + t3 = t0 * t3; \ + \ + t3 = SelectFp( Abs(x), Abs(y), _stype_(float_HalfPi) - t3, t3 ); \ + t3 = SelectFp( x, _vtype_(_stype_(0.0)), _stype_(float_Pi) - t3, t3 ); \ + t3 = SelectFp( y, _vtype_(_stype_(0.0)), -t3, t3 ); \ + \ + return t3; \ + } \ + \ + ND_ _vtype_ 
FastATan (_vtype_ x) { \ + return FastATan2( x, _vtype_(_stype_(1.0)) ); \ + } + +#define Gen_FAST_ATAN( _stype_, _vtype_ )\ + Gen_FAST_ATAN1( _stype_, _stype_ )\ + Gen_FAST_ATAN1( _stype_, UNITE( _vtype_, 2 ))\ + Gen_FAST_ATAN1( _stype_, UNITE( _vtype_, 3 ))\ + Gen_FAST_ATAN1( _stype_, UNITE( _vtype_, 4 )) + +Gen_FAST_ATAN( float, float_vec_t ) + +#if AE_ENABLE_HALF_TYPE + Gen_FAST_ATAN( half, half_vec_t ) +#endif +#if AE_ENABLE_DOUBLE_TYPE + Gen_FAST_ATAN( double, double_vec_t ) +#endif + +#undef Gen_FAST_ATAN1 +#undef Gen_FAST_ATAN + +#endif // AE_ENABLE_UNKNOWN_LICENSE +//----------------------------------------------------------------------------- + + +#ifdef AE_LICENSE_MIT + +/* +================================================= + FastSqrt +---- + warning: native Sqrt is faster on most devices +---- + from https://www.shadertoy.com/view/ssyyDh + The MIT License + Copyright © 2022 Inigo Quilez +================================================= +*/ + ND_ float FastSqrt_v1 (const float x) + { + float y = uintBitsToFloat( 532545536u + (floatBitsToUint(x) >> 1) ); + y = 0.5 * (y + x/y); + y = 0.5 * (y + x/y); + return y; + } + + ND_ float FastSqrt_v2 (float x) + { + float y = uintBitsToFloat( 532545536u + (floatBitsToUint(x) >> 1) ); + float y2 = y*y; + float x2 = x*x; + return (y2 * (y2 + 6.0 * x) + x2) / (4.0 * y * (y2 + x)); + } + +/* +================================================= + FastCbrt +---- + warning: native Pow is faster on most devices +---- + from https://www.shadertoy.com/view/ssyyDh + The MIT License + Copyright © 2022 Inigo Quilez +================================================= +*/ + ND_ float FastCbrt (const float x) + { + float y = uintBitsToFloat( 709973695u + floatBitsToUint(x) / 3u ); + y = y * (2.0/3.0) + (1.0/3.0) * x / (y*y); + y = y * (2.0/3.0) + (1.0/3.0) * x / (y*y); + return y; + } + +#endif // AE_LICENSE_MIT +//----------------------------------------------------------------------------- diff --git 
a/AE/engine/shared_data/3party_shaders/GBuffer-1.glsl b/AE/engine/shared_data/3party_shaders/GBuffer-1.glsl index 8042f032..371386de 100644 --- a/AE/engine/shared_data/3party_shaders/GBuffer-1.glsl +++ b/AE/engine/shared_data/3party_shaders/GBuffer-1.glsl @@ -3,10 +3,14 @@ // TODO // https://jcgt.org/published/0003/02/01/paper.pdf +#ifdef AE_ENABLE_UNKNOWN_LICENSE + /* ================================================= Stalker version +---- + Best accuracy, requires signed format for negative Z. ================================================= */ float2 Stalker_EncodeNormal (const float3 norm) @@ -80,6 +84,7 @@ float3 SigOctahedron_DecodeNormal (float3 n) Stereographic Projection ---- from https://aras-p.info/texts/CompactNormalStorage.html + Requires signed format for negative Z. ================================================= */ float2 Stereo_EncodeNormal (const float3 n) @@ -101,6 +106,8 @@ float3 Stereo_DecodeNormal (const float2 enc) /* ================================================= CryTeck version (Spheremap Transform) +---- + Large error on z = 1, but less than 'Spheremap' version. ================================================= */ float2 CryTeck_EncodeNormal (const float3 norm) @@ -121,6 +128,7 @@ float3 CryTeck_DecodeNormal (const float2 packed) Spheremap Transform ---- from https://aras-p.info/texts/CompactNormalStorage.html + Large error on z = -1. 
================================================= */ float2 Spheremap_EncodeNormal (const float3 n) @@ -234,3 +242,6 @@ float3 Diamond_DecodeTangent (float3 normal, float diamond_tangent) return packed_tangent.x * t1 + packed_tangent.y * t2; } + + +#endif // AE_ENABLE_UNKNOWN_LICENSE diff --git a/AE/engine/shared_data/3party_shaders/Hash-1.glsl b/AE/engine/shared_data/3party_shaders/Hash-1.glsl index e377cbc6..9341b6d4 100644 --- a/AE/engine/shared_data/3party_shaders/Hash-1.glsl +++ b/AE/engine/shared_data/3party_shaders/Hash-1.glsl @@ -5,6 +5,8 @@ #include "Math.glsl" +#ifdef AE_LICENSE_MIT + // DHash from https://www.shadertoy.com/view/4djSRW // MIT License... // Copyright (c) 2014 David Hoskins. @@ -103,9 +105,13 @@ ND_ float4 DHash44 (const float4 p) p4 += Dot( p4, p4.wzxy + float(19.19) ); return Fract( (p4.xxyz + p4.yzzw) * p4.zywx ); } + +#endif // AE_LICENSE_MIT //----------------------------------------------------------------------------- +#ifdef AE_LICENSE_MIT + // HEHash from https://www.shadertoy.com/view/llGSzw, https://www.shadertoy.com/view/XlXcW4, https://www.shadertoy.com/view/4tXyWN // The MIT License // Copyright (c) 2017 Inigo Quilez @@ -161,7 +167,7 @@ ND_ float HEHash13 (int3 uv) { return HEHash13( uint3(uv) ); } ND_ float2 HEHash24 (int4 uv) { return HEHash24( uint4(uv) ); } -#if AE_ENABLE_HALF_TYPE +# if AE_ENABLE_HALF_TYPE ND_ half HEHash11hf (uint n) { n = HEHash11i( n ); @@ -175,4 +181,7 @@ ND_ float2 HEHash24 (int4 uv) { return HEHash24( uint4(uv) ); } ND_ half2 HEHash22hf (uint2 uv) { return half2(HEHash11hf(uv.x), HEHash11hf(uv.y)); } ND_ half3 HEHash33hf (uint3 uv) { return half3(HEHash11hf(uv.x), HEHash11hf(uv.y), HEHash11hf(uv.z)); } ND_ half4 HEHash44hf (uint4 uv) { return half4(HEHash11hf(uv.x), HEHash11hf(uv.y), HEHash11hf(uv.z), HEHash11hf(uv.w)); } -#endif +# endif + +#endif // AE_LICENSE_MIT +//----------------------------------------------------------------------------- diff --git 
a/AE/engine/shared_data/3party_shaders/Hash-2.glsl b/AE/engine/shared_data/3party_shaders/Hash-2.glsl index 2ac331ef..b6e99f8c 100644 --- a/AE/engine/shared_data/3party_shaders/Hash-2.glsl +++ b/AE/engine/shared_data/3party_shaders/Hash-2.glsl @@ -5,11 +5,17 @@ #include "Math.glsl" +#ifdef AE_LICENSE_UNLICENSE +/* +================================================= + WeylHash +---- + from https://www.shadertoy.com/view/MsV3z3 + LICENSE: http://unlicense.org/ +================================================= +*/ ND_ float WeylHash12 (const int2 c) { - // from https://www.shadertoy.com/view/MsV3z3 - // LICENSE: http://unlicense.org/ - const int x = 0x3504f333 * c.x * c.x + c.y; const int y = 0xf1bbcdcb * c.y * c.y + c.x; @@ -18,29 +24,46 @@ ND_ float WeylHash12 (const int2 c) ND_ float WeylHash12 (const float2 c) { - // from https://www.shadertoy.com/view/Xdy3Rc - // LICENSE: http://unlicense.org/ - // a pair of Weyl values with low star discrepancy const float2 W = float2( 0.5545497, 0.308517 ); const float2 v = c * Fract( c * W ); return Fract( v.x * v.y ); } +#endif +//----------------------------------------------------------------------------- + +#ifdef AE_LICENSE_CC_BY_NC_SA_3 +/* +================================================= + ModHash +---- + from https://www.shadertoy.com/view/Xts3R7 + license CC BY-NC-SA 3.0 +================================================= +*/ ND_ float ModHash12 (float2 uv) { - // from https://www.shadertoy.com/view/Xts3R7 - // license CC BY-NC-SA 3.0 uv = Abs( Mod( float(10.0) * Fract( (uv + float(1.1312)) * float(31.0) ), uv + float(2.0) )); uv = Abs( Mod( uv.x * Fract( (uv + float(1.721711)) * float(17.0) ), uv )); return Fract( float(10.0) * (float(7.0) * uv.y + float(31.0) * uv.x) ); } +#endif //----------------------------------------------------------------------------- +#ifdef AE_LICENSE_UNLICENSE +/* +================================================= + IWeylHash, IWeylHash2 +---- + from https://www.shadertoy.com/view/4dlcR4 + LICENSE:
http://unlicense.org/ +================================================= +*/ uint3 _IWeylConst () { return uint3( @@ -52,9 +75,6 @@ uint3 _IWeylConst () ND_ uint IWeylHash (const uint2 p) { - // from https://www.shadertoy.com/view/4dlcR4 - // LICENSE: http://unlicense.org/ - uint x = p.x; uint y = p.y; @@ -67,9 +87,6 @@ ND_ uint IWeylHash (const uint2 p) ND_ uint IWeylHash2 (const uint2 p) { - // from https://www.shadertoy.com/view/4dlcR4 - // LICENSE: http://unlicense.org/ - uint x = p.x; uint y = p.y; @@ -81,17 +98,27 @@ ND_ uint IWeylHash2 (const uint2 p) x *= _IWeylConst().z; // MLCG constant return x; } + +#endif // AE_LICENSE_UNLICENSE //----------------------------------------------------------------------------- -// from https://www.shadertoy.com/view/4ssXRX -// license CC BY-NC-SA 3.0 +#ifdef AE_LICENSE_CC_BY_NC_SA_3 +/* +================================================= + nrand-based hashes (Hash_Uniform .. Hash_MoarGaussianish) +---- + result in range [0, 1] + 'seed' in range [0, 1] +---- + from https://www.shadertoy.com/view/4ssXRX + license CC BY-NC-SA 3.0 +================================================= +*/ //note: uniformly distributed, normalized rand, [0;1] -#define nrand( n ) Fract( Sin( Dot( (n).xy, float2(12.9898, 78.233) )) * float(43758.5453) ) - -// range [0, 1] +# define nrand( n ) Fract( Sin( Dot( (n).xy, float2(12.9898, 78.233) )) * float(43758.5453) ) ND_ float Hash_Uniform (const float2 n, const float seed) { @@ -134,4 +161,6 @@ ND_ float Hash_MoarGaussianish (const float2 n, const float seed) return (nrnd0 + nrnd1 + nrnd2 + nrnd3 + nrnd4 + nrnd5 + nrnd6 + nrnd7) / float(8.0); } -#undef nrand +# undef nrand +#endif // AE_LICENSE_CC_BY_NC_SA_3 +//----------------------------------------------------------------------------- diff --git a/AE/engine/shared_data/3party_shaders/Hash-3.glsl b/AE/engine/shared_data/3party_shaders/Hash-3.glsl index 44b8e346..7d3d0bd3 100644 --- a/AE/engine/shared_data/3party_shaders/Hash-3.glsl +++ b/AE/engine/shared_data/3party_shaders/Hash-3.glsl @@ -5,9
+5,15 @@ #include "Math.glsl" -// from https://www.shadertoy.com/view/ttc3zr -// license CC BY-NC-SA 3.0 - +#ifdef AE_LICENSE_CC_BY_NC_SA_3 +/* +================================================= + MHash +---- + from https://www.shadertoy.com/view/ttc3zr + license CC BY-NC-SA 3.0 +================================================= +*/ ND_ uint MHash11 (uint src) { const uint M = 0x5bd1e995u; @@ -263,3 +269,5 @@ ND_ float4 MHash44 (float4 src) uint4 h = MHash44(floatBitsToUint(src)); return float4(uintBitsToFloat(h & 0x007fffffu | 0x3f800000u) - 1.0); } + +#endif // AE_LICENSE_CC_BY_NC_SA_3 diff --git a/AE/engine/shared_data/3party_shaders/Hash-4.glsl b/AE/engine/shared_data/3party_shaders/Hash-4.glsl index 71b03947..1e955740 100644 --- a/AE/engine/shared_data/3party_shaders/Hash-4.glsl +++ b/AE/engine/shared_data/3party_shaders/Hash-4.glsl @@ -5,6 +5,8 @@ #include "Math.glsl" +#ifdef AE_ENABLE_UNKNOWN_LICENSE + ND_ uint PCG11 (uint n) { // from https://www.reedbeta.com/blog/hash-functions-for-gpu-rendering/ @@ -56,27 +58,33 @@ ND_ uint4 PCG44 (uint4 v) { // from http://www.jcgt.org/published/0009/03/02/ - v = v * 1664525u + 1013904223u; + v = v * 1664525u + 1013904223u; - v.x += v.y*v.w; - v.y += v.z*v.x; - v.z += v.x*v.y; - v.w += v.y*v.z; + v.x += v.y*v.w; + v.y += v.z*v.x; + v.z += v.x*v.y; + v.w += v.y*v.z; - v ^= v >> 16u; + v ^= v >> 16u; - v.x += v.y*v.w; - v.y += v.z*v.x; - v.z += v.x*v.y; - v.w += v.y*v.z; + v.x += v.y*v.w; + v.y += v.z*v.x; + v.z += v.x*v.y; + v.w += v.y*v.z; - return v; + return v; } + +#endif // AE_ENABLE_UNKNOWN_LICENSE //----------------------------------------------------------------------------- + +#ifdef AE_LICENSE_CC_BY_NC_SA_3 + // Helpers // from https://www.shadertoy.com/view/XlGcRh +// license CC BY-NC-SA 3.0 (shadertoy default) // commonly used constants #define c1 0xcc9e2d51u @@ -124,7 +132,7 @@ ND_ uint _bswap32 (uint x) ND_ uint _taus (uint z, int s1, int s2, int s3, uint m) { uint b = (((z << s1) ^ z) >> s2); - return (((z 
& m) << s3) ^ b); + return (((z & m) << s3) ^ b); } //----------------------------------------------------------------------------- @@ -132,7 +140,7 @@ ND_ uint _taus (uint z, int s1, int s2, int s3, uint m) // CityHash32, adapted from Hash32Len0to4 in https://github.com/google/cityhash (MIT license) // from https://www.jcgt.org/published/0009/03/02/, https://www.shadertoy.com/view/XlGcRh -// license: Creative Commons CC BY-ND 3.0 (?) +// license: Creative Commons CC BY-ND 3.0 (shadertoy default) ND_ uint CityHash11 (uint s) { @@ -236,23 +244,23 @@ ND_ float InterleavedGradientNoise12 (float2 v) ND_ uint JKiss12 (uint2 p) { - uint x = p.x;//123456789; - uint y = p.y;//234567891; + uint x = p.x;//123456789; + uint y = p.y;//234567891; - uint z=345678912u,w=456789123u,c=0u; - int t; - y ^= (y<<5); y ^= (y>>7); y ^= (y<<22); - t = int(z+w+c); z = w; c = uint(t < 0); w = uint(t&2147483647); - x += 1411392427u; - return x + y + w; + uint z=345678912u,w=456789123u,c=0u; + int t; + y ^= (y<<5); y ^= (y>>7); y ^= (y<<22); + t = int(z+w+c); z = w; c = uint(t < 0); w = uint(t&2147483647); + x += 1411392427u; + return x + y + w; } // UE4's PseudoRandom function // https://github.com/EpicGames/UnrealEngine/blob/release/Engine/Shaders/Private/Random.ush ND_ float PseudoHash12 (float2 v) { - v = fract(v / float(128.)) * float(128.) + float2(-64.340622, -72.465622); - return fract(dot(v.xyx * v.xyy, float3(20.390625, 60.703125, 2.4281209))); + v = fract(v / float(128.)) * float(128.) 
+ float2(-64.340622, -72.465622); + return fract(dot(v.xyx * v.xyy, float3(20.390625, 60.703125, 2.4281209))); } @@ -260,78 +268,78 @@ ND_ float PseudoHash12 (float2 v) // Numerical Recipies 3rd Edition ND_ uint Ranlim11 (uint j) { - uint u, v, w1, w2, x, y; + uint u, v, w1, w2, x, y; - v = 2244614371U; - w1 = 521288629U; - w2 = 362436069U; + v = 2244614371U; + w1 = 521288629U; + w2 = 362436069U; - u = j ^ v; + u = j ^ v; - u = u * 2891336453U + 1640531513U; - v ^= v >> 13; v ^= v << 17; v ^= v >> 5; - w1 = 33378u * (w1 & 0xffffu) + (w1 >> 16); - w2 = 57225u * (w2 & 0xffffu) + (w2 >> 16); + u = u * 2891336453U + 1640531513U; + v ^= v >> 13; v ^= v << 17; v ^= v >> 5; + w1 = 33378u * (w1 & 0xffffu) + (w1 >> 16); + w2 = 57225u * (w2 & 0xffffu) + (w2 >> 16); - v = u; + v = u; - u = u * 2891336453U + 1640531513U; - v ^= v >> 13; v ^= v << 17; v ^= v >> 5; - w1 = 33378u * (w1 & 0xffffu) + (w1 >> 16); - w2 = 57225u * (w2 & 0xffffu) + (w2 >> 16); + u = u * 2891336453U + 1640531513U; + v ^= v >> 13; v ^= v << 17; v ^= v >> 5; + w1 = 33378u * (w1 & 0xffffu) + (w1 >> 16); + w2 = 57225u * (w2 & 0xffffu) + (w2 >> 16); - x = u ^ (u << 9); x ^= x >> 17; x ^= x << 6; - y = w1 ^ (w1 << 17); y ^= y >> 15; y ^= y << 5; + x = u ^ (u << 9); x ^= x >> 17; x ^= x << 6; + y = w1 ^ (w1 << 17); y ^= y >> 15; y ^= y << 5; - return (x + v) ^ (y + w2); + return (x + v) ^ (y + w2); } // Tiny Encryption Algorithm // - Zafar et al., GPU random numbers via the tiny encryption algorithm, HPG 2010 ND_ uint2 TeaHash22 (int tea, uint2 p) { - uint s = 0u; + uint s = 0u; - for( int i = 0; i < tea; i++) { - s += 0x9E3779B9u; - p.x += (p.y<<4u)^(p.y+s)^(p.y>>5u); - p.y += (p.x<<4u)^(p.x+s)^(p.x>>5u); - } - return p.xy; + for( int i = 0; i < tea; i++) { + s += 0x9E3779B9u; + p.x += (p.y<<4u)^(p.y+s)^(p.y>>5u); + p.y += (p.x<<4u)^(p.x+s)^(p.x>>5u); + } + return p.xy; } // Wang hash, described on http://burtleburtle.net/bob/hash/integer.html // original page by Thomas Wang 404 ND_ uint WangHash11 (uint v) { 
- v = (v ^ 61u) ^ (v >> 16u); - v *= 9u; - v ^= v >> 4u; - v *= 0x27d4eb2du; - v ^= v >> 15u; - return v; + v = (v ^ 61u) ^ (v >> 16u); + v *= 9u; + v ^= v >> 4u; + v *= 0x27d4eb2du; + v ^= v >> 15u; + return v; } // 128-bit xorshift // - Marsaglia, Xorshift RNGs, Journal of Statistical Software, v8n14, 2003 ND_ uint XorShift14 (uint4 v) { - v.w ^= v.w << 11u; - v.w ^= v.w >> 8u; - v = v.wxyz; - v.x ^= v.y; - v.x ^= v.y >> 19u; - return v.x; + v.w ^= v.w << 11u; + v.w ^= v.w >> 8u; + v = v.wxyz; + v.x ^= v.y; + v.x ^= v.y >> 19u; + return v.x; } // 32-bit xorshift // - Marsaglia, Xorshift RNGs, Journal of Statistical Software, v8n14, 2003 ND_ uint XorShift11 (uint v) { - v ^= v << 13u; - v ^= v >> 17u; - v ^= v << 5u; - return v; + v ^= v << 13u; + v ^= v >> 17u; + v ^= v << 5u; + return v; } //----------------------------------------------------------------------------- @@ -341,134 +349,134 @@ ND_ uint XorShift11 (uint v) ND_ uint Murmur11 (uint seed) { - uint h = 0u; - uint k = seed; + uint h = 0u; + uint k = seed; - k *= c1; - k = _rotl(k,15u); - k *= c2; + k *= c1; + k = _rotl(k,15u); + k *= c2; - h ^= k; - h = _rotl(h,13u); - h = h*5u+0xe6546b64u; + h ^= k; + h = _rotl(h,13u); + h = h*5u+0xe6546b64u; - h ^= 4u; + h ^= 4u; - return _fmix(h); + return _fmix(h); } ND_ uint Murmur12 (uint2 seed) { - uint h = 0u; - uint k = seed.x; + uint h = 0u; + uint k = seed.x; - k *= c1; - k = _rotl(k,15u); - k *= c2; + k *= c1; + k = _rotl(k,15u); + k *= c2; - h ^= k; - h = _rotl(h,13u); - h = h*5u+0xe6546b64u; + h ^= k; + h = _rotl(h,13u); + h = h*5u+0xe6546b64u; - k = seed.y; + k = seed.y; - k *= c1; - k = _rotl(k,15u); - k *= c2; + k *= c1; + k = _rotl(k,15u); + k *= c2; - h ^= k; - h = _rotl(h,13u); - h = h*5u+0xe6546b64u; + h ^= k; + h = _rotl(h,13u); + h = h*5u+0xe6546b64u; - h ^= 8u; + h ^= 8u; - return _fmix(h); + return _fmix(h); } ND_ uint Murmur13 (uint3 seed) { - uint h = 0u; - uint k = seed.x; + uint h = 0u; + uint k = seed.x; - k *= c1; - k = _rotl(k,15u); - k 
*= c2; + k *= c1; + k = _rotl(k,15u); + k *= c2; - h ^= k; - h = _rotl(h,13u); - h = h*5u+0xe6546b64u; + h ^= k; + h = _rotl(h,13u); + h = h*5u+0xe6546b64u; - k = seed.y; + k = seed.y; - k *= c1; - k = _rotl(k,15u); - k *= c2; + k *= c1; + k = _rotl(k,15u); + k *= c2; - h ^= k; - h = _rotl(h,13u); - h = h*5u+0xe6546b64u; + h ^= k; + h = _rotl(h,13u); + h = h*5u+0xe6546b64u; - k = seed.z; + k = seed.z; - k *= c1; - k = _rotl(k,15u); - k *= c2; + k *= c1; + k = _rotl(k,15u); + k *= c2; - h ^= k; - h = _rotl(h,13u); - h = h*5u+0xe6546b64u; + h ^= k; + h = _rotl(h,13u); + h = h*5u+0xe6546b64u; - h ^= 12u; + h ^= 12u; - return _fmix(h); + return _fmix(h); } ND_ uint Murmur14 (uint4 seed) { - uint h = 0u; - uint k = seed.x; + uint h = 0u; + uint k = seed.x; - k *= c1; - k = _rotl(k,15u); - k *= c2; + k *= c1; + k = _rotl(k,15u); + k *= c2; - h ^= k; - h = _rotl(h,13u); - h = h*5u+0xe6546b64u; + h ^= k; + h = _rotl(h,13u); + h = h*5u+0xe6546b64u; - k = seed.y; + k = seed.y; - k *= c1; - k = _rotl(k,15u); - k *= c2; + k *= c1; + k = _rotl(k,15u); + k *= c2; - h ^= k; - h = _rotl(h,13u); - h = h*5u+0xe6546b64u; + h ^= k; + h = _rotl(h,13u); + h = h*5u+0xe6546b64u; - k = seed.z; + k = seed.z; - k *= c1; - k = _rotl(k,15u); - k *= c2; + k *= c1; + k = _rotl(k,15u); + k *= c2; - h ^= k; - h = _rotl(h,13u); - h = h*5u+0xe6546b64u; + h ^= k; + h = _rotl(h,13u); + h = h*5u+0xe6546b64u; - k = seed.w; + k = seed.w; - k *= c1; - k = _rotl(k,15u); - k *= c2; + k *= c1; + k = _rotl(k,15u); + k *= c2; - h ^= k; - h = _rotl(h,13u); - h = h*5u+0xe6546b64u; + h ^= k; + h = _rotl(h,13u); + h = h*5u+0xe6546b64u; - h ^= 16u; + h ^= 16u; - return _fmix(h); + return _fmix(h); } #undef c1 @@ -482,114 +490,118 @@ ND_ uint SuperFastHash11 (uint data) { uint hash = 4u, tmp; - hash += data & 0xffffu; - tmp = (((data >> 16) & 0xffffu) << 11) ^ hash; - hash = (hash << 16) ^ tmp; - hash += hash >> 11; + hash += data & 0xffffu; + tmp = (((data >> 16) & 0xffffu) << 11) ^ hash; + hash = (hash << 16) ^ 
tmp; + hash += hash >> 11; - // Force "avalanching" of final 127 bits - hash ^= hash << 3; - hash += hash >> 5; - hash ^= hash << 4; - hash += hash >> 17; - hash ^= hash << 25; - hash += hash >> 6; + // Force "avalanching" of final 127 bits + hash ^= hash << 3; + hash += hash >> 5; + hash ^= hash << 4; + hash += hash >> 17; + hash ^= hash << 25; + hash += hash >> 6; - return hash; + return hash; } ND_ uint SuperFastHash12 (uint2 data) { - uint hash = 8u, tmp; + uint hash = 8u, tmp; - hash += data.x & 0xffffu; - tmp = (((data.x >> 16) & 0xffffu) << 11) ^ hash; - hash = (hash << 16) ^ tmp; - hash += hash >> 11; + hash += data.x & 0xffffu; + tmp = (((data.x >> 16) & 0xffffu) << 11) ^ hash; + hash = (hash << 16) ^ tmp; + hash += hash >> 11; - hash += data.y & 0xffffu; - tmp = (((data.y >> 16) & 0xffffu) << 11) ^ hash; - hash = (hash << 16) ^ tmp; - hash += hash >> 11; + hash += data.y & 0xffffu; + tmp = (((data.y >> 16) & 0xffffu) << 11) ^ hash; + hash = (hash << 16) ^ tmp; + hash += hash >> 11; - // Force "avalanching" of final 127 bits - hash ^= hash << 3; - hash += hash >> 5; - hash ^= hash << 4; - hash += hash >> 17; - hash ^= hash << 25; - hash += hash >> 6; + // Force "avalanching" of final 127 bits + hash ^= hash << 3; + hash += hash >> 5; + hash ^= hash << 4; + hash += hash >> 17; + hash ^= hash << 25; + hash += hash >> 6; - return hash; + return hash; } ND_ uint SuperFastHash13 (uint3 data) { - uint hash = 8u, tmp; + uint hash = 8u, tmp; - hash += data.x & 0xffffu; - tmp = (((data.x >> 16) & 0xffffu) << 11) ^ hash; - hash = (hash << 16) ^ tmp; - hash += hash >> 11; + hash += data.x & 0xffffu; + tmp = (((data.x >> 16) & 0xffffu) << 11) ^ hash; + hash = (hash << 16) ^ tmp; + hash += hash >> 11; - hash += data.y & 0xffffu; - tmp = (((data.y >> 16) & 0xffffu) << 11) ^ hash; - hash = (hash << 16) ^ tmp; - hash += hash >> 11; + hash += data.y & 0xffffu; + tmp = (((data.y >> 16) & 0xffffu) << 11) ^ hash; + hash = (hash << 16) ^ tmp; + hash += hash >> 11; - hash += 
data.z & 0xffffu; - tmp = (((data.z >> 16) & 0xffffu) << 11) ^ hash; - hash = (hash << 16) ^ tmp; - hash += hash >> 11; + hash += data.z & 0xffffu; + tmp = (((data.z >> 16) & 0xffffu) << 11) ^ hash; + hash = (hash << 16) ^ tmp; + hash += hash >> 11; - // Force "avalanching" of final 127 bits - hash ^= hash << 3; - hash += hash >> 5; - hash ^= hash << 4; - hash += hash >> 17; - hash ^= hash << 25; - hash += hash >> 6; + // Force "avalanching" of final 127 bits + hash ^= hash << 3; + hash += hash >> 5; + hash ^= hash << 4; + hash += hash >> 17; + hash ^= hash << 25; + hash += hash >> 6; - return hash; + return hash; } ND_ uint SuperFastHash14 (uint4 data) { - uint hash = 8u, tmp; + uint hash = 8u, tmp; - hash += data.x & 0xffffu; - tmp = (((data.x >> 16) & 0xffffu) << 11) ^ hash; - hash = (hash << 16) ^ tmp; - hash += hash >> 11; + hash += data.x & 0xffffu; + tmp = (((data.x >> 16) & 0xffffu) << 11) ^ hash; + hash = (hash << 16) ^ tmp; + hash += hash >> 11; - hash += data.y & 0xffffu; - tmp = (((data.y >> 16) & 0xffffu) << 11) ^ hash; - hash = (hash << 16) ^ tmp; - hash += hash >> 11; + hash += data.y & 0xffffu; + tmp = (((data.y >> 16) & 0xffffu) << 11) ^ hash; + hash = (hash << 16) ^ tmp; + hash += hash >> 11; - hash += data.z & 0xffffu; - tmp = (((data.z >> 16) & 0xffffu) << 11) ^ hash; - hash = (hash << 16) ^ tmp; - hash += hash >> 11; + hash += data.z & 0xffffu; + tmp = (((data.z >> 16) & 0xffffu) << 11) ^ hash; + hash = (hash << 16) ^ tmp; + hash += hash >> 11; - hash += data.w & 0xffffu; - tmp = (((data.w >> 16) & 0xffffu) << 11) ^ hash; - hash = (hash << 16) ^ tmp; - hash += hash >> 11; + hash += data.w & 0xffffu; + tmp = (((data.w >> 16) & 0xffffu) << 11) ^ hash; + hash = (hash << 16) ^ tmp; + hash += hash >> 11; - // Force "avalanching" of final 127 bits - hash ^= hash << 3; - hash += hash >> 5; - hash ^= hash << 4; - hash += hash >> 17; - hash ^= hash << 25; - hash += hash >> 6; + // Force "avalanching" of final 127 bits + hash ^= hash << 3; + hash += hash 
>> 5; + hash ^= hash << 4; + hash += hash >> 17; + hash ^= hash << 25; + hash += hash >> 6; - return hash; + return hash; } + +#endif // AE_LICENSE_CC_BY_NC_SA_3 //----------------------------------------------------------------------------- +#ifdef AE_LICENSE_MIT + // xxhash (https://github.com/Cyan4973/xxHash) // From https://www.shadertoy.com/view/Xt3cDn // by nimitz 2018 @@ -601,38 +613,38 @@ ND_ uint XXHash11 (uint p) const uint PRIME32_4 = 668265263U, PRIME32_5 = 374761393U; uint h32 = p + PRIME32_5; h32 = PRIME32_4*((h32 << 17) | (h32 >> (32 - 17))); - h32 = PRIME32_2*(h32^(h32 >> 15)); - h32 = PRIME32_3*(h32^(h32 >> 13)); - return h32^(h32 >> 16); + h32 = PRIME32_2*(h32^(h32 >> 15)); + h32 = PRIME32_3*(h32^(h32 >> 13)); + return h32^(h32 >> 16); } ND_ uint XXHash12 (uint2 p) { - const uint PRIME32_2 = 2246822519U, PRIME32_3 = 3266489917U; + const uint PRIME32_2 = 2246822519U, PRIME32_3 = 3266489917U; const uint PRIME32_4 = 668265263U, PRIME32_5 = 374761393U; - uint h32 = p.y + PRIME32_5 + p.x*PRIME32_3; - h32 = PRIME32_4*((h32 << 17) | (h32 >> (32 - 17))); - h32 = PRIME32_2*(h32^(h32 >> 15)); - h32 = PRIME32_3*(h32^(h32 >> 13)); - return h32^(h32 >> 16); + uint h32 = p.y + PRIME32_5 + p.x*PRIME32_3; + h32 = PRIME32_4*((h32 << 17) | (h32 >> (32 - 17))); + h32 = PRIME32_2*(h32^(h32 >> 15)); + h32 = PRIME32_3*(h32^(h32 >> 13)); + return h32^(h32 >> 16); } ND_ uint XXHash13 (uint3 p) { - const uint PRIME32_2 = 2246822519U, PRIME32_3 = 3266489917U; + const uint PRIME32_2 = 2246822519U, PRIME32_3 = 3266489917U; const uint PRIME32_4 = 668265263U, PRIME32_5 = 374761393U; uint h32 = p.z + PRIME32_5 + p.x*PRIME32_3; h32 = PRIME32_4*((h32 << 17) | (h32 >> (32 - 17))); h32 += p.y * PRIME32_3; h32 = PRIME32_4*((h32 << 17) | (h32 >> (32 - 17))); - h32 = PRIME32_2*(h32^(h32 >> 15)); - h32 = PRIME32_3*(h32^(h32 >> 13)); - return h32^(h32 >> 16); + h32 = PRIME32_2*(h32^(h32 >> 15)); + h32 = PRIME32_3*(h32^(h32 >> 13)); + return h32^(h32 >> 16); } ND_ uint XXHash14 (uint4 p) 
{ - const uint PRIME32_2 = 2246822519U, PRIME32_3 = 3266489917U; + const uint PRIME32_2 = 2246822519U, PRIME32_3 = 3266489917U; const uint PRIME32_4 = 668265263U, PRIME32_5 = 374761393U; uint h32 = p.w + PRIME32_5 + p.x*PRIME32_3; h32 = PRIME32_4*((h32 << 17) | (h32 >> (32 - 17))); @@ -640,7 +652,10 @@ ND_ uint XXHash14 (uint4 p) h32 = PRIME32_4*((h32 << 17) | (h32 >> (32 - 17))); h32 += p.z * PRIME32_3; h32 = PRIME32_4*((h32 << 17) | (h32 >> (32 - 17))); - h32 = PRIME32_2*(h32^(h32 >> 15)); - h32 = PRIME32_3*(h32^(h32 >> 13)); - return h32^(h32 >> 16); + h32 = PRIME32_2*(h32^(h32 >> 15)); + h32 = PRIME32_3*(h32^(h32 >> 13)); + return h32^(h32 >> 16); } + +#endif // AE_LICENSE_MIT +//----------------------------------------------------------------------------- diff --git a/AE/engine/shared_data/3party_shaders/Intersectors-1.glsl b/AE/engine/shared_data/3party_shaders/Intersectors-1.glsl index 4c94cfa3..b91285c4 100644 --- a/AE/engine/shared_data/3party_shaders/Intersectors-1.glsl +++ b/AE/engine/shared_data/3party_shaders/Intersectors-1.glsl @@ -2,6 +2,8 @@ from https://iquilezles.org/articles/intersectors/ */ +#ifdef AE_ENABLE_UNKNOWN_LICENSE + /* ================================================= Sphere_Ray_Intersect @@ -76,3 +78,4 @@ bool Rect_Ray_Intersect (const float2 rectSize, const float2 rayDir, const floa return !(tN > tF or tF < 0.0); } +#endif // AE_ENABLE_UNKNOWN_LICENSE diff --git a/AE/engine/shared_data/3party_shaders/Intersectors-2.glsl b/AE/engine/shared_data/3party_shaders/Intersectors-2.glsl index 1a61f713..5b73d84b 100644 --- a/AE/engine/shared_data/3party_shaders/Intersectors-2.glsl +++ b/AE/engine/shared_data/3party_shaders/Intersectors-2.glsl @@ -1,4 +1,5 @@ +#ifdef AE_ENABLE_UNKNOWN_LICENSE /* ================================================= @@ -18,3 +19,5 @@ bool AABB_Ray_Intersect (const AABB aabb, const Ray ray, out float2 tBeginEnd) tBeginEnd = float2( tmin, tmax ); return (tmax >= 0) and (tmin <= tmax); } + +#endif // 
AE_ENABLE_UNKNOWN_LICENSE diff --git a/AE/engine/shared_data/3party_shaders/LightModels.glsl b/AE/engine/shared_data/3party_shaders/LightModels.glsl index 35007787..f1ae8261 100644 --- a/AE/engine/shared_data/3party_shaders/LightModels.glsl +++ b/AE/engine/shared_data/3party_shaders/LightModels.glsl @@ -50,6 +50,8 @@ LightingResult Phong (const float3 diffuse, const float3 specular, const float https://github.com/pboechat/cook_torrance/blob/master/application/shaders/blinn_phong_colored.fs.glsl ================================================= */ +#ifdef AE_LICENSE_MIT + float BlinnSpecular (const float shininess, const float3 lightDir, const float3 viewDir, const float3 surfNorm) { float3 halfway_dir = Normalize( lightDir + viewDir ); @@ -65,6 +67,8 @@ LightingResult Blinn (const float3 diffuse, const float3 specular, const float return res; } +#endif // AE_LICENSE_MIT + /* ================================================= OrenNayar @@ -72,6 +76,9 @@ LightingResult Blinn (const float3 diffuse, const float3 specular, const float original paper https://dl.acm.org/doi/pdf/10.1145/192161.192213 ================================================= */ + +#ifdef AE_LICENSE_MIT + float OrenNayarDiffuse (const float3 lightDir, const float3 viewDir, const float3 surfNorm, float roughness, float albedo) { // from https://github.com/glslify/glsl-diffuse-oren-nayar/blob/master/index.glsl (MIT license) @@ -106,15 +113,6 @@ float OrenNayarDiffuse2 (const float3 lightDir, const float3 viewDir, const flo return nl * (a + b * ga * Sqrt( (1.0 - nv * nv) * (1.0 - nl * nl) ) / Max( nl, nv )); } -/*float OrenNayarDiffuse3 (const float3 lightDir, const float3 viewDir, const float3 surfNorm, float roughness) -{ - // from https://www.pbr-book.org/3ed-2018/Reflection_Models/Microfacet_Models - - float sigma2 = roughness * roughness; - float A = 1.f - (sigma2 / (2.f * (sigma2 + 0.33f))); - float B = 0.45f * sigma2 / (sigma2 + 0.09f); -}*/ - LightingResult OrenNayar (const float3 diffuse, const 
float3 lightDir, const float3 viewDir, const float3 surfNorm, float roughness, float albedo) { @@ -124,6 +122,17 @@ LightingResult OrenNayar (const float3 diffuse, const float3 lightDir, const fl return res; } +#endif // AE_LICENSE_MIT + +/*float OrenNayarDiffuse3 (const float3 lightDir, const float3 viewDir, const float3 surfNorm, float roughness) +{ + // from https://www.pbr-book.org/3ed-2018/Reflection_Models/Microfacet_Models + + float sigma2 = roughness * roughness; + float A = 1.f - (sigma2 / (2.f * (sigma2 + 0.33f))); + float B = 0.45f * sigma2 / (sigma2 + 0.09f); +}*/ + /* ================================================= CookTorrance @@ -131,6 +140,8 @@ LightingResult OrenNayar (const float3 diffuse, const float3 lightDir, const fl from https://github.com/pboechat/cook_torrance/blob/master/application/shaders/cook_torrance_colored.fs.glsl (MIT license) ================================================= */ +#ifdef AE_LICENSE_MIT + LightingResult CookTorrance (const float3 diffuse, const float3 specular, const float3 lightDir, const float3 viewDir, const float3 surfNorm, const float roughness, const float F0) { @@ -162,6 +173,8 @@ LightingResult CookTorrance (const float3 diffuse, const float3 specular, const return res; } +#endif // AE_LICENSE_MIT + /* ================================================= SpecularBRDF @@ -169,6 +182,8 @@ LightingResult CookTorrance (const float3 diffuse, const float3 specular, const from https://github.com/SaschaWillems/Vulkan?tab=readme-ov-file#pbr-basics (MIT license) ================================================= */ +#ifdef AE_LICENSE_MIT + float3 SpecularBRDF (const float3 albedo, const float3 lightDir, const float3 viewDir, const float3 surfNorm, const float metallic, const float roughness) { @@ -215,3 +230,5 @@ float3 SpecularBRDF (const float3 albedo, const float3 lightDir, const float3 v return color; } + +#endif // AE_LICENSE_MIT diff --git a/AE/engine/shared_data/3party_shaders/Noise-1.glsl 
b/AE/engine/shared_data/3party_shaders/Noise-1.glsl index 99de3b57..65c07a9b 100644 --- a/AE/engine/shared_data/3party_shaders/Noise-1.glsl +++ b/AE/engine/shared_data/3party_shaders/Noise-1.glsl @@ -23,6 +23,8 @@ Copyright (c) 2017 Inigo Quilez ================================================= */ +#ifdef AE_LICENSE_MIT + #define _GRADIENT_NOISE( hash ) \ { \ /* grid */ \ @@ -65,7 +67,9 @@ float GradientNoise (gl::CombinedTex2D rgbaNoise, const float3 pos) { _GRADIENT_NOISE( _RGBTexSHash33 )} float GradientNoise (const float3 pos) { _GRADIENT_NOISE( _SHash33 )} + #undef _GRADIENT_NOISE +#endif // AE_LICENSE_MIT /* ================================================= @@ -78,6 +82,8 @@ float GradientNoise (const float3 pos) { _GRADIENT_NOISE( _SHash33 )} Copyright (c) 2014 Inigo Quilez ================================================= */ +#ifdef AE_LICENSE_MIT + #define _IQNOISE( hash ) \ { \ uv = Clamp( uv, float2(-0.5,0.4), float2(0.5,1.0) ); \ @@ -107,7 +113,9 @@ float IQNoise (gl::CombinedTex2D rgbaNoise, const float3 pos, float2 uv) float IQNoise (const float3 pos, float2 uv) { _IQNOISE( _SHash33 )} float IQNoise (gl::CombinedTex2D rgbaNoise, const float3 pos) { return IQNoise( rgbaNoise, pos, float2(0.5, 0.9) ); } float IQNoise (const float3 pos) { return IQNoise( pos, float2(0.5, 0.9) ); } + #undef _IQNOISE +#endif // AE_LICENSE_MIT /* ================================================= @@ -119,6 +127,8 @@ float IQNoise (const float3 pos) { return IQNoise( pos, float2(0.5, license CC BY-NC-SA 3.0 (shadertoy default) ================================================= */ +#ifdef AE_LICENSE_CC_BY_NC_SA_3 + #define _VALUENOISE \ { \ float3 pi = Floor(pos); \ @@ -153,6 +163,7 @@ float ValueNoise (const float3 pos) } #undef _VALUENOISE +#endif // AE_LICENSE_CC_BY_NC_SA_3 /* ================================================= @@ -164,6 +175,8 @@ float ValueNoise (const float3 pos) license CC BY-NC-SA 3.0 (shadertoy default) ================================================= 
*/ +#ifdef AE_LICENSE_CC_BY_NC_SA_3 + #define _PERLINNOISE( hash ) \ { \ float3 pi = Floor(pos); \ @@ -193,7 +206,9 @@ float ValueNoise (const float3 pos) float PerlinNoise (gl::CombinedTex2D rgbaNoise, const float3 pos) { _PERLINNOISE( _RGBTexSHash33 )} float PerlinNoise (const float3 pos) { _PERLINNOISE( _SHash33 )} + #undef _PERLINNOISE +#endif // AE_LICENSE_CC_BY_NC_SA_3 /* ================================================= @@ -205,6 +220,8 @@ float PerlinNoise (const float3 pos) { _PERLINNOISE( _SHash33 )} license CC BY-NC-SA 3.0 (shadertoy default) ================================================= */ +#ifdef AE_LICENSE_CC_BY_NC_SA_3 + #define _SIMPLEXNOISE( hash ) \ { \ const float K1 = 0.333333333; \ @@ -229,7 +246,9 @@ float PerlinNoise (const float3 pos) { _PERLINNOISE( _SHash33 )} float SimplexNoise (gl::CombinedTex2D rgbaNoise, const float3 pos) { _SIMPLEXNOISE( _RGBTexSHash33 )} float SimplexNoise (const float3 pos) { _SIMPLEXNOISE( _SHash33 )} + #undef _SIMPLEXNOISE +#endif // AE_LICENSE_CC_BY_NC_SA_3 /* ================================================= @@ -242,6 +261,8 @@ float SimplexNoise (const float3 pos) { _SIMPLEXNOISE( _SHash33 )} Copyright (c) 2013 Inigo Quilez ================================================= */ +#ifdef AE_LICENSE_MIT + VoronoiResult2 VoronoiContour2 (const float2 coord, const float3 seedScaleBias_offsetScale) { float2 ipoint = Floor( coord ); @@ -332,6 +353,8 @@ VoronoiResult3 VoronoiContour2 (const float3 coord, const float3 seedScaleBias_ return result; } +#endif // AE_LICENSE_MIT + /* ================================================= VoronoiCircles @@ -341,6 +364,8 @@ VoronoiResult3 VoronoiContour2 (const float3 coord, const float3 seedScaleBias_ based on VoronoiContour ================================================= */ +#ifdef AE_LICENSE_MIT + float VoronoiCircles (const float2 coord, const float radiusScale, const float3 seedScaleBias_offsetScale) { float2 ipoint = Floor( coord ); @@ -394,6 +419,8 @@ float 
VoronoiCircles (const float2 coord, const float radiusScale) return VoronoiCircles( coord, radiusScale, float3(1.0, 0.0, 0.75) ); } +#endif // AE_LICENSE_MIT + /* ================================================= WaveletNoise @@ -403,6 +430,8 @@ float VoronoiCircles (const float2 coord, const float radiusScale) Copyright (c) 2020 Martijn Steinrucken ================================================= */ +#ifdef AE_LICENSE_MIT + float WaveletNoise (float2 coord, const float2 zk) { float d = 0.0f; @@ -430,6 +459,8 @@ float WaveletNoise (float2 coord) { return WaveletNoise( coord, float2(0.2, 0.0) ); } + +#endif // AE_LICENSE_MIT //----------------------------------------------------------------------------- diff --git a/AE/engine/shared_data/3party_shaders/Noise-2.glsl b/AE/engine/shared_data/3party_shaders/Noise-2.glsl index 59376ba6..8dc70820 100644 --- a/AE/engine/shared_data/3party_shaders/Noise-2.glsl +++ b/AE/engine/shared_data/3party_shaders/Noise-2.glsl @@ -16,6 +16,8 @@ adapted from https://en.wikipedia.org/wiki/Hilbert_curve ================================================= */ +#ifdef AE_LICENSE_CC_BY_NC_SA_3 + int HilbertCurve (int2 p, int level) { int d = 0; @@ -55,3 +57,4 @@ int2 InvHilbertCurve (int i, int level) return p; } +#endif // AE_LICENSE_CC_BY_NC_SA_3 diff --git a/AE/engine/shared_data/3party_shaders/PBR-1.glsl b/AE/engine/shared_data/3party_shaders/PBR-1.glsl index 0d1652f8..694e4355 100644 --- a/AE/engine/shared_data/3party_shaders/PBR-1.glsl +++ b/AE/engine/shared_data/3party_shaders/PBR-1.glsl @@ -1,6 +1,8 @@ // from https://github.com/KhronosGroup/glTF-Sample-Viewer/blob/main/source/Renderer/shaders/brdf.glsl // Apache-2.0 license +#ifdef AE_LICENSE_APACHE_2 + // // Fresnel // @@ -249,3 +251,4 @@ ND_ float3 BRDF_SpecularSheen (float3 sheenColor, float sheenRoughness, float N return sheenColor * sheen_distribution * sheen_visibility; } +#endif // AE_LICENSE_APACHE_2 diff --git a/AE/engine/shared_data/3party_shaders/PBR-2.glsl 
b/AE/engine/shared_data/3party_shaders/PBR-2.glsl index adc2cc52..f69aaa43 100644 --- a/AE/engine/shared_data/3party_shaders/PBR-2.glsl +++ b/AE/engine/shared_data/3party_shaders/PBR-2.glsl @@ -12,6 +12,8 @@ based on code from PBRTv3 (BSD2 license) https://pbr-book.org/ ================================================= */ +#ifdef AE_LICENSE_BSD2 + float FresnelDielectric (const float cosThetaI, const float eta) { float sin_theta_t_sq = (eta * eta) * (1.0f - cosThetaI * cosThetaI); // Snell law @@ -28,6 +30,8 @@ float FresnelDielectric (const float cosThetaI, const float eta) 0.5f * (Rp * Rp + Rs * Rs); } +#endif + /* ================================================= FresnelDielectric @@ -41,6 +45,8 @@ float FresnelDielectric (const float cosThetaI, const float eta) from PBRTv3 (BSD2 license) https://pbr-book.org/ ================================================= */ +#ifdef AE_LICENSE_BSD2 + float FresnelDielectric (float cosThetaI, float etaI, float etaT) { if ( cosThetaI <= 0.0f ) @@ -51,6 +57,8 @@ float FresnelDielectric (float cosThetaI, float etaI, float etaT) return FresnelDielectric( Saturate( cosThetaI ), etaI / etaT ); } +#endif + /* ================================================= FresnelDielectricConductor @@ -64,6 +72,8 @@ float FresnelDielectric (float cosThetaI, float etaI, float etaT) from https://seblagarde.wordpress.com/2013/04/29/memo-on-fresnel-equations/ ================================================= */ +#ifdef AE_LICENSE_BSD2 + float FresnelDielectricConductor (float cosThetaI, const float etaI, const float etaT, const float k) { cosThetaI = Clamp( cosThetaI, -1.f, 1.f ); @@ -89,6 +99,8 @@ float FresnelDielectricConductor (float cosThetaI, const float etaI, const floa return 0.5f * (Rp + Rs); } +#endif + /* ================================================= FresnelAirConductorApprox @@ -101,6 +113,8 @@ float FresnelDielectricConductor (float cosThetaI, const float etaI, const floa from 
https://seblagarde.wordpress.com/2013/04/29/memo-on-fresnel-equations/ ================================================= */ +#ifdef AE_ENABLE_UNKNOWN_LICENSE + float FresnelDielectricConductorApprox (float cosThetaI, const float etaI, const float etaT, const float k) { cosThetaI = Clamp( cosThetaI, -1.f, 1.f ); @@ -117,3 +131,5 @@ float FresnelDielectricConductorApprox (float cosThetaI, const float etaI, cons return 0.5f * (Rp + Rs); } + +#endif diff --git a/AE/engine/shared_data/3party_shaders/SDF-1.glsl b/AE/engine/shared_data/3party_shaders/SDF-1.glsl index 9ef4cf6b..24d91160 100644 --- a/AE/engine/shared_data/3party_shaders/SDF-1.glsl +++ b/AE/engine/shared_data/3party_shaders/SDF-1.glsl @@ -6,6 +6,7 @@ https://iquilezles.org/articles/distfunctions2d/ */ +#ifdef AE_ENABLE_UNKNOWN_LICENSE //----------------------------------------------------------------------------- // 2D Shapes @@ -290,6 +291,7 @@ float SDF_OpIntersect (const float d1, const float d2) } +// TODO: https://iquilezles.org/articles/smin/ float SDF_OpUnite (const float d1, const float d2, const float smoothFactor) { const float h = Saturate( 0.5f + 0.5f * (d2-d1) / smoothFactor ); @@ -389,12 +391,12 @@ float3 SDF_Move (const float3 position, const float3 delta) return position - delta; } -float3 SDF_Rotate (const float3 position, const quat q) +float3 SDF_Rotate (const float3 position, const Quat q) { return QMul( QInverse( q ), position ); } -float3 SDF_Transform (const float3 position, const quat q, const float3 delta) +float3 SDF_Transform (const float3 position, const Quat q, const float3 delta) { return SDF_Rotate( SDF_Move( position, delta ), q ); } @@ -482,3 +484,5 @@ float2 SDF_Rotate2D (const float2 p, const float angle) return _fnName_( pos, 0.0001f ); \ } + +#endif // AE_ENABLE_UNKNOWN_LICENSE diff --git a/AE/engine/shared_data/3party_shaders/SDF-2.glsl b/AE/engine/shared_data/3party_shaders/SDF-2.glsl index bc4f3549..4bba5713 100644 --- a/AE/engine/shared_data/3party_shaders/SDF-2.glsl 
+++ b/AE/engine/shared_data/3party_shaders/SDF-2.glsl @@ -3,6 +3,8 @@ MIT license */ +#ifdef AE_LICENSE_MIT + ND_ float SDF_MinCubic (const float a, float b, float k) { // polynomial smooth min (k = 0.1); @@ -17,6 +19,8 @@ float2 SDF_OpBlend (const float2 d1, const float2 d2) float m = Lerp( d1.y, d2.y, Clamp( d1.x - d, 0.f, 1.f )); return float2( d, m ); } + +#endif // AE_LICENSE_MIT //----------------------------------------------------------------------------- diff --git a/AE/engine/shared_data/3party_shaders/TileableNoise.glsl b/AE/engine/shared_data/3party_shaders/TileableNoise.glsl new file mode 100644 index 00000000..1bd1a9f4 --- /dev/null +++ b/AE/engine/shared_data/3party_shaders/TileableNoise.glsl @@ -0,0 +1,69 @@ +/* + 3party tileable noise functions. +*/ +#ifdef __cplusplus +# pragma once +#endif + +#define _UHash22( _a_ ) DHash22( _a_ ) +#define _UHash33( _a_ ) DHash33( _a_ ) +//----------------------------------------------------------------------------- + +/* +================================================= + TileableGradientNoise +---- + range ? + + from https://www.shadertoy.com/view/3dVXDc + license CC BY-NC-SA 3.0 (shadertoy default) +================================================= +*/ +#ifdef AE_LICENSE_CC_BY_NC_SA_3 + +float TileableGradientNoise (const float3 x, const float freq) +{ + // grid + float3 p = floor(x); + float3 w = fract(x); + + // quintic interpolant + float3 u = w * w * w * (w * (w * 6. - 15.) 
+ 10.); + + // gradients + float3 ga = _UHash33(Mod(p + float3(0., 0., 0.), freq)); + float3 gb = _UHash33(Mod(p + float3(1., 0., 0.), freq)); + float3 gc = _UHash33(Mod(p + float3(0., 1., 0.), freq)); + float3 gd = _UHash33(Mod(p + float3(1., 1., 0.), freq)); + float3 ge = _UHash33(Mod(p + float3(0., 0., 1.), freq)); + float3 gf = _UHash33(Mod(p + float3(1., 0., 1.), freq)); + float3 gg = _UHash33(Mod(p + float3(0., 1., 1.), freq)); + float3 gh = _UHash33(Mod(p + float3(1., 1., 1.), freq)); + + // projections + float va = Dot(ga, w - float3(0., 0., 0.)); + float vb = Dot(gb, w - float3(1., 0., 0.)); + float vc = Dot(gc, w - float3(0., 1., 0.)); + float vd = Dot(gd, w - float3(1., 1., 0.)); + float ve = Dot(ge, w - float3(0., 0., 1.)); + float vf = Dot(gf, w - float3(1., 0., 1.)); + float vg = Dot(gg, w - float3(0., 1., 1.)); + float vh = Dot(gh, w - float3(1., 1., 1.)); + + // interpolation + return va + + u.x * (vb - va) + + u.y * (vc - va) + + u.z * (ve - va) + + u.x * u.y * (va - vb - vc + vd) + + u.y * u.z * (va - vc - ve + vg) + + u.z * u.x * (va - vb - ve + vf) + + u.x * u.y * u.z * (-va + vb + vc - vd + ve - vf - vg + vh); +} + +#endif +//----------------------------------------------------------------------------- + + +#undef _UHash22 +#undef _UHash33 diff --git a/AE/engine/shared_data/3party_shaders/ToneMapping-1.glsl b/AE/engine/shared_data/3party_shaders/ToneMapping-1.glsl index 2486054b..9aa6abd2 100644 --- a/AE/engine/shared_data/3party_shaders/ToneMapping-1.glsl +++ b/AE/engine/shared_data/3party_shaders/ToneMapping-1.glsl @@ -9,6 +9,8 @@ #include "Math.glsl" +#ifdef AE_LICENSE_MIT + // // Reinhard // @@ -106,3 +108,6 @@ float3 InvToneMap_ACES (const float3 sdr) const float A = 2.51, B = 0.03, C = 2.43, D = 0.59, E = 0.14; return 0.5 * (D * sdr - sqrt(((D*D - 4*C*E) * sdr + 4*A*E-2*B*D) * sdr + B*B) - B) / (A - C * sdr); } + + +#endif // AE_LICENSE_MIT diff --git a/AE/engine/shared_data/3party_shaders/ToneMapping-2.glsl 
b/AE/engine/shared_data/3party_shaders/ToneMapping-2.glsl index 42dd01aa..afc7ab42 100644 --- a/AE/engine/shared_data/3party_shaders/ToneMapping-2.glsl +++ b/AE/engine/shared_data/3party_shaders/ToneMapping-2.glsl @@ -1,9 +1,13 @@ +#ifdef AE_LICENSE_CC_BY_NC_SA_3 + // // ACES // // from https://www.shadertoy.com/view/XsGfWV +// license CC BY-NC-SA 3.0 (shadertoy default) + float3 ToneMap_ACES_v2 (const float3 color) { const float3x3 m1 = float3x3( @@ -43,6 +47,7 @@ float3 ToneMap_ACES_v3 (const float3 color) //----------------------------------------------------------------------------- // from https://www.shadertoy.com/view/WdjSW3 +// license CC BY-NC-SA 3.0 (shadertoy default) // @@ -150,6 +155,8 @@ float3 Tonemap_Lottes (const float3 hdr) { // from https://www.shadertoy.com/view/tl3GR8 +// license CC BY-NC-SA 3.0 (shadertoy default) + float3 ToneMap_whitePreservingLumaBasedReinhardToneMapping (float3 color) { float white = 2.; @@ -159,3 +166,6 @@ float3 ToneMap_whitePreservingLumaBasedReinhardToneMapping (float3 color) return color; } //----------------------------------------------------------------------------- + + +#endif // AE_LICENSE_CC_BY_NC_SA_3 diff --git a/AE/engine/shared_data/feature_set/min_mobile_mali.as b/AE/engine/shared_data/feature_set/min_mobile_mali.as index 425a7859..7b874f2d 100644 --- a/AE/engine/shared_data/feature_set/min_mobile_mali.as +++ b/AE/engine/shared_data/feature_set/min_mobile_mali.as @@ -182,9 +182,20 @@ void ASmain () EPixelFormat::ASTC_sRGB8_A8_8x6, EPixelFormat::ASTC_sRGB8_A8_8x8, EPixelFormat::ASTC_sRGB8_A8_10x5, EPixelFormat::ASTC_sRGB8_A8_10x6, EPixelFormat::ASTC_sRGB8_A8_10x8, EPixelFormat::ASTC_sRGB8_A8_10x10, EPixelFormat::ASTC_sRGB8_A8_12x10, EPixelFormat::ASTC_sRGB8_A8_12x12 }); - //fset.AddTexelFormats( EFormatFeature::HWCompressedAttachment, { - // EPixelFormat::RGBA8_UNorm - //}); + fset.AddTexelFormats( EFormatFeature::HWCompressedAttachment, { + // full support in Bifrost gen3 + EPixelFormat::RGBA8_UNorm, 
EPixelFormat::sRGB8_A8, EPixelFormat::BGRA8_UNorm, EPixelFormat::sBGR8_A8, + EPixelFormat::RGB10_A2_UNorm, EPixelFormat::R8_UNorm, EPixelFormat::RG8_UNorm, EPixelFormat::RGB8_UNorm, + EPixelFormat::RGB_5_6_5_UNorm, EPixelFormat::RGB5_A1_UNorm, EPixelFormat::RGBA4_UNorm, + EPixelFormat::Depth16, EPixelFormat::Depth24_Stencil8, EPixelFormat::Depth32F, + + // all 32 bit or smaller formats in Valhall gen1+ + EPixelFormat::R16_UNorm, EPixelFormat::RG16_UNorm, EPixelFormat::R16_SNorm, EPixelFormat::RG16_SNorm, + EPixelFormat::R16F, EPixelFormat::RG16F, + + // in Valhall gen3 + EPixelFormat::RGBA16_UNorm, EPixelFormat::RGBA16_SNorm, EPixelFormat::RGBA16F + }); fset.samplerMipLodBias (True); fset.maxSamplerAnisotropy (1.00); fset.maxSamplerLodBias (2.00); diff --git a/AE/engine/shared_data/scripts/asset_packer.as b/AE/engine/shared_data/scripts/asset_packer.as index be31f579..ec24e2ae 100644 --- a/AE/engine/shared_data/scripts/asset_packer.as +++ b/AE/engine/shared_data/scripts/asset_packer.as @@ -1,4 +1,4 @@ -//e2654f94 +//596e1c9f #include #include @@ -8,7 +8,6 @@ using int8 = std::int8_t; using uint8 = std::uint8_t; using int16 = std::int16_t; using uint16 = std::uint16_t; -using int = std::int32_t; using uint = std::uint32_t; using int32 = std::int32_t; using uint32 = std::uint32_t; @@ -22,112 +21,1360 @@ struct RC; template using array = std::vector; -struct EReductionMode; -struct EVertexInputRate; -struct MipmapLevel; -struct EShadingRateCombinerOp; -struct ESurfaceFormat; -struct Mesh; -struct Material; -struct DepthStencil; -struct EPixelFormatExternal; -struct FixedLayout; -struct ImageLayer; -struct int2; -struct int4; -struct UIFontStyle; -struct int3; -struct ECubeFace; -struct short4; -struct EPixelFormat; -struct sbyte2; -struct ushort3; -struct ushort2; -struct BaseUIDrawable; -struct Model; -struct EPolygonMode; +using namespace std::string_literals; + +template +string operator + (const string &lhs, T rhs); + +struct BaseUIController; +struct UIWidget; 
+struct UIStyleCollection; +struct BaseLayout; struct RectI; -struct RectangleDrawable; -struct ERasterFontMode; -struct EShaderStages; struct uint3; +struct RectangleDrawable; struct uint2; -struct EGPUVendor; -struct EShader; -struct ECompareOp; struct uint4; -struct UIImageStyle; struct RectU; -struct EMipmapFilter; +struct UIImageStyle; struct ImageAtlas; -struct RectF; struct RGBA8u; +struct RectF; struct RGBA32u; struct FillStackLayout; -struct BaseUIController; -struct EImageOpt; -struct EFeature; -struct EResourceState; -struct EBufferOpt; -struct UIStyleCollection; -struct UIWidget; -struct BaseLayout; -struct EBufferUsage; -struct EBorderColor; -struct ESamplerYcbcrRange; -struct ubyte4; -struct EShadingRate; -struct ELayoutAlign; -struct EPrimitive; -struct ubyte3; -struct ubyte2; -struct ESubgroupOperation; -struct EGraphicsDeviceID; -struct EImage; -struct bool2; -struct bool3; -struct RGBA32i; -struct EVertexType; -struct ELogicOp; -struct bool4; -struct PaddingLayout; -struct EBlendOp; -struct EQueueMask; -struct HSVColor; -struct ECullMode; -struct RGBA32f; -struct ERTInstanceOpt; -struct EBlendFactor; -struct EStencilOp; -struct EStackOrigin; -struct EAddressMode; -struct EIndex; +struct int4; +struct ImageLayer; +struct int3; +struct int2; +struct UIFontStyle; +struct short4; +struct ushort3; +struct sbyte2; +struct ushort2; +struct BaseUIDrawable; +struct Model; +struct MipmapLevel; +struct Mesh; +struct Material; +struct DepthStencil; +struct FixedLayout; +struct UIColorStyle; +struct MultiSamples; +struct float2; +struct RasterFont; +struct float3; struct ButtonController; struct float4; -struct ELayoutType; -struct sbyte4; struct AlignedLayout; -struct ESamplerYcbcrModelConversion; -struct short2; +struct sbyte4; struct sbyte3; -struct short3; -struct ESubgroupTypes; struct Texture; +struct short2; struct ushort4; -struct ESamplerOpt; -struct EImageUsage; -struct ESamplerChromaLocation; -struct EPipelineDynamicState; -struct UIColorStyle; 
-struct EFilter; -struct MultiSamples; -struct EShaderIO; -struct EImageAspect; -struct RasterFont; -struct EPipelineOpt; -struct float2; -struct float3; -struct EDescSetUsage; +struct short3; +struct bool2; +struct bool3; +struct RGBA32i; +struct bool4; +struct PaddingLayout; +struct RGBA32f; +struct HSVColor; +struct ubyte4; +struct ubyte3; +struct ubyte2; + +enum class EImage : uint8 +{ + Cube, + CubeArray, +}; +uint8 operator | (EImage lhs, EImage rhs); +uint8 operator | (uint8 lhs, EImage rhs); +uint8 operator | (EImage lhs, uint8 rhs); +static constexpr EImage EImage_1D = EImage(0); +static constexpr EImage EImage_2D = EImage(1); +static constexpr EImage EImage_3D = EImage(2); +static constexpr EImage EImage_1DArray = EImage(3); +static constexpr EImage EImage_2DArray = EImage(4); + +enum class EIndex : uint8 +{ + UShort, + UInt, +}; +uint8 operator | (EIndex lhs, EIndex rhs); +uint8 operator | (uint8 lhs, EIndex rhs); +uint8 operator | (EIndex lhs, uint8 rhs); + +enum class EPixelFormat : uint8 +{ + RGBA16_SNorm, + RGBA8_SNorm, + RGB16_SNorm, + RGB8_SNorm, + RG16_SNorm, + RG8_SNorm, + R16_SNorm, + R8_SNorm, + RGBA16_UNorm, + RGBA8_UNorm, + RGB16_UNorm, + RGB8_UNorm, + RG16_UNorm, + RG8_UNorm, + R16_UNorm, + R8_UNorm, + RGB10_A2_UNorm, + RGBA4_UNorm, + RGB5_A1_UNorm, + RGB_5_6_5_UNorm, + BGR8_UNorm, + BGRA8_UNorm, + sRGB8, + sRGB8_A8, + sBGR8, + sBGR8_A8, + R8I, + RG8I, + RGB8I, + RGBA8I, + R16I, + RG16I, + RGB16I, + RGBA16I, + R32I, + RG32I, + RGB32I, + RGBA32I, + R64I, + R8U, + RG8U, + RGB8U, + RGBA8U, + R16U, + RG16U, + RGB16U, + RGBA16U, + R32U, + RG32U, + RGB32U, + RGBA32U, + RGB10_A2U, + R64U, + R16F, + RG16F, + RGB16F, + RGBA16F, + R32F, + RG32F, + RGB32F, + RGBA32F, + RGB_11_11_10F, + RGB9F_E5, + Depth16, + Depth24, + Depth32F, + Depth16_Stencil8, + Depth24_Stencil8, + Depth32F_Stencil8, + BC1_RGB8_UNorm, + BC1_sRGB8, + BC1_RGB8_A1_UNorm, + BC1_sRGB8_A1, + BC2_RGBA8_UNorm, + BC2_sRGB8, + BC3_RGBA8_UNorm, + BC3_sRGB8, + BC4_R8_SNorm, + BC4_R8_UNorm, + 
BC5_RG8_SNorm, + BC5_RG8_UNorm, + BC6H_RGB16F, + BC6H_RGB16UF, + BC7_RGBA8_UNorm, + BC7_sRGB8_A8, + ETC2_RGB8_UNorm, + ETC2_sRGB8, + ETC2_RGB8_A1_UNorm, + ETC2_sRGB8_A1, + ETC2_RGBA8_UNorm, + ETC2_sRGB8_A8, + EAC_R11_SNorm, + EAC_R11_UNorm, + EAC_RG11_SNorm, + EAC_RG11_UNorm, + ASTC_RGBA8_4x4, + ASTC_RGBA8_5x4, + ASTC_RGBA8_5x5, + ASTC_RGBA8_6x5, + ASTC_RGBA8_6x6, + ASTC_RGBA8_8x5, + ASTC_RGBA8_8x6, + ASTC_RGBA8_8x8, + ASTC_RGBA8_10x5, + ASTC_RGBA8_10x6, + ASTC_RGBA8_10x8, + ASTC_RGBA8_10x10, + ASTC_RGBA8_12x10, + ASTC_RGBA8_12x12, + ASTC_sRGB8_A8_4x4, + ASTC_sRGB8_A8_5x4, + ASTC_sRGB8_A8_5x5, + ASTC_sRGB8_A8_6x5, + ASTC_sRGB8_A8_6x6, + ASTC_sRGB8_A8_8x5, + ASTC_sRGB8_A8_8x6, + ASTC_sRGB8_A8_8x8, + ASTC_sRGB8_A8_10x5, + ASTC_sRGB8_A8_10x6, + ASTC_sRGB8_A8_10x8, + ASTC_sRGB8_A8_10x10, + ASTC_sRGB8_A8_12x10, + ASTC_sRGB8_A8_12x12, + ASTC_RGBA16F_4x4, + ASTC_RGBA16F_5x4, + ASTC_RGBA16F_5x5, + ASTC_RGBA16F_6x5, + ASTC_RGBA16F_6x6, + ASTC_RGBA16F_8x5, + ASTC_RGBA16F_8x6, + ASTC_RGBA16F_8x8, + ASTC_RGBA16F_10x5, + ASTC_RGBA16F_10x6, + ASTC_RGBA16F_10x8, + ASTC_RGBA16F_10x10, + ASTC_RGBA16F_12x10, + ASTC_RGBA16F_12x12, + G8B8G8R8_422_UNorm, + B8G8R8G8_422_UNorm, + G8_B8R8_420_UNorm, + G8_B8R8_422_UNorm, + G8_B8R8_444_UNorm, + G8_B8_R8_420_UNorm, + G8_B8_R8_422_UNorm, + G8_B8_R8_444_UNorm, + B10x6G10x6R10x6G10x6_422_UNorm, + G10x6B10x6G10x6R10x6_422_UNorm, + G10x6_B10x6R10x6_420_UNorm, + G10x6_B10x6R10x6_422_UNorm, + G10x6_B10x6R10x6_444_UNorm, + G10x6_B10x6_R10x6_420_UNorm, + G10x6_B10x6_R10x6_422_UNorm, + G10x6_B10x6_R10x6_444_UNorm, + R10x6G10x6B10x6A10x6_UNorm, + R10x6G10x6_UNorm, + R10x6_UNorm, + B12x4G12x4R12x4G12x4_422_UNorm, + G12x4B12x4G12x4R12x4_422_UNorm, + G12x4_B12x4R12x4_420_UNorm, + G12x4_B12x4R12x4_422_UNorm, + G12x4_B12x4R12x4_444_UNorm, + G12x4_B12x4_R12x4_420_UNorm, + G12x4_B12x4_R12x4_422_UNorm, + G12x4_B12x4_R12x4_444_UNorm, + R12x4G12x4B12x4A12x4_UNorm, + R12x4G12x4_UNorm, + R12x4_UNorm, + B16G16R16G16_422_UNorm, + G16B16G16R16_422_UNorm, + 
G16_B16R16_420_UNorm, + G16_B16R16_422_UNorm, + G16_B16R16_444_UNorm, + G16_B16_R16_420_UNorm, + G16_B16_R16_422_UNorm, + G16_B16_R16_444_UNorm, + SwapchainColor, +}; +uint8 operator | (EPixelFormat lhs, EPixelFormat rhs); +uint8 operator | (uint8 lhs, EPixelFormat rhs); +uint8 operator | (EPixelFormat lhs, uint8 rhs); + +enum class EPixelFormatExternal : uint8 +{ + Android_Depth16, + Android_DepthJPEG, + Android_DepthPointCloud, + Android_JPEG, + Android_Raw16, + Android_Raw12, + Android_Raw10, + Android_NV16, + Android_NV21, + Android_YCBCR_P010, + Android_YUV_420, + Android_YUV_422, + Android_YUV_444, + Android_YUY2, + Android_YV12, + Android_Y8, + Android_HEIC, +}; +uint8 operator | (EPixelFormatExternal lhs, EPixelFormatExternal rhs); +uint8 operator | (uint8 lhs, EPixelFormatExternal rhs); +uint8 operator | (EPixelFormatExternal lhs, uint8 rhs); + +enum class ECompareOp : uint8 +{ + Never, + Less, + Equal, + LEqual, + Greater, + NotEqual, + GEqual, + Always, + LessOrEqual, + GreaterOrEqual, +}; +uint8 operator | (ECompareOp lhs, ECompareOp rhs); +uint8 operator | (uint8 lhs, ECompareOp rhs); +uint8 operator | (ECompareOp lhs, uint8 rhs); + +enum class EBlendFactor : uint8 +{ + + // S, srcColor - from shader + // D, dstColor - from render target + // S1 - from shader (dual src blend) + // cc - constant color + // result = srcColor * srcBlend [blendOp] dstColor * dstBlend + // + + // 0 + Zero, + + // 1 + One, + + // S + SrcColor, + + // 1 - S + OneMinusSrcColor, + + // D + DstColor, + + // 1 - D + OneMinusDstColor, + + // S.a + SrcAlpha, + + // 1 - S.a + OneMinusSrcAlpha, + + // D.a + DstAlpha, + + // 1 - D.a + OneMinusDstAlpha, + + // cc + ConstColor, + + // 1 - cc + OneMinusConstColor, + + // cc.a + ConstAlpha, + + // 1 - cc.a + OneMinusConstAlpha, + + // rgb * min( S.a, D.a ), a * 1 + SrcAlphaSaturate, + + // S1 + Src1Color, + + // 1 - S1 + OneMinusSrc1Color, + + // S1.a + Src1Alpha, + + // 1 - S1.a + OneMinusSrc1Alpha, +}; +uint8 operator | (EBlendFactor 
lhs, EBlendFactor rhs); +uint8 operator | (uint8 lhs, EBlendFactor rhs); +uint8 operator | (EBlendFactor lhs, uint8 rhs); + +enum class EBlendOp : uint8 +{ + + // S, srcColor - from shader + // D, dstColor - from render target + // result = srcColor * srcBlend [blendOp] dstColor * dstBlend + // + + // S + D + Add, + + // S - D + Sub, + + // D - S + RevSub, + + // min( S, D ) + Min, + + // max( S, D ) + Max, +}; +uint8 operator | (EBlendOp lhs, EBlendOp rhs); +uint8 operator | (uint8 lhs, EBlendOp rhs); +uint8 operator | (EBlendOp lhs, uint8 rhs); + +enum class ELogicOp : uint8 +{ + + // S - from shader + // D - from render target + // result = S [logicOp] D + // + + // disabled + None, + + // 0 + Clear, + + // 1 + Set, + + // S + Copy, + + // ~S + CopyInverted, + + // D + NoOp, + + // ~D + Invert, + + // S & D + And, + + // ~ ( S & D ) + NotAnd, + + // S | D + Or, + + // ~ ( S | D ) + NotOr, + + // S ^ D + Xor, + + // ~ ( S ^ D ) + Equiv, + + // S & ~D + AndReverse, + + // ~S & D + AndInverted, + + // S | ~D + OrReverse, + + // ~S | D + OrInverted, +}; +uint8 operator | (ELogicOp lhs, ELogicOp rhs); +uint8 operator | (uint8 lhs, ELogicOp rhs); +uint8 operator | (ELogicOp lhs, uint8 rhs); + +enum class EStencilOp : uint8 +{ + + // src + Keep, + + // 0 + Zero, + + // ref + Replace, + + // min( ++src, 0 ) + Incr, + + // ++src & maxValue + IncrWrap, + + // max( --src, 0 ) + Decr, + + // --src & maxValue + DecrWrap, + + // ~src + Invert, +}; +uint8 operator | (EStencilOp lhs, EStencilOp rhs); +uint8 operator | (uint8 lhs, EStencilOp rhs); +uint8 operator | (EStencilOp lhs, uint8 rhs); + +enum class EPolygonMode : uint8 +{ + Point, + Line, + Fill, +}; +uint8 operator | (EPolygonMode lhs, EPolygonMode rhs); +uint8 operator | (uint8 lhs, EPolygonMode rhs); +uint8 operator | (EPolygonMode lhs, uint8 rhs); + +enum class EPrimitive : uint8 +{ + Point, + LineList, + LineStrip, + LineListAdjacency, + LineStripAdjacency, + TriangleList, + TriangleStrip, + TriangleFan, + 
TriangleListAdjacency, + TriangleStripAdjacency, + Patch, +}; +uint8 operator | (EPrimitive lhs, EPrimitive rhs); +uint8 operator | (uint8 lhs, EPrimitive rhs); +uint8 operator | (EPrimitive lhs, uint8 rhs); + +enum class ECullMode : uint8 +{ + None, + Front, + Back, + FontAndBack, +}; +uint8 operator | (ECullMode lhs, ECullMode rhs); +uint8 operator | (uint8 lhs, ECullMode rhs); +uint8 operator | (ECullMode lhs, uint8 rhs); + +enum class EPipelineDynamicState : uint16 +{ + None, + StencilCompareMask, + StencilWriteMask, + StencilReference, + DepthBias, + BlendConstants, + RTStackSize, + FragmentShadingRate, + ViewportWScaling, +}; +uint16 operator | (EPipelineDynamicState lhs, EPipelineDynamicState rhs); +uint16 operator | (uint16 lhs, EPipelineDynamicState rhs); +uint16 operator | (EPipelineDynamicState lhs, uint16 rhs); + +enum class EResourceState : uint32 +{ + Unknown, + Preserve, + ShaderStorage_Read, + ShaderStorage_Write, + ShaderStorage_RW, + ShaderUniform, + ShaderSample, + CopySrc, + CopyDst, + ClearDst, + BlitSrc, + BlitDst, + InputColorAttachment, + InputColorAttachment_RW, + ColorAttachment, + ColorAttachment_Blend, + DepthStencilAttachment_Read, + DepthStencilAttachment_Write, + DepthStencilAttachment_RW, + DepthTest_StencilRW, + DepthRW_StencilTest, + DepthStencilTest_ShaderSample, + DepthTest_DepthSample_StencilRW, + InputDepthStencilAttachment, + InputDepthStencilAttachment_RW, + Host_Read, + PresentImage, + IndirectBuffer, + IndexBuffer, + VertexBuffer, + ShadingRateImage, + CopyRTAS_Read, + CopyRTAS_Write, + BuildRTAS_Read, + BuildRTAS_Write, + BuildRTAS_RW, + BuildRTAS_IndirectBuffer, + ShaderRTAS, + RTShaderBindingTable, + DSTestBeforeFS, + DSTestAfterFS, + Invalidate, + General, + MeshTaskShader, + VertexProcessingShaders, + TileShader, + FragmentShader, + PreRasterizationShaders, + PostRasterizationShaders, + ComputeShader, + RayTracingShaders, + AllGraphicsShaders, + AllShaders, + BuildRTAS_ScratchBuffer, + InputDepthAttachment, + 
DepthStencilAttachment, +}; +uint32 operator | (EResourceState lhs, EResourceState rhs); +uint32 operator | (uint32 lhs, EResourceState rhs); +uint32 operator | (EResourceState lhs, uint32 rhs); + +enum class EImageAspect : uint8 +{ + Color, + Depth, + Stencil, + DepthStencil, + Plane_0, + Plane_1, + Plane_2, +}; +uint8 operator | (EImageAspect lhs, EImageAspect rhs); +uint8 operator | (uint8 lhs, EImageAspect rhs); +uint8 operator | (EImageAspect lhs, uint8 rhs); + +enum class EShaderIO : uint8 +{ + Int, + UInt, + Float, + UFloat, + Half, + UNorm, + SNorm, + sRGB, + AnyColor, + Depth, + Stencil, + DepthStencil, +}; +uint8 operator | (EShaderIO lhs, EShaderIO rhs); +uint8 operator | (uint8 lhs, EShaderIO rhs); +uint8 operator | (EShaderIO lhs, uint8 rhs); + +enum class ESubgroupTypes : uint8 +{ + Float16, + Float32, + Int8, + Int16, + Int32, + Int64, +}; +uint8 operator | (ESubgroupTypes lhs, ESubgroupTypes rhs); +uint8 operator | (uint8 lhs, ESubgroupTypes rhs); +uint8 operator | (ESubgroupTypes lhs, uint8 rhs); + +enum class ESubgroupOperation : uint32 +{ + IndexAndSize, + Elect, + Barrier, + Any, + All, + AllEqual, + Add, + Mul, + Min, + Max, + And, + Or, + Xor, + InclusiveMul, + InclusiveAdd, + InclusiveMin, + InclusiveMax, + InclusiveAnd, + InclusiveOr, + InclusiveXor, + ExclusiveAdd, + ExclusiveMul, + ExclusiveMin, + ExclusiveMax, + ExclusiveAnd, + ExclusiveOr, + ExclusiveXor, + Ballot, + Broadcast, + BroadcastFirst, + InverseBallot, + BallotBitExtract, + BallotBitCount, + BallotInclusiveBitCount, + BallotExclusiveBitCount, + BallotFindLSB, + BallotFindMSB, + Shuffle, + ShuffleXor, + ShuffleUp, + ShuffleDown, + ClusteredAdd, + ClusteredMul, + ClusteredMin, + ClusteredMax, + ClusteredAnd, + ClusteredOr, + ClusteredXor, + QuadBroadcast, + QuadSwapHorizontal, + QuadSwapVertical, + QuadSwapDiagonal, + _Basic_Begin, + _Basic_End, + _Vote_Begin, + _Vote_End, + _Arithmetic_Begin, + _Arithmetic_End, + _Ballot_Begin, + _Ballot_End, + _Shuffle_Begin, + _Shuffle_End, + 
_ShuffleRelative_Begin, + _ShuffleRelative_End, + _Clustered_Begin, + _Clustered_End, + _Quad_Begin, + _Quad_End, +}; +uint32 operator | (ESubgroupOperation lhs, ESubgroupOperation rhs); +uint32 operator | (uint32 lhs, ESubgroupOperation rhs); +uint32 operator | (ESubgroupOperation lhs, uint32 rhs); + +enum class EFeature : uint8 +{ + Ignore, + RequireTrue, + RequireFalse, +}; +uint8 operator | (EFeature lhs, EFeature rhs); +uint8 operator | (uint8 lhs, EFeature rhs); +uint8 operator | (EFeature lhs, uint8 rhs); + +enum class EShader : uint8 +{ + Vertex, + TessControl, + TessEvaluation, + Geometry, + Fragment, + Compute, + Tile, + MeshTask, + Mesh, + RayGen, + RayAnyHit, + RayClosestHit, + RayMiss, + RayIntersection, + RayCallable, +}; +uint8 operator | (EShader lhs, EShader rhs); +uint8 operator | (uint8 lhs, EShader rhs); +uint8 operator | (EShader lhs, uint8 rhs); + +enum class EShaderStages : uint16 +{ + Vertex, + TessControl, + TessEvaluation, + Geometry, + Fragment, + Compute, + Tile, + MeshTask, + Mesh, + RayGen, + RayAnyHit, + RayClosestHit, + RayMiss, + RayIntersection, + RayCallable, + All, + AllGraphics, + GraphicsPipeStages, + MeshPipeStages, + VertexProcessingStages, + PreRasterizationStages, + PostRasterizationStages, + AllRayTracing, +}; +uint16 operator | (EShaderStages lhs, EShaderStages rhs); +uint16 operator | (uint16 lhs, EShaderStages rhs); +uint16 operator | (EShaderStages lhs, uint16 rhs); + +enum class EGPUVendor : uint32 +{ + AMD, + NVidia, + Intel, + ARM, + Qualcomm, + ImgTech, + Microsoft, + Apple, + Mesa, + Broadcom, + Samsung, + VeriSilicon, + Huawei, +}; +uint32 operator | (EGPUVendor lhs, EGPUVendor rhs); +uint32 operator | (uint32 lhs, EGPUVendor rhs); +uint32 operator | (EGPUVendor lhs, uint32 rhs); + +enum class EVertexType : uint16 +{ + Byte, + Byte2, + Byte3, + Byte4, + Byte_Norm, + Byte2_Norm, + Byte3_Norm, + Byte4_Norm, + Byte_Scaled, + Byte2_Scaled, + Byte3_Scaled, + Byte4_Scaled, + UByte, + UByte2, + UByte3, + UByte4, + 
UByte_Norm, + UByte2_Norm, + UByte3_Norm, + UByte4_Norm, + UByte_Scaled, + UByte2_Scaled, + UByte3_Scaled, + UByte4_Scaled, + Short, + Short2, + Short3, + Short4, + Short_Norm, + Short2_Norm, + Short3_Norm, + Short4_Norm, + Short_Scaled, + Short2_Scaled, + Short3_Scaled, + Short4_Scaled, + UShort, + UShort2, + UShort3, + UShort4, + UShort_Norm, + UShort2_Norm, + UShort3_Norm, + UShort4_Norm, + UShort_Scaled, + UShort2_Scaled, + UShort3_Scaled, + UShort4_Scaled, + Int, + Int2, + Int3, + Int4, + UInt, + UInt2, + UInt3, + UInt4, + Long, + Long2, + Long3, + Long4, + ULong, + ULong2, + ULong3, + ULong4, + Half, + Half2, + Half3, + Half4, + Float, + Float2, + Float3, + Float4, + Double, + Double2, + Double3, + Double4, + UInt_2_10_10_10, + UInt_2_10_10_10_Norm, + UInt_2_10_10_10_Scaled, +}; +uint16 operator | (EVertexType lhs, EVertexType rhs); +uint16 operator | (uint16 lhs, EVertexType rhs); +uint16 operator | (EVertexType lhs, uint16 rhs); + +enum class EGraphicsDeviceID : uint32 +{ + Adreno_500, + Adreno_600, + Adreno_700, + AMD_GCN1, + AMD_GCN2, + AMD_GCN3, + AMD_GCN4, + AMD_GCN5, + AMD_GCN5_APU, + AMD_RDNA1, + AMD_RDNA2, + AMD_RDNA2_APU, + AMD_RDNA3, + AMD_RDNA3_APU, + AMD_RDNA4, + Apple_A8, + Apple_A9_A10, + Apple_A11, + Apple_A12, + Apple_A13, + Apple_A14_M1, + Apple_A15_M2, + Apple_A16, + Apple_A17_M3, + Mali_Midgard_Gen2, + Mali_Midgard_Gen3, + Mali_Midgard_Gen4, + Mali_Bifrost_Gen1, + Mali_Bifrost_Gen2, + Mali_Bifrost_Gen3, + Mali_Valhall_Gen1, + Mali_Valhall_Gen2, + Mali_Valhall_Gen3, + Mali_Valhall_Gen4, + Mali_5thGen_Gen1, + Mali_5thGen_Gen2, + NV_Maxwell, + NV_Maxwell_Tegra, + NV_Pascal, + NV_Pascal_MX, + NV_Pascal_Tegra, + NV_Volta, + NV_Turing_16, + NV_Turing, + NV_Turing_MX, + NV_Ampere, + NV_Ampere_Orin, + NV_Ada, + NV_Blackwell, + Intel_Gen7, + Intel_Gen8, + Intel_Gen9, + Intel_Gen11, + Intel_Gen12, + Intel_Gen12_7, + PowerVR_Series8, + PowerVR_Series9, + PowerVR_SeriesA, + PowerVR_SeriesB, + VeriSilicon, + SwiftShader, +}; +uint32 operator | 
(EGraphicsDeviceID lhs, EGraphicsDeviceID rhs); +uint32 operator | (uint32 lhs, EGraphicsDeviceID rhs); +uint32 operator | (EGraphicsDeviceID lhs, uint32 rhs); + +enum class EFilter : uint8 +{ + Nearest, + Linear, +}; +uint8 operator | (EFilter lhs, EFilter rhs); +uint8 operator | (uint8 lhs, EFilter rhs); +uint8 operator | (EFilter lhs, uint8 rhs); + +enum class EMipmapFilter : uint8 +{ + None, + Nearest, + Linear, +}; +uint8 operator | (EMipmapFilter lhs, EMipmapFilter rhs); +uint8 operator | (uint8 lhs, EMipmapFilter rhs); +uint8 operator | (EMipmapFilter lhs, uint8 rhs); + +enum class EAddressMode : uint8 +{ + Repeat, + MirrorRepeat, + ClampToEdge, + ClampToBorder, + MirrorClampToEdge, + Clamp, + MirrorClamp, +}; +uint8 operator | (EAddressMode lhs, EAddressMode rhs); +uint8 operator | (uint8 lhs, EAddressMode rhs); +uint8 operator | (EAddressMode lhs, uint8 rhs); + +enum class EBorderColor : uint8 +{ + FloatTransparentBlack, + FloatOpaqueBlack, + FloatOpaqueWhite, + IntTransparentBlack, + IntOpaqueBlack, + IntOpaqueWhite, +}; +uint8 operator | (EBorderColor lhs, EBorderColor rhs); +uint8 operator | (uint8 lhs, EBorderColor rhs); +uint8 operator | (EBorderColor lhs, uint8 rhs); + +enum class EReductionMode : uint8 +{ + Average, + Min, + Max, +}; +uint8 operator | (EReductionMode lhs, EReductionMode rhs); +uint8 operator | (uint8 lhs, EReductionMode rhs); +uint8 operator | (EReductionMode lhs, uint8 rhs); + +enum class ESamplerOpt : uint8 +{ + ArgumentBuffer, + UnnormalizedCoordinates, + NonSeamlessCubeMap, +}; +uint8 operator | (ESamplerOpt lhs, ESamplerOpt rhs); +uint8 operator | (uint8 lhs, ESamplerOpt rhs); +uint8 operator | (ESamplerOpt lhs, uint8 rhs); + +enum class EVertexInputRate : uint8 +{ + Vertex, + Instance, +}; +uint8 operator | (EVertexInputRate lhs, EVertexInputRate rhs); +uint8 operator | (uint8 lhs, EVertexInputRate rhs); +uint8 operator | (EVertexInputRate lhs, uint8 rhs); + +enum class EDescSetUsage : uint8 +{ + AllowPartialyUpdate, + 
UpdateTemplate, + ArgumentBuffer, + MutableArgBuffer, + MaybeUnsupported, +}; +uint8 operator | (EDescSetUsage lhs, EDescSetUsage rhs); +uint8 operator | (uint8 lhs, EDescSetUsage rhs); +uint8 operator | (EDescSetUsage lhs, uint8 rhs); + +enum class EPipelineOpt : uint16 +{ + + // Optimize pipeline during creation, may be slow. + Optimize, + CS_DispatchBase, + RT_NoNullAnyHitShaders, + RT_NoNullClosestHitShaders, + RT_NoNullMissShaders, + RT_NoNullIntersectionShaders, + RT_SkipTriangles, + RT_SkipAABBs, + + // Pipeline creation will fail if it is not exists in cache. + DontCompile, + + // When a pipeline is created, its state and shaders are compiled into zero or more device-specific executables, + // which are used when executing commands against that pipeline. + CaptureStatistics, + + // May include the final shader assembly, a binary form of the compiled shader, + // or the shader compiler’s internal representation at any number of intermediate compile steps. + CaptureInternalRepresentation, + + // Disable pipeline optimization to speedup creation. 
+ DontOptimize, + None, +}; +uint16 operator | (EPipelineOpt lhs, EPipelineOpt rhs); +uint16 operator | (uint16 lhs, EPipelineOpt rhs); +uint16 operator | (EPipelineOpt lhs, uint16 rhs); + +enum class EQueueMask : uint8 +{ + Graphics, + AsyncCompute, + AsyncTransfer, + VideoEncode, + VideoDecode, + All, +}; +uint8 operator | (EQueueMask lhs, EQueueMask rhs); +uint8 operator | (uint8 lhs, EQueueMask rhs); +uint8 operator | (EQueueMask lhs, uint8 rhs); + +enum class ESamplerChromaLocation : uint8 +{ + CositedEven, + Midpoint, +}; +uint8 operator | (ESamplerChromaLocation lhs, ESamplerChromaLocation rhs); +uint8 operator | (uint8 lhs, ESamplerChromaLocation rhs); +uint8 operator | (ESamplerChromaLocation lhs, uint8 rhs); + +enum class ESamplerYcbcrModelConversion : uint8 +{ + RGB_Identity, + Ycbcr_Identity, + Ycbcr_709, + Ycbcr_601, + Ycbcr_2020, +}; +uint8 operator | (ESamplerYcbcrModelConversion lhs, ESamplerYcbcrModelConversion rhs); +uint8 operator | (uint8 lhs, ESamplerYcbcrModelConversion rhs); +uint8 operator | (ESamplerYcbcrModelConversion lhs, uint8 rhs); + +enum class ESamplerYcbcrRange : uint8 +{ + ITU_Full, + ITU_Narrow, +}; +uint8 operator | (ESamplerYcbcrRange lhs, ESamplerYcbcrRange rhs); +uint8 operator | (uint8 lhs, ESamplerYcbcrRange rhs); +uint8 operator | (ESamplerYcbcrRange lhs, uint8 rhs); + +enum class ESurfaceFormat : uint8 +{ + BGRA8_sRGB_nonlinear, + RGBA8_sRGB_nonlinear, + BGRA8_BT709_nonlinear, + RGBA16F_Extended_sRGB_linear, + RGBA16F_sRGB_nonlinear, + RGBA16F_BT709_nonlinear, + RGBA16F_HDR10_ST2084, + RGBA16F_BT2020_linear, + RGB10A2_sRGB_nonlinear, + RGB10A2_HDR10_ST2084, +}; +uint8 operator | (ESurfaceFormat lhs, ESurfaceFormat rhs); +uint8 operator | (uint8 lhs, ESurfaceFormat rhs); +uint8 operator | (ESurfaceFormat lhs, uint8 rhs); + +enum class ERTInstanceOpt : uint8 +{ + TriangleCullDisable, + TriangleFrontCCW, + ForceOpaque, + ForceNonOpaque, + TriangleCullBack, + TriangleFrontCW, +}; +uint8 operator | (ERTInstanceOpt lhs, 
ERTInstanceOpt rhs); +uint8 operator | (uint8 lhs, ERTInstanceOpt rhs); +uint8 operator | (ERTInstanceOpt lhs, uint8 rhs); + +enum class EImageUsage : uint32 +{ + TransferSrc, + TransferDst, + Sampled, + Storage, + ColorAttachment, + DepthStencilAttachment, + InputAttachment, + ShadingRate, + All, + Transfer, + RWAttachment, +}; +uint32 operator | (EImageUsage lhs, EImageUsage rhs); +uint32 operator | (uint32 lhs, EImageUsage rhs); +uint32 operator | (EImageUsage lhs, uint32 rhs); + +enum class EImageOpt : uint32 +{ + BlitSrc, + BlitDst, + CubeCompatible, + MutableFormat, + Array2DCompatible, + BlockTexelViewCompatible, + SparseResidency, + SparseAliased, + Alias, + SampleLocationsCompatible, + StorageAtomic, + ColorAttachmentBlend, + SampledLinear, + SampledMinMax, + VertexPplnStore, + FragmentPplnStore, + LossyRTCompression, + ExtendedUsage, + All, + SparseResidencyAliased, +}; +uint32 operator | (EImageOpt lhs, EImageOpt rhs); +uint32 operator | (uint32 lhs, EImageOpt rhs); +uint32 operator | (EImageOpt lhs, uint32 rhs); + +enum class EBufferUsage : uint32 +{ + TransferSrc, + TransferDst, + UniformTexel, + StorageTexel, + Uniform, + Storage, + Index, + Vertex, + Indirect, + ShaderAddress, + ShaderBindingTable, + ASBuild_ReadOnly, + ASBuild_Scratch, + All, + Transfer, +}; +uint32 operator | (EBufferUsage lhs, EBufferUsage rhs); +uint32 operator | (uint32 lhs, EBufferUsage rhs); +uint32 operator | (EBufferUsage lhs, uint32 rhs); + +enum class EBufferOpt : uint32 +{ + SparseResidency, + SparseAliased, + VertexPplnStore, + FragmentPplnStore, + StorageTexelAtomic, + All, + SparseResidencyAliased, +}; +uint32 operator | (EBufferOpt lhs, EBufferOpt rhs); +uint32 operator | (uint32 lhs, EBufferOpt rhs); +uint32 operator | (EBufferOpt lhs, uint32 rhs); + +enum class EShadingRate : uint8 +{ + Size1x1, + Size1x2, + Size1x4, + Size2x1, + Size2x2, + Size2x4, + Size4x1, + Size4x2, + Size4x4, +}; +uint8 operator | (EShadingRate lhs, EShadingRate rhs); +uint8 operator | (uint8 
lhs, EShadingRate rhs); +uint8 operator | (EShadingRate lhs, uint8 rhs); + +enum class EShadingRateCombinerOp : uint8 +{ + Keep, + Replace, + Min, + Max, + Sum, + Mul, +}; +uint8 operator | (EShadingRateCombinerOp lhs, EShadingRateCombinerOp rhs); +uint8 operator | (uint8 lhs, EShadingRateCombinerOp rhs); +uint8 operator | (EShadingRateCombinerOp lhs, uint8 rhs); + +enum class ECubeFace : uint32 +{ + XPos, + XNeg, + YPos, + YNeg, + ZPos, + ZNeg, +}; +uint32 operator | (ECubeFace lhs, ECubeFace rhs); +uint32 operator | (uint32 lhs, ECubeFace rhs); +uint32 operator | (ECubeFace lhs, uint32 rhs); + +enum class ERasterFontMode : uint32 +{ + + // Raster font, glyph dimension on screen should be multiple of glyph bitmap size. + Raster, + + // SDF font, can be scaled, but have smooth angles. + SDF, + + // Multichannel SDF font, can be scaled, have sharp angles, but a bit less performance. + MC_SDF, +}; +uint32 operator | (ERasterFontMode lhs, ERasterFontMode rhs); +uint32 operator | (uint32 lhs, ERasterFontMode rhs); +uint32 operator | (ERasterFontMode lhs, uint32 rhs); + +enum class ELayoutType : uint8 +{ + FixedLayoutPx, + FixedLayoutMm, + PaddingLayoutPx, + PaddingLayoutMm, + PaddingLayoutRel, + AlignedLayoutPx, + AlignedLayoutMm, + AlignedLayoutRel, + StackLayoutL, + StackLayoutR, + StackLayoutB, + StackLayoutT, + FillStackLayout, +}; +uint8 operator | (ELayoutType lhs, ELayoutType rhs); +uint8 operator | (uint8 lhs, ELayoutType rhs); +uint8 operator | (ELayoutType lhs, uint8 rhs); + +enum class ELayoutAlign : uint8 +{ + Left, + Right, + Bottom, + Top, + CenterX, + CenterY, + FillX, + FillY, + Center, + Fill, +}; +uint8 operator | (ELayoutAlign lhs, ELayoutAlign rhs); +uint8 operator | (uint8 lhs, ELayoutAlign rhs); +uint8 operator | (ELayoutAlign lhs, uint8 rhs); + +enum class EStackOrigin : uint8 +{ + Left, + Right, + Bottom, + Top, +}; +uint8 operator | (EStackOrigin lhs, EStackOrigin rhs); +uint8 operator | (uint8 lhs, EStackOrigin rhs); +uint8 operator | 
(EStackOrigin lhs, uint8 rhs); using sbyte = int8; using ubyte = uint8; @@ -1329,1105 +2576,6 @@ void LogDebug (const string & msg); void LogFatal (const string & msg); void Assert (bool expr); void Assert (bool expr, const string & msg); -struct EImage -{ - EImage () {} - EImage (uint8) {} - operator uint8 () const; - static constexpr uint8 1D = 0; - static constexpr uint8 2D = 1; - static constexpr uint8 3D = 2; - static constexpr uint8 1DArray = 3; - static constexpr uint8 2DArray = 4; - static constexpr uint8 Cube = 5; - static constexpr uint8 CubeArray = 6; -}; - -struct EIndex -{ - EIndex () {} - EIndex (uint8) {} - operator uint8 () const; - static constexpr uint8 UShort = 0; - static constexpr uint8 UInt = 1; -}; - -struct EPixelFormat -{ - EPixelFormat () {} - EPixelFormat (uint8) {} - operator uint8 () const; - static constexpr uint8 RGBA16_SNorm = 0; - static constexpr uint8 RGBA8_SNorm = 1; - static constexpr uint8 RGB16_SNorm = 2; - static constexpr uint8 RGB8_SNorm = 3; - static constexpr uint8 RG16_SNorm = 4; - static constexpr uint8 RG8_SNorm = 5; - static constexpr uint8 R16_SNorm = 6; - static constexpr uint8 R8_SNorm = 7; - static constexpr uint8 RGBA16_UNorm = 8; - static constexpr uint8 RGBA8_UNorm = 9; - static constexpr uint8 RGB16_UNorm = 10; - static constexpr uint8 RGB8_UNorm = 11; - static constexpr uint8 RG16_UNorm = 12; - static constexpr uint8 RG8_UNorm = 13; - static constexpr uint8 R16_UNorm = 14; - static constexpr uint8 R8_UNorm = 15; - static constexpr uint8 RGB10_A2_UNorm = 16; - static constexpr uint8 RGBA4_UNorm = 17; - static constexpr uint8 RGB5_A1_UNorm = 18; - static constexpr uint8 RGB_5_6_5_UNorm = 19; - static constexpr uint8 BGR8_UNorm = 20; - static constexpr uint8 BGRA8_UNorm = 21; - static constexpr uint8 sRGB8 = 22; - static constexpr uint8 sRGB8_A8 = 23; - static constexpr uint8 sBGR8 = 24; - static constexpr uint8 sBGR8_A8 = 25; - static constexpr uint8 R8I = 26; - static constexpr uint8 RG8I = 27; - static 
constexpr uint8 RGB8I = 28; - static constexpr uint8 RGBA8I = 29; - static constexpr uint8 R16I = 30; - static constexpr uint8 RG16I = 31; - static constexpr uint8 RGB16I = 32; - static constexpr uint8 RGBA16I = 33; - static constexpr uint8 R32I = 34; - static constexpr uint8 RG32I = 35; - static constexpr uint8 RGB32I = 36; - static constexpr uint8 RGBA32I = 37; - static constexpr uint8 R64I = 38; - static constexpr uint8 R8U = 39; - static constexpr uint8 RG8U = 40; - static constexpr uint8 RGB8U = 41; - static constexpr uint8 RGBA8U = 42; - static constexpr uint8 R16U = 43; - static constexpr uint8 RG16U = 44; - static constexpr uint8 RGB16U = 45; - static constexpr uint8 RGBA16U = 46; - static constexpr uint8 R32U = 47; - static constexpr uint8 RG32U = 48; - static constexpr uint8 RGB32U = 49; - static constexpr uint8 RGBA32U = 50; - static constexpr uint8 RGB10_A2U = 51; - static constexpr uint8 R64U = 52; - static constexpr uint8 R16F = 53; - static constexpr uint8 RG16F = 54; - static constexpr uint8 RGB16F = 55; - static constexpr uint8 RGBA16F = 56; - static constexpr uint8 R32F = 57; - static constexpr uint8 RG32F = 58; - static constexpr uint8 RGB32F = 59; - static constexpr uint8 RGBA32F = 60; - static constexpr uint8 RGB_11_11_10F = 61; - static constexpr uint8 RGB9F_E5 = 62; - static constexpr uint8 Depth16 = 63; - static constexpr uint8 Depth24 = 64; - static constexpr uint8 Depth32F = 65; - static constexpr uint8 Depth16_Stencil8 = 66; - static constexpr uint8 Depth24_Stencil8 = 67; - static constexpr uint8 Depth32F_Stencil8 = 68; - static constexpr uint8 BC1_RGB8_UNorm = 69; - static constexpr uint8 BC1_sRGB8 = 70; - static constexpr uint8 BC1_RGB8_A1_UNorm = 71; - static constexpr uint8 BC1_sRGB8_A1 = 72; - static constexpr uint8 BC2_RGBA8_UNorm = 73; - static constexpr uint8 BC2_sRGB8 = 74; - static constexpr uint8 BC3_RGBA8_UNorm = 75; - static constexpr uint8 BC3_sRGB8 = 76; - static constexpr uint8 BC4_R8_SNorm = 77; - static constexpr uint8 
BC4_R8_UNorm = 78; - static constexpr uint8 BC5_RG8_SNorm = 79; - static constexpr uint8 BC5_RG8_UNorm = 80; - static constexpr uint8 BC6H_RGB16F = 81; - static constexpr uint8 BC6H_RGB16UF = 82; - static constexpr uint8 BC7_RGBA8_UNorm = 83; - static constexpr uint8 BC7_sRGB8_A8 = 84; - static constexpr uint8 ETC2_RGB8_UNorm = 85; - static constexpr uint8 ETC2_sRGB8 = 86; - static constexpr uint8 ETC2_RGB8_A1_UNorm = 87; - static constexpr uint8 ETC2_sRGB8_A1 = 88; - static constexpr uint8 ETC2_RGBA8_UNorm = 89; - static constexpr uint8 ETC2_sRGB8_A8 = 90; - static constexpr uint8 EAC_R11_SNorm = 91; - static constexpr uint8 EAC_R11_UNorm = 92; - static constexpr uint8 EAC_RG11_SNorm = 93; - static constexpr uint8 EAC_RG11_UNorm = 94; - static constexpr uint8 ASTC_RGBA8_4x4 = 95; - static constexpr uint8 ASTC_RGBA8_5x4 = 96; - static constexpr uint8 ASTC_RGBA8_5x5 = 97; - static constexpr uint8 ASTC_RGBA8_6x5 = 98; - static constexpr uint8 ASTC_RGBA8_6x6 = 99; - static constexpr uint8 ASTC_RGBA8_8x5 = 100; - static constexpr uint8 ASTC_RGBA8_8x6 = 101; - static constexpr uint8 ASTC_RGBA8_8x8 = 102; - static constexpr uint8 ASTC_RGBA8_10x5 = 103; - static constexpr uint8 ASTC_RGBA8_10x6 = 104; - static constexpr uint8 ASTC_RGBA8_10x8 = 105; - static constexpr uint8 ASTC_RGBA8_10x10 = 106; - static constexpr uint8 ASTC_RGBA8_12x10 = 107; - static constexpr uint8 ASTC_RGBA8_12x12 = 108; - static constexpr uint8 ASTC_sRGB8_A8_4x4 = 109; - static constexpr uint8 ASTC_sRGB8_A8_5x4 = 110; - static constexpr uint8 ASTC_sRGB8_A8_5x5 = 111; - static constexpr uint8 ASTC_sRGB8_A8_6x5 = 112; - static constexpr uint8 ASTC_sRGB8_A8_6x6 = 113; - static constexpr uint8 ASTC_sRGB8_A8_8x5 = 114; - static constexpr uint8 ASTC_sRGB8_A8_8x6 = 115; - static constexpr uint8 ASTC_sRGB8_A8_8x8 = 116; - static constexpr uint8 ASTC_sRGB8_A8_10x5 = 117; - static constexpr uint8 ASTC_sRGB8_A8_10x6 = 118; - static constexpr uint8 ASTC_sRGB8_A8_10x8 = 119; - static constexpr uint8 
ASTC_sRGB8_A8_10x10 = 120; - static constexpr uint8 ASTC_sRGB8_A8_12x10 = 121; - static constexpr uint8 ASTC_sRGB8_A8_12x12 = 122; - static constexpr uint8 ASTC_RGBA16F_4x4 = 123; - static constexpr uint8 ASTC_RGBA16F_5x4 = 124; - static constexpr uint8 ASTC_RGBA16F_5x5 = 125; - static constexpr uint8 ASTC_RGBA16F_6x5 = 126; - static constexpr uint8 ASTC_RGBA16F_6x6 = 127; - static constexpr uint8 ASTC_RGBA16F_8x5 = 128; - static constexpr uint8 ASTC_RGBA16F_8x6 = 129; - static constexpr uint8 ASTC_RGBA16F_8x8 = 130; - static constexpr uint8 ASTC_RGBA16F_10x5 = 131; - static constexpr uint8 ASTC_RGBA16F_10x6 = 132; - static constexpr uint8 ASTC_RGBA16F_10x8 = 133; - static constexpr uint8 ASTC_RGBA16F_10x10 = 134; - static constexpr uint8 ASTC_RGBA16F_12x10 = 135; - static constexpr uint8 ASTC_RGBA16F_12x12 = 136; - static constexpr uint8 G8B8G8R8_422_UNorm = 137; - static constexpr uint8 B8G8R8G8_422_UNorm = 138; - static constexpr uint8 G8_B8R8_420_UNorm = 139; - static constexpr uint8 G8_B8R8_422_UNorm = 140; - static constexpr uint8 G8_B8R8_444_UNorm = 141; - static constexpr uint8 G8_B8_R8_420_UNorm = 142; - static constexpr uint8 G8_B8_R8_422_UNorm = 143; - static constexpr uint8 G8_B8_R8_444_UNorm = 144; - static constexpr uint8 B10x6G10x6R10x6G10x6_422_UNorm = 145; - static constexpr uint8 G10x6B10x6G10x6R10x6_422_UNorm = 146; - static constexpr uint8 G10x6_B10x6R10x6_420_UNorm = 147; - static constexpr uint8 G10x6_B10x6R10x6_422_UNorm = 148; - static constexpr uint8 G10x6_B10x6R10x6_444_UNorm = 149; - static constexpr uint8 G10x6_B10x6_R10x6_420_UNorm = 150; - static constexpr uint8 G10x6_B10x6_R10x6_422_UNorm = 151; - static constexpr uint8 G10x6_B10x6_R10x6_444_UNorm = 152; - static constexpr uint8 R10x6G10x6B10x6A10x6_UNorm = 153; - static constexpr uint8 R10x6G10x6_UNorm = 154; - static constexpr uint8 R10x6_UNorm = 155; - static constexpr uint8 B12x4G12x4R12x4G12x4_422_UNorm = 156; - static constexpr uint8 G12x4B12x4G12x4R12x4_422_UNorm = 157; - 
static constexpr uint8 G12x4_B12x4R12x4_420_UNorm = 158; - static constexpr uint8 G12x4_B12x4R12x4_422_UNorm = 159; - static constexpr uint8 G12x4_B12x4R12x4_444_UNorm = 160; - static constexpr uint8 G12x4_B12x4_R12x4_420_UNorm = 161; - static constexpr uint8 G12x4_B12x4_R12x4_422_UNorm = 162; - static constexpr uint8 G12x4_B12x4_R12x4_444_UNorm = 163; - static constexpr uint8 R12x4G12x4B12x4A12x4_UNorm = 164; - static constexpr uint8 R12x4G12x4_UNorm = 165; - static constexpr uint8 R12x4_UNorm = 166; - static constexpr uint8 B16G16R16G16_422_UNorm = 167; - static constexpr uint8 G16B16G16R16_422_UNorm = 168; - static constexpr uint8 G16_B16R16_420_UNorm = 169; - static constexpr uint8 G16_B16R16_422_UNorm = 170; - static constexpr uint8 G16_B16R16_444_UNorm = 171; - static constexpr uint8 G16_B16_R16_420_UNorm = 172; - static constexpr uint8 G16_B16_R16_422_UNorm = 173; - static constexpr uint8 G16_B16_R16_444_UNorm = 174; - static constexpr uint8 SwapchainColor = 254; -}; - -struct EPixelFormatExternal -{ - EPixelFormatExternal () {} - EPixelFormatExternal (uint8) {} - operator uint8 () const; - static constexpr uint8 Android_Depth16 = 0; - static constexpr uint8 Android_DepthJPEG = 1; - static constexpr uint8 Android_DepthPointCloud = 2; - static constexpr uint8 Android_JPEG = 3; - static constexpr uint8 Android_Raw16 = 5; - static constexpr uint8 Android_Raw12 = 6; - static constexpr uint8 Android_Raw10 = 7; - static constexpr uint8 Android_NV16 = 9; - static constexpr uint8 Android_NV21 = 10; - static constexpr uint8 Android_YCBCR_P010 = 11; - static constexpr uint8 Android_YUV_420 = 12; - static constexpr uint8 Android_YUV_422 = 13; - static constexpr uint8 Android_YUV_444 = 14; - static constexpr uint8 Android_YUY2 = 15; - static constexpr uint8 Android_YV12 = 16; - static constexpr uint8 Android_Y8 = 17; - static constexpr uint8 Android_HEIC = 18; -}; - -struct ECompareOp -{ - ECompareOp () {} - ECompareOp (uint8) {} - operator uint8 () const; - static 
constexpr uint8 Never = 0; - static constexpr uint8 Less = 1; - static constexpr uint8 Equal = 2; - static constexpr uint8 LEqual = 3; - static constexpr uint8 Greater = 4; - static constexpr uint8 NotEqual = 5; - static constexpr uint8 GEqual = 6; - static constexpr uint8 Always = 7; - static constexpr uint8 LessOrEqual = 3; - static constexpr uint8 GreaterOrEqual = 6; -}; - -struct EBlendFactor -{ - EBlendFactor () {} - EBlendFactor (uint8) {} - operator uint8 () const; - static constexpr uint8 Zero = 0; - static constexpr uint8 One = 1; - static constexpr uint8 SrcColor = 2; - static constexpr uint8 OneMinusSrcColor = 3; - static constexpr uint8 DstColor = 4; - static constexpr uint8 OneMinusDstColor = 5; - static constexpr uint8 SrcAlpha = 6; - static constexpr uint8 OneMinusSrcAlpha = 7; - static constexpr uint8 DstAlpha = 8; - static constexpr uint8 OneMinusDstAlpha = 9; - static constexpr uint8 ConstColor = 10; - static constexpr uint8 OneMinusConstColor = 11; - static constexpr uint8 ConstAlpha = 12; - static constexpr uint8 OneMinusConstAlpha = 13; - static constexpr uint8 SrcAlphaSaturate = 14; - static constexpr uint8 Src1Color = 15; - static constexpr uint8 OneMinusSrc1Color = 16; - static constexpr uint8 Src1Alpha = 17; - static constexpr uint8 OneMinusSrc1Alpha = 18; -}; - -struct EBlendOp -{ - EBlendOp () {} - EBlendOp (uint8) {} - operator uint8 () const; - static constexpr uint8 Add = 0; - static constexpr uint8 Sub = 1; - static constexpr uint8 RevSub = 2; - static constexpr uint8 Min = 3; - static constexpr uint8 Max = 4; -}; - -struct ELogicOp -{ - ELogicOp () {} - ELogicOp (uint8) {} - operator uint8 () const; - static constexpr uint8 None = 0; - static constexpr uint8 Clear = 1; - static constexpr uint8 Set = 2; - static constexpr uint8 Copy = 3; - static constexpr uint8 CopyInverted = 4; - static constexpr uint8 NoOp = 5; - static constexpr uint8 Invert = 6; - static constexpr uint8 And = 7; - static constexpr uint8 NotAnd = 8; - static 
constexpr uint8 Or = 9; - static constexpr uint8 NotOr = 10; - static constexpr uint8 Xor = 11; - static constexpr uint8 Equiv = 12; - static constexpr uint8 AndReverse = 13; - static constexpr uint8 AndInverted = 14; - static constexpr uint8 OrReverse = 15; - static constexpr uint8 OrInverted = 16; -}; - -struct EStencilOp -{ - EStencilOp () {} - EStencilOp (uint8) {} - operator uint8 () const; - static constexpr uint8 Keep = 0; - static constexpr uint8 Zero = 1; - static constexpr uint8 Replace = 2; - static constexpr uint8 Incr = 3; - static constexpr uint8 IncrWrap = 4; - static constexpr uint8 Decr = 5; - static constexpr uint8 DecrWrap = 6; - static constexpr uint8 Invert = 7; -}; - -struct EPolygonMode -{ - EPolygonMode () {} - EPolygonMode (uint8) {} - operator uint8 () const; - static constexpr uint8 Point = 0; - static constexpr uint8 Line = 1; - static constexpr uint8 Fill = 2; -}; - -struct EPrimitive -{ - EPrimitive () {} - EPrimitive (uint8) {} - operator uint8 () const; - static constexpr uint8 Point = 0; - static constexpr uint8 LineList = 1; - static constexpr uint8 LineStrip = 2; - static constexpr uint8 LineListAdjacency = 3; - static constexpr uint8 LineStripAdjacency = 4; - static constexpr uint8 TriangleList = 5; - static constexpr uint8 TriangleStrip = 6; - static constexpr uint8 TriangleFan = 7; - static constexpr uint8 TriangleListAdjacency = 8; - static constexpr uint8 TriangleStripAdjacency = 9; - static constexpr uint8 Patch = 10; -}; - -struct ECullMode -{ - ECullMode () {} - ECullMode (uint8) {} - operator uint8 () const; - static constexpr uint8 None = 0; - static constexpr uint8 Front = 1; - static constexpr uint8 Back = 2; - static constexpr uint8 FontAndBack = 3; -}; - -struct EPipelineDynamicState -{ - EPipelineDynamicState () {} - EPipelineDynamicState (uint16) {} - operator uint16 () const; - static constexpr uint16 None = 0; - static constexpr uint16 StencilCompareMask = 1; - static constexpr uint16 StencilWriteMask = 2; - 
static constexpr uint16 StencilReference = 4; - static constexpr uint16 DepthBias = 8; - static constexpr uint16 BlendConstants = 16; - static constexpr uint16 RTStackSize = 64; - static constexpr uint16 FragmentShadingRate = 128; -}; - -struct EResourceState -{ - EResourceState () {} - EResourceState (uint32) {} - operator uint32 () const; - static constexpr uint32 Unknown = 0; - static constexpr uint32 Preserve = 1; - static constexpr uint32 ShaderStorage_Read = 271; - static constexpr uint32 ShaderStorage_Write = 528; - static constexpr uint32 ShaderStorage_RW = 785; - static constexpr uint32 ShaderUniform = 274; - static constexpr uint32 ShaderSample = 275; - static constexpr uint32 CopySrc = 258; - static constexpr uint32 CopyDst = 515; - static constexpr uint32 ClearDst = 516; - static constexpr uint32 BlitSrc = 261; - static constexpr uint32 BlitDst = 518; - static constexpr uint32 InputColorAttachment = 276; - static constexpr uint32 InputColorAttachment_RW = 533; - static constexpr uint32 ColorAttachment = 519; - static constexpr uint32 ColorAttachment_Blend = 775; - static constexpr uint32 DepthStencilAttachment_Read = 1288; - static constexpr uint32 DepthStencilAttachment_Write = 2569; - static constexpr uint32 DepthStencilAttachment_RW = 3849; - static constexpr uint32 DepthTest_StencilRW = 3338; - static constexpr uint32 DepthRW_StencilTest = 1803; - static constexpr uint32 DepthStencilTest_ShaderSample = 1304; - static constexpr uint32 DepthTest_DepthSample_StencilRW = 3353; - static constexpr uint32 InputDepthStencilAttachment = 1302; - static constexpr uint32 InputDepthStencilAttachment_RW = 3863; - static constexpr uint32 Host_Read = 282; - static constexpr uint32 PresentImage = 268; - static constexpr uint32 IndirectBuffer = 284; - static constexpr uint32 IndexBuffer = 285; - static constexpr uint32 VertexBuffer = 286; - static constexpr uint32 ShadingRateImage = 269; - static constexpr uint32 CopyRTAS_Read = 287; - static constexpr uint32 
CopyRTAS_Write = 544; - static constexpr uint32 BuildRTAS_Read = 289; - static constexpr uint32 BuildRTAS_Write = 546; - static constexpr uint32 BuildRTAS_RW = 802; - static constexpr uint32 BuildRTAS_IndirectBuffer = 291; - static constexpr uint32 ShaderRTAS = 283; - static constexpr uint32 RTShaderBindingTable = 292; - static constexpr uint32 DSTestBeforeFS = 4096; - static constexpr uint32 DSTestAfterFS = 8192; - static constexpr uint32 Invalidate = 16384; - static constexpr uint32 General = 782; - static constexpr uint32 MeshTaskShader = 32768; - static constexpr uint32 VertexProcessingShaders = 65536; - static constexpr uint32 TileShader = 131072; - static constexpr uint32 FragmentShader = 262144; - static constexpr uint32 PreRasterizationShaders = 98304; - static constexpr uint32 PostRasterizationShaders = 393216; - static constexpr uint32 ComputeShader = 524288; - static constexpr uint32 RayTracingShaders = 1048576; - static constexpr uint32 AllGraphicsShaders = 491520; - static constexpr uint32 AllShaders = 2064384; - static constexpr uint32 BuildRTAS_ScratchBuffer = 802; - static constexpr uint32 InputDepthAttachment = 1302; - static constexpr uint32 DepthStencilAttachment = 16137; -}; - -struct EImageAspect -{ - EImageAspect () {} - EImageAspect (uint8) {} - operator uint8 () const; - static constexpr uint8 Color = 1; - static constexpr uint8 Depth = 2; - static constexpr uint8 Stencil = 4; - static constexpr uint8 DepthStencil = 6; - static constexpr uint8 Plane_0 = 16; - static constexpr uint8 Plane_1 = 32; - static constexpr uint8 Plane_2 = 64; -}; - -struct EShaderIO -{ - EShaderIO () {} - EShaderIO (uint8) {} - operator uint8 () const; - static constexpr uint8 Int = 1; - static constexpr uint8 UInt = 2; - static constexpr uint8 Float = 3; - static constexpr uint8 UFloat = 4; - static constexpr uint8 Half = 5; - static constexpr uint8 UNorm = 6; - static constexpr uint8 SNorm = 7; - static constexpr uint8 sRGB = 8; - static constexpr uint8 AnyColor = 
9; - static constexpr uint8 Depth = 10; - static constexpr uint8 Stencil = 11; - static constexpr uint8 DepthStencil = 12; -}; - -struct ESubgroupTypes -{ - ESubgroupTypes () {} - ESubgroupTypes (uint8) {} - operator uint8 () const; - static constexpr uint8 Float16 = 32; - static constexpr uint8 Float32 = 1; - static constexpr uint8 Int8 = 4; - static constexpr uint8 Int16 = 8; - static constexpr uint8 Int32 = 2; - static constexpr uint8 Int64 = 16; -}; - -struct ESubgroupOperation -{ - ESubgroupOperation () {} - ESubgroupOperation (uint32) {} - operator uint32 () const; - static constexpr uint32 IndexAndSize = 0; - static constexpr uint32 Elect = 1; - static constexpr uint32 Barrier = 2; - static constexpr uint32 Any = 3; - static constexpr uint32 All = 4; - static constexpr uint32 AllEqual = 5; - static constexpr uint32 Add = 6; - static constexpr uint32 Mul = 7; - static constexpr uint32 Min = 8; - static constexpr uint32 Max = 9; - static constexpr uint32 And = 10; - static constexpr uint32 Or = 11; - static constexpr uint32 Xor = 12; - static constexpr uint32 InclusiveMul = 13; - static constexpr uint32 InclusiveAdd = 14; - static constexpr uint32 InclusiveMin = 15; - static constexpr uint32 InclusiveMax = 16; - static constexpr uint32 InclusiveAnd = 17; - static constexpr uint32 InclusiveOr = 18; - static constexpr uint32 InclusiveXor = 19; - static constexpr uint32 ExclusiveAdd = 20; - static constexpr uint32 ExclusiveMul = 21; - static constexpr uint32 ExclusiveMin = 22; - static constexpr uint32 ExclusiveMax = 23; - static constexpr uint32 ExclusiveAnd = 24; - static constexpr uint32 ExclusiveOr = 25; - static constexpr uint32 ExclusiveXor = 26; - static constexpr uint32 Ballot = 27; - static constexpr uint32 Broadcast = 28; - static constexpr uint32 BroadcastFirst = 29; - static constexpr uint32 InverseBallot = 30; - static constexpr uint32 BallotBitExtract = 31; - static constexpr uint32 BallotBitCount = 32; - static constexpr uint32 
BallotInclusiveBitCount = 33; - static constexpr uint32 BallotExclusiveBitCount = 34; - static constexpr uint32 BallotFindLSB = 35; - static constexpr uint32 BallotFindMSB = 36; - static constexpr uint32 Shuffle = 37; - static constexpr uint32 ShuffleXor = 38; - static constexpr uint32 ShuffleUp = 39; - static constexpr uint32 ShuffleDown = 40; - static constexpr uint32 ClusteredAdd = 41; - static constexpr uint32 ClusteredMul = 42; - static constexpr uint32 ClusteredMin = 43; - static constexpr uint32 ClusteredMax = 44; - static constexpr uint32 ClusteredAnd = 45; - static constexpr uint32 ClusteredOr = 46; - static constexpr uint32 ClusteredXor = 47; - static constexpr uint32 QuadBroadcast = 48; - static constexpr uint32 QuadSwapHorizontal = 49; - static constexpr uint32 QuadSwapVertical = 50; - static constexpr uint32 QuadSwapDiagonal = 51; - static constexpr uint32 _Basic_Begin = 0; - static constexpr uint32 _Basic_End = 2; - static constexpr uint32 _Vote_Begin = 3; - static constexpr uint32 _Vote_End = 5; - static constexpr uint32 _Arithmetic_Begin = 6; - static constexpr uint32 _Arithmetic_End = 26; - static constexpr uint32 _Ballot_Begin = 27; - static constexpr uint32 _Ballot_End = 36; - static constexpr uint32 _Shuffle_Begin = 37; - static constexpr uint32 _Shuffle_End = 38; - static constexpr uint32 _ShuffleRelative_Begin = 39; - static constexpr uint32 _ShuffleRelative_End = 40; - static constexpr uint32 _Clustered_Begin = 41; - static constexpr uint32 _Clustered_End = 47; - static constexpr uint32 _Quad_Begin = 48; - static constexpr uint32 _Quad_End = 51; -}; - -struct EFeature -{ - EFeature () {} - EFeature (uint8) {} - operator uint8 () const; - static constexpr uint8 Ignore = 0; - static constexpr uint8 RequireTrue = 2; - static constexpr uint8 RequireFalse = 1; -}; - -struct EShader -{ - EShader () {} - EShader (uint8) {} - operator uint8 () const; - static constexpr uint8 Vertex = 0; - static constexpr uint8 TessControl = 1; - static constexpr 
uint8 TessEvaluation = 2; - static constexpr uint8 Geometry = 3; - static constexpr uint8 Fragment = 4; - static constexpr uint8 Compute = 5; - static constexpr uint8 Tile = 6; - static constexpr uint8 MeshTask = 7; - static constexpr uint8 Mesh = 8; - static constexpr uint8 RayGen = 9; - static constexpr uint8 RayAnyHit = 10; - static constexpr uint8 RayClosestHit = 11; - static constexpr uint8 RayMiss = 12; - static constexpr uint8 RayIntersection = 13; - static constexpr uint8 RayCallable = 14; -}; - -struct EShaderStages -{ - EShaderStages () {} - EShaderStages (uint16) {} - operator uint16 () const; - static constexpr uint16 Vertex = 1; - static constexpr uint16 TessControl = 2; - static constexpr uint16 TessEvaluation = 4; - static constexpr uint16 Geometry = 8; - static constexpr uint16 Fragment = 16; - static constexpr uint16 Compute = 32; - static constexpr uint16 Tile = 64; - static constexpr uint16 MeshTask = 128; - static constexpr uint16 Mesh = 256; - static constexpr uint16 RayGen = 512; - static constexpr uint16 RayAnyHit = 1024; - static constexpr uint16 RayClosestHit = 2048; - static constexpr uint16 RayMiss = 4096; - static constexpr uint16 RayIntersection = 8192; - static constexpr uint16 RayCallable = 16384; - static constexpr uint16 All = 32767; - static constexpr uint16 AllGraphics = 415; - static constexpr uint16 GraphicsPipeStages = 31; - static constexpr uint16 MeshPipeStages = 400; - static constexpr uint16 VertexProcessingStages = 271; - static constexpr uint16 PreRasterizationStages = 399; - static constexpr uint16 PostRasterizationStages = 80; - static constexpr uint16 AllRayTracing = 32256; -}; - -struct EGPUVendor -{ - EGPUVendor () {} - EGPUVendor (uint32) {} - operator uint32 () const; - static constexpr uint32 AMD = 0; - static constexpr uint32 NVidia = 1; - static constexpr uint32 Intel = 2; - static constexpr uint32 ARM = 3; - static constexpr uint32 Qualcomm = 4; - static constexpr uint32 ImgTech = 5; - static constexpr uint32 
Microsoft = 6; - static constexpr uint32 Apple = 7; - static constexpr uint32 Mesa = 8; - static constexpr uint32 Broadcom = 9; - static constexpr uint32 Samsung = 10; - static constexpr uint32 VeriSilicon = 11; - static constexpr uint32 Huawei = 12; -}; - -struct EVertexType -{ - EVertexType () {} - EVertexType (uint16) {} - operator uint16 () const; - static constexpr uint16 Byte = 4; - static constexpr uint16 Byte2 = 5; - static constexpr uint16 Byte3 = 6; - static constexpr uint16 Byte4 = 7; - static constexpr uint16 Byte_Norm = 68; - static constexpr uint16 Byte2_Norm = 69; - static constexpr uint16 Byte3_Norm = 70; - static constexpr uint16 Byte4_Norm = 71; - static constexpr uint16 Byte_Scaled = 132; - static constexpr uint16 Byte2_Scaled = 133; - static constexpr uint16 Byte3_Scaled = 134; - static constexpr uint16 Byte4_Scaled = 135; - static constexpr uint16 UByte = 8; - static constexpr uint16 UByte2 = 9; - static constexpr uint16 UByte3 = 10; - static constexpr uint16 UByte4 = 11; - static constexpr uint16 UByte_Norm = 72; - static constexpr uint16 UByte2_Norm = 73; - static constexpr uint16 UByte3_Norm = 74; - static constexpr uint16 UByte4_Norm = 75; - static constexpr uint16 UByte_Scaled = 136; - static constexpr uint16 UByte2_Scaled = 137; - static constexpr uint16 UByte3_Scaled = 138; - static constexpr uint16 UByte4_Scaled = 139; - static constexpr uint16 Short = 12; - static constexpr uint16 Short2 = 13; - static constexpr uint16 Short3 = 14; - static constexpr uint16 Short4 = 15; - static constexpr uint16 Short_Norm = 76; - static constexpr uint16 Short2_Norm = 77; - static constexpr uint16 Short3_Norm = 78; - static constexpr uint16 Short4_Norm = 79; - static constexpr uint16 Short_Scaled = 140; - static constexpr uint16 Short2_Scaled = 141; - static constexpr uint16 Short3_Scaled = 142; - static constexpr uint16 Short4_Scaled = 143; - static constexpr uint16 UShort = 16; - static constexpr uint16 UShort2 = 17; - static constexpr uint16 UShort3 
= 18; - static constexpr uint16 UShort4 = 19; - static constexpr uint16 UShort_Norm = 80; - static constexpr uint16 UShort2_Norm = 81; - static constexpr uint16 UShort3_Norm = 82; - static constexpr uint16 UShort4_Norm = 83; - static constexpr uint16 UShort_Scaled = 144; - static constexpr uint16 UShort2_Scaled = 145; - static constexpr uint16 UShort3_Scaled = 146; - static constexpr uint16 UShort4_Scaled = 147; - static constexpr uint16 Int = 20; - static constexpr uint16 Int2 = 21; - static constexpr uint16 Int3 = 22; - static constexpr uint16 Int4 = 23; - static constexpr uint16 UInt = 24; - static constexpr uint16 UInt2 = 25; - static constexpr uint16 UInt3 = 26; - static constexpr uint16 UInt4 = 27; - static constexpr uint16 Long = 28; - static constexpr uint16 Long2 = 29; - static constexpr uint16 Long3 = 30; - static constexpr uint16 Long4 = 31; - static constexpr uint16 ULong = 32; - static constexpr uint16 ULong2 = 33; - static constexpr uint16 ULong3 = 34; - static constexpr uint16 ULong4 = 35; - static constexpr uint16 Half = 36; - static constexpr uint16 Half2 = 37; - static constexpr uint16 Half3 = 38; - static constexpr uint16 Half4 = 39; - static constexpr uint16 Float = 40; - static constexpr uint16 Float2 = 41; - static constexpr uint16 Float3 = 42; - static constexpr uint16 Float4 = 43; - static constexpr uint16 Double = 44; - static constexpr uint16 Double2 = 45; - static constexpr uint16 Double3 = 46; - static constexpr uint16 Double4 = 47; - static constexpr uint16 UInt_2_10_10_10 = 51; - static constexpr uint16 UInt_2_10_10_10_Norm = 115; - static constexpr uint16 UInt_2_10_10_10_Scaled = 179; -}; - -struct EGraphicsDeviceID -{ - EGraphicsDeviceID () {} - EGraphicsDeviceID (uint32) {} - operator uint32 () const; - static constexpr uint32 Adreno_500 = 0; - static constexpr uint32 Adreno_600 = 1; - static constexpr uint32 Adreno_700 = 2; - static constexpr uint32 AMD_GCN1 = 3; - static constexpr uint32 AMD_GCN2 = 4; - static constexpr uint32 
AMD_GCN3 = 5; - static constexpr uint32 AMD_GCN4 = 6; - static constexpr uint32 AMD_GCN5 = 7; - static constexpr uint32 AMD_GCN5_APU = 8; - static constexpr uint32 AMD_RDNA1 = 9; - static constexpr uint32 AMD_RDNA2 = 10; - static constexpr uint32 AMD_RDNA2_APU = 11; - static constexpr uint32 AMD_RDNA3 = 12; - static constexpr uint32 AMD_RDNA3_APU = 13; - static constexpr uint32 AMD_RDNA4 = 14; - static constexpr uint32 Apple_A8 = 15; - static constexpr uint32 Apple_A9_A10 = 16; - static constexpr uint32 Apple_A11 = 17; - static constexpr uint32 Apple_A12 = 18; - static constexpr uint32 Apple_A13 = 19; - static constexpr uint32 Apple_A14_M1 = 20; - static constexpr uint32 Apple_A15_M2 = 21; - static constexpr uint32 Apple_A16 = 22; - static constexpr uint32 Apple_A17_M3 = 23; - static constexpr uint32 Mali_Midgard_Gen2 = 24; - static constexpr uint32 Mali_Midgard_Gen3 = 25; - static constexpr uint32 Mali_Midgard_Gen4 = 26; - static constexpr uint32 Mali_Bifrost_Gen1 = 27; - static constexpr uint32 Mali_Bifrost_Gen2 = 28; - static constexpr uint32 Mali_Bifrost_Gen3 = 29; - static constexpr uint32 Mali_Valhall_Gen1 = 30; - static constexpr uint32 Mali_Valhall_Gen2 = 31; - static constexpr uint32 Mali_Valhall_Gen3 = 32; - static constexpr uint32 Mali_Valhall_Gen4 = 33; - static constexpr uint32 Mali_5thGen_Gen1 = 34; - static constexpr uint32 Mali_5thGen_Gen2 = 35; - static constexpr uint32 NV_Maxwell = 36; - static constexpr uint32 NV_Maxwell_Tegra = 37; - static constexpr uint32 NV_Pascal = 38; - static constexpr uint32 NV_Pascal_MX = 39; - static constexpr uint32 NV_Pascal_Tegra = 40; - static constexpr uint32 NV_Volta = 41; - static constexpr uint32 NV_Turing_16 = 42; - static constexpr uint32 NV_Turing = 43; - static constexpr uint32 NV_Turing_MX = 44; - static constexpr uint32 NV_Ampere = 45; - static constexpr uint32 NV_Ampere_Orin = 46; - static constexpr uint32 NV_Ada = 47; - static constexpr uint32 NV_Blackwell = 48; - static constexpr uint32 Intel_Gen7 = 49; 
- static constexpr uint32 Intel_Gen8 = 50; - static constexpr uint32 Intel_Gen9 = 51; - static constexpr uint32 Intel_Gen11 = 52; - static constexpr uint32 Intel_Gen12 = 53; - static constexpr uint32 Intel_Gen12_7 = 54; - static constexpr uint32 PowerVR_Series8 = 55; - static constexpr uint32 PowerVR_Series9 = 56; - static constexpr uint32 PowerVR_SeriesA = 57; - static constexpr uint32 PowerVR_SeriesB = 58; - static constexpr uint32 VeriSilicon = 59; - static constexpr uint32 SwiftShader = 60; -}; - -struct EFilter -{ - EFilter () {} - EFilter (uint8) {} - operator uint8 () const; - static constexpr uint8 Nearest = 0; - static constexpr uint8 Linear = 1; -}; - -struct EMipmapFilter -{ - EMipmapFilter () {} - EMipmapFilter (uint8) {} - operator uint8 () const; - static constexpr uint8 None = 0; - static constexpr uint8 Nearest = 1; - static constexpr uint8 Linear = 2; -}; - -struct EAddressMode -{ - EAddressMode () {} - EAddressMode (uint8) {} - operator uint8 () const; - static constexpr uint8 Repeat = 0; - static constexpr uint8 MirrorRepeat = 1; - static constexpr uint8 ClampToEdge = 2; - static constexpr uint8 ClampToBorder = 3; - static constexpr uint8 MirrorClampToEdge = 4; - static constexpr uint8 Clamp = 2; - static constexpr uint8 MirrorClamp = 4; -}; - -struct EBorderColor -{ - EBorderColor () {} - EBorderColor (uint8) {} - operator uint8 () const; - static constexpr uint8 FloatTransparentBlack = 0; - static constexpr uint8 FloatOpaqueBlack = 1; - static constexpr uint8 FloatOpaqueWhite = 2; - static constexpr uint8 IntTransparentBlack = 3; - static constexpr uint8 IntOpaqueBlack = 4; - static constexpr uint8 IntOpaqueWhite = 5; -}; - -struct EReductionMode -{ - EReductionMode () {} - EReductionMode (uint8) {} - operator uint8 () const; - static constexpr uint8 Average = 0; - static constexpr uint8 Min = 1; - static constexpr uint8 Max = 2; -}; - -struct ESamplerOpt -{ - ESamplerOpt () {} - ESamplerOpt (uint8) {} - operator uint8 () const; - static 
constexpr uint8 ArgumentBuffer = 1; - static constexpr uint8 UnnormalizedCoordinates = 4; - static constexpr uint8 NonSeamlessCubeMap = 2; -}; - -struct EVertexInputRate -{ - EVertexInputRate () {} - EVertexInputRate (uint8) {} - operator uint8 () const; - static constexpr uint8 Vertex = 0; - static constexpr uint8 Instance = 1; -}; - -struct EDescSetUsage -{ - EDescSetUsage () {} - EDescSetUsage (uint8) {} - operator uint8 () const; - static constexpr uint8 AllowPartialyUpdate = 1; - static constexpr uint8 UpdateTemplate = 2; - static constexpr uint8 ArgumentBuffer = 4; - static constexpr uint8 MutableArgBuffer = 8; - static constexpr uint8 MaybeUnsupported = 16; -}; - -struct EPipelineOpt -{ - EPipelineOpt () {} - EPipelineOpt (uint16) {} - operator uint16 () const; - - // Optimize pipeline during creation, may be slow. - static constexpr uint16 Optimize = 1; - static constexpr uint16 CS_DispatchBase = 2; - static constexpr uint16 RT_NoNullAnyHitShaders = 4; - static constexpr uint16 RT_NoNullClosestHitShaders = 8; - static constexpr uint16 RT_NoNullMissShaders = 16; - static constexpr uint16 RT_NoNullIntersectionShaders = 32; - static constexpr uint16 RT_SkipTriangles = 64; - static constexpr uint16 RT_SkipAABBs = 128; - - // Pipeline creation will fail if it is not exists in cache. - static constexpr uint16 DontCompile = 256; - - // When a pipeline is created, its state and shaders are compiled into zero or more device-specific executables, - // which are used when executing commands against that pipeline. - static constexpr uint16 CaptureStatistics = 512; - - // May include the final shader assembly, a binary form of the compiled shader, - // or the shader compiler’s internal representation at any number of intermediate compile steps. - static constexpr uint16 CaptureInternalRepresentation = 1024; - - // Disable pipeline optimization to speedup creation. 
- static constexpr uint16 DontOptimize = 0; - static constexpr uint16 None = 0; -}; - -struct EQueueMask -{ - EQueueMask () {} - EQueueMask (uint8) {} - operator uint8 () const; - static constexpr uint8 Graphics = 1; - static constexpr uint8 AsyncCompute = 2; - static constexpr uint8 AsyncTransfer = 4; - static constexpr uint8 VideoEncode = 8; - static constexpr uint8 VideoDecode = 16; - static constexpr uint8 All = 31; -}; - -struct ESamplerChromaLocation -{ - ESamplerChromaLocation () {} - ESamplerChromaLocation (uint8) {} - operator uint8 () const; - static constexpr uint8 CositedEven = 0; - static constexpr uint8 Midpoint = 1; -}; - -struct ESamplerYcbcrModelConversion -{ - ESamplerYcbcrModelConversion () {} - ESamplerYcbcrModelConversion (uint8) {} - operator uint8 () const; - static constexpr uint8 RGB_Identity = 0; - static constexpr uint8 Ycbcr_Identity = 1; - static constexpr uint8 Ycbcr_709 = 2; - static constexpr uint8 Ycbcr_601 = 3; - static constexpr uint8 Ycbcr_2020 = 4; -}; - -struct ESamplerYcbcrRange -{ - ESamplerYcbcrRange () {} - ESamplerYcbcrRange (uint8) {} - operator uint8 () const; - static constexpr uint8 ITU_Full = 0; - static constexpr uint8 ITU_Narrow = 1; -}; - -struct ESurfaceFormat -{ - ESurfaceFormat () {} - ESurfaceFormat (uint8) {} - operator uint8 () const; - static constexpr uint8 BGRA8_sRGB_nonlinear = 0; - static constexpr uint8 RGBA8_sRGB_nonlinear = 1; - static constexpr uint8 BGRA8_BT709_nonlinear = 2; - static constexpr uint8 RGBA16F_Extended_sRGB_linear = 3; - static constexpr uint8 RGBA16F_sRGB_nonlinear = 4; - static constexpr uint8 RGBA16F_BT709_nonlinear = 5; - static constexpr uint8 RGBA16F_HDR10_ST2084 = 6; - static constexpr uint8 RGBA16F_BT2020_linear = 7; - static constexpr uint8 RGB10A2_sRGB_nonlinear = 8; - static constexpr uint8 RGB10A2_HDR10_ST2084 = 9; -}; - -struct ERTInstanceOpt -{ - ERTInstanceOpt () {} - ERTInstanceOpt (uint8) {} - operator uint8 () const; - static constexpr uint8 TriangleCullDisable = 1; 
- static constexpr uint8 TriangleFrontCCW = 2; - static constexpr uint8 ForceOpaque = 4; - static constexpr uint8 ForceNonOpaque = 8; - static constexpr uint8 TriangleCullBack = 0; - static constexpr uint8 TriangleFrontCW = 0; -}; - -struct EImageUsage -{ - EImageUsage () {} - EImageUsage (uint32) {} - operator uint32 () const; - static constexpr uint32 TransferSrc = 1; - static constexpr uint32 TransferDst = 2; - static constexpr uint32 Sampled = 4; - static constexpr uint32 Storage = 8; - static constexpr uint32 ColorAttachment = 16; - static constexpr uint32 DepthStencilAttachment = 32; - static constexpr uint32 InputAttachment = 64; - static constexpr uint32 ShadingRate = 128; - static constexpr uint32 All = 255; - static constexpr uint32 Transfer = 3; - static constexpr uint32 RWAttachment = 80; -}; - -struct EImageOpt -{ - EImageOpt () {} - EImageOpt (uint32) {} - operator uint32 () const; - static constexpr uint32 BlitSrc = 1; - static constexpr uint32 BlitDst = 2; - static constexpr uint32 CubeCompatible = 4; - static constexpr uint32 MutableFormat = 8; - static constexpr uint32 Array2DCompatible = 16; - static constexpr uint32 BlockTexelViewCompatible = 32; - static constexpr uint32 SparseResidency = 64; - static constexpr uint32 SparseAliased = 128; - static constexpr uint32 Alias = 256; - static constexpr uint32 SampleLocationsCompatible = 512; - static constexpr uint32 StorageAtomic = 1024; - static constexpr uint32 ColorAttachmentBlend = 2048; - static constexpr uint32 SampledLinear = 4096; - static constexpr uint32 SampledMinMax = 8192; - static constexpr uint32 VertexPplnStore = 16384; - static constexpr uint32 FragmentPplnStore = 32768; - static constexpr uint32 LossyRTCompression = 65536; - static constexpr uint32 All = 131071; - static constexpr uint32 SparseResidencyAliased = 192; -}; - -struct EBufferUsage -{ - EBufferUsage () {} - EBufferUsage (uint32) {} - operator uint32 () const; - static constexpr uint32 TransferSrc = 1; - static constexpr 
uint32 TransferDst = 2; - static constexpr uint32 UniformTexel = 4; - static constexpr uint32 StorageTexel = 8; - static constexpr uint32 Uniform = 16; - static constexpr uint32 Storage = 32; - static constexpr uint32 Index = 64; - static constexpr uint32 Vertex = 128; - static constexpr uint32 Indirect = 256; - static constexpr uint32 ShaderAddress = 512; - static constexpr uint32 ShaderBindingTable = 1024; - static constexpr uint32 ASBuild_ReadOnly = 2048; - static constexpr uint32 ASBuild_Scratch = 4096; - static constexpr uint32 All = 8191; - static constexpr uint32 Transfer = 3; -}; - -struct EBufferOpt -{ - EBufferOpt () {} - EBufferOpt (uint32) {} - operator uint32 () const; - static constexpr uint32 SparseResidency = 1; - static constexpr uint32 SparseAliased = 2; - static constexpr uint32 VertexPplnStore = 4; - static constexpr uint32 FragmentPplnStore = 8; - static constexpr uint32 StorageTexelAtomic = 16; - static constexpr uint32 All = 31; - static constexpr uint32 SparseResidencyAliased = 3; -}; - -struct EShadingRate -{ - EShadingRate () {} - EShadingRate (uint8) {} - operator uint8 () const; - static constexpr uint8 Size1x1 = 16; - static constexpr uint8 Size1x2 = 32; - static constexpr uint8 Size1x4 = 48; - static constexpr uint8 Size2x1 = 64; - static constexpr uint8 Size2x2 = 80; - static constexpr uint8 Size2x4 = 96; - static constexpr uint8 Size4x1 = 112; - static constexpr uint8 Size4x2 = 128; - static constexpr uint8 Size4x4 = 144; -}; - -struct EShadingRateCombinerOp -{ - EShadingRateCombinerOp () {} - EShadingRateCombinerOp (uint8) {} - operator uint8 () const; - static constexpr uint8 Keep = 0; - static constexpr uint8 Replace = 1; - static constexpr uint8 Min = 2; - static constexpr uint8 Max = 3; - static constexpr uint8 Sum = 4; - static constexpr uint8 Mul = 5; -}; - struct MultiSamples { MultiSamples (); @@ -2452,19 +2600,6 @@ struct MipmapLevel MipmapLevel (uint); }; -struct ECubeFace -{ - ECubeFace () {} - ECubeFace (uint32) {} - 
operator uint32 () const; - static constexpr uint32 XPos = 0; - static constexpr uint32 XNeg = 1; - static constexpr uint32 YPos = 2; - static constexpr uint32 YNeg = 3; - static constexpr uint32 ZPos = 4; - static constexpr uint32 ZNeg = 5; -}; - struct Texture { Texture (); @@ -2497,22 +2632,6 @@ struct ImageAtlas void Format (EPixelFormat newFormat); }; -struct ERasterFontMode -{ - ERasterFontMode () {} - ERasterFontMode (uint32) {} - operator uint32 () const; - - // Raster font, glyph dimension on screen should be multiple of glyph bitmap size. - static constexpr uint32 Raster = 0; - - // SDF font, can be scaled, but have smooth angles. - static constexpr uint32 SDF = 1; - - // Multichannel SDF font, can be scaled, have sharp angles, but a bit less performance. - static constexpr uint32 MC_SDF = 2; -}; - struct RasterFont { RasterFont (); @@ -2589,54 +2708,6 @@ struct UIStyleCollection void Store (const string & nameInArchive); }; -struct ELayoutType -{ - ELayoutType () {} - ELayoutType (uint8) {} - operator uint8 () const; - static constexpr uint8 FixedLayoutPx = 1; - static constexpr uint8 FixedLayoutMm = 2; - static constexpr uint8 PaddingLayoutPx = 3; - static constexpr uint8 PaddingLayoutMm = 4; - static constexpr uint8 PaddingLayoutRel = 5; - static constexpr uint8 AlignedLayoutPx = 6; - static constexpr uint8 AlignedLayoutMm = 7; - static constexpr uint8 AlignedLayoutRel = 8; - static constexpr uint8 StackLayoutL = 9; - static constexpr uint8 StackLayoutR = 10; - static constexpr uint8 StackLayoutB = 11; - static constexpr uint8 StackLayoutT = 12; - static constexpr uint8 FillStackLayout = 13; -}; - -struct ELayoutAlign -{ - ELayoutAlign () {} - ELayoutAlign (uint8) {} - operator uint8 () const; - static constexpr uint8 Left = 1; - static constexpr uint8 Right = 2; - static constexpr uint8 Bottom = 4; - static constexpr uint8 Top = 8; - static constexpr uint8 CenterX = 16; - static constexpr uint8 CenterY = 32; - static constexpr uint8 FillX = 3; - 
static constexpr uint8 FillY = 12; - static constexpr uint8 Center = 48; - static constexpr uint8 Fill = 15; -}; - -struct EStackOrigin -{ - EStackOrigin () {} - EStackOrigin (uint8) {} - operator uint8 () const; - static constexpr uint8 Left = 0; - static constexpr uint8 Right = 1; - static constexpr uint8 Bottom = 2; - static constexpr uint8 Top = 3; -}; - struct BaseUIDrawable { }; @@ -2712,39 +2783,27 @@ struct UIWidget }; template <> -struct RC : Mesh -{ - RC (const Mesh &); -}; - -template <> -struct RC : Material -{ - RC (const Material &); -}; - -template <> -struct RC : FixedLayout +struct RC : BaseUIController { - RC (const FixedLayout &); + RC (const BaseUIController &); }; template <> -struct RC : UIFontStyle +struct RC : UIWidget { - RC (const UIFontStyle &); + RC (const UIWidget &); }; template <> -struct RC : BaseUIDrawable +struct RC : UIStyleCollection { - RC (const BaseUIDrawable &); + RC (const UIStyleCollection &); }; template <> -struct RC : Model +struct RC : BaseLayout { - RC (const Model &); + RC (const BaseLayout &); }; template <> @@ -2772,33 +2831,51 @@ struct RC : FillStackLayout }; template <> -struct RC : BaseUIController +struct RC : UIFontStyle { - RC (const BaseUIController &); + RC (const UIFontStyle &); }; template <> -struct RC : UIStyleCollection +struct RC : BaseUIDrawable { - RC (const UIStyleCollection &); + RC (const BaseUIDrawable &); }; template <> -struct RC : UIWidget +struct RC : Model { - RC (const UIWidget &); + RC (const Model &); }; template <> -struct RC : BaseLayout +struct RC : Mesh { - RC (const BaseLayout &); + RC (const Mesh &); }; template <> -struct RC : PaddingLayout +struct RC : Material { - RC (const PaddingLayout &); + RC (const Material &); +}; + +template <> +struct RC : FixedLayout +{ + RC (const FixedLayout &); +}; + +template <> +struct RC : UIColorStyle +{ + RC (const UIColorStyle &); +}; + +template <> +struct RC : RasterFont +{ + RC (const RasterFont &); }; template <> @@ -2820,14 +2897,8 @@ 
struct RC : Texture }; template <> -struct RC : UIColorStyle -{ - RC (const UIColorStyle &); -}; - -template <> -struct RC : RasterFont +struct RC : PaddingLayout { - RC (const RasterFont &); + RC (const PaddingLayout &); }; diff --git a/AE/engine/shared_data/scripts/input_actions.as b/AE/engine/shared_data/scripts/input_actions.as index 9c5a2047..90a39b0e 100644 --- a/AE/engine/shared_data/scripts/input_actions.as +++ b/AE/engine/shared_data/scripts/input_actions.as @@ -1,4 +1,4 @@ -//6e22be57 +//f60471aa #include #include @@ -8,7 +8,6 @@ using int8 = std::int8_t; using uint8 = std::uint8_t; using int16 = std::int16_t; using uint16 = std::uint16_t; -using int = std::int32_t; using uint = std::uint32_t; using int32 = std::int32_t; using uint32 = std::uint32_t; @@ -22,47 +21,742 @@ struct RC; template using array = std::vector; -struct EGestureType; -struct ubyte3; -struct ubyte2; -struct ubyte4; -struct EGestureState; -struct OpenVR_Input; -struct WinAPI_ActionBindings; -struct WinAPI_Input; -struct EValueType; -struct VecSwizzle; -struct Android_ActionBindings; -struct bool4; -struct int3; +using namespace std::string_literals; + +template +string operator + (const string &lhs, T rhs); + +struct int4; struct bool2; +struct int3; struct int2; struct bool3; -struct GLFW_Input; -struct int4; +struct bool4; struct short4; -struct OpenVR_BindingsMode; +struct sbyte2; struct ushort3; -struct sbyte4; +struct ushort2; +struct ActionInfo; struct Android_BindingsMode; +struct OpenVR_BindingsMode; +struct Android_ActionBindings; +struct VecSwizzle; +struct ubyte4; +struct WinAPI_ActionBindings; +struct ubyte3; +struct ubyte2; +struct OpenVR_ActionBindings; +struct WinAPI_BindingsMode; +struct GLFW_ActionBindings; +struct float2; +struct float3; +struct uint3; +struct uint2; +struct float4; +struct uint4; +struct sbyte4; struct sbyte3; -struct ActionInfo; -struct sbyte2; +struct short2; struct ushort4; -struct ushort2; struct short3; -struct short2; struct GLFW_BindingsMode; 
-struct uint3; -struct uint4; -struct uint2; -struct Android_Input; -struct float4; -struct float2; -struct float3; -struct WinAPI_BindingsMode; -struct GLFW_ActionBindings; -struct OpenVR_ActionBindings; + +enum class EGestureState : uint8 +{ + Begin, + Update, + End, + Cancel, + Outside, +}; +uint8 operator | (EGestureState lhs, EGestureState rhs); +uint8 operator | (uint8 lhs, EGestureState rhs); +uint8 operator | (EGestureState lhs, uint8 rhs); + +enum class EGestureType : uint8 +{ + Hold, + Move, + LongPress_Move, + ScaleRotate2D, + Down, + Click, + LongPress, + DoubleClick, +}; +uint8 operator | (EGestureType lhs, EGestureType rhs); +uint8 operator | (uint8 lhs, EGestureType rhs); +uint8 operator | (EGestureType lhs, uint8 rhs); + +enum class EValueType : uint8 +{ + Unknown, + Float, + Float2, + Float3, + Float4, + Quat, + Float4x4, + Chars, + GNS, +}; +uint8 operator | (EValueType lhs, EValueType rhs); +uint8 operator | (uint8 lhs, EValueType rhs); +uint8 operator | (EValueType lhs, uint8 rhs); + +enum class GLFW_Input : uint16 +{ + Space, + Apostrophe, + Comma, + Minus, + Period, + Slash, + Semicolon, + Equal, + A, + B, + C, + D, + E, + F, + G, + H, + I, + J, + K, + L, + M, + N, + O, + P, + Q, + R, + S, + T, + U, + V, + W, + X, + Y, + Z, + LeftBracket, + BackSlash, + RightBracket, + GraveAccent, + World1, + World2, + Escape, + Enter, + Tab, + Backspace, + Insert, + Delete, + ArrowRight, + ArrowLeft, + ArrowDown, + ArrowUp, + PageUp, + PageDown, + Home, + End, + CapsLock, + ScrollLock, + NumLock, + PrintScreen, + Pause, + F1, + F2, + F3, + F4, + F5, + F6, + F7, + F8, + F9, + F10, + F11, + F12, + F13, + F14, + F15, + F16, + F17, + F18, + F19, + F20, + F21, + F22, + F23, + F24, + F25, + LeftShift, + LeftControl, + LeftAlt, + LeftSuper, + RightShift, + RightControl, + RightAlt, + RightSuper, + Menu, + KP_0, + KP_1, + KP_2, + KP_3, + KP_4, + KP_5, + KP_6, + KP_7, + KP_8, + KP_9, + KP_Decimal, + KP_Divide, + KP_Multiply, + KP_Subtract, + KP_Add, + KP_Enter, + 
KP_Equal, + MouseBtn1, + MouseBtn2, + MouseBtn3, + MouseBtn4, + MouseBtn5, + MouseBtn6, + MouseBtn7, + MouseBtn8, + Accelerometer, + MagneticField, + GeoLocation, + Gyroscope, + AmbientLight, + AirPressure, + Proximity, + Gravity, + LinearAcceleration, + RotationVector, + RelativeHumidity, + AirTemperature, + GameRotationVector, + Pose6DOF, + MultiTouch, + MouseWheel, + CursorPos, + CursorPos_mm, + CursorDelta, + CursorDelta_norm, + TouchPos, + TouchPos_mm, + TouchDelta, + TouchDelta_norm, + MouseLeft, + MouseRight, + MouseMiddle, +}; +uint16 operator | (GLFW_Input lhs, GLFW_Input rhs); +uint16 operator | (uint16 lhs, GLFW_Input rhs); +uint16 operator | (GLFW_Input lhs, uint16 rhs); +static constexpr GLFW_Input GLFW_Input_0 = GLFW_Input(48); +static constexpr GLFW_Input GLFW_Input_1 = GLFW_Input(49); +static constexpr GLFW_Input GLFW_Input_2 = GLFW_Input(50); +static constexpr GLFW_Input GLFW_Input_3 = GLFW_Input(51); +static constexpr GLFW_Input GLFW_Input_4 = GLFW_Input(52); +static constexpr GLFW_Input GLFW_Input_5 = GLFW_Input(53); +static constexpr GLFW_Input GLFW_Input_6 = GLFW_Input(54); +static constexpr GLFW_Input GLFW_Input_7 = GLFW_Input(55); +static constexpr GLFW_Input GLFW_Input_8 = GLFW_Input(56); +static constexpr GLFW_Input GLFW_Input_9 = GLFW_Input(57); + +enum class WinAPI_Input : uint16 +{ + Backspace, + Tab, + Clear, + Enter, + LeftShift, + RightShift, + LeftCtrl, + RightCtrl, + LeftAlt, + RightAlt, + Pause, + CapsLock, + Escape, + Space, + PageUp, + PageDown, + End, + Home, + ArrowLeft, + ArrowUp, + ArrowRight, + ArrowDown, + Select, + Print, + Execute, + Snapshot, + Insert, + Delete, + Help, + A, + B, + C, + D, + E, + F, + G, + H, + I, + J, + K, + L, + M, + N, + O, + P, + Q, + R, + S, + T, + U, + V, + W, + X, + Y, + Z, + LeftWin, + RightWin, + LeftApps, + RightApps, + Sleep, + F1, + F2, + F3, + F4, + F5, + F6, + F7, + F8, + F9, + F10, + F11, + F12, + F13, + F14, + F15, + F16, + F17, + F18, + F19, + F20, + F21, + F22, + F23, + F24, + NumLock, 
+ ScrollLock, + BrowserBack, + BrowserForward, + BrowserRefresh, + BrowserStop, + BrowserSearch, + BrowserFavorites, + BrowserHome, + VolumeMute, + VolumeDown, + VolumeUp, + MediaNextTrack, + MediaPrevTrack, + MediaStop, + MediaPlayPause, + LaunchMail, + LaunchMediaSelect, + LaunchApp1, + LaunchApp2, + Semicolon, + Equal, + Comma, + Minus, + Period, + Slash, + GraveAccent, + LeftBracket, + BackSlash, + RightBracket, + Apostrophe, + KP_Enter, + KP_0, + KP_1, + KP_2, + KP_3, + KP_4, + KP_5, + KP_6, + KP_7, + KP_8, + KP_9, + KP_Multiply, + KP_Add, + KP_Separator, + KP_Subtract, + KP_Decimal, + KP_Divide, + KP_Divide2, + KP_End, + KP_ArrowLeft, + KP_ArrowUp, + KP_ArrowRight, + KP_ArrowDown, + KP_Home, + KP_PageUp, + KP_PageDown, + KP_Insert, + KP_Delete, + MouseBtn0, + MouseBtn1, + MouseBtn2, + MouseBtn3, + MouseBtn4, + MultiTouch, + MouseWheel, + CursorPos, + CursorPos_mm, + CursorDelta, + CursorDelta_norm, + TouchPos, + TouchPos_mm, + TouchDelta, + TouchDelta_norm, + MouseLeft, + MouseRight, + MouseMiddle, +}; +uint16 operator | (WinAPI_Input lhs, WinAPI_Input rhs); +uint16 operator | (uint16 lhs, WinAPI_Input rhs); +uint16 operator | (WinAPI_Input lhs, uint16 rhs); +static constexpr WinAPI_Input WinAPI_Input_0 = WinAPI_Input(48); +static constexpr WinAPI_Input WinAPI_Input_1 = WinAPI_Input(49); +static constexpr WinAPI_Input WinAPI_Input_2 = WinAPI_Input(50); +static constexpr WinAPI_Input WinAPI_Input_3 = WinAPI_Input(51); +static constexpr WinAPI_Input WinAPI_Input_4 = WinAPI_Input(52); +static constexpr WinAPI_Input WinAPI_Input_5 = WinAPI_Input(53); +static constexpr WinAPI_Input WinAPI_Input_6 = WinAPI_Input(54); +static constexpr WinAPI_Input WinAPI_Input_7 = WinAPI_Input(55); +static constexpr WinAPI_Input WinAPI_Input_8 = WinAPI_Input(56); +static constexpr WinAPI_Input WinAPI_Input_9 = WinAPI_Input(57); + +enum class Android_Input : uint16 +{ + Back, + Star, + Paund, + VolumeUp, + VolumeDown, + VolumeMute, + Power, + Camera, + Clear, + A, + B, + C, + D, + 
E, + F, + G, + H, + I, + J, + K, + L, + M, + N, + O, + P, + Q, + R, + S, + T, + U, + V, + W, + X, + Y, + Z, + Comma, + Period, + LeftAlt, + RightAlt, + LeftShift, + RightShift, + Tab, + Space, + Sym, + Browser, + LaunchMail, + Enter, + Delete, + Grave, + Minus, + Equal, + LeftBracket, + RightBracket, + BackSlash, + Semicolon, + Apostrophe, + Slash, + At, + Num, + Plus, + Menu, + Notification, + Search, + MicMute, + PageUp, + PageDown, + PictSymbols, + SwitchCharset, + Escape, + ForwardDelete, + LeftControl, + RightControl, + CapsLock, + ScrollLock, + LeftMeta, + RightMeta, + Function, + SysRq, + PauseBreak, + MoveHome, + MoveEnd, + Insert, + Forward, + F1, + F2, + F3, + F4, + F5, + F6, + F7, + F8, + F9, + F10, + F11, + F12, + NumLock, + Info, + ChannelUp, + ChannelDown, + ZoomIn, + ZoomOut, + Window, + Guide, + DVR, + Bookmark, + Captions, + Settings, + AppSwitch, + LangSwitch, + MannerMode, + Contacts, + Calendar, + Music, + Calculator, + ZenkakuHankaku, + Eisu, + Muhenkan, + Henkan, + KatakanaHiragana, + Yen, + Ro, + Kana, + Assist, + BrightnessDown, + BrightnessUp, + Sleep, + Wakeup, + Pairing, + LastChannel, + VoiceAssist, + Help, + NavPrev, + NavNext, + NavIn, + NavOut, + StemPrimary, + Stem1, + Stem2, + Stem3, + SoftSleep, + Cut, + Copy, + Paste, + SysNavUp, + SysNavDown, + SysNavLeft, + SysNavRight, + AllApps, + Refresh, + ThumbsUp, + ThumbsDown, + ProfileSwitch, + MediaPlayPause, + MediaStop, + MediaNext, + MediaPrev, + MediaRewind, + MediaFastForward, + MediaPlay, + MediaPause, + MediaClose, + MediaEject, + MediaRecord, + MediaAudioTrack, + MediaTopMenu, + MediaSkipForward, + MediaSkipBackward, + MediaStepForward, + MediaStepBackward, + DPadUp, + DPadDown, + DPadLeft, + DPadRight, + DPadCenter, + DPadUpLeft, + DPadDownLeft, + DPadUpRight, + DPadDownRight, + GPadA, + GPadB, + GPadC, + GPadX, + GPadY, + GPadZ, + GPadL1, + GPadR1, + GPadL2, + GPadR2, + GPadThumbL, + GPadThumbR, + GPadStart, + GPadSelect, + GPadMode, + GPad1, + GPad2, + GPad3, + GPad4, + 
GPad5, + GPad6, + GPad7, + GPad8, + GPad9, + GPad10, + GPad11, + GPad12, + GPad13, + GPad14, + GPad15, + GPad16, + KP_0, + KP_1, + KP_2, + KP_3, + KP_4, + KP_5, + KP_6, + KP_7, + KP_8, + KP_9, + KP_Divide, + KP_Multiply, + KP_Subtract, + KP_Add, + KP_Dot, + KP_Comma, + KP_Enter, + KP_Equal, + KP_LeftParen, + KP_RightParen, + TV, + TV_Power, + TV_Input, + TV_STBPower, + TV_STBInput, + AV_Power, + AVR_Input, + TV_ProgRed, + TV_ProgGreen, + TV_ProgYellow, + TV_ProgBlue, + TV_DataService, + TV_RadioService, + TV_Teletext, + TV_NumberEntry, + TV_TerrestrialAnalog, + TV_TerrestrialDigital, + TV_Satellite, + TV_SatelliteBS, + TV_SattelliteCS, + TV_SatelliteService, + TV_Network, + TV_AntennaCable, + TV_InputHdmi1, + TV_InputHdmi2, + TV_InputHdmi3, + TV_InputHdmi4, + TV_inputComposite1, + TV_inputComposite2, + TV_InputComponent1, + TV_InputComponent2, + TV_InputVGA1, + TV_AudioDesc, + TV_AudioDescMixUp, + TV_AudioDescMixDown, + TV_ZoomMode, + TV_ContentsMenu, + TV_MediaContextMenu, + TV_TimerProgramming, + Accelerometer, + MagneticField, + GeoLocation, + Gyroscope, + AmbientLight, + AirPressure, + Proximity, + Gravity, + LinearAcceleration, + RotationVector, + RelativeHumidity, + AirTemperature, + GameRotationVector, + Pose6DOF, + MultiTouch, + TouchPos, + TouchPos_mm, + TouchDelta, + TouchDelta_norm, +}; +uint16 operator | (Android_Input lhs, Android_Input rhs); +uint16 operator | (uint16 lhs, Android_Input rhs); +uint16 operator | (Android_Input lhs, uint16 rhs); +static constexpr Android_Input Android_Input_0 = Android_Input(7); +static constexpr Android_Input Android_Input_1 = Android_Input(8); +static constexpr Android_Input Android_Input_2 = Android_Input(9); +static constexpr Android_Input Android_Input_3 = Android_Input(10); +static constexpr Android_Input Android_Input_4 = Android_Input(11); +static constexpr Android_Input Android_Input_5 = Android_Input(12); +static constexpr Android_Input Android_Input_6 = Android_Input(13); +static constexpr Android_Input 
Android_Input_7 = Android_Input(14); +static constexpr Android_Input Android_Input_8 = Android_Input(15); +static constexpr Android_Input Android_Input_9 = Android_Input(16); +static constexpr Android_Input Android_Input_3DMode = Android_Input(206); +static constexpr Android_Input Android_Input_11 = Android_Input(227); +static constexpr Android_Input Android_Input_12 = Android_Input(228); + +enum class OpenVR_Input : uint16 +{ +}; +uint16 operator | (OpenVR_Input lhs, OpenVR_Input rhs); +uint16 operator | (uint16 lhs, OpenVR_Input rhs); +uint16 operator | (OpenVR_Input lhs, uint16 rhs); using sbyte = int8; using ubyte = uint8; @@ -1078,49 +1772,6 @@ struct VecSwizzle VecSwizzle (const string & swizzle); }; -struct EGestureState -{ - EGestureState () {} - EGestureState (uint8) {} - operator uint8 () const; - static constexpr uint8 Begin = 0; - static constexpr uint8 Update = 1; - static constexpr uint8 End = 2; - static constexpr uint8 Cancel = 3; - static constexpr uint8 Outside = 4; -}; - -struct EGestureType -{ - EGestureType () {} - EGestureType (uint8) {} - operator uint8 () const; - static constexpr uint8 Hold = 3; - static constexpr uint8 Move = 5; - static constexpr uint8 LongPress_Move = 6; - static constexpr uint8 ScaleRotate2D = 7; - static constexpr uint8 Down = 0; - static constexpr uint8 Click = 1; - static constexpr uint8 LongPress = 4; - static constexpr uint8 DoubleClick = 2; -}; - -struct EValueType -{ - EValueType () {} - EValueType (uint8) {} - operator uint8 () const; - static constexpr uint8 Unknown = 255; - static constexpr uint8 Float = 0; - static constexpr uint8 Float2 = 1; - static constexpr uint8 Float3 = 2; - static constexpr uint8 Float4 = 3; - static constexpr uint8 Quat = 4; - static constexpr uint8 Float4x4 = 5; - static constexpr uint8 Chars = 6; - static constexpr uint8 GNS = 7; -}; - struct ActionInfo { ActionInfo (); @@ -1138,168 +1789,6 @@ struct ActionInfo VecSwizzle swizzle; }; -struct GLFW_Input -{ - GLFW_Input () {} - 
GLFW_Input (uint16) {} - operator uint16 () const; - static constexpr uint16 Space = 32; - static constexpr uint16 Apostrophe = 39; - static constexpr uint16 Comma = 44; - static constexpr uint16 Minus = 45; - static constexpr uint16 Period = 46; - static constexpr uint16 Slash = 47; - static constexpr uint16 0 = 48; - static constexpr uint16 1 = 49; - static constexpr uint16 2 = 50; - static constexpr uint16 3 = 51; - static constexpr uint16 4 = 52; - static constexpr uint16 5 = 53; - static constexpr uint16 6 = 54; - static constexpr uint16 7 = 55; - static constexpr uint16 8 = 56; - static constexpr uint16 9 = 57; - static constexpr uint16 Semicolon = 59; - static constexpr uint16 Equal = 61; - static constexpr uint16 A = 65; - static constexpr uint16 B = 66; - static constexpr uint16 C = 67; - static constexpr uint16 D = 68; - static constexpr uint16 E = 69; - static constexpr uint16 F = 70; - static constexpr uint16 G = 71; - static constexpr uint16 H = 72; - static constexpr uint16 I = 73; - static constexpr uint16 J = 74; - static constexpr uint16 K = 75; - static constexpr uint16 L = 76; - static constexpr uint16 M = 77; - static constexpr uint16 N = 78; - static constexpr uint16 O = 79; - static constexpr uint16 P = 80; - static constexpr uint16 Q = 81; - static constexpr uint16 R = 82; - static constexpr uint16 S = 83; - static constexpr uint16 T = 84; - static constexpr uint16 U = 85; - static constexpr uint16 V = 86; - static constexpr uint16 W = 87; - static constexpr uint16 X = 88; - static constexpr uint16 Y = 89; - static constexpr uint16 Z = 90; - static constexpr uint16 LeftBracket = 91; - static constexpr uint16 BackSlash = 92; - static constexpr uint16 RightBracket = 93; - static constexpr uint16 GraveAccent = 96; - static constexpr uint16 World1 = 161; - static constexpr uint16 World2 = 162; - static constexpr uint16 Escape = 256; - static constexpr uint16 Enter = 257; - static constexpr uint16 Tab = 258; - static constexpr uint16 Backspace = 
259; - static constexpr uint16 Insert = 260; - static constexpr uint16 Delete = 261; - static constexpr uint16 ArrowRight = 262; - static constexpr uint16 ArrowLeft = 263; - static constexpr uint16 ArrowDown = 264; - static constexpr uint16 ArrowUp = 265; - static constexpr uint16 PageUp = 266; - static constexpr uint16 PageDown = 267; - static constexpr uint16 Home = 268; - static constexpr uint16 End = 269; - static constexpr uint16 CapsLock = 280; - static constexpr uint16 ScrollLock = 281; - static constexpr uint16 NumLock = 282; - static constexpr uint16 PrintScreen = 283; - static constexpr uint16 Pause = 284; - static constexpr uint16 F1 = 290; - static constexpr uint16 F2 = 291; - static constexpr uint16 F3 = 292; - static constexpr uint16 F4 = 293; - static constexpr uint16 F5 = 294; - static constexpr uint16 F6 = 295; - static constexpr uint16 F7 = 296; - static constexpr uint16 F8 = 297; - static constexpr uint16 F9 = 298; - static constexpr uint16 F10 = 299; - static constexpr uint16 F11 = 300; - static constexpr uint16 F12 = 301; - static constexpr uint16 F13 = 302; - static constexpr uint16 F14 = 303; - static constexpr uint16 F15 = 304; - static constexpr uint16 F16 = 305; - static constexpr uint16 F17 = 306; - static constexpr uint16 F18 = 307; - static constexpr uint16 F19 = 308; - static constexpr uint16 F20 = 309; - static constexpr uint16 F21 = 310; - static constexpr uint16 F22 = 311; - static constexpr uint16 F23 = 312; - static constexpr uint16 F24 = 313; - static constexpr uint16 F25 = 314; - static constexpr uint16 LeftShift = 340; - static constexpr uint16 LeftControl = 341; - static constexpr uint16 LeftAlt = 342; - static constexpr uint16 LeftSuper = 343; - static constexpr uint16 RightShift = 344; - static constexpr uint16 RightControl = 345; - static constexpr uint16 RightAlt = 346; - static constexpr uint16 RightSuper = 347; - static constexpr uint16 Menu = 348; - static constexpr uint16 KP_0 = 320; - static constexpr uint16 KP_1 = 
321; - static constexpr uint16 KP_2 = 322; - static constexpr uint16 KP_3 = 323; - static constexpr uint16 KP_4 = 324; - static constexpr uint16 KP_5 = 325; - static constexpr uint16 KP_6 = 326; - static constexpr uint16 KP_7 = 327; - static constexpr uint16 KP_8 = 328; - static constexpr uint16 KP_9 = 329; - static constexpr uint16 KP_Decimal = 330; - static constexpr uint16 KP_Divide = 331; - static constexpr uint16 KP_Multiply = 332; - static constexpr uint16 KP_Subtract = 333; - static constexpr uint16 KP_Add = 334; - static constexpr uint16 KP_Enter = 335; - static constexpr uint16 KP_Equal = 336; - static constexpr uint16 MouseBtn1 = 0; - static constexpr uint16 MouseBtn2 = 1; - static constexpr uint16 MouseBtn3 = 2; - static constexpr uint16 MouseBtn4 = 3; - static constexpr uint16 MouseBtn5 = 4; - static constexpr uint16 MouseBtn6 = 5; - static constexpr uint16 MouseBtn7 = 6; - static constexpr uint16 MouseBtn8 = 7; - static constexpr uint16 Accelerometer = 373; - static constexpr uint16 MagneticField = 377; - static constexpr uint16 GeoLocation = 381; - static constexpr uint16 Gyroscope = 375; - static constexpr uint16 AmbientLight = 369; - static constexpr uint16 AirPressure = 370; - static constexpr uint16 Proximity = 371; - static constexpr uint16 Gravity = 374; - static constexpr uint16 LinearAcceleration = 376; - static constexpr uint16 RotationVector = 378; - static constexpr uint16 RelativeHumidity = 372; - static constexpr uint16 AirTemperature = 368; - static constexpr uint16 GameRotationVector = 379; - static constexpr uint16 Pose6DOF = 380; - static constexpr uint16 MultiTouch = 358; - static constexpr uint16 MouseWheel = 359; - static constexpr uint16 CursorPos = 360; - static constexpr uint16 CursorPos_mm = 361; - static constexpr uint16 CursorDelta = 362; - static constexpr uint16 CursorDelta_norm = 363; - static constexpr uint16 TouchPos = 364; - static constexpr uint16 TouchPos_mm = 365; - static constexpr uint16 TouchDelta = 366; - static 
constexpr uint16 TouchDelta_norm = 367; - static constexpr uint16 MouseLeft = 0; - static constexpr uint16 MouseRight = 1; - static constexpr uint16 MouseMiddle = 2; -}; - struct GLFW_BindingsMode { GLFW_BindingsMode (); @@ -1314,184 +1803,6 @@ struct GLFW_ActionBindings RC CreateMode (const string &); }; -struct WinAPI_Input -{ - WinAPI_Input () {} - WinAPI_Input (uint16) {} - operator uint16 () const; - static constexpr uint16 Backspace = 8; - static constexpr uint16 Tab = 9; - static constexpr uint16 Clear = 12; - static constexpr uint16 Enter = 13; - static constexpr uint16 LeftShift = 16; - static constexpr uint16 RightShift = 4096; - static constexpr uint16 LeftCtrl = 17; - static constexpr uint16 RightCtrl = 4352; - static constexpr uint16 LeftAlt = 18; - static constexpr uint16 RightAlt = 4608; - static constexpr uint16 Pause = 19; - static constexpr uint16 CapsLock = 20; - static constexpr uint16 Escape = 27; - static constexpr uint16 Space = 32; - static constexpr uint16 PageUp = 8448; - static constexpr uint16 PageDown = 8704; - static constexpr uint16 End = 8960; - static constexpr uint16 Home = 9216; - static constexpr uint16 ArrowLeft = 9472; - static constexpr uint16 ArrowUp = 9728; - static constexpr uint16 ArrowRight = 9984; - static constexpr uint16 ArrowDown = 10240; - static constexpr uint16 Select = 41; - static constexpr uint16 Print = 42; - static constexpr uint16 Execute = 43; - static constexpr uint16 Snapshot = 44; - static constexpr uint16 Insert = 11520; - static constexpr uint16 Delete = 11776; - static constexpr uint16 Help = 47; - static constexpr uint16 0 = 48; - static constexpr uint16 1 = 49; - static constexpr uint16 2 = 50; - static constexpr uint16 3 = 51; - static constexpr uint16 4 = 52; - static constexpr uint16 5 = 53; - static constexpr uint16 6 = 54; - static constexpr uint16 7 = 55; - static constexpr uint16 8 = 56; - static constexpr uint16 9 = 57; - static constexpr uint16 A = 65; - static constexpr uint16 B = 66; - 
static constexpr uint16 C = 67; - static constexpr uint16 D = 68; - static constexpr uint16 E = 69; - static constexpr uint16 F = 70; - static constexpr uint16 G = 71; - static constexpr uint16 H = 72; - static constexpr uint16 I = 73; - static constexpr uint16 J = 74; - static constexpr uint16 K = 75; - static constexpr uint16 L = 76; - static constexpr uint16 M = 77; - static constexpr uint16 N = 78; - static constexpr uint16 O = 79; - static constexpr uint16 P = 80; - static constexpr uint16 Q = 81; - static constexpr uint16 R = 82; - static constexpr uint16 S = 83; - static constexpr uint16 T = 84; - static constexpr uint16 U = 85; - static constexpr uint16 V = 86; - static constexpr uint16 W = 87; - static constexpr uint16 X = 88; - static constexpr uint16 Y = 89; - static constexpr uint16 Z = 90; - static constexpr uint16 LeftWin = 91; - static constexpr uint16 RightWin = 23552; - static constexpr uint16 LeftApps = 93; - static constexpr uint16 RightApps = 23808; - static constexpr uint16 Sleep = 95; - static constexpr uint16 F1 = 112; - static constexpr uint16 F2 = 113; - static constexpr uint16 F3 = 114; - static constexpr uint16 F4 = 115; - static constexpr uint16 F5 = 116; - static constexpr uint16 F6 = 117; - static constexpr uint16 F7 = 118; - static constexpr uint16 F8 = 119; - static constexpr uint16 F9 = 120; - static constexpr uint16 F10 = 121; - static constexpr uint16 F11 = 122; - static constexpr uint16 F12 = 123; - static constexpr uint16 F13 = 124; - static constexpr uint16 F14 = 125; - static constexpr uint16 F15 = 126; - static constexpr uint16 F16 = 127; - static constexpr uint16 F17 = 128; - static constexpr uint16 F18 = 129; - static constexpr uint16 F19 = 130; - static constexpr uint16 F20 = 131; - static constexpr uint16 F21 = 132; - static constexpr uint16 F22 = 133; - static constexpr uint16 F23 = 134; - static constexpr uint16 F24 = 135; - static constexpr uint16 NumLock = 144; - static constexpr uint16 ScrollLock = 145; - static 
constexpr uint16 BrowserBack = 166; - static constexpr uint16 BrowserForward = 167; - static constexpr uint16 BrowserRefresh = 168; - static constexpr uint16 BrowserStop = 169; - static constexpr uint16 BrowserSearch = 170; - static constexpr uint16 BrowserFavorites = 171; - static constexpr uint16 BrowserHome = 172; - static constexpr uint16 VolumeMute = 173; - static constexpr uint16 VolumeDown = 174; - static constexpr uint16 VolumeUp = 175; - static constexpr uint16 MediaNextTrack = 176; - static constexpr uint16 MediaPrevTrack = 177; - static constexpr uint16 MediaStop = 178; - static constexpr uint16 MediaPlayPause = 179; - static constexpr uint16 LaunchMail = 180; - static constexpr uint16 LaunchMediaSelect = 181; - static constexpr uint16 LaunchApp1 = 182; - static constexpr uint16 LaunchApp2 = 183; - static constexpr uint16 Semicolon = 186; - static constexpr uint16 Equal = 187; - static constexpr uint16 Comma = 188; - static constexpr uint16 Minus = 189; - static constexpr uint16 Period = 190; - static constexpr uint16 Slash = 191; - static constexpr uint16 GraveAccent = 192; - static constexpr uint16 LeftBracket = 219; - static constexpr uint16 BackSlash = 220; - static constexpr uint16 RightBracket = 221; - static constexpr uint16 Apostrophe = 222; - static constexpr uint16 KP_Enter = 3328; - static constexpr uint16 KP_0 = 96; - static constexpr uint16 KP_1 = 97; - static constexpr uint16 KP_2 = 98; - static constexpr uint16 KP_3 = 99; - static constexpr uint16 KP_4 = 100; - static constexpr uint16 KP_5 = 101; - static constexpr uint16 KP_6 = 102; - static constexpr uint16 KP_7 = 103; - static constexpr uint16 KP_8 = 104; - static constexpr uint16 KP_9 = 105; - static constexpr uint16 KP_Multiply = 106; - static constexpr uint16 KP_Add = 107; - static constexpr uint16 KP_Separator = 108; - static constexpr uint16 KP_Subtract = 109; - static constexpr uint16 KP_Decimal = 110; - static constexpr uint16 KP_Divide = 111; - static constexpr uint16 KP_Divide2 
= 28416; - static constexpr uint16 KP_End = 35; - static constexpr uint16 KP_ArrowLeft = 37; - static constexpr uint16 KP_ArrowUp = 38; - static constexpr uint16 KP_ArrowRight = 39; - static constexpr uint16 KP_ArrowDown = 40; - static constexpr uint16 KP_Home = 36; - static constexpr uint16 KP_PageUp = 33; - static constexpr uint16 KP_PageDown = 34; - static constexpr uint16 KP_Insert = 45; - static constexpr uint16 KP_Delete = 46; - static constexpr uint16 MouseBtn0 = 0; - static constexpr uint16 MouseBtn1 = 1; - static constexpr uint16 MouseBtn2 = 2; - static constexpr uint16 MouseBtn3 = 3; - static constexpr uint16 MouseBtn4 = 4; - static constexpr uint16 MultiTouch = 57098; - static constexpr uint16 MouseWheel = 57099; - static constexpr uint16 CursorPos = 57100; - static constexpr uint16 CursorPos_mm = 57101; - static constexpr uint16 CursorDelta = 57102; - static constexpr uint16 CursorDelta_norm = 57103; - static constexpr uint16 TouchPos = 57104; - static constexpr uint16 TouchPos_mm = 57105; - static constexpr uint16 TouchDelta = 57106; - static constexpr uint16 TouchDelta_norm = 57107; - static constexpr uint16 MouseLeft = 0; - static constexpr uint16 MouseRight = 1; - static constexpr uint16 MouseMiddle = 2; -}; - struct WinAPI_BindingsMode { WinAPI_BindingsMode (); @@ -1506,313 +1817,6 @@ struct WinAPI_ActionBindings RC CreateMode (const string &); }; -struct Android_Input -{ - Android_Input () {} - Android_Input (uint16) {} - operator uint16 () const; - static constexpr uint16 Back = 4; - static constexpr uint16 0 = 7; - static constexpr uint16 1 = 8; - static constexpr uint16 2 = 9; - static constexpr uint16 3 = 10; - static constexpr uint16 4 = 11; - static constexpr uint16 5 = 12; - static constexpr uint16 6 = 13; - static constexpr uint16 7 = 14; - static constexpr uint16 8 = 15; - static constexpr uint16 9 = 16; - static constexpr uint16 Star = 17; - static constexpr uint16 Paund = 18; - static constexpr uint16 VolumeUp = 24; - static constexpr 
uint16 VolumeDown = 25; - static constexpr uint16 VolumeMute = 164; - static constexpr uint16 Power = 26; - static constexpr uint16 Camera = 27; - static constexpr uint16 Clear = 28; - static constexpr uint16 A = 29; - static constexpr uint16 B = 30; - static constexpr uint16 C = 31; - static constexpr uint16 D = 32; - static constexpr uint16 E = 33; - static constexpr uint16 F = 34; - static constexpr uint16 G = 35; - static constexpr uint16 H = 36; - static constexpr uint16 I = 37; - static constexpr uint16 J = 38; - static constexpr uint16 K = 39; - static constexpr uint16 L = 40; - static constexpr uint16 M = 41; - static constexpr uint16 N = 42; - static constexpr uint16 O = 43; - static constexpr uint16 P = 44; - static constexpr uint16 Q = 45; - static constexpr uint16 R = 46; - static constexpr uint16 S = 47; - static constexpr uint16 T = 48; - static constexpr uint16 U = 49; - static constexpr uint16 V = 50; - static constexpr uint16 W = 51; - static constexpr uint16 X = 52; - static constexpr uint16 Y = 53; - static constexpr uint16 Z = 54; - static constexpr uint16 Comma = 55; - static constexpr uint16 Period = 56; - static constexpr uint16 LeftAlt = 57; - static constexpr uint16 RightAlt = 58; - static constexpr uint16 LeftShift = 59; - static constexpr uint16 RightShift = 60; - static constexpr uint16 Tab = 61; - static constexpr uint16 Space = 62; - static constexpr uint16 Sym = 63; - static constexpr uint16 Browser = 64; - static constexpr uint16 LaunchMail = 65; - static constexpr uint16 Enter = 66; - static constexpr uint16 Delete = 67; - static constexpr uint16 Grave = 68; - static constexpr uint16 Minus = 69; - static constexpr uint16 Equal = 70; - static constexpr uint16 LeftBracket = 71; - static constexpr uint16 RightBracket = 72; - static constexpr uint16 BackSlash = 73; - static constexpr uint16 Semicolon = 74; - static constexpr uint16 Apostrophe = 75; - static constexpr uint16 Slash = 76; - static constexpr uint16 At = 77; - static 
constexpr uint16 Num = 78; - static constexpr uint16 Plus = 81; - static constexpr uint16 Menu = 82; - static constexpr uint16 Notification = 83; - static constexpr uint16 Search = 84; - static constexpr uint16 MicMute = 91; - static constexpr uint16 PageUp = 92; - static constexpr uint16 PageDown = 93; - static constexpr uint16 PictSymbols = 94; - static constexpr uint16 SwitchCharset = 95; - static constexpr uint16 Escape = 111; - static constexpr uint16 ForwardDelete = 112; - static constexpr uint16 LeftControl = 113; - static constexpr uint16 RightControl = 114; - static constexpr uint16 CapsLock = 115; - static constexpr uint16 ScrollLock = 116; - static constexpr uint16 LeftMeta = 117; - static constexpr uint16 RightMeta = 118; - static constexpr uint16 Function = 119; - static constexpr uint16 SysRq = 120; - static constexpr uint16 PauseBreak = 121; - static constexpr uint16 MoveHome = 122; - static constexpr uint16 MoveEnd = 123; - static constexpr uint16 Insert = 124; - static constexpr uint16 Forward = 125; - static constexpr uint16 F1 = 131; - static constexpr uint16 F2 = 132; - static constexpr uint16 F3 = 133; - static constexpr uint16 F4 = 134; - static constexpr uint16 F5 = 135; - static constexpr uint16 F6 = 136; - static constexpr uint16 F7 = 137; - static constexpr uint16 F8 = 138; - static constexpr uint16 F9 = 139; - static constexpr uint16 F10 = 140; - static constexpr uint16 F11 = 141; - static constexpr uint16 F12 = 142; - static constexpr uint16 NumLock = 143; - static constexpr uint16 Info = 165; - static constexpr uint16 ChannelUp = 166; - static constexpr uint16 ChannelDown = 167; - static constexpr uint16 ZoomIn = 168; - static constexpr uint16 ZoomOut = 169; - static constexpr uint16 Window = 171; - static constexpr uint16 Guide = 172; - static constexpr uint16 DVR = 173; - static constexpr uint16 Bookmark = 174; - static constexpr uint16 Captions = 175; - static constexpr uint16 Settings = 176; - static constexpr uint16 AppSwitch = 
187; - static constexpr uint16 LangSwitch = 204; - static constexpr uint16 MannerMode = 205; - static constexpr uint16 3DMode = 206; - static constexpr uint16 Contacts = 207; - static constexpr uint16 Calendar = 208; - static constexpr uint16 Music = 209; - static constexpr uint16 Calculator = 210; - static constexpr uint16 ZenkakuHankaku = 211; - static constexpr uint16 Eisu = 212; - static constexpr uint16 Muhenkan = 213; - static constexpr uint16 Henkan = 214; - static constexpr uint16 KatakanaHiragana = 215; - static constexpr uint16 Yen = 216; - static constexpr uint16 Ro = 217; - static constexpr uint16 Kana = 218; - static constexpr uint16 Assist = 219; - static constexpr uint16 BrightnessDown = 220; - static constexpr uint16 BrightnessUp = 221; - static constexpr uint16 Sleep = 223; - static constexpr uint16 Wakeup = 224; - static constexpr uint16 Pairing = 225; - static constexpr uint16 11 = 227; - static constexpr uint16 12 = 228; - static constexpr uint16 LastChannel = 229; - static constexpr uint16 VoiceAssist = 231; - static constexpr uint16 Help = 259; - static constexpr uint16 NavPrev = 260; - static constexpr uint16 NavNext = 261; - static constexpr uint16 NavIn = 262; - static constexpr uint16 NavOut = 263; - static constexpr uint16 StemPrimary = 264; - static constexpr uint16 Stem1 = 265; - static constexpr uint16 Stem2 = 266; - static constexpr uint16 Stem3 = 267; - static constexpr uint16 SoftSleep = 276; - static constexpr uint16 Cut = 277; - static constexpr uint16 Copy = 278; - static constexpr uint16 Paste = 279; - static constexpr uint16 SysNavUp = 280; - static constexpr uint16 SysNavDown = 281; - static constexpr uint16 SysNavLeft = 282; - static constexpr uint16 SysNavRight = 283; - static constexpr uint16 AllApps = 284; - static constexpr uint16 Refresh = 285; - static constexpr uint16 ThumbsUp = 286; - static constexpr uint16 ThumbsDown = 287; - static constexpr uint16 ProfileSwitch = 288; - static constexpr uint16 MediaPlayPause = 85; 
- static constexpr uint16 MediaStop = 86; - static constexpr uint16 MediaNext = 87; - static constexpr uint16 MediaPrev = 88; - static constexpr uint16 MediaRewind = 89; - static constexpr uint16 MediaFastForward = 90; - static constexpr uint16 MediaPlay = 126; - static constexpr uint16 MediaPause = 127; - static constexpr uint16 MediaClose = 128; - static constexpr uint16 MediaEject = 129; - static constexpr uint16 MediaRecord = 130; - static constexpr uint16 MediaAudioTrack = 222; - static constexpr uint16 MediaTopMenu = 226; - static constexpr uint16 MediaSkipForward = 272; - static constexpr uint16 MediaSkipBackward = 273; - static constexpr uint16 MediaStepForward = 274; - static constexpr uint16 MediaStepBackward = 275; - static constexpr uint16 DPadUp = 19; - static constexpr uint16 DPadDown = 20; - static constexpr uint16 DPadLeft = 21; - static constexpr uint16 DPadRight = 22; - static constexpr uint16 DPadCenter = 23; - static constexpr uint16 DPadUpLeft = 268; - static constexpr uint16 DPadDownLeft = 269; - static constexpr uint16 DPadUpRight = 270; - static constexpr uint16 DPadDownRight = 271; - static constexpr uint16 GPadA = 96; - static constexpr uint16 GPadB = 97; - static constexpr uint16 GPadC = 98; - static constexpr uint16 GPadX = 99; - static constexpr uint16 GPadY = 100; - static constexpr uint16 GPadZ = 101; - static constexpr uint16 GPadL1 = 102; - static constexpr uint16 GPadR1 = 103; - static constexpr uint16 GPadL2 = 104; - static constexpr uint16 GPadR2 = 105; - static constexpr uint16 GPadThumbL = 106; - static constexpr uint16 GPadThumbR = 107; - static constexpr uint16 GPadStart = 108; - static constexpr uint16 GPadSelect = 109; - static constexpr uint16 GPadMode = 110; - static constexpr uint16 GPad1 = 188; - static constexpr uint16 GPad2 = 189; - static constexpr uint16 GPad3 = 190; - static constexpr uint16 GPad4 = 191; - static constexpr uint16 GPad5 = 192; - static constexpr uint16 GPad6 = 193; - static constexpr uint16 GPad7 = 
194; - static constexpr uint16 GPad8 = 195; - static constexpr uint16 GPad9 = 196; - static constexpr uint16 GPad10 = 197; - static constexpr uint16 GPad11 = 198; - static constexpr uint16 GPad12 = 199; - static constexpr uint16 GPad13 = 200; - static constexpr uint16 GPad14 = 201; - static constexpr uint16 GPad15 = 202; - static constexpr uint16 GPad16 = 203; - static constexpr uint16 KP_0 = 144; - static constexpr uint16 KP_1 = 145; - static constexpr uint16 KP_2 = 146; - static constexpr uint16 KP_3 = 147; - static constexpr uint16 KP_4 = 148; - static constexpr uint16 KP_5 = 149; - static constexpr uint16 KP_6 = 150; - static constexpr uint16 KP_7 = 151; - static constexpr uint16 KP_8 = 152; - static constexpr uint16 KP_9 = 153; - static constexpr uint16 KP_Divide = 154; - static constexpr uint16 KP_Multiply = 155; - static constexpr uint16 KP_Subtract = 156; - static constexpr uint16 KP_Add = 157; - static constexpr uint16 KP_Dot = 158; - static constexpr uint16 KP_Comma = 159; - static constexpr uint16 KP_Enter = 160; - static constexpr uint16 KP_Equal = 161; - static constexpr uint16 KP_LeftParen = 162; - static constexpr uint16 KP_RightParen = 163; - static constexpr uint16 TV = 170; - static constexpr uint16 TV_Power = 177; - static constexpr uint16 TV_Input = 178; - static constexpr uint16 TV_STBPower = 179; - static constexpr uint16 TV_STBInput = 180; - static constexpr uint16 AV_Power = 181; - static constexpr uint16 AVR_Input = 182; - static constexpr uint16 TV_ProgRed = 183; - static constexpr uint16 TV_ProgGreen = 184; - static constexpr uint16 TV_ProgYellow = 185; - static constexpr uint16 TV_ProgBlue = 186; - static constexpr uint16 TV_DataService = 230; - static constexpr uint16 TV_RadioService = 232; - static constexpr uint16 TV_Teletext = 233; - static constexpr uint16 TV_NumberEntry = 234; - static constexpr uint16 TV_TerrestrialAnalog = 235; - static constexpr uint16 TV_TerrestrialDigital = 236; - static constexpr uint16 TV_Satellite = 237; - 
static constexpr uint16 TV_SatelliteBS = 238; - static constexpr uint16 TV_SattelliteCS = 239; - static constexpr uint16 TV_SatelliteService = 240; - static constexpr uint16 TV_Network = 241; - static constexpr uint16 TV_AntennaCable = 242; - static constexpr uint16 TV_InputHdmi1 = 243; - static constexpr uint16 TV_InputHdmi2 = 244; - static constexpr uint16 TV_InputHdmi3 = 245; - static constexpr uint16 TV_InputHdmi4 = 246; - static constexpr uint16 TV_inputComposite1 = 247; - static constexpr uint16 TV_inputComposite2 = 248; - static constexpr uint16 TV_InputComponent1 = 249; - static constexpr uint16 TV_InputComponent2 = 250; - static constexpr uint16 TV_InputVGA1 = 251; - static constexpr uint16 TV_AudioDesc = 252; - static constexpr uint16 TV_AudioDescMixUp = 253; - static constexpr uint16 TV_AudioDescMixDown = 254; - static constexpr uint16 TV_ZoomMode = 255; - static constexpr uint16 TV_ContentsMenu = 256; - static constexpr uint16 TV_MediaContextMenu = 257; - static constexpr uint16 TV_TimerProgramming = 258; - static constexpr uint16 Accelerometer = 532; - static constexpr uint16 MagneticField = 536; - static constexpr uint16 GeoLocation = 540; - static constexpr uint16 Gyroscope = 534; - static constexpr uint16 AmbientLight = 528; - static constexpr uint16 AirPressure = 529; - static constexpr uint16 Proximity = 530; - static constexpr uint16 Gravity = 533; - static constexpr uint16 LinearAcceleration = 535; - static constexpr uint16 RotationVector = 537; - static constexpr uint16 RelativeHumidity = 531; - static constexpr uint16 AirTemperature = 527; - static constexpr uint16 GameRotationVector = 538; - static constexpr uint16 Pose6DOF = 539; - static constexpr uint16 MultiTouch = 522; - static constexpr uint16 TouchPos = 523; - static constexpr uint16 TouchPos_mm = 524; - static constexpr uint16 TouchDelta = 525; - static constexpr uint16 TouchDelta_norm = 526; -}; - struct Android_BindingsMode { Android_BindingsMode (); @@ -1826,13 +1830,6 @@ struct 
Android_ActionBindings RC CreateMode (const string &); }; -struct OpenVR_Input -{ - OpenVR_Input () {} - OpenVR_Input (uint16) {} - operator uint16 () const; -}; - struct OpenVR_BindingsMode { OpenVR_BindingsMode (); @@ -1847,33 +1844,33 @@ struct OpenVR_ActionBindings }; template <> -struct RC : WinAPI_ActionBindings +struct RC : Android_BindingsMode { - RC (const WinAPI_ActionBindings &); + RC (const Android_BindingsMode &); }; template <> -struct RC : Android_ActionBindings +struct RC : OpenVR_BindingsMode { - RC (const Android_ActionBindings &); + RC (const OpenVR_BindingsMode &); }; template <> -struct RC : OpenVR_BindingsMode +struct RC : Android_ActionBindings { - RC (const OpenVR_BindingsMode &); + RC (const Android_ActionBindings &); }; template <> -struct RC : Android_BindingsMode +struct RC : WinAPI_ActionBindings { - RC (const Android_BindingsMode &); + RC (const WinAPI_ActionBindings &); }; template <> -struct RC : GLFW_BindingsMode +struct RC : OpenVR_ActionBindings { - RC (const GLFW_BindingsMode &); + RC (const OpenVR_ActionBindings &); }; template <> @@ -1889,8 +1886,8 @@ struct RC : GLFW_ActionBindings }; template <> -struct RC : OpenVR_ActionBindings +struct RC : GLFW_BindingsMode { - RC (const OpenVR_ActionBindings &); + RC (const GLFW_BindingsMode &); }; diff --git a/AE/engine/shared_data/scripts/offline_packer.as b/AE/engine/shared_data/scripts/offline_packer.as index 1168e842..c15c729f 100644 --- a/AE/engine/shared_data/scripts/offline_packer.as +++ b/AE/engine/shared_data/scripts/offline_packer.as @@ -1,12 +1,13 @@ -//ce32c8f6 +//babefd0b #include #include +#define funcdef // typedef for function + using int8 = std::int8_t; using uint8 = std::uint8_t; using int16 = std::int16_t; using uint16 = std::uint16_t; -using int = std::int32_t; using uint = std::uint32_t; using int32 = std::int32_t; using uint32 = std::uint32_t; @@ -20,20 +21,15 @@ struct RC; template using array = std::vector; -struct float2; -struct float3; -struct PipelineCompiler; 
-struct uint4; -struct float4; -struct uint3; -struct uint2; -struct EReflectionFlags; -struct AssetPacker; -struct short3; -struct short2; -struct ushort4; -struct sbyte3; -struct sbyte4; +using namespace std::string_literals; + +template +string operator + (const string &lhs, T rhs); + +struct ubyte2; +struct ubyte3; +struct InputActions; +struct ubyte4; struct Archive; struct ushort2; struct sbyte2; @@ -42,14 +38,46 @@ struct short4; struct bool4; struct int2; struct bool3; -struct int3; struct bool2; +struct int3; struct int4; -struct ubyte4; -struct ubyte2; -struct ubyte3; -struct EFileType; -struct InputActions; +struct AssetPacker; +struct short3; +struct short2; +struct ushort4; +struct sbyte3; +struct sbyte4; +struct uint4; +struct float4; +struct uint2; +struct uint3; +struct float2; +struct float3; +struct PipelineCompiler; + +enum class EFileType : uint32 +{ + Raw, + Brotli, + InMemory, + BrotliInMemory, + ZStd, + ZStdInMemory, +}; +uint32 operator | (EFileType lhs, EFileType rhs); +uint32 operator | (uint32 lhs, EFileType rhs); +uint32 operator | (EFileType lhs, uint32 rhs); + +enum class EReflectionFlags : uint32 +{ + RenderTechniques, + RTechPass_Pipelines, + RTech_ShaderBindingTable, + All, +}; +uint32 operator | (EReflectionFlags lhs, EReflectionFlags rhs); +uint32 operator | (uint32 lhs, EReflectionFlags rhs); +uint32 operator | (EReflectionFlags lhs, uint32 rhs); using sbyte = int8; using ubyte = uint8; @@ -1071,30 +1099,6 @@ string GetOutputDir (); void DeleteFolder (const string &); bool IsGLSLCompilerSupported (); bool IsMetalCompilerSupported (); -struct EFileType -{ - EFileType () {} - EFileType (uint32) {} - operator uint32 () const; - static constexpr uint32 Raw = 1; - static constexpr uint32 Brotli = 2; - static constexpr uint32 InMemory = 4; - static constexpr uint32 BrotliInMemory = 6; - static constexpr uint32 ZStd = 16; - static constexpr uint32 ZStdInMemory = 20; -}; - -struct EReflectionFlags -{ - EReflectionFlags () {} - 
EReflectionFlags (uint32) {} - operator uint32 () const; - static constexpr uint32 RenderTechniques = 1; - static constexpr uint32 RTechPass_Pipelines = 2; - static constexpr uint32 RTech_ShaderBindingTable = 4; - static constexpr uint32 All = 7; -}; - struct PipelineCompiler { PipelineCompiler (); @@ -1145,26 +1149,26 @@ struct Archive }; template <> -struct RC : PipelineCompiler +struct RC : InputActions { - RC (const PipelineCompiler &); + RC (const InputActions &); }; template <> -struct RC : AssetPacker +struct RC : Archive { - RC (const AssetPacker &); + RC (const Archive &); }; template <> -struct RC : Archive +struct RC : AssetPacker { - RC (const Archive &); + RC (const AssetPacker &); }; template <> -struct RC : InputActions +struct RC : PipelineCompiler { - RC (const InputActions &); + RC (const PipelineCompiler &); }; diff --git a/AE/engine/shared_data/scripts/pipeline_compiler.as b/AE/engine/shared_data/scripts/pipeline_compiler.as index 6eeae5d4..541fbb8d 100644 --- a/AE/engine/shared_data/scripts/pipeline_compiler.as +++ b/AE/engine/shared_data/scripts/pipeline_compiler.as @@ -1,4 +1,4 @@ -//20d3f215 +//6594b751 #include #include @@ -8,7 +8,6 @@ using int8 = std::int8_t; using uint8 = std::uint8_t; using int16 = std::int16_t; using uint16 = std::uint16_t; -using int = std::int32_t; using uint = std::uint32_t; using int32 = std::int32_t; using uint32 = std::uint32_t; @@ -22,146 +21,1718 @@ struct RC; template using array = std::vector; -struct EShaderIO; -struct EImageAspect; -struct EPipelineOpt; -struct InstanceIndex; -struct float3; -struct float2; -struct EDescSetUsage; -struct EVertexInputRate; -struct DescriptorSetLayout; -struct ESamplerChromaLocation; -struct RenderState_StencilBufferState; -struct EFilter; -struct EPipelineDynamicState; -struct RayTracingShaderBinding; -struct MultiSamples; -struct RenderTechnique; -struct RayTracingPipeline; -struct sbyte3; -struct EAttachment; -struct sbyte4; -struct ESamplerYcbcrModelConversion; -struct 
short3; -struct ushort4; -struct GraphicsPass; -struct ESamplerOpt; -struct ESubgroupTypes; -struct EImageUsage; -struct Align; -struct short2; -struct EIndex; -struct ComputePipeline; -struct EAccessType; -struct float4; -struct ShaderStructType; -struct EBlendFactor; -struct Shader; -struct EStencilOp; -struct ERTInstanceOpt; -struct EAddressMode; -struct ECompilationTarget; -struct FS_DirectoryIterator; -struct RenderState_MultisampleState; -struct RayIndex; -struct bool3; -struct EImage; -struct bool2; -struct RGBA32i; -struct bool4; -struct EVertexType; -struct ELogicOp; -struct ETessPatch; -struct ECullMode; -struct HSVColor; -struct ShaderStructTypeUsage; -struct EBlendOp; -struct EQueueMask; -struct EShaderPreprocessor; -struct RenderState_ColorBuffer_ColorMask; -struct RGBA32f; -struct ComputePipelineSpec; -struct EPrimitive; -struct ESubgroupOperation; -struct ubyte2; -struct ubyte3; -struct EFormatFeature; -struct RenderState_InputAssemblyState; -struct EGraphicsDeviceID; -struct RenderState_RasterizationState; -struct ESamplerYcbcrRange; -struct ubyte4; -struct CompatibleRenderPass; -struct EShadingRate; -struct GlobalConfig; -struct MeshPipeline; -struct NamedRenderState; -struct EBufferUsage; -struct FeatureSet; -struct ComputePass; -struct EBufferOpt; -struct EResourceState; -struct EBorderColor; -struct VertexDivisor; -struct EImageType; -struct EAttachmentStoreOp; -struct Sampler; -struct CallableIndex; -struct MeshPipelineSpec; -struct EShaderOpt; -struct GraphicsPipeline; -struct EImageOpt; -struct EFeature; -struct EMipmapFilter; +using namespace std::string_literals; + +template +string operator + (const string &lhs, T rhs); + struct VertexBufferInput; struct RGBA8u; -struct EStructLayout; -struct RGBA32u; struct Attachment; +struct RGBA32u; struct uint2; struct ShaderIO; -struct EShaderStages; -struct EMutableRenderState; -struct uint3; -struct EShader; struct RenderState_DepthBufferState; -struct RenderState_StencilFaceState; -struct 
ECompareOp; +struct uint3; struct uint4; -struct EGPUVendor; -struct EValueType; -struct ETessSpacing; -struct TilePipelineSpec; +struct RenderState_StencilFaceState; +struct FeatureSet; +struct ComputePass; +struct NamedRenderState; +struct MeshPipeline; +struct VertexDivisor; +struct Sampler; +struct MeshPipelineSpec; +struct DepthStencil; +struct GraphicsPipeline; +struct RenderState; +struct CallableIndex; +struct RayTracingPipelineSpec; +struct AttachmentSpec; +struct RenderState_ColorBuffersState; +struct MipmapLevel; +struct FS_RecursiveDirectoryIter; struct TilePipeline; -struct EPolygonMode; -struct EAttachmentLoadOp; -struct int2; -struct int4; -struct ImageLayer; struct GraphicsPipelineSpec; +struct TilePipelineSpec; +struct PipelineLayout; +struct ImageLayer; +struct int4; struct ArraySize; struct int3; -struct FS_RecursiveDirectoryIter; -struct EPixelFormat; -struct short4; -struct PipelineLayout; -struct ushort3; -struct ushort2; -struct sbyte2; struct RenderPass; +struct int2; +struct ushort2; struct RenderState_ColorBuffer; -struct RenderState; -struct AttachmentSpec; -struct DepthStencil; -struct EPixelFormatExternal; -struct RenderState_ColorBuffersState; -struct EReductionMode; -struct RayTracingPipelineSpec; -struct ESurfaceFormat; -struct EShadingRateCombinerOp; -struct MipmapLevel; -struct EShaderVersion; +struct ushort3; +struct sbyte2; +struct short2; +struct short4; +struct sbyte4; +struct sbyte3; +struct GraphicsPass; +struct ushort4; +struct short3; +struct Align; +struct ComputePipeline; +struct float4; +struct float2; +struct InstanceIndex; +struct float3; +struct DescriptorSetLayout; +struct RenderTechnique; +struct RayTracingPipeline; +struct RayTracingShaderBinding; +struct MultiSamples; +struct RenderState_StencilBufferState; +struct ComputePipelineSpec; +struct RenderState_RasterizationState; +struct ubyte3; +struct ubyte2; +struct RenderState_InputAssemblyState; +struct CompatibleRenderPass; +struct ubyte4; +struct GlobalConfig; 
+struct ShaderStructType; +struct FS_DirectoryIterator; +struct Shader; +struct RayIndex; +struct RenderState_MultisampleState; +struct RGBA32i; +struct bool2; +struct bool4; +struct bool3; +struct RGBA32f; +struct HSVColor; +struct RenderState_ColorBuffer_ColorMask; + +enum class EImage : uint8 +{ + Cube, + CubeArray, +}; +uint8 operator | (EImage lhs, EImage rhs); +uint8 operator | (uint8 lhs, EImage rhs); +uint8 operator | (EImage lhs, uint8 rhs); +static constexpr EImage EImage_1D = EImage(0); +static constexpr EImage EImage_2D = EImage(1); +static constexpr EImage EImage_3D = EImage(2); +static constexpr EImage EImage_1DArray = EImage(3); +static constexpr EImage EImage_2DArray = EImage(4); + +enum class EIndex : uint8 +{ + UShort, + UInt, +}; +uint8 operator | (EIndex lhs, EIndex rhs); +uint8 operator | (uint8 lhs, EIndex rhs); +uint8 operator | (EIndex lhs, uint8 rhs); + +enum class EPixelFormat : uint8 +{ + RGBA16_SNorm, + RGBA8_SNorm, + RGB16_SNorm, + RGB8_SNorm, + RG16_SNorm, + RG8_SNorm, + R16_SNorm, + R8_SNorm, + RGBA16_UNorm, + RGBA8_UNorm, + RGB16_UNorm, + RGB8_UNorm, + RG16_UNorm, + RG8_UNorm, + R16_UNorm, + R8_UNorm, + RGB10_A2_UNorm, + RGBA4_UNorm, + RGB5_A1_UNorm, + RGB_5_6_5_UNorm, + BGR8_UNorm, + BGRA8_UNorm, + sRGB8, + sRGB8_A8, + sBGR8, + sBGR8_A8, + R8I, + RG8I, + RGB8I, + RGBA8I, + R16I, + RG16I, + RGB16I, + RGBA16I, + R32I, + RG32I, + RGB32I, + RGBA32I, + R64I, + R8U, + RG8U, + RGB8U, + RGBA8U, + R16U, + RG16U, + RGB16U, + RGBA16U, + R32U, + RG32U, + RGB32U, + RGBA32U, + RGB10_A2U, + R64U, + R16F, + RG16F, + RGB16F, + RGBA16F, + R32F, + RG32F, + RGB32F, + RGBA32F, + RGB_11_11_10F, + RGB9F_E5, + Depth16, + Depth24, + Depth32F, + Depth16_Stencil8, + Depth24_Stencil8, + Depth32F_Stencil8, + BC1_RGB8_UNorm, + BC1_sRGB8, + BC1_RGB8_A1_UNorm, + BC1_sRGB8_A1, + BC2_RGBA8_UNorm, + BC2_sRGB8, + BC3_RGBA8_UNorm, + BC3_sRGB8, + BC4_R8_SNorm, + BC4_R8_UNorm, + BC5_RG8_SNorm, + BC5_RG8_UNorm, + BC6H_RGB16F, + BC6H_RGB16UF, + BC7_RGBA8_UNorm, + 
BC7_sRGB8_A8, + ETC2_RGB8_UNorm, + ETC2_sRGB8, + ETC2_RGB8_A1_UNorm, + ETC2_sRGB8_A1, + ETC2_RGBA8_UNorm, + ETC2_sRGB8_A8, + EAC_R11_SNorm, + EAC_R11_UNorm, + EAC_RG11_SNorm, + EAC_RG11_UNorm, + ASTC_RGBA8_4x4, + ASTC_RGBA8_5x4, + ASTC_RGBA8_5x5, + ASTC_RGBA8_6x5, + ASTC_RGBA8_6x6, + ASTC_RGBA8_8x5, + ASTC_RGBA8_8x6, + ASTC_RGBA8_8x8, + ASTC_RGBA8_10x5, + ASTC_RGBA8_10x6, + ASTC_RGBA8_10x8, + ASTC_RGBA8_10x10, + ASTC_RGBA8_12x10, + ASTC_RGBA8_12x12, + ASTC_sRGB8_A8_4x4, + ASTC_sRGB8_A8_5x4, + ASTC_sRGB8_A8_5x5, + ASTC_sRGB8_A8_6x5, + ASTC_sRGB8_A8_6x6, + ASTC_sRGB8_A8_8x5, + ASTC_sRGB8_A8_8x6, + ASTC_sRGB8_A8_8x8, + ASTC_sRGB8_A8_10x5, + ASTC_sRGB8_A8_10x6, + ASTC_sRGB8_A8_10x8, + ASTC_sRGB8_A8_10x10, + ASTC_sRGB8_A8_12x10, + ASTC_sRGB8_A8_12x12, + ASTC_RGBA16F_4x4, + ASTC_RGBA16F_5x4, + ASTC_RGBA16F_5x5, + ASTC_RGBA16F_6x5, + ASTC_RGBA16F_6x6, + ASTC_RGBA16F_8x5, + ASTC_RGBA16F_8x6, + ASTC_RGBA16F_8x8, + ASTC_RGBA16F_10x5, + ASTC_RGBA16F_10x6, + ASTC_RGBA16F_10x8, + ASTC_RGBA16F_10x10, + ASTC_RGBA16F_12x10, + ASTC_RGBA16F_12x12, + G8B8G8R8_422_UNorm, + B8G8R8G8_422_UNorm, + G8_B8R8_420_UNorm, + G8_B8R8_422_UNorm, + G8_B8R8_444_UNorm, + G8_B8_R8_420_UNorm, + G8_B8_R8_422_UNorm, + G8_B8_R8_444_UNorm, + B10x6G10x6R10x6G10x6_422_UNorm, + G10x6B10x6G10x6R10x6_422_UNorm, + G10x6_B10x6R10x6_420_UNorm, + G10x6_B10x6R10x6_422_UNorm, + G10x6_B10x6R10x6_444_UNorm, + G10x6_B10x6_R10x6_420_UNorm, + G10x6_B10x6_R10x6_422_UNorm, + G10x6_B10x6_R10x6_444_UNorm, + R10x6G10x6B10x6A10x6_UNorm, + R10x6G10x6_UNorm, + R10x6_UNorm, + B12x4G12x4R12x4G12x4_422_UNorm, + G12x4B12x4G12x4R12x4_422_UNorm, + G12x4_B12x4R12x4_420_UNorm, + G12x4_B12x4R12x4_422_UNorm, + G12x4_B12x4R12x4_444_UNorm, + G12x4_B12x4_R12x4_420_UNorm, + G12x4_B12x4_R12x4_422_UNorm, + G12x4_B12x4_R12x4_444_UNorm, + R12x4G12x4B12x4A12x4_UNorm, + R12x4G12x4_UNorm, + R12x4_UNorm, + B16G16R16G16_422_UNorm, + G16B16G16R16_422_UNorm, + G16_B16R16_420_UNorm, + G16_B16R16_422_UNorm, + G16_B16R16_444_UNorm, + G16_B16_R16_420_UNorm, 
+ G16_B16_R16_422_UNorm, + G16_B16_R16_444_UNorm, + SwapchainColor, +}; +uint8 operator | (EPixelFormat lhs, EPixelFormat rhs); +uint8 operator | (uint8 lhs, EPixelFormat rhs); +uint8 operator | (EPixelFormat lhs, uint8 rhs); + +enum class EPixelFormatExternal : uint8 +{ + Android_Depth16, + Android_DepthJPEG, + Android_DepthPointCloud, + Android_JPEG, + Android_Raw16, + Android_Raw12, + Android_Raw10, + Android_NV16, + Android_NV21, + Android_YCBCR_P010, + Android_YUV_420, + Android_YUV_422, + Android_YUV_444, + Android_YUY2, + Android_YV12, + Android_Y8, + Android_HEIC, +}; +uint8 operator | (EPixelFormatExternal lhs, EPixelFormatExternal rhs); +uint8 operator | (uint8 lhs, EPixelFormatExternal rhs); +uint8 operator | (EPixelFormatExternal lhs, uint8 rhs); + +enum class ECompareOp : uint8 +{ + Never, + Less, + Equal, + LEqual, + Greater, + NotEqual, + GEqual, + Always, + LessOrEqual, + GreaterOrEqual, +}; +uint8 operator | (ECompareOp lhs, ECompareOp rhs); +uint8 operator | (uint8 lhs, ECompareOp rhs); +uint8 operator | (ECompareOp lhs, uint8 rhs); + +enum class EBlendFactor : uint8 +{ + + // S, srcColor - from shader + // D, dstColor - from render target + // S1 - from shader (dual src blend) + // cc - constant color + // result = srcColor * srcBlend [blendOp] dstColor * dstBlend + // + + // 0 + Zero, + + // 1 + One, + + // S + SrcColor, + + // 1 - S + OneMinusSrcColor, + + // D + DstColor, + + // 1 - D + OneMinusDstColor, + + // S.a + SrcAlpha, + + // 1 - S.a + OneMinusSrcAlpha, + + // D.a + DstAlpha, + + // 1 - D.a + OneMinusDstAlpha, + + // cc + ConstColor, + + // 1 - cc + OneMinusConstColor, + + // cc.a + ConstAlpha, + + // 1 - cc.a + OneMinusConstAlpha, + + // rgb * min( S.a, D.a ), a * 1 + SrcAlphaSaturate, + + // S1 + Src1Color, + + // 1 - S1 + OneMinusSrc1Color, + + // S1.a + Src1Alpha, + + // 1 - S1.a + OneMinusSrc1Alpha, +}; +uint8 operator | (EBlendFactor lhs, EBlendFactor rhs); +uint8 operator | (uint8 lhs, EBlendFactor rhs); +uint8 operator | 
(EBlendFactor lhs, uint8 rhs); + +enum class EBlendOp : uint8 +{ + + // S, srcColor - from shader + // D, dstColor - from render target + // result = srcColor * srcBlend [blendOp] dstColor * dstBlend + // + + // S + D + Add, + + // S - D + Sub, + + // D - S + RevSub, + + // min( S, D ) + Min, + + // max( S, D ) + Max, +}; +uint8 operator | (EBlendOp lhs, EBlendOp rhs); +uint8 operator | (uint8 lhs, EBlendOp rhs); +uint8 operator | (EBlendOp lhs, uint8 rhs); + +enum class ELogicOp : uint8 +{ + + // S - from shader + // D - from render target + // result = S [logicOp] D + // + + // disabled + None, + + // 0 + Clear, + + // 1 + Set, + + // S + Copy, + + // ~S + CopyInverted, + + // D + NoOp, + + // ~D + Invert, + + // S & D + And, + + // ~ ( S & D ) + NotAnd, + + // S | D + Or, + + // ~ ( S | D ) + NotOr, + + // S ^ D + Xor, + + // ~ ( S ^ D ) + Equiv, + + // S & ~D + AndReverse, + + // ~S & D + AndInverted, + + // S | ~D + OrReverse, + + // ~S | D + OrInverted, +}; +uint8 operator | (ELogicOp lhs, ELogicOp rhs); +uint8 operator | (uint8 lhs, ELogicOp rhs); +uint8 operator | (ELogicOp lhs, uint8 rhs); + +enum class EStencilOp : uint8 +{ + + // src + Keep, + + // 0 + Zero, + + // ref + Replace, + + // min( ++src, 0 ) + Incr, + + // ++src & maxValue + IncrWrap, + + // max( --src, 0 ) + Decr, + + // --src & maxValue + DecrWrap, + + // ~src + Invert, +}; +uint8 operator | (EStencilOp lhs, EStencilOp rhs); +uint8 operator | (uint8 lhs, EStencilOp rhs); +uint8 operator | (EStencilOp lhs, uint8 rhs); + +enum class EPolygonMode : uint8 +{ + Point, + Line, + Fill, +}; +uint8 operator | (EPolygonMode lhs, EPolygonMode rhs); +uint8 operator | (uint8 lhs, EPolygonMode rhs); +uint8 operator | (EPolygonMode lhs, uint8 rhs); + +enum class EPrimitive : uint8 +{ + Point, + LineList, + LineStrip, + LineListAdjacency, + LineStripAdjacency, + TriangleList, + TriangleStrip, + TriangleFan, + TriangleListAdjacency, + TriangleStripAdjacency, + Patch, +}; +uint8 operator | (EPrimitive lhs, 
EPrimitive rhs); +uint8 operator | (uint8 lhs, EPrimitive rhs); +uint8 operator | (EPrimitive lhs, uint8 rhs); + +enum class ECullMode : uint8 +{ + None, + Front, + Back, + FontAndBack, +}; +uint8 operator | (ECullMode lhs, ECullMode rhs); +uint8 operator | (uint8 lhs, ECullMode rhs); +uint8 operator | (ECullMode lhs, uint8 rhs); + +enum class EPipelineDynamicState : uint16 +{ + None, + StencilCompareMask, + StencilWriteMask, + StencilReference, + DepthBias, + BlendConstants, + RTStackSize, + FragmentShadingRate, + ViewportWScaling, +}; +uint16 operator | (EPipelineDynamicState lhs, EPipelineDynamicState rhs); +uint16 operator | (uint16 lhs, EPipelineDynamicState rhs); +uint16 operator | (EPipelineDynamicState lhs, uint16 rhs); + +enum class EResourceState : uint32 +{ + Unknown, + Preserve, + ShaderStorage_Read, + ShaderStorage_Write, + ShaderStorage_RW, + ShaderUniform, + ShaderSample, + CopySrc, + CopyDst, + ClearDst, + BlitSrc, + BlitDst, + InputColorAttachment, + InputColorAttachment_RW, + ColorAttachment, + ColorAttachment_Blend, + DepthStencilAttachment_Read, + DepthStencilAttachment_Write, + DepthStencilAttachment_RW, + DepthTest_StencilRW, + DepthRW_StencilTest, + DepthStencilTest_ShaderSample, + DepthTest_DepthSample_StencilRW, + InputDepthStencilAttachment, + InputDepthStencilAttachment_RW, + Host_Read, + PresentImage, + IndirectBuffer, + IndexBuffer, + VertexBuffer, + ShadingRateImage, + CopyRTAS_Read, + CopyRTAS_Write, + BuildRTAS_Read, + BuildRTAS_Write, + BuildRTAS_RW, + BuildRTAS_IndirectBuffer, + ShaderRTAS, + RTShaderBindingTable, + DSTestBeforeFS, + DSTestAfterFS, + Invalidate, + General, + MeshTaskShader, + VertexProcessingShaders, + TileShader, + FragmentShader, + PreRasterizationShaders, + PostRasterizationShaders, + ComputeShader, + RayTracingShaders, + AllGraphicsShaders, + AllShaders, + BuildRTAS_ScratchBuffer, + InputDepthAttachment, + DepthStencilAttachment, +}; +uint32 operator | (EResourceState lhs, EResourceState rhs); +uint32 operator 
| (uint32 lhs, EResourceState rhs); +uint32 operator | (EResourceState lhs, uint32 rhs); + +enum class EImageAspect : uint8 +{ + Color, + Depth, + Stencil, + DepthStencil, + Plane_0, + Plane_1, + Plane_2, +}; +uint8 operator | (EImageAspect lhs, EImageAspect rhs); +uint8 operator | (uint8 lhs, EImageAspect rhs); +uint8 operator | (EImageAspect lhs, uint8 rhs); + +enum class EShaderIO : uint8 +{ + Int, + UInt, + Float, + UFloat, + Half, + UNorm, + SNorm, + sRGB, + AnyColor, + Depth, + Stencil, + DepthStencil, +}; +uint8 operator | (EShaderIO lhs, EShaderIO rhs); +uint8 operator | (uint8 lhs, EShaderIO rhs); +uint8 operator | (EShaderIO lhs, uint8 rhs); + +enum class ESubgroupTypes : uint8 +{ + Float16, + Float32, + Int8, + Int16, + Int32, + Int64, +}; +uint8 operator | (ESubgroupTypes lhs, ESubgroupTypes rhs); +uint8 operator | (uint8 lhs, ESubgroupTypes rhs); +uint8 operator | (ESubgroupTypes lhs, uint8 rhs); + +enum class ESubgroupOperation : uint32 +{ + IndexAndSize, + Elect, + Barrier, + Any, + All, + AllEqual, + Add, + Mul, + Min, + Max, + And, + Or, + Xor, + InclusiveMul, + InclusiveAdd, + InclusiveMin, + InclusiveMax, + InclusiveAnd, + InclusiveOr, + InclusiveXor, + ExclusiveAdd, + ExclusiveMul, + ExclusiveMin, + ExclusiveMax, + ExclusiveAnd, + ExclusiveOr, + ExclusiveXor, + Ballot, + Broadcast, + BroadcastFirst, + InverseBallot, + BallotBitExtract, + BallotBitCount, + BallotInclusiveBitCount, + BallotExclusiveBitCount, + BallotFindLSB, + BallotFindMSB, + Shuffle, + ShuffleXor, + ShuffleUp, + ShuffleDown, + ClusteredAdd, + ClusteredMul, + ClusteredMin, + ClusteredMax, + ClusteredAnd, + ClusteredOr, + ClusteredXor, + QuadBroadcast, + QuadSwapHorizontal, + QuadSwapVertical, + QuadSwapDiagonal, + _Basic_Begin, + _Basic_End, + _Vote_Begin, + _Vote_End, + _Arithmetic_Begin, + _Arithmetic_End, + _Ballot_Begin, + _Ballot_End, + _Shuffle_Begin, + _Shuffle_End, + _ShuffleRelative_Begin, + _ShuffleRelative_End, + _Clustered_Begin, + _Clustered_End, + _Quad_Begin, + 
_Quad_End, +}; +uint32 operator | (ESubgroupOperation lhs, ESubgroupOperation rhs); +uint32 operator | (uint32 lhs, ESubgroupOperation rhs); +uint32 operator | (ESubgroupOperation lhs, uint32 rhs); + +enum class EFeature : uint8 +{ + Ignore, + RequireTrue, + RequireFalse, +}; +uint8 operator | (EFeature lhs, EFeature rhs); +uint8 operator | (uint8 lhs, EFeature rhs); +uint8 operator | (EFeature lhs, uint8 rhs); + +enum class EShader : uint8 +{ + Vertex, + TessControl, + TessEvaluation, + Geometry, + Fragment, + Compute, + Tile, + MeshTask, + Mesh, + RayGen, + RayAnyHit, + RayClosestHit, + RayMiss, + RayIntersection, + RayCallable, +}; +uint8 operator | (EShader lhs, EShader rhs); +uint8 operator | (uint8 lhs, EShader rhs); +uint8 operator | (EShader lhs, uint8 rhs); + +enum class EShaderStages : uint16 +{ + Vertex, + TessControl, + TessEvaluation, + Geometry, + Fragment, + Compute, + Tile, + MeshTask, + Mesh, + RayGen, + RayAnyHit, + RayClosestHit, + RayMiss, + RayIntersection, + RayCallable, + All, + AllGraphics, + GraphicsPipeStages, + MeshPipeStages, + VertexProcessingStages, + PreRasterizationStages, + PostRasterizationStages, + AllRayTracing, +}; +uint16 operator | (EShaderStages lhs, EShaderStages rhs); +uint16 operator | (uint16 lhs, EShaderStages rhs); +uint16 operator | (EShaderStages lhs, uint16 rhs); + +enum class EGPUVendor : uint32 +{ + AMD, + NVidia, + Intel, + ARM, + Qualcomm, + ImgTech, + Microsoft, + Apple, + Mesa, + Broadcom, + Samsung, + VeriSilicon, + Huawei, +}; +uint32 operator | (EGPUVendor lhs, EGPUVendor rhs); +uint32 operator | (uint32 lhs, EGPUVendor rhs); +uint32 operator | (EGPUVendor lhs, uint32 rhs); + +enum class EVertexType : uint16 +{ + Byte, + Byte2, + Byte3, + Byte4, + Byte_Norm, + Byte2_Norm, + Byte3_Norm, + Byte4_Norm, + Byte_Scaled, + Byte2_Scaled, + Byte3_Scaled, + Byte4_Scaled, + UByte, + UByte2, + UByte3, + UByte4, + UByte_Norm, + UByte2_Norm, + UByte3_Norm, + UByte4_Norm, + UByte_Scaled, + UByte2_Scaled, + UByte3_Scaled, + 
UByte4_Scaled, + Short, + Short2, + Short3, + Short4, + Short_Norm, + Short2_Norm, + Short3_Norm, + Short4_Norm, + Short_Scaled, + Short2_Scaled, + Short3_Scaled, + Short4_Scaled, + UShort, + UShort2, + UShort3, + UShort4, + UShort_Norm, + UShort2_Norm, + UShort3_Norm, + UShort4_Norm, + UShort_Scaled, + UShort2_Scaled, + UShort3_Scaled, + UShort4_Scaled, + Int, + Int2, + Int3, + Int4, + UInt, + UInt2, + UInt3, + UInt4, + Long, + Long2, + Long3, + Long4, + ULong, + ULong2, + ULong3, + ULong4, + Half, + Half2, + Half3, + Half4, + Float, + Float2, + Float3, + Float4, + Double, + Double2, + Double3, + Double4, + UInt_2_10_10_10, + UInt_2_10_10_10_Norm, + UInt_2_10_10_10_Scaled, +}; +uint16 operator | (EVertexType lhs, EVertexType rhs); +uint16 operator | (uint16 lhs, EVertexType rhs); +uint16 operator | (EVertexType lhs, uint16 rhs); + +enum class EGraphicsDeviceID : uint32 +{ + Adreno_500, + Adreno_600, + Adreno_700, + AMD_GCN1, + AMD_GCN2, + AMD_GCN3, + AMD_GCN4, + AMD_GCN5, + AMD_GCN5_APU, + AMD_RDNA1, + AMD_RDNA2, + AMD_RDNA2_APU, + AMD_RDNA3, + AMD_RDNA3_APU, + AMD_RDNA4, + Apple_A8, + Apple_A9_A10, + Apple_A11, + Apple_A12, + Apple_A13, + Apple_A14_M1, + Apple_A15_M2, + Apple_A16, + Apple_A17_M3, + Mali_Midgard_Gen2, + Mali_Midgard_Gen3, + Mali_Midgard_Gen4, + Mali_Bifrost_Gen1, + Mali_Bifrost_Gen2, + Mali_Bifrost_Gen3, + Mali_Valhall_Gen1, + Mali_Valhall_Gen2, + Mali_Valhall_Gen3, + Mali_Valhall_Gen4, + Mali_5thGen_Gen1, + Mali_5thGen_Gen2, + NV_Maxwell, + NV_Maxwell_Tegra, + NV_Pascal, + NV_Pascal_MX, + NV_Pascal_Tegra, + NV_Volta, + NV_Turing_16, + NV_Turing, + NV_Turing_MX, + NV_Ampere, + NV_Ampere_Orin, + NV_Ada, + NV_Blackwell, + Intel_Gen7, + Intel_Gen8, + Intel_Gen9, + Intel_Gen11, + Intel_Gen12, + Intel_Gen12_7, + PowerVR_Series8, + PowerVR_Series9, + PowerVR_SeriesA, + PowerVR_SeriesB, + VeriSilicon, + SwiftShader, +}; +uint32 operator | (EGraphicsDeviceID lhs, EGraphicsDeviceID rhs); +uint32 operator | (uint32 lhs, EGraphicsDeviceID rhs); +uint32 
operator | (EGraphicsDeviceID lhs, uint32 rhs); + +enum class EFilter : uint8 +{ + Nearest, + Linear, +}; +uint8 operator | (EFilter lhs, EFilter rhs); +uint8 operator | (uint8 lhs, EFilter rhs); +uint8 operator | (EFilter lhs, uint8 rhs); + +enum class EMipmapFilter : uint8 +{ + None, + Nearest, + Linear, +}; +uint8 operator | (EMipmapFilter lhs, EMipmapFilter rhs); +uint8 operator | (uint8 lhs, EMipmapFilter rhs); +uint8 operator | (EMipmapFilter lhs, uint8 rhs); + +enum class EAddressMode : uint8 +{ + Repeat, + MirrorRepeat, + ClampToEdge, + ClampToBorder, + MirrorClampToEdge, + Clamp, + MirrorClamp, +}; +uint8 operator | (EAddressMode lhs, EAddressMode rhs); +uint8 operator | (uint8 lhs, EAddressMode rhs); +uint8 operator | (EAddressMode lhs, uint8 rhs); + +enum class EBorderColor : uint8 +{ + FloatTransparentBlack, + FloatOpaqueBlack, + FloatOpaqueWhite, + IntTransparentBlack, + IntOpaqueBlack, + IntOpaqueWhite, +}; +uint8 operator | (EBorderColor lhs, EBorderColor rhs); +uint8 operator | (uint8 lhs, EBorderColor rhs); +uint8 operator | (EBorderColor lhs, uint8 rhs); + +enum class EReductionMode : uint8 +{ + Average, + Min, + Max, +}; +uint8 operator | (EReductionMode lhs, EReductionMode rhs); +uint8 operator | (uint8 lhs, EReductionMode rhs); +uint8 operator | (EReductionMode lhs, uint8 rhs); + +enum class ESamplerOpt : uint8 +{ + ArgumentBuffer, + UnnormalizedCoordinates, + NonSeamlessCubeMap, +}; +uint8 operator | (ESamplerOpt lhs, ESamplerOpt rhs); +uint8 operator | (uint8 lhs, ESamplerOpt rhs); +uint8 operator | (ESamplerOpt lhs, uint8 rhs); + +enum class EVertexInputRate : uint8 +{ + Vertex, + Instance, +}; +uint8 operator | (EVertexInputRate lhs, EVertexInputRate rhs); +uint8 operator | (uint8 lhs, EVertexInputRate rhs); +uint8 operator | (EVertexInputRate lhs, uint8 rhs); + +enum class EDescSetUsage : uint8 +{ + AllowPartialyUpdate, + UpdateTemplate, + ArgumentBuffer, + MutableArgBuffer, + MaybeUnsupported, +}; +uint8 operator | (EDescSetUsage lhs, 
EDescSetUsage rhs); +uint8 operator | (uint8 lhs, EDescSetUsage rhs); +uint8 operator | (EDescSetUsage lhs, uint8 rhs); + +enum class EPipelineOpt : uint16 +{ + + // Optimize pipeline during creation, may be slow. + Optimize, + CS_DispatchBase, + RT_NoNullAnyHitShaders, + RT_NoNullClosestHitShaders, + RT_NoNullMissShaders, + RT_NoNullIntersectionShaders, + RT_SkipTriangles, + RT_SkipAABBs, + + // Pipeline creation will fail if it is not exists in cache. + DontCompile, + + // When a pipeline is created, its state and shaders are compiled into zero or more device-specific executables, + // which are used when executing commands against that pipeline. + CaptureStatistics, + + // May include the final shader assembly, a binary form of the compiled shader, + // or the shader compiler’s internal representation at any number of intermediate compile steps. + CaptureInternalRepresentation, + + // Disable pipeline optimization to speedup creation. + DontOptimize, + None, +}; +uint16 operator | (EPipelineOpt lhs, EPipelineOpt rhs); +uint16 operator | (uint16 lhs, EPipelineOpt rhs); +uint16 operator | (EPipelineOpt lhs, uint16 rhs); + +enum class EQueueMask : uint8 +{ + Graphics, + AsyncCompute, + AsyncTransfer, + VideoEncode, + VideoDecode, + All, +}; +uint8 operator | (EQueueMask lhs, EQueueMask rhs); +uint8 operator | (uint8 lhs, EQueueMask rhs); +uint8 operator | (EQueueMask lhs, uint8 rhs); + +enum class ESamplerChromaLocation : uint8 +{ + CositedEven, + Midpoint, +}; +uint8 operator | (ESamplerChromaLocation lhs, ESamplerChromaLocation rhs); +uint8 operator | (uint8 lhs, ESamplerChromaLocation rhs); +uint8 operator | (ESamplerChromaLocation lhs, uint8 rhs); + +enum class ESamplerYcbcrModelConversion : uint8 +{ + RGB_Identity, + Ycbcr_Identity, + Ycbcr_709, + Ycbcr_601, + Ycbcr_2020, +}; +uint8 operator | (ESamplerYcbcrModelConversion lhs, ESamplerYcbcrModelConversion rhs); +uint8 operator | (uint8 lhs, ESamplerYcbcrModelConversion rhs); +uint8 operator | 
(ESamplerYcbcrModelConversion lhs, uint8 rhs); + +enum class ESamplerYcbcrRange : uint8 +{ + ITU_Full, + ITU_Narrow, +}; +uint8 operator | (ESamplerYcbcrRange lhs, ESamplerYcbcrRange rhs); +uint8 operator | (uint8 lhs, ESamplerYcbcrRange rhs); +uint8 operator | (ESamplerYcbcrRange lhs, uint8 rhs); + +enum class ESurfaceFormat : uint8 +{ + BGRA8_sRGB_nonlinear, + RGBA8_sRGB_nonlinear, + BGRA8_BT709_nonlinear, + RGBA16F_Extended_sRGB_linear, + RGBA16F_sRGB_nonlinear, + RGBA16F_BT709_nonlinear, + RGBA16F_HDR10_ST2084, + RGBA16F_BT2020_linear, + RGB10A2_sRGB_nonlinear, + RGB10A2_HDR10_ST2084, +}; +uint8 operator | (ESurfaceFormat lhs, ESurfaceFormat rhs); +uint8 operator | (uint8 lhs, ESurfaceFormat rhs); +uint8 operator | (ESurfaceFormat lhs, uint8 rhs); + +enum class ERTInstanceOpt : uint8 +{ + TriangleCullDisable, + TriangleFrontCCW, + ForceOpaque, + ForceNonOpaque, + TriangleCullBack, + TriangleFrontCW, +}; +uint8 operator | (ERTInstanceOpt lhs, ERTInstanceOpt rhs); +uint8 operator | (uint8 lhs, ERTInstanceOpt rhs); +uint8 operator | (ERTInstanceOpt lhs, uint8 rhs); + +enum class EImageUsage : uint32 +{ + TransferSrc, + TransferDst, + Sampled, + Storage, + ColorAttachment, + DepthStencilAttachment, + InputAttachment, + ShadingRate, + All, + Transfer, + RWAttachment, +}; +uint32 operator | (EImageUsage lhs, EImageUsage rhs); +uint32 operator | (uint32 lhs, EImageUsage rhs); +uint32 operator | (EImageUsage lhs, uint32 rhs); + +enum class EImageOpt : uint32 +{ + BlitSrc, + BlitDst, + CubeCompatible, + MutableFormat, + Array2DCompatible, + BlockTexelViewCompatible, + SparseResidency, + SparseAliased, + Alias, + SampleLocationsCompatible, + StorageAtomic, + ColorAttachmentBlend, + SampledLinear, + SampledMinMax, + VertexPplnStore, + FragmentPplnStore, + LossyRTCompression, + ExtendedUsage, + All, + SparseResidencyAliased, +}; +uint32 operator | (EImageOpt lhs, EImageOpt rhs); +uint32 operator | (uint32 lhs, EImageOpt rhs); +uint32 operator | (EImageOpt lhs, uint32 rhs); 
+ +enum class EBufferUsage : uint32 +{ + TransferSrc, + TransferDst, + UniformTexel, + StorageTexel, + Uniform, + Storage, + Index, + Vertex, + Indirect, + ShaderAddress, + ShaderBindingTable, + ASBuild_ReadOnly, + ASBuild_Scratch, + All, + Transfer, +}; +uint32 operator | (EBufferUsage lhs, EBufferUsage rhs); +uint32 operator | (uint32 lhs, EBufferUsage rhs); +uint32 operator | (EBufferUsage lhs, uint32 rhs); + +enum class EBufferOpt : uint32 +{ + SparseResidency, + SparseAliased, + VertexPplnStore, + FragmentPplnStore, + StorageTexelAtomic, + All, + SparseResidencyAliased, +}; +uint32 operator | (EBufferOpt lhs, EBufferOpt rhs); +uint32 operator | (uint32 lhs, EBufferOpt rhs); +uint32 operator | (EBufferOpt lhs, uint32 rhs); + +enum class EShadingRate : uint8 +{ + Size1x1, + Size1x2, + Size1x4, + Size2x1, + Size2x2, + Size2x4, + Size4x1, + Size4x2, + Size4x4, +}; +uint8 operator | (EShadingRate lhs, EShadingRate rhs); +uint8 operator | (uint8 lhs, EShadingRate rhs); +uint8 operator | (EShadingRate lhs, uint8 rhs); + +enum class EShadingRateCombinerOp : uint8 +{ + Keep, + Replace, + Min, + Max, + Sum, + Mul, +}; +uint8 operator | (EShadingRateCombinerOp lhs, EShadingRateCombinerOp rhs); +uint8 operator | (uint8 lhs, EShadingRateCombinerOp rhs); +uint8 operator | (EShadingRateCombinerOp lhs, uint8 rhs); + +enum class EShaderVersion : uint32 +{ + + // Vulkan 1.0 + SPIRV_1_0, + SPIRV_1_1, + SPIRV_1_2, + + // Vulkan 1.1 + SPIRV_1_3, + + // Vulkan 1.1 extension + SPIRV_1_4, + + // Vulkan 1.2 + SPIRV_1_5, + + // Vulkan 1.3 + SPIRV_1_6, + + // Metal API + Metal_2_0, + Metal_2_1, + Metal_2_2, + + // Added ray tracing. + Metal_2_3, + Metal_2_4, + + // Added mesh shading. + Metal_3_0, + Metal_3_1, + + // Compile for iOS. + Metal_iOS_2_0, + Metal_iOS_2_1, + Metal_iOS_2_2, + Metal_iOS_2_3, + Metal_iOS_2_4, + Metal_iOS_3_0, + Metal_iOS_3_1, + + // Compile for MacOS. 
+ Metal_Mac_2_0, + Metal_Mac_2_1, + Metal_Mac_2_2, + Metal_Mac_2_3, + Metal_Mac_2_4, + Metal_Mac_3_0, + Metal_Mac_3_1, +}; +uint32 operator | (EShaderVersion lhs, EShaderVersion rhs); +uint32 operator | (uint32 lhs, EShaderVersion rhs); +uint32 operator | (EShaderVersion lhs, uint32 rhs); + +enum class EShaderOpt : uint32 +{ + None, + + // Add debug information. Used in RenderDoc shader debugger. + DebugInfo, + + // Insert shader trace recording. Shader will be very slow. + Trace, + + // Insert shader function profiling. Shader will be very slow. + FnProfiling, + + // Insert whole shader time measurement. Shader will be a bit slow. + TimeHeatMap, + + // Enable optimizations. Take a lot of CPU time at shader compilation. + Optimize, + + // Enable bytecode size optimizations. Take a lot of CPU time at shader compilation. + OptimizeSize, + + // Enable strong optimizations. Take a lot of CPU time at shader compilation. + StrongOptimization, + WarnAsError, +}; +uint32 operator | (EShaderOpt lhs, EShaderOpt rhs); +uint32 operator | (uint32 lhs, EShaderOpt rhs); +uint32 operator | (EShaderOpt lhs, uint32 rhs); + +enum class EAccessType : uint32 +{ + Coherent, + Volatile, + Restrict, + + // Require 'vulkanMemoryModel' feature. 
+ DeviceCoherent, + QueueFamilyCoherent, + WorkgroupCoherent, + SubgroupCoherent, + NonPrivate, +}; +uint32 operator | (EAccessType lhs, EAccessType rhs); +uint32 operator | (uint32 lhs, EAccessType rhs); +uint32 operator | (EAccessType lhs, uint32 rhs); + +enum class EImageType : uint16 +{ + Cube, + CubeArray, + Buffer, + Float, + Half, + SNorm, + UNorm, + Int, + UInt, + sRGB, + Depth, + Stencil, + DepthStencil, + Shadow, + FImage1D, + FImage2D, + FImage2D_sRGB, + FImage3D, + FImage1DArray, + FImage2DArray, + FImageCube, + FImageCubeArray, + FImage2DMS, + FImage2DMSArray, + FImageBuffer, + HImage1D, + HImage2D, + HImage3D, + HImage1DArray, + HImage2DArray, + HImageCube, + HImageCubeArray, + HImage2DMS, + HImage2DMSArray, + HImageBuffer, + Image1DShadow, + Image2DShadow, + Image1DArrayShadow, + Image2DArrayShadow, + ImageCubeShadow, + ImageCubeArrayShadow, + IImage1D, + IImage2D, + IImage3D, + IImage1DArray, + IImage2DArray, + IImageCube, + IImageCubeArray, + IImage2DMS, + IImage2DMSArray, + IImageBuffer, + UImage1D, + UImage2D, + UImage3D, + UImage1DArray, + UImage2DArray, + UImageCube, + UImageCubeArray, + UImage2DMS, + UImage2DMSArray, + UImageBuffer, + SLongImage1D, + SLongImage2D, + SLongImage3D, + SLongImage1DArray, + SLongImage2DArray, + SLongImageCube, + SLongImageCubeArray, + SLongImage2DMS, + SLongImage2DMSArray, + SLongImageBuffer, + ULongImage1D, + ULongImage2D, + ULongImage3D, + ULongImage1DArray, + ULongImage2DArray, + ULongImageCube, + ULongImageCubeArray, + ULongImage2DMS, + ULongImage2DMSArray, + ULongImageBuffer, +}; +uint16 operator | (EImageType lhs, EImageType rhs); +uint16 operator | (uint16 lhs, EImageType rhs); +uint16 operator | (EImageType lhs, uint16 rhs); +static constexpr EImageType EImageType_1D = EImageType(1); +static constexpr EImageType EImageType_1DArray = EImageType(2); +static constexpr EImageType EImageType_2D = EImageType(3); +static constexpr EImageType EImageType_2DArray = EImageType(4); +static constexpr EImageType 
EImageType_2DMS = EImageType(5); +static constexpr EImageType EImageType_2DMSArray = EImageType(6); +static constexpr EImageType EImageType_3D = EImageType(9); + +enum class ECompilationTarget : uint32 +{ + Vulkan, + Metal_iOS, + Metal_Mac, +}; +uint32 operator | (ECompilationTarget lhs, ECompilationTarget rhs); +uint32 operator | (uint32 lhs, ECompilationTarget rhs); +uint32 operator | (ECompilationTarget lhs, uint32 rhs); + +enum class EStructLayout : uint8 +{ + + // Apply GLSL std140 rules but structure must be compatible with Metal otherwise will throw exception. + Compatible_Std140, + + // Apply GLSL std430 rules but structure must be compatible with Metal otherwise will throw exception. + Compatible_Std430, + + // Apply MSL rules. + Metal, + + // Apply GLSL std140 rules. + Std140, + + // Apply GLSL std430 rules. + Std430, + + // Platform depended layout. + InternalIO, +}; +uint8 operator | (EStructLayout lhs, EStructLayout rhs); +uint8 operator | (uint8 lhs, EStructLayout rhs); +uint8 operator | (EStructLayout lhs, uint8 rhs); + +enum class EValueType : uint8 +{ + Bool8, + Bool32, + Int8, + Int16, + Int32, + Int64, + UInt8, + UInt16, + UInt32, + UInt64, + Float16, + Float32, + Float64, + Int8_Norm, + Int16_Norm, + UInt8_Norm, + UInt16_Norm, + DeviceAddress, +}; +uint8 operator | (EValueType lhs, EValueType rhs); +uint8 operator | (uint8 lhs, EValueType rhs); +uint8 operator | (EValueType lhs, uint8 rhs); + +enum class EShaderPreprocessor : uint32 +{ + None, + + // Use for auto-complete in IDE. 
+ AEStyle, +}; +uint32 operator | (EShaderPreprocessor lhs, EShaderPreprocessor rhs); +uint32 operator | (uint32 lhs, EShaderPreprocessor rhs); +uint32 operator | (EShaderPreprocessor lhs, uint32 rhs); + +enum class EFormatFeature : uint32 +{ + StorageImageAtomic, + StorageImage, + AttachmentBlend, + Attachment, + LinearSampled, + UniformTexelBuffer, + StorageTexelBuffer, + StorageTexelBufferAtomic, + HWCompressedAttachment, + LossyCompressedAttachment, +}; +uint32 operator | (EFormatFeature lhs, EFormatFeature rhs); +uint32 operator | (uint32 lhs, EFormatFeature rhs); +uint32 operator | (EFormatFeature lhs, uint32 rhs); + +enum class ETessPatch : uint32 +{ + Points, + Isolines, + Triangles, + Quads, +}; +uint32 operator | (ETessPatch lhs, ETessPatch rhs); +uint32 operator | (uint32 lhs, ETessPatch rhs); +uint32 operator | (ETessPatch lhs, uint32 rhs); + +enum class ETessSpacing : uint32 +{ + Equal, + FractionalEven, + FractionalOdd, +}; +uint32 operator | (ETessSpacing lhs, ETessSpacing rhs); +uint32 operator | (uint32 lhs, ETessSpacing rhs); +uint32 operator | (ETessSpacing lhs, uint32 rhs); + +enum class ShaderStructTypeUsage : uint32 +{ + + // Used as interface between graphics pipeline stages. Reflection to C++ is not supported. + ShaderIO, + + // Used as vertex buffer layout. Enables reflection to C++. + VertexLayout, + + // Used as vertex attributes in shader. + // Reflection to C++ is not enabled, use 'VertexLayout' to enable it. + VertexAttribs, + + // Used as uniform/storage buffer. Layout must be same in GLSL/MSL and C++. Enables reflection to C++. + BufferLayout, + + // Used as buffer reference in shader. Layout must be same between shaders in single platform. + // Reflection to C++ is not enabled, use 'BufferLayout' to enable it. 
+ BufferReference, +}; +uint32 operator | (ShaderStructTypeUsage lhs, ShaderStructTypeUsage rhs); +uint32 operator | (uint32 lhs, ShaderStructTypeUsage rhs); +uint32 operator | (ShaderStructTypeUsage lhs, uint32 rhs); + +enum class EMutableRenderState : uint32 +{ +}; +uint32 operator | (EMutableRenderState lhs, EMutableRenderState rhs); +uint32 operator | (uint32 lhs, EMutableRenderState rhs); +uint32 operator | (EMutableRenderState lhs, uint32 rhs); + +enum class EAttachment : uint32 +{ + + // Discard previous content. Used as optimization for TBDR architectures. + Invalidate, + + // Color attachment. + Color, + + // Used as input attachment and color attachment. + ReadWrite, + + // Resolve attachment - will get content from multisampled color attachment. + ColorResolve, + + // Input attachment. + Input, + + // Depth attachment. + Depth, + + // Keep attachment content between passes. + Preserve, + + // Depth and stencil attachment. + DepthStencil, + + // Fragment shading rate attachment. + ShadingRate, +}; +uint32 operator | (EAttachment lhs, EAttachment rhs); +uint32 operator | (uint32 lhs, EAttachment rhs); +uint32 operator | (EAttachment lhs, uint32 rhs); + +enum class EAttachmentLoadOp : uint8 +{ + + // Previous content will not be preserved. + // In TBDR is allow to avoid transfer from global memory to cache. + Invalidate, + + // Preserve attachment content. + // In TBDR contents in global memory will be copied to cache. + Load, + + // Clear attachment before first pass. + // In TBDR is allow to avoid transfer from global memory to cache. + Clear, + + // Attachment is not used at all. + // Can be used to keep one compatible render pass and avoid unnecessary synchronizations for unused attachment. 
+ None, +}; +uint8 operator | (EAttachmentLoadOp lhs, EAttachmentLoadOp rhs); +uint8 operator | (uint8 lhs, EAttachmentLoadOp rhs); +uint8 operator | (EAttachmentLoadOp lhs, uint8 rhs); + +enum class EAttachmentStoreOp : uint8 +{ + + // Attachment content will not needed after rendering. + // In TBDR it allow to avoid transfer from cache to global memory. + Invalidate, + + // Attachment content will be written to global memory. + Store, + + // Attachment is read-only. Content may not be written to memory, but if changed then content in memory will be undefined. + // In TBDR it allow to avoid transfer from cache to global memory. + None, +}; +uint8 operator | (EAttachmentStoreOp lhs, EAttachmentStoreOp rhs); +uint8 operator | (uint8 lhs, EAttachmentStoreOp rhs); +uint8 operator | (EAttachmentStoreOp lhs, uint8 rhs); using sbyte = int8; using ubyte = uint8; @@ -1337,1117 +2908,18 @@ struct FS_RecursiveDirectoryIter FS_RecursiveDirectoryIter (const string & folder); bool IsFile () const; bool IsDirectory () const; - bool IsValid () const; - void Inc (); - string Path () const; - string Extension () const; - string FileName () const; - string Stem () const; - string ParentPath () const; -}; - -string ReadTextFile (const string & path); -void WriteFile (const string & path, const string & text); -void AppendFile (const string & path, const string & text); -struct EImage -{ - EImage () {} - EImage (uint8) {} - operator uint8 () const; - static constexpr uint8 1D = 0; - static constexpr uint8 2D = 1; - static constexpr uint8 3D = 2; - static constexpr uint8 1DArray = 3; - static constexpr uint8 2DArray = 4; - static constexpr uint8 Cube = 5; - static constexpr uint8 CubeArray = 6; -}; - -struct EIndex -{ - EIndex () {} - EIndex (uint8) {} - operator uint8 () const; - static constexpr uint8 UShort = 0; - static constexpr uint8 UInt = 1; -}; - -struct EPixelFormat -{ - EPixelFormat () {} - EPixelFormat (uint8) {} - operator uint8 () const; - static constexpr uint8 
RGBA16_SNorm = 0; - static constexpr uint8 RGBA8_SNorm = 1; - static constexpr uint8 RGB16_SNorm = 2; - static constexpr uint8 RGB8_SNorm = 3; - static constexpr uint8 RG16_SNorm = 4; - static constexpr uint8 RG8_SNorm = 5; - static constexpr uint8 R16_SNorm = 6; - static constexpr uint8 R8_SNorm = 7; - static constexpr uint8 RGBA16_UNorm = 8; - static constexpr uint8 RGBA8_UNorm = 9; - static constexpr uint8 RGB16_UNorm = 10; - static constexpr uint8 RGB8_UNorm = 11; - static constexpr uint8 RG16_UNorm = 12; - static constexpr uint8 RG8_UNorm = 13; - static constexpr uint8 R16_UNorm = 14; - static constexpr uint8 R8_UNorm = 15; - static constexpr uint8 RGB10_A2_UNorm = 16; - static constexpr uint8 RGBA4_UNorm = 17; - static constexpr uint8 RGB5_A1_UNorm = 18; - static constexpr uint8 RGB_5_6_5_UNorm = 19; - static constexpr uint8 BGR8_UNorm = 20; - static constexpr uint8 BGRA8_UNorm = 21; - static constexpr uint8 sRGB8 = 22; - static constexpr uint8 sRGB8_A8 = 23; - static constexpr uint8 sBGR8 = 24; - static constexpr uint8 sBGR8_A8 = 25; - static constexpr uint8 R8I = 26; - static constexpr uint8 RG8I = 27; - static constexpr uint8 RGB8I = 28; - static constexpr uint8 RGBA8I = 29; - static constexpr uint8 R16I = 30; - static constexpr uint8 RG16I = 31; - static constexpr uint8 RGB16I = 32; - static constexpr uint8 RGBA16I = 33; - static constexpr uint8 R32I = 34; - static constexpr uint8 RG32I = 35; - static constexpr uint8 RGB32I = 36; - static constexpr uint8 RGBA32I = 37; - static constexpr uint8 R64I = 38; - static constexpr uint8 R8U = 39; - static constexpr uint8 RG8U = 40; - static constexpr uint8 RGB8U = 41; - static constexpr uint8 RGBA8U = 42; - static constexpr uint8 R16U = 43; - static constexpr uint8 RG16U = 44; - static constexpr uint8 RGB16U = 45; - static constexpr uint8 RGBA16U = 46; - static constexpr uint8 R32U = 47; - static constexpr uint8 RG32U = 48; - static constexpr uint8 RGB32U = 49; - static constexpr uint8 RGBA32U = 50; - static 
constexpr uint8 RGB10_A2U = 51; - static constexpr uint8 R64U = 52; - static constexpr uint8 R16F = 53; - static constexpr uint8 RG16F = 54; - static constexpr uint8 RGB16F = 55; - static constexpr uint8 RGBA16F = 56; - static constexpr uint8 R32F = 57; - static constexpr uint8 RG32F = 58; - static constexpr uint8 RGB32F = 59; - static constexpr uint8 RGBA32F = 60; - static constexpr uint8 RGB_11_11_10F = 61; - static constexpr uint8 RGB9F_E5 = 62; - static constexpr uint8 Depth16 = 63; - static constexpr uint8 Depth24 = 64; - static constexpr uint8 Depth32F = 65; - static constexpr uint8 Depth16_Stencil8 = 66; - static constexpr uint8 Depth24_Stencil8 = 67; - static constexpr uint8 Depth32F_Stencil8 = 68; - static constexpr uint8 BC1_RGB8_UNorm = 69; - static constexpr uint8 BC1_sRGB8 = 70; - static constexpr uint8 BC1_RGB8_A1_UNorm = 71; - static constexpr uint8 BC1_sRGB8_A1 = 72; - static constexpr uint8 BC2_RGBA8_UNorm = 73; - static constexpr uint8 BC2_sRGB8 = 74; - static constexpr uint8 BC3_RGBA8_UNorm = 75; - static constexpr uint8 BC3_sRGB8 = 76; - static constexpr uint8 BC4_R8_SNorm = 77; - static constexpr uint8 BC4_R8_UNorm = 78; - static constexpr uint8 BC5_RG8_SNorm = 79; - static constexpr uint8 BC5_RG8_UNorm = 80; - static constexpr uint8 BC6H_RGB16F = 81; - static constexpr uint8 BC6H_RGB16UF = 82; - static constexpr uint8 BC7_RGBA8_UNorm = 83; - static constexpr uint8 BC7_sRGB8_A8 = 84; - static constexpr uint8 ETC2_RGB8_UNorm = 85; - static constexpr uint8 ETC2_sRGB8 = 86; - static constexpr uint8 ETC2_RGB8_A1_UNorm = 87; - static constexpr uint8 ETC2_sRGB8_A1 = 88; - static constexpr uint8 ETC2_RGBA8_UNorm = 89; - static constexpr uint8 ETC2_sRGB8_A8 = 90; - static constexpr uint8 EAC_R11_SNorm = 91; - static constexpr uint8 EAC_R11_UNorm = 92; - static constexpr uint8 EAC_RG11_SNorm = 93; - static constexpr uint8 EAC_RG11_UNorm = 94; - static constexpr uint8 ASTC_RGBA8_4x4 = 95; - static constexpr uint8 ASTC_RGBA8_5x4 = 96; - static constexpr 
uint8 ASTC_RGBA8_5x5 = 97; - static constexpr uint8 ASTC_RGBA8_6x5 = 98; - static constexpr uint8 ASTC_RGBA8_6x6 = 99; - static constexpr uint8 ASTC_RGBA8_8x5 = 100; - static constexpr uint8 ASTC_RGBA8_8x6 = 101; - static constexpr uint8 ASTC_RGBA8_8x8 = 102; - static constexpr uint8 ASTC_RGBA8_10x5 = 103; - static constexpr uint8 ASTC_RGBA8_10x6 = 104; - static constexpr uint8 ASTC_RGBA8_10x8 = 105; - static constexpr uint8 ASTC_RGBA8_10x10 = 106; - static constexpr uint8 ASTC_RGBA8_12x10 = 107; - static constexpr uint8 ASTC_RGBA8_12x12 = 108; - static constexpr uint8 ASTC_sRGB8_A8_4x4 = 109; - static constexpr uint8 ASTC_sRGB8_A8_5x4 = 110; - static constexpr uint8 ASTC_sRGB8_A8_5x5 = 111; - static constexpr uint8 ASTC_sRGB8_A8_6x5 = 112; - static constexpr uint8 ASTC_sRGB8_A8_6x6 = 113; - static constexpr uint8 ASTC_sRGB8_A8_8x5 = 114; - static constexpr uint8 ASTC_sRGB8_A8_8x6 = 115; - static constexpr uint8 ASTC_sRGB8_A8_8x8 = 116; - static constexpr uint8 ASTC_sRGB8_A8_10x5 = 117; - static constexpr uint8 ASTC_sRGB8_A8_10x6 = 118; - static constexpr uint8 ASTC_sRGB8_A8_10x8 = 119; - static constexpr uint8 ASTC_sRGB8_A8_10x10 = 120; - static constexpr uint8 ASTC_sRGB8_A8_12x10 = 121; - static constexpr uint8 ASTC_sRGB8_A8_12x12 = 122; - static constexpr uint8 ASTC_RGBA16F_4x4 = 123; - static constexpr uint8 ASTC_RGBA16F_5x4 = 124; - static constexpr uint8 ASTC_RGBA16F_5x5 = 125; - static constexpr uint8 ASTC_RGBA16F_6x5 = 126; - static constexpr uint8 ASTC_RGBA16F_6x6 = 127; - static constexpr uint8 ASTC_RGBA16F_8x5 = 128; - static constexpr uint8 ASTC_RGBA16F_8x6 = 129; - static constexpr uint8 ASTC_RGBA16F_8x8 = 130; - static constexpr uint8 ASTC_RGBA16F_10x5 = 131; - static constexpr uint8 ASTC_RGBA16F_10x6 = 132; - static constexpr uint8 ASTC_RGBA16F_10x8 = 133; - static constexpr uint8 ASTC_RGBA16F_10x10 = 134; - static constexpr uint8 ASTC_RGBA16F_12x10 = 135; - static constexpr uint8 ASTC_RGBA16F_12x12 = 136; - static constexpr uint8 G8B8G8R8_422_UNorm 
= 137; - static constexpr uint8 B8G8R8G8_422_UNorm = 138; - static constexpr uint8 G8_B8R8_420_UNorm = 139; - static constexpr uint8 G8_B8R8_422_UNorm = 140; - static constexpr uint8 G8_B8R8_444_UNorm = 141; - static constexpr uint8 G8_B8_R8_420_UNorm = 142; - static constexpr uint8 G8_B8_R8_422_UNorm = 143; - static constexpr uint8 G8_B8_R8_444_UNorm = 144; - static constexpr uint8 B10x6G10x6R10x6G10x6_422_UNorm = 145; - static constexpr uint8 G10x6B10x6G10x6R10x6_422_UNorm = 146; - static constexpr uint8 G10x6_B10x6R10x6_420_UNorm = 147; - static constexpr uint8 G10x6_B10x6R10x6_422_UNorm = 148; - static constexpr uint8 G10x6_B10x6R10x6_444_UNorm = 149; - static constexpr uint8 G10x6_B10x6_R10x6_420_UNorm = 150; - static constexpr uint8 G10x6_B10x6_R10x6_422_UNorm = 151; - static constexpr uint8 G10x6_B10x6_R10x6_444_UNorm = 152; - static constexpr uint8 R10x6G10x6B10x6A10x6_UNorm = 153; - static constexpr uint8 R10x6G10x6_UNorm = 154; - static constexpr uint8 R10x6_UNorm = 155; - static constexpr uint8 B12x4G12x4R12x4G12x4_422_UNorm = 156; - static constexpr uint8 G12x4B12x4G12x4R12x4_422_UNorm = 157; - static constexpr uint8 G12x4_B12x4R12x4_420_UNorm = 158; - static constexpr uint8 G12x4_B12x4R12x4_422_UNorm = 159; - static constexpr uint8 G12x4_B12x4R12x4_444_UNorm = 160; - static constexpr uint8 G12x4_B12x4_R12x4_420_UNorm = 161; - static constexpr uint8 G12x4_B12x4_R12x4_422_UNorm = 162; - static constexpr uint8 G12x4_B12x4_R12x4_444_UNorm = 163; - static constexpr uint8 R12x4G12x4B12x4A12x4_UNorm = 164; - static constexpr uint8 R12x4G12x4_UNorm = 165; - static constexpr uint8 R12x4_UNorm = 166; - static constexpr uint8 B16G16R16G16_422_UNorm = 167; - static constexpr uint8 G16B16G16R16_422_UNorm = 168; - static constexpr uint8 G16_B16R16_420_UNorm = 169; - static constexpr uint8 G16_B16R16_422_UNorm = 170; - static constexpr uint8 G16_B16R16_444_UNorm = 171; - static constexpr uint8 G16_B16_R16_420_UNorm = 172; - static constexpr uint8 
G16_B16_R16_422_UNorm = 173; - static constexpr uint8 G16_B16_R16_444_UNorm = 174; - static constexpr uint8 SwapchainColor = 254; -}; - -struct EPixelFormatExternal -{ - EPixelFormatExternal () {} - EPixelFormatExternal (uint8) {} - operator uint8 () const; - static constexpr uint8 Android_Depth16 = 0; - static constexpr uint8 Android_DepthJPEG = 1; - static constexpr uint8 Android_DepthPointCloud = 2; - static constexpr uint8 Android_JPEG = 3; - static constexpr uint8 Android_Raw16 = 5; - static constexpr uint8 Android_Raw12 = 6; - static constexpr uint8 Android_Raw10 = 7; - static constexpr uint8 Android_NV16 = 9; - static constexpr uint8 Android_NV21 = 10; - static constexpr uint8 Android_YCBCR_P010 = 11; - static constexpr uint8 Android_YUV_420 = 12; - static constexpr uint8 Android_YUV_422 = 13; - static constexpr uint8 Android_YUV_444 = 14; - static constexpr uint8 Android_YUY2 = 15; - static constexpr uint8 Android_YV12 = 16; - static constexpr uint8 Android_Y8 = 17; - static constexpr uint8 Android_HEIC = 18; -}; - -struct ECompareOp -{ - ECompareOp () {} - ECompareOp (uint8) {} - operator uint8 () const; - static constexpr uint8 Never = 0; - static constexpr uint8 Less = 1; - static constexpr uint8 Equal = 2; - static constexpr uint8 LEqual = 3; - static constexpr uint8 Greater = 4; - static constexpr uint8 NotEqual = 5; - static constexpr uint8 GEqual = 6; - static constexpr uint8 Always = 7; - static constexpr uint8 LessOrEqual = 3; - static constexpr uint8 GreaterOrEqual = 6; -}; - -struct EBlendFactor -{ - EBlendFactor () {} - EBlendFactor (uint8) {} - operator uint8 () const; - static constexpr uint8 Zero = 0; - static constexpr uint8 One = 1; - static constexpr uint8 SrcColor = 2; - static constexpr uint8 OneMinusSrcColor = 3; - static constexpr uint8 DstColor = 4; - static constexpr uint8 OneMinusDstColor = 5; - static constexpr uint8 SrcAlpha = 6; - static constexpr uint8 OneMinusSrcAlpha = 7; - static constexpr uint8 DstAlpha = 8; - static 
constexpr uint8 OneMinusDstAlpha = 9; - static constexpr uint8 ConstColor = 10; - static constexpr uint8 OneMinusConstColor = 11; - static constexpr uint8 ConstAlpha = 12; - static constexpr uint8 OneMinusConstAlpha = 13; - static constexpr uint8 SrcAlphaSaturate = 14; - static constexpr uint8 Src1Color = 15; - static constexpr uint8 OneMinusSrc1Color = 16; - static constexpr uint8 Src1Alpha = 17; - static constexpr uint8 OneMinusSrc1Alpha = 18; -}; - -struct EBlendOp -{ - EBlendOp () {} - EBlendOp (uint8) {} - operator uint8 () const; - static constexpr uint8 Add = 0; - static constexpr uint8 Sub = 1; - static constexpr uint8 RevSub = 2; - static constexpr uint8 Min = 3; - static constexpr uint8 Max = 4; -}; - -struct ELogicOp -{ - ELogicOp () {} - ELogicOp (uint8) {} - operator uint8 () const; - static constexpr uint8 None = 0; - static constexpr uint8 Clear = 1; - static constexpr uint8 Set = 2; - static constexpr uint8 Copy = 3; - static constexpr uint8 CopyInverted = 4; - static constexpr uint8 NoOp = 5; - static constexpr uint8 Invert = 6; - static constexpr uint8 And = 7; - static constexpr uint8 NotAnd = 8; - static constexpr uint8 Or = 9; - static constexpr uint8 NotOr = 10; - static constexpr uint8 Xor = 11; - static constexpr uint8 Equiv = 12; - static constexpr uint8 AndReverse = 13; - static constexpr uint8 AndInverted = 14; - static constexpr uint8 OrReverse = 15; - static constexpr uint8 OrInverted = 16; -}; - -struct EStencilOp -{ - EStencilOp () {} - EStencilOp (uint8) {} - operator uint8 () const; - static constexpr uint8 Keep = 0; - static constexpr uint8 Zero = 1; - static constexpr uint8 Replace = 2; - static constexpr uint8 Incr = 3; - static constexpr uint8 IncrWrap = 4; - static constexpr uint8 Decr = 5; - static constexpr uint8 DecrWrap = 6; - static constexpr uint8 Invert = 7; -}; - -struct EPolygonMode -{ - EPolygonMode () {} - EPolygonMode (uint8) {} - operator uint8 () const; - static constexpr uint8 Point = 0; - static constexpr uint8 
Line = 1; - static constexpr uint8 Fill = 2; -}; - -struct EPrimitive -{ - EPrimitive () {} - EPrimitive (uint8) {} - operator uint8 () const; - static constexpr uint8 Point = 0; - static constexpr uint8 LineList = 1; - static constexpr uint8 LineStrip = 2; - static constexpr uint8 LineListAdjacency = 3; - static constexpr uint8 LineStripAdjacency = 4; - static constexpr uint8 TriangleList = 5; - static constexpr uint8 TriangleStrip = 6; - static constexpr uint8 TriangleFan = 7; - static constexpr uint8 TriangleListAdjacency = 8; - static constexpr uint8 TriangleStripAdjacency = 9; - static constexpr uint8 Patch = 10; -}; - -struct ECullMode -{ - ECullMode () {} - ECullMode (uint8) {} - operator uint8 () const; - static constexpr uint8 None = 0; - static constexpr uint8 Front = 1; - static constexpr uint8 Back = 2; - static constexpr uint8 FontAndBack = 3; -}; - -struct EPipelineDynamicState -{ - EPipelineDynamicState () {} - EPipelineDynamicState (uint16) {} - operator uint16 () const; - static constexpr uint16 None = 0; - static constexpr uint16 StencilCompareMask = 1; - static constexpr uint16 StencilWriteMask = 2; - static constexpr uint16 StencilReference = 4; - static constexpr uint16 DepthBias = 8; - static constexpr uint16 BlendConstants = 16; - static constexpr uint16 RTStackSize = 64; - static constexpr uint16 FragmentShadingRate = 128; -}; - -struct EResourceState -{ - EResourceState () {} - EResourceState (uint32) {} - operator uint32 () const; - static constexpr uint32 Unknown = 0; - static constexpr uint32 Preserve = 1; - static constexpr uint32 ShaderStorage_Read = 271; - static constexpr uint32 ShaderStorage_Write = 528; - static constexpr uint32 ShaderStorage_RW = 785; - static constexpr uint32 ShaderUniform = 274; - static constexpr uint32 ShaderSample = 275; - static constexpr uint32 CopySrc = 258; - static constexpr uint32 CopyDst = 515; - static constexpr uint32 ClearDst = 516; - static constexpr uint32 BlitSrc = 261; - static constexpr uint32 
BlitDst = 518; - static constexpr uint32 InputColorAttachment = 276; - static constexpr uint32 InputColorAttachment_RW = 533; - static constexpr uint32 ColorAttachment = 519; - static constexpr uint32 ColorAttachment_Blend = 775; - static constexpr uint32 DepthStencilAttachment_Read = 1288; - static constexpr uint32 DepthStencilAttachment_Write = 2569; - static constexpr uint32 DepthStencilAttachment_RW = 3849; - static constexpr uint32 DepthTest_StencilRW = 3338; - static constexpr uint32 DepthRW_StencilTest = 1803; - static constexpr uint32 DepthStencilTest_ShaderSample = 1304; - static constexpr uint32 DepthTest_DepthSample_StencilRW = 3353; - static constexpr uint32 InputDepthStencilAttachment = 1302; - static constexpr uint32 InputDepthStencilAttachment_RW = 3863; - static constexpr uint32 Host_Read = 282; - static constexpr uint32 PresentImage = 268; - static constexpr uint32 IndirectBuffer = 284; - static constexpr uint32 IndexBuffer = 285; - static constexpr uint32 VertexBuffer = 286; - static constexpr uint32 ShadingRateImage = 269; - static constexpr uint32 CopyRTAS_Read = 287; - static constexpr uint32 CopyRTAS_Write = 544; - static constexpr uint32 BuildRTAS_Read = 289; - static constexpr uint32 BuildRTAS_Write = 546; - static constexpr uint32 BuildRTAS_RW = 802; - static constexpr uint32 BuildRTAS_IndirectBuffer = 291; - static constexpr uint32 ShaderRTAS = 283; - static constexpr uint32 RTShaderBindingTable = 292; - static constexpr uint32 DSTestBeforeFS = 4096; - static constexpr uint32 DSTestAfterFS = 8192; - static constexpr uint32 Invalidate = 16384; - static constexpr uint32 General = 782; - static constexpr uint32 MeshTaskShader = 32768; - static constexpr uint32 VertexProcessingShaders = 65536; - static constexpr uint32 TileShader = 131072; - static constexpr uint32 FragmentShader = 262144; - static constexpr uint32 PreRasterizationShaders = 98304; - static constexpr uint32 PostRasterizationShaders = 393216; - static constexpr uint32 
ComputeShader = 524288; - static constexpr uint32 RayTracingShaders = 1048576; - static constexpr uint32 AllGraphicsShaders = 491520; - static constexpr uint32 AllShaders = 2064384; - static constexpr uint32 BuildRTAS_ScratchBuffer = 802; - static constexpr uint32 InputDepthAttachment = 1302; - static constexpr uint32 DepthStencilAttachment = 16137; -}; - -struct EImageAspect -{ - EImageAspect () {} - EImageAspect (uint8) {} - operator uint8 () const; - static constexpr uint8 Color = 1; - static constexpr uint8 Depth = 2; - static constexpr uint8 Stencil = 4; - static constexpr uint8 DepthStencil = 6; - static constexpr uint8 Plane_0 = 16; - static constexpr uint8 Plane_1 = 32; - static constexpr uint8 Plane_2 = 64; -}; - -struct EShaderIO -{ - EShaderIO () {} - EShaderIO (uint8) {} - operator uint8 () const; - static constexpr uint8 Int = 1; - static constexpr uint8 UInt = 2; - static constexpr uint8 Float = 3; - static constexpr uint8 UFloat = 4; - static constexpr uint8 Half = 5; - static constexpr uint8 UNorm = 6; - static constexpr uint8 SNorm = 7; - static constexpr uint8 sRGB = 8; - static constexpr uint8 AnyColor = 9; - static constexpr uint8 Depth = 10; - static constexpr uint8 Stencil = 11; - static constexpr uint8 DepthStencil = 12; -}; - -struct ESubgroupTypes -{ - ESubgroupTypes () {} - ESubgroupTypes (uint8) {} - operator uint8 () const; - static constexpr uint8 Float16 = 32; - static constexpr uint8 Float32 = 1; - static constexpr uint8 Int8 = 4; - static constexpr uint8 Int16 = 8; - static constexpr uint8 Int32 = 2; - static constexpr uint8 Int64 = 16; -}; - -struct ESubgroupOperation -{ - ESubgroupOperation () {} - ESubgroupOperation (uint32) {} - operator uint32 () const; - static constexpr uint32 IndexAndSize = 0; - static constexpr uint32 Elect = 1; - static constexpr uint32 Barrier = 2; - static constexpr uint32 Any = 3; - static constexpr uint32 All = 4; - static constexpr uint32 AllEqual = 5; - static constexpr uint32 Add = 6; - static 
constexpr uint32 Mul = 7; - static constexpr uint32 Min = 8; - static constexpr uint32 Max = 9; - static constexpr uint32 And = 10; - static constexpr uint32 Or = 11; - static constexpr uint32 Xor = 12; - static constexpr uint32 InclusiveMul = 13; - static constexpr uint32 InclusiveAdd = 14; - static constexpr uint32 InclusiveMin = 15; - static constexpr uint32 InclusiveMax = 16; - static constexpr uint32 InclusiveAnd = 17; - static constexpr uint32 InclusiveOr = 18; - static constexpr uint32 InclusiveXor = 19; - static constexpr uint32 ExclusiveAdd = 20; - static constexpr uint32 ExclusiveMul = 21; - static constexpr uint32 ExclusiveMin = 22; - static constexpr uint32 ExclusiveMax = 23; - static constexpr uint32 ExclusiveAnd = 24; - static constexpr uint32 ExclusiveOr = 25; - static constexpr uint32 ExclusiveXor = 26; - static constexpr uint32 Ballot = 27; - static constexpr uint32 Broadcast = 28; - static constexpr uint32 BroadcastFirst = 29; - static constexpr uint32 InverseBallot = 30; - static constexpr uint32 BallotBitExtract = 31; - static constexpr uint32 BallotBitCount = 32; - static constexpr uint32 BallotInclusiveBitCount = 33; - static constexpr uint32 BallotExclusiveBitCount = 34; - static constexpr uint32 BallotFindLSB = 35; - static constexpr uint32 BallotFindMSB = 36; - static constexpr uint32 Shuffle = 37; - static constexpr uint32 ShuffleXor = 38; - static constexpr uint32 ShuffleUp = 39; - static constexpr uint32 ShuffleDown = 40; - static constexpr uint32 ClusteredAdd = 41; - static constexpr uint32 ClusteredMul = 42; - static constexpr uint32 ClusteredMin = 43; - static constexpr uint32 ClusteredMax = 44; - static constexpr uint32 ClusteredAnd = 45; - static constexpr uint32 ClusteredOr = 46; - static constexpr uint32 ClusteredXor = 47; - static constexpr uint32 QuadBroadcast = 48; - static constexpr uint32 QuadSwapHorizontal = 49; - static constexpr uint32 QuadSwapVertical = 50; - static constexpr uint32 QuadSwapDiagonal = 51; - static 
constexpr uint32 _Basic_Begin = 0; - static constexpr uint32 _Basic_End = 2; - static constexpr uint32 _Vote_Begin = 3; - static constexpr uint32 _Vote_End = 5; - static constexpr uint32 _Arithmetic_Begin = 6; - static constexpr uint32 _Arithmetic_End = 26; - static constexpr uint32 _Ballot_Begin = 27; - static constexpr uint32 _Ballot_End = 36; - static constexpr uint32 _Shuffle_Begin = 37; - static constexpr uint32 _Shuffle_End = 38; - static constexpr uint32 _ShuffleRelative_Begin = 39; - static constexpr uint32 _ShuffleRelative_End = 40; - static constexpr uint32 _Clustered_Begin = 41; - static constexpr uint32 _Clustered_End = 47; - static constexpr uint32 _Quad_Begin = 48; - static constexpr uint32 _Quad_End = 51; -}; - -struct EFeature -{ - EFeature () {} - EFeature (uint8) {} - operator uint8 () const; - static constexpr uint8 Ignore = 0; - static constexpr uint8 RequireTrue = 2; - static constexpr uint8 RequireFalse = 1; -}; - -struct EShader -{ - EShader () {} - EShader (uint8) {} - operator uint8 () const; - static constexpr uint8 Vertex = 0; - static constexpr uint8 TessControl = 1; - static constexpr uint8 TessEvaluation = 2; - static constexpr uint8 Geometry = 3; - static constexpr uint8 Fragment = 4; - static constexpr uint8 Compute = 5; - static constexpr uint8 Tile = 6; - static constexpr uint8 MeshTask = 7; - static constexpr uint8 Mesh = 8; - static constexpr uint8 RayGen = 9; - static constexpr uint8 RayAnyHit = 10; - static constexpr uint8 RayClosestHit = 11; - static constexpr uint8 RayMiss = 12; - static constexpr uint8 RayIntersection = 13; - static constexpr uint8 RayCallable = 14; -}; - -struct EShaderStages -{ - EShaderStages () {} - EShaderStages (uint16) {} - operator uint16 () const; - static constexpr uint16 Vertex = 1; - static constexpr uint16 TessControl = 2; - static constexpr uint16 TessEvaluation = 4; - static constexpr uint16 Geometry = 8; - static constexpr uint16 Fragment = 16; - static constexpr uint16 Compute = 32; - static 
constexpr uint16 Tile = 64; - static constexpr uint16 MeshTask = 128; - static constexpr uint16 Mesh = 256; - static constexpr uint16 RayGen = 512; - static constexpr uint16 RayAnyHit = 1024; - static constexpr uint16 RayClosestHit = 2048; - static constexpr uint16 RayMiss = 4096; - static constexpr uint16 RayIntersection = 8192; - static constexpr uint16 RayCallable = 16384; - static constexpr uint16 All = 32767; - static constexpr uint16 AllGraphics = 415; - static constexpr uint16 GraphicsPipeStages = 31; - static constexpr uint16 MeshPipeStages = 400; - static constexpr uint16 VertexProcessingStages = 271; - static constexpr uint16 PreRasterizationStages = 399; - static constexpr uint16 PostRasterizationStages = 80; - static constexpr uint16 AllRayTracing = 32256; -}; - -struct EGPUVendor -{ - EGPUVendor () {} - EGPUVendor (uint32) {} - operator uint32 () const; - static constexpr uint32 AMD = 0; - static constexpr uint32 NVidia = 1; - static constexpr uint32 Intel = 2; - static constexpr uint32 ARM = 3; - static constexpr uint32 Qualcomm = 4; - static constexpr uint32 ImgTech = 5; - static constexpr uint32 Microsoft = 6; - static constexpr uint32 Apple = 7; - static constexpr uint32 Mesa = 8; - static constexpr uint32 Broadcom = 9; - static constexpr uint32 Samsung = 10; - static constexpr uint32 VeriSilicon = 11; - static constexpr uint32 Huawei = 12; -}; - -struct EVertexType -{ - EVertexType () {} - EVertexType (uint16) {} - operator uint16 () const; - static constexpr uint16 Byte = 4; - static constexpr uint16 Byte2 = 5; - static constexpr uint16 Byte3 = 6; - static constexpr uint16 Byte4 = 7; - static constexpr uint16 Byte_Norm = 68; - static constexpr uint16 Byte2_Norm = 69; - static constexpr uint16 Byte3_Norm = 70; - static constexpr uint16 Byte4_Norm = 71; - static constexpr uint16 Byte_Scaled = 132; - static constexpr uint16 Byte2_Scaled = 133; - static constexpr uint16 Byte3_Scaled = 134; - static constexpr uint16 Byte4_Scaled = 135; - static 
constexpr uint16 UByte = 8; - static constexpr uint16 UByte2 = 9; - static constexpr uint16 UByte3 = 10; - static constexpr uint16 UByte4 = 11; - static constexpr uint16 UByte_Norm = 72; - static constexpr uint16 UByte2_Norm = 73; - static constexpr uint16 UByte3_Norm = 74; - static constexpr uint16 UByte4_Norm = 75; - static constexpr uint16 UByte_Scaled = 136; - static constexpr uint16 UByte2_Scaled = 137; - static constexpr uint16 UByte3_Scaled = 138; - static constexpr uint16 UByte4_Scaled = 139; - static constexpr uint16 Short = 12; - static constexpr uint16 Short2 = 13; - static constexpr uint16 Short3 = 14; - static constexpr uint16 Short4 = 15; - static constexpr uint16 Short_Norm = 76; - static constexpr uint16 Short2_Norm = 77; - static constexpr uint16 Short3_Norm = 78; - static constexpr uint16 Short4_Norm = 79; - static constexpr uint16 Short_Scaled = 140; - static constexpr uint16 Short2_Scaled = 141; - static constexpr uint16 Short3_Scaled = 142; - static constexpr uint16 Short4_Scaled = 143; - static constexpr uint16 UShort = 16; - static constexpr uint16 UShort2 = 17; - static constexpr uint16 UShort3 = 18; - static constexpr uint16 UShort4 = 19; - static constexpr uint16 UShort_Norm = 80; - static constexpr uint16 UShort2_Norm = 81; - static constexpr uint16 UShort3_Norm = 82; - static constexpr uint16 UShort4_Norm = 83; - static constexpr uint16 UShort_Scaled = 144; - static constexpr uint16 UShort2_Scaled = 145; - static constexpr uint16 UShort3_Scaled = 146; - static constexpr uint16 UShort4_Scaled = 147; - static constexpr uint16 Int = 20; - static constexpr uint16 Int2 = 21; - static constexpr uint16 Int3 = 22; - static constexpr uint16 Int4 = 23; - static constexpr uint16 UInt = 24; - static constexpr uint16 UInt2 = 25; - static constexpr uint16 UInt3 = 26; - static constexpr uint16 UInt4 = 27; - static constexpr uint16 Long = 28; - static constexpr uint16 Long2 = 29; - static constexpr uint16 Long3 = 30; - static constexpr uint16 Long4 = 
31; - static constexpr uint16 ULong = 32; - static constexpr uint16 ULong2 = 33; - static constexpr uint16 ULong3 = 34; - static constexpr uint16 ULong4 = 35; - static constexpr uint16 Half = 36; - static constexpr uint16 Half2 = 37; - static constexpr uint16 Half3 = 38; - static constexpr uint16 Half4 = 39; - static constexpr uint16 Float = 40; - static constexpr uint16 Float2 = 41; - static constexpr uint16 Float3 = 42; - static constexpr uint16 Float4 = 43; - static constexpr uint16 Double = 44; - static constexpr uint16 Double2 = 45; - static constexpr uint16 Double3 = 46; - static constexpr uint16 Double4 = 47; - static constexpr uint16 UInt_2_10_10_10 = 51; - static constexpr uint16 UInt_2_10_10_10_Norm = 115; - static constexpr uint16 UInt_2_10_10_10_Scaled = 179; -}; - -struct EGraphicsDeviceID -{ - EGraphicsDeviceID () {} - EGraphicsDeviceID (uint32) {} - operator uint32 () const; - static constexpr uint32 Adreno_500 = 0; - static constexpr uint32 Adreno_600 = 1; - static constexpr uint32 Adreno_700 = 2; - static constexpr uint32 AMD_GCN1 = 3; - static constexpr uint32 AMD_GCN2 = 4; - static constexpr uint32 AMD_GCN3 = 5; - static constexpr uint32 AMD_GCN4 = 6; - static constexpr uint32 AMD_GCN5 = 7; - static constexpr uint32 AMD_GCN5_APU = 8; - static constexpr uint32 AMD_RDNA1 = 9; - static constexpr uint32 AMD_RDNA2 = 10; - static constexpr uint32 AMD_RDNA2_APU = 11; - static constexpr uint32 AMD_RDNA3 = 12; - static constexpr uint32 AMD_RDNA3_APU = 13; - static constexpr uint32 AMD_RDNA4 = 14; - static constexpr uint32 Apple_A8 = 15; - static constexpr uint32 Apple_A9_A10 = 16; - static constexpr uint32 Apple_A11 = 17; - static constexpr uint32 Apple_A12 = 18; - static constexpr uint32 Apple_A13 = 19; - static constexpr uint32 Apple_A14_M1 = 20; - static constexpr uint32 Apple_A15_M2 = 21; - static constexpr uint32 Apple_A16 = 22; - static constexpr uint32 Apple_A17_M3 = 23; - static constexpr uint32 Mali_Midgard_Gen2 = 24; - static constexpr uint32 
Mali_Midgard_Gen3 = 25; - static constexpr uint32 Mali_Midgard_Gen4 = 26; - static constexpr uint32 Mali_Bifrost_Gen1 = 27; - static constexpr uint32 Mali_Bifrost_Gen2 = 28; - static constexpr uint32 Mali_Bifrost_Gen3 = 29; - static constexpr uint32 Mali_Valhall_Gen1 = 30; - static constexpr uint32 Mali_Valhall_Gen2 = 31; - static constexpr uint32 Mali_Valhall_Gen3 = 32; - static constexpr uint32 Mali_Valhall_Gen4 = 33; - static constexpr uint32 Mali_5thGen_Gen1 = 34; - static constexpr uint32 Mali_5thGen_Gen2 = 35; - static constexpr uint32 NV_Maxwell = 36; - static constexpr uint32 NV_Maxwell_Tegra = 37; - static constexpr uint32 NV_Pascal = 38; - static constexpr uint32 NV_Pascal_MX = 39; - static constexpr uint32 NV_Pascal_Tegra = 40; - static constexpr uint32 NV_Volta = 41; - static constexpr uint32 NV_Turing_16 = 42; - static constexpr uint32 NV_Turing = 43; - static constexpr uint32 NV_Turing_MX = 44; - static constexpr uint32 NV_Ampere = 45; - static constexpr uint32 NV_Ampere_Orin = 46; - static constexpr uint32 NV_Ada = 47; - static constexpr uint32 NV_Blackwell = 48; - static constexpr uint32 Intel_Gen7 = 49; - static constexpr uint32 Intel_Gen8 = 50; - static constexpr uint32 Intel_Gen9 = 51; - static constexpr uint32 Intel_Gen11 = 52; - static constexpr uint32 Intel_Gen12 = 53; - static constexpr uint32 Intel_Gen12_7 = 54; - static constexpr uint32 PowerVR_Series8 = 55; - static constexpr uint32 PowerVR_Series9 = 56; - static constexpr uint32 PowerVR_SeriesA = 57; - static constexpr uint32 PowerVR_SeriesB = 58; - static constexpr uint32 VeriSilicon = 59; - static constexpr uint32 SwiftShader = 60; -}; - -struct EFilter -{ - EFilter () {} - EFilter (uint8) {} - operator uint8 () const; - static constexpr uint8 Nearest = 0; - static constexpr uint8 Linear = 1; -}; - -struct EMipmapFilter -{ - EMipmapFilter () {} - EMipmapFilter (uint8) {} - operator uint8 () const; - static constexpr uint8 None = 0; - static constexpr uint8 Nearest = 1; - static 
constexpr uint8 Linear = 2; -}; - -struct EAddressMode -{ - EAddressMode () {} - EAddressMode (uint8) {} - operator uint8 () const; - static constexpr uint8 Repeat = 0; - static constexpr uint8 MirrorRepeat = 1; - static constexpr uint8 ClampToEdge = 2; - static constexpr uint8 ClampToBorder = 3; - static constexpr uint8 MirrorClampToEdge = 4; - static constexpr uint8 Clamp = 2; - static constexpr uint8 MirrorClamp = 4; -}; - -struct EBorderColor -{ - EBorderColor () {} - EBorderColor (uint8) {} - operator uint8 () const; - static constexpr uint8 FloatTransparentBlack = 0; - static constexpr uint8 FloatOpaqueBlack = 1; - static constexpr uint8 FloatOpaqueWhite = 2; - static constexpr uint8 IntTransparentBlack = 3; - static constexpr uint8 IntOpaqueBlack = 4; - static constexpr uint8 IntOpaqueWhite = 5; -}; - -struct EReductionMode -{ - EReductionMode () {} - EReductionMode (uint8) {} - operator uint8 () const; - static constexpr uint8 Average = 0; - static constexpr uint8 Min = 1; - static constexpr uint8 Max = 2; -}; - -struct ESamplerOpt -{ - ESamplerOpt () {} - ESamplerOpt (uint8) {} - operator uint8 () const; - static constexpr uint8 ArgumentBuffer = 1; - static constexpr uint8 UnnormalizedCoordinates = 4; - static constexpr uint8 NonSeamlessCubeMap = 2; -}; - -struct EVertexInputRate -{ - EVertexInputRate () {} - EVertexInputRate (uint8) {} - operator uint8 () const; - static constexpr uint8 Vertex = 0; - static constexpr uint8 Instance = 1; -}; - -struct EDescSetUsage -{ - EDescSetUsage () {} - EDescSetUsage (uint8) {} - operator uint8 () const; - static constexpr uint8 AllowPartialyUpdate = 1; - static constexpr uint8 UpdateTemplate = 2; - static constexpr uint8 ArgumentBuffer = 4; - static constexpr uint8 MutableArgBuffer = 8; - static constexpr uint8 MaybeUnsupported = 16; -}; - -struct EPipelineOpt -{ - EPipelineOpt () {} - EPipelineOpt (uint16) {} - operator uint16 () const; - - // Optimize pipeline during creation, may be slow. 
- static constexpr uint16 Optimize = 1; - static constexpr uint16 CS_DispatchBase = 2; - static constexpr uint16 RT_NoNullAnyHitShaders = 4; - static constexpr uint16 RT_NoNullClosestHitShaders = 8; - static constexpr uint16 RT_NoNullMissShaders = 16; - static constexpr uint16 RT_NoNullIntersectionShaders = 32; - static constexpr uint16 RT_SkipTriangles = 64; - static constexpr uint16 RT_SkipAABBs = 128; - - // Pipeline creation will fail if it is not exists in cache. - static constexpr uint16 DontCompile = 256; - - // When a pipeline is created, its state and shaders are compiled into zero or more device-specific executables, - // which are used when executing commands against that pipeline. - static constexpr uint16 CaptureStatistics = 512; - - // May include the final shader assembly, a binary form of the compiled shader, - // or the shader compiler’s internal representation at any number of intermediate compile steps. - static constexpr uint16 CaptureInternalRepresentation = 1024; - - // Disable pipeline optimization to speedup creation. 
- static constexpr uint16 DontOptimize = 0; - static constexpr uint16 None = 0; -}; - -struct EQueueMask -{ - EQueueMask () {} - EQueueMask (uint8) {} - operator uint8 () const; - static constexpr uint8 Graphics = 1; - static constexpr uint8 AsyncCompute = 2; - static constexpr uint8 AsyncTransfer = 4; - static constexpr uint8 VideoEncode = 8; - static constexpr uint8 VideoDecode = 16; - static constexpr uint8 All = 31; -}; - -struct ESamplerChromaLocation -{ - ESamplerChromaLocation () {} - ESamplerChromaLocation (uint8) {} - operator uint8 () const; - static constexpr uint8 CositedEven = 0; - static constexpr uint8 Midpoint = 1; -}; - -struct ESamplerYcbcrModelConversion -{ - ESamplerYcbcrModelConversion () {} - ESamplerYcbcrModelConversion (uint8) {} - operator uint8 () const; - static constexpr uint8 RGB_Identity = 0; - static constexpr uint8 Ycbcr_Identity = 1; - static constexpr uint8 Ycbcr_709 = 2; - static constexpr uint8 Ycbcr_601 = 3; - static constexpr uint8 Ycbcr_2020 = 4; -}; - -struct ESamplerYcbcrRange -{ - ESamplerYcbcrRange () {} - ESamplerYcbcrRange (uint8) {} - operator uint8 () const; - static constexpr uint8 ITU_Full = 0; - static constexpr uint8 ITU_Narrow = 1; -}; - -struct ESurfaceFormat -{ - ESurfaceFormat () {} - ESurfaceFormat (uint8) {} - operator uint8 () const; - static constexpr uint8 BGRA8_sRGB_nonlinear = 0; - static constexpr uint8 RGBA8_sRGB_nonlinear = 1; - static constexpr uint8 BGRA8_BT709_nonlinear = 2; - static constexpr uint8 RGBA16F_Extended_sRGB_linear = 3; - static constexpr uint8 RGBA16F_sRGB_nonlinear = 4; - static constexpr uint8 RGBA16F_BT709_nonlinear = 5; - static constexpr uint8 RGBA16F_HDR10_ST2084 = 6; - static constexpr uint8 RGBA16F_BT2020_linear = 7; - static constexpr uint8 RGB10A2_sRGB_nonlinear = 8; - static constexpr uint8 RGB10A2_HDR10_ST2084 = 9; -}; - -struct ERTInstanceOpt -{ - ERTInstanceOpt () {} - ERTInstanceOpt (uint8) {} - operator uint8 () const; - static constexpr uint8 TriangleCullDisable = 1; 
- static constexpr uint8 TriangleFrontCCW = 2; - static constexpr uint8 ForceOpaque = 4; - static constexpr uint8 ForceNonOpaque = 8; - static constexpr uint8 TriangleCullBack = 0; - static constexpr uint8 TriangleFrontCW = 0; -}; - -struct EImageUsage -{ - EImageUsage () {} - EImageUsage (uint32) {} - operator uint32 () const; - static constexpr uint32 TransferSrc = 1; - static constexpr uint32 TransferDst = 2; - static constexpr uint32 Sampled = 4; - static constexpr uint32 Storage = 8; - static constexpr uint32 ColorAttachment = 16; - static constexpr uint32 DepthStencilAttachment = 32; - static constexpr uint32 InputAttachment = 64; - static constexpr uint32 ShadingRate = 128; - static constexpr uint32 All = 255; - static constexpr uint32 Transfer = 3; - static constexpr uint32 RWAttachment = 80; -}; - -struct EImageOpt -{ - EImageOpt () {} - EImageOpt (uint32) {} - operator uint32 () const; - static constexpr uint32 BlitSrc = 1; - static constexpr uint32 BlitDst = 2; - static constexpr uint32 CubeCompatible = 4; - static constexpr uint32 MutableFormat = 8; - static constexpr uint32 Array2DCompatible = 16; - static constexpr uint32 BlockTexelViewCompatible = 32; - static constexpr uint32 SparseResidency = 64; - static constexpr uint32 SparseAliased = 128; - static constexpr uint32 Alias = 256; - static constexpr uint32 SampleLocationsCompatible = 512; - static constexpr uint32 StorageAtomic = 1024; - static constexpr uint32 ColorAttachmentBlend = 2048; - static constexpr uint32 SampledLinear = 4096; - static constexpr uint32 SampledMinMax = 8192; - static constexpr uint32 VertexPplnStore = 16384; - static constexpr uint32 FragmentPplnStore = 32768; - static constexpr uint32 LossyRTCompression = 65536; - static constexpr uint32 All = 131071; - static constexpr uint32 SparseResidencyAliased = 192; -}; - -struct EBufferUsage -{ - EBufferUsage () {} - EBufferUsage (uint32) {} - operator uint32 () const; - static constexpr uint32 TransferSrc = 1; - static constexpr 
uint32 TransferDst = 2; - static constexpr uint32 UniformTexel = 4; - static constexpr uint32 StorageTexel = 8; - static constexpr uint32 Uniform = 16; - static constexpr uint32 Storage = 32; - static constexpr uint32 Index = 64; - static constexpr uint32 Vertex = 128; - static constexpr uint32 Indirect = 256; - static constexpr uint32 ShaderAddress = 512; - static constexpr uint32 ShaderBindingTable = 1024; - static constexpr uint32 ASBuild_ReadOnly = 2048; - static constexpr uint32 ASBuild_Scratch = 4096; - static constexpr uint32 All = 8191; - static constexpr uint32 Transfer = 3; -}; - -struct EBufferOpt -{ - EBufferOpt () {} - EBufferOpt (uint32) {} - operator uint32 () const; - static constexpr uint32 SparseResidency = 1; - static constexpr uint32 SparseAliased = 2; - static constexpr uint32 VertexPplnStore = 4; - static constexpr uint32 FragmentPplnStore = 8; - static constexpr uint32 StorageTexelAtomic = 16; - static constexpr uint32 All = 31; - static constexpr uint32 SparseResidencyAliased = 3; -}; - -struct EShadingRate -{ - EShadingRate () {} - EShadingRate (uint8) {} - operator uint8 () const; - static constexpr uint8 Size1x1 = 16; - static constexpr uint8 Size1x2 = 32; - static constexpr uint8 Size1x4 = 48; - static constexpr uint8 Size2x1 = 64; - static constexpr uint8 Size2x2 = 80; - static constexpr uint8 Size2x4 = 96; - static constexpr uint8 Size4x1 = 112; - static constexpr uint8 Size4x2 = 128; - static constexpr uint8 Size4x4 = 144; -}; - -struct EShadingRateCombinerOp -{ - EShadingRateCombinerOp () {} - EShadingRateCombinerOp (uint8) {} - operator uint8 () const; - static constexpr uint8 Keep = 0; - static constexpr uint8 Replace = 1; - static constexpr uint8 Min = 2; - static constexpr uint8 Max = 3; - static constexpr uint8 Sum = 4; - static constexpr uint8 Mul = 5; + bool IsValid () const; + void Inc (); + string Path () const; + string Extension () const; + string FileName () const; + string Stem () const; + string ParentPath () const; }; 
+string ReadTextFile (const string & path); +void WriteFile (const string & path, const string & text); +void AppendFile (const string & path, const string & text); struct MultiSamples { MultiSamples (); @@ -2532,13 +3004,30 @@ struct RenderState_StencilBufferState RenderState_StencilFaceState front; RenderState_StencilFaceState back; bool enabled; + + // Stencil test compare operator. + // if '(stencilAttachment & CompareMask) [CompareOp] (Reference & CompareMask)' then sample passed stencil test. + void CompareOp (ECompareOp); + void Reference (uint); + void CompareMask (uint); + + // Action performed on samples that fail the stencil test. + // 'stencilValue = FailOp( stencilAttachment )' + // See 'CompareOp', 'Reference' and 'CompareMask' to know how stencil test is performed. void FailOp (EStencilOp); + + // Action performed on samples that pass the stencil test and fail the depth test. + // 'stencilValue = DepthFailOp( stencilAttachment )' + // Depth test happens after stencil test and before stencil update. void DepthFailOp (EStencilOp); + + // Action performed on samples that pass both the depth and stencil tests. + // 'stencilValue = PassOp( stencilAttachment )' void PassOp (EStencilOp); - void CompareOp (ECompareOp); - void Reference (uint8); - void WriteMask (uint8); - void CompareMask (uint8); + + // Bitmask which is ANDed with new stencil value and stencil attachment value before updating stencil attachment. 
+ // 'stencilAttachment = (stencilAttachment & ~WriteMask) | (stencilValue & WriteMask)' + void WriteMask (uint); }; struct RenderState_DepthBufferState @@ -2605,274 +3094,6 @@ struct RenderState RenderState_MultisampleState multisample; }; -struct EShaderVersion -{ - EShaderVersion () {} - EShaderVersion (uint32) {} - operator uint32 () const; - - // Vulkan 1.0 - static constexpr uint32 SPIRV_1_0 = 268435472; - static constexpr uint32 SPIRV_1_1 = 268435473; - static constexpr uint32 SPIRV_1_2 = 268435474; - - // Vulkan 1.1 - static constexpr uint32 SPIRV_1_3 = 268435475; - - // Vulkan 1.1 extension - static constexpr uint32 SPIRV_1_4 = 268435476; - - // Vulkan 1.2 - static constexpr uint32 SPIRV_1_5 = 268435477; - - // Vulkan 1.3 - static constexpr uint32 SPIRV_1_6 = 268435478; - - // Metal API - static constexpr uint32 Metal_2_0 = 1073741856; - static constexpr uint32 Metal_2_1 = 1073741857; - static constexpr uint32 Metal_2_2 = 1073741858; - - // Added ray tracing. - static constexpr uint32 Metal_2_3 = 1073741859; - static constexpr uint32 Metal_2_4 = 1073741860; - - // Added mesh shading. - static constexpr uint32 Metal_3_0 = 1073741872; - static constexpr uint32 Metal_3_1 = 1073741873; - - // Compile for iOS. - static constexpr uint32 Metal_iOS_2_0 = 536870944; - static constexpr uint32 Metal_iOS_2_1 = 536870945; - static constexpr uint32 Metal_iOS_2_2 = 536870946; - static constexpr uint32 Metal_iOS_2_3 = 536870947; - static constexpr uint32 Metal_iOS_2_4 = 536870948; - static constexpr uint32 Metal_iOS_3_0 = 536870960; - static constexpr uint32 Metal_iOS_3_1 = 536870961; - - // Compile for MacOS.
- static constexpr uint32 Metal_Mac_2_0 = 805306400; - static constexpr uint32 Metal_Mac_2_1 = 805306401; - static constexpr uint32 Metal_Mac_2_2 = 805306402; - static constexpr uint32 Metal_Mac_2_3 = 805306403; - static constexpr uint32 Metal_Mac_2_4 = 805306404; - static constexpr uint32 Metal_Mac_3_0 = 805306416; - static constexpr uint32 Metal_Mac_3_1 = 805306417; -}; - -struct EShaderOpt -{ - EShaderOpt () {} - EShaderOpt (uint32) {} - operator uint32 () const; - static constexpr uint32 None = 0; - - // Add debug information. Used in RenderDoc shader debugger. - static constexpr uint32 DebugInfo = 1; - - // Insert shader trace recording. Shader will be very slow. - static constexpr uint32 Trace = 2; - - // Insert shader function profiling. Shader will be very slow. - static constexpr uint32 FnProfiling = 4; - - // Insert whole shader time measurement. Shader will be a bit slow. - static constexpr uint32 TimeHeatMap = 8; - - // Enable optimizations. Take a lot of CPU time at shader compilation. - static constexpr uint32 Optimize = 16; - - // Enable bytecode size optimizations. Take a lot of CPU time at shader compilation. - static constexpr uint32 OptimizeSize = 32; - - // Enable strong optimizations. Take a lot of CPU time at shader compilation. - static constexpr uint32 StrongOptimization = 64; - static constexpr uint32 WarnAsError = 1024; -}; - -struct EAccessType -{ - EAccessType () {} - EAccessType (uint32) {} - operator uint32 () const; - static constexpr uint32 Coherent = 1; - static constexpr uint32 Volatile = 2; - static constexpr uint32 Restrict = 3; - - // Require 'vulkanMemoryModel' feature. 
- static constexpr uint32 DeviceCoherent = 5; - static constexpr uint32 QueueFamilyCoherent = 6; - static constexpr uint32 WorkgroupCoherent = 7; - static constexpr uint32 SubgroupCoherent = 8; - static constexpr uint32 NonPrivate = 9; -}; - -struct EImageType -{ - EImageType () {} - EImageType (uint16) {} - operator uint16 () const; - static constexpr uint16 1D = 1; - static constexpr uint16 1DArray = 2; - static constexpr uint16 2D = 3; - static constexpr uint16 2DArray = 4; - static constexpr uint16 2DMS = 5; - static constexpr uint16 2DMSArray = 6; - static constexpr uint16 Cube = 7; - static constexpr uint16 CubeArray = 8; - static constexpr uint16 3D = 9; - static constexpr uint16 Buffer = 10; - static constexpr uint16 Float = 16; - static constexpr uint16 Half = 32; - static constexpr uint16 SNorm = 48; - static constexpr uint16 UNorm = 64; - static constexpr uint16 Int = 80; - static constexpr uint16 UInt = 96; - static constexpr uint16 sRGB = 112; - static constexpr uint16 Depth = 128; - static constexpr uint16 Stencil = 144; - static constexpr uint16 DepthStencil = 160; - static constexpr uint16 Shadow = 256; - static constexpr uint16 FImage1D = 17; - static constexpr uint16 FImage2D = 19; - static constexpr uint16 FImage2D_sRGB = 115; - static constexpr uint16 FImage3D = 25; - static constexpr uint16 FImage1DArray = 18; - static constexpr uint16 FImage2DArray = 20; - static constexpr uint16 FImageCube = 23; - static constexpr uint16 FImageCubeArray = 24; - static constexpr uint16 FImage2DMS = 21; - static constexpr uint16 FImage2DMSArray = 22; - static constexpr uint16 FImageBuffer = 26; - static constexpr uint16 HImage1D = 33; - static constexpr uint16 HImage2D = 35; - static constexpr uint16 HImage3D = 41; - static constexpr uint16 HImage1DArray = 34; - static constexpr uint16 HImage2DArray = 36; - static constexpr uint16 HImageCube = 39; - static constexpr uint16 HImageCubeArray = 40; - static constexpr uint16 HImage2DMS = 37; - static constexpr 
uint16 HImage2DMSArray = 38; - static constexpr uint16 HImageBuffer = 42; - static constexpr uint16 Image1DShadow = 385; - static constexpr uint16 Image2DShadow = 387; - static constexpr uint16 Image1DArrayShadow = 386; - static constexpr uint16 Image2DArrayShadow = 388; - static constexpr uint16 ImageCubeShadow = 391; - static constexpr uint16 ImageCubeArrayShadow = 392; - static constexpr uint16 IImage1D = 81; - static constexpr uint16 IImage2D = 83; - static constexpr uint16 IImage3D = 89; - static constexpr uint16 IImage1DArray = 82; - static constexpr uint16 IImage2DArray = 84; - static constexpr uint16 IImageCube = 87; - static constexpr uint16 IImageCubeArray = 88; - static constexpr uint16 IImage2DMS = 85; - static constexpr uint16 IImage2DMSArray = 86; - static constexpr uint16 IImageBuffer = 90; - static constexpr uint16 UImage1D = 97; - static constexpr uint16 UImage2D = 99; - static constexpr uint16 UImage3D = 105; - static constexpr uint16 UImage1DArray = 98; - static constexpr uint16 UImage2DArray = 100; - static constexpr uint16 UImageCube = 103; - static constexpr uint16 UImageCubeArray = 104; - static constexpr uint16 UImage2DMS = 101; - static constexpr uint16 UImage2DMSArray = 102; - static constexpr uint16 UImageBuffer = 106; - static constexpr uint16 SLongImage1D = 177; - static constexpr uint16 SLongImage2D = 179; - static constexpr uint16 SLongImage3D = 185; - static constexpr uint16 SLongImage1DArray = 178; - static constexpr uint16 SLongImage2DArray = 180; - static constexpr uint16 SLongImageCube = 183; - static constexpr uint16 SLongImageCubeArray = 184; - static constexpr uint16 SLongImage2DMS = 181; - static constexpr uint16 SLongImage2DMSArray = 182; - static constexpr uint16 SLongImageBuffer = 186; - static constexpr uint16 ULongImage1D = 193; - static constexpr uint16 ULongImage2D = 195; - static constexpr uint16 ULongImage3D = 201; - static constexpr uint16 ULongImage1DArray = 194; - static constexpr uint16 ULongImage2DArray = 196; - 
static constexpr uint16 ULongImageCube = 199; - static constexpr uint16 ULongImageCubeArray = 200; - static constexpr uint16 ULongImage2DMS = 197; - static constexpr uint16 ULongImage2DMSArray = 198; - static constexpr uint16 ULongImageBuffer = 202; -}; - -struct ECompilationTarget -{ - ECompilationTarget () {} - ECompilationTarget (uint32) {} - operator uint32 () const; - static constexpr uint32 Vulkan = 1; - static constexpr uint32 Metal_iOS = 2; - static constexpr uint32 Metal_Mac = 3; -}; - -struct EStructLayout -{ - EStructLayout () {} - EStructLayout (uint8) {} - operator uint8 () const; - - // Apply GLSL std140 rules but structure must be compatible with Metal otherwise will throw exception. - static constexpr uint8 Compatible_Std140 = 0; - - // Apply GLSL std430 rules but structure must be compatible with Metal otherwise will throw exception. - static constexpr uint8 Compatible_Std430 = 1; - - // Apply MSL rules. - static constexpr uint8 Metal = 2; - - // Apply GLSL std140 rules. - static constexpr uint8 Std140 = 3; - - // Apply GLSL std430 rules. - static constexpr uint8 Std430 = 4; - - // Platform depended layout. 
- static constexpr uint8 InternalIO = 5; -}; - -struct EValueType -{ - EValueType () {} - EValueType (uint8) {} - operator uint8 () const; - static constexpr uint8 Bool8 = 1; - static constexpr uint8 Bool32 = 2; - static constexpr uint8 Int8 = 3; - static constexpr uint8 Int16 = 4; - static constexpr uint8 Int32 = 5; - static constexpr uint8 Int64 = 6; - static constexpr uint8 UInt8 = 7; - static constexpr uint8 UInt16 = 8; - static constexpr uint8 UInt32 = 9; - static constexpr uint8 UInt64 = 10; - static constexpr uint8 Float16 = 11; - static constexpr uint8 Float32 = 12; - static constexpr uint8 Float64 = 13; - static constexpr uint8 Int8_Norm = 14; - static constexpr uint8 Int16_Norm = 15; - static constexpr uint8 UInt8_Norm = 16; - static constexpr uint8 UInt16_Norm = 17; - static constexpr uint8 DeviceAddress = 18; -}; - -struct EShaderPreprocessor -{ - EShaderPreprocessor () {} - EShaderPreprocessor (uint32) {} - operator uint32 () const; - static constexpr uint32 None = 0; - - // Use for auto-complete in IDE. - static constexpr uint32 AEStyle = 1; -}; - struct GlobalConfig { GlobalConfig (); @@ -2903,6 +3124,11 @@ struct GlobalConfig // Set FeatureSet which will be added to all resources. void SetDefaultFeatureSet (const string & fsName); + + // Set defines which will be used in all shaders. 
+ // Format: DEF=1 + // DEF2 + void SetShaderDefines (const string &); }; struct NamedRenderState @@ -2920,23 +3146,6 @@ struct NamedRenderState RenderState_MultisampleState multisample; }; -struct EFormatFeature -{ - EFormatFeature () {} - EFormatFeature (uint32) {} - operator uint32 () const; - static constexpr uint32 StorageImageAtomic = 1; - static constexpr uint32 StorageImage = 2; - static constexpr uint32 AttachmentBlend = 3; - static constexpr uint32 Attachment = 4; - static constexpr uint32 LinearSampled = 5; - static constexpr uint32 UniformTexelBuffer = 6; - static constexpr uint32 StorageTexelBuffer = 7; - static constexpr uint32 StorageTexelBufferAtomic = 8; - static constexpr uint32 HWCompressedAttachment = 9; - static constexpr uint32 LossyCompressedAttachment = 10; -}; - struct FeatureSet { FeatureSet (); @@ -3007,6 +3216,8 @@ struct FeatureSet bool hasShaderMaximalReconvergence (); void shaderQuadControl (EFeature); bool hasShaderQuadControl (); + void clipSpaceWScalingNV (EFeature); + bool hasClipSpaceWScalingNV (); void shaderInt8 (EFeature); bool hasShaderInt8 (); void shaderInt16 (EFeature); @@ -3372,27 +3583,6 @@ struct ArraySize ArraySize (uint); }; -struct ETessPatch -{ - ETessPatch () {} - ETessPatch (uint32) {} - operator uint32 () const; - static constexpr uint32 Points = 1; - static constexpr uint32 Isolines = 2; - static constexpr uint32 Triangles = 3; - static constexpr uint32 Quads = 4; -}; - -struct ETessSpacing -{ - ETessSpacing () {} - ETessSpacing (uint32) {} - operator uint32 () const; - static constexpr uint32 Equal = 1; - static constexpr uint32 FractionalEven = 2; - static constexpr uint32 FractionalOdd = 3; -}; - struct Shader { Shader (); @@ -3471,30 +3661,6 @@ struct Shader void TessPatchMode (ETessPatch mode, ETessSpacing spacing, bool ccw); }; -struct ShaderStructTypeUsage -{ - ShaderStructTypeUsage () {} - ShaderStructTypeUsage (uint32) {} - operator uint32 () const; - - // Used as interface between graphics pipeline 
stages. Reflection to C++ is not supported. - static constexpr uint32 ShaderIO = 1; - - // Used as vertex buffer layout. Enables reflection to C++. - static constexpr uint32 VertexLayout = 4; - - // Used as vertex attributes in shader. - // Reflection to C++ is not enabled, use 'VertexLayout' to enable it. - static constexpr uint32 VertexAttribs = 2; - - // Used as uniform/storage buffer. Layout must be same in GLSL/MSL and C++. Enables reflection to C++. - static constexpr uint32 BufferLayout = 8; - - // Used as buffer reference in shader. Layout must be same between shaders in single platform. - // Reflection to C++ is not enabled, use 'BufferLayout' to enable it. - static constexpr uint32 BufferReference = 16; -}; - struct ShaderStructType { ShaderStructType (); @@ -3860,13 +4026,6 @@ struct PipelineLayout void Define (const string &); }; -struct EMutableRenderState -{ - EMutableRenderState () {} - EMutableRenderState (uint32) {} - operator uint32 () const; -}; - struct GraphicsPass { GraphicsPass (); @@ -4128,6 +4287,10 @@ struct ComputePipelineSpec void SetSpecValue (const string & name, int value); void SetSpecValue (const string & name, float value); + // Set subgroup size. + // Requires 'subgroupSizeControl' feature, value must be in range [minSubgroupSize, maxSubgroupSize]. + void SubgroupSize (uint); + // Set dynamic states (EPipelineDynamicState). // None of the states are supported for compute pipeline. void SetDynamicState (uint states); @@ -4309,81 +4472,6 @@ const string Subpass_Main; const string Attachment_Color; const string Attachment_Depth; const string Attachment_DepthStencil; -struct EAttachment -{ - EAttachment () {} - EAttachment (uint32) {} - operator uint32 () const; - - // Discard previous content. Used as optimization for TBDR architectures. - static constexpr uint32 Invalidate = 0; - - // Color attachment. - static constexpr uint32 Color = 1; - - // Used as input attachment and color attachment. 
- static constexpr uint32 ReadWrite = 3; - - // Resolve attachment - will get content from multisampled color attachment. - static constexpr uint32 ColorResolve = 2; - - // Input attachment. - static constexpr uint32 Input = 4; - - // Depth attachment. - static constexpr uint32 Depth = 5; - - // Keep attachment content between passes. - static constexpr uint32 Preserve = 6; - - // Depth and stencil attachment. - static constexpr uint32 DepthStencil = 5; - - // Fragment shading rate attachment. - static constexpr uint32 ShadingRate = 7; -}; - -struct EAttachmentLoadOp -{ - EAttachmentLoadOp () {} - EAttachmentLoadOp (uint8) {} - operator uint8 () const; - - // Previous content will not be preserved. - // In TBDR is allow to avoid transfer from global memory to cache. - static constexpr uint8 Invalidate = 0; - - // Preserve attachment content. - // In TBDR contents in global memory will be copied to cache. - static constexpr uint8 Load = 1; - - // Clear attachment before first pass. - // In TBDR is allow to avoid transfer from global memory to cache. - static constexpr uint8 Clear = 2; - - // Attachment is not used at all. - // Can be used to keep one compatible render pass and avoid unnecessary synchronizations for unused attachment. - static constexpr uint8 None = 3; -}; - -struct EAttachmentStoreOp -{ - EAttachmentStoreOp () {} - EAttachmentStoreOp (uint8) {} - operator uint8 () const; - - // Attachment content will not needed after rendering. - // In TBDR it allow to avoid transfer from cache to global memory. - static constexpr uint8 Invalidate = 0; - - // Attachment content will be written to global memory. - static constexpr uint8 Store = 1; - - // Attachment is read-only. Content may not be written to memory, but if changed then content in memory will be undefined. - // In TBDR it allow to avoid transfer from cache to global memory. 
- static constexpr uint8 None = 2; -}; - struct ShaderIO { ShaderIO (); @@ -4621,175 +4709,159 @@ string FileName (); RC GetDefaultFeatureSet (); #define SCRIPT -const string Sampler_NearestClamp; -const string Sampler_NearestRepeat; -const string Sampler_NearestMirrorRepeat; -const string Sampler_LinearClamp; -const string Sampler_LinearRepeat; -const string Sampler_LinearMirrorRepeat; -const string Sampler_LinearMipmapClamp; -const string Sampler_LinearMipmapRepeat; -const string Sampler_LinearMipmapMirrorRepeat; -const string Sampler_LinearMipmapMirrorClamp; -const string Sampler_Anisotropy8Repeat; -const string Sampler_Anisotropy8MirrorRepeat; -const string Sampler_Anisotropy8Clamp; -const string Sampler_Anisotropy16Repeat; -const string Sampler_Anisotropy16MirrorRepeat; -const string Sampler_Anisotropy16Clamp; template <> -struct RC : DescriptorSetLayout +struct RC : VertexBufferInput { - RC (const DescriptorSetLayout &); + RC (const VertexBufferInput &); }; template <> -struct RC : RayTracingShaderBinding +struct RC : Attachment { - RC (const RayTracingShaderBinding &); + RC (const Attachment &); }; template <> -struct RC : RenderTechnique +struct RC : FeatureSet { - RC (const RenderTechnique &); + RC (const FeatureSet &); }; template <> -struct RC : RayTracingPipeline +struct RC : ComputePass { - RC (const RayTracingPipeline &); + RC (const ComputePass &); }; template <> -struct RC : GraphicsPass +struct RC : NamedRenderState { - RC (const GraphicsPass &); + RC (const NamedRenderState &); }; template <> -struct RC : ComputePipeline +struct RC : MeshPipeline { - RC (const ComputePipeline &); + RC (const MeshPipeline &); }; template <> -struct RC : ShaderStructType +struct RC : Sampler { - RC (const ShaderStructType &); + RC (const Sampler &); }; template <> -struct RC : Shader +struct RC : MeshPipelineSpec { - RC (const Shader &); + RC (const MeshPipelineSpec &); }; template <> -struct RC : ComputePipelineSpec +struct RC : GraphicsPipeline { - RC (const 
ComputePipelineSpec &); + RC (const GraphicsPipeline &); }; template <> -struct RC : CompatibleRenderPass +struct RC : RayTracingPipelineSpec { - RC (const CompatibleRenderPass &); + RC (const RayTracingPipelineSpec &); }; template <> -struct RC : MeshPipeline +struct RC : AttachmentSpec { - RC (const MeshPipeline &); + RC (const AttachmentSpec &); }; template <> -struct RC : NamedRenderState +struct RC : TilePipeline { - RC (const NamedRenderState &); + RC (const TilePipeline &); }; template <> -struct RC : FeatureSet +struct RC : GraphicsPipelineSpec { - RC (const FeatureSet &); + RC (const GraphicsPipelineSpec &); }; template <> -struct RC : ComputePass +struct RC : TilePipelineSpec { - RC (const ComputePass &); + RC (const TilePipelineSpec &); }; template <> -struct RC : Sampler +struct RC : PipelineLayout { - RC (const Sampler &); + RC (const PipelineLayout &); }; template <> -struct RC : MeshPipelineSpec +struct RC : RenderPass { - RC (const MeshPipelineSpec &); + RC (const RenderPass &); }; template <> -struct RC : GraphicsPipeline +struct RC : GraphicsPass { - RC (const GraphicsPipeline &); + RC (const GraphicsPass &); }; template <> -struct RC : VertexBufferInput +struct RC : ComputePipeline { - RC (const VertexBufferInput &); + RC (const ComputePipeline &); }; template <> -struct RC : Attachment +struct RC : DescriptorSetLayout { - RC (const Attachment &); + RC (const DescriptorSetLayout &); }; template <> -struct RC : TilePipelineSpec +struct RC : RenderTechnique { - RC (const TilePipelineSpec &); + RC (const RenderTechnique &); }; template <> -struct RC : TilePipeline +struct RC : RayTracingPipeline { - RC (const TilePipeline &); + RC (const RayTracingPipeline &); }; template <> -struct RC : GraphicsPipelineSpec +struct RC : RayTracingShaderBinding { - RC (const GraphicsPipelineSpec &); + RC (const RayTracingShaderBinding &); }; template <> -struct RC : PipelineLayout +struct RC : ComputePipelineSpec { - RC (const PipelineLayout &); + RC (const 
ComputePipelineSpec &); }; template <> -struct RC : RenderPass +struct RC : CompatibleRenderPass { - RC (const RenderPass &); + RC (const CompatibleRenderPass &); }; template <> -struct RC : AttachmentSpec +struct RC : ShaderStructType { - RC (const AttachmentSpec &); + RC (const ShaderStructType &); }; template <> -struct RC : RayTracingPipelineSpec +struct RC : Shader { - RC (const RayTracingPipelineSpec &); + RC (const Shader &); }; diff --git a/AE/engine/shared_data/scripts/res_editor.as b/AE/engine/shared_data/scripts/res_editor.as index c0da83a7..5ab71142 100644 --- a/AE/engine/shared_data/scripts/res_editor.as +++ b/AE/engine/shared_data/scripts/res_editor.as @@ -1,4 +1,4 @@ -//5757ea0c +//7ca1a16a #include #include @@ -8,7 +8,6 @@ using int8 = std::int8_t; using uint8 = std::uint8_t; using int16 = std::int16_t; using uint16 = std::uint16_t; -using int = std::int32_t; using uint = std::uint32_t; using int32 = std::int32_t; using uint32 = std::uint32_t; @@ -22,264 +21,1525 @@ struct RC; template using array = std::vector; -struct EShaderIO; -struct EImageAspect; -struct InstanceIndex; -struct RTInstanceTransform; -struct float3; -struct EPipelineOpt; -struct float2; -struct ERenderLayer; -struct DynamicInt; -struct EVertexInputRate; -struct EDescSetUsage; -struct UnifiedGeometry_DrawIndexedIndirect; -struct ESamplerChromaLocation; -struct float2x2; -struct float2x3; -struct float2x4; -struct EPipelineDynamicState; -struct Random_Binomial4; -struct EFilter; -struct Random_Binomial2; -struct MultiSamples; -struct Random_Binomial3; -struct Random_Binomial1; -struct IPass; -struct ESamplerYcbcrModelConversion; -struct sbyte4; -struct UnifiedGeometry_DrawIndexed; -struct Random; -struct short2; -struct sbyte3; -struct ESamplerOpt; -struct ESubgroupTypes; -struct short3; -struct ushort4; -struct EImageUsage; -struct UnifiedGeometry_DrawIndexedIndirectCount; -struct ScriptFlags; -struct Image; +using namespace std::string_literals; + +template +string operator + 
(const string &lhs, T rhs); + +struct RTScene; +struct RectI; +struct uint3; +struct uint2; +struct DynamicFloat; +struct RTInstanceMask; +struct DynamicDim; +struct FPSCamera; +struct RectU; +struct RayTracingPass; +struct uint4; +struct OrbitalCamera; +struct UnifiedGeometry_DrawMeshTasksIndirectCount; +struct RGBA8u; +struct RectF; +struct SphericalCube; +struct RGBA32u; +struct UnifiedGeometry_DrawMeshTasks; +struct UnifiedGeometry_DrawIndirectCount; +struct Buffer; +struct CallableIndex; +struct Postprocess; +struct SceneRayTracingPass; +struct UnifiedGeometry_DrawMeshTasksIndirect; +struct ComputePass; +struct UnifiedGeometry; +struct ScaleBiasCamera; +struct TopDownCamera; +struct FlightCamera; +struct BaseController; +struct MipmapLevel; +struct DepthStencil; +struct int3; +struct int2; +struct int4; +struct ImageLayer; +struct ushort3; +struct sbyte2; +struct ushort2; +struct short4; +struct Scene; +struct float3x3; +struct float3x2; +struct float3x4; +struct Model; +struct DynamicUInt; +struct float4x2; +struct float4x4; +struct float4x3; struct DynamicInt2; -struct DynamicInt4; -struct EIndex; -struct UnifiedGeometry_DrawIndirect; struct DynamicInt3; +struct DynamicInt4; struct GeomSource; +struct UnifiedGeometry_DrawIndirect; +struct RTGeometry; +struct DynamicUInt4; +struct DynamicFloat3; struct float4; struct DynamicFloat2; -struct DynamicFloat3; -struct ImageLoadOpFlags; -struct EPostprocess; -struct DynamicUInt2; +struct DynamicUInt3; struct RTShader; -struct DynamicUInt4; -struct RTGeometry; +struct DynamicUInt2; struct DynamicFloat4; -struct EBlendFactor; -struct DynamicUInt3; -struct EStencilOp; -struct ERTInstanceOpt; -struct DbgViewFlags; -struct EAddressMode; -struct EPassFlags; -struct RayIndex; -struct Collection; -struct RTScene; -struct DynamicULong; -struct bool3; -struct EImage; -struct bool2; -struct RGBA32i; -struct SceneGraphicsPass; -struct EVertexType; -struct EBlendOp; -struct bool4; -struct HSVColor; -struct ECullMode; -struct 
ELogicOp; -struct RGBA32f; -struct EQueueMask; +struct short2; +struct Quat; +struct ushort4; +struct short3; +struct Random; +struct UnifiedGeometry_DrawIndexed; +struct sbyte4; +struct sbyte3; +struct UnifiedGeometry_DrawIndexedIndirectCount; +struct Image; +struct UnifiedGeometry_DrawIndexedIndirect; +struct float2x2; +struct float2x4; +struct float2x3; +struct Random_Binomial4; +struct Random_Binomial1; +struct Random_Binomial3; +struct Random_Binomial2; +struct MultiSamples; +struct IPass; +struct RTInstanceTransform; +struct float2; +struct float3; +struct DynamicInt; +struct InstanceIndex; +struct ubyte4; +struct RTInstanceCustomIndex; +struct RemoteCamera; +struct RTInstanceSBTOffset; +struct Random_Normal3; struct Random_Normal1; struct UnifiedGeometry_Draw; -struct EPrimitive; struct Random_Normal2; -struct Random_Normal3; +struct ubyte3; +struct ubyte2; struct VideoImage; struct FPVCamera; -struct ubyte3; -struct EGraphicsDeviceID; -struct ESubgroupOperation; struct Random_Normal4; -struct ubyte2; -struct ESamplerYcbcrRange; -struct RTInstanceCustomIndex; -struct ubyte4; -struct RemoteCamera; -struct RTInstanceSBTOffset; -struct EShadingRate; -struct SceneRayTracingPass; -struct Postprocess; -struct EResourceState; -struct EBufferUsage; -struct EBufferOpt; -struct UnifiedGeometry_DrawMeshTasksIndirect; -struct ComputePass; -struct EBorderColor; -struct EImageType; -struct UnifiedGeometry_DrawIndirectCount; -struct UnifiedGeometry; -struct CallableIndex; -struct Buffer; -struct UnifiedGeometry_DrawMeshTasksIndirectCount; -struct EImageOpt; -struct EFeature; -struct EMipmapFilter; -struct OrbitalCamera; -struct RGBA8u; -struct RectF; -struct SphericalCube; -struct UnifiedGeometry_DrawMeshTasks; -struct RGBA32u; -struct uint2; -struct uint3; -struct RTInstanceMask; -struct DynamicFloat; -struct RectI; -struct EShader; -struct EShaderStages; -struct DynamicDim; -struct EGPUVendor; -struct RayTracingPass; -struct FPSCamera; -struct ECompareOp; -struct uint4; 
-struct EColorSpace; -struct float3x2; -struct RectU; -struct float3x3; -struct Model; -struct float3x4; -struct DynamicUInt; -struct float4x2; -struct float4x4; -struct EPolygonMode; -struct int2; -struct float4x3; -struct int3; -struct int4; -struct ImageLayer; -struct ushort2; -struct sbyte2; -struct EPixelFormat; -struct short4; -struct ushort3; -struct Scene; -struct DepthStencil; -struct EPixelFormatExternal; -struct ScaleBiasCamera; -struct EReductionMode; -struct TopDownCamera; -struct ESurfaceFormat; -struct FlightCamera; -struct EShadingRateCombinerOp; -struct MipmapLevel; -struct BaseController; +struct bool2; +struct bool3; +struct DynamicULong; +struct SceneGraphicsPass; +struct bool4; +struct RGBA32i; +struct RGBA32f; +struct HSVColor; +struct Collection; +struct RayIndex; + +enum class EImageType : uint16 +{ + Cube, + CubeArray, + Buffer, + Float, + Half, + SNorm, + UNorm, + Int, + UInt, + sRGB, + Depth, + Stencil, + DepthStencil, + Shadow, + FImage1D, + FImage2D, + FImage2D_sRGB, + FImage3D, + FImage1DArray, + FImage2DArray, + FImageCube, + FImageCubeArray, + FImage2DMS, + FImage2DMSArray, + FImageBuffer, + HImage1D, + HImage2D, + HImage3D, + HImage1DArray, + HImage2DArray, + HImageCube, + HImageCubeArray, + HImage2DMS, + HImage2DMSArray, + HImageBuffer, + Image1DShadow, + Image2DShadow, + Image1DArrayShadow, + Image2DArrayShadow, + ImageCubeShadow, + ImageCubeArrayShadow, + IImage1D, + IImage2D, + IImage3D, + IImage1DArray, + IImage2DArray, + IImageCube, + IImageCubeArray, + IImage2DMS, + IImage2DMSArray, + IImageBuffer, + UImage1D, + UImage2D, + UImage3D, + UImage1DArray, + UImage2DArray, + UImageCube, + UImageCubeArray, + UImage2DMS, + UImage2DMSArray, + UImageBuffer, + SLongImage1D, + SLongImage2D, + SLongImage3D, + SLongImage1DArray, + SLongImage2DArray, + SLongImageCube, + SLongImageCubeArray, + SLongImage2DMS, + SLongImage2DMSArray, + SLongImageBuffer, + ULongImage1D, + ULongImage2D, + ULongImage3D, + ULongImage1DArray, + ULongImage2DArray, + 
ULongImageCube, + ULongImageCubeArray, + ULongImage2DMS, + ULongImage2DMSArray, + ULongImageBuffer, +}; +uint16 operator | (EImageType lhs, EImageType rhs); +uint16 operator | (uint16 lhs, EImageType rhs); +uint16 operator | (EImageType lhs, uint16 rhs); +static constexpr EImageType EImageType_1D = EImageType(1); +static constexpr EImageType EImageType_1DArray = EImageType(2); +static constexpr EImageType EImageType_2D = EImageType(3); +static constexpr EImageType EImageType_2DArray = EImageType(4); +static constexpr EImageType EImageType_2DMS = EImageType(5); +static constexpr EImageType EImageType_2DMSArray = EImageType(6); +static constexpr EImageType EImageType_3D = EImageType(9); + +enum class EImage : uint8 +{ + Cube, + CubeArray, +}; +uint8 operator | (EImage lhs, EImage rhs); +uint8 operator | (uint8 lhs, EImage rhs); +uint8 operator | (EImage lhs, uint8 rhs); +static constexpr EImage EImage_1D = EImage(0); +static constexpr EImage EImage_2D = EImage(1); +static constexpr EImage EImage_3D = EImage(2); +static constexpr EImage EImage_1DArray = EImage(3); +static constexpr EImage EImage_2DArray = EImage(4); + +enum class EIndex : uint8 +{ + UShort, + UInt, +}; +uint8 operator | (EIndex lhs, EIndex rhs); +uint8 operator | (uint8 lhs, EIndex rhs); +uint8 operator | (EIndex lhs, uint8 rhs); + +enum class EPixelFormat : uint8 +{ + RGBA16_SNorm, + RGBA8_SNorm, + RGB16_SNorm, + RGB8_SNorm, + RG16_SNorm, + RG8_SNorm, + R16_SNorm, + R8_SNorm, + RGBA16_UNorm, + RGBA8_UNorm, + RGB16_UNorm, + RGB8_UNorm, + RG16_UNorm, + RG8_UNorm, + R16_UNorm, + R8_UNorm, + RGB10_A2_UNorm, + RGBA4_UNorm, + RGB5_A1_UNorm, + RGB_5_6_5_UNorm, + BGR8_UNorm, + BGRA8_UNorm, + sRGB8, + sRGB8_A8, + sBGR8, + sBGR8_A8, + R8I, + RG8I, + RGB8I, + RGBA8I, + R16I, + RG16I, + RGB16I, + RGBA16I, + R32I, + RG32I, + RGB32I, + RGBA32I, + R64I, + R8U, + RG8U, + RGB8U, + RGBA8U, + R16U, + RG16U, + RGB16U, + RGBA16U, + R32U, + RG32U, + RGB32U, + RGBA32U, + RGB10_A2U, + R64U, + R16F, + RG16F, + RGB16F, + 
RGBA16F, + R32F, + RG32F, + RGB32F, + RGBA32F, + RGB_11_11_10F, + RGB9F_E5, + Depth16, + Depth24, + Depth32F, + Depth16_Stencil8, + Depth24_Stencil8, + Depth32F_Stencil8, + BC1_RGB8_UNorm, + BC1_sRGB8, + BC1_RGB8_A1_UNorm, + BC1_sRGB8_A1, + BC2_RGBA8_UNorm, + BC2_sRGB8, + BC3_RGBA8_UNorm, + BC3_sRGB8, + BC4_R8_SNorm, + BC4_R8_UNorm, + BC5_RG8_SNorm, + BC5_RG8_UNorm, + BC6H_RGB16F, + BC6H_RGB16UF, + BC7_RGBA8_UNorm, + BC7_sRGB8_A8, + ETC2_RGB8_UNorm, + ETC2_sRGB8, + ETC2_RGB8_A1_UNorm, + ETC2_sRGB8_A1, + ETC2_RGBA8_UNorm, + ETC2_sRGB8_A8, + EAC_R11_SNorm, + EAC_R11_UNorm, + EAC_RG11_SNorm, + EAC_RG11_UNorm, + ASTC_RGBA8_4x4, + ASTC_RGBA8_5x4, + ASTC_RGBA8_5x5, + ASTC_RGBA8_6x5, + ASTC_RGBA8_6x6, + ASTC_RGBA8_8x5, + ASTC_RGBA8_8x6, + ASTC_RGBA8_8x8, + ASTC_RGBA8_10x5, + ASTC_RGBA8_10x6, + ASTC_RGBA8_10x8, + ASTC_RGBA8_10x10, + ASTC_RGBA8_12x10, + ASTC_RGBA8_12x12, + ASTC_sRGB8_A8_4x4, + ASTC_sRGB8_A8_5x4, + ASTC_sRGB8_A8_5x5, + ASTC_sRGB8_A8_6x5, + ASTC_sRGB8_A8_6x6, + ASTC_sRGB8_A8_8x5, + ASTC_sRGB8_A8_8x6, + ASTC_sRGB8_A8_8x8, + ASTC_sRGB8_A8_10x5, + ASTC_sRGB8_A8_10x6, + ASTC_sRGB8_A8_10x8, + ASTC_sRGB8_A8_10x10, + ASTC_sRGB8_A8_12x10, + ASTC_sRGB8_A8_12x12, + ASTC_RGBA16F_4x4, + ASTC_RGBA16F_5x4, + ASTC_RGBA16F_5x5, + ASTC_RGBA16F_6x5, + ASTC_RGBA16F_6x6, + ASTC_RGBA16F_8x5, + ASTC_RGBA16F_8x6, + ASTC_RGBA16F_8x8, + ASTC_RGBA16F_10x5, + ASTC_RGBA16F_10x6, + ASTC_RGBA16F_10x8, + ASTC_RGBA16F_10x10, + ASTC_RGBA16F_12x10, + ASTC_RGBA16F_12x12, + G8B8G8R8_422_UNorm, + B8G8R8G8_422_UNorm, + G8_B8R8_420_UNorm, + G8_B8R8_422_UNorm, + G8_B8R8_444_UNorm, + G8_B8_R8_420_UNorm, + G8_B8_R8_422_UNorm, + G8_B8_R8_444_UNorm, + B10x6G10x6R10x6G10x6_422_UNorm, + G10x6B10x6G10x6R10x6_422_UNorm, + G10x6_B10x6R10x6_420_UNorm, + G10x6_B10x6R10x6_422_UNorm, + G10x6_B10x6R10x6_444_UNorm, + G10x6_B10x6_R10x6_420_UNorm, + G10x6_B10x6_R10x6_422_UNorm, + G10x6_B10x6_R10x6_444_UNorm, + R10x6G10x6B10x6A10x6_UNorm, + R10x6G10x6_UNorm, + R10x6_UNorm, + B12x4G12x4R12x4G12x4_422_UNorm, + 
G12x4B12x4G12x4R12x4_422_UNorm, + G12x4_B12x4R12x4_420_UNorm, + G12x4_B12x4R12x4_422_UNorm, + G12x4_B12x4R12x4_444_UNorm, + G12x4_B12x4_R12x4_420_UNorm, + G12x4_B12x4_R12x4_422_UNorm, + G12x4_B12x4_R12x4_444_UNorm, + R12x4G12x4B12x4A12x4_UNorm, + R12x4G12x4_UNorm, + R12x4_UNorm, + B16G16R16G16_422_UNorm, + G16B16G16R16_422_UNorm, + G16_B16R16_420_UNorm, + G16_B16R16_422_UNorm, + G16_B16R16_444_UNorm, + G16_B16_R16_420_UNorm, + G16_B16_R16_422_UNorm, + G16_B16_R16_444_UNorm, + SwapchainColor, +}; +uint8 operator | (EPixelFormat lhs, EPixelFormat rhs); +uint8 operator | (uint8 lhs, EPixelFormat rhs); +uint8 operator | (EPixelFormat lhs, uint8 rhs); + +enum class EPixelFormatExternal : uint8 +{ + Android_Depth16, + Android_DepthJPEG, + Android_DepthPointCloud, + Android_JPEG, + Android_Raw16, + Android_Raw12, + Android_Raw10, + Android_NV16, + Android_NV21, + Android_YCBCR_P010, + Android_YUV_420, + Android_YUV_422, + Android_YUV_444, + Android_YUY2, + Android_YV12, + Android_Y8, + Android_HEIC, +}; +uint8 operator | (EPixelFormatExternal lhs, EPixelFormatExternal rhs); +uint8 operator | (uint8 lhs, EPixelFormatExternal rhs); +uint8 operator | (EPixelFormatExternal lhs, uint8 rhs); + +enum class ECompareOp : uint8 +{ + Never, + Less, + Equal, + LEqual, + Greater, + NotEqual, + GEqual, + Always, + LessOrEqual, + GreaterOrEqual, +}; +uint8 operator | (ECompareOp lhs, ECompareOp rhs); +uint8 operator | (uint8 lhs, ECompareOp rhs); +uint8 operator | (ECompareOp lhs, uint8 rhs); + +enum class EBlendFactor : uint8 +{ + + // S, srcColor - from shader + // D, dstColor - from render target + // S1 - from shader (dual src blend) + // cc - constant color + // result = srcColor * srcBlend [blendOp] dstColor * dstBlend + // + + // 0 + Zero, + + // 1 + One, + + // S + SrcColor, + + // 1 - S + OneMinusSrcColor, + + // D + DstColor, + + // 1 - D + OneMinusDstColor, + + // S.a + SrcAlpha, + + // 1 - S.a + OneMinusSrcAlpha, + + // D.a + DstAlpha, + + // 1 - D.a + OneMinusDstAlpha, + + 
// cc + ConstColor, + + // 1 - cc + OneMinusConstColor, + + // cc.a + ConstAlpha, + + // 1 - cc.a + OneMinusConstAlpha, + + // rgb * min( S.a, D.a ), a * 1 + SrcAlphaSaturate, + + // S1 + Src1Color, + + // 1 - S1 + OneMinusSrc1Color, + + // S1.a + Src1Alpha, + + // 1 - S1.a + OneMinusSrc1Alpha, +}; +uint8 operator | (EBlendFactor lhs, EBlendFactor rhs); +uint8 operator | (uint8 lhs, EBlendFactor rhs); +uint8 operator | (EBlendFactor lhs, uint8 rhs); + +enum class EBlendOp : uint8 +{ + + // S, srcColor - from shader + // D, dstColor - from render target + // result = srcColor * srcBlend [blendOp] dstColor * dstBlend + // + + // S + D + Add, + + // S - D + Sub, + + // D - S + RevSub, + + // min( S, D ) + Min, + + // max( S, D ) + Max, +}; +uint8 operator | (EBlendOp lhs, EBlendOp rhs); +uint8 operator | (uint8 lhs, EBlendOp rhs); +uint8 operator | (EBlendOp lhs, uint8 rhs); + +enum class ELogicOp : uint8 +{ + + // S - from shader + // D - from render target + // result = S [logicOp] D + // + + // disabled + None, + + // 0 + Clear, + + // 1 + Set, + + // S + Copy, + + // ~S + CopyInverted, + + // D + NoOp, + + // ~D + Invert, + + // S & D + And, + + // ~ ( S & D ) + NotAnd, + + // S | D + Or, + + // ~ ( S | D ) + NotOr, + + // S ^ D + Xor, + + // ~ ( S ^ D ) + Equiv, + + // S & ~D + AndReverse, + + // ~S & D + AndInverted, + + // S | ~D + OrReverse, + + // ~S | D + OrInverted, +}; +uint8 operator | (ELogicOp lhs, ELogicOp rhs); +uint8 operator | (uint8 lhs, ELogicOp rhs); +uint8 operator | (ELogicOp lhs, uint8 rhs); + +enum class EStencilOp : uint8 +{ + + // src + Keep, + + // 0 + Zero, + + // ref + Replace, + + // min( ++src, 0 ) + Incr, + + // ++src & maxValue + IncrWrap, + + // max( --src, 0 ) + Decr, + + // --src & maxValue + DecrWrap, + + // ~src + Invert, +}; +uint8 operator | (EStencilOp lhs, EStencilOp rhs); +uint8 operator | (uint8 lhs, EStencilOp rhs); +uint8 operator | (EStencilOp lhs, uint8 rhs); + +enum class EPolygonMode : uint8 +{ + Point, + Line, + 
Fill, +}; +uint8 operator | (EPolygonMode lhs, EPolygonMode rhs); +uint8 operator | (uint8 lhs, EPolygonMode rhs); +uint8 operator | (EPolygonMode lhs, uint8 rhs); + +enum class EPrimitive : uint8 +{ + Point, + LineList, + LineStrip, + LineListAdjacency, + LineStripAdjacency, + TriangleList, + TriangleStrip, + TriangleFan, + TriangleListAdjacency, + TriangleStripAdjacency, + Patch, +}; +uint8 operator | (EPrimitive lhs, EPrimitive rhs); +uint8 operator | (uint8 lhs, EPrimitive rhs); +uint8 operator | (EPrimitive lhs, uint8 rhs); + +enum class ECullMode : uint8 +{ + None, + Front, + Back, + FontAndBack, +}; +uint8 operator | (ECullMode lhs, ECullMode rhs); +uint8 operator | (uint8 lhs, ECullMode rhs); +uint8 operator | (ECullMode lhs, uint8 rhs); + +enum class EPipelineDynamicState : uint16 +{ + None, + StencilCompareMask, + StencilWriteMask, + StencilReference, + DepthBias, + BlendConstants, + RTStackSize, + FragmentShadingRate, + ViewportWScaling, +}; +uint16 operator | (EPipelineDynamicState lhs, EPipelineDynamicState rhs); +uint16 operator | (uint16 lhs, EPipelineDynamicState rhs); +uint16 operator | (EPipelineDynamicState lhs, uint16 rhs); + +enum class EResourceState : uint32 +{ + Unknown, + Preserve, + ShaderStorage_Read, + ShaderStorage_Write, + ShaderStorage_RW, + ShaderUniform, + ShaderSample, + CopySrc, + CopyDst, + ClearDst, + BlitSrc, + BlitDst, + InputColorAttachment, + InputColorAttachment_RW, + ColorAttachment, + ColorAttachment_Blend, + DepthStencilAttachment_Read, + DepthStencilAttachment_Write, + DepthStencilAttachment_RW, + DepthTest_StencilRW, + DepthRW_StencilTest, + DepthStencilTest_ShaderSample, + DepthTest_DepthSample_StencilRW, + InputDepthStencilAttachment, + InputDepthStencilAttachment_RW, + Host_Read, + PresentImage, + IndirectBuffer, + IndexBuffer, + VertexBuffer, + ShadingRateImage, + CopyRTAS_Read, + CopyRTAS_Write, + BuildRTAS_Read, + BuildRTAS_Write, + BuildRTAS_RW, + BuildRTAS_IndirectBuffer, + ShaderRTAS, + RTShaderBindingTable, + 
DSTestBeforeFS, + DSTestAfterFS, + Invalidate, + General, + MeshTaskShader, + VertexProcessingShaders, + TileShader, + FragmentShader, + PreRasterizationShaders, + PostRasterizationShaders, + ComputeShader, + RayTracingShaders, + AllGraphicsShaders, + AllShaders, + BuildRTAS_ScratchBuffer, + InputDepthAttachment, + DepthStencilAttachment, +}; +uint32 operator | (EResourceState lhs, EResourceState rhs); +uint32 operator | (uint32 lhs, EResourceState rhs); +uint32 operator | (EResourceState lhs, uint32 rhs); + +enum class EImageAspect : uint8 +{ + Color, + Depth, + Stencil, + DepthStencil, + Plane_0, + Plane_1, + Plane_2, +}; +uint8 operator | (EImageAspect lhs, EImageAspect rhs); +uint8 operator | (uint8 lhs, EImageAspect rhs); +uint8 operator | (EImageAspect lhs, uint8 rhs); + +enum class EShaderIO : uint8 +{ + Int, + UInt, + Float, + UFloat, + Half, + UNorm, + SNorm, + sRGB, + AnyColor, + Depth, + Stencil, + DepthStencil, +}; +uint8 operator | (EShaderIO lhs, EShaderIO rhs); +uint8 operator | (uint8 lhs, EShaderIO rhs); +uint8 operator | (EShaderIO lhs, uint8 rhs); + +enum class ESubgroupTypes : uint8 +{ + Float16, + Float32, + Int8, + Int16, + Int32, + Int64, +}; +uint8 operator | (ESubgroupTypes lhs, ESubgroupTypes rhs); +uint8 operator | (uint8 lhs, ESubgroupTypes rhs); +uint8 operator | (ESubgroupTypes lhs, uint8 rhs); + +enum class ESubgroupOperation : uint32 +{ + IndexAndSize, + Elect, + Barrier, + Any, + All, + AllEqual, + Add, + Mul, + Min, + Max, + And, + Or, + Xor, + InclusiveMul, + InclusiveAdd, + InclusiveMin, + InclusiveMax, + InclusiveAnd, + InclusiveOr, + InclusiveXor, + ExclusiveAdd, + ExclusiveMul, + ExclusiveMin, + ExclusiveMax, + ExclusiveAnd, + ExclusiveOr, + ExclusiveXor, + Ballot, + Broadcast, + BroadcastFirst, + InverseBallot, + BallotBitExtract, + BallotBitCount, + BallotInclusiveBitCount, + BallotExclusiveBitCount, + BallotFindLSB, + BallotFindMSB, + Shuffle, + ShuffleXor, + ShuffleUp, + ShuffleDown, + ClusteredAdd, + ClusteredMul, + 
ClusteredMin, + ClusteredMax, + ClusteredAnd, + ClusteredOr, + ClusteredXor, + QuadBroadcast, + QuadSwapHorizontal, + QuadSwapVertical, + QuadSwapDiagonal, + _Basic_Begin, + _Basic_End, + _Vote_Begin, + _Vote_End, + _Arithmetic_Begin, + _Arithmetic_End, + _Ballot_Begin, + _Ballot_End, + _Shuffle_Begin, + _Shuffle_End, + _ShuffleRelative_Begin, + _ShuffleRelative_End, + _Clustered_Begin, + _Clustered_End, + _Quad_Begin, + _Quad_End, +}; +uint32 operator | (ESubgroupOperation lhs, ESubgroupOperation rhs); +uint32 operator | (uint32 lhs, ESubgroupOperation rhs); +uint32 operator | (ESubgroupOperation lhs, uint32 rhs); + +enum class EFeature : uint8 +{ + Ignore, + RequireTrue, + RequireFalse, +}; +uint8 operator | (EFeature lhs, EFeature rhs); +uint8 operator | (uint8 lhs, EFeature rhs); +uint8 operator | (EFeature lhs, uint8 rhs); + +enum class EShader : uint8 +{ + Vertex, + TessControl, + TessEvaluation, + Geometry, + Fragment, + Compute, + Tile, + MeshTask, + Mesh, + RayGen, + RayAnyHit, + RayClosestHit, + RayMiss, + RayIntersection, + RayCallable, +}; +uint8 operator | (EShader lhs, EShader rhs); +uint8 operator | (uint8 lhs, EShader rhs); +uint8 operator | (EShader lhs, uint8 rhs); + +enum class EShaderStages : uint16 +{ + Vertex, + TessControl, + TessEvaluation, + Geometry, + Fragment, + Compute, + Tile, + MeshTask, + Mesh, + RayGen, + RayAnyHit, + RayClosestHit, + RayMiss, + RayIntersection, + RayCallable, + All, + AllGraphics, + GraphicsPipeStages, + MeshPipeStages, + VertexProcessingStages, + PreRasterizationStages, + PostRasterizationStages, + AllRayTracing, +}; +uint16 operator | (EShaderStages lhs, EShaderStages rhs); +uint16 operator | (uint16 lhs, EShaderStages rhs); +uint16 operator | (EShaderStages lhs, uint16 rhs); + +enum class EGPUVendor : uint32 +{ + AMD, + NVidia, + Intel, + ARM, + Qualcomm, + ImgTech, + Microsoft, + Apple, + Mesa, + Broadcom, + Samsung, + VeriSilicon, + Huawei, +}; +uint32 operator | (EGPUVendor lhs, EGPUVendor rhs); +uint32 
operator | (uint32 lhs, EGPUVendor rhs); +uint32 operator | (EGPUVendor lhs, uint32 rhs); + +enum class EVertexType : uint16 +{ + Byte, + Byte2, + Byte3, + Byte4, + Byte_Norm, + Byte2_Norm, + Byte3_Norm, + Byte4_Norm, + Byte_Scaled, + Byte2_Scaled, + Byte3_Scaled, + Byte4_Scaled, + UByte, + UByte2, + UByte3, + UByte4, + UByte_Norm, + UByte2_Norm, + UByte3_Norm, + UByte4_Norm, + UByte_Scaled, + UByte2_Scaled, + UByte3_Scaled, + UByte4_Scaled, + Short, + Short2, + Short3, + Short4, + Short_Norm, + Short2_Norm, + Short3_Norm, + Short4_Norm, + Short_Scaled, + Short2_Scaled, + Short3_Scaled, + Short4_Scaled, + UShort, + UShort2, + UShort3, + UShort4, + UShort_Norm, + UShort2_Norm, + UShort3_Norm, + UShort4_Norm, + UShort_Scaled, + UShort2_Scaled, + UShort3_Scaled, + UShort4_Scaled, + Int, + Int2, + Int3, + Int4, + UInt, + UInt2, + UInt3, + UInt4, + Long, + Long2, + Long3, + Long4, + ULong, + ULong2, + ULong3, + ULong4, + Half, + Half2, + Half3, + Half4, + Float, + Float2, + Float3, + Float4, + Double, + Double2, + Double3, + Double4, + UInt_2_10_10_10, + UInt_2_10_10_10_Norm, + UInt_2_10_10_10_Scaled, +}; +uint16 operator | (EVertexType lhs, EVertexType rhs); +uint16 operator | (uint16 lhs, EVertexType rhs); +uint16 operator | (EVertexType lhs, uint16 rhs); + +enum class EGraphicsDeviceID : uint32 +{ + Adreno_500, + Adreno_600, + Adreno_700, + AMD_GCN1, + AMD_GCN2, + AMD_GCN3, + AMD_GCN4, + AMD_GCN5, + AMD_GCN5_APU, + AMD_RDNA1, + AMD_RDNA2, + AMD_RDNA2_APU, + AMD_RDNA3, + AMD_RDNA3_APU, + AMD_RDNA4, + Apple_A8, + Apple_A9_A10, + Apple_A11, + Apple_A12, + Apple_A13, + Apple_A14_M1, + Apple_A15_M2, + Apple_A16, + Apple_A17_M3, + Mali_Midgard_Gen2, + Mali_Midgard_Gen3, + Mali_Midgard_Gen4, + Mali_Bifrost_Gen1, + Mali_Bifrost_Gen2, + Mali_Bifrost_Gen3, + Mali_Valhall_Gen1, + Mali_Valhall_Gen2, + Mali_Valhall_Gen3, + Mali_Valhall_Gen4, + Mali_5thGen_Gen1, + Mali_5thGen_Gen2, + NV_Maxwell, + NV_Maxwell_Tegra, + NV_Pascal, + NV_Pascal_MX, + NV_Pascal_Tegra, + NV_Volta, + 
NV_Turing_16, + NV_Turing, + NV_Turing_MX, + NV_Ampere, + NV_Ampere_Orin, + NV_Ada, + NV_Blackwell, + Intel_Gen7, + Intel_Gen8, + Intel_Gen9, + Intel_Gen11, + Intel_Gen12, + Intel_Gen12_7, + PowerVR_Series8, + PowerVR_Series9, + PowerVR_SeriesA, + PowerVR_SeriesB, + VeriSilicon, + SwiftShader, +}; +uint32 operator | (EGraphicsDeviceID lhs, EGraphicsDeviceID rhs); +uint32 operator | (uint32 lhs, EGraphicsDeviceID rhs); +uint32 operator | (EGraphicsDeviceID lhs, uint32 rhs); + +enum class EFilter : uint8 +{ + Nearest, + Linear, +}; +uint8 operator | (EFilter lhs, EFilter rhs); +uint8 operator | (uint8 lhs, EFilter rhs); +uint8 operator | (EFilter lhs, uint8 rhs); + +enum class EMipmapFilter : uint8 +{ + None, + Nearest, + Linear, +}; +uint8 operator | (EMipmapFilter lhs, EMipmapFilter rhs); +uint8 operator | (uint8 lhs, EMipmapFilter rhs); +uint8 operator | (EMipmapFilter lhs, uint8 rhs); + +enum class EAddressMode : uint8 +{ + Repeat, + MirrorRepeat, + ClampToEdge, + ClampToBorder, + MirrorClampToEdge, + Clamp, + MirrorClamp, +}; +uint8 operator | (EAddressMode lhs, EAddressMode rhs); +uint8 operator | (uint8 lhs, EAddressMode rhs); +uint8 operator | (EAddressMode lhs, uint8 rhs); + +enum class EBorderColor : uint8 +{ + FloatTransparentBlack, + FloatOpaqueBlack, + FloatOpaqueWhite, + IntTransparentBlack, + IntOpaqueBlack, + IntOpaqueWhite, +}; +uint8 operator | (EBorderColor lhs, EBorderColor rhs); +uint8 operator | (uint8 lhs, EBorderColor rhs); +uint8 operator | (EBorderColor lhs, uint8 rhs); + +enum class EReductionMode : uint8 +{ + Average, + Min, + Max, +}; +uint8 operator | (EReductionMode lhs, EReductionMode rhs); +uint8 operator | (uint8 lhs, EReductionMode rhs); +uint8 operator | (EReductionMode lhs, uint8 rhs); + +enum class ESamplerOpt : uint8 +{ + ArgumentBuffer, + UnnormalizedCoordinates, + NonSeamlessCubeMap, +}; +uint8 operator | (ESamplerOpt lhs, ESamplerOpt rhs); +uint8 operator | (uint8 lhs, ESamplerOpt rhs); +uint8 operator | (ESamplerOpt lhs, 
uint8 rhs); + +enum class EVertexInputRate : uint8 +{ + Vertex, + Instance, +}; +uint8 operator | (EVertexInputRate lhs, EVertexInputRate rhs); +uint8 operator | (uint8 lhs, EVertexInputRate rhs); +uint8 operator | (EVertexInputRate lhs, uint8 rhs); + +enum class EDescSetUsage : uint8 +{ + AllowPartialyUpdate, + UpdateTemplate, + ArgumentBuffer, + MutableArgBuffer, + MaybeUnsupported, +}; +uint8 operator | (EDescSetUsage lhs, EDescSetUsage rhs); +uint8 operator | (uint8 lhs, EDescSetUsage rhs); +uint8 operator | (EDescSetUsage lhs, uint8 rhs); + +enum class EPipelineOpt : uint16 +{ + + // Optimize pipeline during creation, may be slow. + Optimize, + CS_DispatchBase, + RT_NoNullAnyHitShaders, + RT_NoNullClosestHitShaders, + RT_NoNullMissShaders, + RT_NoNullIntersectionShaders, + RT_SkipTriangles, + RT_SkipAABBs, + + // Pipeline creation will fail if it is not exists in cache. + DontCompile, + + // When a pipeline is created, its state and shaders are compiled into zero or more device-specific executables, + // which are used when executing commands against that pipeline. + CaptureStatistics, + + // May include the final shader assembly, a binary form of the compiled shader, + // or the shader compiler’s internal representation at any number of intermediate compile steps. + CaptureInternalRepresentation, + + // Disable pipeline optimization to speedup creation. 
+ DontOptimize, + None, +}; +uint16 operator | (EPipelineOpt lhs, EPipelineOpt rhs); +uint16 operator | (uint16 lhs, EPipelineOpt rhs); +uint16 operator | (EPipelineOpt lhs, uint16 rhs); + +enum class EQueueMask : uint8 +{ + Graphics, + AsyncCompute, + AsyncTransfer, + VideoEncode, + VideoDecode, + All, +}; +uint8 operator | (EQueueMask lhs, EQueueMask rhs); +uint8 operator | (uint8 lhs, EQueueMask rhs); +uint8 operator | (EQueueMask lhs, uint8 rhs); + +enum class ESamplerChromaLocation : uint8 +{ + CositedEven, + Midpoint, +}; +uint8 operator | (ESamplerChromaLocation lhs, ESamplerChromaLocation rhs); +uint8 operator | (uint8 lhs, ESamplerChromaLocation rhs); +uint8 operator | (ESamplerChromaLocation lhs, uint8 rhs); + +enum class ESamplerYcbcrModelConversion : uint8 +{ + RGB_Identity, + Ycbcr_Identity, + Ycbcr_709, + Ycbcr_601, + Ycbcr_2020, +}; +uint8 operator | (ESamplerYcbcrModelConversion lhs, ESamplerYcbcrModelConversion rhs); +uint8 operator | (uint8 lhs, ESamplerYcbcrModelConversion rhs); +uint8 operator | (ESamplerYcbcrModelConversion lhs, uint8 rhs); + +enum class ESamplerYcbcrRange : uint8 +{ + ITU_Full, + ITU_Narrow, +}; +uint8 operator | (ESamplerYcbcrRange lhs, ESamplerYcbcrRange rhs); +uint8 operator | (uint8 lhs, ESamplerYcbcrRange rhs); +uint8 operator | (ESamplerYcbcrRange lhs, uint8 rhs); + +enum class ESurfaceFormat : uint8 +{ + BGRA8_sRGB_nonlinear, + RGBA8_sRGB_nonlinear, + BGRA8_BT709_nonlinear, + RGBA16F_Extended_sRGB_linear, + RGBA16F_sRGB_nonlinear, + RGBA16F_BT709_nonlinear, + RGBA16F_HDR10_ST2084, + RGBA16F_BT2020_linear, + RGB10A2_sRGB_nonlinear, + RGB10A2_HDR10_ST2084, +}; +uint8 operator | (ESurfaceFormat lhs, ESurfaceFormat rhs); +uint8 operator | (uint8 lhs, ESurfaceFormat rhs); +uint8 operator | (ESurfaceFormat lhs, uint8 rhs); + +enum class ERTInstanceOpt : uint8 +{ + TriangleCullDisable, + TriangleFrontCCW, + ForceOpaque, + ForceNonOpaque, + TriangleCullBack, + TriangleFrontCW, +}; +uint8 operator | (ERTInstanceOpt lhs, 
ERTInstanceOpt rhs); +uint8 operator | (uint8 lhs, ERTInstanceOpt rhs); +uint8 operator | (ERTInstanceOpt lhs, uint8 rhs); + +enum class EImageUsage : uint32 +{ + TransferSrc, + TransferDst, + Sampled, + Storage, + ColorAttachment, + DepthStencilAttachment, + InputAttachment, + ShadingRate, + All, + Transfer, + RWAttachment, +}; +uint32 operator | (EImageUsage lhs, EImageUsage rhs); +uint32 operator | (uint32 lhs, EImageUsage rhs); +uint32 operator | (EImageUsage lhs, uint32 rhs); + +enum class EImageOpt : uint32 +{ + BlitSrc, + BlitDst, + CubeCompatible, + MutableFormat, + Array2DCompatible, + BlockTexelViewCompatible, + SparseResidency, + SparseAliased, + Alias, + SampleLocationsCompatible, + StorageAtomic, + ColorAttachmentBlend, + SampledLinear, + SampledMinMax, + VertexPplnStore, + FragmentPplnStore, + LossyRTCompression, + ExtendedUsage, + All, + SparseResidencyAliased, +}; +uint32 operator | (EImageOpt lhs, EImageOpt rhs); +uint32 operator | (uint32 lhs, EImageOpt rhs); +uint32 operator | (EImageOpt lhs, uint32 rhs); + +enum class EBufferUsage : uint32 +{ + TransferSrc, + TransferDst, + UniformTexel, + StorageTexel, + Uniform, + Storage, + Index, + Vertex, + Indirect, + ShaderAddress, + ShaderBindingTable, + ASBuild_ReadOnly, + ASBuild_Scratch, + All, + Transfer, +}; +uint32 operator | (EBufferUsage lhs, EBufferUsage rhs); +uint32 operator | (uint32 lhs, EBufferUsage rhs); +uint32 operator | (EBufferUsage lhs, uint32 rhs); + +enum class EBufferOpt : uint32 +{ + SparseResidency, + SparseAliased, + VertexPplnStore, + FragmentPplnStore, + StorageTexelAtomic, + All, + SparseResidencyAliased, +}; +uint32 operator | (EBufferOpt lhs, EBufferOpt rhs); +uint32 operator | (uint32 lhs, EBufferOpt rhs); +uint32 operator | (EBufferOpt lhs, uint32 rhs); + +enum class EShadingRate : uint8 +{ + Size1x1, + Size1x2, + Size1x4, + Size2x1, + Size2x2, + Size2x4, + Size4x1, + Size4x2, + Size4x4, +}; +uint8 operator | (EShadingRate lhs, EShadingRate rhs); +uint8 operator | (uint8 
lhs, EShadingRate rhs); +uint8 operator | (EShadingRate lhs, uint8 rhs); + +enum class EShadingRateCombinerOp : uint8 +{ + Keep, + Replace, + Min, + Max, + Sum, + Mul, +}; +uint8 operator | (EShadingRateCombinerOp lhs, EShadingRateCombinerOp rhs); +uint8 operator | (uint8 lhs, EShadingRateCombinerOp rhs); +uint8 operator | (EShadingRateCombinerOp lhs, uint8 rhs); + +enum class EColorSpace : uint8 +{ + sRGB_nonlinear, + BT709_nonlinear, + Extended_sRGB_linear, + HDR10_ST2084, + BT2020_linear, +}; +uint8 operator | (EColorSpace lhs, EColorSpace rhs); +uint8 operator | (uint8 lhs, EColorSpace rhs); +uint8 operator | (EColorSpace lhs, uint8 rhs); + +enum class DbgViewFlags : uint32 +{ + NoCopy, + Copy, + Histogram, + LinearDepth, + Stencil, +}; +uint32 operator | (DbgViewFlags lhs, DbgViewFlags rhs); +uint32 operator | (uint32 lhs, DbgViewFlags rhs); +uint32 operator | (DbgViewFlags lhs, uint32 rhs); + +enum class ScriptFlags : uint32 +{ + RunOnce, + OnRequest, + RunOnce_AfterLoading, +}; +uint32 operator | (ScriptFlags lhs, ScriptFlags rhs); +uint32 operator | (uint32 lhs, ScriptFlags rhs); +uint32 operator | (ScriptFlags lhs, uint32 rhs); + +enum class ImageLoadOpFlags : uint32 +{ + + // Generate mipmaps after loading + GenMipmaps, +}; +uint32 operator | (ImageLoadOpFlags lhs, ImageLoadOpFlags rhs); +uint32 operator | (uint32 lhs, ImageLoadOpFlags rhs); +uint32 operator | (ImageLoadOpFlags lhs, uint32 rhs); + +enum class EPostprocess : uint32 +{ + + // Entry point: 'Main' + None, + + // Entry point: 'void mainImage (out float4 fragColor, in float2 fragCoord)' + Shadertoy, + + // Entry point: 'void mainVR (out float4 fragColor, in float2 fragCoord, in float3 fragRayOri, in float3 fragRayDir)' + ShadertoyVR, + ShadertoyVR_180, + ShadertoyVR_360, + Shadertoy_360, +}; +uint32 operator | (EPostprocess lhs, EPostprocess rhs); +uint32 operator | (uint32 lhs, EPostprocess rhs); +uint32 operator | (EPostprocess lhs, uint32 rhs); + +enum class EPassFlags : uint8 +{ + None, 
-struct EImageType -{ - EImageType () {} - EImageType (uint16) {} - operator uint16 () const; - static constexpr uint16 1D = 1; - static constexpr uint16 1DArray = 2; - static constexpr uint16 2D = 3; - static constexpr uint16 2DArray = 4; - static constexpr uint16 2DMS = 5; - static constexpr uint16 2DMSArray = 6; - static constexpr uint16 Cube = 7; - static constexpr uint16 CubeArray = 8; - static constexpr uint16 3D = 9; - static constexpr uint16 Buffer = 10; - static constexpr uint16 Float = 16; - static constexpr uint16 Half = 32; - static constexpr uint16 SNorm = 48; - static constexpr uint16 UNorm = 64; - static constexpr uint16 Int = 80; - static constexpr uint16 UInt = 96; - static constexpr uint16 sRGB = 112; - static constexpr uint16 Depth = 128; - static constexpr uint16 Stencil = 144; - static constexpr uint16 DepthStencil = 160; - static constexpr uint16 Shadow = 256; - static constexpr uint16 FImage1D = 17; - static constexpr uint16 FImage2D = 19; - static constexpr uint16 FImage2D_sRGB = 115; - static constexpr uint16 FImage3D = 25; - static constexpr uint16 FImage1DArray = 18; - static constexpr uint16 FImage2DArray = 20; - static constexpr uint16 FImageCube = 23; - static constexpr uint16 FImageCubeArray = 24; - static constexpr uint16 FImage2DMS = 21; - static constexpr uint16 FImage2DMSArray = 22; - static constexpr uint16 FImageBuffer = 26; - static constexpr uint16 HImage1D = 33; - static constexpr uint16 HImage2D = 35; - static constexpr uint16 HImage3D = 41; - static constexpr uint16 HImage1DArray = 34; - static constexpr uint16 HImage2DArray = 36; - static constexpr uint16 HImageCube = 39; - static constexpr uint16 HImageCubeArray = 40; - static constexpr uint16 HImage2DMS = 37; - static constexpr uint16 HImage2DMSArray = 38; - static constexpr uint16 HImageBuffer = 42; - static constexpr uint16 Image1DShadow = 385; - static constexpr uint16 Image2DShadow = 387; - static constexpr uint16 Image1DArrayShadow = 386; - static constexpr uint16 
Image2DArrayShadow = 388; - static constexpr uint16 ImageCubeShadow = 391; - static constexpr uint16 ImageCubeArrayShadow = 392; - static constexpr uint16 IImage1D = 81; - static constexpr uint16 IImage2D = 83; - static constexpr uint16 IImage3D = 89; - static constexpr uint16 IImage1DArray = 82; - static constexpr uint16 IImage2DArray = 84; - static constexpr uint16 IImageCube = 87; - static constexpr uint16 IImageCubeArray = 88; - static constexpr uint16 IImage2DMS = 85; - static constexpr uint16 IImage2DMSArray = 86; - static constexpr uint16 IImageBuffer = 90; - static constexpr uint16 UImage1D = 97; - static constexpr uint16 UImage2D = 99; - static constexpr uint16 UImage3D = 105; - static constexpr uint16 UImage1DArray = 98; - static constexpr uint16 UImage2DArray = 100; - static constexpr uint16 UImageCube = 103; - static constexpr uint16 UImageCubeArray = 104; - static constexpr uint16 UImage2DMS = 101; - static constexpr uint16 UImage2DMSArray = 102; - static constexpr uint16 UImageBuffer = 106; - static constexpr uint16 SLongImage1D = 177; - static constexpr uint16 SLongImage2D = 179; - static constexpr uint16 SLongImage3D = 185; - static constexpr uint16 SLongImage1DArray = 178; - static constexpr uint16 SLongImage2DArray = 180; - static constexpr uint16 SLongImageCube = 183; - static constexpr uint16 SLongImageCubeArray = 184; - static constexpr uint16 SLongImage2DMS = 181; - static constexpr uint16 SLongImage2DMSArray = 182; - static constexpr uint16 SLongImageBuffer = 186; - static constexpr uint16 ULongImage1D = 193; - static constexpr uint16 ULongImage2D = 195; - static constexpr uint16 ULongImage3D = 201; - static constexpr uint16 ULongImage1DArray = 194; - static constexpr uint16 ULongImage2DArray = 196; - static constexpr uint16 ULongImageCube = 199; - static constexpr uint16 ULongImageCubeArray = 200; - static constexpr uint16 ULongImage2DMS = 197; - static constexpr uint16 ULongImage2DMSArray = 198; - static constexpr uint16 ULongImageBuffer = 
202; + // ShaderTrace - record all variables, function result, etc and save it to file. + // It is very useful to debug shaders. In UI select 'Debugging' menu, select pass,'Trace' and shader stage then click 'G' key to record trace for pixel under cursor. + // Reference to the last recorded trace will be added to console and IDE log, click on it to open file. + Enable_ShaderTrace, + + // ShaderFunctionProfiling - record time of user function calls, sort it and save to file. + Enable_ShaderFnProf, + + // Enable all debug features. + Enable_AllShaderDbg, +}; +uint8 operator | (EPassFlags lhs, EPassFlags rhs); +uint8 operator | (uint8 lhs, EPassFlags rhs); +uint8 operator | (EPassFlags lhs, uint8 rhs); + +enum class ERenderLayer : uint32 +{ + Opaque, + Translucent, + PostProcess, }; +uint32 operator | (ERenderLayer lhs, ERenderLayer rhs); +uint32 operator | (uint32 lhs, ERenderLayer rhs); +uint32 operator | (ERenderLayer lhs, uint32 rhs); string FindAndReplace (const string &, const string &, const string &); bool StartsWith (const string &, const string &); @@ -1090,6 +2350,7 @@ struct float4 float4 (const uint2 & v2); float4 (const uint3 & v3); float4 (const uint4 & v4); + float4 (const Quat & quat); float4 (const RGBA32f & x); }; @@ -1304,6 +2565,28 @@ float Length (const float4 & x); float LengthSq (const float4 & x); float Distance (const float4 & x, const float4 & y); float DistanceSq (const float4 & x, const float4 & y); +struct Quat +{ + Quat (); + Quat (const Quat&); + Quat& operator = (const Quat&); + Quat (float w, float x, float y, float z); + float x; + float y; + float z; + float w; + Quat RotateX (float angle) const; + Quat RotateY (float angle) const; + Quat RotateZ (float angle) const; + Quat Rotate (float angle, const float3 & axis) const; + Quat Rotate (const float3 & angles) const; + Quat Rotate2 (const float3 & angles) const; + Quat Rotate (float angleX, float angleY, float angleZ) const; + Quat LookAt (const float3 & dir, const float3 & up) 
const; + Quat From2Normals (const float3 & norm1, const float3 & norm2) const; + Quat FromAngleAxis (float angle, const float3 & axis) const; +}; + struct RectI { RectI (); @@ -1702,1105 +2985,6 @@ string ToString (const RGBA32f & value); string ToString (const RGBA32i & value); string ToString (const RGBA32u & value); string ToString (const RGBA8u & value); -struct EImage -{ - EImage () {} - EImage (uint8) {} - operator uint8 () const; - static constexpr uint8 1D = 0; - static constexpr uint8 2D = 1; - static constexpr uint8 3D = 2; - static constexpr uint8 1DArray = 3; - static constexpr uint8 2DArray = 4; - static constexpr uint8 Cube = 5; - static constexpr uint8 CubeArray = 6; -}; - -struct EIndex -{ - EIndex () {} - EIndex (uint8) {} - operator uint8 () const; - static constexpr uint8 UShort = 0; - static constexpr uint8 UInt = 1; -}; - -struct EPixelFormat -{ - EPixelFormat () {} - EPixelFormat (uint8) {} - operator uint8 () const; - static constexpr uint8 RGBA16_SNorm = 0; - static constexpr uint8 RGBA8_SNorm = 1; - static constexpr uint8 RGB16_SNorm = 2; - static constexpr uint8 RGB8_SNorm = 3; - static constexpr uint8 RG16_SNorm = 4; - static constexpr uint8 RG8_SNorm = 5; - static constexpr uint8 R16_SNorm = 6; - static constexpr uint8 R8_SNorm = 7; - static constexpr uint8 RGBA16_UNorm = 8; - static constexpr uint8 RGBA8_UNorm = 9; - static constexpr uint8 RGB16_UNorm = 10; - static constexpr uint8 RGB8_UNorm = 11; - static constexpr uint8 RG16_UNorm = 12; - static constexpr uint8 RG8_UNorm = 13; - static constexpr uint8 R16_UNorm = 14; - static constexpr uint8 R8_UNorm = 15; - static constexpr uint8 RGB10_A2_UNorm = 16; - static constexpr uint8 RGBA4_UNorm = 17; - static constexpr uint8 RGB5_A1_UNorm = 18; - static constexpr uint8 RGB_5_6_5_UNorm = 19; - static constexpr uint8 BGR8_UNorm = 20; - static constexpr uint8 BGRA8_UNorm = 21; - static constexpr uint8 sRGB8 = 22; - static constexpr uint8 sRGB8_A8 = 23; - static constexpr uint8 sBGR8 = 24; - 
static constexpr uint8 sBGR8_A8 = 25; - static constexpr uint8 R8I = 26; - static constexpr uint8 RG8I = 27; - static constexpr uint8 RGB8I = 28; - static constexpr uint8 RGBA8I = 29; - static constexpr uint8 R16I = 30; - static constexpr uint8 RG16I = 31; - static constexpr uint8 RGB16I = 32; - static constexpr uint8 RGBA16I = 33; - static constexpr uint8 R32I = 34; - static constexpr uint8 RG32I = 35; - static constexpr uint8 RGB32I = 36; - static constexpr uint8 RGBA32I = 37; - static constexpr uint8 R64I = 38; - static constexpr uint8 R8U = 39; - static constexpr uint8 RG8U = 40; - static constexpr uint8 RGB8U = 41; - static constexpr uint8 RGBA8U = 42; - static constexpr uint8 R16U = 43; - static constexpr uint8 RG16U = 44; - static constexpr uint8 RGB16U = 45; - static constexpr uint8 RGBA16U = 46; - static constexpr uint8 R32U = 47; - static constexpr uint8 RG32U = 48; - static constexpr uint8 RGB32U = 49; - static constexpr uint8 RGBA32U = 50; - static constexpr uint8 RGB10_A2U = 51; - static constexpr uint8 R64U = 52; - static constexpr uint8 R16F = 53; - static constexpr uint8 RG16F = 54; - static constexpr uint8 RGB16F = 55; - static constexpr uint8 RGBA16F = 56; - static constexpr uint8 R32F = 57; - static constexpr uint8 RG32F = 58; - static constexpr uint8 RGB32F = 59; - static constexpr uint8 RGBA32F = 60; - static constexpr uint8 RGB_11_11_10F = 61; - static constexpr uint8 RGB9F_E5 = 62; - static constexpr uint8 Depth16 = 63; - static constexpr uint8 Depth24 = 64; - static constexpr uint8 Depth32F = 65; - static constexpr uint8 Depth16_Stencil8 = 66; - static constexpr uint8 Depth24_Stencil8 = 67; - static constexpr uint8 Depth32F_Stencil8 = 68; - static constexpr uint8 BC1_RGB8_UNorm = 69; - static constexpr uint8 BC1_sRGB8 = 70; - static constexpr uint8 BC1_RGB8_A1_UNorm = 71; - static constexpr uint8 BC1_sRGB8_A1 = 72; - static constexpr uint8 BC2_RGBA8_UNorm = 73; - static constexpr uint8 BC2_sRGB8 = 74; - static constexpr uint8 BC3_RGBA8_UNorm 
= 75; - static constexpr uint8 BC3_sRGB8 = 76; - static constexpr uint8 BC4_R8_SNorm = 77; - static constexpr uint8 BC4_R8_UNorm = 78; - static constexpr uint8 BC5_RG8_SNorm = 79; - static constexpr uint8 BC5_RG8_UNorm = 80; - static constexpr uint8 BC6H_RGB16F = 81; - static constexpr uint8 BC6H_RGB16UF = 82; - static constexpr uint8 BC7_RGBA8_UNorm = 83; - static constexpr uint8 BC7_sRGB8_A8 = 84; - static constexpr uint8 ETC2_RGB8_UNorm = 85; - static constexpr uint8 ETC2_sRGB8 = 86; - static constexpr uint8 ETC2_RGB8_A1_UNorm = 87; - static constexpr uint8 ETC2_sRGB8_A1 = 88; - static constexpr uint8 ETC2_RGBA8_UNorm = 89; - static constexpr uint8 ETC2_sRGB8_A8 = 90; - static constexpr uint8 EAC_R11_SNorm = 91; - static constexpr uint8 EAC_R11_UNorm = 92; - static constexpr uint8 EAC_RG11_SNorm = 93; - static constexpr uint8 EAC_RG11_UNorm = 94; - static constexpr uint8 ASTC_RGBA8_4x4 = 95; - static constexpr uint8 ASTC_RGBA8_5x4 = 96; - static constexpr uint8 ASTC_RGBA8_5x5 = 97; - static constexpr uint8 ASTC_RGBA8_6x5 = 98; - static constexpr uint8 ASTC_RGBA8_6x6 = 99; - static constexpr uint8 ASTC_RGBA8_8x5 = 100; - static constexpr uint8 ASTC_RGBA8_8x6 = 101; - static constexpr uint8 ASTC_RGBA8_8x8 = 102; - static constexpr uint8 ASTC_RGBA8_10x5 = 103; - static constexpr uint8 ASTC_RGBA8_10x6 = 104; - static constexpr uint8 ASTC_RGBA8_10x8 = 105; - static constexpr uint8 ASTC_RGBA8_10x10 = 106; - static constexpr uint8 ASTC_RGBA8_12x10 = 107; - static constexpr uint8 ASTC_RGBA8_12x12 = 108; - static constexpr uint8 ASTC_sRGB8_A8_4x4 = 109; - static constexpr uint8 ASTC_sRGB8_A8_5x4 = 110; - static constexpr uint8 ASTC_sRGB8_A8_5x5 = 111; - static constexpr uint8 ASTC_sRGB8_A8_6x5 = 112; - static constexpr uint8 ASTC_sRGB8_A8_6x6 = 113; - static constexpr uint8 ASTC_sRGB8_A8_8x5 = 114; - static constexpr uint8 ASTC_sRGB8_A8_8x6 = 115; - static constexpr uint8 ASTC_sRGB8_A8_8x8 = 116; - static constexpr uint8 ASTC_sRGB8_A8_10x5 = 117; - static constexpr uint8 
ASTC_sRGB8_A8_10x6 = 118; - static constexpr uint8 ASTC_sRGB8_A8_10x8 = 119; - static constexpr uint8 ASTC_sRGB8_A8_10x10 = 120; - static constexpr uint8 ASTC_sRGB8_A8_12x10 = 121; - static constexpr uint8 ASTC_sRGB8_A8_12x12 = 122; - static constexpr uint8 ASTC_RGBA16F_4x4 = 123; - static constexpr uint8 ASTC_RGBA16F_5x4 = 124; - static constexpr uint8 ASTC_RGBA16F_5x5 = 125; - static constexpr uint8 ASTC_RGBA16F_6x5 = 126; - static constexpr uint8 ASTC_RGBA16F_6x6 = 127; - static constexpr uint8 ASTC_RGBA16F_8x5 = 128; - static constexpr uint8 ASTC_RGBA16F_8x6 = 129; - static constexpr uint8 ASTC_RGBA16F_8x8 = 130; - static constexpr uint8 ASTC_RGBA16F_10x5 = 131; - static constexpr uint8 ASTC_RGBA16F_10x6 = 132; - static constexpr uint8 ASTC_RGBA16F_10x8 = 133; - static constexpr uint8 ASTC_RGBA16F_10x10 = 134; - static constexpr uint8 ASTC_RGBA16F_12x10 = 135; - static constexpr uint8 ASTC_RGBA16F_12x12 = 136; - static constexpr uint8 G8B8G8R8_422_UNorm = 137; - static constexpr uint8 B8G8R8G8_422_UNorm = 138; - static constexpr uint8 G8_B8R8_420_UNorm = 139; - static constexpr uint8 G8_B8R8_422_UNorm = 140; - static constexpr uint8 G8_B8R8_444_UNorm = 141; - static constexpr uint8 G8_B8_R8_420_UNorm = 142; - static constexpr uint8 G8_B8_R8_422_UNorm = 143; - static constexpr uint8 G8_B8_R8_444_UNorm = 144; - static constexpr uint8 B10x6G10x6R10x6G10x6_422_UNorm = 145; - static constexpr uint8 G10x6B10x6G10x6R10x6_422_UNorm = 146; - static constexpr uint8 G10x6_B10x6R10x6_420_UNorm = 147; - static constexpr uint8 G10x6_B10x6R10x6_422_UNorm = 148; - static constexpr uint8 G10x6_B10x6R10x6_444_UNorm = 149; - static constexpr uint8 G10x6_B10x6_R10x6_420_UNorm = 150; - static constexpr uint8 G10x6_B10x6_R10x6_422_UNorm = 151; - static constexpr uint8 G10x6_B10x6_R10x6_444_UNorm = 152; - static constexpr uint8 R10x6G10x6B10x6A10x6_UNorm = 153; - static constexpr uint8 R10x6G10x6_UNorm = 154; - static constexpr uint8 R10x6_UNorm = 155; - static constexpr uint8 
B12x4G12x4R12x4G12x4_422_UNorm = 156; - static constexpr uint8 G12x4B12x4G12x4R12x4_422_UNorm = 157; - static constexpr uint8 G12x4_B12x4R12x4_420_UNorm = 158; - static constexpr uint8 G12x4_B12x4R12x4_422_UNorm = 159; - static constexpr uint8 G12x4_B12x4R12x4_444_UNorm = 160; - static constexpr uint8 G12x4_B12x4_R12x4_420_UNorm = 161; - static constexpr uint8 G12x4_B12x4_R12x4_422_UNorm = 162; - static constexpr uint8 G12x4_B12x4_R12x4_444_UNorm = 163; - static constexpr uint8 R12x4G12x4B12x4A12x4_UNorm = 164; - static constexpr uint8 R12x4G12x4_UNorm = 165; - static constexpr uint8 R12x4_UNorm = 166; - static constexpr uint8 B16G16R16G16_422_UNorm = 167; - static constexpr uint8 G16B16G16R16_422_UNorm = 168; - static constexpr uint8 G16_B16R16_420_UNorm = 169; - static constexpr uint8 G16_B16R16_422_UNorm = 170; - static constexpr uint8 G16_B16R16_444_UNorm = 171; - static constexpr uint8 G16_B16_R16_420_UNorm = 172; - static constexpr uint8 G16_B16_R16_422_UNorm = 173; - static constexpr uint8 G16_B16_R16_444_UNorm = 174; - static constexpr uint8 SwapchainColor = 254; -}; - -struct EPixelFormatExternal -{ - EPixelFormatExternal () {} - EPixelFormatExternal (uint8) {} - operator uint8 () const; - static constexpr uint8 Android_Depth16 = 0; - static constexpr uint8 Android_DepthJPEG = 1; - static constexpr uint8 Android_DepthPointCloud = 2; - static constexpr uint8 Android_JPEG = 3; - static constexpr uint8 Android_Raw16 = 5; - static constexpr uint8 Android_Raw12 = 6; - static constexpr uint8 Android_Raw10 = 7; - static constexpr uint8 Android_NV16 = 9; - static constexpr uint8 Android_NV21 = 10; - static constexpr uint8 Android_YCBCR_P010 = 11; - static constexpr uint8 Android_YUV_420 = 12; - static constexpr uint8 Android_YUV_422 = 13; - static constexpr uint8 Android_YUV_444 = 14; - static constexpr uint8 Android_YUY2 = 15; - static constexpr uint8 Android_YV12 = 16; - static constexpr uint8 Android_Y8 = 17; - static constexpr uint8 Android_HEIC = 18; -}; - 
-struct ECompareOp -{ - ECompareOp () {} - ECompareOp (uint8) {} - operator uint8 () const; - static constexpr uint8 Never = 0; - static constexpr uint8 Less = 1; - static constexpr uint8 Equal = 2; - static constexpr uint8 LEqual = 3; - static constexpr uint8 Greater = 4; - static constexpr uint8 NotEqual = 5; - static constexpr uint8 GEqual = 6; - static constexpr uint8 Always = 7; - static constexpr uint8 LessOrEqual = 3; - static constexpr uint8 GreaterOrEqual = 6; -}; - -struct EBlendFactor -{ - EBlendFactor () {} - EBlendFactor (uint8) {} - operator uint8 () const; - static constexpr uint8 Zero = 0; - static constexpr uint8 One = 1; - static constexpr uint8 SrcColor = 2; - static constexpr uint8 OneMinusSrcColor = 3; - static constexpr uint8 DstColor = 4; - static constexpr uint8 OneMinusDstColor = 5; - static constexpr uint8 SrcAlpha = 6; - static constexpr uint8 OneMinusSrcAlpha = 7; - static constexpr uint8 DstAlpha = 8; - static constexpr uint8 OneMinusDstAlpha = 9; - static constexpr uint8 ConstColor = 10; - static constexpr uint8 OneMinusConstColor = 11; - static constexpr uint8 ConstAlpha = 12; - static constexpr uint8 OneMinusConstAlpha = 13; - static constexpr uint8 SrcAlphaSaturate = 14; - static constexpr uint8 Src1Color = 15; - static constexpr uint8 OneMinusSrc1Color = 16; - static constexpr uint8 Src1Alpha = 17; - static constexpr uint8 OneMinusSrc1Alpha = 18; -}; - -struct EBlendOp -{ - EBlendOp () {} - EBlendOp (uint8) {} - operator uint8 () const; - static constexpr uint8 Add = 0; - static constexpr uint8 Sub = 1; - static constexpr uint8 RevSub = 2; - static constexpr uint8 Min = 3; - static constexpr uint8 Max = 4; -}; - -struct ELogicOp -{ - ELogicOp () {} - ELogicOp (uint8) {} - operator uint8 () const; - static constexpr uint8 None = 0; - static constexpr uint8 Clear = 1; - static constexpr uint8 Set = 2; - static constexpr uint8 Copy = 3; - static constexpr uint8 CopyInverted = 4; - static constexpr uint8 NoOp = 5; - static constexpr 
uint8 Invert = 6; - static constexpr uint8 And = 7; - static constexpr uint8 NotAnd = 8; - static constexpr uint8 Or = 9; - static constexpr uint8 NotOr = 10; - static constexpr uint8 Xor = 11; - static constexpr uint8 Equiv = 12; - static constexpr uint8 AndReverse = 13; - static constexpr uint8 AndInverted = 14; - static constexpr uint8 OrReverse = 15; - static constexpr uint8 OrInverted = 16; -}; - -struct EStencilOp -{ - EStencilOp () {} - EStencilOp (uint8) {} - operator uint8 () const; - static constexpr uint8 Keep = 0; - static constexpr uint8 Zero = 1; - static constexpr uint8 Replace = 2; - static constexpr uint8 Incr = 3; - static constexpr uint8 IncrWrap = 4; - static constexpr uint8 Decr = 5; - static constexpr uint8 DecrWrap = 6; - static constexpr uint8 Invert = 7; -}; - -struct EPolygonMode -{ - EPolygonMode () {} - EPolygonMode (uint8) {} - operator uint8 () const; - static constexpr uint8 Point = 0; - static constexpr uint8 Line = 1; - static constexpr uint8 Fill = 2; -}; - -struct EPrimitive -{ - EPrimitive () {} - EPrimitive (uint8) {} - operator uint8 () const; - static constexpr uint8 Point = 0; - static constexpr uint8 LineList = 1; - static constexpr uint8 LineStrip = 2; - static constexpr uint8 LineListAdjacency = 3; - static constexpr uint8 LineStripAdjacency = 4; - static constexpr uint8 TriangleList = 5; - static constexpr uint8 TriangleStrip = 6; - static constexpr uint8 TriangleFan = 7; - static constexpr uint8 TriangleListAdjacency = 8; - static constexpr uint8 TriangleStripAdjacency = 9; - static constexpr uint8 Patch = 10; -}; - -struct ECullMode -{ - ECullMode () {} - ECullMode (uint8) {} - operator uint8 () const; - static constexpr uint8 None = 0; - static constexpr uint8 Front = 1; - static constexpr uint8 Back = 2; - static constexpr uint8 FontAndBack = 3; -}; - -struct EPipelineDynamicState -{ - EPipelineDynamicState () {} - EPipelineDynamicState (uint16) {} - operator uint16 () const; - static constexpr uint16 None = 0; - 
static constexpr uint16 StencilCompareMask = 1; - static constexpr uint16 StencilWriteMask = 2; - static constexpr uint16 StencilReference = 4; - static constexpr uint16 DepthBias = 8; - static constexpr uint16 BlendConstants = 16; - static constexpr uint16 RTStackSize = 64; - static constexpr uint16 FragmentShadingRate = 128; -}; - -struct EResourceState -{ - EResourceState () {} - EResourceState (uint32) {} - operator uint32 () const; - static constexpr uint32 Unknown = 0; - static constexpr uint32 Preserve = 1; - static constexpr uint32 ShaderStorage_Read = 271; - static constexpr uint32 ShaderStorage_Write = 528; - static constexpr uint32 ShaderStorage_RW = 785; - static constexpr uint32 ShaderUniform = 274; - static constexpr uint32 ShaderSample = 275; - static constexpr uint32 CopySrc = 258; - static constexpr uint32 CopyDst = 515; - static constexpr uint32 ClearDst = 516; - static constexpr uint32 BlitSrc = 261; - static constexpr uint32 BlitDst = 518; - static constexpr uint32 InputColorAttachment = 276; - static constexpr uint32 InputColorAttachment_RW = 533; - static constexpr uint32 ColorAttachment = 519; - static constexpr uint32 ColorAttachment_Blend = 775; - static constexpr uint32 DepthStencilAttachment_Read = 1288; - static constexpr uint32 DepthStencilAttachment_Write = 2569; - static constexpr uint32 DepthStencilAttachment_RW = 3849; - static constexpr uint32 DepthTest_StencilRW = 3338; - static constexpr uint32 DepthRW_StencilTest = 1803; - static constexpr uint32 DepthStencilTest_ShaderSample = 1304; - static constexpr uint32 DepthTest_DepthSample_StencilRW = 3353; - static constexpr uint32 InputDepthStencilAttachment = 1302; - static constexpr uint32 InputDepthStencilAttachment_RW = 3863; - static constexpr uint32 Host_Read = 282; - static constexpr uint32 PresentImage = 268; - static constexpr uint32 IndirectBuffer = 284; - static constexpr uint32 IndexBuffer = 285; - static constexpr uint32 VertexBuffer = 286; - static constexpr uint32 
ShadingRateImage = 269; - static constexpr uint32 CopyRTAS_Read = 287; - static constexpr uint32 CopyRTAS_Write = 544; - static constexpr uint32 BuildRTAS_Read = 289; - static constexpr uint32 BuildRTAS_Write = 546; - static constexpr uint32 BuildRTAS_RW = 802; - static constexpr uint32 BuildRTAS_IndirectBuffer = 291; - static constexpr uint32 ShaderRTAS = 283; - static constexpr uint32 RTShaderBindingTable = 292; - static constexpr uint32 DSTestBeforeFS = 4096; - static constexpr uint32 DSTestAfterFS = 8192; - static constexpr uint32 Invalidate = 16384; - static constexpr uint32 General = 782; - static constexpr uint32 MeshTaskShader = 32768; - static constexpr uint32 VertexProcessingShaders = 65536; - static constexpr uint32 TileShader = 131072; - static constexpr uint32 FragmentShader = 262144; - static constexpr uint32 PreRasterizationShaders = 98304; - static constexpr uint32 PostRasterizationShaders = 393216; - static constexpr uint32 ComputeShader = 524288; - static constexpr uint32 RayTracingShaders = 1048576; - static constexpr uint32 AllGraphicsShaders = 491520; - static constexpr uint32 AllShaders = 2064384; - static constexpr uint32 BuildRTAS_ScratchBuffer = 802; - static constexpr uint32 InputDepthAttachment = 1302; - static constexpr uint32 DepthStencilAttachment = 16137; -}; - -struct EImageAspect -{ - EImageAspect () {} - EImageAspect (uint8) {} - operator uint8 () const; - static constexpr uint8 Color = 1; - static constexpr uint8 Depth = 2; - static constexpr uint8 Stencil = 4; - static constexpr uint8 DepthStencil = 6; - static constexpr uint8 Plane_0 = 16; - static constexpr uint8 Plane_1 = 32; - static constexpr uint8 Plane_2 = 64; -}; - -struct EShaderIO -{ - EShaderIO () {} - EShaderIO (uint8) {} - operator uint8 () const; - static constexpr uint8 Int = 1; - static constexpr uint8 UInt = 2; - static constexpr uint8 Float = 3; - static constexpr uint8 UFloat = 4; - static constexpr uint8 Half = 5; - static constexpr uint8 UNorm = 6; - static 
constexpr uint8 SNorm = 7; - static constexpr uint8 sRGB = 8; - static constexpr uint8 AnyColor = 9; - static constexpr uint8 Depth = 10; - static constexpr uint8 Stencil = 11; - static constexpr uint8 DepthStencil = 12; -}; - -struct ESubgroupTypes -{ - ESubgroupTypes () {} - ESubgroupTypes (uint8) {} - operator uint8 () const; - static constexpr uint8 Float16 = 32; - static constexpr uint8 Float32 = 1; - static constexpr uint8 Int8 = 4; - static constexpr uint8 Int16 = 8; - static constexpr uint8 Int32 = 2; - static constexpr uint8 Int64 = 16; -}; - -struct ESubgroupOperation -{ - ESubgroupOperation () {} - ESubgroupOperation (uint32) {} - operator uint32 () const; - static constexpr uint32 IndexAndSize = 0; - static constexpr uint32 Elect = 1; - static constexpr uint32 Barrier = 2; - static constexpr uint32 Any = 3; - static constexpr uint32 All = 4; - static constexpr uint32 AllEqual = 5; - static constexpr uint32 Add = 6; - static constexpr uint32 Mul = 7; - static constexpr uint32 Min = 8; - static constexpr uint32 Max = 9; - static constexpr uint32 And = 10; - static constexpr uint32 Or = 11; - static constexpr uint32 Xor = 12; - static constexpr uint32 InclusiveMul = 13; - static constexpr uint32 InclusiveAdd = 14; - static constexpr uint32 InclusiveMin = 15; - static constexpr uint32 InclusiveMax = 16; - static constexpr uint32 InclusiveAnd = 17; - static constexpr uint32 InclusiveOr = 18; - static constexpr uint32 InclusiveXor = 19; - static constexpr uint32 ExclusiveAdd = 20; - static constexpr uint32 ExclusiveMul = 21; - static constexpr uint32 ExclusiveMin = 22; - static constexpr uint32 ExclusiveMax = 23; - static constexpr uint32 ExclusiveAnd = 24; - static constexpr uint32 ExclusiveOr = 25; - static constexpr uint32 ExclusiveXor = 26; - static constexpr uint32 Ballot = 27; - static constexpr uint32 Broadcast = 28; - static constexpr uint32 BroadcastFirst = 29; - static constexpr uint32 InverseBallot = 30; - static constexpr uint32 BallotBitExtract = 
31; - static constexpr uint32 BallotBitCount = 32; - static constexpr uint32 BallotInclusiveBitCount = 33; - static constexpr uint32 BallotExclusiveBitCount = 34; - static constexpr uint32 BallotFindLSB = 35; - static constexpr uint32 BallotFindMSB = 36; - static constexpr uint32 Shuffle = 37; - static constexpr uint32 ShuffleXor = 38; - static constexpr uint32 ShuffleUp = 39; - static constexpr uint32 ShuffleDown = 40; - static constexpr uint32 ClusteredAdd = 41; - static constexpr uint32 ClusteredMul = 42; - static constexpr uint32 ClusteredMin = 43; - static constexpr uint32 ClusteredMax = 44; - static constexpr uint32 ClusteredAnd = 45; - static constexpr uint32 ClusteredOr = 46; - static constexpr uint32 ClusteredXor = 47; - static constexpr uint32 QuadBroadcast = 48; - static constexpr uint32 QuadSwapHorizontal = 49; - static constexpr uint32 QuadSwapVertical = 50; - static constexpr uint32 QuadSwapDiagonal = 51; - static constexpr uint32 _Basic_Begin = 0; - static constexpr uint32 _Basic_End = 2; - static constexpr uint32 _Vote_Begin = 3; - static constexpr uint32 _Vote_End = 5; - static constexpr uint32 _Arithmetic_Begin = 6; - static constexpr uint32 _Arithmetic_End = 26; - static constexpr uint32 _Ballot_Begin = 27; - static constexpr uint32 _Ballot_End = 36; - static constexpr uint32 _Shuffle_Begin = 37; - static constexpr uint32 _Shuffle_End = 38; - static constexpr uint32 _ShuffleRelative_Begin = 39; - static constexpr uint32 _ShuffleRelative_End = 40; - static constexpr uint32 _Clustered_Begin = 41; - static constexpr uint32 _Clustered_End = 47; - static constexpr uint32 _Quad_Begin = 48; - static constexpr uint32 _Quad_End = 51; -}; - -struct EFeature -{ - EFeature () {} - EFeature (uint8) {} - operator uint8 () const; - static constexpr uint8 Ignore = 0; - static constexpr uint8 RequireTrue = 2; - static constexpr uint8 RequireFalse = 1; -}; - -struct EShader -{ - EShader () {} - EShader (uint8) {} - operator uint8 () const; - static constexpr uint8 
Vertex = 0; - static constexpr uint8 TessControl = 1; - static constexpr uint8 TessEvaluation = 2; - static constexpr uint8 Geometry = 3; - static constexpr uint8 Fragment = 4; - static constexpr uint8 Compute = 5; - static constexpr uint8 Tile = 6; - static constexpr uint8 MeshTask = 7; - static constexpr uint8 Mesh = 8; - static constexpr uint8 RayGen = 9; - static constexpr uint8 RayAnyHit = 10; - static constexpr uint8 RayClosestHit = 11; - static constexpr uint8 RayMiss = 12; - static constexpr uint8 RayIntersection = 13; - static constexpr uint8 RayCallable = 14; -}; - -struct EShaderStages -{ - EShaderStages () {} - EShaderStages (uint16) {} - operator uint16 () const; - static constexpr uint16 Vertex = 1; - static constexpr uint16 TessControl = 2; - static constexpr uint16 TessEvaluation = 4; - static constexpr uint16 Geometry = 8; - static constexpr uint16 Fragment = 16; - static constexpr uint16 Compute = 32; - static constexpr uint16 Tile = 64; - static constexpr uint16 MeshTask = 128; - static constexpr uint16 Mesh = 256; - static constexpr uint16 RayGen = 512; - static constexpr uint16 RayAnyHit = 1024; - static constexpr uint16 RayClosestHit = 2048; - static constexpr uint16 RayMiss = 4096; - static constexpr uint16 RayIntersection = 8192; - static constexpr uint16 RayCallable = 16384; - static constexpr uint16 All = 32767; - static constexpr uint16 AllGraphics = 415; - static constexpr uint16 GraphicsPipeStages = 31; - static constexpr uint16 MeshPipeStages = 400; - static constexpr uint16 VertexProcessingStages = 271; - static constexpr uint16 PreRasterizationStages = 399; - static constexpr uint16 PostRasterizationStages = 80; - static constexpr uint16 AllRayTracing = 32256; -}; - -struct EGPUVendor -{ - EGPUVendor () {} - EGPUVendor (uint32) {} - operator uint32 () const; - static constexpr uint32 AMD = 0; - static constexpr uint32 NVidia = 1; - static constexpr uint32 Intel = 2; - static constexpr uint32 ARM = 3; - static constexpr uint32 
Qualcomm = 4; - static constexpr uint32 ImgTech = 5; - static constexpr uint32 Microsoft = 6; - static constexpr uint32 Apple = 7; - static constexpr uint32 Mesa = 8; - static constexpr uint32 Broadcom = 9; - static constexpr uint32 Samsung = 10; - static constexpr uint32 VeriSilicon = 11; - static constexpr uint32 Huawei = 12; -}; - -struct EVertexType -{ - EVertexType () {} - EVertexType (uint16) {} - operator uint16 () const; - static constexpr uint16 Byte = 4; - static constexpr uint16 Byte2 = 5; - static constexpr uint16 Byte3 = 6; - static constexpr uint16 Byte4 = 7; - static constexpr uint16 Byte_Norm = 68; - static constexpr uint16 Byte2_Norm = 69; - static constexpr uint16 Byte3_Norm = 70; - static constexpr uint16 Byte4_Norm = 71; - static constexpr uint16 Byte_Scaled = 132; - static constexpr uint16 Byte2_Scaled = 133; - static constexpr uint16 Byte3_Scaled = 134; - static constexpr uint16 Byte4_Scaled = 135; - static constexpr uint16 UByte = 8; - static constexpr uint16 UByte2 = 9; - static constexpr uint16 UByte3 = 10; - static constexpr uint16 UByte4 = 11; - static constexpr uint16 UByte_Norm = 72; - static constexpr uint16 UByte2_Norm = 73; - static constexpr uint16 UByte3_Norm = 74; - static constexpr uint16 UByte4_Norm = 75; - static constexpr uint16 UByte_Scaled = 136; - static constexpr uint16 UByte2_Scaled = 137; - static constexpr uint16 UByte3_Scaled = 138; - static constexpr uint16 UByte4_Scaled = 139; - static constexpr uint16 Short = 12; - static constexpr uint16 Short2 = 13; - static constexpr uint16 Short3 = 14; - static constexpr uint16 Short4 = 15; - static constexpr uint16 Short_Norm = 76; - static constexpr uint16 Short2_Norm = 77; - static constexpr uint16 Short3_Norm = 78; - static constexpr uint16 Short4_Norm = 79; - static constexpr uint16 Short_Scaled = 140; - static constexpr uint16 Short2_Scaled = 141; - static constexpr uint16 Short3_Scaled = 142; - static constexpr uint16 Short4_Scaled = 143; - static constexpr uint16 UShort 
= 16; - static constexpr uint16 UShort2 = 17; - static constexpr uint16 UShort3 = 18; - static constexpr uint16 UShort4 = 19; - static constexpr uint16 UShort_Norm = 80; - static constexpr uint16 UShort2_Norm = 81; - static constexpr uint16 UShort3_Norm = 82; - static constexpr uint16 UShort4_Norm = 83; - static constexpr uint16 UShort_Scaled = 144; - static constexpr uint16 UShort2_Scaled = 145; - static constexpr uint16 UShort3_Scaled = 146; - static constexpr uint16 UShort4_Scaled = 147; - static constexpr uint16 Int = 20; - static constexpr uint16 Int2 = 21; - static constexpr uint16 Int3 = 22; - static constexpr uint16 Int4 = 23; - static constexpr uint16 UInt = 24; - static constexpr uint16 UInt2 = 25; - static constexpr uint16 UInt3 = 26; - static constexpr uint16 UInt4 = 27; - static constexpr uint16 Long = 28; - static constexpr uint16 Long2 = 29; - static constexpr uint16 Long3 = 30; - static constexpr uint16 Long4 = 31; - static constexpr uint16 ULong = 32; - static constexpr uint16 ULong2 = 33; - static constexpr uint16 ULong3 = 34; - static constexpr uint16 ULong4 = 35; - static constexpr uint16 Half = 36; - static constexpr uint16 Half2 = 37; - static constexpr uint16 Half3 = 38; - static constexpr uint16 Half4 = 39; - static constexpr uint16 Float = 40; - static constexpr uint16 Float2 = 41; - static constexpr uint16 Float3 = 42; - static constexpr uint16 Float4 = 43; - static constexpr uint16 Double = 44; - static constexpr uint16 Double2 = 45; - static constexpr uint16 Double3 = 46; - static constexpr uint16 Double4 = 47; - static constexpr uint16 UInt_2_10_10_10 = 51; - static constexpr uint16 UInt_2_10_10_10_Norm = 115; - static constexpr uint16 UInt_2_10_10_10_Scaled = 179; -}; - -struct EGraphicsDeviceID -{ - EGraphicsDeviceID () {} - EGraphicsDeviceID (uint32) {} - operator uint32 () const; - static constexpr uint32 Adreno_500 = 0; - static constexpr uint32 Adreno_600 = 1; - static constexpr uint32 Adreno_700 = 2; - static constexpr uint32 
AMD_GCN1 = 3; - static constexpr uint32 AMD_GCN2 = 4; - static constexpr uint32 AMD_GCN3 = 5; - static constexpr uint32 AMD_GCN4 = 6; - static constexpr uint32 AMD_GCN5 = 7; - static constexpr uint32 AMD_GCN5_APU = 8; - static constexpr uint32 AMD_RDNA1 = 9; - static constexpr uint32 AMD_RDNA2 = 10; - static constexpr uint32 AMD_RDNA2_APU = 11; - static constexpr uint32 AMD_RDNA3 = 12; - static constexpr uint32 AMD_RDNA3_APU = 13; - static constexpr uint32 AMD_RDNA4 = 14; - static constexpr uint32 Apple_A8 = 15; - static constexpr uint32 Apple_A9_A10 = 16; - static constexpr uint32 Apple_A11 = 17; - static constexpr uint32 Apple_A12 = 18; - static constexpr uint32 Apple_A13 = 19; - static constexpr uint32 Apple_A14_M1 = 20; - static constexpr uint32 Apple_A15_M2 = 21; - static constexpr uint32 Apple_A16 = 22; - static constexpr uint32 Apple_A17_M3 = 23; - static constexpr uint32 Mali_Midgard_Gen2 = 24; - static constexpr uint32 Mali_Midgard_Gen3 = 25; - static constexpr uint32 Mali_Midgard_Gen4 = 26; - static constexpr uint32 Mali_Bifrost_Gen1 = 27; - static constexpr uint32 Mali_Bifrost_Gen2 = 28; - static constexpr uint32 Mali_Bifrost_Gen3 = 29; - static constexpr uint32 Mali_Valhall_Gen1 = 30; - static constexpr uint32 Mali_Valhall_Gen2 = 31; - static constexpr uint32 Mali_Valhall_Gen3 = 32; - static constexpr uint32 Mali_Valhall_Gen4 = 33; - static constexpr uint32 Mali_5thGen_Gen1 = 34; - static constexpr uint32 Mali_5thGen_Gen2 = 35; - static constexpr uint32 NV_Maxwell = 36; - static constexpr uint32 NV_Maxwell_Tegra = 37; - static constexpr uint32 NV_Pascal = 38; - static constexpr uint32 NV_Pascal_MX = 39; - static constexpr uint32 NV_Pascal_Tegra = 40; - static constexpr uint32 NV_Volta = 41; - static constexpr uint32 NV_Turing_16 = 42; - static constexpr uint32 NV_Turing = 43; - static constexpr uint32 NV_Turing_MX = 44; - static constexpr uint32 NV_Ampere = 45; - static constexpr uint32 NV_Ampere_Orin = 46; - static constexpr uint32 NV_Ada = 47; - 
static constexpr uint32 NV_Blackwell = 48; - static constexpr uint32 Intel_Gen7 = 49; - static constexpr uint32 Intel_Gen8 = 50; - static constexpr uint32 Intel_Gen9 = 51; - static constexpr uint32 Intel_Gen11 = 52; - static constexpr uint32 Intel_Gen12 = 53; - static constexpr uint32 Intel_Gen12_7 = 54; - static constexpr uint32 PowerVR_Series8 = 55; - static constexpr uint32 PowerVR_Series9 = 56; - static constexpr uint32 PowerVR_SeriesA = 57; - static constexpr uint32 PowerVR_SeriesB = 58; - static constexpr uint32 VeriSilicon = 59; - static constexpr uint32 SwiftShader = 60; -}; - -struct EFilter -{ - EFilter () {} - EFilter (uint8) {} - operator uint8 () const; - static constexpr uint8 Nearest = 0; - static constexpr uint8 Linear = 1; -}; - -struct EMipmapFilter -{ - EMipmapFilter () {} - EMipmapFilter (uint8) {} - operator uint8 () const; - static constexpr uint8 None = 0; - static constexpr uint8 Nearest = 1; - static constexpr uint8 Linear = 2; -}; - -struct EAddressMode -{ - EAddressMode () {} - EAddressMode (uint8) {} - operator uint8 () const; - static constexpr uint8 Repeat = 0; - static constexpr uint8 MirrorRepeat = 1; - static constexpr uint8 ClampToEdge = 2; - static constexpr uint8 ClampToBorder = 3; - static constexpr uint8 MirrorClampToEdge = 4; - static constexpr uint8 Clamp = 2; - static constexpr uint8 MirrorClamp = 4; -}; - -struct EBorderColor -{ - EBorderColor () {} - EBorderColor (uint8) {} - operator uint8 () const; - static constexpr uint8 FloatTransparentBlack = 0; - static constexpr uint8 FloatOpaqueBlack = 1; - static constexpr uint8 FloatOpaqueWhite = 2; - static constexpr uint8 IntTransparentBlack = 3; - static constexpr uint8 IntOpaqueBlack = 4; - static constexpr uint8 IntOpaqueWhite = 5; -}; - -struct EReductionMode -{ - EReductionMode () {} - EReductionMode (uint8) {} - operator uint8 () const; - static constexpr uint8 Average = 0; - static constexpr uint8 Min = 1; - static constexpr uint8 Max = 2; -}; - -struct ESamplerOpt -{ - 
ESamplerOpt () {} - ESamplerOpt (uint8) {} - operator uint8 () const; - static constexpr uint8 ArgumentBuffer = 1; - static constexpr uint8 UnnormalizedCoordinates = 4; - static constexpr uint8 NonSeamlessCubeMap = 2; -}; - -struct EVertexInputRate -{ - EVertexInputRate () {} - EVertexInputRate (uint8) {} - operator uint8 () const; - static constexpr uint8 Vertex = 0; - static constexpr uint8 Instance = 1; -}; - -struct EDescSetUsage -{ - EDescSetUsage () {} - EDescSetUsage (uint8) {} - operator uint8 () const; - static constexpr uint8 AllowPartialyUpdate = 1; - static constexpr uint8 UpdateTemplate = 2; - static constexpr uint8 ArgumentBuffer = 4; - static constexpr uint8 MutableArgBuffer = 8; - static constexpr uint8 MaybeUnsupported = 16; -}; - -struct EPipelineOpt -{ - EPipelineOpt () {} - EPipelineOpt (uint16) {} - operator uint16 () const; - - // Optimize pipeline during creation, may be slow. - static constexpr uint16 Optimize = 1; - static constexpr uint16 CS_DispatchBase = 2; - static constexpr uint16 RT_NoNullAnyHitShaders = 4; - static constexpr uint16 RT_NoNullClosestHitShaders = 8; - static constexpr uint16 RT_NoNullMissShaders = 16; - static constexpr uint16 RT_NoNullIntersectionShaders = 32; - static constexpr uint16 RT_SkipTriangles = 64; - static constexpr uint16 RT_SkipAABBs = 128; - - // Pipeline creation will fail if it is not exists in cache. - static constexpr uint16 DontCompile = 256; - - // When a pipeline is created, its state and shaders are compiled into zero or more device-specific executables, - // which are used when executing commands against that pipeline. - static constexpr uint16 CaptureStatistics = 512; - - // May include the final shader assembly, a binary form of the compiled shader, - // or the shader compiler’s internal representation at any number of intermediate compile steps. - static constexpr uint16 CaptureInternalRepresentation = 1024; - - // Disable pipeline optimization to speedup creation. 
- static constexpr uint16 DontOptimize = 0; - static constexpr uint16 None = 0; -}; - -struct EQueueMask -{ - EQueueMask () {} - EQueueMask (uint8) {} - operator uint8 () const; - static constexpr uint8 Graphics = 1; - static constexpr uint8 AsyncCompute = 2; - static constexpr uint8 AsyncTransfer = 4; - static constexpr uint8 VideoEncode = 8; - static constexpr uint8 VideoDecode = 16; - static constexpr uint8 All = 31; -}; - -struct ESamplerChromaLocation -{ - ESamplerChromaLocation () {} - ESamplerChromaLocation (uint8) {} - operator uint8 () const; - static constexpr uint8 CositedEven = 0; - static constexpr uint8 Midpoint = 1; -}; - -struct ESamplerYcbcrModelConversion -{ - ESamplerYcbcrModelConversion () {} - ESamplerYcbcrModelConversion (uint8) {} - operator uint8 () const; - static constexpr uint8 RGB_Identity = 0; - static constexpr uint8 Ycbcr_Identity = 1; - static constexpr uint8 Ycbcr_709 = 2; - static constexpr uint8 Ycbcr_601 = 3; - static constexpr uint8 Ycbcr_2020 = 4; -}; - -struct ESamplerYcbcrRange -{ - ESamplerYcbcrRange () {} - ESamplerYcbcrRange (uint8) {} - operator uint8 () const; - static constexpr uint8 ITU_Full = 0; - static constexpr uint8 ITU_Narrow = 1; -}; - -struct ESurfaceFormat -{ - ESurfaceFormat () {} - ESurfaceFormat (uint8) {} - operator uint8 () const; - static constexpr uint8 BGRA8_sRGB_nonlinear = 0; - static constexpr uint8 RGBA8_sRGB_nonlinear = 1; - static constexpr uint8 BGRA8_BT709_nonlinear = 2; - static constexpr uint8 RGBA16F_Extended_sRGB_linear = 3; - static constexpr uint8 RGBA16F_sRGB_nonlinear = 4; - static constexpr uint8 RGBA16F_BT709_nonlinear = 5; - static constexpr uint8 RGBA16F_HDR10_ST2084 = 6; - static constexpr uint8 RGBA16F_BT2020_linear = 7; - static constexpr uint8 RGB10A2_sRGB_nonlinear = 8; - static constexpr uint8 RGB10A2_HDR10_ST2084 = 9; -}; - -struct ERTInstanceOpt -{ - ERTInstanceOpt () {} - ERTInstanceOpt (uint8) {} - operator uint8 () const; - static constexpr uint8 TriangleCullDisable = 1; 
- static constexpr uint8 TriangleFrontCCW = 2; - static constexpr uint8 ForceOpaque = 4; - static constexpr uint8 ForceNonOpaque = 8; - static constexpr uint8 TriangleCullBack = 0; - static constexpr uint8 TriangleFrontCW = 0; -}; - -struct EImageUsage -{ - EImageUsage () {} - EImageUsage (uint32) {} - operator uint32 () const; - static constexpr uint32 TransferSrc = 1; - static constexpr uint32 TransferDst = 2; - static constexpr uint32 Sampled = 4; - static constexpr uint32 Storage = 8; - static constexpr uint32 ColorAttachment = 16; - static constexpr uint32 DepthStencilAttachment = 32; - static constexpr uint32 InputAttachment = 64; - static constexpr uint32 ShadingRate = 128; - static constexpr uint32 All = 255; - static constexpr uint32 Transfer = 3; - static constexpr uint32 RWAttachment = 80; -}; - -struct EImageOpt -{ - EImageOpt () {} - EImageOpt (uint32) {} - operator uint32 () const; - static constexpr uint32 BlitSrc = 1; - static constexpr uint32 BlitDst = 2; - static constexpr uint32 CubeCompatible = 4; - static constexpr uint32 MutableFormat = 8; - static constexpr uint32 Array2DCompatible = 16; - static constexpr uint32 BlockTexelViewCompatible = 32; - static constexpr uint32 SparseResidency = 64; - static constexpr uint32 SparseAliased = 128; - static constexpr uint32 Alias = 256; - static constexpr uint32 SampleLocationsCompatible = 512; - static constexpr uint32 StorageAtomic = 1024; - static constexpr uint32 ColorAttachmentBlend = 2048; - static constexpr uint32 SampledLinear = 4096; - static constexpr uint32 SampledMinMax = 8192; - static constexpr uint32 VertexPplnStore = 16384; - static constexpr uint32 FragmentPplnStore = 32768; - static constexpr uint32 LossyRTCompression = 65536; - static constexpr uint32 All = 131071; - static constexpr uint32 SparseResidencyAliased = 192; -}; - -struct EBufferUsage -{ - EBufferUsage () {} - EBufferUsage (uint32) {} - operator uint32 () const; - static constexpr uint32 TransferSrc = 1; - static constexpr 
uint32 TransferDst = 2; - static constexpr uint32 UniformTexel = 4; - static constexpr uint32 StorageTexel = 8; - static constexpr uint32 Uniform = 16; - static constexpr uint32 Storage = 32; - static constexpr uint32 Index = 64; - static constexpr uint32 Vertex = 128; - static constexpr uint32 Indirect = 256; - static constexpr uint32 ShaderAddress = 512; - static constexpr uint32 ShaderBindingTable = 1024; - static constexpr uint32 ASBuild_ReadOnly = 2048; - static constexpr uint32 ASBuild_Scratch = 4096; - static constexpr uint32 All = 8191; - static constexpr uint32 Transfer = 3; -}; - -struct EBufferOpt -{ - EBufferOpt () {} - EBufferOpt (uint32) {} - operator uint32 () const; - static constexpr uint32 SparseResidency = 1; - static constexpr uint32 SparseAliased = 2; - static constexpr uint32 VertexPplnStore = 4; - static constexpr uint32 FragmentPplnStore = 8; - static constexpr uint32 StorageTexelAtomic = 16; - static constexpr uint32 All = 31; - static constexpr uint32 SparseResidencyAliased = 3; -}; - -struct EShadingRate -{ - EShadingRate () {} - EShadingRate (uint8) {} - operator uint8 () const; - static constexpr uint8 Size1x1 = 16; - static constexpr uint8 Size1x2 = 32; - static constexpr uint8 Size1x4 = 48; - static constexpr uint8 Size2x1 = 64; - static constexpr uint8 Size2x2 = 80; - static constexpr uint8 Size2x4 = 96; - static constexpr uint8 Size4x1 = 112; - static constexpr uint8 Size4x2 = 128; - static constexpr uint8 Size4x4 = 144; -}; - -struct EShadingRateCombinerOp -{ - EShadingRateCombinerOp () {} - EShadingRateCombinerOp (uint8) {} - operator uint8 () const; - static constexpr uint8 Keep = 0; - static constexpr uint8 Replace = 1; - static constexpr uint8 Min = 2; - static constexpr uint8 Max = 3; - static constexpr uint8 Sum = 4; - static constexpr uint8 Mul = 5; -}; - struct MultiSamples { MultiSamples (); @@ -2825,40 +3009,6 @@ struct MipmapLevel MipmapLevel (uint); }; -struct EColorSpace -{ - EColorSpace () {} - EColorSpace (uint8) {} 
- operator uint8 () const; - static constexpr uint8 sRGB_nonlinear = 0; - static constexpr uint8 BT709_nonlinear = 6; - static constexpr uint8 Extended_sRGB_linear = 2; - static constexpr uint8 HDR10_ST2084 = 8; - static constexpr uint8 BT2020_linear = 7; -}; - -struct DbgViewFlags -{ - DbgViewFlags () {} - DbgViewFlags (uint32) {} - operator uint32 () const; - static constexpr uint32 NoCopy = 1; - static constexpr uint32 Copy = 0; - static constexpr uint32 Histogram = 2; - static constexpr uint32 LinearDepth = 3; - static constexpr uint32 Stencil = 4; -}; - -struct ScriptFlags -{ - ScriptFlags () {} - ScriptFlags (uint32) {} - operator uint32 () const; - static constexpr uint32 RunOnce = 1; - static constexpr uint32 OnRequest = 2; - static constexpr uint32 RunOnce_AfterLoading = 3; -}; - struct DynamicUInt { DynamicUInt (); @@ -2870,6 +3020,10 @@ struct DynamicUInt RC Add (uint); RC Sub (uint); RC Pow (uint); + RC PowOf2 (); + RC PowOf2 (uint); + RC Dimension2 (); + RC Dimension3 (); }; struct DynamicUInt2 @@ -2878,6 +3032,8 @@ struct DynamicUInt2 DynamicUInt2 (const uint2 &); RC X () const; RC Y () const; + RC PowOf2 (); + RC Dimension (); }; struct DynamicUInt3 @@ -2999,16 +3155,6 @@ struct DynamicDim RC Volume () const; }; -struct ImageLoadOpFlags -{ - ImageLoadOpFlags () {} - ImageLoadOpFlags (uint32) {} - operator uint32 () const; - - // Generate mipmaps after loading - static constexpr uint32 GenMipmaps = 1; -}; - struct Image { @@ -3180,6 +3326,9 @@ struct Buffer // Dynamic array size, can be used for draw call. RC ArraySize () const; + // Constant array size, can be used for draw call. + uint ConstArraySize () const; + // Build buffer data layout with initial content. // Returns offset in bytes where data is begin. 
uint Float (const string &, float); @@ -3925,44 +4074,6 @@ struct Collection RC RTScene (const string & key) const; }; -struct EPostprocess -{ - EPostprocess () {} - EPostprocess (uint32) {} - operator uint32 () const; - - // Entry point: 'Main' - static constexpr uint32 None = 0; - - // Entry point: 'void mainImage (out float4 fragColor, in float2 fragCoord)' - static constexpr uint32 Shadertoy = 1; - - // Entry point: 'void mainVR (out float4 fragColor, in float2 fragCoord, in float3 fragRayOri, in float3 fragRayDir)' - static constexpr uint32 ShadertoyVR = 2; - static constexpr uint32 ShadertoyVR_180 = 3; - static constexpr uint32 ShadertoyVR_360 = 4; - static constexpr uint32 Shadertoy_360 = 5; -}; - -struct EPassFlags -{ - EPassFlags () {} - EPassFlags (uint8) {} - operator uint8 () const; - static constexpr uint8 None = 0; - - // ShaderTrace - record all variables, function result, etc and save it to file. - // It is very useful to debug shaders. In UI select 'Debugging' menu, select pass,'Trace' and shader stage then click 'G' key to record trace for pixel under cursor. - // Reference to the last recorded trace will be added to console and IDE log, click on it to open file. - static constexpr uint8 Enable_ShaderTrace = 1; - - // ShaderFunctionProfiling - record time of user function calls, sort it and save to file. - static constexpr uint8 Enable_ShaderFnProf = 2; - - // Enable all debug features. - static constexpr uint8 Enable_AllShaderDbg = 7; -}; - struct Postprocess { @@ -4030,6 +4141,10 @@ struct Postprocess void EnableIfGreater (const RC & dynamic, uint refValue); void EnableIfAnyBit (const RC & dynamic, uint refValue); + // Repeat pass multiple times. + // Can be used for performance tests. + void Repeat (const RC &); + // Add resource to all shaders in the current pass. // In - resource is used for read access. // Out - resource is used for write access. 
@@ -4143,7 +4258,15 @@ struct Postprocess void OutputBlend (const string & name, const RC & image, const ImageLayer & baseLayer, const MipmapLevel & mipmap, EBlendFactor srcRGB, EBlendFactor dstRGB, EBlendOp opRGB, EBlendFactor srcA, EBlendFactor dstA, EBlendOp opA); void OutputBlend (const string & name, const RC & image, const ImageLayer & baseLayer, uint layerCount, EBlendFactor srcRGB, EBlendFactor dstRGB, EBlendOp opRGB, EBlendFactor srcA, EBlendFactor dstA, EBlendOp opA); void OutputBlend (const string & name, const RC & image, const ImageLayer & baseLayer, uint layerCount, const MipmapLevel & mipmap, EBlendFactor srcRGB, EBlendFactor dstRGB, EBlendOp opRGB, EBlendFactor srcA, EBlendFactor dstA, EBlendOp opA); + + // Used instead of 'Output()' to define image as input attachment & color attachment (read/write input attachment). + void InOut (const string & inName, const string & outName, const RC & image); void DepthRange (float min, float max); + void AddViewport (const RectF & rect, float minDepth, float maxDepth, const RectF & scissor, const float2 & wScale); + void AddViewport (const RectF & rect, float minDepth, float maxDepth); + void AddViewport (const RectF & rect); + void AddViewport (float left, float top, float right, float bottom); + void AddViewport (const RectF & rect, float minDepth, float maxDepth, const RectF & scissor); // Set path to fragment shader, empty - load current file. Postprocess (); @@ -4152,6 +4275,9 @@ struct Postprocess Postprocess (EPostprocess postprocessFlags); Postprocess (EPostprocess postprocessFlags, const string & defines); Postprocess (const string & shaderPath, const string & defines); + + // Can be used only if pass hasn't attachments. + void SetDimension (const RC &); }; struct ComputePass @@ -4221,6 +4347,10 @@ struct ComputePass void EnableIfGreater (const RC & dynamic, uint refValue); void EnableIfAnyBit (const RC & dynamic, uint refValue); + // Repeat pass multiple times. + // Can be used for performance tests. 
+ void Repeat (const RC &); + // Add resource to all shaders in the current pass. // In - resource is used for read access. // Out - resource is used for write access. @@ -4251,6 +4381,9 @@ struct ComputePass void LocalSize (const uint2 &); void LocalSize (const uint3 &); + // Set subgroup size. + void SubgroupSize (uint); + // Execute compute shader with number of the workgroups. // Total number of threads is 'groupCount * localSize'. void DispatchGroups (uint groupCountX); @@ -4381,6 +4514,10 @@ struct RayTracingPass void EnableIfGreater (const RC & dynamic, uint refValue); void EnableIfAnyBit (const RC & dynamic, uint refValue); + // Repeat pass multiple times. + // Can be used for performance tests. + void Repeat (const RC &); + // Add resource to all shaders in the current pass. // In - resource is used for read access. // Out - resource is used for write access. @@ -4430,16 +4567,6 @@ struct RayTracingPass void MaxCallableRecursion (const RC &); }; -struct ERenderLayer -{ - ERenderLayer () {} - ERenderLayer (uint32) {} - operator uint32 () const; - static constexpr uint32 Opaque = 0; - static constexpr uint32 Translucent = 1; - static constexpr uint32 PostProcess = 2; -}; - struct SceneGraphicsPass { @@ -4507,6 +4634,10 @@ struct SceneGraphicsPass void EnableIfGreater (const RC & dynamic, uint refValue); void EnableIfAnyBit (const RC & dynamic, uint refValue); + // Repeat pass multiple times. + // Can be used for performance tests. + void Repeat (const RC &); + // Add resource to all shaders in the current pass. // In - resource is used for read access. // Out - resource is used for write access. 
@@ -4592,6 +4723,11 @@ struct SceneGraphicsPass void Output (const string & name, const RC & image, const ImageLayer & baseLayer, uint layerCount, const DepthStencil & clearDepthStencil); void Output (const string & name, const RC & image, const ImageLayer & baseLayer, uint layerCount, const MipmapLevel & mipmap, const DepthStencil & clearDepthStencil); void DepthRange (float min, float max); + void AddViewport (const RectF & rect, float minDepth, float maxDepth, const RectF & scissor, const float2 & wScale); + void AddViewport (const RectF & rect, float minDepth, float maxDepth); + void AddViewport (const RectF & rect); + void AddViewport (float left, float top, float right, float bottom); + void AddViewport (const RectF & rect, float minDepth, float maxDepth, const RectF & scissor); // Add path to single pipeline or folder with pipelines. // Scene geometry will be linked with compatible pipeline or error will be generated. @@ -4599,6 +4735,9 @@ struct SceneGraphicsPass void AddPipelines (const string & pplnFolder); void Layer (ERenderLayer); void FragmentShadingRate (EShadingRate rate, EShadingRateCombinerOp primitiveOp, EShadingRateCombinerOp textureOp); + + // Can be used only if pass hasn't attachments. + void SetDimension (const RC &); }; struct SceneRayTracingPass @@ -4668,6 +4807,10 @@ struct SceneRayTracingPass void EnableIfGreater (const RC & dynamic, uint refValue); void EnableIfAnyBit (const RC & dynamic, uint refValue); + // Repeat pass multiple times. + // Can be used for performance tests. + void Repeat (const RC &); + // Add resource to all shaders in the current pass. // In - resource is used for read access. // Out - resource is used for write access. @@ -4749,6 +4892,12 @@ void GenMipmaps (const RC &); // Pass which copy image content to another image. void CopyImage (const RC &, const RC &); +// Pass which blits image to another image. 
+void BlitImage (const RC &, const RC &); + +// Pass which resolve multisample image to another single-sampled image. +void ResolveImage (const RC &, const RC &); + // Pass which compress image on CPU or GPU. void CompressImage (const RC & src, const RC & dst); @@ -4818,7 +4967,8 @@ void GetCylinder (uint segmentCount, bool isInner, array & positions, a // Returns cylinder void GetCylinder (uint segmentCount, bool isInner, array & positions, array & normals, array & tangents, array & bitangents, array & texcoords, array & indices); -// Returns spherical cube without projection and rotation +// Returns spherical cube without projection and face rotation. +// In 'positions': xy - pos on face, z - face index. void GetSphericalCube (uint lod, array & positions, array & indices); // Helper function to convert array of indices to array of uint3 indices per triangle @@ -4876,18 +5026,18 @@ void Slider (const RC & dyn, const string & name, const float2 & void Slider (const RC & dyn, const string & name, const float3 & min, const float3 & max, const float3 & initial); void Slider (const RC & dyn, const string & name, const float4 & min, const float4 & max, const float4 & initial); -// Returns array with 3 elements, where x - wavelength in nm, yzw - RGB color. +// Returns array with 3 elements, where x - wavelength in nm, yzw - RGB color in linear space. void WhiteColorSpectrum3 (array & wavelengthToRGB); -// Returns array with 7 elements, where x - wavelength in nm, yzw - RGB color. +// Returns array with 7 elements, where x - wavelength in nm, yzw - RGB color in linear space. // normalized - sum of colors will be 1. void WhiteColorSpectrum7 (array & wavelengthToRGB, bool normalized); -// Returns array 4 elements with visible light spectrum with step 100nm, where x - wavelength in nm, yzw - RGB color. +// Returns array 4 elements with visible light spectrum with step 100nm, where x - wavelength in nm, yzw - RGB color in linear space. // normalized - sum of colors will be 1. 
void WhiteColorSpectrumStep100nm (array & wavelengthToRGB, bool normalized); -// Returns array 7 elements with visible light spectrum with step 50nm, where x - wavelength in nm, yzw - RGB color. +// Returns array 7 elements with visible light spectrum with step 50nm, where x - wavelength in nm, yzw - RGB color in linear space. // normalized - sum of colors will be 1. void WhiteColorSpectrumStep50nm (array & wavelengthToRGB, bool normalized); @@ -4911,236 +5061,236 @@ bool Supports_Format (EPixelFormat); #define SCRIPT template <> -struct RC : DynamicInt +struct RC : RTScene { - RC (const DynamicInt &); + RC (const RTScene &); }; template <> -struct RC : IPass +struct RC : DynamicFloat { - RC (const IPass &); + RC (const DynamicFloat &); }; template <> -struct RC : Image +struct RC : DynamicDim { - RC (const Image &); + RC (const DynamicDim &); }; template <> -struct RC : DynamicInt2 +struct RC : FPSCamera { - RC (const DynamicInt2 &); + RC (const FPSCamera &); }; template <> -struct RC : DynamicInt4 +struct RC : RayTracingPass { - RC (const DynamicInt4 &); + RC (const RayTracingPass &); }; template <> -struct RC : DynamicInt3 +struct RC : OrbitalCamera { - RC (const DynamicInt3 &); + RC (const OrbitalCamera &); }; template <> -struct RC : GeomSource +struct RC : SphericalCube { - RC (const GeomSource &); + RC (const SphericalCube &); }; template <> -struct RC : DynamicFloat2 +struct RC : Buffer { - RC (const DynamicFloat2 &); + RC (const Buffer &); }; template <> -struct RC : DynamicFloat3 +struct RC : Postprocess { - RC (const DynamicFloat3 &); + RC (const Postprocess &); }; template <> -struct RC : DynamicUInt2 +struct RC : SceneRayTracingPass { - RC (const DynamicUInt2 &); + RC (const SceneRayTracingPass &); }; template <> -struct RC : DynamicUInt4 +struct RC : ComputePass { - RC (const DynamicUInt4 &); + RC (const ComputePass &); }; template <> -struct RC : RTGeometry +struct RC : UnifiedGeometry { - RC (const RTGeometry &); + RC (const UnifiedGeometry &); }; 
template <> -struct RC : DynamicFloat4 +struct RC : ScaleBiasCamera { - RC (const DynamicFloat4 &); + RC (const ScaleBiasCamera &); }; template <> -struct RC : DynamicUInt3 +struct RC : TopDownCamera { - RC (const DynamicUInt3 &); + RC (const TopDownCamera &); }; template <> -struct RC : Collection +struct RC : FlightCamera { - RC (const Collection &); + RC (const FlightCamera &); }; template <> -struct RC : RTScene +struct RC : BaseController { - RC (const RTScene &); + RC (const BaseController &); }; template <> -struct RC : DynamicULong +struct RC : Scene { - RC (const DynamicULong &); + RC (const Scene &); }; template <> -struct RC : SceneGraphicsPass +struct RC : Model { - RC (const SceneGraphicsPass &); + RC (const Model &); }; template <> -struct RC : VideoImage +struct RC : DynamicUInt { - RC (const VideoImage &); + RC (const DynamicUInt &); }; template <> -struct RC : FPVCamera +struct RC : DynamicInt2 { - RC (const FPVCamera &); + RC (const DynamicInt2 &); }; template <> -struct RC : RemoteCamera +struct RC : DynamicInt3 { - RC (const RemoteCamera &); + RC (const DynamicInt3 &); }; template <> -struct RC : SceneRayTracingPass +struct RC : DynamicInt4 { - RC (const SceneRayTracingPass &); + RC (const DynamicInt4 &); }; template <> -struct RC : Postprocess +struct RC : GeomSource { - RC (const Postprocess &); + RC (const GeomSource &); }; template <> -struct RC : ComputePass +struct RC : RTGeometry { - RC (const ComputePass &); + RC (const RTGeometry &); }; template <> -struct RC : UnifiedGeometry +struct RC : DynamicUInt4 { - RC (const UnifiedGeometry &); + RC (const DynamicUInt4 &); }; template <> -struct RC : Buffer +struct RC : DynamicFloat3 { - RC (const Buffer &); + RC (const DynamicFloat3 &); }; template <> -struct RC : OrbitalCamera +struct RC : DynamicFloat2 { - RC (const OrbitalCamera &); + RC (const DynamicFloat2 &); }; template <> -struct RC : SphericalCube +struct RC : DynamicUInt3 { - RC (const SphericalCube &); + RC (const DynamicUInt3 &); }; 
template <> -struct RC : DynamicFloat +struct RC : DynamicUInt2 { - RC (const DynamicFloat &); + RC (const DynamicUInt2 &); }; template <> -struct RC : DynamicDim +struct RC : DynamicFloat4 { - RC (const DynamicDim &); + RC (const DynamicFloat4 &); }; template <> -struct RC : RayTracingPass +struct RC : Image { - RC (const RayTracingPass &); + RC (const Image &); }; template <> -struct RC : FPSCamera +struct RC : IPass { - RC (const FPSCamera &); + RC (const IPass &); }; template <> -struct RC : Model +struct RC : DynamicInt { - RC (const Model &); + RC (const DynamicInt &); }; template <> -struct RC : DynamicUInt +struct RC : RemoteCamera { - RC (const DynamicUInt &); + RC (const RemoteCamera &); }; template <> -struct RC : Scene +struct RC : VideoImage { - RC (const Scene &); + RC (const VideoImage &); }; template <> -struct RC : ScaleBiasCamera +struct RC : FPVCamera { - RC (const ScaleBiasCamera &); + RC (const FPVCamera &); }; template <> -struct RC : TopDownCamera +struct RC : DynamicULong { - RC (const TopDownCamera &); + RC (const DynamicULong &); }; template <> -struct RC : FlightCamera +struct RC : SceneGraphicsPass { - RC (const FlightCamera &); + RC (const SceneGraphicsPass &); }; template <> -struct RC : BaseController +struct RC : Collection { - RC (const BaseController &); + RC (const Collection &); }; diff --git a/AE/engine/shared_data/shaders/CodeTemplates.glsl b/AE/engine/shared_data/shaders/CodeTemplates.glsl index 43786e9c..903bfecd 100644 --- a/AE/engine/shared_data/shaders/CodeTemplates.glsl +++ b/AE/engine/shared_data/shaders/CodeTemplates.glsl @@ -72,7 +72,7 @@ ND_ int2 GenGridTriStrip (const int gridSize) Returns zero on edge. 
================================================= */ -ND_ float2 FSBarycentricWireframe2 (float thicknessPx, float falloffPx) +ND_ float2 FSBarycentricWireframe (float thicknessPx, float falloffPx) { const float3 dx_barycoord = gl.dFdxFine( gl.BaryCoord ); const float3 dy_barycoord = gl.dFdyFine( gl.BaryCoord ); @@ -83,9 +83,109 @@ ND_ float2 FSBarycentricWireframe2 (float thicknessPx, float falloffPx) return float2( wireframe, LengthSq(md) ); } -ND_ float FSBarycentricWireframe (float thicknessPx, float falloffPx) +/* +================================================= + FSBarycentricQuadWireframe +---- + Returns zero on edge. +================================================= +*/ +ND_ float2 FSBarycentricQuadWireframe (float thicknessPx, float falloffPx) +{ + float3 barycoord = gl.BaryCoord; barycoord.x += 1.0; + float3 dx_barycoord = gl.dFdxFine( barycoord ); + float3 dy_barycoord = gl.dFdyFine( barycoord ); + float3 d_barycoord = Diagonal( dx_barycoord, dy_barycoord ); + float3 remap = SmoothStep( barycoord, d_barycoord * thicknessPx, d_barycoord * (thicknessPx + falloffPx) ); + float wireframe = MinOf( remap ); + float3 md = Max( dx_barycoord, dy_barycoord ); + return float2( wireframe, LengthSq(md) ); +} +#endif +//----------------------------------------------------------------------------- + + +/* +================================================= + HelperInvocationCount* +---- + warning: some GPU may not execute helper invocations. +================================================= +*/ +#if defined(SH_FRAG) and defined(AE_shader_subgroup_quad) +ND_ uint HelperInvocationCountPerQuad () +{ + uint helper = 0; + #ifdef AE_demote_to_helper_invocation + helper = gl.IsHelperInvocation() ? 1 : 0; + #else + helper = gl.HelperInvocation ? 
1 : 0; + #endif + return gl.quadGroup.Broadcast( helper, 0 ) + + gl.quadGroup.Broadcast( helper, 1 ) + + gl.quadGroup.Broadcast( helper, 2 ) + + gl.quadGroup.Broadcast( helper, 3 ); +} +#endif + +#if defined(SH_FRAG) and defined(AE_shader_subgroup_arithmetic) +ND_ uint HelperInvocationCountPerSubgroup () { - return FSBarycentricWireframe2( thicknessPx, falloffPx ).x; + uint helper = 0; + #ifdef AE_demote_to_helper_invocation + helper = gl.IsHelperInvocation() ? 1 : 0; + #else + helper = gl.HelperInvocation ? 1 : 0; + #endif + return gl.subgroup.Add( helper ); } #endif +/* +================================================= + IsFullQuad +---- + some GPUs may not execute helper invocations, + use this function to detect whether all threads in the quad are executed. +================================================= +*/ +#ifdef AE_shader_subgroup_quad +ND_ bool IsFullQuad () +{ + int val = 1; + int sum = gl.quadGroup.Broadcast( val, 0 ) + + gl.quadGroup.Broadcast( val, 1 ) + + gl.quadGroup.Broadcast( val, 2 ) + + gl.quadGroup.Broadcast( val, 3 ); + return sum == 4; +} +#endif + +/* +================================================= + IsFullSubgroup +---- + use this function to detect whether all threads in the subgroup are executed.
+================================================= +*/ +#ifdef AE_shader_subgroup_arithmetic +ND_ bool IsFullSubgroup () +{ + int val = 1; + int sum = gl.subgroup.Add( val ); + return sum == gl.subgroup.Size; +} +#endif + +/* +================================================= + Discard +================================================= +*/ +#ifdef SH_FRAG +# ifdef AE_demote_to_helper_invocation +# define Discard() gl.Demote +# else +# define Discard() gl.Discard +# endif +#endif // SH_FRAG //----------------------------------------------------------------------------- diff --git a/AE/engine/shared_data/shaders/Color.glsl b/AE/engine/shared_data/shaders/Color.glsl index 61c600d4..c0df6b19 100644 --- a/AE/engine/shared_data/shaders/Color.glsl +++ b/AE/engine/shared_data/shaders/Color.glsl @@ -33,9 +33,9 @@ ND_ float3 XYYtoRGB_v2 (float3 xyY); ND_ float3 RGBtoOklab (float3 rgb); ND_ float3 OklabToRGB (float3 oklab); -ND_ float RGBtoLuminance (const float3 linear); -ND_ float RGBtoLogLuminance (const float3 linear, float gamma); -ND_ float RGBtoLogLuminance (const float3 linear); +ND_ float RGBtoLuminance (const float3 linear); +ND_ float RGBtoLogLuminance (const float3 linear, float gamma); +ND_ float RGBtoLogLuminance (const float3 linear); ND_ float3 FromRGBM (const float4 rgbm); ND_ float4 ToRGBM (const float3 rgb); diff --git a/AE/engine/shared_data/shaders/Easing.glsl b/AE/engine/shared_data/shaders/Easing.glsl index 1c54dad8..76335e0e 100644 --- a/AE/engine/shared_data/shaders/Easing.glsl +++ b/AE/engine/shared_data/shaders/Easing.glsl @@ -9,38 +9,70 @@ #include "Math.glsl" -ND_ float HermiteEaseIn (const float x); -ND_ float HermiteEaseOut (const float x); -ND_ float HermiteEaseInOut (const float x); - -ND_ float QuadraticEaseIn (const float x); // x^2 +ND_ float QuadraticEaseIn (const float x); // x^2 ND_ float QuadraticEaseOut (const float x); ND_ float QuadraticEaseInOut (const float x); -ND_ float CubicEaseIn (const float x); // x^3 +ND_ float CubicEaseIn 
(const float x); // x^3 ND_ float CubicEaseOut (float x); ND_ float CubicEaseInOut (const float x); -ND_ float QuarticEaseIn (const float x); // x^4 +ND_ float QuarticEaseIn (const float x); // x^4 ND_ float QuarticEaseOut (const float x); ND_ float QuarticEaseInOut (const float x); -ND_ float QuinticEaseIn (const float x); // x^5 +ND_ float QuinticEaseIn (const float x); // x^5 ND_ float QuinticEaseOut (const float x); ND_ float QuinticEaseInOut (const float x); + +ND_ float SineEaseIn (const float x); // Sin(x) +ND_ float SineEaseOut (const float x); +ND_ float SineEaseInOut (const float x); + +ND_ float CircularEaseIn (const float x); // Sqrt(1 - x*x) +ND_ float CircularEaseOut (const float x); +ND_ float CircularEaseInOut (const float x); + +ND_ float ExponentialEaseIn (const float x); // Exp2(x) +ND_ float ExponentialEaseOut (const float x); +ND_ float ExponentialEaseInOut (const float x); + +ND_ float ElasticEaseIn (const float x); +ND_ float ElasticEaseOut (const float x); +ND_ float ElasticEaseInOut (const float x); //----------------------------------------------------------------------------- +ND_ float HermiteEaseIn (const float x); +ND_ float HermiteEaseOut (const float x); +ND_ float HermiteEaseInOut (const float x); // smoothstep + ND_ float LogarithmicEaseIn (const float x); // Ln(x) +ND_ float LogarithmicEaseOut (const float x); + ND_ float Logarithmic2EaseIn (const float x); // Log2(x) +ND_ float Logarithmic2EaseOut (const float x); + ND_ float ReciprocalEaseIn (const float x); // 1/x -ND_ float SineEaseIn (const float x); // Sin(x) -ND_ float ExponentialEaseIn (const float x); // Exp(x) -ND_ float ReciprocalSquaredEaseIn (const float x); // 1/x^2 +ND_ float ReciprocalEaseOut (const float x); + +ND_ float ExponentialE_EaseIn (const float x); // Exp(x) +ND_ float ExponentialE_EaseOut (const float x); + +ND_ float ReciprocalSquaredEaseIn (const float x); // 1/(x*x) +ND_ float ReciprocalSquaredEaseOut (const float x); + +ND_ float SquareRootEaseIn
(const float x); // Sqrt(x) +ND_ float SquareRootEaseOut (const float x); + +ND_ float CubicRootEaseIn (const float x); // Cbrt(x) +ND_ float CubicRootEaseOut (const float x); -#define EaseFlip( fn, x ) (1.0 - fn( 1.0 - (x) )) +#define EaseFlip( fn, x ) (1.0 - fn( 1.0 - (x) )) // EaseIn <-> EaseOut +// TODO: https://iquilezles.org/articles/functions/ + //----------------------------------------------------------------------------- #include "../3party_shaders/Easing-1.glsl" @@ -56,27 +88,74 @@ float HermiteEaseOut (const float x) { float HermiteEaseInOut (const float x) { return SmoothStep( x, 0.0, 1.0 ); } +//------------------------------------------------ + float ReciprocalEaseIn (const float x) { return 1.0 / (-x * 0.5 + 1.0) - 1.0; } +float ReciprocalEaseOut (const float x) { + return EaseFlip( ReciprocalEaseIn, x ); +} +//------------------------------------------------ + + float ReciprocalSquaredEaseIn (const float x) { return 1.0 / (-Saturate(x*x)*0.5 + 1.0) - 1.0; } -float LogarithmicEaseIn (const float x) { +float ReciprocalSquaredEaseOut (const float x) { + return EaseFlip( ReciprocalSquaredEaseIn, x ); +} +//------------------------------------------------ + + +float LogarithmicEaseOut (const float x) { return Ln( x * 1.72 + 1.0 ); } -float Logarithmic2EaseIn (const float x) { +float LogarithmicEaseIn (const float x) { + return EaseFlip( LogarithmicEaseOut, x ); +} +//------------------------------------------------ + + +float Logarithmic2EaseOut (const float x) { return Log2( x * 2.0 + 2.0 ) - 1.0; } -float ExponentialEaseIn (const float x) { +float Logarithmic2EaseIn (const float x) { + return EaseFlip( Logarithmic2EaseOut, x ); +} +//------------------------------------------------ + + +float ExponentialE_EaseIn (const float x) { return Exp( x * 2.1268 - 2.0 ) - 0.134; } -float SineEaseIn (const float x) { - return 1.0 - Sin( (1.0 - x) * float_HalfPi ); +float ExponentialE_EaseOut (const float x) { + return EaseFlip( ExponentialE_EaseIn, x ); +} 
+//------------------------------------------------ + + +float SquareRootEaseIn (const float x) { + return EaseFlip( Sqrt, x ); +} + +float SquareRootEaseOut (const float x) { + return Sqrt( x ); +} +//------------------------------------------------ + + +float CubicRootEaseIn (const float x) { + return EaseFlip( Cbrt, x ); +} + +float CubicRootEaseOut (const float x) { + return Cbrt( x ); } +//------------------------------------------------ diff --git a/AE/engine/shared_data/shaders/FastMath.glsl b/AE/engine/shared_data/shaders/FastMath.glsl new file mode 100644 index 00000000..749a006d --- /dev/null +++ b/AE/engine/shared_data/shaders/FastMath.glsl @@ -0,0 +1,25 @@ +// Copyright (c) Zhirnov Andrey. For more information see 'LICENSE' +/* + Approximations for math functions +*/ + +#ifdef __cplusplus +# pragma once +#endif + + +#include "Math.glsl" + +// T: halfX, floatX, doubleX + +// T FastACos (T x) +// T FastASin (T x) +// T FastATan (T y_over_x) +// T FastATan2 (T y, T x) + +//----------------------------------------------------------------------------- +#include "../3party_shaders/FastMath-1.glsl" + + +// TODO: +// Exponentiation https://docs.nvidia.com/cuda/cuda-c-best-practices-guide/index.html#exponentiation-with-small-fractional-arguments diff --git a/AE/engine/shared_data/shaders/Geometry.glsl b/AE/engine/shared_data/shaders/Geometry.glsl index 1a932813..3c43f5ce 100644 --- a/AE/engine/shared_data/shaders/Geometry.glsl +++ b/AE/engine/shared_data/shaders/Geometry.glsl @@ -39,6 +39,8 @@ ND_ bool IsOutsideRect (const float2 pos, const float4 rect) { return IsO ND_ bool IsInsideCircle (const float2 pos, const float2 center, const float radius) { return DistanceSq( pos, center ) < Square( radius ); } ND_ bool IsInsideCircle (const float2 pos, const float3 center_radius) { return IsInsideCircle( pos, center_radius.xy, center_radius.z ); } +//----------------------------------------------------------------------------- + ND_ float2 Rect_Center (const float4 
rect) { return (rect.xy * 0.5f) + (rect.zw * 0.5f); } ND_ float2 Rect_Size (const float4 rect) { return rect.zw - rect.xy; } @@ -68,11 +70,11 @@ ND_ float3 SphericalToCartesian (const float3 sphericalAndRadius); ND_ float3 CartesianToSpherical (const float3 cartesian); ND_ float DistanceOnSphere (const float3 n0, const float3 n1) { return ACos( Dot( n0, n1 )); } -ND_ float DistanceSqOnSphereApprox (const float3 n0, const float3 n1) { return (2.0f - 2.0f * Dot( n0, n1 )); } //----------------------------------------------------------------------------- -ND_ float3 UVtoSphereNormal (const float2 snormCoord); +ND_ float4 UVtoSphereNormal (const float2 snormCoord); +ND_ float4 UVtoSphereNormal (const float2 snormCoord, const float projFov); //----------------------------------------------------------------------------- @@ -159,15 +161,37 @@ float3 CartesianToSpherical (const float3 cartesian) /* ================================================= UVtoSphereNormal +---- + returns: xyz - normal, w - distance to sphere +================================================= +*/ +float4 UVtoSphereNormal (const float2 snormCoord) +{ + float4 n = float4(snormCoord, 0.0, 1.0 - LengthSq( snormCoord )); + + if ( n.w > 0.0 ) n.z = Sqrt( n.w ); + //n.z = Max( 0.0, Sqrt( n.w )); // doesn't handle Inf on some devices (Adreno) + //n.z = Sqrt( n.w ) * LessFp( 0.0, n.w ); // doesn't handle Inf on some devices (NV) + + return n; +} + +/* +================================================= + UVtoSphereNormal +---- + 'projFov' - FOV to calculate approximate distortion of perspective projection ================================================= */ -float3 UVtoSphereNormal (const float2 c) +float4 UVtoSphereNormal (const float2 snormCoord, const float projFov) { - float d = LengthSq( c ); - float3 n = float3(c.x, c.y, 0.0); + float4 n = UVtoSphereNormal( snormCoord ); + + // can be calculated on CPU side + n.z += ASin( projFov / float_HalfPi ) * 1.2 / float_HalfPi; - if ( d <= 1.0 ) - n.z = 1.0 - 
d; + n.xyz = Normalize( n.xyz ); - return Normalize( n ); + return n; } + diff --git a/AE/engine/shared_data/shaders/GlobalIndex.glsl b/AE/engine/shared_data/shaders/GlobalIndex.glsl index a93b2360..8bb2fd58 100644 --- a/AE/engine/shared_data/shaders/GlobalIndex.glsl +++ b/AE/engine/shared_data/shaders/GlobalIndex.glsl @@ -63,7 +63,8 @@ ND_ float3 GetGlobalCoordUNorm (); // 0..1 ND_ float3 GetGlobalCoordSNorm (); // -1..1 ND_ float3 GetGlobalCoordUNorm (int3 offset); // 0..1 ND_ float3 GetGlobalCoordSNorm (int3 offset); // -1..1 -ND_ float3 GetGlobalCoordFloat (); // -size/2 .. +size/2 +ND_ float3 GetGlobalCoordSF (); // -size/2 .. +size/2 +ND_ float3 GetGlobalCoordUF (); // 0..size-1 // global normalized coordinate in 2D with same aspect ratio ND_ float2 GetGlobalCoordUNormCorrected (); // 0..1 @@ -145,8 +146,8 @@ float2 MapPixCoordToUNormCorrected (const float2 srcPosPx, const float2 srcSize // global coordinate in 3D int3 GetGlobalCoord () { - #ifdef AE_GEOMETRY_SHADER - return int3( gl.FragCoord.xy, gl.Layer ); + #if 0 //def AE_GEOMETRY_SHADER + return int3( gl.FragCoord.xy, gl.Layer ); // error on Adreno #else return int3( gl.FragCoord.xy, 0 ); #endif @@ -369,11 +370,16 @@ float3 GetGlobalCoordSNorm (int3 offset) return ToSNorm( GetGlobalCoordUNorm( offset )); } -float3 GetGlobalCoordFloat () +float3 GetGlobalCoordSF () { return float3(GetGlobalCoord()) - float3(GetGlobalSize()-1) * 0.5f; } +float3 GetGlobalCoordUF () +{ + return float3(GetGlobalCoord()); +} + // local coordinate in 3D float3 GetLocalCoordUNorm () diff --git a/AE/engine/shared_data/shaders/HWRayTracing.glsl b/AE/engine/shared_data/shaders/HWRayTracing.glsl index fd34fa8a..1cc9e2c2 100644 --- a/AE/engine/shared_data/shaders/HWRayTracing.glsl +++ b/AE/engine/shared_data/shaders/HWRayTracing.glsl @@ -289,6 +289,7 @@ Compute a safe spawn point offset along the normal in world space to prevent self intersection of secondary rays. 
================================================= */ +#ifdef AE_LICENSE_BSD3 void ComputeHitPos (const float3 pos0, const float3 pos1, const float3 pos2, const float2 barys, const float4x3 objectToWorld, @@ -348,7 +349,7 @@ outWorldNorm = wld_norm; outWorldOffset = wld_off; } - +#endif /* ================================================= SafeSpawnPoint @@ -359,11 +360,13 @@ Offset the world-space position along the world-space normal by the safe offset to obtain the safe spawn point. ================================================= */ +#ifdef AE_LICENSE_BSD3 ND_ float3 SafeSpawnPoint (const float3 position, const float3 normal, const float offset) { - precise vec3 p = FusedMulAdd( float3(offset), normal, position ); + precise float3 p = FusedMulAdd( float3(offset), normal, position ); return p; } +#endif //----------------------------------------------------------------------------- diff --git a/AE/engine/shared_data/shaders/Math.glsl b/AE/engine/shared_data/shaders/Math.glsl index 351bb42c..d6806d31 100644 --- a/AE/engine/shared_data/shaders/Math.glsl +++ b/AE/engine/shared_data/shaders/Math.glsl @@ -30,6 +30,9 @@ #ifndef AE_ENABLE_DOUBLE_TYPE # define AE_ENABLE_DOUBLE_TYPE 0 // suffix 'LF' #endif + +// helper +#define UNITE( x, y ) x##y //----------------------------------------------------------------------------- @@ -38,7 +41,7 @@ #define Any any // (bool) #define All all // (bool) -#define Abs abs // (any int, any fp) +#define Abs abs // (any signed) #define ACos acos // (half, float) result in range [0 .. Pi] #define ASin asin // (half, float) result in range [-Pi/2 ... Pi/2] #define ASinH asinh // (half, float) @@ -53,7 +56,7 @@ #define CosH cosh // (half, float) #define Cross cross // (any fp) #define Distance distance // (any fp) -#define Dot dot // (any fp) +#define Dot dot // (any fp) x[0]*y[0] + ... 
#define Exp exp // (half, float) Pow( float_Euler, x ) #define Exp2 exp2 // (half, float) Pow( 2, x ) #define Exp10( _a_ ) pow(10.0,(_a_)) // (half, float) @@ -234,9 +237,9 @@ ND_ bool4 BoolOr (const bool4 lhs, const bool4 rhs) { return bool4( lhs.x or rh #define Gen_DIAGONAL( _stype_, _vtype_ )\ Gen_DIAGONAL1( _stype_ ) \ - Gen_DIAGONAL1( _vtype_##2 ) \ - Gen_DIAGONAL1( _vtype_##3 ) \ - Gen_DIAGONAL1( _vtype_##4 ) + Gen_DIAGONAL1( UNITE( _vtype_, 2 )) \ + Gen_DIAGONAL1( UNITE( _vtype_, 3 )) \ + Gen_DIAGONAL1( UNITE( _vtype_, 4 )) Gen_DIAGONAL( float, float_vec_t ) @@ -263,10 +266,10 @@ Gen_DIAGONAL( float, float_vec_t ) } #define Gen_SATURATE( _stype_, _vtype_ )\ - Gen_SATURATE1( _stype_, _stype_ ) \ - Gen_SATURATE1( _stype_, _vtype_##2 )\ - Gen_SATURATE1( _stype_, _vtype_##3 )\ - Gen_SATURATE1( _stype_, _vtype_##4 ) + Gen_SATURATE1( _stype_, _stype_ ) \ + Gen_SATURATE1( _stype_, UNITE( _vtype_, 2 ))\ + Gen_SATURATE1( _stype_, UNITE( _vtype_, 3 ))\ + Gen_SATURATE1( _stype_, UNITE( _vtype_, 4 )) Gen_SATURATE( float, float_vec_t ) @@ -294,9 +297,9 @@ Gen_SATURATE( float, float_vec_t ) #define Gen_CBRT( _stype_, _vtype_ )\ Gen_CBRT1( _stype_, _stype_ ) \ - Gen_CBRT1( _stype_, _vtype_##2 )\ - Gen_CBRT1( _stype_, _vtype_##3 )\ - Gen_CBRT1( _stype_, _vtype_##4 ) + Gen_CBRT1( _stype_, UNITE( _vtype_, 2 ))\ + Gen_CBRT1( _stype_, UNITE( _vtype_, 3 ))\ + Gen_CBRT1( _stype_, UNITE( _vtype_, 4 )) Gen_CBRT( float, float_vec_t ) @@ -311,23 +314,31 @@ Gen_CBRT( float, float_vec_t ) ================================================= ToUNorm / ToSNorm ---- - T ToUNorm (T x) [-1, +1] to [ 0, 1] - T ToSNorm (T x) [ 0, 1] to [-1, +1] + T ToUNorm (T x) [-1, +1] to [ 0, 1] + T ToSNorm (T x) [ 0, 1] to [-1, +1] + T ToUNorm (T x, T min, T max) remap from [min, max] to [ 0, 1] + T ToSNorm (T x, T min, T max) remap from [min, max] to [-1, +1] ================================================= */ -#define Gen_TOUSNORM1( _type_ ) \ - ND_ _type_ ToUNorm (const _type_ x) { \ - return 
FusedMulAdd( x, _type_(0.5), _type_(0.5) ); \ - } \ - ND_ _type_ ToSNorm (const _type_ x) { \ - return FusedMulAdd( x, _type_(2.0), _type_(-1.0) ); \ - } +#define Gen_TOUSNORM1( _type_ ) \ + ND_ _type_ ToUNorm (const _type_ x) { \ + return FusedMulAdd( x, _type_(0.5), _type_(0.5) ); \ + } \ + ND_ _type_ ToSNorm (const _type_ x) { \ + return FusedMulAdd( x, _type_(2.0), _type_(-1.0) ); \ + } \ + ND_ _type_ ToUNorm (const _type_ x, const _type_ minVal, const _type_ maxVal) {\ + return (x - minVal) / (maxVal - minVal); \ + } \ + ND_ _type_ ToSNorm (const _type_ x, const _type_ minVal, const _type_ maxVal) {\ + return ToSNorm( ToUNorm( x, minVal, maxVal )); \ + } \ #define Gen_TOUSNORM( _stype_, _vtype_ )\ - Gen_TOUSNORM1( _stype_ ) \ - Gen_TOUSNORM1( _vtype_##2 ) \ - Gen_TOUSNORM1( _vtype_##3 ) \ - Gen_TOUSNORM1( _vtype_##4 ) + Gen_TOUSNORM1( _stype_ )\ + Gen_TOUSNORM1( UNITE( _vtype_, 2 ))\ + Gen_TOUSNORM1( UNITE( _vtype_, 3 ))\ + Gen_TOUSNORM1( UNITE( _vtype_, 4 )) Gen_TOUSNORM( float, float_vec_t ) @@ -368,53 +379,49 @@ Gen_TOUSNORM( float, float_vec_t ) #if AE_ENABLE_HALF_TYPE # define half_min (0.00006103515625hf) // smallest positive normal number # define half_max (65504.0hf) -# define half_inf uint16BitsToHalf( 0x7c00us ) -# define half_inf_neg uint16BitsToHalf( 0xfc00us ) -# define half_Pi 3.14159265358979323846hf -# define half_Pi2 6.28318530717958647692hf -# define half_HalfPi 1.57079632679489661923hf -# define half_InvPi 0.31830988618379067153hf -# define half_SqrtOf2 1.41421356237309504880hf -# define half_Euler 2.71828182845904523536hf +# define half_inf (uint16BitsToHalf( 0x7c00us )) +# define half_inf_neg (uint16BitsToHalf( 0xfc00us )) +# define half_Pi (3.14159265358979323846hf) +# define half_Pi2 (6.28318530717958647692hf) +# define half_HalfPi (1.57079632679489661923hf) +# define half_InvPi (0.31830988618379067153hf) +# define half_SqrtOf2 (1.41421356237309504880hf) +# define half_Euler (2.71828182845904523536hf) #endif #if 1 # define float_min 
(1.1754943508e-38f) // smallest positive normal number # define float_max (3.4028234664e+38f) -# define float_inf uintBitsToFloat( 0x7F800000u ) -# define float_inf_neg uintBitsToFloat( 0xFF800000u ) -# define float_qnan uintBitsToFloat( 0xFFC00001u ) -# define float_snan uintBitsToFloat( 0xFF800001u ) -# define float_Pi 3.14159265358979323846f -# define float_Pi2 6.28318530717958647692f -# define float_HalfPi 1.57079632679489661923f -# define float_InvPi 0.31830988618379067153f -# define float_SqrtOf2 1.41421356237309504880f -# define float_Euler 2.71828182845904523536f +# define float_inf (uintBitsToFloat( 0x7F800000u )) +# define float_inf_neg (uintBitsToFloat( 0xFF800000u )) +# define float_qnan (uintBitsToFloat( 0xFFC00001u )) +# define float_snan (uintBitsToFloat( 0xFF800001u )) +# define float_Pi (3.14159265358979323846f) +# define float_Pi2 (6.28318530717958647692f) +# define float_HalfPi (1.57079632679489661923f) +# define float_InvPi (0.31830988618379067153f) +# define float_SqrtOf2 (1.41421356237309504880f) +# define float_Euler (2.71828182845904523536f) +# define float_epsilon (2.0e-5f) #endif #if AE_ENABLE_DOUBLE_TYPE # define double_min (2.2250738585072014e-308lf) // smallest positive normal number # define double_max (1.7976931348623157e+308lf) -# define double_inf uint64BitsToDouble( 0x7FF0000000000000ul ) -# define double_inf_neg uint64BitsToDouble( 0xFFF0000000000000ul ) -# define double_qnan uint64BitsToDouble( 0x7FF8000000000001ul ) -# define double_snan uint64BitsToDouble( 0x7FF0000000000001ul ) -# define double_nan uint64BitsToDouble( 0x7FFFFFFFFFFFFFFFul ) -# define double_Pi 3.14159265358979323846lf -# define double_Pi2 6.28318530717958647692lf -# define double_HalfPi 1.57079632679489661923lf -# define double_InvPi 0.31830988618379067153lf -# define double_SqrtOf2 1.41421356237309504880lf -# define double_Euler 2.71828182845904523536lf +# define double_inf (uint64BitsToDouble( 0x7FF0000000000000ul )) +# define double_inf_neg 
(uint64BitsToDouble( 0xFFF0000000000000ul )) +# define double_qnan (uint64BitsToDouble( 0x7FF8000000000001ul )) +# define double_snan (uint64BitsToDouble( 0x7FF0000000000001ul )) +# define double_nan (uint64BitsToDouble( 0x7FFFFFFFFFFFFFFFul )) +# define double_Pi (3.14159265358979323846lf) +# define double_Pi2 (6.28318530717958647692lf) +# define double_HalfPi (1.57079632679489661923lf) +# define double_InvPi (0.31830988618379067153lf) +# define double_SqrtOf2 (1.41421356237309504880lf) +# define double_Euler (2.71828182845904523536lf) #endif -ND_ float Epsilon () { return 2.e-5f; } ND_ float Pi () { return float_Pi; } -ND_ float Pi2 () { return float_Pi2; } -ND_ float HalfPi () { return float_HalfPi; } -ND_ float ReciprocalPi () { return float_InvPi; } -ND_ float SqrtOf2 () { return float_SqrtOf2; } //----------------------------------------------------------------------------- @@ -426,10 +433,10 @@ ND_ float SqrtOf2 () { return float_SqrtOf2; } ================================================= */ #define Gen_SQUARE( _stype_, _vtype_ )\ - ND_ _stype_ Square (const _stype_ x) { return x * x; }\ - ND_ _vtype_##2 Square (const _vtype_##2 x) { return x * x; }\ - ND_ _vtype_##3 Square (const _vtype_##3 x) { return x * x; }\ - ND_ _vtype_##4 Square (const _vtype_##4 x) { return x * x; } + ND_ _stype_ Square (const _stype_ x) { return x * x; }\ + ND_ UNITE(_vtype_, 2 ) Square (const UNITE(_vtype_, 2) x) { return x * x; }\ + ND_ UNITE(_vtype_, 3 ) Square (const UNITE(_vtype_, 3) x) { return x * x; }\ + ND_ UNITE(_vtype_, 4 ) Square (const UNITE(_vtype_, 4) x) { return x * x; } Gen_SQUARE( float, float_vec_t ) Gen_SQUARE( int, int_vec_t ) @@ -469,10 +476,10 @@ Gen_SQUARE( uint, uint_vec_t ) ND_ _vtype_ Select (const _btype_ condition, const _vtype_ ifTrue, const _vtype_ ifFalse) { return (ifFalse * _vtype_(Not(condition))) + (ifTrue * _vtype_(condition)); } #define Gen_SELECT( _stype_, _vtype_ )\ - Gen_SELECT1( _stype_, bool )\ - Gen_SELECT1( _vtype_##2, bool2 )\ - 
Gen_SELECT1( _vtype_##3, bool3 )\ - Gen_SELECT1( _vtype_##4, bool4 ) + Gen_SELECT1( _stype_, bool )\ + Gen_SELECT1( UNITE( _vtype_, 2 ), bool2 )\ + Gen_SELECT1( UNITE( _vtype_, 3 ), bool3 )\ + Gen_SELECT1( UNITE( _vtype_, 4 ), bool4 ) Gen_SELECT( float, float_vec_t ) Gen_SELECT( int, int_vec_t ) @@ -508,27 +515,8 @@ Gen_SELECT( uint, uint_vec_t ) ---- same as per component 'x < y ? ifLess : ifNot' ================================================= -* -#define Gen_SELECT1( _vtype_ )\ - ND_ _vtype_ SelectFp (_vtype_ x, const _vtype_ y, const _vtype_ ifLess, const _vtype_ ifNot) { x = LessFp( y, x ); return (ifLess * x) + ifNot * (x - _vtype_(1.0)); } - -#define Gen_SELECT( _stype_, _vtype_ )\ - Gen_SELECT1( _stype_ )\ - Gen_SELECT1( _vtype_##2 )\ - Gen_SELECT1( _vtype_##3 )\ - Gen_SELECT1( _vtype_##4 ) - -Gen_SELECT( float, float_vec_t ) - -#if AE_ENABLE_HALF_TYPE - Gen_SELECT( half, half_vec_t ) -#endif -#if AE_ENABLE_DOUBLE_TYPE - Gen_SELECT( double, double_vec_t ) -#endif - -#undef Gen_SELECT1 -#undef Gen_SELECT +*/ +#define SelectFp( _x_, _y_, _ifLess_, _ifNot_ ) Lerp( (_ifNot_), (_ifLess_), LessFp( _x_, _y_ )) /* ================================================= @@ -544,9 +532,9 @@ Gen_SELECT( float, float_vec_t ) #define Gen_BRANCHLESS( _stype_, _vtype_ )\ Gen_BRANCHLESS1( _stype_ )\ - Gen_BRANCHLESS1( _vtype_##2 )\ - Gen_BRANCHLESS1( _vtype_##3 )\ - Gen_BRANCHLESS1( _vtype_##4 ) + Gen_BRANCHLESS1( UNITE( _vtype_, 2 ))\ + Gen_BRANCHLESS1( UNITE( _vtype_, 3 ))\ + Gen_BRANCHLESS1( UNITE( _vtype_, 4 )) Gen_BRANCHLESS( float, float_vec_t ) Gen_BRANCHLESS( int, int_vec_t ) @@ -587,13 +575,13 @@ Gen_BRANCHLESS( uint, uint_vec_t ) #define MinAbs( _a_, _b_ ) Select( Less(Abs(_a_), Abs(_b_)), (_a_), (_b_) ) #define MaxAbs( _a_, _b_ ) Select( Greater(Abs(_a_), Abs(_b_)), (_a_), (_b_) ) -#define Gen_MINMAX( _stype_, _vtype_ ) \ - ND_ _stype_ MinOf (_vtype_##2 a) { return Min( a.x, a.y ); } \ - ND_ _stype_ MinOf (_vtype_##3 a) { return Min3( a.x, a.y, a.z ); } \ - ND_ 
_stype_ MinOf (_vtype_##4 a) { return Min4( a.x, a.y, a.z, a.w ); } \ - ND_ _stype_ MaxOf (_vtype_##2 a) { return Max( a.x, a.y ); } \ - ND_ _stype_ MaxOf (_vtype_##3 a) { return Max3( a.x, a.y, a.z ); } \ - ND_ _stype_ MaxOf (_vtype_##4 a) { return Max4( a.x, a.y, a.z, a.w ); } +#define Gen_MINMAX( _stype_, _vtype_ ) \ + ND_ _stype_ MinOf (UNITE( _vtype_, 2) a) { return Min( a.x, a.y ); } \ + ND_ _stype_ MinOf (UNITE( _vtype_, 3) a) { return Min3( a.x, a.y, a.z ); } \ + ND_ _stype_ MinOf (UNITE( _vtype_, 4) a) { return Min4( a.x, a.y, a.z, a.w ); } \ + ND_ _stype_ MaxOf (UNITE( _vtype_, 2) a) { return Max( a.x, a.y ); } \ + ND_ _stype_ MaxOf (UNITE( _vtype_, 3) a) { return Max3( a.x, a.y, a.z ); } \ + ND_ _stype_ MaxOf (UNITE( _vtype_, 4) a) { return Max4( a.x, a.y, a.z, a.w ); } Gen_MINMAX( float, float_vec_t ) Gen_MINMAX( int, int_vec_t ) @@ -638,8 +626,8 @@ Gen_MINMAX( uint, uint_vec_t ) ND_ _stype_ InvDistance (const _vtype_ x, const _vtype_ y) { _vtype_ r = x - y; return InvSqrt( Dot( r, r )); } #define Gen_LENGTHSQ_DISTANCESQ( _stype_, _vtype_ )\ - Gen_LENGTHSQ_DISTANCESQ1( _stype_, _vtype_##2 )\ - Gen_LENGTHSQ_DISTANCESQ1( _stype_, _vtype_##3 ) + Gen_LENGTHSQ_DISTANCESQ1( _stype_, UNITE( _vtype_, 2 ))\ + Gen_LENGTHSQ_DISTANCESQ1( _stype_, UNITE( _vtype_, 3 )) Gen_LENGTHSQ_DISTANCESQ( float, float_vec_t ) @@ -668,9 +656,9 @@ Gen_LENGTHSQ_DISTANCESQ( float, float_vec_t ) #define Gen_SIGN( _stype_, _vtype_ )\ Gen_SIGN1( _stype_ )\ - Gen_SIGN1( _vtype_##2 )\ - Gen_SIGN1( _vtype_##3 )\ - Gen_SIGN1( _vtype_##4 ) + Gen_SIGN1( UNITE( _vtype_, 2 ))\ + Gen_SIGN1( UNITE( _vtype_, 3 ))\ + Gen_SIGN1( UNITE( _vtype_, 4 )) Gen_SIGN( float, float_vec_t ) @@ -688,9 +676,9 @@ Gen_SIGN( float, float_vec_t ) #define Gen_SIGN( _stype_, _vtype_ )\ ND_ _stype_ Sign (const _stype_ x) { return x < _stype_(0) ? 
_stype_(-1) : _stype_(1); }\ - Gen_SIGN1( _vtype_##2 )\ - Gen_SIGN1( _vtype_##3 )\ - Gen_SIGN1( _vtype_##4 ) + Gen_SIGN1( UNITE( _vtype_, 2 ))\ + Gen_SIGN1( UNITE( _vtype_, 3 ))\ + Gen_SIGN1( UNITE( _vtype_, 4 )) Gen_SIGN( int, int_vec_t ) @@ -728,9 +716,9 @@ Gen_SIGN( int, int_vec_t ) ND_ _stype_ LinearStep (const _stype_ x, const _stype_ edge0, const _stype_ edge1) { \ return Saturate( (x - edge0) / (edge1 - edge0) ); \ } \ - Gen_LINEARSTEP1( _vtype_##2, _stype_ ) \ - Gen_LINEARSTEP1( _vtype_##3, _stype_ ) \ - Gen_LINEARSTEP1( _vtype_##4, _stype_ ) + Gen_LINEARSTEP1( UNITE( _vtype_, 2 ), _stype_ ) \ + Gen_LINEARSTEP1( UNITE( _vtype_, 3 ), _stype_ ) \ + Gen_LINEARSTEP1( UNITE( _vtype_, 4 ), _stype_ ) Gen_LINEARSTEP( float, float_vec_t ) @@ -766,9 +754,9 @@ Gen_LINEARSTEP( float, float_vec_t ) ND_ _stype_ BumpStep (const _stype_ x, const _stype_ edge0, const _stype_ edge1) { \ return _stype_(1) - Abs( Saturate( (x - edge0) / (edge1 - edge0) ) - _stype_(0.5) ) * _stype_(2.0); \ } \ - Gen_BUMPSTEP1( _vtype_##2, _stype_ ) \ - Gen_BUMPSTEP1( _vtype_##3, _stype_ ) \ - Gen_BUMPSTEP1( _vtype_##4, _stype_ ) + Gen_BUMPSTEP1( UNITE( _vtype_, 2 ), _stype_ ) \ + Gen_BUMPSTEP1( UNITE( _vtype_, 3 ), _stype_ ) \ + Gen_BUMPSTEP1( UNITE( _vtype_, 4 ), _stype_ ) Gen_BUMPSTEP( float, float_vec_t ) @@ -807,9 +795,9 @@ Gen_BUMPSTEP( float, float_vec_t ) x = BumpStep( x, edge0, edge1 ); \ return x * x * (_stype_(3.0) - _stype_(2.0) * x); \ } \ - Gen_SMOOTHBUMPSTEP1( _vtype_##2, _stype_ ) \ - Gen_SMOOTHBUMPSTEP1( _vtype_##3, _stype_ ) \ - Gen_SMOOTHBUMPSTEP1( _vtype_##4, _stype_ ) + Gen_SMOOTHBUMPSTEP1( UNITE( _vtype_, 2 ), _stype_ ) \ + Gen_SMOOTHBUMPSTEP1( UNITE( _vtype_, 3 ), _stype_ ) \ + Gen_SMOOTHBUMPSTEP1( UNITE( _vtype_, 4 ), _stype_ ) Gen_SMOOTHBUMPSTEP( float, float_vec_t ) @@ -838,10 +826,10 @@ Gen_SMOOTHBUMPSTEP( float, float_vec_t ) } #define Gen_TRIANGLEWAVE( _stype_, _vtype_ )\ - Gen_TRIANGLEWAVE1( _stype_, _stype_ )\ - Gen_TRIANGLEWAVE1( _vtype_##2, _stype_ )\ - 
Gen_TRIANGLEWAVE1( _vtype_##3, _stype_ )\ - Gen_TRIANGLEWAVE1( _vtype_##4, _stype_ ) + Gen_TRIANGLEWAVE1( _stype_, _stype_ )\ + Gen_TRIANGLEWAVE1( UNITE( _vtype_, 2 ), _stype_ )\ + Gen_TRIANGLEWAVE1( UNITE( _vtype_, 3 ), _stype_ )\ + Gen_TRIANGLEWAVE1( UNITE( _vtype_, 4 ), _stype_ ) Gen_TRIANGLEWAVE( float, float_vec_t ) @@ -854,131 +842,284 @@ Gen_TRIANGLEWAVE( float, float_vec_t ) #undef Gen_TRIANGLEWAVE1 #undef Gen_TRIANGLEWAVE - -//----------------------------------------------------------------------------- -// clamp / wrap - -ND_ float ClampOut (const float x, const float minVal, const float maxVal) + +/* +================================================= + Steps +---- + T Steps (T x, T stepCount) +---- + returns X in range [0, 1], with steps pattern _- + returns Y in range [0, 1], with linear interpolation per step / +================================================= +*/ +float2 Steps (float x, const float stepCount) { - float mid = (minVal + maxVal) * 0.5f; - return x < mid ? (x < minVal ? x : minVal) : (x > maxVal ? x : maxVal); // TODO: branchless + x = x * stepCount; + float i = Floor( x ); + return float2( i / stepCount, x - i ); } -ND_ int ClampOut (const int x, const int minVal, const int maxVal) -{ - int mid = (minVal+1)/2 + (maxVal+1)/2; - return x < mid ? (x < minVal ? x : minVal) : (x > maxVal ? 
x : maxVal); // TODO: branchless -} +/* +================================================= + ClampOut (float) +================================================= +*/ +#define Gen_CLAMPOUT1( _stype_, _vtype_ ) \ + ND_ _vtype_ ClampOut (const _vtype_ x, const _vtype_ minVal, const _vtype_ maxVal) \ + { \ + _vtype_ mid = (minVal * _stype_(0.5)) + (maxVal * _stype_(0.5)); \ + return Lerp( Max( x, maxVal ), Min( x, minVal ), LessFp( x, mid )); \ + } +#define Gen_CLAMPOUT2( _stype_, _vtype_ ) \ + Gen_CLAMPOUT1( _stype_, _vtype_ ) \ + ND_ _vtype_ ClampOut (const _vtype_ x, const _stype_ minVal, const _stype_ maxVal) \ + { \ + _stype_ mid = (minVal * _stype_(0.5)) + (maxVal * _stype_(0.5)); \ + return Lerp( Max( x, maxVal ), Min( x, minVal ), LessFp( x, _vtype_(mid) )); \ + } -ND_ float2 ClampOut (const float2 v, const float minVal, const float maxVal) { - return float2( ClampOut( v.x, minVal, maxVal ), - ClampOut( v.y, minVal, maxVal )); -} +#define Gen_CLAMPOUT( _stype_, _vtype_ )\ + Gen_CLAMPOUT1( _stype_, _stype_ )\ + Gen_CLAMPOUT2( _stype_, UNITE( _vtype_, 2 ))\ + Gen_CLAMPOUT2( _stype_, UNITE( _vtype_, 3 ))\ + Gen_CLAMPOUT2( _stype_, UNITE( _vtype_, 4 )) + +Gen_CLAMPOUT( float, float_vec_t ) -ND_ float3 ClampOut (const float3 v, const float minVal, const float maxVal) { - return float3( ClampOut( v.x, minVal, maxVal ), - ClampOut( v.y, minVal, maxVal ), - ClampOut( v.z, minVal, maxVal )); -} +#if AE_ENABLE_HALF_TYPE + Gen_CLAMPOUT( half, half_vec_t ) +#endif +#if AE_ENABLE_DOUBLE_TYPE + Gen_CLAMPOUT( double, double_vec_t ) +#endif -ND_ float4 ClampOut (const float4 v, const float minVal, const float maxVal) { - return float4( ClampOut( v.x, minVal, maxVal ), - ClampOut( v.y, minVal, maxVal ), - ClampOut( v.z, minVal, maxVal ), - ClampOut( v.w, minVal, maxVal )); -} +#undef Gen_CLAMPOUT1 +#undef Gen_CLAMPOUT -ND_ int2 ClampOut (const int2 v, const int minVal, const int maxVal) { - return int2( ClampOut( v.x, minVal, maxVal ), - ClampOut( v.y, minVal, maxVal )); -} +/* 
+================================================= + ClampOut (int) +================================================= +*/ +#define Gen_CLAMPOUT1( _type_ ) \ + ND_ _type_ ClampOut (const _type_ x, const _type_ minVal, const _type_ maxVal) \ + { \ + _type_ mid = (minVal + _type_(1)) / _type_(2) + (maxVal + _type_(1)) / _type_(2); \ + return x < mid ? Min( x, minVal ) : Max( x, maxVal ); \ + } +#define Gen_CLAMPOUT( _stype_, _vtype_ ) \ + Gen_CLAMPOUT1( _stype_ ) \ + ND_ _vtype_##2 ClampOut (const _vtype_##2 v, const _stype_ minVal, const _stype_ maxVal) { \ + return _vtype_##2( ClampOut( v.x, minVal, maxVal ), \ + ClampOut( v.y, minVal, maxVal )); \ + } \ + ND_ _vtype_##3 ClampOut (const _vtype_##3 v, const _stype_ minVal, const _stype_ maxVal) { \ + return _vtype_##3( ClampOut( v.x, minVal, maxVal ), \ + ClampOut( v.y, minVal, maxVal ), \ + ClampOut( v.z, minVal, maxVal )); \ + } \ + ND_ _vtype_##4 ClampOut (const _vtype_##4 v, const _stype_ minVal, const _stype_ maxVal) { \ + return _vtype_##4( ClampOut( v.x, minVal, maxVal ), \ + ClampOut( v.y, minVal, maxVal ), \ + ClampOut( v.z, minVal, maxVal ), \ + ClampOut( v.w, minVal, maxVal )); \ + } + +Gen_CLAMPOUT( int, int_vec_t ) +Gen_CLAMPOUT( uint, uint_vec_t ) -ND_ int3 ClampOut (const int3 v, const int minVal, const int maxVal) { - return int3( ClampOut( v.x, minVal, maxVal ), - ClampOut( v.y, minVal, maxVal ), - ClampOut( v.z, minVal, maxVal )); -} +#if AE_ENABLE_BYTE_TYPE + Gen_CLAMPOUT( sbyte, sbyte_vec_t ) + Gen_CLAMPOUT( ubyte, ubyte_vec_t ) +#endif +#if AE_ENABLE_SHORT_TYPE + Gen_CLAMPOUT( sshort, sshort_vec_t ) + Gen_CLAMPOUT( ushort, ushort_vec_t ) +#endif +#if AE_ENABLE_LONG_TYPE + Gen_CLAMPOUT( slong, slong_vec_t ) + Gen_CLAMPOUT( ulong, ulong_vec_t ) +#endif + +#undef Gen_CLAMPOUT1 +#undef Gen_CLAMPOUT -ND_ int4 ClampOut (const int4 v, const int minVal, const int maxVal) { - return int4( ClampOut( v.x, minVal, maxVal ), - ClampOut( v.y, minVal, maxVal ), - ClampOut( v.z, minVal, maxVal ), - ClampOut( 
v.w, minVal, maxVal )); -} +/* +================================================= + Wrap (float) +================================================= +*/ +#define Gen_WRAP1( _stype_, _vtype_ ) \ + ND_ _vtype_ Wrap (const _vtype_ x, const _vtype_ minVal, const _vtype_ maxVal) \ + { \ + _vtype_ size = maxVal - minVal; \ + _vtype_ res = minVal + Mod( x - minVal, size ); \ + return res + size * LessFp( res, minVal ); \ + } +#define Gen_WRAP2( _stype_, _vtype_ ) \ + Gen_WRAP1( _stype_, _vtype_ ) \ + ND_ _vtype_ Wrap (const _vtype_ x, const _stype_ minVal, const _stype_ maxVal){\ + return Wrap( x, _vtype_(minVal), _vtype_(maxVal) ); \ + } +#define Gen_WRAP( _stype_, _vtype_ )\ + Gen_WRAP1( _stype_, _stype_ )\ + Gen_WRAP2( _stype_, UNITE( _vtype_, 2 ))\ + Gen_WRAP2( _stype_, UNITE( _vtype_, 3 ))\ + Gen_WRAP2( _stype_, UNITE( _vtype_, 4 )) -ND_ float Wrap (const float x, const float minVal, const float maxVal) -{ - if ( maxVal < minVal ) return minVal; // TODO: branchless - float size = maxVal - minVal; - float res = minVal + Mod( x - minVal, size ); - if ( res < minVal ) return res + size; - return res; -} +Gen_WRAP( float, float_vec_t ) -ND_ int Wrap (const int x, const int minVal, const int maxVal) -{ - if ( maxVal < minVal ) return minVal; // TODO: branchless - int size = maxVal+1 - minVal; - int res = minVal + ((x - minVal) % size); - if ( res < minVal ) return res + size; - return res; -} +#if AE_ENABLE_HALF_TYPE + Gen_WRAP( half, half_vec_t ) +#endif +#if AE_ENABLE_DOUBLE_TYPE + Gen_WRAP( double, double_vec_t ) +#endif + +#undef Gen_WRAP1 +#undef Gen_WRAP -ND_ float2 Wrap (const float2 v, const float minVal, const float maxVal) { - return float2( Wrap( v.x, minVal, maxVal ), - Wrap( v.y, minVal, maxVal )); -} +/* +================================================= + Wrap (int) +================================================= +*/ +#define Gen_WRAP1( _type_ ) \ + ND_ _type_ Wrap (const _type_ x, const _type_ minVal, const _type_ maxVal) \ + { \ + _type_ size = maxVal + 
_type_(1) - minVal; \ + _type_ res = minVal + ((x - minVal) % size); \ + if ( res < minVal ) res += size; \ + return res; \ + } +#define Gen_WRAP( _stype_, _vtype_ ) \ + Gen_WRAP1( _stype_ ) \ + ND_ _vtype_##2 Wrap (const _vtype_##2 v, const _stype_ minVal, const _stype_ maxVal) { \ + return _vtype_##2( Wrap( v.x, minVal, maxVal ), \ + Wrap( v.y, minVal, maxVal )); \ + } \ + ND_ _vtype_##3 Wrap (const _vtype_##3 v, const _stype_ minVal, const _stype_ maxVal) { \ + return _vtype_##3( Wrap( v.x, minVal, maxVal ), \ + Wrap( v.y, minVal, maxVal ), \ + Wrap( v.z, minVal, maxVal )); \ + } \ + ND_ _vtype_##4 Wrap (const _vtype_##4 v, const _stype_ minVal, const _stype_ maxVal) { \ + return _vtype_##4( Wrap( v.x, minVal, maxVal ), \ + Wrap( v.y, minVal, maxVal ), \ + Wrap( v.z, minVal, maxVal ), \ + Wrap( v.w, minVal, maxVal )); \ + } \ + ND_ _vtype_##2 Wrap (const _vtype_##2 v, const _vtype_##2 minVal, const _vtype_##2 maxVal) { \ + return _vtype_##2( Wrap( v.x, minVal.x, maxVal.x ), \ + Wrap( v.y, minVal.y, maxVal.y )); \ + } \ + ND_ _vtype_##3 Wrap (const _vtype_##3 v, const _vtype_##3 minVal, const _vtype_##3 maxVal) { \ + return _vtype_##3( Wrap( v.x, minVal.x, maxVal.x ), \ + Wrap( v.y, minVal.y, maxVal.y ), \ + Wrap( v.z, minVal.z, maxVal.z )); \ + } \ + ND_ _vtype_##4 Wrap (const _vtype_##4 v, const _vtype_##4 minVal, const _vtype_##4 maxVal) { \ + return _vtype_##4( Wrap( v.x, minVal.x, maxVal.x ), \ + Wrap( v.y, minVal.y, maxVal.y ), \ + Wrap( v.z, minVal.z, maxVal.z ), \ + Wrap( v.w, minVal.w, maxVal.w )); \ + } + +Gen_WRAP( int, int_vec_t ) +Gen_WRAP( uint, uint_vec_t ) -ND_ float3 Wrap (const float3 v, const float minVal, const float maxVal) { - return float3( Wrap( v.x, minVal, maxVal ), - Wrap( v.y, minVal, maxVal ), - Wrap( v.z, minVal, maxVal )); -} +#if AE_ENABLE_BYTE_TYPE + Gen_WRAP( sbyte, sbyte_vec_t ) + Gen_WRAP( ubyte, ubyte_vec_t ) +#endif +#if AE_ENABLE_SHORT_TYPE + Gen_WRAP( sshort, sshort_vec_t ) + Gen_WRAP( ushort, ushort_vec_t ) +#endif +#if 
AE_ENABLE_LONG_TYPE + Gen_WRAP( slong, slong_vec_t ) + Gen_WRAP( ulong, ulong_vec_t ) +#endif + +#undef Gen_WRAP1 +#undef Gen_WRAP -ND_ float4 Wrap (const float4 v, const float minVal, const float maxVal) { - return float4( Wrap( v.x, minVal, maxVal ), - Wrap( v.y, minVal, maxVal ), - Wrap( v.z, minVal, maxVal ), - Wrap( v.w, minVal, maxVal )); -} +/* +================================================= + MirroredWrap (float) +================================================= +*/ +#define Gen_MIRWRAP1( _stype_, _vtype_ ) \ + ND_ _vtype_ MirroredWrap (const _vtype_ x, const _vtype_ minVal, const _vtype_ maxVal) \ + { \ + _vtype_ size = (maxVal - minVal) * _stype_(2.0); \ + _vtype_ res = Fract( (x - minVal) / size ); \ + return Min( res, _stype_(1.0) - res ) * size + minVal; \ + } +#define Gen_MIRWRAP2( _stype_, _vtype_ ) \ + Gen_MIRWRAP1( _stype_, _vtype_ ) \ + ND_ _vtype_ MirroredWrap (const _vtype_ x, const _stype_ minVal, const _stype_ maxVal){\ + return MirroredWrap( x, _vtype_(minVal), _vtype_(maxVal) ); \ + } -ND_ float2 Wrap (const float2 v, const float2 minVal, const float2 maxVal) { - return float2( Wrap( v.x, minVal.x, maxVal.y ), - Wrap( v.y, minVal.x, maxVal.y )); -} +#define Gen_MIRWRAP( _stype_, _vtype_ )\ + Gen_MIRWRAP1( _stype_, _stype_ )\ + Gen_MIRWRAP2( _stype_, UNITE( _vtype_, 2 ))\ + Gen_MIRWRAP2( _stype_, UNITE( _vtype_, 3 ))\ + Gen_MIRWRAP2( _stype_, UNITE( _vtype_, 4 )) -ND_ float3 Wrap (const float3 v, const float3 minVal, const float3 maxVal) { - return float3( Wrap( v.x, minVal.x, maxVal.x ), - Wrap( v.y, minVal.y, maxVal.y ), - Wrap( v.z, minVal.z, maxVal.z )); -} +Gen_MIRWRAP( float, float_vec_t ) -ND_ float4 Wrap (const float4 v, const float4 minVal, const float4 maxVal) { - return float4( Wrap( v.x, minVal.x, maxVal.x ), - Wrap( v.y, minVal.y, maxVal.y ), - Wrap( v.z, minVal.z, maxVal.z ), - Wrap( v.w, minVal.w, maxVal.w )); -} +#if AE_ENABLE_HALF_TYPE + Gen_MIRWRAP( half, half_vec_t ) +#endif +#if AE_ENABLE_DOUBLE_TYPE + Gen_MIRWRAP( 
double, double_vec_t ) +#endif + +#undef Gen_MIRWRAP1 +#undef Gen_MIRWRAP -ND_ int2 Wrap (const int2 v, const float minVal, const float maxVal) { - return int2( Wrap( v.x, minVal, maxVal ), - Wrap( v.y, minVal, maxVal )); -} +/* +================================================= + MirroredWrap (int) +================================================= +*/ +#define Gen_MIRWRAP1( _stype_, _vtype_ ) \ + ND_ _vtype_ MirroredWrap (const _vtype_ x, const _vtype_ minVal, const _vtype_ maxVal) \ + { \ + _vtype_ size = (maxVal - minVal) * _stype_(2); \ + _vtype_ res = Abs( x - minVal ) % size; \ + return Min( res, size - res ) + minVal; \ + } +#define Gen_MIRWRAP2( _stype_, _vtype_ ) \ + Gen_MIRWRAP1( _stype_, _vtype_ ) \ + ND_ _vtype_ MirroredWrap (const _vtype_ x, const _stype_ minVal, const _stype_ maxVal){\ + return MirroredWrap( x, _vtype_(minVal), _vtype_(maxVal) ); \ + } -ND_ int3 Wrap (const int3 v, const int minVal, const int maxVal) { - return int3( Wrap( v.x, minVal, maxVal ), - Wrap( v.y, minVal, maxVal ), - Wrap( v.z, minVal, maxVal )); -} +#define Gen_MIRWRAP( _stype_, _vtype_ )\ + Gen_MIRWRAP1( _stype_, _stype_ )\ + Gen_MIRWRAP2( _stype_, UNITE( _vtype_, 2 ))\ + Gen_MIRWRAP2( _stype_, UNITE( _vtype_, 3 ))\ + Gen_MIRWRAP2( _stype_, UNITE( _vtype_, 4 )) -ND_ int4 Wrap (const int4 v, const int minVal, const int maxVal) { - return int4( Wrap( v.x, minVal, maxVal ), - Wrap( v.y, minVal, maxVal ), - Wrap( v.z, minVal, maxVal ), - Wrap( v.w, minVal, maxVal )); -} +Gen_MIRWRAP( int, int_vec_t ) +#if AE_ENABLE_BYTE_TYPE + Gen_MIRWRAP( sbyte, sbyte_vec_t ) +#endif +#if AE_ENABLE_SHORT_TYPE + Gen_MIRWRAP( sshort, sshort_vec_t ) +#endif +#if AE_ENABLE_LONG_TYPE + Gen_MIRWRAP( slong, slong_vec_t ) +#endif + +#undef Gen_MIRWRAP1 +#undef Gen_MIRWRAP //----------------------------------------------------------------------------- // bit operations @@ -1029,10 +1170,6 @@ ND_ uint ExtractBitIndex (inout uint bits) return uint(IntLog2( ExtractBit( INOUT bits ))); } - 
-//----------------------------------------------------------------------------- -// interpolation - /* ================================================= BaryLerp @@ -1041,15 +1178,16 @@ ND_ uint ExtractBitIndex (inout uint bits) T BaryLerp (T v0, T v1, T v2, Vec2 barycentrics) -- barycentric interpolation with much better precision ================================================= */ +// TODO: use Dot ? #define Gen_BARYLERP1( _type_, _bary3_, _bary2_ )\ ND_ _type_ BaryLerp (const _type_ v0, const _type_ v1, const _type_ v2, const _bary3_ barycentrics) { return v0 * barycentrics.x + v1 * barycentrics.y + v2 * barycentrics.z; }\ ND_ _type_ BaryLerp (const _type_ v0, const _type_ v1, const _type_ v2, const _bary2_ barycentrics) { return v0 + FusedMulAdd( _type_(barycentrics.x), (v1 - v0), barycentrics.y * (v2 - v0) ); } #define Gen_BARYLERP( _stype_, _vtype_ )\ - Gen_BARYLERP1( _stype_, _vtype_##3, _vtype_##2 )\ - Gen_BARYLERP1( _vtype_##2, _vtype_##3, _vtype_##2 )\ - Gen_BARYLERP1( _vtype_##3, _vtype_##3, _vtype_##2 )\ - Gen_BARYLERP1( _vtype_##4, _vtype_##3, _vtype_##2 ) + Gen_BARYLERP1( _stype_, UNITE( _vtype_, 3 ), UNITE( _vtype_, 2 ))\ + Gen_BARYLERP1( UNITE( _vtype_, 2 ), UNITE( _vtype_, 3 ), UNITE( _vtype_, 2 ))\ + Gen_BARYLERP1( UNITE( _vtype_, 3 ), UNITE( _vtype_, 3 ), UNITE( _vtype_, 2 ))\ + Gen_BARYLERP1( UNITE( _vtype_, 4 ), UNITE( _vtype_, 3 ), UNITE( _vtype_, 2 )) Gen_BARYLERP( float, float_vec_t ) @@ -1076,10 +1214,10 @@ Gen_BARYLERP( float, float_vec_t ) ND_ _type_ BiLerp (const _type_ x1y1, const _type_ x2y1, const _type_ x1y2, const _type_ x2y2, const _factor_ factor) { return Lerp( Lerp( x1y1, x2y1, factor.x ), Lerp( x1y2, x2y2, factor.x ), factor.y ); } #define Gen_BILERP( _stype_, _vtype_ )\ - Gen_BILERP1( _stype_, _vtype_##2 )\ - Gen_BILERP1( _vtype_##2, _vtype_##2 )\ - Gen_BILERP1( _vtype_##3, _vtype_##2 )\ - Gen_BILERP1( _vtype_##4, _vtype_##2 ) + Gen_BILERP1( _stype_, UNITE( _vtype_, 2 ))\ + Gen_BILERP1( UNITE( _vtype_, 2 ), UNITE( _vtype_, 2 
))\ + Gen_BILERP1( UNITE( _vtype_, 3 ), UNITE( _vtype_, 2 ))\ + Gen_BILERP1( UNITE( _vtype_, 4 ), UNITE( _vtype_, 2 )) Gen_BILERP( float, float_vec_t ) @@ -1095,53 +1233,51 @@ Gen_BILERP( float, float_vec_t ) /* ================================================= - Remap / RemapClamp / RemapWrap + Remap ---- T Remap (Vec2 src, Vec2 dst, T v) - T RemapClamp (Vec2 src, Vec2 dst, T v) - T RemapWrap (Vec2 src, Vec2 dst, T v) + T Remap (Vec2 dst, T v) ---- Map 'v' in 'src' interval to 'dst' interval. - Map 'v' in 'src' interval to 'dst' interval and clamp. + Map 'v' in [0,1] interval to 'dst' interval. Interval is a scalar range which specified for all components. ================================================= */ -ND_ float Remap (const float2 src, const float2 dst, const float v) { return (v - src.x) / (src.y - src.x) * (dst.y - dst.x) + dst.x; } -ND_ float2 Remap (const float2 src, const float2 dst, const float2 v) { return (v - src.x) / (src.y - src.x) * (dst.y - dst.x) + dst.x; } -ND_ float3 Remap (const float2 src, const float2 dst, const float3 v) { return (v - src.x) / (src.y - src.x) * (dst.y - dst.x) + dst.x; } -ND_ float4 Remap (const float2 src, const float2 dst, const float4 v) { return (v - src.x) / (src.y - src.x) * (dst.y - dst.x) + dst.x; } - -ND_ float RemapClamp (const float2 src, const float2 dst, const float v) { return Clamp( Remap( src, dst, v ), dst.x, dst.y ); } -ND_ float2 RemapClamp (const float2 src, const float2 dst, const float2 v) { return Clamp( Remap( src, dst, v ), dst.x, dst.y ); } -ND_ float3 RemapClamp (const float2 src, const float2 dst, const float3 v) { return Clamp( Remap( src, dst, v ), dst.x, dst.y ); } -ND_ float4 RemapClamp (const float2 src, const float2 dst, const float4 v) { return Clamp( Remap( src, dst, v ), dst.x, dst.y ); } - -ND_ float RemapSmooth (const float2 src, const float2 dst, const float v) { return SmoothStep( Remap( src, dst, v ), dst.x, dst.y ); } -ND_ float2 RemapSmooth (const float2 src, const float2 dst, 
const float2 v) { return SmoothStep( Remap( src, dst, v ), dst.x, dst.y ); } -ND_ float3 RemapSmooth (const float2 src, const float2 dst, const float3 v) { return SmoothStep( Remap( src, dst, v ), dst.x, dst.y ); } -ND_ float4 RemapSmooth (const float2 src, const float2 dst, const float4 v) { return SmoothStep( Remap( src, dst, v ), dst.x, dst.y ); } - -ND_ float RemapWrap (const float2 src, const float2 dst, const float v) { return Wrap( Remap( src, dst, v ), dst.x, dst.y ); } -ND_ float2 RemapWrap (const float2 src, const float2 dst, const float2 v) { return Wrap( Remap( src, dst, v ), dst.x, dst.y ); } -ND_ float3 RemapWrap (const float2 src, const float2 dst, const float3 v) { return Wrap( Remap( src, dst, v ), dst.x, dst.y ); } -ND_ float4 RemapWrap (const float2 src, const float2 dst, const float4 v) { return Wrap( Remap( src, dst, v ), dst.x, dst.y ); } +#define Gen_REMAP1( _type_, _range_ )\ + ND_ _type_ Remap (const _range_ dst, const _type_ v) { return v * (dst.y - dst.x) + dst.x; }\ + ND_ _type_ Remap (const _range_ src, const _range_ dst, const _type_ v) { return Remap( dst, (v - src.x) / (src.y - src.x) ); } -/* -================================================= - Remap / RemapClamp ----- - Map 'v' in 'src' interval to 'dst' interval. - Map 'v' in 'src' interval to 'dst' interval and clamp. - Interval is specified per-component. 
-================================================= -*/ -ND_ float2 Remap (const float2 src0, const float2 src1, const float2 dst0, const float2 dst1, const float2 v) { return (v - src0) / (src1 - src0) * (dst1 - dst0) + dst0; } -ND_ float3 Remap (const float3 src0, const float3 src1, const float3 dst0, const float3 dst1, const float3 v) { return (v - src0) / (src1 - src0) * (dst1 - dst0) + dst0; } -ND_ float4 Remap (const float4 src0, const float4 src1, const float4 dst0, const float4 dst1, const float4 v) { return (v - src0) / (src1 - src0) * (dst1 - dst0) + dst0; } +#define Gen_REMAP2( _type_ )\ + ND_ _type_ Remap (const _type_ src0, const _type_ src1, const _type_ dst0, const _type_ dst1, const _type_ v) { return (v - src0) / (src1 - src0) * (dst1 - dst0) + dst0; } + +#define Gen_REMAP( _stype_, _vtype_ )\ + Gen_REMAP1( _stype_, UNITE( _vtype_, 2 ))\ + Gen_REMAP1( UNITE( _vtype_, 2 ), UNITE( _vtype_, 2 ))\ + Gen_REMAP1( UNITE( _vtype_, 3 ), UNITE( _vtype_, 2 ))\ + Gen_REMAP1( UNITE( _vtype_, 4 ), UNITE( _vtype_, 2 ))\ + Gen_REMAP2( UNITE( _vtype_, 2 ))\ + Gen_REMAP2( UNITE( _vtype_, 3 ))\ + Gen_REMAP2( UNITE( _vtype_, 4 )) -ND_ float2 RemapClamp (const float2 src0, const float2 src1, const float2 dst0, const float2 dst1, const float2 v) { return Clamp( Remap( src0, src1, dst0, dst1, v ), dst0, dst1 ); } -ND_ float3 RemapClamp (const float3 src0, const float3 src1, const float3 dst0, const float3 dst1, const float3 v) { return Clamp( Remap( src0, src1, dst0, dst1, v ), dst0, dst1 ); } -ND_ float4 RemapClamp (const float4 src0, const float4 src1, const float4 dst0, const float4 dst1, const float4 v) { return Clamp( Remap( src0, src1, dst0, dst1, v ), dst0, dst1 ); } +Gen_REMAP( float, float_vec_t ) + +#if AE_ENABLE_HALF_TYPE + Gen_REMAP( half, half_vec_t ) +#endif +#if AE_ENABLE_DOUBLE_TYPE + Gen_REMAP( double, double_vec_t ) +#endif + +#undef Gen_REMAP2 +#undef Gen_REMAP1 +#undef Gen_REMAP + +#define RemapA( _fn_, _src_, _dst_, _val_ ) (_fn_( Remap( (_src_), 
(_dst_), (_val_) ), (_dst_).x, (_dst_).y )) +#define RemapB( _fn_, _src0_, _src1_, _dst0_, _dst1_, _val_ ) (_fn_( Remap( (_src0_), (_src1_), (_dst0_), (_dst1_), (_val_) ), (_dst0_), (_dst1_) )) + +#define RemapWrap( _src_, _dst_, _val_ ) RemapA( Wrap, (_src_), (_dst_), (_val_) ) +#define RemapClamp( _src_, _dst_, _val_ ) RemapA( Clamp, (_src_), (_dst_), (_val_) ) +#define RemapSmooth( _src_, _dst_, _val_ ) RemapA( SmoothStep, (_src_), (_dst_), (_val_) ) /* ================================================= @@ -1156,10 +1292,10 @@ ND_ float2 UIndexToUNormFloor (const int2 index, const int2 count) { return f ND_ float3 UIndexToUNormFloor (const int3 index, const int3 count) { return float3(index) / float3(count - 1); } ND_ float4 UIndexToUNormFloor (const int4 index, const int4 count) { return float4(index) / float4(count - 1); } -ND_ float UIndexToUNormFloor (const uint index, const uint count) { return float( index) / float( count - 1); } -ND_ float2 UIndexToUNormFloor (const uint2 index, const uint2 count) { return float2(index) / float2(count - 1); } -ND_ float3 UIndexToUNormFloor (const uint3 index, const uint3 count) { return float3(index) / float3(count - 1); } -ND_ float4 UIndexToUNormFloor (const uint4 index, const uint4 count) { return float4(index) / float4(count - 1); } +ND_ float UIndexToUNormFloor (const uint index, const uint count) { return float( index) / float( count - 1u); } +ND_ float2 UIndexToUNormFloor (const uint2 index, const uint2 count) { return float2(index) / float2(count - 1u); } +ND_ float3 UIndexToUNormFloor (const uint3 index, const uint3 count) { return float3(index) / float3(count - 1u); } +ND_ float4 UIndexToUNormFloor (const uint4 index, const uint4 count) { return float4(index) / float4(count - 1u); } ND_ float UIndexToUNormFloor (const float index, const float count) { return index / (count - 1.f); } ND_ float2 UIndexToUNormFloor (const float2 index, const float2 count) { return index / (count - 1.f); } @@ -1212,8 +1348,9 @@ ND_ 
uint3 IndexToVec3 (const uint index, const uint tile) { return IndexToV ================================================= SLerp / BiSLerp ---- - float3 SLerp (float3 x, float3 y, float factor) - float3 BiSLerp (float3 x1y1, float3 x2y1, float3 x1y2, float3 x2y2, float2 factor) + T SLerp (T x, T y, float factor) + T BiSLerp (T x1y1, T x2y1, T x1y2, T x2y2, float2 factor) + where 'T' is float2 or float3 ================================================= */ #define Gen_SLERP1( _stype_, _vtype_, _vtype2_ )\ @@ -1237,8 +1374,8 @@ ND_ uint3 IndexToVec3 (const uint index, const uint tile) { return IndexToV } #define Gen_SLERP( _stype_, _vtype_ )\ - Gen_SLERP1( _stype_, _vtype_##2, _vtype_##2 )\ - Gen_SLERP1( _stype_, _vtype_##3, _vtype_##2 ) + Gen_SLERP1( _stype_, UNITE( _vtype_, 2 ), UNITE( _vtype_, 2 ))\ + Gen_SLERP1( _stype_, UNITE( _vtype_, 3 ), UNITE( _vtype_, 2 )) Gen_SLERP( float, float_vec_t ) @@ -1295,15 +1432,15 @@ Gen_SLERP( float, float_vec_t ) IsZero / IsNotZero / IsNormalized ================================================= */ -ND_ bool IsZero (const float x) { return Abs(x) <= Epsilon(); } -ND_ bool2 IsZero (const float2 v) { return LessEqual( Abs(v), float2(Epsilon()) ); } -ND_ bool3 IsZero (const float3 v) { return LessEqual( Abs(v), float3(Epsilon()) ); } -ND_ bool4 IsZero (const float4 v) { return LessEqual( Abs(v), float4(Epsilon()) ); } +ND_ bool IsZero (const float x) { return Abs(x) <= float_epsilon; } +ND_ bool2 IsZero (const float2 v) { return LessEqual( Abs(v), float2(float_epsilon) ); } +ND_ bool3 IsZero (const float3 v) { return LessEqual( Abs(v), float3(float_epsilon) ); } +ND_ bool4 IsZero (const float4 v) { return LessEqual( Abs(v), float4(float_epsilon) ); } -ND_ bool IsNotZero (const float x) { return Abs(x) > Epsilon(); } -ND_ bool2 IsNotZero (const float2 v) { return Greater( Abs(v), float2(Epsilon()) ); } -ND_ bool3 IsNotZero (const float3 v) { return Greater( Abs(v), float3(Epsilon()) ); } -ND_ bool4 IsNotZero (const float4 v) { return 
Greater( Abs(v), float4(Epsilon()) ); } +ND_ bool IsNotZero (const float x) { return Abs(x) > float_epsilon; } +ND_ bool2 IsNotZero (const float2 v) { return Greater( Abs(v), float2(float_epsilon) ); } +ND_ bool3 IsNotZero (const float3 v) { return Greater( Abs(v), float3(float_epsilon) ); } +ND_ bool4 IsNotZero (const float4 v) { return Greater( Abs(v), float4(float_epsilon) ); } #define AllZeros( v ) All( IsZero( v )) #define AnyNotZero( v ) Any( IsNotZero( v )) @@ -1313,24 +1450,24 @@ ND_ bool4 IsNotZero (const float4 v) { return Greater( Abs(v), float4(Epsilon() ND_ bool IsNormalized (const float2 v, const float err) { float d = Dot( v, v ) - 1.f; return Abs(d) < err; } ND_ bool IsNormalized (const float3 v, const float err) { float d = Dot( v, v ) - 1.f; return Abs(d) < err; } -ND_ bool IsNormalized (const float2 v) { return IsNormalized( v, Epsilon() ); } -ND_ bool IsNormalized (const float3 v) { return IsNormalized( v, Epsilon() ); } +ND_ bool IsNormalized (const float2 v) { return IsNormalized( v, float_epsilon ); } +ND_ bool IsNormalized (const float3 v) { return IsNormalized( v, float_epsilon ); } /* ================================================= IsUNorm / IsSNorm ================================================= */ -#define Gen_IS_UNORM_SNORM( _stype_, _vtype_ ) \ - ND_ bool IsUNorm (const _stype_ x) { return All2( x >= _stype_(0.0), x <= _stype_(1.0) ); } \ - ND_ bool IsUNorm (const _vtype_##2 x) { return All2( AllGreaterEqual( x, _vtype_##2(0.0) ), AllLessEqual( x, _vtype_##2(1.0) )); } \ - ND_ bool IsUNorm (const _vtype_##3 x) { return All2( AllGreaterEqual( x, _vtype_##3(0.0) ), AllLessEqual( x, _vtype_##3(1.0) )); } \ - ND_ bool IsUNorm (const _vtype_##4 x) { return All2( AllGreaterEqual( x, _vtype_##4(0.0) ), AllLessEqual( x, _vtype_##4(1.0) )); } \ - \ - ND_ bool IsSNorm (const _stype_ x) { return Abs(x) <= 1.0; } \ - ND_ bool IsSNorm (const _vtype_##2 x) { return AllLessEqual( Abs(x), _vtype_##2(1.0) ); } \ - ND_ bool IsSNorm (const _vtype_##3 
x) { return AllLessEqual( Abs(x), _vtype_##3(1.0) ); } \ - ND_ bool IsSNorm (const _vtype_##4 x) { return AllLessEqual( Abs(x), _vtype_##4(1.0) ); } +#define Gen_IS_UNORM_SNORM( _stype_, _vtype_ ) \ + ND_ bool IsUNorm (const _stype_ x) { return All2( x >= _stype_(0.0), x <= _stype_(1.0) ); } \ + ND_ bool IsUNorm (const UNITE(_vtype_,2) x) { return All2( AllGreaterEqual( x, UNITE(_vtype_,2)(0.0) ), AllLessEqual( x, UNITE(_vtype_,2)(1.0) )); }\ + ND_ bool IsUNorm (const UNITE(_vtype_,3) x) { return All2( AllGreaterEqual( x, UNITE(_vtype_,3)(0.0) ), AllLessEqual( x, UNITE(_vtype_,3)(1.0) )); }\ + ND_ bool IsUNorm (const UNITE(_vtype_,4) x) { return All2( AllGreaterEqual( x, UNITE(_vtype_,4)(0.0) ), AllLessEqual( x, UNITE(_vtype_,4)(1.0) )); }\ + \ + ND_ bool IsSNorm (const _stype_ x) { return Abs(x) <= _stype_(1.0); } \ + ND_ bool IsSNorm (const UNITE(_vtype_,2) x) { return AllLessEqual( Abs(x), UNITE(_vtype_,2)(1.0) ); } \ + ND_ bool IsSNorm (const UNITE(_vtype_,3) x) { return AllLessEqual( Abs(x), UNITE(_vtype_,3)(1.0) ); } \ + ND_ bool IsSNorm (const UNITE(_vtype_,4) x) { return AllLessEqual( Abs(x), UNITE(_vtype_,4)(1.0) ); } Gen_IS_UNORM_SNORM( float, float_vec_t ) @@ -1353,11 +1490,11 @@ Gen_IS_UNORM_SNORM( float, float_vec_t ) per component comparator ================================================= */ -#define Gen_FPEQUAL( _stype_, _vtype_ ) \ - ND_ bool FpEqual (const _stype_ lhs, const _stype_ rhs, const _stype_ err) { return Abs( lhs - rhs ) < err; } \ - ND_ bool2 FpEqual (const _vtype_##2 lhs, const _vtype_##2 rhs, const _stype_ err) { return Less( Abs( lhs - rhs ), _vtype_##2(err) ); } \ - ND_ bool3 FpEqual (const _vtype_##3 lhs, const _vtype_##3 rhs, const _stype_ err) { return Less( Abs( lhs - rhs ), _vtype_##3(err) ); } \ - ND_ bool4 FpEqual (const _vtype_##4 lhs, const _vtype_##4 rhs, const _stype_ err) { return Less( Abs( lhs - rhs ), _vtype_##4(err) ); } +#define Gen_FPEQUAL( _stype_, _vtype_ ) \ + ND_ bool FpEqual (const _stype_ lhs, const _stype_ rhs, 
const _stype_ err) { return Abs( lhs - rhs ) < err; } \ + ND_ bool2 FpEqual (const UNITE(_vtype_,2) lhs, const UNITE(_vtype_,2) rhs, const _stype_ err) { return Less( Abs( lhs - rhs ), UNITE(_vtype_,2)(err) ); } \ + ND_ bool3 FpEqual (const UNITE(_vtype_,3) lhs, const UNITE(_vtype_,3) rhs, const _stype_ err) { return Less( Abs( lhs - rhs ), UNITE(_vtype_,3)(err) ); } \ + ND_ bool4 FpEqual (const UNITE(_vtype_,4) lhs, const UNITE(_vtype_,4) rhs, const _stype_ err) { return Less( Abs( lhs - rhs ), UNITE(_vtype_,4)(err) ); } Gen_FPEQUAL( float, float_vec_t ) @@ -1403,11 +1540,12 @@ Gen_SWAP( float ) to get the same order as in FS. ================================================= */ -#if defined(AE_shader_subgroup_quad) and defined(AE_shader_subgroup_basic) +#if defined(AE_shader_subgroup_quad) and defined(AE_shader_subgroup_basic) and defined(AE_subgroupBroadcastDynamicId) # define QuadGroup_dFdxFine( _a_ ) (gl.quadGroup.Broadcast( (_a_), (gl.subgroup.Index&2)|1 ) - gl.quadGroup.Broadcast( (_a_), gl.subgroup.Index&2 )) # define QuadGroup_dFdyFine( _a_ ) (gl.quadGroup.Broadcast( (_a_), (gl.subgroup.Index&1)|2 ) - gl.quadGroup.Broadcast( (_a_), gl.subgroup.Index&1 )) # define QuadGroup_fwidthFine( _a_ ) Abs(QuadGroup_dFdxFine(_a_)) + Abs(QuadGroup_dFdyFine(_a_)) - +#endif +#if defined(AE_shader_subgroup_quad) and defined(AE_shader_subgroup_basic) # define QuadGroup_dFdxCoarse( _a_ ) (gl.quadGroup.Broadcast( (_a_), 1 ) - gl.quadGroup.Broadcast( (_a_), 0 )) # define QuadGroup_dFdyCoarse( _a_ ) (gl.quadGroup.Broadcast( (_a_), 2 ) - gl.quadGroup.Broadcast( (_a_), 0 )) # define QuadGroup_fwidthCoarse( _a_ ) Abs(QuadGroup_dFdxCoarse(_a_)) + Abs(QuadGroup_dFdyCoarse(_a_)) @@ -1422,6 +1560,3 @@ void dbg_EnableTraceRecording (bool b) {} void dbg_PauseTraceRecording (bool b) {} void dbg_EnableProfiling (bool b) {} - -// TODO: -// Exponentiation https://docs.nvidia.com/cuda/cuda-c-best-practices-guide/index.html#exponentiation-with-small-fractional-arguments diff --git 
a/AE/engine/shared_data/shaders/Matrix.glsl b/AE/engine/shared_data/shaders/Matrix.glsl index c3db8bc0..3623d43e 100644 --- a/AE/engine/shared_data/shaders/Matrix.glsl +++ b/AE/engine/shared_data/shaders/Matrix.glsl @@ -26,6 +26,7 @@ ND_ float2 GetTranslation2D (const float3x2 m); ND_ float3 GetTranslation3D (const float4x4 m); ND_ float3 GetTranslation3D (const float4x3 m); + // Rotation is clockwise for -Y axis (default in Vulkan) ND_ float2x2 f2x2_Rotate (const float angle); @@ -39,6 +40,25 @@ ND_ float4x4 f4x4_RotateY (const float angle); ND_ float4x4 f4x4_RotateZ (const float angle); ND_ float4x4 f4x4_Rotate (const float angle, const float3 axis); + +// Projection +ND_ float4x4 f4x4_Ortho (const float4 viewport, const float2 range); +ND_ float4x4 f4x4_InfinitePerspective (const float fovY, const float aspect, const float zNear); +ND_ float4x4 f4x4_Perspective (float fovY, const float aspect, const float2 range); +ND_ float4x4 f4x4_Perspective (const float fovY, const float2 viewportSize, const float2 range); + + +// Scale +ND_ float2x2 f2x2_Scale (const float value); +ND_ float2x2 f2x2_Scale (const float2 value); + +ND_ float3x3 f3x3_Scale (const float value); +ND_ float3x3 f3x3_Scale (const float3 value); + +ND_ float4x4 f4x4_Scale (const float value); +ND_ float4x4 f4x4_Scale (const float3 value); + + ND_ float2 GetDirection2D (const float angle); ND_ float2 GetDirection2D (const float3x3 m); @@ -62,41 +82,50 @@ ND_ float3 ViewDir (const float4x4 invMat, const float2 unormPos); //----------------------------------------------------------------------------- +// GLSL specs: +// "If there is a single scalar parameter to a matrix constructor, +// it is used to initialize all the components on the matrix’s diagonal, +// with the remaining components initialized to 0.0." 
float2x2 f2x2_Identity () { - return float2x2( float2( 1.f, 0.f ), - float2( 0.f, 1.f )); + return float2x2( 1.f ); +// return float2x2( float2( 1.f, 0.f ), +// float2( 0.f, 1.f )); } float3x3 f3x3_Identity () { - return float3x3( float3( 1.f, 0.f, 0.f ), - float3( 0.f, 1.f, 0.f ), - float3( 0.f, 0.f, 1.f )); + return float3x3( 1.f ); +// return float3x3( float3( 1.f, 0.f, 0.f ), +// float3( 0.f, 1.f, 0.f ), +// float3( 0.f, 0.f, 1.f )); } float3x4 f3x4_Identity () { - return float3x4( float4( 1.f, 0.f, 0.f, 0.f ), - float4( 0.f, 1.f, 0.f, 0.f ), - float4( 0.f, 0.f, 1.f, 0.f )); + return float3x4( 1.f ); +// return float3x4( float4( 1.f, 0.f, 0.f, 0.f ), +// float4( 0.f, 1.f, 0.f, 0.f ), +// float4( 0.f, 0.f, 1.f, 0.f )); } float4x3 f4x3_Identity () { - return float4x3( float3( 1.f, 0.f, 0.f ), - float3( 0.f, 1.f, 0.f ), - float3( 0.f, 0.f, 1.f ), - float3( 0.f, 0.f, 0.f )); + return float4x3( 1.f ); +// return float4x3( float3( 1.f, 0.f, 0.f ), +// float3( 0.f, 1.f, 0.f ), +// float3( 0.f, 0.f, 1.f ), +// float3( 0.f, 0.f, 0.f )); } float4x4 f4x4_Identity () { - return float4x4( float4( 1.f, 0.f, 0.f, 0.f ), - float4( 0.f, 1.f, 0.f, 0.f ), - float4( 0.f, 0.f, 1.f, 0.f ), - float4( 0.f, 0.f, 0.f, 1.f )); + return float4x4( 1.f ); +// return float4x4( float4( 1.f, 0.f, 0.f, 0.f ), +// float4( 0.f, 1.f, 0.f, 0.f ), +// float4( 0.f, 0.f, 1.f, 0.f ), +// float4( 0.f, 0.f, 0.f, 1.f )); } //----------------------------------------------------------------------------- @@ -212,6 +241,27 @@ float4x4 f4x4_Rotate (const float angle, const float3 axis) { float4x4 m = flo //----------------------------------------------------------------------------- +float2x2 f2x2_Scale (const float2 value) +{ + return float2x2( value.x, 0.f, + 0.f, value.y ); +} + +float3x3 f3x3_Scale (const float3 value) +{ + return float3x3( value.x, 0.f, 0.f, + 0.f, value.y, 0.f, + 0.f, 0.f, value.z ); +} + +float2x2 f2x2_Scale (const float value) { return f2x2_Scale( float2(value) ); } +float3x3 
f3x3_Scale (const float value) { return f3x3_Scale( float3(value) ); } + +float4x4 f4x4_Scale (const float3 value) { float4x4 m = float4x4(f3x3_Scale( value )); m[3][3] = 1.f; return m; } +float4x4 f4x4_Scale (const float value) { return f4x4_Scale( float3(value) ); } +//----------------------------------------------------------------------------- + + float3x3 LookAt (const float3 dir, const float3 up) { float3x3 m; @@ -282,3 +332,68 @@ float3 GetAxisY (const float4x4 m) { return float3( m[0][1], m[1][1], m[2][1] float3 GetAxisZ (const float3x3 m) { return float3( m[0][2], m[1][2], m[2][2] ); } float3 GetAxisZ (const float4x4 m) { return float3( m[0][2], m[1][2], m[2][2] ); } //----------------------------------------------------------------------------- + + +#ifdef AE_LICENSE_MIT + +// based on code from GLM (MIT license) https://github.com/g-truc/glm + +float4x4 f4x4_InfinitePerspective (const float fovY, const float aspect, const float zNear) +{ + const float range = Tan( fovY * 0.5 ) * zNear; + const float left = -range * aspect; + const float right = range * aspect; + const float bottom = -range; + const float top = range; + + float4x4 result = float4x4( 0.f ); + result[0][0] = (2.f * zNear) / (right - left); + result[1][1] = (2.f * zNear) / (top - bottom); + result[2][2] = 1.f; + result[2][3] = 1.f; + result[3][2] = - zNear; + return result; +} + +float4x4 f4x4_Ortho (const float4 viewport, const float2 range) +{ + // viewport - {left, top, right, bottom}, range - {zNear, zFar}; depth in 'range' is mapped to [0,1], same convention as f4x4_Perspective + float4x4 result = float4x4( 1.f ); + result[0][0] = 2.f / (viewport.z - viewport.x); + result[1][1] = 2.f / (viewport.y - viewport.w); + result[2][2] = 1.f / (range.y - range.x); + result[3][0] = - (viewport.z + viewport.x) / (viewport.z - viewport.x); + result[3][1] = - (viewport.y + viewport.w) / (viewport.y - viewport.w); + result[3][2] = - range.x / (range.y - range.x); + return result; +} + +float4x4 f4x4_Perspective (float fovY, const float aspect, const float2 range) +{ + fovY = Tan( fovY * 0.5f ); + + float4x4 result = float4x4( 0.f ); + result[0][0] = 1.f / (aspect 
* fovY); + result[1][1] = 1.f / fovY; + result[2][2] = range.y / (range.y - range.x); + result[2][3] = 1.f; + result[3][2] = -(range.y * range.x) / (range.y - range.x); + return result; +} + +float4x4 f4x4_Perspective (const float fovY, const float2 viewportSize, const float2 range) +{ + const float h = Cos( 0.5f * fovY ) / Sin( 0.5f * fovY ); + const float w = h * viewportSize.y / viewportSize.x; + + float4x4 result = float4x4( 0.f ); + result[0][0] = w; + result[1][1] = h; + result[2][2] = range.y / (range.y - range.x); + result[2][3] = 1.f; + result[3][2] = -(range.y * range.x) / (range.y - range.x); + return result; +} + +#endif // AE_LICENSE_MIT +//----------------------------------------------------------------------------- diff --git a/AE/engine/shared_data/shaders/Noise.glsl b/AE/engine/shared_data/shaders/Noise.glsl index d6aab8b5..94a2fc5f 100644 --- a/AE/engine/shared_data/shaders/Noise.glsl +++ b/AE/engine/shared_data/shaders/Noise.glsl @@ -165,6 +165,11 @@ ND_ float3 _FBMTransform (const float3 pos) -0.3461, 0.7324, 0.5862, 0.2588, -0.5260, 0.8100 ); return rot * pos; + #elif 0 + const float3x3 rot = float3x3( 0.00, 1.60, 1.20, + -1.60, 0.72, -0.96, + -1.20, -0.96, 1.28 ); + return rot * pos; #else const float4x3 rot = float4x3( 0.9017, 0.4321, -0.00746, -0.3461, 0.7324, 0.5862, diff --git a/AE/engine/shared_data/shaders/Quaternion.glsl b/AE/engine/shared_data/shaders/Quaternion.glsl index 40487707..2e8669fb 100644 --- a/AE/engine/shared_data/shaders/Quaternion.glsl +++ b/AE/engine/shared_data/shaders/Quaternion.glsl @@ -10,34 +10,34 @@ #include "Math.glsl" -struct quat +struct Quat { - float4 data; + float4 data; // x, y, z, w }; -ND_ quat QIdentity (); -ND_ quat QCreate (const float4 v); -ND_ quat QCreate (const float3 axis, const float angle); -ND_ quat QCreate (float x, float y, float z, float w); +ND_ Quat QIdentity (); +ND_ Quat QCreate (const float4 v); +ND_ Quat QCreate (const float3 axis, const float angle); +ND_ Quat QCreate (float x, float 
y, float z, float w); -ND_ quat QNormalize (const quat q); -ND_ quat QInverse (const quat q); +ND_ Quat QNormalize (const Quat q); +ND_ Quat QInverse (const Quat q); -ND_ quat QMul (const quat left, const quat right); -ND_ float3 QMul (const quat left, const float3 right); +ND_ Quat QMul (const Quat left, const Quat right); +ND_ float3 QMul (const Quat left, const float3 right); -ND_ float QDot (const quat left, const quat right); -ND_ quat QSlerp (const quat qx, const quat qy, const float factor); +ND_ float QDot (const Quat left, const Quat right); +ND_ Quat QSlerp (const Quat qx, const Quat qy, const float factor); -ND_ float3 QDirection (const quat q); -ND_ quat QLookAt (const float3 from, const float3 to); -ND_ quat QLookAt (const float3 dir); +ND_ float3 QDirection (const Quat q); +ND_ Quat QLookAt (const float3 from, const float3 to); +ND_ Quat QLookAt (const float3 dir); -ND_ quat QRotationX (const float angleRad); -ND_ quat QRotationY (const float angleRad); -ND_ quat QRotationZ (const float angleRad); -ND_ quat QRotation (const float3 anglesRad); +ND_ Quat QRotationX (const float angleRad); +ND_ Quat QRotationY (const float angleRad); +ND_ Quat QRotationZ (const float angleRad); +ND_ Quat QRotation (const float3 anglesRad); //----------------------------------------------------------------------------- @@ -47,9 +47,9 @@ ND_ quat QRotation (const float3 anglesRad); QIdentity ================================================= */ -quat QIdentity () +Quat QIdentity () { - quat ret; + Quat ret; ret.data = float4( 0.0, 0.0, 0.0, 1.0 ); return ret; } @@ -59,23 +59,23 @@ quat QIdentity () QCreate ================================================= */ -quat QCreate (const float4 v) +Quat QCreate (const float4 v) { - quat ret; + Quat ret; ret.data = v; return ret; } -quat QCreate (const float3 axis, const float angle) +Quat QCreate (const float3 axis, const float angle) { - quat ret; + Quat ret; ret.data = float4( axis, angle ); return ret; } -quat QCreate (float x, 
float y, float z, float w) +Quat QCreate (float x, float y, float z, float w) { - quat ret; + Quat ret; ret.data = float4( x, y, z, w ); return ret; } @@ -85,9 +85,9 @@ quat QCreate (float x, float y, float z, float w) QNormalize ================================================= */ -quat QNormalize (const quat q) +Quat QNormalize (const Quat q) { - quat ret = q; + Quat ret = q; float n = Dot( q.data, q.data ); if ( n == 1.0 ) @@ -102,9 +102,9 @@ quat QNormalize (const quat q) QInverse ================================================= */ -quat QInverse (const quat q) +Quat QInverse (const Quat q) { - quat ret; + Quat ret; ret.data.xyz = -q.data.xyz; ret.data.w = q.data.w; return ret; @@ -115,9 +115,9 @@ quat QInverse (const quat q) QMul ================================================= */ -quat QMul (const quat left, const quat right) +Quat QMul (const Quat left, const Quat right) { - quat ret; + Quat ret; ret.data.xyz = left.data.w * right.data.xyz + left.data.xyz * right.data.w + @@ -134,7 +134,7 @@ quat QMul (const quat left, const quat right) QMul ================================================= */ -float3 QMul (const quat left, const float3 right) +float3 QMul (const Quat left, const float3 right) { float3 q = left.data.xyz; float3 uv = Cross( q, right ); @@ -148,7 +148,7 @@ float3 QMul (const quat left, const float3 right) QDot ================================================= */ -float QDot (const quat left, const quat right) +float QDot (const Quat left, const Quat right) { return Dot( left.data, right.data ); } @@ -158,9 +158,9 @@ float QDot (const quat left, const quat right) QSlerp ================================================= */ -quat QSlerp (const quat qx, const quat qy, const float factor) +Quat QSlerp (const Quat qx, const Quat qy, const float factor) { - quat ret; + Quat ret; float4 qz = qy.data; float cos_theta = Dot( qx.data, qy.data ); @@ -170,7 +170,7 @@ quat QSlerp (const quat qx, const quat qy, const float factor) cos_theta = -cos_theta; } 
- if ( cos_theta > 1.0 - Epsilon() ) + if ( cos_theta > 1.0 - float_epsilon ) { ret.data = Lerp( qx.data, qy.data, factor ); } @@ -189,7 +189,7 @@ quat QSlerp (const quat qx, const quat qy, const float factor) QDirection ================================================= */ -float3 QDirection (const quat q) +float3 QDirection (const Quat q) { return float3( 2.0 * q.data.x * q.data.z + 2.0 * q.data.y * q.data.w, 2.0 * q.data.z * q.data.y - 2.0 * q.data.x * q.data.w, @@ -201,9 +201,9 @@ float3 QDirection (const quat q) QRotationX ================================================= */ -quat QRotationX (const float angleRad) +Quat QRotationX (const float angleRad) { - quat q; + Quat q; float a = angleRad * 0.5; q.data = float4( Sin(a), 0.0, 0.0, Cos(a) ); @@ -215,9 +215,9 @@ quat QRotationX (const float angleRad) QRotationY ================================================= */ -quat QRotationY (const float angleRad) +Quat QRotationY (const float angleRad) { - quat q; + Quat q; float a = angleRad * 0.5; q.data = float4( 0.0, Sin(a), 0.0, Cos(a) ); @@ -229,9 +229,9 @@ quat QRotationY (const float angleRad) QRotationZ ================================================= */ -quat QRotationZ (const float angleRad) +Quat QRotationZ (const float angleRad) { - quat q; + Quat q; float a = angleRad * 0.5; q.data = float4( 0.0, 0.0, Sin(a), Cos(a) ); @@ -243,7 +243,7 @@ quat QRotationZ (const float angleRad) QRotation ================================================= */ -quat QRotation (const float3 anglesRad) +Quat QRotation (const float3 anglesRad) { return QMul( QMul( QRotationX( anglesRad.x ), QRotationY( anglesRad.y )), QRotationZ( anglesRad.z )); } @@ -253,12 +253,12 @@ quat QRotation (const float3 anglesRad) QLookAt ================================================= */ -quat QLookAt (const float3 from, const float3 to) +Quat QLookAt (const float3 from, const float3 to) { return QLookAt( to - from ); } -quat QLookAt (const float3 dir) +Quat QLookAt (const float3 dir) { float3 fwd = 
float3(0.0, 0.0, 1.0); float3 axis = Cross( fwd, dir ); diff --git a/AE/engine/shared_data/shaders/Ray.glsl b/AE/engine/shared_data/shaders/Ray.glsl index cd8aeba9..0d24a67b 100644 --- a/AE/engine/shared_data/shaders/Ray.glsl +++ b/AE/engine/shared_data/shaders/Ray.glsl @@ -47,7 +47,7 @@ ND_ float3 Ray_CalcX (const Ray ray, const float2 pointYZ); ND_ float3 Ray_CalcY (const Ray ray, const float2 pointXZ); ND_ float3 Ray_CalcZ (const Ray ray, const float2 pointXY); ND_ bool Ray_Contains (const Ray ray, const float3 point); - void Ray_Rotate (inout Ray ray, const quat rotation); + void Ray_Rotate (inout Ray ray, const Quat rotation); void Ray_Rotate (inout Ray ray, const float3x3 rotation); void Ray_Move (inout Ray ray, const float delta); void Ray_SetLength (inout Ray ray, const float length); @@ -218,7 +218,7 @@ float2 Inverted_PlaneToVR180 (const float3 rayDir, const uint eye) float phi = ATan( rayDir.z, rayDir.x ); theta = (theta + Pi() * 0.5f) / Pi(); - phi = (Pi() - phi) / Pi2(); + phi = (Pi() - phi) / float_Pi2; phi = Fract( phi - 0.125f ) * 2.f + (eye == 0 ? 0.f : 0.5f); return float2( phi, theta ); @@ -258,7 +258,7 @@ float2 Inverted_PlaneToVR360 (const float3 rayDir, const uint eye) theta = (theta + Pi() * 0.5f) * 0.5f / Pi(); theta += (eye == 0 ? 
0.f : 0.5f); - phi = (Pi() - phi) / Pi2(); + phi = (Pi() - phi) / float_Pi2; return float2( Fract( phi - 0.75f ), theta ); } @@ -290,7 +290,7 @@ float2 Inverted_PlaneTo360 (const float3 rayDir) float phi = ATan( rayDir.z, rayDir.x ); theta = (theta + Pi() * 0.5) / Pi(); - phi = (Pi() - phi) / Pi2(); + phi = (Pi() - phi) / float_Pi2; return float2( Fract( phi - 0.75 ), theta ); } @@ -438,7 +438,7 @@ bool Ray_Contains (const Ray ray, const float3 point) view matrix must be transposed ================================================= */ -void Ray_Rotate (inout Ray ray, const quat rotation) +void Ray_Rotate (inout Ray ray, const Quat rotation) { // ray.origin - const ray.dir = Normalize( QMul( rotation, ray.dir )); diff --git a/AE/engine/shared_data/shaders/SDF.glsl b/AE/engine/shared_data/shaders/SDF.glsl index a94e3018..c94deff3 100644 --- a/AE/engine/shared_data/shaders/SDF.glsl +++ b/AE/engine/shared_data/shaders/SDF.glsl @@ -69,9 +69,9 @@ ND_ float2 SDF_Move (const float2 position, const float delta); ND_ float2 SDF_Move (const float2 position, const float2 delta); ND_ float3 SDF_Move (const float3 position, const float delta); ND_ float3 SDF_Move (const float3 position, const float3 delta); -ND_ float3 SDF_Rotate (const float3 position, const quat q); +ND_ float3 SDF_Rotate (const float3 position, const Quat q); ND_ float2 SDF_Rotate2D (const float2 position, const float angle); -ND_ float3 SDF_Transform (const float3 position, const quat q, const float3 delta); +ND_ float3 SDF_Transform (const float3 position, const Quat q, const float3 delta); #if 0 // macros ND_ float SDF_Scale (const float2 position, float scale, float (*sdf)(float2)); @@ -234,7 +234,7 @@ float AA_Lines (float x, const float invStep, const float falloffPx) return res; } #endif -#if defined(SH_COMPUTE) and defined(AE_shader_subgroup_basic) +#if defined(SH_COMPUTE) and defined(QuadGroup_dFdxFine) float2 AA_Line_dxdy (const float2 uv, float2 dist, const float2 thicknessAndFalloffPx) { float2 dx 
= Abs( QuadGroup_dFdxFine( uv )); @@ -263,7 +263,7 @@ float AA_Lines (float x, const float invStep, const float falloffPx) return res; } #endif -#if defined(SH_COMPUTE) and defined(AE_shader_subgroup_basic) +#if defined(SH_COMPUTE) and defined(QuadGroup_dFdxFine) float2 AA_Line_dxdy (const float3 uvw, float3 dist, const float2 thicknessAndFalloffPx) { float3 dx = Abs( QuadGroup_dFdxFine( uvw )); @@ -278,7 +278,7 @@ float AA_Lines (float x, const float invStep, const float falloffPx) } #endif -#if defined(SH_FRAG) or (defined(SH_COMPUTE) and defined(AE_shader_subgroup_basic)) +#if defined(SH_FRAG) or (defined(SH_COMPUTE) and defined(QuadGroup_dFdxFine)) float2 AA_QuadGrid_dxdy (const float2 uv, const float2 thicknessAndFalloffPx) { return AA_Line_dxdy( uv, TriangleWave( uv ), thicknessAndFalloffPx ); diff --git a/AE/engine/shared_data/shaders/TexSampling.glsl b/AE/engine/shared_data/shaders/TexSampling.glsl index c1e8026c..831f281a 100644 --- a/AE/engine/shared_data/shaders/TexSampling.glsl +++ b/AE/engine/shared_data/shaders/TexSampling.glsl @@ -12,6 +12,7 @@ ND_ float4 TriplanarMapping (const float3 uvw, float3 dir, gl::CombinedTex2D samp); +ND_ float4 TexFilter (gl::CombinedTex2D tex, float2 uv); //----------------------------------------------------------------------------- @@ -24,3 +25,28 @@ float4 TriplanarMapping (const float3 uvw, float3 dir, gl::CombinedTex2D dir = Abs(Normalize( dir )); return (a * dir.x) + (b * dir.y) + (c * dir.z); } + +/* +================================================= + TexFilter +---- + from https://www.shadertoy.com/view/XsfGDn + The MIT License + Copyright © 2013 Inigo Quilez +================================================= +*/ +#ifdef AE_LICENSE_MIT + float4 TexFilter (gl::CombinedTex2D tex, float2 uv) + { + float2 dim = float2(gl.texture.GetSize( tex, 0 )); + uv = uv * dim + 0.5; + + float2 iuv = Floor( uv ); + float2 fuv = uv - iuv; + + uv = iuv + fuv*fuv * (3.0 - 2.0 * fuv); + uv = (uv - 0.5) / dim; + + return 
gl.texture.Sample( tex, uv ); + } +#endif diff --git a/AE/engine/shared_data/shaders/TilableNoise.glsl b/AE/engine/shared_data/shaders/TilableNoise.glsl index f7ce3e53..7c42e791 100644 --- a/AE/engine/shared_data/shaders/TilableNoise.glsl +++ b/AE/engine/shared_data/shaders/TilableNoise.glsl @@ -10,15 +10,21 @@ #include "Hash.glsl" -ND_ float TilableVoronoiNoise (const float3 pos, const float3 tileSize, const float2 seedScaleBias); // range [0..inf] -ND_ float TilableVoronoiNoise (const float3 pos, const float3 tileSize); // range [0..inf] -ND_ float TilableWarleyNoise (const float3 pos, const float3 tileSize, const float2 seedScaleBias); // range [0..1] -ND_ float TilableWarleyNoise (const float3 pos, const float3 tileSize); // range [0..1] +ND_ float TileableVoronoiNoise (const float3 pos, const float3 tileSize, const float2 seedScaleBias); // range [0..inf] +ND_ float TileableVoronoiNoise (const float3 pos, const float3 tileSize); // range [0..inf] +ND_ float TileableWarleyNoise (const float3 pos, const float3 tileSize, const float2 seedScaleBias); // range [0..1] +ND_ float TileableWarleyNoise (const float3 pos, const float3 tileSize); // range [0..1] + +ND_ float TileableGradientNoise (const float3 x, const float freq); // range ??? 
+//----------------------------------------------------------------------------- + + +#include "TileableNoise.glsl" //----------------------------------------------------------------------------- // range [0..inf] -float TilableVoronoiNoise (const float3 pos, const float3 tileSize, const float2 seedScaleBias) +float TileableVoronoiNoise (const float3 pos, const float3 tileSize, const float2 seedScaleBias) { float3 ipoint = Floor( pos * tileSize ); float3 fpoint = Fract( pos * tileSize ); @@ -38,21 +44,21 @@ float TilableVoronoiNoise (const float3 pos, const float3 tileSize, const float } // range [0..inf] -float TilableVoronoiNoise (const float3 pos, const float3 tileSize) +float TileableVoronoiNoise (const float3 pos, const float3 tileSize) { - return TilableVoronoiNoise( pos, tileSize, float2(1.0, 0.0) ); + return TileableVoronoiNoise( pos, tileSize, float2(1.0, 0.0) ); } // range [0..1] -float TilableWarleyNoise (const float3 pos, const float3 tileSize, const float2 seedScaleBias) +float TileableWarleyNoise (const float3 pos, const float3 tileSize, const float2 seedScaleBias) { - return Max( 1.0 - TilableVoronoiNoise( pos, tileSize, seedScaleBias ), 0.0 ); + return Max( 1.0 - TileableVoronoiNoise( pos, tileSize, seedScaleBias ), 0.0 ); } // range [0..1] -float TilableWarleyNoise (const float3 pos, const float3 tileSize) +float TileableWarleyNoise (const float3 pos, const float3 tileSize) { - return TilableWarleyNoise( pos, tileSize, float2(1.0, 0.0) ); + return TileableWarleyNoise( pos, tileSize, float2(1.0, 0.0) ); } //----------------------------------------------------------------------------- diff --git a/AE/engine/shared_data/shaders/aestyle.glsl.h b/AE/engine/shared_data/shaders/aestyle.glsl.h index bd4fd312..dca2ec99 100644 --- a/AE/engine/shared_data/shaders/aestyle.glsl.h +++ b/AE/engine/shared_data/shaders/aestyle.glsl.h @@ -26,15 +26,34 @@ # define AE_NV_shader_sm_builtins # define AE_ARM_shader_core_builtins # define AE_fragment_shading_rate + +# 
define AE_AMD_GPU +# define AE_NVidia_GPU +# define AE_Intel_GPU +# define AE_ARM_Mali_GPU +# define AE_Qualcomm_Adreno_GPU +# define AE_IMG_PowerVR_GPU +# define AE_Microsoft_GPU +# define AE_Apple_GPU +# define AE_Mesa_GPU_driver +# define AE_Broadcom_GPU +# define AE_Samsung_GPU +# define AE_VeriSilicon_GPU +# define AE_Huawei_GPU #endif #define isinf _IsInf #define isnan _IsNaN +#define and && +#define or || + #define highp #define mediump #define lowp #define precise // avoid optimizations +#define WGShared // workgroup shared variable qualifier +#define invariant // all shaders must output same result on same input #define out #define inout @@ -45,6 +64,7 @@ #define OUT // used for vec/mat type building (templates) +// AEStyleGLSLPreprocessor will replace it by GLSL vec/mat type without dimension suffix #define float_vec_t float #define float_mat_t float #define double_vec_t double @@ -149,7 +169,7 @@ template ND_ _Vec mod (const _Vec x, const T y); template ND_ T mod (const T x, const T y); template ND_ T modf (const T x, OUT T &i); template ND_ _Vec normalize (const _Vec); - // ND_ _Vec not (const _Vec); +template ND_ _Vec not (const _Vec); template ND_ _Vec notEqual (const _Vec x, const _Vec y); ND_ double packDouble2x32 (const uint2); ND_ uint2 unpackDouble2x32 (double); @@ -196,10 +216,10 @@ template ND_ _Matrix transpose (const _Matrix< #if 1 ND_ slong pack64 (const int2 v); ND_ ulong pack64 (const uint2 v); - ND_ slong pack64 (const short4 v); + ND_ slong pack64 (const sshort4 v); ND_ ulong pack64 (const ushort4 v); - ND_ int pack32 (const short2 v); + ND_ int pack32 (const sshort2 v); ND_ uint pack32 (const ushort2 v); ND_ int pack32 (const sbyte4 v); ND_ uint pack32 (const ubyte4 v); @@ -209,13 +229,13 @@ template ND_ _Matrix transpose (const _Matrix< ND_ int2 unpack64 (const slong v); ND_ uint2 unpack64 (const ulong v); - ND_ short4 unpack64 (const slong v); - ND_ ushort4 unpack64 (const ulong v); + // ND_ sshort4 unpack64 (const slong v); + // ND_ 
ushort4 unpack64 (const ulong v); - ND_ short2 unpack32 (const int v); + ND_ sshort2 unpack32 (const int v); ND_ ushort2 unpack32 (const uint v); - ND_ sbyte4 unpack32 (const int v); - ND_ ubyte4 unpack32 (const uint v); + // ND_ sbyte4 unpack32 (const int v); + // ND_ ubyte4 unpack32 (const uint v); ND_ sbyte2 unpack16 (const short v); ND_ ubyte2 unpack16 (const ushort v); @@ -228,13 +248,13 @@ template ND_ _Matrix transpose (const _Matrix< ND_ uint packFloat2x16 (const half2 v); ND_ half2 unpackFloat2x16 (const uint v); - ND_ int packInt2x16 (const short2 v); - ND_ slong packInt4x16 (const short4 v); + ND_ int packInt2x16 (const sshort2 v); + ND_ slong packInt4x16 (const sshort4 v); ND_ uint packUint2x16 (const ushort2 v); ND_ ulong packUint4x16 (const ushort4 v); - ND_ short2 unpackInt2x16 (const int v); - ND_ short4 unpackInt4x16 (const slong v); + ND_ sshort2 unpackInt2x16 (const int v); + ND_ sshort4 unpackInt4x16 (const slong v); ND_ ushort2 unpackUint2x16 (const uint v); ND_ ushort4 unpackUint4x16 (const ulong v); @@ -354,9 +374,9 @@ struct gl #ifdef AE_subgroup_uniform_qualifier // It can be applied to: // * variable declarations qualified as 'in' - // * global variable declarations with no storage qualifier - // * local variable declarations with no storage qualifier - // * function parameter declarations and function return types. + // * global variable declarations with no storage qualifier + // * local variable declarations with no storage qualifier + // * function parameter declarations and function return types. 
template ND_ T SubgroupUniform (const T &); #endif @@ -861,12 +881,20 @@ struct gl void Image () const; #if defined(SH_COMPUTE) or defined(SH_MESH_TASK) or defined(SH_MESH) - void Shared () const; // for shared variables + void Shared () const; // for 'WGShared' variables #endif } memoryBarrier {}; void ExecutionBarrier () const; ND_ bool Elect () const; + + // in + const uint Size; + const uint Index; // in FS QuadIndex = Index & 3 + # ifdef SH_COMPUTE + const uint GroupCount; // in workgroup + const uint GroupIndex; // in workgroup + # endif #endif #ifdef AE_shader_subgroup_vote @@ -888,6 +916,13 @@ struct gl ND_ uint BallotExclusiveBitCount (const uint4) const; ND_ uint BallotFindLSB (const uint4) const; ND_ uint BallotFindMSB (const uint4) const; + + // in + const uint4 EqMask; + const uint4 GeMask; + const uint4 GtMask; + const uint4 LeMask; + const uint4 LtMask; #endif #ifdef AE_shader_subgroup_shuffle @@ -935,24 +970,6 @@ struct gl template ND_ T ClusteredXor (const T value, uint clasterSize) const; #endif - - // in - #ifdef AE_shader_subgroup_basic - const uint Size; - const uint Index; // in FS QuadIndex = Index & 3 - # ifdef SH_COMPUTE - const uint GroupCount; // in workgroup - const uint GroupIndex; // in workgroup - # endif - #endif - #ifdef AE_shader_subgroup_ballot - const uint4 EqMask; - const uint4 GeMask; - const uint4 GtMask; - const uint4 LeMask; - const uint4 LtMask; - #endif - } subgroup {}; struct { @@ -1202,17 +1219,22 @@ struct gl void WorkgroupBarrier (); #ifdef AE_memory_scope_semantics + // if 'sem' is non-zero, then 'storage' semantics must not be zero. 
void ExecutionBarrier (gl::Scope execution, gl::Scope memory, gl::StorageSemantics storage, gl::Semantics sem); - void MemoryBarrier (gl::Scope execution, gl::Scope memory, gl::StorageSemantics storage, gl::Semantics sem); + + void MemoryBarrier (gl::Scope memory, gl::StorageSemantics storage, gl::Semantics sem); #endif const struct { void All () const; // all memory accesses, scope: shader invocation void Buffer () const; void Image () const; - void Shared () const; // for shared variables + void Shared () const; // for 'WGShared' variables void Workgroup () const; // all memory accesses, scope: workgroup + + #ifdef AE_shader_subgroup_basic void Subgroup () const; // all memory accesses, scope: subgroup + #endif } memoryBarrier; // in @@ -1227,7 +1249,7 @@ struct gl // sync #ifdef AE_memory_scope_semantics - void MemoryBarrier (gl::Scope execution, gl::Scope memory, gl::StorageSemantics storage, gl::Semantics sem); + void MemoryBarrier (gl::Scope memory, gl::StorageSemantics storage, gl::Semantics sem); #endif const struct { diff --git a/AE/engine/shared_data/shaders/aestyle_shared.h b/AE/engine/shared_data/shaders/aestyle_shared.h index f9c029f4..d13040f6 100644 --- a/AE/engine/shared_data/shaders/aestyle_shared.h +++ b/AE/engine/shared_data/shaders/aestyle_shared.h @@ -156,6 +156,7 @@ struct _MatrixBase template struct _Matrix : _MatrixBase { + _Matrix (); _Matrix (T c00, T c01, T c10, T c11); _Matrix (const _Vec c0, @@ -166,6 +167,7 @@ struct _Matrix : _MatrixBase template struct _Matrix : _MatrixBase { + _Matrix (); _Matrix (T c00, T c01, T c02, T c10, T c11, T c12); _Matrix (const _Vec c0, @@ -176,6 +178,7 @@ struct _Matrix : _MatrixBase template struct _Matrix : _MatrixBase { + _Matrix (); _Matrix (T c00, T c01, T c02, T c03, T c10, T c11, T c12, T c13); _Matrix (const _Vec c0, @@ -186,6 +189,7 @@ struct _Matrix : _MatrixBase template struct _Matrix : _MatrixBase { + _Matrix (); _Matrix (T c00, T c01, T c10, T c11, T c20, T c21); @@ -198,6 +202,7 @@ struct 
_Matrix : _MatrixBase template struct _Matrix : _MatrixBase { + _Matrix (); _Matrix (T c00, T c01, T c02, T c10, T c11, T c12, T c20, T c21, T c22); @@ -210,6 +215,7 @@ struct _Matrix : _MatrixBase template struct _Matrix : _MatrixBase { + _Matrix (); _Matrix (T c00, T c01, T c02, T c03, T c10, T c11, T c12, T c13, T c20, T c21, T c22, T c23); @@ -222,6 +228,7 @@ struct _Matrix : _MatrixBase template struct _Matrix : _MatrixBase { + _Matrix (); _Matrix (T c00, T c01, T c10, T c11, T c20, T c21, @@ -236,6 +243,7 @@ struct _Matrix : _MatrixBase template struct _Matrix : _MatrixBase { + _Matrix (); _Matrix (T c00, T c01, T c02, T c10, T c11, T c12, T c20, T c21, T c22, @@ -250,6 +258,7 @@ struct _Matrix : _MatrixBase template struct _Matrix : _MatrixBase { + _Matrix (); _Matrix (T c00, T c01, T c02, T c03, T c10, T c11, T c12, T c13, T c20, T c21, T c22, T c23, @@ -355,95 +364,113 @@ template ND_ _Vec operator - (const _Vec x); template ND_ _PVec operator - (const _PVec x); -template _Vec & operator += (_Vec &x, const _Vec y); -template _Vec & operator += (_Vec &x, const T y); +template _Vec & operator += (_Vec &x, const _Vec y); +template _Vec & operator += (_Vec &x, const T y); template _PVec & operator += (_PVec &x, const _PVec y); template _PVec & operator += (_PVec &x, const T y); -template _Vec & operator -= (_Vec &x, const _Vec y); -template _Vec & operator -= (_Vec &x, const T y); +template _Vec & operator -= (_Vec &x, const _Vec y); +template _Vec & operator -= (_Vec &x, const T y); template _PVec & operator -= (_PVec &x, const _PVec y); template _PVec & operator -= (_PVec &x, const T y); -template _Vec & operator *= (_Vec &x, const _Vec y); -template _Vec & operator *= (_Vec &x, const T y); +template _Vec & operator *= (_Vec &x, const _Vec y); +template _Vec & operator *= (_Vec &x, const T y); template _PVec & operator *= (_PVec &x, const _PVec y); template _PVec & operator *= (_PVec &x, const T y); -template _Vec & operator /= (_Vec &x, const _Vec y); 
-template _Vec & operator /= (_Vec &x, const T y); +template _Vec & operator /= (_Vec &x, const _Vec y); +template _Vec & operator /= (_Vec &x, const T y); template _PVec & operator /= (_PVec &x, const _PVec y); template _PVec & operator /= (_PVec &x, const T y); -template _Vec & operator &= (_Vec &x, const _Vec y); -template _Vec & operator &= (_Vec &x, const T y); +template _Vec & operator &= (_Vec &x, const _Vec y); +template _Vec & operator &= (_Vec &x, const T y); template _PVec & operator &= (_PVec &x, const _PVec y); template _PVec & operator &= (_PVec &x, const T y); -template _Vec & operator |= (_Vec &x, const _Vec y); -template _Vec & operator |= (_Vec &x, const T y); +template _Vec & operator |= (_Vec &x, const _Vec y); +template _Vec & operator |= (_Vec &x, const T y); template _PVec & operator |= (_PVec &x, const _PVec y); template _PVec & operator |= (_PVec &x, const T y); -template _Vec & operator ^= (_Vec &x, const _Vec y); -template _Vec & operator ^= (_Vec &x, const T y); +template _Vec & operator ^= (_Vec &x, const _Vec y); +template _Vec & operator ^= (_Vec &x, const T y); template _PVec & operator ^= (_PVec &x, const _PVec y); template _PVec & operator ^= (_PVec &x, const T y); -template _Vec & operator >>= (_Vec &x, const _Vec y); -template _Vec & operator >>= (_Vec &x, const T y); +template _Vec & operator >>= (_Vec &x, const _Vec y); +template _Vec & operator >>= (_Vec &x, const T y); template _PVec & operator >>= (_PVec &x, const _PVec y); template _PVec & operator >>= (_PVec &x, const T y); -template _Vec & operator <<= (_Vec &x, const _Vec y); -template _Vec & operator <<= (_Vec &x, const T y); +template _Vec & operator <<= (_Vec &x, const _Vec y); +template _Vec & operator <<= (_Vec &x, const T y); template _PVec & operator <<= (_PVec &x, const _PVec y); template _PVec & operator <<= (_PVec &x, const T y); -template ND_ _Vec operator + (const _Vec x, const _Vec y); -template ND_ _Vec operator + (const _Vec x, const T y); +template ND_ 
_Vec operator + (const _Vec x, const _Vec y); +template ND_ _Vec operator + (const T x, const _Vec y); +template ND_ _Vec operator + (const _Vec x, const T y); template ND_ _PVec operator + (const _PVec x, const _PVec y); +template ND_ _PVec operator + (const T x, const _PVec y); template ND_ _PVec operator + (const _PVec x, const T y); -template ND_ _Vec operator - (const _Vec x, const _Vec y); -template ND_ _Vec operator - (const _Vec x, const T y); +template ND_ _Vec operator - (const _Vec x, const _Vec y); +template ND_ _Vec operator - (const T x, const _Vec y); +template ND_ _Vec operator - (const _Vec x, const T y); template ND_ _PVec operator - (const _PVec x, const _PVec y); +template ND_ _PVec operator - (const T x, const _PVec y); template ND_ _PVec operator - (const _PVec x, const T y); -template ND_ _Vec operator * (const _Vec x, const _Vec y); -template ND_ _Vec operator * (const _Vec x, const T y); +template ND_ _Vec operator * (const _Vec x, const _Vec y); +template ND_ _Vec operator * (const T x, const _Vec y); +template ND_ _Vec operator * (const _Vec x, const T y); template ND_ _PVec operator * (const _PVec x, const _PVec y); +template ND_ _PVec operator * (const T x, const _PVec y); template ND_ _PVec operator * (const _PVec x, const T y); -template ND_ _Vec operator / (const _Vec x, const _Vec y); -template ND_ _Vec operator / (const _Vec x, const T y); +template ND_ _Vec operator / (const _Vec x, const _Vec y); +template ND_ _Vec operator / (const T x, const _Vec y); +template ND_ _Vec operator / (const _Vec x, const T y); template ND_ _PVec operator / (const _PVec x, const _PVec y); +template ND_ _PVec operator / (const T x, const _PVec y); template ND_ _PVec operator / (const _PVec x, const T y); -template ND_ _Vec operator & (const _Vec x, const _Vec y); -template ND_ _Vec operator & (const _Vec x, const T y); +template ND_ _Vec operator & (const _Vec x, const _Vec y); +template ND_ _Vec operator & (const T x, const _Vec y); +template ND_ 
_Vec operator & (const _Vec x, const T y); template ND_ _PVec operator & (const _PVec x, const _PVec y); +template ND_ _PVec operator & (const T x, const _PVec y); template ND_ _PVec operator & (const _PVec x, const T y); -template ND_ _Vec operator | (const _Vec x, const _Vec y); -template ND_ _Vec operator | (const _Vec x, const T y); +template ND_ _Vec operator | (const _Vec x, const _Vec y); +template ND_ _Vec operator | (const T x, const _Vec y); +template ND_ _Vec operator | (const _Vec x, const T y); template ND_ _PVec operator | (const _PVec x, const _PVec y); +template ND_ _PVec operator | (const T x, const _PVec y); template ND_ _PVec operator | (const _PVec x, const T y); -template ND_ _Vec operator ^ (const _Vec x, const _Vec y); -template ND_ _Vec operator ^ (const _Vec x, const T y); +template ND_ _Vec operator ^ (const _Vec x, const _Vec y); +template ND_ _Vec operator ^ (const T x, const _Vec y); +template ND_ _Vec operator ^ (const _Vec x, const T y); template ND_ _PVec operator ^ (const _PVec x, const _PVec y); +template ND_ _PVec operator ^ (const T x, const _PVec y); template ND_ _PVec operator ^ (const _PVec x, const T y); -template ND_ _Vec operator >> (const _Vec x, const _Vec y); -template ND_ _Vec operator >> (const _Vec x, const T y); +template ND_ _Vec operator >> (const _Vec x, const _Vec y); +template ND_ _Vec operator >> (const T x, const _Vec y); +template ND_ _Vec operator >> (const _Vec x, const T y); template ND_ _PVec operator >> (const _PVec x, const _PVec y); +template ND_ _PVec operator >> (const T x, const _PVec y); template ND_ _PVec operator >> (const _PVec x, const T y); -template ND_ _Vec operator << (const _Vec x, const _Vec y); -template ND_ _Vec operator << (const _Vec x, const T y); +template ND_ _Vec operator << (const _Vec x, const _Vec y); +template ND_ _Vec operator << (const T x, const _Vec y); +template ND_ _Vec operator << (const _Vec x, const T y); template ND_ _PVec operator << (const _PVec x, const _PVec y); 
+template ND_ _PVec operator << (const T x, const _PVec y); template ND_ _PVec operator << (const _PVec x, const T y); diff --git a/AE/engine/src/audio/BASS/AudioInputBASS.cpp b/AE/engine/src/audio/BASS/AudioInputBASS.cpp index 49e2c0d6..4f499793 100644 --- a/AE/engine/src/audio/BASS/AudioInputBASS.cpp +++ b/AE/engine/src/audio/BASS/AudioInputBASS.cpp @@ -8,7 +8,7 @@ namespace AE::Audio { namespace { - static BOOL CALLBACK RecordingCallback (HRECORD, const void *buffer, DWORD length, void *user) __NE___ + static BOOL CALLBACK RecordingCallback (HRECORD, const void* buffer, DWORD length, void* user) __NE___ { auto& stream = *Cast( user ); return stream.Write( buffer, Bytes{length} ); diff --git a/AE/engine/src/audio/BASS/AudioSystemBASS.cpp b/AE/engine/src/audio/BASS/AudioSystemBASS.cpp index b48f8881..7ffa0d3d 100644 --- a/AE/engine/src/audio/BASS/AudioSystemBASS.cpp +++ b/AE/engine/src/audio/BASS/AudioSystemBASS.cpp @@ -377,7 +377,7 @@ namespace { const int type = info.flags & BASS_DEVICE_TYPE_MASK; - if ( not AllBits( info.flags, BASS_DEVICE_ENABLED ) or type == 0 ) + if ( NoBits( info.flags, BASS_DEVICE_ENABLED ) or type == 0 ) continue; if ( AnyEqual( type, BASS_DEVICE_TYPE_MICROPHONE )) @@ -407,7 +407,7 @@ namespace { const int type = info.flags & BASS_DEVICE_TYPE_MASK; - if ( not AllBits( info.flags, BASS_DEVICE_ENABLED ) or type == 0 ) + if ( NoBits( info.flags, BASS_DEVICE_ENABLED ) or type == 0 ) continue; if ( not AnyEqual( type, BASS_DEVICE_TYPE_HEADSET, BASS_DEVICE_TYPE_MICROPHONE )) diff --git a/AE/engine/src/audio/BASS/UtilsBASS.cpp b/AE/engine/src/audio/BASS/UtilsBASS.cpp index b519ec57..f30efe80 100644 --- a/AE/engine/src/audio/BASS/UtilsBASS.cpp +++ b/AE/engine/src/audio/BASS/UtilsBASS.cpp @@ -152,6 +152,144 @@ namespace bassEncLib.Unload(); bassLib.Unload(); } +//----------------------------------------------------------------------------- + + +/* +================================================= + Decode 
+================================================= +* + bool AudioDevice::Decode (RStream &stream, OUT RawSoundData &result) __NE___ + { + ubyte buffer [_BufferSize]; + return Decode( TempBuffer_t{ buffer, Bytes{_BufferSize} }, stream, OUT result ); + } + + bool AudioDevice::Decode (const TempBuffer_t &buffer, RStream &stream, OUT RawSoundData &result) __NE___ + { + CHECK_ERR( _initialized ); + CHECK_ERR( stream.IsOpen() ); + CHECK_ERR( buffer.ptr != null and buffer.size <= _LengthMask ); + + BASS_FILEPROCS file_procs = { &StreamWrap::Close, &StreamWrap::Length, &StreamWrap::Read, &StreamWrap::Seek }; + HSTREAM bass_stream = bass.StreamCreateFileUser( STREAMFILE_NOBUFFER, BASS_STREAM_DECODE, &file_procs, &stream ); + CHECK_ERR( bass_stream != 0 ); + + ON_DESTROY( [&bass_stream] { bass.StreamFree( bass_stream ); }); + + BASS_CHANNELINFO info = {}; + CHECK_ERR( bass.ChannelGetInfo( bass_stream, OUT &info ) == TRUE ); + + float bitrate = 0.0f; + CHECK_ERR( bass.ChannelGetAttribute( bass_stream, BASS_ATTRIB_BITRATE, OUT &bitrate ) == TRUE ); + + QWORD length = bass.ChannelGetLength( bass_stream, BASS_POS_BYTE ); + CHECK_ERR( length != UMax ); + + result.channels = info.chans; + result.freq = Frequency_t(info.freq); + result.duration = secondsf{ bass.ChannelBytes2Seconds( bass_stream, length )}; + result.bitrate = Bitrate_t{bitrate}; + result.format = ESampleFormat::UInt16; + result.buffer.resize( length ); + + usize offset = 0; + for (; bass.ChannelIsActive( bass_stream );) + { + QWORD pos = bass.ChannelGetPosition( bass_stream, BASS_POS_BYTE ); + DWORD size = bass.ChannelGetData( bass_stream, OUT buffer.ptr, (DWORD(buffer.size) & _LengthMask) | BASS_DATA_AVAILABLE ); + + if ( size == UMax ) + BASS_CheckError(); + + CHECK_ERR( offset == pos ); + CHECK_ERR( pos + size <= result.buffer.size() ); + + MemCopy( OUT result.buffer.data() + Bytes{pos}, buffer.ptr, Bytes{size} ); + offset = pos + size; + } + + CHECK_ERR( offset == result.buffer.size() ); + return true; + } + 
+/* +================================================= + Encode +================================================= +* + bool AudioDevice::Encode (RStream &inStream, WStream &outStream, EAudioFormat outputFormat, EAudioQuality quality) __NE___ + { + ubyte buffer [_BufferSize]; + return Encode( TempBuffer_t{ buffer, Bytes{_BufferSize} }, inStream, outStream, outputFormat, quality ); + } + + bool AudioDevice::Encode (const TempBuffer_t &buffer, RStream &inStream, WStream &outStream, EAudioFormat outputFormat, EAudioQuality quality) __NE___ + { + CHECK_ERR( inStream.IsOpen() ); + CHECK_ERR( outStream.IsOpen() ); + CHECK_ERR( buffer.ptr != null and buffer.size <= _LengthMask ); + + BASS_FILEPROCS file_procs = { &StreamWrap::Close, &StreamWrap::Length, &StreamWrap::Read, &StreamWrap::Seek }; + HSTREAM bass_stream = bass.StreamCreateFileUser( STREAMFILE_NOBUFFER, BASS_STREAM_DECODE, &file_procs, &inStream ); + HENCODE bass_enc = 0; + CHECK_ERR( bass_stream != 0 ); + + ON_DESTROY( [&bass_stream] { bass.StreamFree( bass_stream ); }); + + float bitrate = 0.0f; + CHECK_ERR( bass.ChannelGetAttribute( bass_stream, BASS_ATTRIB_BITRATE, OUT &bitrate ) == TRUE ); + + //float freq = 0.0f; + //bass.ChannelGetAttribute( bass_stream, BASS_ATTRIB_FREQ, OUT &freq ); + + switch_enum( outputFormat ) + { + case EAudioFormat::OGG : + { + int qual = 3; + switch ( quality ) + { + case EAudioQuality::Highest : qual = 10; break; + case EAudioQuality::High : qual = 7; break; + case EAudioQuality::Medium : qual = 5; break; + case EAudioQuality::Low : qual = 3; break; + case EAudioQuality::Lowest : qual = -1; break; + } + + String options; + options << " --bitrate "s << ToString(uint(bitrate)); // in Kb/s + options << " --max-bitrate "s << ToString(Max( 320u, uint(bitrate) )); // in Kb/s + options << " --quality " << ToString(qual); // in range -1..10 + //options << " --resample " << ToString(uint(freq)); // in Hz + AE_LOG_DBG( "BASS OGG encoder options: "s << options ); + + bass_enc = 
bass.Encode_OGG_Start( bass_stream, options.c_str(), 0, StreamWrap::Encode, &outStream ); + CHECK_ERR( bass_enc != 0 ); + break; + } + case EAudioFormat::RAW : + default : + RETURN_ERR( "unknown output format" ); + } + switch_end + + for (; bass.ChannelIsActive( bass_stream );) + { + DWORD err = bass.ChannelGetData( bass_stream, OUT buffer.ptr, (DWORD(buffer.size) & _LengthMask) | BASS_DATA_AVAILABLE ); + + if ( err == UMax ) + BASS_CheckError(); + + CHECK_ERR( bass.Encode_IsActive( bass_stream )); + } + + CHECK_ERR( bass.Encode_Stop( bass_stream )); + + return true; + } +*/ } // AE::Audio diff --git a/AE/engine/src/audio/BASS/UtilsBASS.cpp.h b/AE/engine/src/audio/BASS/UtilsBASS.cpp.h index bb47ce18..f9acf3d8 100644 --- a/AE/engine/src/audio/BASS/UtilsBASS.cpp.h +++ b/AE/engine/src/audio/BASS/UtilsBASS.cpp.h @@ -1,5 +1,7 @@ // Copyright (c) Zhirnov Andrey. For more information see 'LICENSE' +#pragma once + #include "base/Defines/StdInclude.h" #define NOBASSOVERLOADS diff --git a/AE/engine/src/audio/CMakeLists.txt b/AE/engine/src/audio/CMakeLists.txt index b7d281ec..bba24d2f 100644 --- a/AE/engine/src/audio/CMakeLists.txt +++ b/AE/engine/src/audio/CMakeLists.txt @@ -34,6 +34,7 @@ if (${AE_ENABLE_AUDIO}) EnablePCH( "Audio" ) EnablePrebuild( "Audio" ) + EnableUnitBuild( "Audio" ) install( TARGETS "Audio" ARCHIVE DESTINATION "lib" ) endif() diff --git a/AE/engine/src/base/Algorithms/Cast.h b/AE/engine/src/base/Algorithms/Cast.h index d4610840..4a01900e 100644 --- a/AE/engine/src/base/Algorithms/Cast.h +++ b/AE/engine/src/base/Algorithms/Cast.h @@ -37,7 +37,7 @@ namespace AE::Base template cxx20_constexpr void CheckPointerCast (T const* ptr) __NE___ { - #ifdef AE_CFG_DEBUG + #ifdef AE_DEBUG if constexpr( not IsVoid and not IsConstEvaluated() ) { if ( not CheckPointerAlignment( ptr )) @@ -52,6 +52,22 @@ namespace AE::Base #endif } +/* +================================================= + AssumeAligned +================================================= +*/ + template + ND_ 
constexpr T* AssumeAligned (T* ptr) __NE___ + { + ASSERT( CheckPointerAlignment( ptr, Align )); + #ifdef __cpp_lib_assume_aligned + return std::assume_aligned( ptr ); + #else + return ptr; + #endif + } + /* ================================================= Cast (pointer) diff --git a/AE/engine/src/base/Algorithms/Parser.cpp b/AE/engine/src/base/Algorithms/Parser.cpp index b060b682..59f20bc2 100644 --- a/AE/engine/src/base/Algorithms/Parser.cpp +++ b/AE/engine/src/base/Algorithms/Parser.cpp @@ -341,10 +341,10 @@ namespace { ND_ bool IsUnused (char c) __NE___; ND_ bool OnUnknown (char) __NE___ { return false; } // return true to continue parsing, false - to exit - ND_ static bool _IsWordBegin (char c) __NE___; - ND_ static bool _IsWord (char c) __NE___; - ND_ static bool _IsNumberBegin (char c) __NE___; - ND_ static bool _IsNumber (char c) __NE___; + ND_ static bool _IsWordBegin (char c) __NE___ { return Parser::CPP.IsWordBegin( c ); } + ND_ static bool _IsWord (char c) __NE___ { return Parser::CPP.IsWord( c ); } + ND_ static bool _IsNumberBegin (char c) __NE___ { return Parser::CPP.IsNumberBegin( c ); } + ND_ static bool _IsNumber (char c) __NE___ { return Parser::CPP.IsNumber( c ); } ND_ static bool _IsOperator (char c) __NE___; ND_ static bool _IsBinaryOperator (char p, char c) __NE___; ND_ static bool _IsTernaryOperator (char pp, char p, char c) __NE___; @@ -429,52 +429,6 @@ namespace { /*or (c == '\\')*/; } -/* -================================================= - _IsWordBegin -================================================= -*/ - inline bool CStyleParser::_IsWordBegin (char c) __NE___ - { - return ((c >= 'A') and (c <= 'Z')) or - ((c >= 'a') and (c <= 'z')) or - (c == '_'); - } - -/* -================================================= - _IsWord -================================================= -*/ - inline bool CStyleParser::_IsWord (char c) __NE___ - { - return _IsWordBegin( c ) or _IsNumberBegin( c ); - } - -/* 
-================================================= - _IsNumberBegin -================================================= -*/ - inline bool CStyleParser::_IsNumberBegin (char c) __NE___ - { - return (c >= '0') and (c <= '9'); - } - -/* -================================================= - _IsNumber -================================================= -*/ - inline bool CStyleParser::_IsNumber (char c) __NE___ - { - return ((c >= '0') and (c <= '9')) or - ((c >= 'A') and (c <= 'F')) or - ((c >= 'a') and (c <= 'f')) or - (c == '.') or (c == 'x') or - (c == 'X'); - } - /* ================================================= _IsOperator diff --git a/AE/engine/src/base/Algorithms/Parser.h b/AE/engine/src/base/Algorithms/Parser.h index 69130f56..e5b2a709 100644 --- a/AE/engine/src/base/Algorithms/Parser.h +++ b/AE/engine/src/base/Algorithms/Parser.h @@ -56,6 +56,15 @@ namespace AE::Base ND_ static bool IsWhiteSpacesOnly (StringView str) __NE___; static void Align (INOUT String &str, usize pos, usize lineSize, char c = ' ') __Th___; + + static constexpr struct _CPPHelper + { + ND_ static bool IsWordBegin (char c) __NE___; + ND_ static bool IsWord (char c) __NE___; + + ND_ static bool IsNumberBegin (char c) __NE___; + ND_ static bool IsNumber (char c) __NE___; + } CPP; }; @@ -124,4 +133,36 @@ namespace AE::Base return true; } +/* +================================================= + _CPPHelper +================================================= +*/ + forceinline bool Parser::_CPPHelper::IsWordBegin (char c) __NE___ + { + return ((c >= 'A') and (c <= 'Z')) or + ((c >= 'a') and (c <= 'z')) or + (c == '_'); + } + + forceinline bool Parser::_CPPHelper::IsNumberBegin (char c) __NE___ + { + return (c >= '0') and (c <= '9'); + } + + forceinline bool Parser::_CPPHelper::IsWord (char c) __NE___ + { + return IsWordBegin( c ) or IsNumberBegin( c ); + } + + forceinline bool Parser::_CPPHelper::IsNumber (char c) __NE___ + { + return ((c >= '0') and (c <= '9')) or + ((c >= 'A') and (c <= 'F')) or + 
((c >= 'a') and (c <= 'f')) or + (c == '.') or (c == 'x') or + (c == 'X'); + } + + } // AE::Base diff --git a/AE/engine/src/base/Algorithms/StringUtils.h b/AE/engine/src/base/Algorithms/StringUtils.h index 76a6934f..95859fb8 100644 --- a/AE/engine/src/base/Algorithms/StringUtils.h +++ b/AE/engine/src/base/Algorithms/StringUtils.h @@ -189,7 +189,7 @@ namespace _hidden_ ================================================= */ #if AE_SIMD_AVX >= 2 - ND_ forceinline const char* find_avx2_align (const char *b, const char *e, const char c) __NE___ + ND_ forceinline const char* find_avx2_align (const char* b, const char* e, const char c) __NE___ { auto* i = b; __m256i q = _mm256_set1_epi8( c ); @@ -1376,11 +1376,10 @@ namespace _hidden_ } template - ND_ EnableIf< IsFloatPoint, String > ToString2 (T value) __Th___ + ND_ EnableIf< IsFloatPoint, String > ToString2 (T value, uint fractPart = 0) __Th___ { const T v = Abs(value); char suffix = 0; - uint fract_part; if_likely( v >= T(1) ) { @@ -1388,7 +1387,7 @@ namespace _hidden_ if ( v < T(500'000) ) { value /= T(1000); suffix = 'K'; } else if ( v < T(500'000'000) ) { value /= T(1000'000); suffix = 'M'; } else { value /= T(1000'000'000); suffix = 'G'; } - fract_part = 1; + if ( fractPart == 0 ) fractPart = 1; } else if ( BitEqual( v, T(0) )) @@ -1399,9 +1398,9 @@ namespace _hidden_ if ( v > T(1.0e-6) ) { value *= T(1000); suffix = 'm'; } else // milli if ( v > T(1.0e-9) ) { value *= T(1000'000); suffix = 'u'; } else // micro { value *= T(1000'000'000); suffix = 'n'; } // nano - fract_part = 3; + if ( fractPart == 0 ) fractPart = 3; } - String str = ToString( value, fract_part ); + String str = ToString( value, fractPart ); if ( suffix ) str << suffix; return str; } diff --git a/AE/engine/src/base/CMakeLists.txt b/AE/engine/src/base/CMakeLists.txt index de067b37..8b506417 100644 --- a/AE/engine/src/base/CMakeLists.txt +++ b/AE/engine/src/base/CMakeLists.txt @@ -50,6 +50,11 @@ if (WIN32) target_link_libraries( "Base" PUBLIC "ws2_32" 
) endif() +# for IDE +if (FALSE) #WIN32) + target_include_directories( "Base" PRIVATE "${ANDROID_NDK_INCLUDE}/sysroot/usr/include" "${ANDROID_NDK_INCLUDE}/lib/clang/17.0.2/include" ) +endif() + if (MSVC) set_source_files_properties( ${OBJC_SOURCES} PROPERTIES HEADER_FILE_ONLY TRUE ) endif() @@ -72,5 +77,6 @@ target_compile_definitions( "Base" PUBLIC FP_FAST_FMA FP_FAST_FMAF FP_FAST_FMAL EnablePCH( "Base" ) EnablePrebuild( "Base" ) +EnableUnitBuild( "Base" ) install( TARGETS "Base" ARCHIVE DESTINATION "lib" ) diff --git a/AE/engine/src/base/Common.h b/AE/engine/src/base/Common.h index 9feb5051..63abb728 100644 --- a/AE/engine/src/base/Common.h +++ b/AE/engine/src/base/Common.h @@ -30,7 +30,7 @@ namespace AE #ifdef AE_PLATFORM_WINDOWS # if UNICODE - using CharType = wchar_t; // L + using CharType = wchar_t; // L'' # define TXT( _text_ ) (L"" _text_) # else using CharType = CharAnsi; @@ -153,17 +153,6 @@ namespace AE::Base template constexpr void Unused (Args&& ...) __NE___ {} -/* -================================================= - ArgRef (same as std::ref) -================================================= -*/ - template - ND_ constexpr std::reference_wrapper ArgRef (T &arg) __NE___ - { - return std::reference_wrapper{ arg }; - } - /* ================================================= RVRef (same as std::move) diff --git a/AE/engine/src/base/Containers/FixedArray.h b/AE/engine/src/base/Containers/FixedArray.h index c4091ab9..667e70e0 100644 --- a/AE/engine/src/base/Containers/FixedArray.h +++ b/AE/engine/src/base/Containers/FixedArray.h @@ -86,12 +86,17 @@ namespace AE::Base ND_ constexpr bool operator <= (ArrayView rhs) C_NE___ { return ArrayView{*this} <= rhs; } - constexpr Self& operator = (const Self &rhs) __NE___; - constexpr Self& operator = (ArrayView rhs) __NE___; + template + constexpr Self& operator = (const FixedArray &) __NE___; + constexpr Self& operator = (const Self &rhs) __NE___ { return operator=( ArrayView{rhs} ); } constexpr Self& operator = (Self 
&&rhs) __NE___; + template + constexpr Self& operator = (ArrayView rhs) __NE___; - constexpr void assign (const_iterator beginIter, const_iterator endIter) __NE___; - constexpr void append (ArrayView items) __NE___; + template + constexpr void assign (B* beginIter, B* endIter) __NE___; + template + constexpr void append (B* beginIter, B* endIter) __NE___; constexpr void push_back (const T &value) __NE___; constexpr void push_back (T &&value) __NE___; @@ -120,9 +125,9 @@ namespace AE::Base constexpr void fast_erase (usize index) __NE___; private: - ND_ constexpr bool _IsMemoryAliased (const_iterator beginIter, const_iterator endIter) C_NE___ + ND_ constexpr bool _IsMemoryAliased (const void* beginIter, const void* endIter) C_NE___ { - return IsIntersects( begin(), end(), beginIter, endIter ); + return IsIntersects( begin(), end(), beginIter, endIter ); } }; @@ -176,14 +181,16 @@ namespace AE::Base ================================================= */ template - constexpr FixedArray& FixedArray::operator = (const Self &rhs) __NE___ + template + constexpr FixedArray& FixedArray::operator = (const FixedArray &rhs) __NE___ { assign( rhs.begin(), rhs.end() ); return *this; } template - constexpr FixedArray& FixedArray::operator = (ArrayView rhs) __NE___ + template + constexpr FixedArray& FixedArray::operator = (ArrayView rhs) __NE___ { ASSERT( rhs.size() < capacity() ); assign( rhs.begin(), rhs.end() ); @@ -211,8 +218,11 @@ namespace AE::Base ================================================= */ template - constexpr void FixedArray::assign (const_iterator beginIter, const_iterator endIter) __NE___ + template + constexpr void FixedArray::assign (B* beginIter, B* endIter) __NE___ { + StaticAssert( IsConstructible< T, B >); + ASSERT( beginIter <= endIter ); ASSERT( not _IsMemoryAliased( beginIter, endIter )); @@ -230,10 +240,17 @@ namespace AE::Base ================================================= */ template - constexpr void FixedArray::append (ArrayView items) __NE___ + 
template + constexpr void FixedArray::append (B* beginIter, B* endIter) __NE___ { - for (auto& item : items) { - push_back( item ); + StaticAssert( IsConstructible< T, B >); + + ASSERT( beginIter <= endIter ); + ASSERT( not _IsMemoryAliased( beginIter, endIter )); + + for (auto iter = beginIter; (_count < capacity()) and (iter != endIter); ++iter, ++_count) + { + PlacementNew( OUT data() + _count, *iter ); } } diff --git a/AE/engine/src/base/Containers/StructView.h b/AE/engine/src/base/Containers/StructView.h index a8a29f47..c628fa99 100644 --- a/AE/engine/src/base/Containers/StructView.h +++ b/AE/engine/src/base/Containers/StructView.h @@ -277,7 +277,7 @@ namespace AE::Base public: const_iterator& operator ++ () __NE___ { ++_index; return *this; } - ND_ decltype(auto) operator * () C_NE___ { return _ref[_index]; } + ND_ exact_t operator * () C_NE___ { return _ref[_index]; } ND_ bool operator == (const const_iterator &rhs) C_NE___ { return &_ref == &rhs._ref and _index == rhs._index; } ND_ bool operator != (const const_iterator &rhs) C_NE___ { return not (*this == rhs); } }; @@ -295,7 +295,7 @@ namespace AE::Base public: large_iterator& operator ++ () __NE___ { ++_index; return *this; } - ND_ decltype(auto) operator * () C_NE___ { return _ref[_index]; } + ND_ exact_t operator * () C_NE___ { return _ref[_index]; } ND_ bool operator != (const large_iterator &rhs) C_NE___ { return not (*this == rhs); } ND_ bool operator == (const large_iterator &rhs) C_NE___ { return (_ref._view == rhs._ref._view) and (_index == rhs._index); } }; @@ -321,10 +321,10 @@ namespace AE::Base ND_ usize size () C_NE___ { return _view.size(); } ND_ bool empty () C_NE___ { return _view.empty(); } - ND_ decltype(auto) operator [] (usize i) C_NE___ { return Converter_t{}( _view[i] ); } + ND_ exact_t operator [] (usize i) C_NE___ { return Converter_t{}( _view[i] ); } - ND_ decltype(auto) front () C_NE___ { return operator[] (0); } - ND_ decltype(auto) back () C_NE___ { return operator[] 
(size()-1); } + ND_ exact_t front () C_NE___ { return operator[] (0); } + ND_ exact_t back () C_NE___ { return operator[] (size()-1); } ND_ const_iterator begin () CrNE___ { return const_iterator{ *this, 0 }; } ND_ const_iterator end () CrNE___ { return const_iterator{ *this, size() }; } diff --git a/AE/engine/src/base/Containers/Tuple.h b/AE/engine/src/base/Containers/Tuple.h index 410b8a89..e041ac3f 100644 --- a/AE/engine/src/base/Containers/Tuple.h +++ b/AE/engine/src/base/Containers/Tuple.h @@ -41,82 +41,82 @@ namespace _hidden_ // methods - constexpr Tuple () __NE___ = default; + __Cx__ Tuple () __NE___ = default; - constexpr Tuple (const Self &) __NE___ = default; - constexpr Tuple (Self &&) __NE___ = default; + __Cx__ Tuple (const Self &) __NE___ = default; + __Cx__ Tuple (Self &&) __NE___ = default; template - constexpr explicit Tuple (UTypes&& ...args) __NE___ : Base_t{ FwdArg(args)... } {} + __CxIA explicit Tuple (UTypes&& ...args) NoExcept( Base::IsNothrowCtor< Base_t, UTypes&&... >) : Base_t{ FwdArg(args)... 
} {} template - constexpr Tuple (const Tuple &other) __NE___ : Base_t{ other.AsBase() } {} + __CxIA Tuple (const Tuple &other) NoExcept( Base::IsNothrowCtor< Base_t, decltype(other) >) : Base_t{ other.AsBase() } {} template - constexpr Tuple (Tuple&& other) __NE___ : Base_t{ RVRef(other).AsBase() } {} + __CxIA Tuple (Tuple&& other) __NE___ : Base_t{ RVRef(other).AsBase() } {} - constexpr Self& operator = (const Self &) __NE___ = default; - constexpr Self& operator = (Self &&) __NE___ = default; + __Cx__ Self& operator = (const Self &) __NE___ = default; + __Cx__ Self& operator = (Self &&) __NE___ = default; template - constexpr Self& operator = (const Tuple &rhs) __NE___ { AsBase() = rhs.AsBase(); return *this; } + __CxIA Self& operator = (const Tuple &rhs) __NE___ { AsBase() = rhs.AsBase(); return *this; } template - constexpr Self& operator = (Tuple&&rhs) __NE___ { AsBase() = RVRef(rhs).AsBase(); return *this; } + __CxIA Self& operator = (Tuple&&rhs) __NE___ { AsBase() = RVRef(rhs).AsBase(); return *this; } - ND_ constexpr bool operator == (const Self &rhs) C_NE___ { return AsBase() == rhs.AsBase(); } - ND_ constexpr bool operator != (const Self &rhs) C_NE___ { return AsBase() != rhs.AsBase(); } - ND_ constexpr bool operator > (const Self &rhs) C_NE___ { return AsBase() > rhs.AsBase(); } - ND_ constexpr bool operator < (const Self &rhs) C_NE___ { return AsBase() < rhs.AsBase(); } - ND_ constexpr bool operator >= (const Self &rhs) C_NE___ { return AsBase() >= rhs.AsBase(); } - ND_ constexpr bool operator <= (const Self &rhs) C_NE___ { return AsBase() <= rhs.AsBase(); } + NdCxIA bool operator == (const Self &rhs) C_NE___ { return AsBase() == rhs.AsBase(); } + NdCxIA bool operator != (const Self &rhs) C_NE___ { return AsBase() != rhs.AsBase(); } + NdCxIA bool operator > (const Self &rhs) C_NE___ { return AsBase() > rhs.AsBase(); } + NdCxIA bool operator < (const Self &rhs) C_NE___ { return AsBase() < rhs.AsBase(); } + NdCxIA bool operator >= (const Self &rhs) 
C_NE___ { return AsBase() >= rhs.AsBase(); } + NdCxIA bool operator <= (const Self &rhs) C_NE___ { return AsBase() <= rhs.AsBase(); } template - ND_ constexpr T& Get () r_NE___ { return std::get( *this ); } + NdCxIA T& Get () r_NE___ { return std::get( *this ); } template - ND_ constexpr T const& Get () CrNE___ { return std::get( *this ); } + NdCxIA T const& Get () CrNE___ { return std::get( *this ); } template - ND_ constexpr T && Get () rvNE___ { return std::get( RVRef(*this) ); } + NdCxIA T && Get () rvNE___ { return std::get( RVRef(*this) ); } template - ND_ constexpr decltype(auto) Get () r_NE___ { return std::get( *this ); } + NdCxIA exact_t Get () r_NE___ { return std::get( *this ); } template - ND_ constexpr decltype(auto) Get () CrNE___ { return std::get( *this ); } + NdCxIA exact_t Get () CrNE___ { return std::get( *this ); } template - ND_ constexpr decltype(auto) Get () rvNE___ { return std::get( RVRef(*this) ); } + NdCxIA exact_t Get () rvNE___ { return std::get( RVRef(*this) ); } - ND_ constexpr usize Count () C_NE___ { return sizeof... (Types); } + NdCxIA usize Count () C_NE___ { return sizeof... (Types); } - ND_ constexpr Base_t const& AsBase () CrNE___ { return static_cast(*this); } - ND_ constexpr Base_t & AsBase () r_NE___ { return static_cast(*this); } - ND_ constexpr Base_t && AsBase () rvNE___ { return static_cast( RVRef(*this) ); } + NdCxIA Base_t const& AsBase () CrNE___ { return static_cast(*this); } + NdCxIA Base_t & AsBase () r_NE___ { return static_cast(*this); } + NdCxIA Base_t && AsBase () rvNE___ { return static_cast( RVRef(*this) ); } - ND_ HashVal CalcHash () C_NE___ { return _RecursiveCalcHash<0>(); } + Nd__IA HashVal CalcHash () C_NE___ { return _RecursiveCalcHash<0>(); } template - constexpr void Set (Args&& ...args) __NE___ { _RecursiveSet<0>( FwdArg(args)... ); } + __CxIA void Set (Args&& ...args) __NE___ { _RecursiveSet<0>( FwdArg(args)... 
); } template - constexpr decltype(auto) Apply (Fn &&fn) NoExcept(IsNothrowInvocable< Fn, Types&... >) + __CxIA exact_t Apply (Fn &&fn) NoExcept(IsNothrowInvocable< Fn, Types&... >) { return std::apply( FwdArg(fn), static_cast(*this) ); } template - constexpr decltype(auto) Apply (Fn &&fn) CNoExcept(IsNothrowInvocable< Fn, const Types&... >) + __CxIA exact_t Apply (Fn &&fn) CNoExcept(IsNothrowInvocable< Fn, const Types&... >) { return std::apply( FwdArg(fn), static_cast(*this) ); } template - constexpr void ForEach (Fn &fn) C_Th___ { _ForEach<0>( fn ); } + __CxIA void ForEach (Fn &fn) C_Th___ { _ForEach<0>( fn ); } private: @@ -130,7 +130,7 @@ namespace _hidden_ } template - constexpr void _RecursiveSet (Arg0 &&arg0, Args&& ...args) __NE___ + constexpr void _RecursiveSet (Arg0 &&arg0, Args&& ...args) __NE___ { CheckNothrow( IsNoExcept( Get() = FwdArg(arg0) )); Get() = FwdArg(arg0); @@ -182,74 +182,74 @@ namespace _hidden_ // methods public: - constexpr TupleRef () __NE___ = default; + __Cx__ TupleRef () __NE___ = default; // 'args' must be pointers template - constexpr explicit TupleRef (UTypes&& ...args) __NE___ : _base{ FwdArg(args)... } {} + __CxIA explicit TupleRef (UTypes&& ...args) __NE___ : _base{ FwdArg(args)... 
} {} template - ND_ constexpr T& Get () r_NE___ { return Get< _Index >(); } + NdCxIA T& Get () r_NE___ { return Get< _Index >(); } template - ND_ constexpr T const& Get () CrNE___ { return Get< _Index >(); } + NdCxIA T const& Get () CrNE___ { return Get< _Index >(); } template - ND_ constexpr T & Get () rvNE___ { return Get< _Index >(); } + NdCxIA T & Get () rvNE___ { return Get< _Index >(); } template - ND_ constexpr decltype(auto) Get () r_NE___ { ASSERT( IsNotNull() ); return *_base.template Get(); } + NdCxIA exact_t Get () r_NE___ { ASSERT( IsNotNull() ); return *_base.template Get(); } template - ND_ constexpr decltype(auto) Get () CrNE___ { ASSERT( IsNotNull() ); return *_base.template Get(); } + NdCxIA exact_t Get () CrNE___ { ASSERT( IsNotNull() ); return *_base.template Get(); } template - ND_ constexpr decltype(auto) Get () rvNE___ { ASSERT( IsNotNull() ); return *_base.template Get(); } + NdCxIA exact_t Get () rvNE___ { ASSERT( IsNotNull() ); return *_base.template Get(); } // for structured bindings #if 1 template - ND_ constexpr decltype(auto) get () r_NE___ { ASSERT( IsNotNull() ); return *_base.template Get(); } + NdCxIA exact_t get () r_NE___ { ASSERT( IsNotNull() ); return *_base.template Get(); } template - ND_ constexpr decltype(auto) get () CrNE___ { ASSERT( IsNotNull() ); return *_base.template Get(); } + NdCxIA exact_t get () CrNE___ { ASSERT( IsNotNull() ); return *_base.template Get(); } template - ND_ constexpr decltype(auto) get () rvNE___ { ASSERT( IsNotNull() ); return *_base.template Get(); } + NdCxIA exact_t get () rvNE___ { ASSERT( IsNotNull() ); return *_base.template Get(); } #endif template - ND_ constexpr bool IsNotNull () C_NE___ { return _base.template Get() != null; } + NdCxIA bool IsNotNull () C_NE___ { return _base.template Get() != null; } template - ND_ constexpr bool IsNotNull () C_NE___ { return _base.template Get< _Index >() != null; } + NdCxIA bool IsNotNull () C_NE___ { return _base.template Get< _Index >() != null; } 
template - ND_ constexpr bool IsNull () C_NE___ { return _base.template Get() == null; } + NdCxIA bool IsNull () C_NE___ { return _base.template Get() == null; } template - ND_ constexpr bool IsNull () C_NE___ { return _base.template Get< _Index >() == null; } + NdCxIA bool IsNull () C_NE___ { return _base.template Get< _Index >() == null; } - ND_ constexpr usize Count () C_NE___ { return sizeof... (Types); } + NdCx__ usize Count () C_NE___ { return sizeof... (Types); } - ND_ constexpr bool AllNonNull () C_NE___ { return _RecursiveNonNull<0>(); } - ND_ constexpr bool AnyNull () C_NE___ { return not AllNonNull(); } - ND_ constexpr bool AllNull () C_NE___ { return _RecursiveNull<0>(); } + NdCxIA bool AllNonNull () C_NE___ { return _RecursiveNonNull<0>(); } + NdCxIA bool AnyNull () C_NE___ { return not AllNonNull(); } + NdCxIA bool AllNull () C_NE___ { return _RecursiveNull<0>(); } - ND_ constexpr Tuple_t const& AsTuple () CrNE___ { return _base; } - ND_ constexpr Tuple_t & AsTuple () r_NE___ { return _base; } - ND_ constexpr Tuple_t & AsTuple () rvNE___ { return _base; } - ND_ constexpr CRef_t const& AsConst () CrNE___ { return reinterpret_cast(*this); } + NdCx__ Tuple_t const& AsTuple () CrNE___ { return _base; } + NdCx__ Tuple_t & AsTuple () r_NE___ { return _base; } + NdCx__ Tuple_t & AsTuple () rvNE___ { return _base; } + NdCx__ CRef_t const& AsConst () CrNE___ { return reinterpret_cast(*this); } private: template - ND_ constexpr bool _RecursiveNonNull () C_NE___ + ND_ constexpr bool _RecursiveNonNull () C_NE___ { if constexpr( I+1 < sizeof...(Types) ) return IsNotNull() and _RecursiveNonNull(); @@ -258,7 +258,7 @@ namespace _hidden_ } template - ND_ constexpr bool _RecursiveNull () C_NE___ + ND_ constexpr bool _RecursiveNull () C_NE___ { if constexpr( I+1 < sizeof...(Types) ) return IsNull() and _RecursiveNull(); @@ -293,7 +293,7 @@ namespace _hidden_ namespace _hidden_ { template - constexpr auto _TupleConcat (Tuple1&& tuple1, Tuple2&& tuple2, IndexSequence, 
IndexSequence) __Th___ + NdCxIA auto _TupleConcat (Tuple1&& tuple1, Tuple2&& tuple2, IndexSequence, IndexSequence) __Th___ { return Tuple{ std::get( FwdArg( tuple1 )) ..., std::get( FwdArg( tuple2 )) ... }; @@ -301,7 +301,7 @@ namespace _hidden_ } template - ND_ constexpr auto TupleConcat (Tuple1&& tuple1, Tuple2&& tuple2) __Th___ + NdCxIA auto TupleConcat (Tuple1&& tuple1, Tuple2&& tuple2) __Th___ { StaticAssert( IsTuple ); StaticAssert( IsTuple ); @@ -311,7 +311,7 @@ namespace _hidden_ } template - ND_ constexpr auto TupleConcat (Tuple1&& tuple1, Tuple2&& tuple2, Tuples&& ...tuples) __Th___ + NdCxIA auto TupleConcat (Tuple1&& tuple1, Tuple2&& tuple2, Tuples&& ...tuples) __Th___ { return TupleConcat( FwdArg(tuple1), TupleConcat( FwdArg(tuple2), FwdArg(tuples)... )); @@ -323,7 +323,7 @@ namespace _hidden_ ================================================= */ template - ND_ constexpr Tuple StructSet (Args&... args) __NE___ + NdCxIA Tuple StructSet (Args&... args) __NE___ { return Tuple< Args& ...>{ args... }; } diff --git a/AE/engine/src/base/Containers/Union.h b/AE/engine/src/base/Containers/Union.h index af929442..87ddaaca 100644 --- a/AE/engine/src/base/Containers/Union.h +++ b/AE/engine/src/base/Containers/Union.h @@ -28,14 +28,14 @@ namespace AE::Base ================================================= */ template - ND_ constexpr decltype(auto) Visit (Union &un, Funcs&&... fn) //noexcept(AllNothrowInvocable< Funcs... >) // TODO + ND_ constexpr exact_t Visit (Union &un, Funcs&&... fn) //noexcept(AllNothrowInvocable< Funcs... >) // TODO { using namespace Base::_hidden_; return std::visit( overloaded{ FwdArg(fn)... }, un ); } template - ND_ constexpr decltype(auto) Visit (const Union &un, Funcs&&... fn) //noexcept(AllNothrowInvocable< Funcs... >) // TODO + ND_ constexpr exact_t Visit (const Union &un, Funcs&&... fn) //noexcept(AllNothrowInvocable< Funcs... >) // TODO { using namespace Base::_hidden_; return std::visit( overloaded{ FwdArg(fn)... 
}, un ); diff --git a/AE/engine/src/base/Containers/UntypedStorage.h b/AE/engine/src/base/Containers/UntypedStorage.h index 72792513..27f3b248 100644 --- a/AE/engine/src/base/Containers/UntypedStorage.h +++ b/AE/engine/src/base/Containers/UntypedStorage.h @@ -41,8 +41,8 @@ namespace AE::Base template explicit UntypedStorage (TypeList) __NE___; - UntypedStorage () __NE___ { DEBUG_ONLY( DbgInitMem( _buffer, Sizeof(_buffer) )); } - ~UntypedStorage () __NE___ { DEBUG_ONLY( DbgFreeMem( _buffer, Sizeof(_buffer) )); } + UntypedStorage () __NE___ { DEBUG_ONLY( DbgInitMem( _buffer, Size() )); } + ~UntypedStorage () __NE___ { DEBUG_ONLY( DbgFreeMem( _buffer, Size() )); } template ND_ T* Ptr (Bytes offset = 0_b) __NE___; @@ -56,8 +56,8 @@ namespace AE::Base template ND_ T const& Ref (Bytes offset = 0_b) C_NE___ { return *Ptr( offset ); } - ND_ static constexpr Bytes Size () __NE___ { return Bytes{Size_v}; } - ND_ static constexpr Bytes Align () __NE___ { return Bytes{Align_v}; } + NdCv__ static Bytes Size () __NE___ { return Bytes{Size_v}; } + NdCv__ static Bytes Align () __NE___ { return Bytes{Align_v}; } }; @@ -89,8 +89,8 @@ namespace AE::Base TrivialStorage (const Self &other) __NE___; Self& operator = (const Self &rhs) __NE___; - TrivialStorage () __NE___ { DEBUG_ONLY( DbgInitMem( _buffer, Sizeof(_buffer) )); } - ~TrivialStorage () __NE___ { DEBUG_ONLY( DbgFreeMem( _buffer, Sizeof(_buffer) )); } + TrivialStorage () __NE___ { DEBUG_ONLY( DbgInitMem( _buffer, Size() )); } + ~TrivialStorage () __NE___ { DEBUG_ONLY( DbgFreeMem( _buffer, Size() )); } template ND_ T* Ptr (Bytes offset = 0_b) __NE___; @@ -107,8 +107,8 @@ namespace AE::Base ND_ void* Data () __NE___ { return _buffer; } ND_ void const* Data () C_NE___ { return _buffer; } - ND_ static constexpr Bytes Size () __NE___ { return Bytes{Size_v}; } - ND_ static constexpr Bytes Align () __NE___ { return Bytes{Align_v}; } + NdCv__ static Bytes Size () __NE___ { return Bytes{Size_v}; } + NdCv__ static Bytes Align () __NE___ 
{ return Bytes{Align_v}; } }; diff --git a/AE/engine/src/base/DataSource/DataStream.h b/AE/engine/src/base/DataSource/DataStream.h index 2f43df4b..3afacf81 100644 --- a/AE/engine/src/base/DataSource/DataStream.h +++ b/AE/engine/src/base/DataSource/DataStream.h @@ -187,18 +187,18 @@ namespace AE::Base Read ================================================= */ - inline bool RStream::Read (OUT void* buffer, Bytes size) __NE___ + inline bool RStream::Read (OUT void* buffer, const Bytes size) __NE___ { return ReadSeq( buffer, size ) == size; } template )> - bool RStream::Read (usize length, OUT BasicString &str) __NE___ + bool RStream::Read (const usize length, OUT BasicString &str) __NE___ { NOTHROW_ERR( str.resize( length )); - Bytes expected_size { sizeof(str[0]) * str.length() }; - Bytes current_size = ReadSeq( str.data(), expected_size ); + const Bytes expected_size { sizeof(str[0]) * str.length() }; + const Bytes current_size = ReadSeq( str.data(), expected_size ); str.resize( usize(current_size / sizeof(str[0])) ); // nothrow @@ -206,19 +206,19 @@ namespace AE::Base } template )> - bool RStream::Read (Bytes size, OUT BasicString &str) __NE___ + bool RStream::Read (const Bytes size, OUT BasicString &str) __NE___ { ASSERT( IsMultipleOf( size, sizeof(T) )); return Read( usize(size) / sizeof(T), OUT str ); } template )> - bool RStream::Read (usize count, OUT Array &arr) __NE___ + bool RStream::Read (const usize count, OUT Array &arr) __NE___ { NOTHROW_ERR( arr.resize( count )); - Bytes expected_size { sizeof(arr[0]) * arr.size() }; - Bytes current_size = ReadSeq( arr.data(), expected_size ); + const Bytes expected_size { sizeof(arr[0]) * arr.size() }; + const Bytes current_size = ReadSeq( arr.data(), expected_size ); arr.resize( usize(current_size / sizeof(arr[0])) ); // nothrow @@ -226,7 +226,7 @@ namespace AE::Base } template )> - bool RStream::Read (Bytes size, OUT Array &arr) __NE___ + bool RStream::Read (const Bytes size, OUT Array &arr) __NE___ { ASSERT( 
IsMultipleOf( size, sizeof(T) )); return Read( usize(size) / sizeof(T), OUT arr ); @@ -235,7 +235,8 @@ namespace AE::Base template )> bool RStream::Read (OUT T &data) __NE___ { - return ReadSeq( AddressOf(data), Sizeof(data) ) == Sizeof(data); + constexpr Bytes size {sizeof(data)}; + return ReadSeq( AddressOf(data), size ) == size; } inline bool RStream::Read (Bytes dataSize, OUT MemChunkList &mem) __NE___ @@ -291,7 +292,7 @@ namespace AE::Base Write ================================================= */ - inline bool WStream::Write (const void* buffer, Bytes size) __NE___ + inline bool WStream::Write (const void* buffer, const Bytes size) __NE___ { return WriteSeq( buffer, size ) == size; } @@ -299,8 +300,7 @@ namespace AE::Base template )> bool WStream::Write (ArrayView buf) __NE___ { - Bytes size { sizeof(buf[0]) * buf.size() }; - + const Bytes size { sizeof(buf[0]) * buf.size() }; return WriteSeq( buf.data(), size ) == size; } @@ -316,15 +316,15 @@ namespace AE::Base if ( str.empty() ) return true; - Bytes size { sizeof(str[0]) * str.length() }; - + const Bytes size { sizeof(str[0]) * str.length() }; return WriteSeq( str.data(), size ) == size; } template )> bool WStream::Write (const T &data) __NE___ { - return WriteSeq( AddressOf(data), Sizeof(data) ) == Sizeof(data); + constexpr Bytes size {sizeof(data)}; + return WriteSeq( AddressOf(data), size ) == size; } inline bool WStream::Write (const MemChunkList &mem) __NE___ diff --git a/AE/engine/src/base/DataSource/StdFileStream.cpp b/AE/engine/src/base/DataSource/StdFileStream.cpp index 227581c2..24e0b3d6 100644 --- a/AE/engine/src/base/DataSource/StdFileStream.cpp +++ b/AE/engine/src/base/DataSource/StdFileStream.cpp @@ -257,7 +257,7 @@ DEBUG_ONLY( { ASSERT( IsOpen() ); - Bytes readn{ fread( buffer, 1, usize(size), _file )}; + Bytes readn{ fread( OUT buffer, 1, usize(size), _file )}; _position += readn; diff --git a/AE/engine/src/base/DataSource/UnixFileHelper.cpp.h 
b/AE/engine/src/base/DataSource/UnixFileHelper.cpp.h index 609e3d8b..d99f31bd 100644 --- a/AE/engine/src/base/DataSource/UnixFileHelper.cpp.h +++ b/AE/engine/src/base/DataSource/UnixFileHelper.cpp.h @@ -1,5 +1,7 @@ // Copyright (c) Zhirnov Andrey. For more information see 'LICENSE' +#pragma once + #ifdef AE_PLATFORM_UNIX_BASED namespace diff --git a/AE/engine/src/base/DataSource/WindowsFileHelper.cpp.h b/AE/engine/src/base/DataSource/WindowsFileHelper.cpp.h index 4b038913..a801f352 100644 --- a/AE/engine/src/base/DataSource/WindowsFileHelper.cpp.h +++ b/AE/engine/src/base/DataSource/WindowsFileHelper.cpp.h @@ -1,5 +1,7 @@ // Copyright (c) Zhirnov Andrey. For more information see 'LICENSE' +#pragma once + #ifdef AE_PLATFORM_WINDOWS namespace @@ -34,7 +36,7 @@ namespace // validation { DBG_CHECK_MSG( not AllBits( flags, FILE_FLAG_RANDOM_ACCESS | FILE_FLAG_SEQUENTIAL_SCAN ), - "'RandomAccess | SequentialScan' is not supported" ); + "combination of 'RandomAccess' and 'SequentialScan' is not supported" ); } return flags; } diff --git a/AE/engine/src/base/Defines/Attribs.h b/AE/engine/src/base/Defines/Attribs.h index a4ec8288..db92378e 100644 --- a/AE/engine/src/base/Defines/Attribs.h +++ b/AE/engine/src/base/Defines/Attribs.h @@ -50,17 +50,31 @@ // function prefix attribs -/* -#define __Cx__ constexpr -#define __Cx__ constexpr -#define St____ static -#define StCx__ static constexpr -#define Fr____ friend -#define FrCx__ friend constexpr -#define St__In static inline -#define St__FI static forceinline -#define __Cv__ consteval -*/ +#define __Cx__ constexpr +#define __CxIn constexpr inline +#define __CxIF constexpr forceinline +#define __CxIA constexpr AE_FLATTEN_FN forceinline +#define ____In inline +#define ____IF forceinline +#define ____IA AE_FLATTEN_FN forceinline +#define NdCx__ ND_ constexpr +#define NdCxIn ND_ constexpr inline +#define NdCxIF ND_ constexpr forceinline +#define NdCxIA ND_ constexpr AE_FLATTEN_FN forceinline +#define Nd__In ND_ inline +#define 
Nd__IF ND_ forceinline +#define Nd__IA ND_ AE_FLATTEN_FN forceinline +#define __Cv__ cxx20_consteval +#define NdCv__ ND_ cxx20_consteval + + +// has attribute (C++20) +#ifdef __has_cpp_attribute +# define AE_HAS_ATTRIB __has_cpp_attribute +#else +# define AE_HAS_ATTRIB(...) (0) +#endif + // no discard #ifndef ND_ @@ -106,7 +120,11 @@ # define forceinline __forceinline # elif defined(AE_COMPILER_CLANG) or defined(AE_COMPILER_GCC) +# if AE_HAS_ATTRIB( gnu::always_inline ) +# define forceinline [[gnu::always_inline]] +# else # define forceinline __inline__ __attribute__((__always_inline__)) +# endif # else # pragma warning ("'forceinline' is not supported") @@ -125,6 +143,24 @@ #endif +// C++20 consteval specifier +// forces all calls to happen at compile time +#ifdef __cpp_consteval +# define cxx20_consteval consteval +#else +# define cxx20_consteval constexpr +#endif + + +// C++20 constinit specifier +// not constant, can be used with 'static' and 'thread_local' +#ifdef __cpp_constinit +# define cxx20_constinit constinit +#else +# define cxx20_constinit +#endif + + // C++20 concepts #ifdef __cpp_concepts # define if_constexpr_requires( ... 
) if constexpr( requires{ __VA_ARGS___ }) @@ -242,30 +278,33 @@ // code vectorization #ifdef AE_COMPILER_MSVC -# define DONT_VECTORIZE __pragma( loop( no_vector )) // disable vectorization -# define FORCE_VECTORIZE __pragma( loop( ivdep )) // ignore dependencies to enable vectorization -# define UNROLL +# define AE_DONT_VECTORIZE __pragma( loop( no_vector )) // disable vectorization +# define AE_FORCE_VECTORIZE __pragma( loop( ivdep )) // ignore dependencies to enable vectorization +# define AE_UNROLL #elif defined(AE_COMPILER_CLANG) -# define DONT_VECTORIZE _Pragma( "clang loop vectorize(disable) interleave(disable)" ) -# define FORCE_VECTORIZE _Pragma( "clang loop vectorize(enable) interleave(enable)" ) -# define UNROLL _Pragma( "clang loop unroll(full)" ) +# define AE_DONT_VECTORIZE _Pragma( "clang loop vectorize(disable) interleave(disable)" ) +# define AE_FORCE_VECTORIZE _Pragma( "clang loop vectorize(enable) interleave(enable)" ) +# define AE_UNROLL _Pragma( "clang loop unroll(full)" ) #else -# define DONT_VECTORIZE -# define FORCE_VECTORIZE -# define UNROLL +# define AE_DONT_VECTORIZE +# define AE_FORCE_VECTORIZE +# define AE_UNROLL #endif // vtable #ifdef AE_COMPILER_MSVC -# define NO_VTABLE __declspec( novtable ) +# define NO_VTABLE __declspec( novtable ) #else # define NO_VTABLE #endif +// TODO: [[clang::internal_linkage]] + + // intrinsic attribute #if defined(AE_COMPILER_MSVC) and not defined(AE_COMPILER_CLANG_CL) # if _MSC_VER >= 1935 // since VS 2022 17.5 @@ -277,21 +316,68 @@ #endif -// force recursively inline all function call inside the block -#ifdef AE_COMPILER_MSVC +// force inline all function calls +// AE_INLINE_ALL and AE_INLINE_CALLS applied for call or scope. +// AE_FLATTEN_FN applied for function. 
+// +#if defined(AE_COMPILER_MSVC) and not defined(AE_COMPILER_CLANG_CL) # if _MSC_VER > 1930 // since VS 2022 -# define AE_INLINE_ALL [[msvc::flatten]] +# define AE_INLINE_ALL [[msvc::flatten]] // recursively use 'forceinline_calls' +# define AE_FLATTEN_FN [[msvc::flatten]] +# define AE_INLINE_CALLS [[msvc::forceinline_calls]] +# endif +#endif +#if defined(AE_COMPILER_CLANG) or defined(AE_COMPILER_CLANG_CL) +# if AE_HAS_ATTRIB( clang::flatten ) and AE_HAS_ATTRIB( clang::always_inline ) +# define AE_INLINE_ALL [[clang::flatten]] // recursively use 'always_inline' +# define AE_INLINE_CALLS [[clang::always_inline]] +# define AE_FLATTEN_FN [[clang::flatten]] +# endif +#endif +#if defined(AE_COMPILER_GCC) +# if AE_HAS_ATTRIB( gnu::flatten ) and AE_HAS_ATTRIB( gnu::always_inline ) +# define AE_INLINE_ALL [[gnu::flatten]] +# define AE_INLINE_CALLS [[gnu::always_inline]] +# define AE_FLATTEN_FN __attribute__((flatten)) # endif #endif #ifndef AE_INLINE_ALL # define AE_INLINE_ALL #endif +#ifndef AE_INLINE_CALLS +# define AE_INLINE_CALLS +#endif +#ifndef AE_FLATTEN_FN +# define AE_FLATTEN_FN +#endif + + +// marks a function as hot (hot code path), as a manual alternative to PGO hotness data +#if defined(AE_COMPILER_CLANG) or defined(AE_COMPILER_CLANG_CL) or defined(AE_COMPILER_GCC) +# if AE_HAS_ATTRIB( gnu::hot ) and AE_HAS_ATTRIB( gnu::cold ) +# define AE_HOT_FN [[gnu::hot]] +# define AE_COLD_FN [[gnu::cold]] +# else +# define AE_HOT_FN __attribute__((hot)) +# define AE_COLD_FN __attribute__((cold)) +# endif +#endif +#ifndef AE_HOT_FN +# define AE_HOT_FN +#endif +#ifndef AE_COLD_FN +# define AE_COLD_FN +#endif // mark function that has no side effects. // function can operate only on arguments and can not read global memory. 
#if defined(AE_COMPILER_CLANG) or defined(AE_COMPILER_CLANG_CL) +# if AE_HAS_ATTRIB( gnu::const ) +# define AE_NOSIDEEFFECTS [[gnu::const]] +# else # define AE_NOSIDEEFFECTS __attribute__ ((const)) +# endif #else # define AE_NOSIDEEFFECTS #endif @@ -344,7 +430,12 @@ // TODO: arm64_neon.h #endif -#if defined(AE_CPU_ARCH_ARM32) or defined(AE_CPU_ARCH_ARM64) +#if defined(AE_COMPILER_MSVC) and defined(AE_CPU_ARCH_ARM64) +# undef AE_SIMD_NEON +# define AE_SIMD_NEON 0 +#endif + +#if defined(AE_CPU_ARCH_ARM32) # ifndef __ARM_FP # error soft-FP is not supported # endif @@ -410,11 +501,12 @@ // Keep exactly the same type. // 'auto' will deduce the value type instead of reference type. // 'exact_t' will use reference type where it is possible. -//#define exact_t decltype(auto) +#define exact_t decltype(auto) /* example of variable initialization: int i; int&& f(); + auto x3a = i; // decltype(x3a) is int decltype(auto) x3d = i; // decltype(x3d) is int auto x4a = (i); // decltype(x4a) is int @@ -424,5 +516,5 @@ example of variable initialization: auto x6a = { 1, 2 }; // decltype(x6a) is std::initializer_list decltype(auto) x6d = { 1, 2 }; // error, { 1, 2 } is not an expression auto *x7a = &i; // decltype(x7a) is int* - decltype(auto)*x7d = &i; // error, declared type is not plain decltype(auto) + decltype(auto)* x7d = &i; // error, declared type is not plain decltype(auto) */ diff --git a/AE/engine/src/base/Defines/DetectLicense.inl.h b/AE/engine/src/base/Defines/DetectLicense.inl.h index 59fecb88..12e491b8 100644 --- a/AE/engine/src/base/Defines/DetectLicense.inl.h +++ b/AE/engine/src/base/Defines/DetectLicense.inl.h @@ -8,7 +8,7 @@ * REQUIRE_LGPLv3 - GNU LGPL 3 license * REQUIRE_GPLv2 - GNU GPL 2 license // https://en.wikipedia.org/wiki/GNU_General_Public_License * REQUIRE_GPLv3 - GNU GPL 3 license - * REQUIRE_COMMERCIAL - commercial license + * REQUIRE_COMMERCIAL - commercial license, closed source, compatible with copyleft license types Output: AE_LICENSE "" @@ -29,18 
+29,40 @@ * AE_LICENSE_MPL_2 - Mozilla Public License // https://en.wikipedia.org/wiki/Mozilla_Public_License * AE_LICENSE_ZLIB // https://en.wikipedia.org/wiki/Zlib_License * AE_LICENSE_UNLICENSE + * AE_LICENSE_CC_BY_NC_SA_3 - CC BY-NC-SA 3.0 // https://creativecommons.org/licenses/by-nc-sa/3.0/ + * AE_LICENSE_FREE_NON_COMMERCIAL reference: https://en.wikipedia.org/wiki/Comparison_of_free_and_open-source_software_licenses */ -#if defined(AE_LICENSE_FREE_NON_COMMERCIAL) and defined(REQUIRE_COMMERCIAL) -# error only for non-commercial use! -#endif + +// commercial license +#ifdef REQUIRE_COMMERCIAL +// AE_LICENSE_APACHE_2 - ok +// AE_LICENSE_BSD/BSD2/BSD3 - ok +// AE_LICENSE_LGPLv2_SHAREDLIB - ok +// AE_LICENSE_LGPLv3_SHAREDLIB - ok +// AE_LICENSE_MIT - ok +// AE_LICENSE_MPL_2 - ok +// AE_LICENSE_ZLIB - ok +// AE_LICENSE_UNLICENSE - ok + +# if defined(AE_LICENSE_FREE_NON_COMMERCIAL) or defined(AE_LICENSE_CC_BY_NC_SA_3) +# error only for non-commercial use! +# endif +# if defined(AE_LICENSE_LGPLv2) or defined(AE_LICENSE_LGPLv3) +# error LGPL with static linking requires to open sources +# endif +# if defined(AE_LICENSE_GPLv2) or defined(AE_LICENSE_GPLv3) or defined(AE_LICENSE_AGPLv3) +# error GPL requires to open sources +# endif + +# define AE_LICENSE "Commercial" // MIT & BSD -#if defined(REQUIRE_MIT) or defined(REQUIRE_BSD_3) +#elif defined(REQUIRE_MIT) or defined(REQUIRE_BSD_3) // AE_LICENSE_BSD/BSD2/BSD3 - ok // AE_LICENSE_LGPLv2_SHAREDLIB - ok // AE_LICENSE_LGPLv3_SHAREDLIB - ok diff --git a/AE/engine/src/base/Defines/StdInclude.h b/AE/engine/src/base/Defines/StdInclude.h index e31af910..6bf88b03 100644 --- a/AE/engine/src/base/Defines/StdInclude.h +++ b/AE/engine/src/base/Defines/StdInclude.h @@ -74,24 +74,24 @@ #endif #ifdef AE_COMPILER_MSVC -# include // GetExceptionCode() -#define AE_SEH_STACK_OVERFLOW 0xC00000FDL // EXCEPTION_STACK_OVERFLOW -#define AE_SEH_ACCESS_VIOLATION 0xC0000005L // EXCEPTION_ACCESS_VIOLATION +# include // GetExceptionCode() +# define 
AE_SEH_STACK_OVERFLOW 0xC00000FDL // EXCEPTION_STACK_OVERFLOW +# define AE_SEH_ACCESS_VIOLATION 0xC0000005L // EXCEPTION_ACCESS_VIOLATION # include -# include // MMX -# include // SSE -# include // SSE2 -# include // SSE3 -# include // SSSE3 -# include // SSE4.1 -# include // SSE4.2 -# include // SSE4A -# include // AES -# include // AVX, AVX2, FMA - -# ifndef AE_COMPILER_CLANG_CL -# include // AVX512 + +# if 0 +# include // MMX +# include // SSE +# include // SSE2 +# include // SSE3 +# include // SSSE3 +# include // SSE4.1 +# include // SSE4.2 +# include // SSE4A +# include // AES +# include // AVX, AVX2, FMA +# include // AVX512 # endif # pragma intrinsic( _BitScanForward, _BitScanReverse ) @@ -100,5 +100,6 @@ # pragma intrinsic( _BitScanForward64, _BitScanReverse64 ) # pragma intrinsic( __popcnt64 ) # endif -#endif + +#endif // AE_COMPILER_MSVC diff --git a/AE/engine/src/base/Log/Logger.cpp b/AE/engine/src/base/Log/Logger.cpp index e2dc92dc..c1cd38bd 100644 --- a/AE/engine/src/base/Log/Logger.cpp +++ b/AE/engine/src/base/Log/Logger.cpp @@ -256,7 +256,7 @@ namespace */ ILogger::LoggerPtr ILogger::CreateDialogOutput (LevelBits levelBits, ScopeBits scopeBits) __NE___ { - #if defined(AE_CI_BUILD_TEST) or defined(AE_CI_BUILD_PERF) + #if defined(AE_CI_BUILD_TEST) or defined(AE_CI_BUILD_PERF) or defined(AE_CFG_RELEASE) Unused( levelBits, scopeBits ); return {}; @@ -569,7 +569,9 @@ namespace case EScope::Client : col = EColor::Blue; break; case EScope::_Count : default : DBG_WARNING( "unknown log level" ); - }*/ + } + switch_end*/ + switch_enum( info.level ) { case ELevel::Debug : add_time = true; add_file = true; col = EColor::Navy; break; @@ -769,9 +771,11 @@ namespace */ ILogger::LoggerPtr ILogger::CreateBreakOnError () __NE___ { + #ifndef AE_CFG_RELEASE if ( PlatformUtils::IsUnderDebugger() ) return MakeUnique(); else + #endif return {}; } //----------------------------------------------------------------------------- diff --git a/AE/engine/src/base/Math/BitMath.h 
b/AE/engine/src/base/Math/BitMath.h index 45372488..e6a1691c 100644 --- a/AE/engine/src/base/Math/BitMath.h +++ b/AE/engine/src/base/Math/BitMath.h @@ -106,6 +106,24 @@ namespace AE::Math if constexpr( S <= 64 ) return AnyBits( lhs.to_ullong(), rhs.to_ullong ); } +/* +================================================= + NoBits +---- + returns 'true' if 'lhs' and 'rhs' have no bits in common. + same as 'not AnyBits()' +================================================= +*/ + template and IsScalarOrEnum< T2 >) + > + ND_ constexpr bool NoBits (const T1 lhs, const T2 rhs) __NE___ + { + StaticAssert( not (IsEnum and IsEnum) or IsSameTypes ); + //ASSERT( rhs != T2(0) ); + return !( ToNearUInt(lhs) & ToNearUInt(rhs) ); + } + /* ================================================= ExtractBit @@ -432,7 +450,9 @@ namespace AE::Math StaticAssert( IsEnum or IsInteger ); ASSERT( x >= T(0) ); - return T( ToNearUInt(x) << (shift & (CT_SizeofInBits(x) - 1)) ); + return shift >= CT_SizeofInBits(x) ? + T(0) : + T( ToNearUInt(x) << shift ); } template @@ -441,7 +461,9 @@ namespace AE::Math StaticAssert( IsEnum or IsInteger ); ASSERT( x >= T(0) ); - return T( ToNearUInt(x) >> (shift & (CT_SizeofInBits(x) - 1)) ); + return shift >= CT_SizeofInBits(x) ? 
+ T(0) : + T( ToNearUInt(x) >> shift ); } /* @@ -579,6 +601,21 @@ namespace AE::Math return (static_cast(x) & (U{1} << index)) != 0; } +/* +================================================= + SetBit +================================================= +*/ + template + ND_ constexpr EnableIf or IsEnum, T> SetBit (const T x, const bool bit, const usize index) __NE___ + { + using U = ToUnsignedInteger; + if ( bit ) + return static_cast( static_cast(x) | (U{1} << index) ); + else + return static_cast( static_cast(x) & ~(U{1} << index) ); + } + /* ================================================= ToBit diff --git a/AE/engine/src/base/Math/Byte.h b/AE/engine/src/base/Math/Byte.h index a779a322..a3a57d7f 100644 --- a/AE/engine/src/base/Math/Byte.h +++ b/AE/engine/src/base/Math/Byte.h @@ -27,19 +27,18 @@ namespace AE::Math Self size; Self align; - constexpr SizeAndAlign () __NE___ {} - constexpr SizeAndAlign (const SizeAndAlign &) __NE___ = default; - constexpr SizeAndAlign (SizeAndAlign &&) __NE___ = default; + __Cx__ SizeAndAlign () __NE___ {} + __Cx__ SizeAndAlign (const SizeAndAlign &) __NE___ = default; + __Cx__ SizeAndAlign (SizeAndAlign &&) __NE___ = default; template - constexpr explicit SizeAndAlign (B1 inSize, B2 inAlign) __NE___ : size{inSize}, align{inAlign} {} + __Cx__ explicit SizeAndAlign (B1 inSize, B2 inAlign) __NE___ : size{inSize}, align{inAlign} {} template - constexpr explicit SizeAndAlign (const B &other) __NE___ : size{other.size}, align{other.align} {} + __Cx__ explicit SizeAndAlign (const B &other) __NE___ : size{other.size}, align{other.align} {} + __Cx__ explicit SizeAndAlign (std::size_t size, std::align_val_t align) __NE___ : size{size}, align{align} {} - constexpr explicit SizeAndAlign (std::size_t size, std::align_val_t align) __NE___ : size{size}, align{align} {} - - constexpr SizeAndAlign operator * (usize count) C_NE___ { SizeAndAlign tmp{*this}; tmp.size *= count; return tmp; } + __Cx__ SizeAndAlign operator * (usize count) C_NE___ { 
SizeAndAlign tmp{*this}; tmp.size *= count; return tmp; } }; @@ -50,146 +49,146 @@ namespace AE::Math // methods public: - constexpr TByte () __NE___ : _value{0} {} - constexpr TByte (UMax_t) __NE___ : _value{UMax} {} - constexpr TByte (Default_t) __NE___ : _value{0} {} - constexpr TByte (const Self &) __NE___ = default; + __Cx__ TByte () __NE___ : _value{0} {} + __Cx__ TByte (UMax_t) __NE___ : _value{UMax} {} + __Cx__ TByte (Default_t) __NE___ : _value{0} {} + __Cx__ TByte (const Self &) __NE___ = default; - explicit constexpr TByte (T value) __NE___ : _value{value} {} - explicit constexpr TByte (std::align_val_t val) __NE___ : _value{usize(val)} {} - explicit constexpr TByte (const void* ptr) __NE___ : _value{BitCast(ptr)} {} + __Cx__ explicit TByte (T value) __NE___ : _value{value} {} + __Cx__ explicit TByte (std::align_val_t val) __NE___ : _value{usize(val)} {} + __Cx__ explicit TByte (const void* ptr) __NE___ : _value{BitCast(ptr)} {} template - constexpr TByte (const TByte &other) __NE___ : _value{CheckCast(other)} {} + __Cx__ TByte (const TByte &other) __NE___ : _value{CheckCast(other)} {} - ND_ explicit constexpr operator sbyte () C_NE___ { return static_cast(_value); } - ND_ explicit constexpr operator sshort () C_NE___ { return static_cast(_value); } - ND_ explicit constexpr operator int () C_NE___ { return static_cast(_value); } - ND_ explicit constexpr operator slong () C_NE___ { return static_cast(_value); } + NdCx__ explicit operator sbyte () C_NE___ { return static_cast(_value); } + NdCx__ explicit operator sshort () C_NE___ { return static_cast(_value); } + NdCx__ explicit operator int () C_NE___ { return static_cast(_value); } + NdCx__ explicit operator slong () C_NE___ { return static_cast(_value); } - ND_ explicit constexpr operator ubyte () C_NE___ { return static_cast(_value); } - ND_ explicit constexpr operator ushort () C_NE___ { return static_cast(_value); } - ND_ explicit constexpr operator uint () C_NE___ { return static_cast(_value); } - 
ND_ explicit constexpr operator ulong () C_NE___ { return static_cast(_value); } + NdCx__ explicit operator ubyte () C_NE___ { return static_cast(_value); } + NdCx__ explicit operator ushort () C_NE___ { return static_cast(_value); } + NdCx__ explicit operator uint () C_NE___ { return static_cast(_value); } + NdCx__ explicit operator ulong () C_NE___ { return static_cast(_value); } #if defined(AE_PLATFORM_WINDOWS) or \ defined(AE_PLATFORM_APPLE) or \ defined(AE_PLATFORM_EMSCRIPTEN) or \ (defined(AE_PLATFORM_ANDROID) and AE_PLATFORM_BITS == 32) - ND_ explicit constexpr operator signed long () C_NE___ { return static_cast< signed long >(_value); } - ND_ explicit constexpr operator unsigned long () C_NE___ { return static_cast< unsigned long >(_value); } + NdCx__ explicit operator signed long () C_NE___ { return static_cast< signed long >(_value); } + NdCx__ explicit operator unsigned long () C_NE___ { return static_cast< unsigned long >(_value); } #endif template - ND_ constexpr R * AsPtr () C_NE___ { return BitCast( CheckCast( _value )); } - ND_ constexpr void* AsPtr () C_NE___ { return BitCast( CheckCast( _value )); } + NdCx__ R * AsPtr () C_NE___ { return BitCast( CheckCast( _value )); } + NdCx__ void* AsPtr () C_NE___ { return BitCast( CheckCast( _value )); } template - ND_ explicit constexpr operator R * () C_NE___ { return BitCast( CheckCast( _value )); } + NdCx__ explicit operator R * () C_NE___ { return BitCast( CheckCast( _value )); } - ND_ constexpr T get () C_NE___ { return _value; } - ND_ constexpr T Kb () C_NE___ { return _value >> 10; } - ND_ constexpr T Mb () C_NE___ { return _value >> 20; } - ND_ constexpr T Gb () C_NE___ { return _value >> 30; } + NdCx__ T get () C_NE___ { return _value; } + NdCx__ T Kb () C_NE___ { return _value >> 10; } + NdCx__ T Mb () C_NE___ { return _value >> 20; } + NdCx__ T Gb () C_NE___ { return _value >> 30; } - ND_ static constexpr Self FromBits (T value) __NE___ { return Self( value >> 3 ); } - ND_ static constexpr Self 
FromKb (T value) __NE___ { return Self( value << 10 ); } - ND_ static constexpr Self FromMb (T value) __NE___ { return Self( value << 20 ); } - ND_ static constexpr Self FromGb (T value) __NE___ { return Self( value << 30 ); } + NdCx__ static Self FromBits (T value) __NE___ { return Self( value >> 3 ); } + NdCx__ static Self FromKb (T value) __NE___ { return Self( value << 10 ); } + NdCx__ static Self FromMb (T value) __NE___ { return Self( value << 20 ); } + NdCx__ static Self FromGb (T value) __NE___ { return Self( value << 30 ); } - template ND_ static constexpr Self SizeOf () __NE___ { StaticAssert( not IsVoid ); return Self( sizeof(B) ); } - template ND_ static constexpr Self SizeOf (const B &) __NE___ { StaticAssert( not IsVoid ); return Self( sizeof(B) ); } + template NdCv__ static Self SizeOf () __NE___ { StaticAssert( not IsVoid ); return Self( sizeof(B) ); } + template NdCv__ static Self SizeOf (const B &) __NE___ { StaticAssert( not IsVoid ); return Self( sizeof(B) ); } - template ND_ static constexpr Self AlignOf () __NE___ { StaticAssert( not IsVoid ); return Self( alignof(B) ); } - template ND_ static constexpr Self AlignOf (const B &) __NE___ { StaticAssert( not IsVoid ); return Self( alignof(B) ); } + template NdCv__ static Self AlignOf () __NE___ { StaticAssert( not IsVoid ); return Self( alignof(B) ); } + template NdCv__ static Self AlignOf (const B &) __NE___ { StaticAssert( not IsVoid ); return Self( alignof(B) ); } - template ND_ static constexpr SizeAndAlign SizeAndAlignOf () __NE___ { return SizeAndAlign{ SizeOf(), AlignOf() }; } - template ND_ static constexpr SizeAndAlign SizeAndAlignOf (const B &) __NE___ { return SizeAndAlign{ SizeOf(), AlignOf() }; } + template NdCv__ static SizeAndAlign SizeAndAlignOf () __NE___ { return SizeAndAlign{ SizeOf(), AlignOf() }; } + template NdCv__ static SizeAndAlign SizeAndAlignOf (const B &) __NE___ { return SizeAndAlign{ SizeOf(), AlignOf() }; } // move any pointer - template ND_ friend B* operator + (B* 
lhs, const Self &rhs) __NE___ { return BitCast( usize(lhs) + usize(rhs._value) ); } - template ND_ friend B* operator - (B* lhs, const Self &rhs) __NE___ { return BitCast( usize(lhs) - usize(rhs._value) ); } - template friend B*& operator += (B* &lhs, const Self &rhs) __NE___ { return (lhs = lhs + rhs); } - template friend B*& operator -= (B* &lhs, const Self &rhs) __NE___ { return (lhs = lhs + rhs); } + template ND_ friend B* operator + (B* lhs, const Self &rhs) __NE___ { return BitCast( usize(lhs) + usize(rhs._value) ); } + template ND_ friend B* operator - (B* lhs, const Self &rhs) __NE___ { return BitCast( usize(lhs) - usize(rhs._value) ); } + template friend B*& operator += (B* &lhs, const Self &rhs) __NE___ { return (lhs = lhs + rhs); } + template friend B*& operator -= (B* &lhs, const Self &rhs) __NE___ { return (lhs = lhs - rhs); } - constexpr Self& operator = (UMax_t) __NE___ { _value = UMax; return *this; } - constexpr Self& operator = (Default_t) __NE___ { _value = 0; return *this; } - constexpr Self& operator = (const Self &rhs) __NE___ = default; + __Cx__ Self& operator = (UMax_t) __NE___ { _value = UMax; return *this; } + __Cx__ Self& operator = (Default_t) __NE___ { _value = 0; return *this; } + __Cx__ Self& operator = (const Self &rhs) __NE___ = default; - ND_ constexpr Self operator ~ () C_NE___ { return Self( ~_value ); } + NdCx__ Self operator ~ () C_NE___ { return Self( ~_value ); } - Self& operator ++ () __NE___ { ++_value; return *this; } - Self operator ++ (int) __NE___ { auto res = _value++; return Self{res}; } + Self& operator ++ () __NE___ { ++_value; return *this; } + Self operator ++ (int) __NE___ { auto res = _value++; return Self{res}; } - Self& operator += (const Self &rhs) __NE___ { _value += rhs._value; return *this; } - ND_ constexpr Self operator + (const Self &rhs) C_NE___ { return Self( _value + rhs._value ); } + Self& operator += (const Self &rhs) __NE___ { _value += rhs._value; return *this; } + NdCx__ Self operator + (const 
Self &rhs) C_NE___ { return Self( _value + rhs._value ); } - Self& operator -= (const Self &rhs) __NE___ { _value -= rhs._value; return *this; } - ND_ constexpr Self operator - (const Self &rhs) C_NE___ { return Self( _value - rhs._value ); } + Self& operator -= (const Self &rhs) __NE___ { _value -= rhs._value; return *this; } + NdCx__ Self operator - (const Self &rhs) C_NE___ { return Self( _value - rhs._value ); } - Self& operator *= (const Self &rhs) __NE___ { _value *= rhs._value; return *this; } - ND_ constexpr Self operator * (const Self &rhs) C_NE___ { return Self( _value * rhs._value ); } + Self& operator *= (const Self &rhs) __NE___ { _value *= rhs._value; return *this; } + NdCx__ Self operator * (const Self &rhs) C_NE___ { return Self( _value * rhs._value ); } - Self& operator /= (const Self &rhs) __NE___ { _value /= rhs._value; return *this; } - ND_ constexpr Self operator / (const Self &rhs) C_NE___ { return Self( _value / rhs._value ); } + Self& operator /= (const Self &rhs) __NE___ { _value /= rhs._value; return *this; } + NdCx__ Self operator / (const Self &rhs) C_NE___ { return Self( _value / rhs._value ); } - Self& operator %= (const Self &rhs) __NE___ { _value %= rhs._value; return *this; } - ND_ constexpr Self operator % (const Self &rhs) C_NE___ { return Self( _value % rhs._value ); } + Self& operator %= (const Self &rhs) __NE___ { _value %= rhs._value; return *this; } + NdCx__ Self operator % (const Self &rhs) C_NE___ { return Self( _value % rhs._value ); } - Self& operator += (const T rhs) __NE___ { _value += rhs; return *this; } - ND_ constexpr Self operator + (const T rhs) C_NE___ { return Self( _value + rhs ); } + Self& operator += (const T rhs) __NE___ { _value += rhs; return *this; } + NdCx__ Self operator + (const T rhs) C_NE___ { return Self( _value + rhs ); } - Self& operator -= (const T rhs) __NE___ { _value -= rhs; return *this; } - ND_ constexpr Self operator - (const T rhs) C_NE___ { return Self( _value - rhs ); } + Self& operator 
-= (const T rhs) __NE___ { _value -= rhs; return *this; } + NdCx__ Self operator - (const T rhs) C_NE___ { return Self( _value - rhs ); } - Self& operator *= (const T rhs) __NE___ { _value *= rhs; return *this; } - ND_ constexpr Self operator * (const T rhs) C_NE___ { return Self( _value * rhs ); } + Self& operator *= (const T rhs) __NE___ { _value *= rhs; return *this; } + NdCx__ Self operator * (const T rhs) C_NE___ { return Self( _value * rhs ); } - Self& operator /= (const T rhs) __NE___ { _value /= rhs; return *this; } - ND_ constexpr Self operator / (const T rhs) C_NE___ { return Self( _value / rhs ); } + Self& operator /= (const T rhs) __NE___ { _value /= rhs; return *this; } + NdCx__ Self operator / (const T rhs) C_NE___ { return Self( _value / rhs ); } - Self& operator %= (const T rhs) __NE___ { _value %= rhs; return *this; } - ND_ constexpr Self operator % (const T rhs) C_NE___ { return Self( _value % rhs ); } + Self& operator %= (const T rhs) __NE___ { _value %= rhs; return *this; } + NdCx__ Self operator % (const T rhs) C_NE___ { return Self( _value % rhs ); } - ND_ Self& operator >>= (const T rhs) __NE___ { _value >>= rhs; return *this; } - ND_ constexpr Self operator >> (const T rhs) C_NE___ { return Self{ _value >> rhs }; } + Self& operator >>= (const T rhs) __NE___ { _value >>= rhs; return *this; } + NdCx__ Self operator >> (const T rhs) C_NE___ { return Self{ _value >> rhs }; } - ND_ Self& operator <<= (const T rhs) __NE___ { _value <<= rhs; return *this; } - ND_ constexpr Self operator << (const T rhs) C_NE___ { return Self{ _value << rhs }; } + Self& operator <<= (const T rhs) __NE___ { _value <<= rhs; return *this; } + NdCx__ Self operator << (const T rhs) C_NE___ { return Self{ _value << rhs }; } - ND_ constexpr bool operator == (const Self &rhs) C_NE___ { return _value == rhs._value; } - ND_ constexpr bool operator != (const Self &rhs) C_NE___ { return _value != rhs._value; } - ND_ constexpr bool operator > (const Self &rhs) C_NE___ { return 
_value > rhs._value; } - ND_ constexpr bool operator < (const Self &rhs) C_NE___ { return _value < rhs._value; } - ND_ constexpr bool operator >= (const Self &rhs) C_NE___ { return _value >= rhs._value; } - ND_ constexpr bool operator <= (const Self &rhs) C_NE___ { return _value <= rhs._value; } + NdCx__ bool operator == (const Self &rhs) C_NE___ { return _value == rhs._value; } + NdCx__ bool operator != (const Self &rhs) C_NE___ { return _value != rhs._value; } + NdCx__ bool operator > (const Self &rhs) C_NE___ { return _value > rhs._value; } + NdCx__ bool operator < (const Self &rhs) C_NE___ { return _value < rhs._value; } + NdCx__ bool operator >= (const Self &rhs) C_NE___ { return _value >= rhs._value; } + NdCx__ bool operator <= (const Self &rhs) C_NE___ { return _value <= rhs._value; } - ND_ constexpr bool operator == (const T rhs) C_NE___ { return _value == rhs; } - ND_ constexpr bool operator != (const T rhs) C_NE___ { return _value != rhs; } - ND_ constexpr bool operator > (const T rhs) C_NE___ { return _value > rhs; } - ND_ constexpr bool operator < (const T rhs) C_NE___ { return _value < rhs; } - ND_ constexpr bool operator >= (const T rhs) C_NE___ { return _value >= rhs; } - ND_ constexpr bool operator <= (const T rhs) C_NE___ { return _value <= rhs; } + NdCx__ bool operator == (const T rhs) C_NE___ { return _value == rhs; } + NdCx__ bool operator != (const T rhs) C_NE___ { return _value != rhs; } + NdCx__ bool operator > (const T rhs) C_NE___ { return _value > rhs; } + NdCx__ bool operator < (const T rhs) C_NE___ { return _value < rhs; } + NdCx__ bool operator >= (const T rhs) C_NE___ { return _value >= rhs; } + NdCx__ bool operator <= (const T rhs) C_NE___ { return _value <= rhs; } - ND_ friend constexpr Self operator + (T lhs, const Self &rhs) __NE___ { return Self( lhs + rhs._value ); } - ND_ friend constexpr Self operator - (T lhs, const Self &rhs) __NE___ { return Self( lhs - rhs._value ); } - ND_ friend constexpr Self operator * (T lhs, const 
Self &rhs) __NE___ { return Self( lhs * rhs._value ); } - ND_ friend constexpr Self operator / (T lhs, const Self &rhs) __NE___ { return Self( lhs / rhs._value ); } - ND_ friend constexpr Self operator % (T lhs, const Self &rhs) __NE___ { return Self( lhs % rhs._value ); } + NdCx__ friend Self operator + (T lhs, const Self &rhs) __NE___ { return Self( lhs + rhs._value ); } + NdCx__ friend Self operator - (T lhs, const Self &rhs) __NE___ { return Self( lhs - rhs._value ); } + NdCx__ friend Self operator * (T lhs, const Self &rhs) __NE___ { return Self( lhs * rhs._value ); } + NdCx__ friend Self operator / (T lhs, const Self &rhs) __NE___ { return Self( lhs / rhs._value ); } + NdCx__ friend Self operator % (T lhs, const Self &rhs) __NE___ { return Self( lhs % rhs._value ); } - ND_ friend constexpr bool operator == (T lhs, Self rhs) __NE___ { return lhs == rhs._value; } - ND_ friend constexpr bool operator != (T lhs, Self rhs) __NE___ { return lhs != rhs._value; } - ND_ friend constexpr bool operator > (T lhs, Self rhs) __NE___ { return lhs > rhs._value; } - ND_ friend constexpr bool operator < (T lhs, Self rhs) __NE___ { return lhs < rhs._value; } - ND_ friend constexpr bool operator >= (T lhs, Self rhs) __NE___ { return lhs >= rhs._value; } - ND_ friend constexpr bool operator <= (T lhs, Self rhs) __NE___ { return lhs <= rhs._value; } + NdCx__ friend bool operator == (T lhs, Self rhs) __NE___ { return lhs == rhs._value; } + NdCx__ friend bool operator != (T lhs, Self rhs) __NE___ { return lhs != rhs._value; } + NdCx__ friend bool operator > (T lhs, Self rhs) __NE___ { return lhs > rhs._value; } + NdCx__ friend bool operator < (T lhs, Self rhs) __NE___ { return lhs < rhs._value; } + NdCx__ friend bool operator >= (T lhs, Self rhs) __NE___ { return lhs >= rhs._value; } + NdCx__ friend bool operator <= (T lhs, Self rhs) __NE___ { return lhs <= rhs._value; } - ND_ static constexpr Self Max () __NE___ { return Self{ MaxValue() }; } - ND_ static constexpr Self Min () 
__NE___ { return Self{ MinValue() }; } + NdCv__ static Self Max () __NE___ { return Self{ MaxValue() }; } + NdCv__ static Self Min () __NE___ { return Self{ MinValue() }; } }; @@ -211,25 +210,25 @@ namespace AE::Math inline static constexpr Bytes SizeOf = Bytes::SizeOf(); template - ND_ constexpr Bytes Sizeof (const T &) __NE___ { return Bytes::SizeOf(); } + NdCx__ Bytes Sizeof (const T &) __NE___ { return Bytes::SizeOf(); } // TODO: consteval template inline static constexpr Bytes AlignOf = Bytes::AlignOf(); template - ND_ constexpr Bytes Alignof (const T &) __NE___ { return Bytes::AlignOf(); } + NdCx__ Bytes Alignof (const T &) __NE___ { return Bytes::AlignOf(); } template inline static constexpr SizeAndAlign SizeAndAlignOf = Bytes::SizeAndAlignOf(); template - ND_ constexpr SizeAndAlign SizeAndAlignof (const T &) __NE___ { return Bytes::SizeAndAlignOf(); } + NdCx__ SizeAndAlign SizeAndAlignof (const T &) __NE___ { return Bytes::SizeAndAlignOf(); } - ND_ constexpr Bytes operator "" _b (unsigned long long value) __NE___ { return Bytes{ CheckCast(value) }; } - ND_ constexpr Bytes operator "" _Kb (unsigned long long value) __NE___ { return Bytes::FromKb( CheckCast(value) ); } - ND_ constexpr Bytes operator "" _Mb (unsigned long long value) __NE___ { return Bytes::FromMb( CheckCast(value) ); } - ND_ constexpr Bytes operator "" _Gb (unsigned long long value) __NE___ { return Bytes::FromGb( CheckCast(value) ); } + NdCv__ Bytes operator "" _b (unsigned long long value) __NE___ { return Bytes{ CheckCast(value) }; } + NdCv__ Bytes operator "" _Kb (unsigned long long value) __NE___ { return Bytes::FromKb( CheckCast(value) ); } + NdCv__ Bytes operator "" _Mb (unsigned long long value) __NE___ { return Bytes::FromMb( CheckCast(value) ); } + NdCv__ Bytes operator "" _Gb (unsigned long long value) __NE___ { return Bytes::FromGb( CheckCast(value) ); } namespace _hidden_ @@ -267,13 +266,13 @@ namespace AE::Math ================================================= */ template - ND_ 
constexpr TByte FloorPOT (const TByte x) __NE___ + NdCx__ TByte FloorPOT (const TByte x) __NE___ { return TByte{ FloorPOT( T{x} )}; } template - ND_ constexpr TByte CeilPOT (const TByte x) __NE___ + NdCx__ TByte CeilPOT (const TByte x) __NE___ { return TByte{ CeilPOT( T{x} )}; } @@ -284,7 +283,7 @@ namespace AE::Math ================================================= */ template - ND_ constexpr bool IsPowerOfTwo (const TByte x) __NE___ + NdCx__ bool IsPowerOfTwo (const TByte x) __NE___ { return IsPowerOfTwo( T{x} ); } @@ -320,11 +319,11 @@ class std::numeric_limits< AE::Math::TByte > public: static constexpr bool is_specialized = Base::is_specialized; - ND_ static constexpr Bytes min () __NE___ { + NdCx__ static Bytes min () __NE___ { return Bytes{Base::min()}; } - ND_ static constexpr Bytes max () __NE___ { + NdCx__ static Bytes max () __NE___ { return Bytes{Base::max()}; } }; diff --git a/AE/engine/src/base/Math/GLM.h b/AE/engine/src/base/Math/GLM.h index 0fe27f37..1a5e0b61 100644 --- a/AE/engine/src/base/Math/GLM.h +++ b/AE/engine/src/base/Math/GLM.h @@ -9,20 +9,22 @@ #define GLM_FORCE_DEPTH_ZERO_TO_ONE #define GLM_FORCE_RADIANS -#define GLM_ENABLE_EXPERIMENTAL +#define GLM_ENABLE_EXPERIMENTAL // for gtx #ifdef AE_CXX_20 -# define GLM_FORCE_CXX2A +# define GLM_FORCE_CXX20 #else # define GLM_FORCE_CXX17 #endif -#define GLM_FORCE_VEC_EQUAL_OP // special for AE #define GLM_FORCE_EXPLICIT_CTOR -//#define GLM_FORCE_XYZW_ONLY // will disable SIMD +//#define GLM_FORCE_XYZW_ONLY // will disable SIMD #define GLM_FORCE_CTOR_INIT #define GLM_FORCE_INLINE #define GLM_FORCE_ALIGNED_GENTYPES #define GLM_FORCE_INTRINSICS #define GLM_FORCE_DEFAULT_ALIGNED_GENTYPES +//#define GLM_FORCE_FMA +//#define GLM_FORCE_SIZE_T_LENGTH +#define GLM_AE_VERSION // enable simd @@ -42,8 +44,6 @@ # define GLM_FORCE_SSE3 // float # elif AE_SIMD_SSE >= 20 # define GLM_FORCE_SSE2 // float -# elif AE_SIMD_SSE >= 10 -# define GLM_FORCE_SSE // float # else // disable intrinsics # define 
GLM_FORCE_ARCH_UNKNOWN @@ -59,18 +59,7 @@ #endif -#ifdef AE_COMPILER_MSVC -# pragma warning (push) -# pragma warning (disable: 4201) -# pragma warning (disable: 4127) -#endif -#ifdef AE_COMPILER_CLANG -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wconditional-uninitialized" -#endif - #include "glm.hpp" -#include "detail/type_half.hpp" #include "gtc/bitfield.hpp" #include "gtc/color_space.hpp" @@ -96,6 +85,7 @@ #include "gtx/norm.hpp" #include "gtx/easing.hpp" #include "gtx/rotate_vector.hpp" +#include "gtx/quaternion.hpp" #include "gtx/dual_quaternion.hpp" #include "gtx/intersect.hpp" #include "gtx/fast_exponential.hpp" @@ -111,13 +101,6 @@ #include "ext/matrix_relational.hpp" -#ifdef AE_COMPILER_MSVC -# pragma warning (pop) -#endif -#ifdef AE_COMPILER_CLANG -# pragma clang diagnostic pop -#endif - #if GLM_CONFIG_ALIGNED_GENTYPES != GLM_ENABLE # error required GLM_CONFIG_ALIGNED_GENTYPES = GLM_ENABLE #endif diff --git a/AE/engine/src/base/Math/MatrixImpl.h b/AE/engine/src/base/Math/MatrixImpl.h index 6d1be10f..40cc7800 100644 --- a/AE/engine/src/base/Math/MatrixImpl.h +++ b/AE/engine/src/base/Math/MatrixImpl.h @@ -143,6 +143,8 @@ namespace AE::Math #if Columns == 2 and Rows == 2 ND_ static Self Rotate (Rad_t angle) __NE___; + ND_ static Self Scaled (const Vec2_t &scale) __NE___; + ND_ static Self Scaled (const T scale) __NE___ { return Scaled( Vec2_t{ scale }); } #endif #if Columns == 3 and Rows == 3 @@ -231,32 +233,22 @@ namespace AE::Math const T c = Cos( angle ); return Self{ Col_t{ c, s }, Col_t{ -s, c }}; } -#endif - -#if Columns == 3 and Rows == 3 /* ================================================= - ToCubeFace + Scaled ================================================= -* +*/ template - TMatrix TMatrix::ToCubeFace (ubyte face) __NE___ + TMatrix TMatrix::Scaled (const Vec2_t &scale) __NE___ { - ASSERT( face < 6 ); - - const int idx = face < 6 ? 
face : 5; // pos: 0, 2, 4; neg: 1, 3, 5 - const int norm = idx >> 1; // norm: 0, 1, 2 - const bool negative = idx & 1; - Self m = Zero(); - - m( norm==0 ) = T{1}; - m( 6-norm ) = T{1}; - m( norm+6 ) = negative ? T{-1} : T{1}; - - return m.Transpose(); + return Self{ Col_t{ scale.x, T(0), }, + Col_t{ T(0), scale.y }}; } +#endif + +#if Columns == 3 and Rows == 3 /* ================================================= FromDirection @@ -269,6 +261,7 @@ namespace AE::Math Vec3_t ver = Normalize( Cross( dir, hor )); return Self{ hor, ver, dir }; } + /* ================================================= Scaled @@ -519,19 +512,19 @@ namespace AE::Math bool TMatrix::operator == (const Self &rhs) C_NE___ { #if Columns == 2 - return All( get<0>() == rhs.get<0>() ) & - All( get<1>() == rhs.get<1>() ); + return AllEqual( get<0>(), rhs.get<0>() ) and + AllEqual( get<1>(), rhs.get<1>() ); #elif Columns == 3 - return All( get<0>() == rhs.get<0>() ) & - All( get<1>() == rhs.get<1>() ) & - All( get<2>() == rhs.get<2>() ); + return AllEqual( get<0>(), rhs.get<0>() ) and + AllEqual( get<1>(), rhs.get<1>() ) and + AllEqual( get<2>(), rhs.get<2>() ); #elif Columns == 4 - return All( get<0>() == rhs.get<0>() ) & - All( get<1>() == rhs.get<1>() ) & - All( get<2>() == rhs.get<2>() ) & - All( get<3>() == rhs.get<3>() ); + return AllEqual( get<0>(), rhs.get<0>() ) and + AllEqual( get<1>(), rhs.get<1>() ) and + AllEqual( get<2>(), rhs.get<2>() ) and + AllEqual( get<3>(), rhs.get<3>() ); #endif } diff --git a/AE/engine/src/base/Math/Percent.h b/AE/engine/src/base/Math/Percent.h index 29c2f826..4e65fa2a 100644 --- a/AE/engine/src/base/Math/Percent.h +++ b/AE/engine/src/base/Math/Percent.h @@ -27,26 +27,33 @@ namespace AE::Math // methods public: - constexpr TPercent () __NE___ = default; - explicit constexpr TPercent (T val) __NE___ : _value{val} {} + constexpr TPercent () __NE___ = default; + explicit constexpr TPercent (T val) __NE___ : _value{val} {} + NdCx__ bool operator == (const Self rhs) 
C_NE___ { return _value == rhs._value; } + NdCx__ bool operator != (const Self rhs) C_NE___ { return _value != rhs._value; } + NdCx__ bool operator < (const Self rhs) C_NE___ { return _value < rhs._value; } + NdCx__ bool operator > (const Self rhs) C_NE___ { return _value > rhs._value; } + NdCx__ bool operator <= (const Self rhs) C_NE___ { return _value <= rhs._value; } + NdCx__ bool operator >= (const Self rhs) C_NE___ { return _value >= rhs._value; } - ND_ constexpr T GetPercent () C_NE___ { return _value * T{100}; } // 0..100% - ND_ constexpr T GetFraction () C_NE___ { return _value; } // 0..1 + NdCx__ T GetPercent () C_NE___ { return _value * T{100}; } // 0..100% + NdCx__ T GetFraction () C_NE___ { return _value; } // 0..1 template - ND_ constexpr EnableIf, B> Of (const B &value) C_NE___ { return value * B{GetFraction()}; } + NdCx__ EnableIf, B> Of (const B &value) C_NE___ { return value * B{GetFraction()}; } - ND_ static constexpr Self FromPercent (T value) __NE___ { return Self{ value * T{0.01} }; } - ND_ static constexpr Self FromFraction (T value) __NE___ { return Self{ value }; } + template + NdCx__ static Self FromPercent (B value) __NE___ { return Self{ T(value) * T{0.01} }; } + NdCx__ static Self FromFraction (T value) __NE___ { return Self{ value }; } }; using Percent = TPercent< float >; using PercentD = TPercent< double >; - ND_ constexpr Percent operator "" _pct (long double value) { return Percent{ Percent::FromPercent( Percent::Value_t( value ))}; } - ND_ constexpr Percent operator "" _pct (unsigned long long value) { return Percent{ Percent::FromPercent( Percent::Value_t( value ))}; } + NdCx__ Percent operator "" _pct (long double value) __NE___ { return Percent{ Percent::FromPercent( Percent::Value_t( value ))}; } + NdCx__ Percent operator "" _pct (unsigned long long value) __NE___ { return Percent{ Percent::FromPercent( Percent::Value_t( value ))}; } } // AE::Math diff --git a/AE/engine/src/base/Math/PhysicalDimension.h 
b/AE/engine/src/base/Math/PhysicalDimension.h index ccf0ee74..3dbcab65 100644 --- a/AE/engine/src/base/Math/PhysicalDimension.h +++ b/AE/engine/src/base/Math/PhysicalDimension.h @@ -184,7 +184,7 @@ namespace AE::Math using KilogramMeterPerSecond = MeterPerSecond::Mul< Kilogram >; // kg * m / s using KilogramPerCubicMeter = Kilogram::Div< CubicMeter >; // kg / m^3 using Newton = Kilogram::Mul< MeterPerSquareSecond >; // N = kg * m / s^2 - SI - using Joule = Newton::Mul< Meter >; // J = kg * (m / s)^2 - SI + using Joule = Newton::Mul< Meter >; // J = kg * (m / s)^2 or C*V - SI using Pascal = Kilogram::Div< Meter::Mul< Second::Pow<2> >>; // Pa = kg / (m * s^2) - SI using Hertz = NonDimensional::Div< Second >; // Hz = 1 / s - SI using Watt = Joule::Div< Second >; // W = J / s - SI diff --git a/AE/engine/src/base/Math/PhysicalQuantity.h b/AE/engine/src/base/Math/PhysicalQuantity.h index af0d41c7..89b15235 100644 --- a/AE/engine/src/base/Math/PhysicalQuantity.h +++ b/AE/engine/src/base/Math/PhysicalQuantity.h @@ -53,15 +53,16 @@ namespace AE::Math // methods public: constexpr PhysicalQuantity () __NE___ : _value{0} {} + constexpr explicit PhysicalQuantity (Zero_t) __NE___ : _value{0} {} constexpr explicit PhysicalQuantity (Value_t value) __NE___ : _value{value} {} template - constexpr PhysicalQuantity (const PhysicalQuantity &other) __NE___ : + constexpr PhysicalQuantity (const PhysicalQuantity other) __NE___ : _value{ other.template ToScale().GetNonScaled() } {} template - explicit constexpr PhysicalQuantity (const PhysicalQuantity &other) __NE___ : + explicit constexpr PhysicalQuantity (const PhysicalQuantity other) __NE___ : _value{Value_t( other.template ToScale().GetNonScaled() )} {} @@ -70,27 +71,28 @@ namespace AE::Math constexpr Self& operator = (const Self &) __NE___ = default; constexpr Self& operator = (Self &&) __NE___ = default; + constexpr Self& operator = (Zero_t) __NE___ { _value = Value_t(0); return *this; } ND_ constexpr Self operator - () C_NE___ { 
return Self{ -_value }; } - ND_ constexpr bool operator == (const Self &rhs) C_NE___ { return _value == rhs._value; } - ND_ constexpr bool operator != (const Self &rhs) C_NE___ { return _value != rhs._value; } - ND_ constexpr bool operator > (const Self &rhs) C_NE___ { return _value > rhs._value; } - ND_ constexpr bool operator >= (const Self &rhs) C_NE___ { return _value >= rhs._value; } - ND_ constexpr bool operator < (const Self &rhs) C_NE___ { return _value < rhs._value; } - ND_ constexpr bool operator <= (const Self &rhs) C_NE___ { return _value <= rhs._value; } + ND_ constexpr bool operator == (const Self rhs) C_NE___ { return _value == rhs._value; } + ND_ constexpr bool operator != (const Self rhs) C_NE___ { return _value != rhs._value; } + ND_ constexpr bool operator > (const Self rhs) C_NE___ { return _value > rhs._value; } + ND_ constexpr bool operator >= (const Self rhs) C_NE___ { return _value >= rhs._value; } + ND_ constexpr bool operator < (const Self rhs) C_NE___ { return _value < rhs._value; } + ND_ constexpr bool operator <= (const Self rhs) C_NE___ { return _value <= rhs._value; } - constexpr Self& operator += (const Self &rhs) __NE___ { _value += rhs._value; return *this; } - constexpr Self& operator -= (const Self &rhs) __NE___ { _value -= rhs._value; return *this; } + constexpr Self& operator += (const Self rhs) __NE___ { _value += rhs._value; return *this; } + constexpr Self& operator -= (const Self rhs) __NE___ { _value -= rhs._value; return *this; } - ND_ constexpr Self operator + (const Self &rhs) C_NE___ { return Self( _value + rhs._value ); } - ND_ constexpr Self operator - (const Self &rhs) C_NE___ { return Self( _value - rhs._value ); } + ND_ constexpr Self operator + (const Self rhs) C_NE___ { return Self( _value + rhs._value ); } + ND_ constexpr Self operator - (const Self rhs) C_NE___ { return Self( _value - rhs._value ); } - constexpr Self& operator *= (Value_t rhs) __NE___ { _value *= rhs; return *this; } - constexpr Self& operator 
/= (Value_t rhs) __NE___ { _value /= rhs; return *this; } + constexpr Self& operator *= (const Value_t rhs) __NE___ { _value *= rhs; return *this; } + constexpr Self& operator /= (const Value_t rhs) __NE___ { _value /= rhs; return *this; } - ND_ constexpr Self operator * (Value_t rhs) C_NE___ { return Self( _value * rhs ); } - ND_ constexpr Self operator / (Value_t rhs) C_NE___ { return Self( _value / rhs ); } + ND_ constexpr Self operator * (const Value_t rhs) C_NE___ { return Self( _value * rhs ); } + ND_ constexpr Self operator / (const Value_t rhs) C_NE___ { return Self( _value / rhs ); } ND_ constexpr Value_t GetNonScaled () C_NE___ { return _value; } ND_ constexpr Value_t& GetNonScaledRef () __NE___ { return _value; } @@ -98,35 +100,35 @@ namespace AE::Math template - ND_ constexpr auto operator + (const PhysicalQuantity &rhs) C_NE___ + ND_ constexpr auto operator + (const PhysicalQuantity rhs) C_NE___ { using Scale = PhysicalQuantity_Scale::template Add< Scale_t, S >; return PhysicalQuantity< Value_t, Dimension_t, Scale >{ Scale::Get( _value, rhs.GetNonScaled() )}; } template - ND_ constexpr auto operator - (const PhysicalQuantity &rhs) C_NE___ + ND_ constexpr auto operator - (const PhysicalQuantity rhs) C_NE___ { using Scale = PhysicalQuantity_Scale::template Sub< Scale_t, S >; return PhysicalQuantity< Value_t, Dimension_t, Scale >{ Scale::Get( _value, rhs.GetNonScaled() )}; } template - ND_ constexpr auto operator * (const PhysicalQuantity &rhs) C_NE___ + ND_ constexpr auto operator * (const PhysicalQuantity rhs) C_NE___ { using Scale = PhysicalQuantity_Scale::template Mul< Scale_t, S >; return PhysicalQuantity< Value_t, typename Dimension_t::template Mul, Scale >{ Scale::Get( _value, rhs.GetNonScaled() )}; } template - ND_ constexpr auto operator / (const PhysicalQuantity &rhs) C_NE___ + ND_ constexpr auto operator / (const PhysicalQuantity rhs) C_NE___ { using Scale = PhysicalQuantity_Scale::template Div< Scale_t, S >; return PhysicalQuantity< Value_t, 
typename Dimension_t::template Div, Scale >{ Scale::Get( _value, rhs.GetNonScaled() )}; } - ND_ friend constexpr Self operator * (Value_t lhs, const Self &rhs) __NE___ + ND_ friend constexpr Self operator * (Value_t lhs, const Self rhs) __NE___ { return Self( lhs * rhs.GetNonScaled() ); } @@ -138,7 +140,7 @@ namespace AE::Math return PhysicalQuantity< Value_t, Dimension_t, DstScale >{ _value * scale }; } - ND_ friend constexpr Inversed_t operator / (Value_t lhs, const Self &rhs) __NE___ + ND_ friend constexpr Inversed_t operator / (const Value_t lhs, const Self rhs) __NE___ { return Inversed_t{ lhs / rhs.GetNonScaled() }; } @@ -216,15 +218,16 @@ namespace AE::Math // methods public: constexpr PhysicalQuantity () __NE___ : _value{0} {} + constexpr explicit PhysicalQuantity (Zero_t) __NE___ : _value{0} {} constexpr explicit PhysicalQuantity (Value_t value) __NE___ : _value{value} {} template - constexpr PhysicalQuantity (const PhysicalQuantity &other) __NE___ : + constexpr PhysicalQuantity (const PhysicalQuantity other) __NE___ : _value{ other.template ToScale().GetNonScaled() } {} template - explicit constexpr PhysicalQuantity (const PhysicalQuantity &other) __NE___ : + explicit constexpr PhysicalQuantity (const PhysicalQuantity other) __NE___ : _value{T( other.template ToScale().GetNonScaled() )} {} @@ -233,21 +236,22 @@ namespace AE::Math constexpr Self& operator = (const Self &) __NE___ = default; constexpr Self& operator = (Self &&) __NE___ = default; + constexpr Self& operator = (Zero_t) __NE___ { _value = Value_t(0); return *this; } ND_ constexpr operator Value_t () C_NE___ { return GetScaled(); } - ND_ constexpr bool operator == (const Self &rhs) C_NE___ { return _value == rhs.GetNonScaled(); } - ND_ constexpr bool operator != (const Self &rhs) C_NE___ { return _value != rhs.GetNonScaled(); } - ND_ constexpr bool operator > (const Self &rhs) C_NE___ { return _value > rhs.GetNonScaled(); } - ND_ constexpr bool operator >= (const Self &rhs) C_NE___ { return 
_value >= rhs.GetNonScaled(); } - ND_ constexpr bool operator < (const Self &rhs) C_NE___ { return _value < rhs.GetNonScaled(); } - ND_ constexpr bool operator <= (const Self &rhs) C_NE___ { return _value <= rhs.GetNonScaled(); } + ND_ constexpr bool operator == (const Self rhs) C_NE___ { return _value == rhs.GetNonScaled(); } + ND_ constexpr bool operator != (const Self rhs) C_NE___ { return _value != rhs.GetNonScaled(); } + ND_ constexpr bool operator > (const Self rhs) C_NE___ { return _value > rhs.GetNonScaled(); } + ND_ constexpr bool operator >= (const Self rhs) C_NE___ { return _value >= rhs.GetNonScaled(); } + ND_ constexpr bool operator < (const Self rhs) C_NE___ { return _value < rhs.GetNonScaled(); } + ND_ constexpr bool operator <= (const Self rhs) C_NE___ { return _value <= rhs.GetNonScaled(); } - constexpr Self& operator += (const Self &rhs) __NE___ { _value += rhs.GetNonScaled(); return *this; } - constexpr Self& operator -= (const Self &rhs) __NE___ { _value -= rhs.GetNonScaled(); return *this; } + constexpr Self& operator += (const Self rhs) __NE___ { _value += rhs.GetNonScaled(); return *this; } + constexpr Self& operator -= (const Self rhs) __NE___ { _value -= rhs.GetNonScaled(); return *this; } - ND_ constexpr Self operator + (const Self &rhs) C_NE___ { return Self{ _value + rhs.GetNonScaled() }; } - ND_ constexpr Self operator - (const Self &rhs) C_NE___ { return Self{ _value - rhs.GetNonScaled() }; } + ND_ constexpr Self operator + (const Self rhs) C_NE___ { return Self{ _value + rhs.GetNonScaled() }; } + ND_ constexpr Self operator - (const Self rhs) C_NE___ { return Self{ _value - rhs.GetNonScaled() }; } constexpr Self& operator *= (Value_t rhs) __NE___ { _value *= rhs; return *this; } constexpr Self& operator /= (Value_t rhs) __NE___ { _value /= rhs; return *this; } diff --git a/AE/engine/src/base/Math/PhysicalQuantityVec.h b/AE/engine/src/base/Math/PhysicalQuantityVec.h index 6f05588f..0add23af 100644 --- 
a/AE/engine/src/base/Math/PhysicalQuantityVec.h +++ b/AE/engine/src/base/Math/PhysicalQuantityVec.h @@ -74,13 +74,13 @@ namespace AE::Math template PhysicalQuantityVec (const TPhysicalQuantityVec<2, Value_t, Dimension_t, S, Q> &other) __NE___ : QVec_t{other.x, other.y} {} - PhysicalQuantityVec (Value_t X, Value_t Y) __NE___ : QVec_t{X,Y} {} + PhysicalQuantityVec (Value_t X, Value_t Y) __NE___ : QVec_t{Quantity{X}, Quantity{Y}} {} PhysicalQuantityVec (Quantity X, Quantity Y) __NE___ : QVec_t{X,Y} {} - explicit PhysicalQuantityVec (Value_t V) __NE___ : QVec_t{V,V} {} - explicit PhysicalQuantityVec (Quantity V) __NE___ : QVec_t{V,V} {} + explicit PhysicalQuantityVec (Value_t s) __NE___ : QVec_t{Quantity{s}} {} + explicit PhysicalQuantityVec (Quantity s) __NE___ : QVec_t{s} {} - explicit PhysicalQuantityVec (const ValVec_t &V) __NE___ : QVec_t{V.x,V.y} {} + explicit PhysicalQuantityVec (const ValVec_t &v) __NE___ : QVec_t{v} {} ND_ ValVec_t const& GetNonScaled () C_NE___ { return *reinterpret_cast(this); } ND_ ValVec_t & GetNonScaledRef () __NE___ { return *reinterpret_cast(this); } @@ -111,13 +111,13 @@ namespace AE::Math template PhysicalQuantityVec (const TPhysicalQuantityVec<3, Value_t, Dimension_t, S, Q> &other) __NE___ : QVec_t{other.x, other.y, other.z} {} - PhysicalQuantityVec (Value_t X, Value_t Y, Value_t Z) __NE___ : QVec_t{X,Y,Z} {} + PhysicalQuantityVec (Value_t X, Value_t Y, Value_t Z) __NE___ : QVec_t{Quantity{X}, Quantity{Y}, Quantity{Z}} {} PhysicalQuantityVec (Quantity X, Quantity Y, Quantity Z) __NE___ : QVec_t{X,Y,Z} {} - explicit PhysicalQuantityVec (Value_t V) __NE___ : QVec_t{V,V,V} {} - explicit PhysicalQuantityVec (Quantity V) __NE___ : QVec_t{V,V,V} {} + explicit PhysicalQuantityVec (Value_t s) __NE___ : QVec_t{Quantity{s}} {} + explicit PhysicalQuantityVec (Quantity s) __NE___ : QVec_t{s} {} - explicit PhysicalQuantityVec (const ValVec_t &V) __NE___ : QVec_t{V.x,V.y,V.z} {} + explicit PhysicalQuantityVec (const ValVec_t &v) __NE___ : 
QVec_t{v} {} ND_ ValVec_t const& GetNonScaled () C_NE___ { return *reinterpret_cast(this); } ND_ ValVec_t & GetNonScaledRef () __NE___ { return *reinterpret_cast(this); } @@ -148,13 +148,13 @@ namespace AE::Math template PhysicalQuantityVec (const TPhysicalQuantityVec<4, Value_t, Dimension_t, S, Q> &other) __NE___ : QVec_t{other.x, other.y, other.z, other.w} {} - PhysicalQuantityVec (Value_t X, Value_t Y, Value_t Z, Value_t W) __NE___ : QVec_t{X,Y,Z,W} {} + PhysicalQuantityVec (Value_t X, Value_t Y, Value_t Z, Value_t W) __NE___ : QVec_t{Quantity{X}, Quantity{Y}, Quantity{Z}, Quantity{W}} {} PhysicalQuantityVec (Quantity X, Quantity Y, Quantity Z, Quantity W) __NE___ : QVec_t{X,Y,Z,W} {} - explicit PhysicalQuantityVec (Value_t V) __NE___ : QVec_t{V,V,V,V} {} - explicit PhysicalQuantityVec (Quantity V) __NE___ : QVec_t{V,V,V,V} {} + explicit PhysicalQuantityVec (Value_t s) __NE___ : QVec_t{Quantity{s}} {} + explicit PhysicalQuantityVec (Quantity s) __NE___ : QVec_t{s} {} - explicit PhysicalQuantityVec (const ValVec_t &V) __NE___ : QVec_t{V.x,V.y,V.z,V.w} {} + explicit PhysicalQuantityVec (const ValVec_t &v) __NE___ : QVec_t{v.x, v.y, v.z, v.w} {} ND_ ValVec_t const& GetNonScaled () C_NE___ { return *reinterpret_cast(this); } ND_ ValVec_t & GetNonScaledRef () __NE___ { return *reinterpret_cast(this); } diff --git a/AE/engine/src/base/Math/Quat.h b/AE/engine/src/base/Math/Quat.h index b144e36e..1b977698 100644 --- a/AE/engine/src/base/Math/Quat.h +++ b/AE/engine/src/base/Math/Quat.h @@ -129,7 +129,7 @@ namespace AE::Math ND_ static Self LookAt (const Vec3_t &dir, const Vec3_t &up)__NE___; ND_ static Self From2Normals (const Vec3_t &n1, const Vec3_t &n2) __NE___ { return Self{_GLM_Quat_t{ n1, n2 }}; } - ND_ static Self FromAngleAxis (Rad_t angle, const Vec3_t &axis) __NE___ { return glm::angleAxis( angle, axis ); } + ND_ static Self FromAngleAxis (Rad_t angle, const Vec3_t &axis) __NE___ { return Self{_GLM_Quat_t{ glm::angleAxis( T{angle}, axis )}}; } ND_ static T 
CalcW (T x, T y, T z) __NE___ { return T{1} - Sqrt( x*x + y*y + z*z ); } diff --git a/AE/engine/src/base/Math/Transformation.h b/AE/engine/src/base/Math/Transformation.h index 3dc65667..b2ce88ab 100644 --- a/AE/engine/src/base/Math/Transformation.h +++ b/AE/engine/src/base/Math/Transformation.h @@ -135,8 +135,8 @@ namespace AE::Math template bool TTransformation::operator == (const Self &rhs) C_NE___ { - return All( orientation == rhs.orientation ) & - All( position == rhs.position ) & + return All( orientation == rhs.orientation ) and + AllEqual( position, rhs.position ) and (scale == rhs.scale); } @@ -148,16 +148,16 @@ namespace AE::Math template ND_ bool Equal (const TTransformation &lhs, const TTransformation &rhs, const T err = Epsilon()) __NE___ { - return All( Math::Equal( lhs.orientation, rhs.orientation, err )) & - All( Math::Equal( lhs.position, rhs.position, err )) & + return All( Math::Equal( lhs.orientation, rhs.orientation, err )) and + All( Math::Equal( lhs.position, rhs.position, err )) and Math::Equal( lhs.scale, rhs.scale, err ); } template ND_ bool Equal (const TTransformation &lhs, const TTransformation &rhs, const Percent err) __NE___ { - return All( Math::Equal( lhs.orientation, rhs.orientation, err )) & - All( Math::Equal( lhs.position, rhs.position, err )) & + return All( Math::Equal( lhs.orientation, rhs.orientation, err )) and + All( Math::Equal( lhs.position, rhs.position, err )) and Math::Equal( lhs.scale, rhs.scale, err ); } @@ -169,16 +169,16 @@ namespace AE::Math template ND_ bool BitEqual (const TTransformation &lhs, const TTransformation &rhs, const EnabledBitCount bitCount) __NE___ { - return All( Math::BitEqual( lhs.orientation, rhs.orientation, bitCount )) & - All( Math::BitEqual( lhs.position, rhs.position, bitCount )) & + return All( Math::BitEqual( lhs.orientation, rhs.orientation, bitCount )) and + All( Math::BitEqual( lhs.position, rhs.position, bitCount )) and Math::BitEqual( lhs.scale, rhs.scale, bitCount ); } template ND_ 
bool BitEqual (const TTransformation &lhs, const TTransformation &rhs) __NE___ { - return All( Math::BitEqual( lhs.orientation, rhs.orientation )) & - All( Math::BitEqual( lhs.position, rhs.position )) & + return All( Math::BitEqual( lhs.orientation, rhs.orientation )) and + All( Math::BitEqual( lhs.position, rhs.position )) and Math::BitEqual( lhs.scale, rhs.scale ); } diff --git a/AE/engine/src/base/Math/Vec.h b/AE/engine/src/base/Math/Vec.h index 7610e70c..57c9e245 100644 --- a/AE/engine/src/base/Math/Vec.h +++ b/AE/engine/src/base/Math/Vec.h @@ -88,13 +88,13 @@ namespace glm ================================================= */ template - ND_ TVec operator == (const TVec &lhs, AE::Base::UMax_t) __NE___ + ND_ GLM_CONSTEXPR TVec operator == (const TVec &lhs, AE::Base::UMax_t) __NE___ { return glm::equal( lhs, TVec{MaxValue()} ); } template - ND_ TVec operator != (const TVec &lhs, AE::Base::UMax_t) __NE___ + ND_ GLM_CONSTEXPR TVec operator != (const TVec &lhs, AE::Base::UMax_t) __NE___ { return glm::notEqual( lhs, TVec{MaxValue()} ); } @@ -105,37 +105,37 @@ namespace glm ================================================= */ template - ND_ TVec operator == (const TVec &lhs, AE::Base::_hidden_::_Zero) __NE___ + ND_ GLM_CONSTEXPR TVec operator == (const TVec &lhs, AE::Base::_hidden_::_Zero) __NE___ { return glm::equal( lhs, TVec{} ); } template - ND_ TVec operator != (const TVec &lhs, AE::Base::_hidden_::_Zero) __NE___ + ND_ GLM_CONSTEXPR TVec operator != (const TVec &lhs, AE::Base::_hidden_::_Zero) __NE___ { return glm::notEqual( lhs, TVec{} ); } template - ND_ TVec operator >= (const TVec &lhs, AE::Base::_hidden_::_Zero) __NE___ + ND_ GLM_CONSTEXPR TVec operator >= (const TVec &lhs, AE::Base::_hidden_::_Zero) __NE___ { return glm::greaterThanEqual( lhs, TVec{} ); } template - ND_ TVec operator <= (const TVec &lhs, AE::Base::_hidden_::_Zero) __NE___ + ND_ GLM_CONSTEXPR TVec operator <= (const TVec &lhs, AE::Base::_hidden_::_Zero) __NE___ { return glm::lessThanEqual( 
lhs, TVec{} ); } template - ND_ TVec operator > (const TVec &lhs, AE::Base::_hidden_::_Zero) __NE___ + ND_ GLM_CONSTEXPR TVec operator > (const TVec &lhs, AE::Base::_hidden_::_Zero) __NE___ { return glm::greaterThan( lhs, TVec{} ); } template - ND_ TVec operator < (const TVec &lhs, AE::Base::_hidden_::_Zero) __NE___ + ND_ GLM_CONSTEXPR TVec operator < (const TVec &lhs, AE::Base::_hidden_::_Zero) __NE___ { return glm::lessThan( lhs, TVec{} ); } @@ -146,61 +146,73 @@ namespace glm ================================================= */ template - ND_ EnableIf, TVec> operator == (const TVec &lhs, const S rhs) __NE___ + ND_ GLM_CONSTEXPR EnableIf, TVec> operator == (const TVec &lhs, const S rhs) __NE___ { return glm::equal( lhs, TVec{rhs} ); } template - ND_ EnableIf, TVec> operator != (const TVec &lhs, const S rhs) __NE___ + ND_ GLM_CONSTEXPR EnableIf, TVec> operator != (const TVec &lhs, const S rhs) __NE___ { return glm::notEqual( lhs, TVec{rhs} ); } + template + ND_ GLM_CONSTEXPR TVec operator == (const TVec &lhs, const TVec rhs) __NE___ + { + return glm::equal( lhs, rhs ); + } + + template + ND_ GLM_CONSTEXPR TVec operator != (const TVec &lhs, const TVec rhs) __NE___ + { + return glm::notEqual( lhs, rhs ); + } + template - ND_ EnableIf, TVec> operator >= (const TVec &lhs, const S rhs) __NE___ + ND_ GLM_CONSTEXPR EnableIf, TVec> operator >= (const TVec &lhs, const S rhs) __NE___ { return glm::greaterThanEqual( lhs, TVec{rhs} ); } template - ND_ EnableIf, TVec> operator <= (const TVec &lhs, const S rhs) __NE___ + ND_ GLM_CONSTEXPR EnableIf, TVec> operator <= (const TVec &lhs, const S rhs) __NE___ { return glm::lessThanEqual( lhs, TVec{rhs} ); } template - ND_ EnableIf, TVec> operator > (const TVec &lhs, const S rhs) __NE___ + ND_ GLM_CONSTEXPR EnableIf, TVec> operator > (const TVec &lhs, const S rhs) __NE___ { return glm::greaterThan( lhs, TVec{rhs} ); } template - ND_ EnableIf, TVec> operator < (const TVec &lhs, const S rhs) __NE___ + ND_ GLM_CONSTEXPR EnableIf, TVec> 
operator < (const TVec &lhs, const S rhs) __NE___ { return glm::lessThan( lhs, TVec{rhs} ); } template - ND_ TVec operator >= (const TVec &lhs, const TVec &rhs) __NE___ + ND_ GLM_CONSTEXPR TVec operator >= (const TVec &lhs, const TVec &rhs) __NE___ { return glm::greaterThanEqual( lhs, rhs ); } template - ND_ TVec operator <= (const TVec &lhs, const TVec &rhs) __NE___ + ND_ GLM_CONSTEXPR TVec operator <= (const TVec &lhs, const TVec &rhs) __NE___ { return glm::lessThanEqual( lhs, rhs ); } template - ND_ TVec operator > (const TVec &lhs, const TVec &rhs) __NE___ + ND_ GLM_CONSTEXPR TVec operator > (const TVec &lhs, const TVec &rhs) __NE___ { return glm::greaterThan( lhs, rhs ); } template - ND_ TVec operator < (const TVec &lhs, const TVec &rhs) __NE___ + ND_ GLM_CONSTEXPR TVec operator < (const TVec &lhs, const TVec &rhs) __NE___ { return glm::lessThan( lhs, rhs ); } @@ -414,6 +426,51 @@ namespace _hidden_ template static constexpr T EulerNumber = T( 2.71828182845904523536 ); +/* +================================================= + AllEqual +================================================= +*/ + template + ND_ GLM_CONSTEXPR bool AllEqual (const TVec &v1, const TVec &v2) __NE___ + { + using namespace glm; + + if constexpr( I == 2 ) + { + return + detail::compute_equal::is_iec559>::call(v1.x, v2.x) && + detail::compute_equal::is_iec559>::call(v1.y, v2.y); + } + if constexpr( I == 3 ) + { + return + detail::compute_equal::is_iec559>::call(v1.x, v2.x) && + detail::compute_equal::is_iec559>::call(v1.y, v2.y) && + detail::compute_equal::is_iec559>::call(v1.z, v2.z); + } + if constexpr( I == 4 ) + { + return detail::compute_vec_equal<4, T, Q, detail::is_int::value, sizeof(T) * 8, detail::is_aligned::value>::call(v1, v2); + } + } + +/* +================================================= + AnyNotEqual +================================================= +*/ + template + ND_ GLM_CONSTEXPR bool AnyNotEqual (const TVec &v1, const TVec &v2) __NE___ + { + using namespace glm; + + if 
constexpr( I == 4 ) + return detail::compute_vec_nequal<4, T, Q, detail::is_int::value, sizeof(T) * 8, detail::is_aligned::value>::call(v1, v2); + else + return not AllEqual( v1, v2 ); + } + /* ================================================= AdditionIsSafe (signed integer) @@ -1847,27 +1904,27 @@ namespace _hidden_ ================================================= */ template - ND_ EnableIf, TVec> Ln (const TVec& v) __NE___ + ND_ EnableIf, TVec> Ln (const TVec &v) __NE___ { ASSERT( All( v >= T{0} )); return glm::log( v ); } template - ND_ EnableIf, TVec> Log2 (const TVec& v) __NE___ + ND_ EnableIf, TVec> Log2 (const TVec &v) __NE___ { ASSERT( All( v >= T{0} )); return glm::log2( v ); } template - ND_ EnableIf, TVec> Log (const TVec& v, const T base) __NE___ + ND_ EnableIf, TVec> Log (const TVec &v, const T base) __NE___ { return Ln( v ) / Ln( base ); } template - ND_ EnableIf, TVec> Log (const TVec& v, const TVec& base) __NE___ + ND_ EnableIf, TVec> Log (const TVec &v, const TVec& base) __NE___ { return Ln( v ) / Ln( base ); } @@ -1888,6 +1945,15 @@ namespace _hidden_ return res; } + template + ND_ EnableIf, TVec> IPow (const TVec &base, const T power) __NE___ + { + TVec res = TVec{T(1)}; + for (T i = 0; i < power; ++i) + res *= base; + return res; + } + /* ================================================= Pow / Exp / Exp2 / Exp10 / ExpMinus1 (scalar) @@ -2517,8 +2583,12 @@ namespace _hidden_ template ND_ EnableIf, TVec> IsFinite (const TVec &v) __NE___ { - // TODO: may not work with compiler optimizations, use std::isfinite - return (v == v); + if constexpr( I == 2 ) + return TVec{ IsFinite( v.x ), IsFinite( v.y )}; + if constexpr( I == 3 ) + return TVec{ IsFinite( v.x ), IsFinite( v.y ), IsFinite( v.z )}; + if constexpr( I == 4 ) + return TVec{ IsFinite( v.x ), IsFinite( v.y ), IsFinite( v.z ), IsFinite( v.w )}; } /* diff --git a/AE/engine/src/base/Math/sRGB.h b/AE/engine/src/base/Math/sRGB.h index 2b120afb..bb687fe1 100644 --- a/AE/engine/src/base/Math/sRGB.h 
+++ b/AE/engine/src/base/Math/sRGB.h @@ -16,7 +16,7 @@ namespace AE::Math This code is licensed under the MIT License (MIT). ================================================= */ - ND_ inline float ApplySRGBCurve (const float x) __NE___ { return x < 0.0031308f ? 12.92f * x : 1.055f * Pow(x, 1.0f / 2.4f) - 0.055f; } + ND_ inline float ApplySRGBCurve (const float x) __NE___ { return x < 0.0031308f ? 12.92f * x : 1.055f * Pow(x, 1.0f / 2.4f) - 0.055f; } ND_ inline float RemoveSRGBCurve (const float x) __NE___ { return x < 0.04045f ? x / 12.92f : Pow( (x + 0.055f) / 1.055f, 2.4f ); } ND_ inline float3 ApplySRGBCurve (const float3 &v) __NE___ { return float3( ApplySRGBCurve(v.r), ApplySRGBCurve(v.g), ApplySRGBCurve(v.b) ); } @@ -36,7 +36,7 @@ namespace AE::Math This code is licensed under the MIT License (MIT). ================================================= */ - ND_ inline float ApplySRGBCurve_Fast (const float x) __NE___ { return x < 0.0031308f ? 12.92f * x : 1.13005f * Sqrt(x - 0.00228f) - 0.13448f * x + 0.005719f; } + ND_ inline float ApplySRGBCurve_Fast (const float x) __NE___ { return x < 0.0031308f ? 12.92f * x : 1.13005f * Sqrt(x - 0.00228f) - 0.13448f * x + 0.005719f; } ND_ inline float RemoveSRGBCurve_Fast (const float x) __NE___ { return x < 0.04045f ? 
x / 12.92f : -7.43605f * x - 31.24297f * Sqrt(-0.53792f * x + 1.279924f) + 35.34864f; } ND_ inline float3 ApplySRGBCurve_Fast (const float3 &v) __NE___ { return float3( ApplySRGBCurve_Fast(v.r), ApplySRGBCurve_Fast(v.g), ApplySRGBCurve_Fast(v.b) ); } diff --git a/AE/engine/src/base/Memory/MemUtils.h b/AE/engine/src/base/Memory/MemUtils.h index d2d7c1c6..f88c9280 100644 --- a/AE/engine/src/base/Memory/MemUtils.h +++ b/AE/engine/src/base/Memory/MemUtils.h @@ -17,13 +17,13 @@ namespace AE::Base ================================================= */ template - ND_ constexpr decltype(auto) AddressOf (T &value) __NE___ + ND_ constexpr exact_t AddressOf (T &value) __NE___ { return std::addressof( value ); } template - ND_ constexpr decltype(auto) VAddressOf (T &value) __NE___ + ND_ constexpr exact_t VAddressOf (T &value) __NE___ { return Cast( std::addressof( value )); } @@ -261,19 +261,19 @@ namespace _hidden_ inline void MemCopy16 (OUT void* dst, const void* src, const Bytes size) __NE___ { Base::_hidden_::MemCopyChecks( dst, src, size, 16 ); - std::memcpy( OUT dst, src, usize(size) ); // TODO: SSE/Neon + std::memcpy( OUT AssumeAligned<16>(dst), AssumeAligned<16>(src), usize(size) ); // TODO: SSE/Neon } inline void MemCopy32 (OUT void* dst, const void* src, const Bytes size) __NE___ { Base::_hidden_::MemCopyChecks( dst, src, size, 32 ); - std::memcpy( OUT dst, src, usize(size) ); // TODO: SSE/Neon + std::memcpy( OUT AssumeAligned<32>(dst), AssumeAligned<32>(src), usize(size) ); // TODO: SSE/Neon } inline void MemCopy64 (OUT void* dst, const void* src, const Bytes size) __NE___ { Base::_hidden_::MemCopyChecks( dst, src, size, 64 ); - std::memcpy( OUT dst, src, usize(size) ); // TODO: SSE/Neon + std::memcpy( OUT AssumeAligned<64>(dst), AssumeAligned<64>(src), usize(size) ); // TODO: SSE/Neon } /* diff --git a/AE/engine/src/base/Platforms/AndroidUtils.cpp b/AE/engine/src/base/Platforms/AndroidUtils.cpp index b42c4293..cb125526 100644 --- 
a/AE/engine/src/base/Platforms/AndroidUtils.cpp +++ b/AE/engine/src/base/Platforms/AndroidUtils.cpp @@ -180,6 +180,23 @@ namespace AE::Base #endif } +/* +================================================= + IsUnderDebugger +---- + https://developer.android.com/reference/android/os/Debug.html#isDebuggerConnected() +================================================= +*/ + bool Android_IsUnderDebugger = false; // extern + + bool AndroidUtils::IsUnderDebugger () __NE___ + { + #ifdef AE_CFG_RELEASE + return false; + #else + return Android_IsUnderDebugger; + #endif + } } // AE::Base diff --git a/AE/engine/src/base/Platforms/AndroidUtils.h b/AE/engine/src/base/Platforms/AndroidUtils.h index f44fe0c7..4d05da6c 100644 --- a/AE/engine/src/base/Platforms/AndroidUtils.h +++ b/AE/engine/src/base/Platforms/AndroidUtils.h @@ -39,17 +39,6 @@ namespace AE::Base ND_ static auto GetOSType () __NE___ { return EOperationSystem::Android; } }; - - - inline bool AndroidUtils::IsUnderDebugger () __NE___ - { - #ifdef AE_DEBUG - return true; - #else - return false; - #endif - } - } // AE::Base #endif // AE_PLATFORM_ANDROID diff --git a/AE/engine/src/base/Platforms/CPUInfo.h b/AE/engine/src/base/Platforms/CPUInfo.h index 89c8c558..b20b5618 100644 --- a/AE/engine/src/base/Platforms/CPUInfo.h +++ b/AE/engine/src/base/Platforms/CPUInfo.h @@ -125,9 +125,11 @@ namespace AE::Base CacheGeom L3; }; + static constexpr uint MaxLogicalCores = 64; + static constexpr uint MaxCoreTypes = 4; + using MHz_t = uint; - using CoreBits_t = BitSet< 256 >; - static constexpr uint MaxCores = 4; + using CoreBits_t = BitSet< MaxLogicalCores >; struct Core @@ -151,7 +153,7 @@ namespace AE::Base ND_ uint FirstLogicalCore () C_NE___ { return BitScanForward( logicalBits.to_ullong() ); } ND_ uint LastLogicalCore () C_NE___ { return BitScanReverse( logicalBits.to_ullong() ); } }; - using Cores_t = FixedArray< Core, MaxCores >; + using Cores_t = FixedArray< Core, MaxCoreTypes >; struct Processor @@ -193,38 +195,6 @@ namespace 
AE::Base }; - - // - // CPU Performance Info - // - - struct CpuPerformance - { - // types - using MHz_t = uint; - - struct PerProcessCounters - { - milliseconds userTime; - milliseconds kernelTime; - uint pageFaults = 0; // number of page faults serviced that required I/O activity - uint fsInput = 0; // number of times the filesystem had to perform input - uint fsOutput = 0; // number of times the filesystem had to perform output - uint voluntaryContextSwitches = 0; // context switch when awaiting availability of a resource (IO) - uint involuntaryContextSwitches = 0; // higher priority process replace current process - }; - using PerThreadCounters = PerProcessCounters; - - - // methods - ND_ static MHz_t GetFrequency (uint core) __NE___; - static uint GetFrequency (OUT MHz_t* result, uint maxCount) __NE___; - - static uint GetUsage (OUT float* user, OUT float* kernel, uint maxCount) __NE___; - - ND_ static bool GetPerfCounters (OUT PerProcessCounters &, - OUT PerThreadCounters &) __NE___; - }; //----------------------------------------------------------------------------- diff --git a/AE/engine/src/base/Platforms/CPUInfo_Apple.cpp b/AE/engine/src/base/Platforms/CPUInfo_Apple.cpp index b36cbfad..88d8ffc7 100644 --- a/AE/engine/src/base/Platforms/CPUInfo_Apple.cpp +++ b/AE/engine/src/base/Platforms/CPUInfo_Apple.cpp @@ -154,48 +154,6 @@ namespace AE::Base _Validate(); } -//----------------------------------------------------------------------------- - - - -/* -================================================= - GetFrequency -================================================= -*/ - CpuPerformance::MHz_t CpuPerformance::GetFrequency (uint core) __NE___ - { - Unused( core ); - return 0; - } - - uint CpuPerformance::GetFrequency (OUT MHz_t* result, const uint maxCount) __NE___ - { - Unused( result, maxCount ); - return 0; - } - -/* -================================================= - GetUsage -================================================= -*/ - uint 
CpuPerformance::GetUsage (OUT float* user, OUT float* kernel, const uint maxCount) __NE___ - { - Unused( user, kernel, maxCount ); - return 0; - } - -/* -================================================= - GetPerfCounters -================================================= -*/ - bool CpuPerformance::GetPerfCounters (OUT PerProcessCounters &, OUT PerThreadCounters &) __NE___ - { - return false; - } - } // AE::Base diff --git a/AE/engine/src/base/Platforms/CPUInfo_Linux.cpp b/AE/engine/src/base/Platforms/CPUInfo_Linux.cpp deleted file mode 100644 index 94395105..00000000 --- a/AE/engine/src/base/Platforms/CPUInfo_Linux.cpp +++ /dev/null @@ -1,131 +0,0 @@ -// Copyright (c) Zhirnov Andrey. For more information see 'LICENSE' - -#include "base/Defines/StdInclude.h" - -#if defined(AE_PLATFORM_LINUX) or defined(AE_PLATFORM_ANDROID) -# include -# include -# include -#endif - -#include "base/Platforms/CPUInfo.h" -#include "base/Math/BitMath.h" -#include "base/Memory/MemUtils.h" -#include "base/Algorithms/StringUtils.h" - -#if defined(AE_PLATFORM_LINUX) or defined(AE_PLATFORM_ANDROID) - -namespace AE::Base -{ -/* -================================================= - GetFrequency -================================================= -*/ - CpuPerformance::MHz_t CpuPerformance::GetFrequency (uint core) __NE___ - { - std::ifstream stream {"/sys/devices/system/cpu/cpu"s << ToString(core) << "/cpufreq/scaling_cur_freq"}; - if ( stream ) - { - String line; - std::getline( stream, OUT line ); - stream.close(); - return StringToUInt( line ) / 1000; - } - return 0; - } - -/* -================================================= - GetFrequency -================================================= -*/ - uint CpuPerformance::GetFrequency (OUT MHz_t* result, const uint maxCount) __NE___ - { - String line; - const auto GetCurrentClockSpeed = [&line](uint id) -> MHz_t - {{ - std::ifstream stream {"/sys/devices/system/cpu/cpu"s << ToString(id) << "/cpufreq/scaling_cur_freq"}; - if ( stream ) { - 
std::getline( stream, OUT line ); - stream.close(); - return StringToUInt( line ) / 1000; - } - return 0; - }}; - - const uint core_count = Min( CpuArchInfo::Get().cpu.logicalCoreCount, maxCount ); - - for (uint i = 0; i < core_count; ++i) { - result[i] = GetCurrentClockSpeed( i ); - } - return core_count; - } - -/* -================================================= - GetUsage -================================================= -*/ - uint CpuPerformance::GetUsage (OUT float* user, OUT float* kernel, const uint maxCount) __NE___ - { - NonNull( user ); - NonNull( kernel ); - - const auto& info = CpuArchInfo::Get(); - const uint core_count = Min( info.cpu.logicalCoreCount, maxCount ); - - for (auto& core : info.cpu.coreTypes) - { - MHz_t freq = GetFrequency( core.FirstLogicalCore() ); - float usage = Max( float(freq - core.baseClock) / float(core.maxClock - core.baseClock), 0.f ); - - for (uint core_id : BitIndexIterate( core.logicalBits.to_ullong() )) - { - if ( core_id < core_count ) - { - user[core_id] = usage; - kernel[core_id] = 0.f; - } - } - } - return core_count; - } - -/* -================================================= - GetPerfCounters -================================================= -*/ - bool CpuPerformance::GetPerfCounters (OUT PerProcessCounters &perProcess, OUT PerThreadCounters &perThread) __NE___ - { - ::rusage proc_usage = {}; - ::rusage thread_usage = {}; - - if ( ::getrusage( RUSAGE_SELF, OUT &proc_usage ) != 0 and - ::getrusage( RUSAGE_THREAD, OUT &thread_usage ) != 0 ) - return false; - - perProcess.userTime = milliseconds{ ulong(proc_usage.ru_utime.tv_sec) * 1000'000 + ulong(proc_usage.ru_utime.tv_usec) }; - perProcess.kernelTime = milliseconds{ ulong(proc_usage.ru_stime.tv_sec) * 1000'000 + ulong(proc_usage.ru_stime.tv_usec) }; - perProcess.pageFaults = proc_usage.ru_majflt; - perProcess.fsInput = proc_usage.ru_inblock; - perProcess.fsOutput = proc_usage.ru_oublock; - perProcess.voluntaryContextSwitches = proc_usage.ru_nvcsw; - 
perProcess.involuntaryContextSwitches = proc_usage.ru_nivcsw; - - perThread.userTime = milliseconds{ ulong(thread_usage.ru_utime.tv_sec) * 1000'000 + ulong(thread_usage.ru_utime.tv_usec) }; - perThread.kernelTime = milliseconds{ ulong(thread_usage.ru_stime.tv_sec) * 1000'000 + ulong(thread_usage.ru_stime.tv_usec) }; - perThread.pageFaults = thread_usage.ru_majflt; - perThread.fsInput = thread_usage.ru_inblock; - perThread.fsOutput = thread_usage.ru_oublock; - perThread.voluntaryContextSwitches = thread_usage.ru_nvcsw; - perThread.involuntaryContextSwitches = thread_usage.ru_nivcsw; - - return true; - } - - -} // AE::Base - -#endif // (LINUX or ANDROID) diff --git a/AE/engine/src/base/Platforms/CPUInfo_LinuxARM.cpp b/AE/engine/src/base/Platforms/CPUInfo_LinuxARM.cpp index 66aec6d8..77920ffa 100644 --- a/AE/engine/src/base/Platforms/CPUInfo_LinuxARM.cpp +++ b/AE/engine/src/base/Platforms/CPUInfo_LinuxARM.cpp @@ -67,7 +67,12 @@ namespace ND_ static uint GetMinClockSpeed (uint id) { String line; - std::ifstream stream {"/sys/devices/system/cpu/cpu"s << Base::ToString(id) << "/cpufreq/cpuinfo_min_freq"}; // or scaling_min_freq + std::ifstream stream; + + stream.open( "/sys/devices/system/cpu/cpu"s << Base::ToString(id) << "/cpufreq/cpuinfo_min_freq" ); + if ( not stream ) { + stream.open( "/sys/devices/system/cpu/cpu"s << Base::ToString(id) << "/cpufreq/scaling_min_freq" ); + } if ( stream ) { std::getline( stream, OUT line ); stream.close(); @@ -79,7 +84,12 @@ namespace ND_ static uint GetMaxClockSpeed (uint id) { String line; - std::ifstream stream {"/sys/devices/system/cpu/cpu"s << Base::ToString(id) << "/cpufreq/cpuinfo_max_freq"}; // or scaling_max_freq + std::ifstream stream; + + stream.open( "/sys/devices/system/cpu/cpu"s << Base::ToString(id) << "/cpufreq/cpuinfo_max_freq" ); + if ( not stream ) { + stream.open( "/sys/devices/system/cpu/cpu"s << Base::ToString(id) << "/cpufreq/scaling_max_freq" ); + } if ( stream ) { std::getline( stream, OUT line ); 
stream.close(); @@ -88,6 +98,40 @@ namespace return 0; } + ND_ static bool GetMinMaxClockSpeed (uint id, OUT uint &min, OUT uint &max) + { + String line; + std::ifstream stream {"/sys/devices/system/cpu/cpu"s << Base::ToString(id) << "/cpufreq/scaling_available_frequencies"}; + + if ( not stream ) + return false; + + std::getline( stream, OUT line ); + stream.close(); + + // remove tailing spaces + for (; not line.empty() and line.back() == ' ';) + line.pop_back(); + + if ( line.empty() ) + return false; + + // read first and last values + usize p0 = line.find( ' ' ); + usize p1 = line.rfind( ' ' ); + + if ( p0 == UMax or p1 == UMax ) + return false; + + max = StringToUInt( SubString( line, 0, p0 )) / 1000; // in MHz + min = StringToUInt( SubString( line, p1+1, line.size() )) / 1000; // in MHz + + if ( min > max ) + std::swap( min, max ); + + return true; + } + // from // https://elixir.bootlin.com/linux/latest/source/arch/arm/include/asm/cputype.h // https://elixir.bootlin.com/linux/latest/source/arch/arm64/include/asm/cputype.h @@ -425,8 +469,12 @@ namespace } const int id = IntLog2( dst.logicalBits.to_ulong() ); dst.name = GetCoreName( CPUImplToVendor( vendor ), part ); - dst.baseClock = GetMinClockSpeed( id ); - dst.maxClock = GetMaxClockSpeed( id ); + + if ( not GetMinMaxClockSpeed( id, OUT dst.baseClock, OUT dst.maxClock )) + { + dst.baseClock = GetMinClockSpeed( id ); + dst.maxClock = GetMaxClockSpeed( id ); + } } // sort by max clock diff --git a/AE/engine/src/base/Platforms/CPUInfo_Windows.cpp b/AE/engine/src/base/Platforms/CPUInfo_Windows.cpp index fcc053d3..9c90bd18 100644 --- a/AE/engine/src/base/Platforms/CPUInfo_Windows.cpp +++ b/AE/engine/src/base/Platforms/CPUInfo_Windows.cpp @@ -2,8 +2,6 @@ #if defined(AE_COMPILER_MSVC) and defined(AE_PLATFORM_WINDOWS) # include "base/Platforms/WindowsHeader.cpp.h" -# include -# include # include "base/Platforms/WindowsUtils.h" # include "base/Platforms/WindowsLibrary.h" @@ -56,6 +54,7 @@ namespace AE::Base char 
cpu_name [64] = {}; // read CPU features (only x86/x64) + #if defined(AE_CPU_ARCH_X64) or defined(AE_CPU_ARCH_X86) if ( cpu.arch == ECPUArch::X64 ) { StaticArray cpui = {}; @@ -116,6 +115,7 @@ namespace AE::Base } // TODO: _may_i_use_cpu_feature } + #endif cpu.vendor = _NameToVendor( StringView{cpu_name} ); @@ -150,7 +150,7 @@ namespace AE::Base // info for each logical core const uint count = buf_size / sizeof(SYSTEM_CPU_SET_INFORMATION); - FixedMap< BYTE, Core*, MaxCores > eff_class_map; + FixedMap< BYTE, Core*, MaxCoreTypes > eff_class_map; for (uint i = 0; i < count; ++i) { @@ -219,6 +219,7 @@ namespace AE::Base } else { + #if defined(AE_CPU_ARCH_X64) or defined(AE_CPU_ARCH_X86) StaticArray cpui = {}; __cpuid( OUT cpui.data(), 0 ); @@ -234,6 +235,7 @@ namespace AE::Base core.maxClock = cpui[1]; } } + #endif } } diff --git a/AE/engine/src/base/Platforms/Perf_Apple.cpp b/AE/engine/src/base/Platforms/Perf_Apple.cpp new file mode 100644 index 00000000..966cf30b --- /dev/null +++ b/AE/engine/src/base/Platforms/Perf_Apple.cpp @@ -0,0 +1,72 @@ +// Copyright (c) Zhirnov Andrey. 
For more information see 'LICENSE' + +#include "base/Defines/StdInclude.h" + +#ifdef AE_PLATFORM_APPLE +# include "base/Platforms/PerformanceStat.h" + +namespace AE::Base +{ +/* +================================================= + CPU_GetFrequency +================================================= +*/ + PerformanceStat::MHz_t PerformanceStat::CPU_GetFrequency (uint core) __NE___ + { + Unused( core ); + return 0; + } + + uint PerformanceStat::CPU_GetFrequency (OUT MHz_t* result, const uint maxCount) __NE___ + { + Unused( result, maxCount ); + return 0; + } + +/* +================================================= + CPU_GetUsage +================================================= +*/ + uint PerformanceStat::CPU_GetUsage (OUT float* user, OUT float* kernel, const uint maxCount) __NE___ + { + Unused( user, kernel, maxCount ); + return 0; + } + +/* +================================================= + GetPerfCounters +================================================= +*/ + bool PerformanceStat::GetPerfCounters (OUT PerProcessCounters &, OUT PerThreadCounters &) __NE___ + { + return false; + } + +/* +================================================= + Battery_Get +================================================= +*/ + bool PerformanceStat::Battery_Get (OUT BatteryStat &result) __NE___ + { + result = Default; + return false; + } + +/* +================================================= + Temperature_Get +================================================= +*/ + bool PerformanceStat::Temperature_Get (OUT TemperatureStat &result) __NE___ + { + result.sensors.clear(); + return false; + } + +} // AE::Base + +#endif // AE_PLATFORM_APPLE diff --git a/AE/engine/src/base/Platforms/Perf_Linux.cpp b/AE/engine/src/base/Platforms/Perf_Linux.cpp new file mode 100644 index 00000000..bec19e45 --- /dev/null +++ b/AE/engine/src/base/Platforms/Perf_Linux.cpp @@ -0,0 +1,462 @@ +// Copyright (c) Zhirnov Andrey. 
For more information see 'LICENSE' + +#if defined(AE_PLATFORM_LINUX) or defined(AE_PLATFORM_ANDROID) + +# include "base/Defines/StdInclude.h" + +# include +# include +# include + +# include "base/Platforms/CPUInfo.h" +# include "base/Platforms/PerformanceStat.h" +# include "base/Math/BitMath.h" +# include "base/Memory/MemUtils.h" +# include "base/Algorithms/StringUtils.h" + +# include "threading/Primitives/SpinLock.h" +# include "threading/Primitives/Synchronized.h" + +namespace AE::Base +{ +namespace +{ +/* +================================================= + ReadString +================================================= +*/ + ND_ static bool ReadString (const String &path, OUT String &result) __NE___ + { + result.clear(); + + std::ifstream stream {path}; + if ( stream ) + { + std::getline( stream, OUT result ); + stream.close(); + return true; + } + return false; + } + +/* +================================================= + ReadAll +================================================= +*/ + ND_ static bool ReadAll (const String &path, OUT Array &result) __NE___ + { + result.clear(); + + FILE* file = std::fopen( path.c_str(), "rb" ); + if ( file == null ) + return false; + + const usize step = 4<<10; + usize size = 0; + + for (;;) + { + size = result.size(); + NOTHROW_ERR( result.resize( AlignUp( size + step, step ))); + + auto readn = fread( OUT result.data() + size, 1, step, file ); + size += readn; + + if ( readn < step ) + break; + } + + result.resize( size ); + return true; + } + +/* +================================================= + ReadInt +================================================= +*/ + template + ND_ static EnableIf, bool> ReadInt (const String &path, OUT T &result) __NE___ + { + result = 0; + + std::ifstream stream {path}; + if ( stream ) + { + #if 1 + stream >> result; + stream.close(); + #else + String line; + std::getline( stream, OUT line ); + stream.close(); + result = StringToUInt( line ); + #endif + return true; + } + return false; + } + 
+/* +================================================= + ParseMemInfo +================================================= +*/ + static bool ParseMemInfo (OUT Bytes &total, OUT Bytes &avail, OUT Bytes &totalV, OUT Bytes &usedV) __NE___ + { + Array arr; + if ( not ReadAll( "/proc/meminfo", OUT arr )) + return false; + + StringView str {arr.data(), arr.size()}; + + const auto Parse = [&str] (StringView propName, OUT Bytes &res) + {{ + usize begin = str.find( propName ); + if ( begin == StringView::npos ) + return; + + begin += propName.size(); + usize end = str.find( '\n', begin ); + end = Min( end, str.size() ); + + StringView range = SubString( str, begin, end-begin ); + usize p1 = range.rfind( ' ' ); + usize p2 = range.rfind( ' ', p1-1 ); ++p2; + + range = SubString( range, p2, p1-p2 ); + res = Bytes{StringToUInt( range )} << 10; + }}; + + Parse( "MemTotal:", OUT total ); + Parse( "MemAvailable:", OUT avail ); + Parse( "VmallocTotal:", OUT totalV ); + Parse( "VmallocUsed:", OUT usedV ); + return true; + } + +} // namespace + +/* +================================================= + CPU_GetFrequency +================================================= +*/ + PerformanceStat::MHz_t PerformanceStat::CPU_GetFrequency (uint core) __NE___ + { + uint val; + Unused( ReadInt( "/sys/devices/system/cpu/cpu"s << ToString(core) << "/cpufreq/scaling_cur_freq", OUT val )); // or cpuinfo_cur_freq + return val / 1000; + } + +/* +================================================= + CPU_GetFrequency +================================================= +*/ + uint PerformanceStat::CPU_GetFrequency (OUT MHz_t* result, const uint maxCount) __NE___ + { + const uint core_count = Min( CpuArchInfo::Get().cpu.logicalCoreCount, maxCount ); + + for (uint i = 0; i < core_count; ++i) { + result[i] = CPU_GetFrequency( i ); + } + return core_count; + } + +/* +================================================= + CPU_GetUsage +================================================= +*/ + uint 
PerformanceStat::CPU_GetUsage (OUT float* user, OUT float* kernel, const uint maxCount) __NE___ + { + NonNull( user ); + NonNull( kernel ); + + const auto& info = CpuArchInfo::Get(); + const uint core_count = Min( info.cpu.logicalCoreCount, maxCount ); + + for (auto& core : info.cpu.coreTypes) + { + MHz_t freq = CPU_GetFrequency( core.FirstLogicalCore() ); + float usage = Max( float(freq - core.baseClock) / float(core.maxClock - core.baseClock), 0.f ); + + for (uint core_id : BitIndexIterate( core.logicalBits.to_ullong() )) + { + if ( core_id < core_count ) + { + user[core_id] = usage; + kernel[core_id] = 0.f; // not supported + } + } + } + return core_count; + } + +/* +================================================= + GetPerfCounters +================================================= +*/ + bool PerformanceStat::GetPerfCounters (OUT PerProcessCounters* perProcess, OUT PerThreadCounters* perThread, OUT MemoryCounters* memory) __NE___ + { + bool res = true; + ::rusage usage = {}; + + if ( perThread != null and + ::getrusage( RUSAGE_THREAD, OUT &usage ) == 0 ) + { + perThread->userTime = milliseconds{ ulong(usage.ru_utime.tv_sec) * 1000'000 + ulong(usage.ru_utime.tv_usec) }; + perThread->kernelTime = milliseconds{ ulong(usage.ru_stime.tv_sec) * 1000'000 + ulong(usage.ru_stime.tv_usec) }; + perThread->fsInput = usage.ru_inblock; + perThread->fsOutput = usage.ru_oublock; + perThread->voluntaryContextSwitches = usage.ru_nvcsw; + perThread->involuntaryContextSwitches = usage.ru_nivcsw; + res = true; + } + + if ( (perProcess != null or memory != null) and + ::getrusage( RUSAGE_SELF, OUT &usage ) == 0 ) + { + res = true; + if ( perProcess != null ) + { + perProcess->userTime = milliseconds{ ulong(usage.ru_utime.tv_sec) * 1000'000 + ulong(usage.ru_utime.tv_usec) }; + perProcess->kernelTime = milliseconds{ ulong(usage.ru_stime.tv_sec) * 1000'000 + ulong(usage.ru_stime.tv_usec) }; + perProcess->fsInput = usage.ru_inblock; + perProcess->fsOutput = usage.ru_oublock; + 
perProcess->voluntaryContextSwitches = usage.ru_nvcsw; + perProcess->involuntaryContextSwitches = usage.ru_nivcsw; + } + if ( memory != null ) + { + memory->peakUsage = Bytes{ulong(usage.ru_maxrss) << 10}; // Kb to bytes + memory->pageFaults = usage.ru_majflt; + } + } + + if ( memory != null ) + { + // from https://stackoverflow.com/questions/669438/how-to-get-memory-usage-at-runtime-using-c/12675172#12675172 + + uint tSize = 0, resident = 0, share = 0; + std::ifstream buffer {"/proc/self/statm"}; // throw + buffer >> tSize >> resident >> share; + buffer.close(); + + memory->currentUsage = Bytes{resident} * Bytes{ulong(::sysconf( _SC_PAGE_SIZE ))}; + res = true; + + ParseMemInfo( OUT memory->totalPhysical, OUT memory->availablePhysical, + OUT memory->totalVirtual, OUT memory->usedVirtual ); + } + + return res; + } +//----------------------------------------------------------------------------- + + +#ifdef AE_PLATFORM_ANDROID +namespace +{ + using AE::Threading::RWSpinLock; + using AE::Threading::Synchronized; + + struct BatteryStat2 : PerformanceStat::BatteryStat + { + uint count1 = 0; + uint count2 = 0; + }; + + static Synchronized< RWSpinLock, BatteryStat2 > s_BatteryStat; +} + +/* +================================================= + Battery_Get +---- + docs: https://www.kernel.org/doc/Documentation/ABI/testing/sysfs-class-power +================================================= +*/ + bool PerformanceStat::Battery_Get (OUT BatteryStat &result) __NE___ + { + #if 0 + // not supported without root, use BatteryManager instead https://developer.android.com/reference/android/os/BatteryManager.html + + float scale = 1.0e-3f; // milli + + int temp; + Unused( ReadInt( "/sys/class/power_supply/battery/capacity", OUT temp )); + result.level = Percent::FromPercent( temp ); // same as 'charge_counter / charge_full_design' + + Unused( ReadInt( "/sys/class/power_supply/battery/voltage_now", OUT temp )); + + if ( temp > 10'000 ) + scale = 1.0e-6f; // micro + + result.voltage = 
Voltage_t{ float(temp) * scale }; + + Unused( ReadInt( "/sys/class/power_supply/battery/current_now", OUT temp )); + result.current = Current_t{ Abs(float(temp)) * scale }; + result.isCharging = (temp > 0); + + result.power = result.current * result.voltage; + + Unused( ReadInt( "/sys/class/power_supply/battery/temp", OUT temp )); + result.temperature = Temperature_t{ float(temp) * 0.125f }; + + Unused( ReadInt( "/sys/class/power_supply/battery/charge_counter", OUT temp )); + result.capacity = float(temp) * 1.0e-3f; // A*h + + String status; + Unused( ReadString( "/sys/class/power_supply/battery/status", OUT status )); + result.isCharging = (status == "Charging"); + + //ReadInt( "/sys/class/power_supply/battery/current_max", OUT temp ); + //result.maxCurrent = Current_t{ float(temp) * scale }; + + //ReadInt( "/sys/class/power_supply/battery/voltage_min", OUT temp ); + //result.minVoltage = Voltage_t{ float(temp) * scale }; + + return true; + #else + + auto res = s_BatteryStat.Read(); + + result = res; + result.power = result.current * result.voltage; + + return true; + #endif + } + +/* +================================================= + Temperature_Get +---- + https://stackoverflow.com/questions/20771070/how-do-i-get-the-cpu-temperature + available on some devices +================================================= +*/ + bool PerformanceStat::Temperature_Get (OUT TemperatureStat &result) __NE___ + { + result.sensors.clear(); + + for (uint id = 0; id < 100; ++id) + { + const String name = "/sys/class/thermal/thermal_zone"s + ToString(id); + String type; + uint temp; + + if ( ReadInt( name + "/temp", OUT temp ) and + ReadString( name + "/type", OUT type )) + { + result.sensors.emplace_back( RVRef(type), Temperature_t{ float(temp) * 1.0e-3f }); + } + else + break; + } + + if ( not result.sensors.empty() ) + return true; + + for (uint id = 0; id < 100; ++id) + { + const String name = "/sys/devices/virtual/thermal/thermal_zone"s + ToString(id); + String type; + uint 
temp; + + if ( ReadInt( name + "/temp", OUT temp ) and + ReadString( name + "/type", OUT type )) + { + result.sensors.emplace_back( RVRef(type), Temperature_t{ float(temp) * 1.0e-3f }); + } + else + break; + } + + return true; + } + +/* +================================================= + _SetBatteryStat +================================================= +*/ + void PerformanceStat::_SetBatteryStat1 (float current, float capacity, float energy) __NE___ + { + if ( capacity > 50.0e+3f ) capacity *= 1.0e-6f; // some devices returns uAh, uAh -> Ah + else capacity *= 1.0e-3f; // mAh -> Ah + + // positive - charging, negative - discharging, some devices returns always positive value + if ( current < -2.0e+9f ) current = 0.f; + current = Abs( current ); + if ( current > 10.0e+3f ) current *= 1.0e-6f; // some devices returns uA, uA -> A + else current *= 1.0e-3f; // mA -> A + + // negative value, if not supported + capacity = Max( capacity, 0.f ); + energy = Max( energy, 0.f ); + + + auto stat = s_BatteryStat.WriteLock(); + + stat->current = PerformanceStat::Current_t{ current }; + stat->capacity = PerformanceStat::Capacity_t{ capacity }; + stat->count1 ++; + Unused( energy ); // not supported + } + + void PerformanceStat::_SetBatteryStat2 (float level, float temperature, float voltage, bool isCharging) __NE___ + { + if ( voltage > 10.0e+3f ) voltage *= 1.0e-6f; // some devices returns uV, uV -> V + else if ( voltage > 100.f ) voltage *= 1.0e-3f; // some devices returns mV, mV -> V + + + auto stat = s_BatteryStat.WriteLock(); + + stat->level = Percent::FromPercent( level ); + stat->temperature = temperature; + stat->voltage = PerformanceStat::Voltage_t{ voltage }; + stat->isCharging = isCharging; + stat->count2 ++; + } + +#endif // AE_PLATFORM_ANDROID +//----------------------------------------------------------------------------- + + +#ifdef AE_PLATFORM_LINUX +/* +================================================= + Battery_Get 
+================================================= +*/ + bool PerformanceStat::Battery_Get (OUT BatteryStat &result) __NE___ + { + // TODO: same as Android but with BAT0 battery name + + result = Default; + return false; + } + +/* +================================================= + Temperature_Get +================================================= +*/ + bool PerformanceStat::Temperature_Get (OUT TemperatureStat &result) __NE___ + { + result.sensors.clear(); + return false; + } + +#endif // AE_PLATFORM_LINUX +//----------------------------------------------------------------------------- + +} // AE::Base + +#endif // (LINUX or ANDROID) diff --git a/AE/engine/src/base/Platforms/CPUInfo_WIndows2.cpp b/AE/engine/src/base/Platforms/Perf_WIndows.cpp similarity index 77% rename from AE/engine/src/base/Platforms/CPUInfo_WIndows2.cpp rename to AE/engine/src/base/Platforms/Perf_WIndows.cpp index 98fc2fbe..b696fbc8 100644 --- a/AE/engine/src/base/Platforms/CPUInfo_WIndows2.cpp +++ b/AE/engine/src/base/Platforms/Perf_WIndows.cpp @@ -1,18 +1,10 @@ // Copyright (c) Zhirnov Andrey. 
For more information see 'LICENSE' #if defined(AE_COMPILER_MSVC) and defined(AE_PLATFORM_WINDOWS) -# include "base/Defines/StdInclude.h" - -# pragma warning (push) -# pragma warning (disable: 4668) -# pragma warning (disable: 4005) -# include -# include -# include -# pragma warning (pop) - +# include "base/Platforms/WindowsHeader.cpp.h" # include "base/Platforms/WindowsUtils.h" # include "base/Platforms/WindowsLibrary.h" +# include "base/Platforms/PerformanceStat.h" namespace AE::Base { @@ -294,6 +286,73 @@ namespace static NtdllLib lib; return lib; } + +/* +================================================= + GetPerfCountersWin +================================================= +*/ + static bool GetPerfCountersWin (OUT PerformanceStat::PerProcessCounters* perProcess, + OUT PerformanceStat::PerThreadCounters* perThread) __NE___ + { + auto& ntdll = Ntdll(); + if ( ntdll.fnQuerySystemInformation == null ) + return false; + + ULONG size = 0; + ntdll.fnQuerySystemInformation( c_SystemProcessInformation, null, 0, OUT &size ); + + Array buf; buf.resize( size ); + + if ( ntdll.fnQuerySystemInformation( c_SystemProcessInformation, OUT buf.data(), ULONG(buf.size()), null ) != STATUS_SUCCESS ) + return false; + + const DWORD proc_id = ::GetCurrentProcessId(); + const DWORD thread_id = ::GetCurrentThreadId(); + + for_likely (const char* ptr = buf.data(); ptr < buf.data()+buf.size();) + { + auto& proc_info = *Cast( ptr ); + + if_unlikely( proc_info.NextEntryOffset == 0 ) + break; + + if_unlikely( usize(proc_info.UniqueProcessId) == proc_id ) + { + uint ctx_switch = 0; + + // find thread + auto threads = ArrayView{ Cast( ptr + sizeof(proc_info) ), proc_info.NumberOfThreads }; + for (auto& t : threads) + { + ctx_switch += t.ContextSwitches; + + if_unlikely( thread_id == usize(t.ClientId.UniqueThread) and perThread != null ) + { + *perThread = Default; + perThread->userTime = milliseconds{ t.UserTime.QuadPart }; + perThread->kernelTime = milliseconds{ t.KernelTime.QuadPart }; + 
perThread->involuntaryContextSwitches = t.ContextSwitches; + } + } + + if ( perProcess != null ) + { + *perProcess = Default; + perProcess->userTime = milliseconds{ proc_info.UserTime.QuadPart }; + perProcess->kernelTime = milliseconds{ proc_info.KernelTime.QuadPart }; + //perProcess->pageFaults= proc_info.PageFaultCount; + perProcess->fsInput = uint(proc_info.ReadOperationCount.QuadPart); + perProcess->fsOutput = uint(proc_info.WriteOperationCount.QuadPart); + perProcess->involuntaryContextSwitches = ctx_switch; + } + return true; + } + + ptr += proc_info.NextEntryOffset; + } + return false; + } } //----------------------------------------------------------------------------- @@ -301,10 +360,10 @@ namespace /* ================================================= - GetUsage + CPU_GetUsage ================================================= */ - uint CpuPerformance::GetUsage (OUT float* user, OUT float* kernel, const uint maxCount) __NE___ + uint PerformanceStat::CPU_GetUsage (OUT float* user, OUT float* kernel, const uint maxCount) __NE___ { NonNull( user ); NonNull( kernel ); @@ -336,17 +395,17 @@ namespace /* ================================================= - GetFrequency + CPU_GetFrequency ================================================= */ - CpuPerformance::MHz_t CpuPerformance::GetFrequency (uint core) __NE___ + PerformanceStat::MHz_t PerformanceStat::CPU_GetFrequency (uint core) __NE___ { // TODO Unused( core ); return 0; } - uint CpuPerformance::GetFrequency (OUT MHz_t* result, const uint maxCount) __NE___ + uint PerformanceStat::CPU_GetFrequency (OUT MHz_t* result, const uint maxCount) __NE___ { // TODO Unused( result, maxCount ); @@ -358,63 +417,62 @@ namespace GetPerfCounters ================================================= */ - bool CpuPerformance::GetPerfCounters (OUT PerProcessCounters &perProcess, OUT PerThreadCounters &perThread) __NE___ + bool PerformanceStat::GetPerfCounters (OUT PerProcessCounters* perProcess, OUT PerThreadCounters* perThread, OUT 
MemoryCounters* memory) __NE___ { - perProcess = Default; - perThread = Default; + bool res = true; // NOTE(review): stays 'true' when only 'memory' is requested and both queries below fail - confirm this is intended - auto& ntdll = Ntdll(); - if ( ntdll.fnQuerySystemInformation == null ) - return false; - - ULONG size = 0; - ntdll.fnQuerySystemInformation( c_SystemProcessInformation, null, 0, OUT &size ); - - Array buf; buf.resize( size ); - - if ( ntdll.fnQuerySystemInformation( c_SystemProcessInformation, OUT buf.data(), ULONG(buf.size()), null ) != STATUS_SUCCESS ) - return false; + if ( perProcess != null or perThread != null ) + res = GetPerfCountersWin( OUT perProcess, OUT perThread ); - const DWORD proc_id = ::GetCurrentProcessId(); - const DWORD thread_id = ::GetCurrentThreadId(); - - for_likely (const char* ptr = buf.data(); ptr < buf.data()+buf.size();) + if ( memory != null ) { - auto& proc_info = *Cast( ptr ); - - if_unlikely( proc_info.NextEntryOffset == 0 ) - break; - - if_unlikely( usize(proc_info.UniqueProcessId) == proc_id ) + PROCESS_MEMORY_COUNTERS mem = {}; + if ( ::GetProcessMemoryInfo( ::GetCurrentProcess(), OUT &mem, sizeof(mem) ) != FALSE ) // winxp, success is reported as any nonzero value { - // find thread - auto threads = ArrayView{ Cast( ptr + sizeof(proc_info) ), proc_info.NumberOfThreads }; - for (auto& t : threads) - { - perProcess.involuntaryContextSwitches += t.ContextSwitches; + res = true; + memory->pageFaults = mem.PageFaultCount; + memory->peakUsage = Bytes{mem.PeakWorkingSetSize}; + memory->currentUsage = Bytes{mem.WorkingSetSize}; + } - if_unlikely( thread_id == usize(t.ClientId.UniqueThread) ) - { - perThread.userTime = milliseconds{ t.UserTime.QuadPart }; - perThread.kernelTime = milliseconds{ t.KernelTime.QuadPart }; - perThread.involuntaryContextSwitches = t.ContextSwitches; - } - } + MEMORYSTATUSEX statex = {}; + statex.dwLength = sizeof(statex); + if ( ::GlobalMemoryStatusEx( OUT &statex ) != FALSE ) // winxp, success is reported as any nonzero value + { + res = true; + memory->totalPhysical = Bytes{ statex.ullTotalPhys }; + memory->availablePhysical = Bytes{ statex.ullAvailPhys }; - perProcess.userTime = 
milliseconds{ proc_info.UserTime.QuadPart }; - perProcess.kernelTime = milliseconds{ proc_info.KernelTime.QuadPart }; - perProcess.pageFaults = proc_info.PageFaultCount; - perProcess.fsInput = uint(proc_info.ReadOperationCount.QuadPart); - perProcess.fsOutput = uint(proc_info.WriteOperationCount.QuadPart); + memory->threshold = 0_b; // not supported - return true; + memory->totalVirtual = Bytes{ statex.ullTotalVirtual }; + memory->usedVirtual = memory->totalVirtual - Bytes{ statex.ullAvailVirtual }; } - - ptr += proc_info.NextEntryOffset; } + return res; + } + +/* +================================================= + Battery_Get +================================================= +*/ + bool PerformanceStat::Battery_Get (OUT BatteryStat &result) __NE___ + { + result = Default; return false; } +/* +================================================= + Temperature_Get +================================================= +*/ + bool PerformanceStat::Temperature_Get (OUT TemperatureStat &result) __NE___ + { + result.sensors.clear(); + return false; + } } // AE::Base diff --git a/AE/engine/src/base/Platforms/PerformanceStat.h b/AE/engine/src/base/Platforms/PerformanceStat.h new file mode 100644 index 00000000..9a2cdeda --- /dev/null +++ b/AE/engine/src/base/Platforms/PerformanceStat.h @@ -0,0 +1,96 @@ +// Copyright (c) Zhirnov Andrey. 
For more information see 'LICENSE' + +#pragma once + +#include "base/Common.h" +#include "base/Containers/FixedString.h" +#include "base/Containers/FixedArray.h" +#include "base/Math/PhysicalQuantity.h" + +namespace AE::Base +{ + + // + // CPU and Memory Performance Statistics + // + + struct PerformanceStat + { + // types + using _PQ = DefaultPhysicalQuantity; + using MHz_t = uint; + using Current_t = _PQ::Ampere; + using Voltage_t = _PQ::Volt; + using Power_t = _PQ::Watt; + using Energy_t = _PQ::Joule; + using Temperature_t = float; // degrees Celsius + using Capacity_t = decltype( _PQ::Ampere{} * _PQ::Hour{} ); // ampere-hour, 1 Ah = 3600 C (coulomb) + using Seconds_t = _PQ::Second; + + struct PerProcessCounters + { + milliseconds userTime; + milliseconds kernelTime; + uint fsInput = 0; // number of times the filesystem had to perform input + uint fsOutput = 0; // number of times the filesystem had to perform output + uint voluntaryContextSwitches = 0; // context switch when awaiting availability of a resource (IO) + uint involuntaryContextSwitches = 0; // a higher priority process replaced the current process (the Windows implementation stores the total context switch count here) + }; + using PerThreadCounters = PerProcessCounters; + + struct MemoryCounters + { + uint pageFaults = 0; // number of page faults serviced that required I/O activity + Bytes peakUsage; // on Windows: peak working set size + Bytes currentUsage; // on Windows: current working set size + + Bytes totalPhysical; + // Bytes usedPhysical; + Bytes availablePhysical; // The amount of physical RAM left unused by the system. + Bytes threshold; // The threshold of 'availablePhysical' at which we consider memory to be low + // and start killing background services and other non-extraneous processes. 
+ Bytes totalVirtual; + Bytes usedVirtual; + }; + + struct BatteryStat + { + Current_t current; + Voltage_t voltage; + Power_t power; + Temperature_t temperature = 0.f; + Percent level; + Capacity_t capacity; + bool isCharging = false; + }; + + struct TemperatureStat + { + Array> sensors; + }; + + + // methods + ND_ static MHz_t CPU_GetFrequency (uint core) __NE___; + static uint CPU_GetFrequency (OUT MHz_t* result, uint maxCount) __NE___; + + static uint CPU_GetUsage (OUT float* user, OUT float* kernel, uint maxCount) __NE___; + + ND_ static bool GetPerfCounters (OUT PerProcessCounters *, + OUT PerThreadCounters *, + OUT MemoryCounters *) __NE___; + + ND_ static bool Battery_Get (OUT BatteryStat &) __NE___; + ND_ static bool Temperature_Get (OUT TemperatureStat &) __NE___; + + + #ifdef AE_PLATFORM_ANDROID + static void _SetBatteryStat1 (float current, float capacity, float energy) __NE___; + static void _SetBatteryStat2 (float level, float temperature, float voltage, + bool isCharging) __NE___; + + // static void _SetMemoryStat (Bytes total, Bytes available) __NE___; + #endif + }; + +} // AE::Base diff --git a/AE/engine/src/base/Platforms/WindowsHeader.cpp.h b/AE/engine/src/base/Platforms/WindowsHeader.cpp.h index a2362b2d..f9c6b79e 100644 --- a/AE/engine/src/base/Platforms/WindowsHeader.cpp.h +++ b/AE/engine/src/base/Platforms/WindowsHeader.cpp.h @@ -20,6 +20,12 @@ # endif # include + +# include +# include +# define _NTDEF_ +# include + # include # include # include @@ -36,6 +42,8 @@ # include # include //# include +# include +# include # ifdef AE_COMPILER_MSVC # pragma warning (pop) diff --git a/AE/engine/src/base/Platforms/WindowsUtils.cpp b/AE/engine/src/base/Platforms/WindowsUtils.cpp index 8893a94a..0c4ae3c2 100644 --- a/AE/engine/src/base/Platforms/WindowsUtils.cpp +++ b/AE/engine/src/base/Platforms/WindowsUtils.cpp @@ -69,42 +69,6 @@ namespace AE::Base return info; } -/* -================================================= - GetPhysicalMemorySize 
-================================================= -*/ - WindowsUtils::MemorySize WindowsUtils::GetPhysicalMemorySize () __NE___ - { - MEMORYSTATUSEX statex = {}; - statex.dwLength = sizeof(statex); - - ::GlobalMemoryStatusEx( OUT &statex ); // winxp - - MemorySize result; - result.total = Bytes{ statex.ullTotalPhys }; - result.available = Bytes{ statex.ullAvailPhys }; - return result; - } - -/* -================================================= - GetVirtualMemorySize -================================================= -*/ - WindowsUtils::MemorySize WindowsUtils::GetVirtualMemorySize () __NE___ - { - MEMORYSTATUSEX statex = {}; - statex.dwLength = sizeof(statex); - - ::GlobalMemoryStatusEx( OUT &statex ); // winxp - - MemorySize result; - result.total = Bytes{ statex.ullTotalVirtual }; - result.available = Bytes{ statex.ullAvailVirtual }; - return result; - } - /* ================================================= CheckNetworkError diff --git a/AE/engine/src/base/Platforms/WindowsUtils.h b/AE/engine/src/base/Platforms/WindowsUtils.h index ebf35235..2204ce2b 100644 --- a/AE/engine/src/base/Platforms/WindowsUtils.h +++ b/AE/engine/src/base/Platforms/WindowsUtils.h @@ -26,12 +26,6 @@ namespace AE::Base Bytes allocationGranularity; }; - struct MemorySize - { - Bytes total; - Bytes available; - }; - // functions @@ -48,8 +42,6 @@ namespace AE::Base // Memory // ND_ static MemoryPageInfo GetMemoryPageInfo () __NE___; - ND_ static MemorySize GetPhysicalMemorySize () __NE___; - ND_ static MemorySize GetVirtualMemorySize () __NE___; // Thread // diff --git a/AE/engine/src/base/Pointers/Ptr.h b/AE/engine/src/base/Pointers/Ptr.h index 36eaf2ef..93bc9662 100644 --- a/AE/engine/src/base/Pointers/Ptr.h +++ b/AE/engine/src/base/Pointers/Ptr.h @@ -20,32 +20,32 @@ namespace AE::Base // methods public: - Ptr () __NE___ {} - Ptr (std::nullptr_t) __NE___ {} - Ptr (T* ptr) __NE___ : _value{ptr} {} + constexpr Ptr () __NE___ {} + constexpr Ptr (std::nullptr_t) __NE___ {} + constexpr Ptr 
(T* ptr) __NE___ : _value{ptr} {} template - Ptr (Ptr other) __NE___ : _value{static_cast( other.get() )} {} + constexpr Ptr (Ptr other) __NE___ : _value{static_cast( other.get() )} {} - ND_ T * operator -> () C_NE___ { NonNull( _value ); return _value; } - ND_ T & operator * () C_NE___ { NonNull( _value ); return *_value; } - ND_ T * get () C_NE___ { return _value; } + ND_ constexpr T * operator -> () C_NE___ { NonNull( _value ); return _value; } + ND_ constexpr T & operator * () C_NE___ { NonNull( _value ); return *_value; } + ND_ constexpr T * get () C_NE___ { return _value; } - ND_ explicit operator T * () C_NE___ { return _value; } + ND_ constexpr explicit operator T * () C_NE___ { return _value; } - ND_ operator Ptr () C_NE___ { return _value; } + ND_ constexpr operator Ptr () C_NE___ { return _value; } template - ND_ explicit operator B () C_NE___ { return static_cast( _value ); } + ND_ constexpr explicit operator B () C_NE___ { return static_cast( _value ); } - ND_ explicit operator bool () C_NE___ { return _value != null; } + ND_ constexpr explicit operator bool () C_NE___ { return _value != null; } - ND_ bool operator == (const Ptr &rhs) C_NE___ { return _value == rhs._value; } - ND_ bool operator != (const Ptr &rhs) C_NE___ { return not (*this == rhs); } - ND_ bool operator < (const Ptr &rhs) C_NE___ { return _value < rhs._value; } - ND_ bool operator > (const Ptr &rhs) C_NE___ { return _value > rhs._value; } - ND_ bool operator <= (const Ptr &rhs) C_NE___ { return _value <= rhs._value; } - ND_ bool operator >= (const Ptr &rhs) C_NE___ { return _value >= rhs._value; } + ND_ constexpr bool operator == (Ptr rhs) C_NE___ { return _value == rhs._value; } + ND_ constexpr bool operator != (Ptr rhs) C_NE___ { return not (*this == rhs); } + ND_ constexpr bool operator < (Ptr rhs) C_NE___ { return _value < rhs._value; } + ND_ constexpr bool operator > (Ptr rhs) C_NE___ { return _value > rhs._value; } + ND_ constexpr bool operator <= (Ptr rhs) C_NE___ { return 
_value <= rhs._value; } + ND_ constexpr bool operator >= (Ptr rhs) C_NE___ { return _value >= rhs._value; } }; diff --git a/AE/engine/src/base/Pointers/Ref.h b/AE/engine/src/base/Pointers/Ref.h new file mode 100644 index 00000000..7c2d11bd --- /dev/null +++ b/AE/engine/src/base/Pointers/Ref.h @@ -0,0 +1,59 @@ +// Copyright (c) Zhirnov Andrey. For more information see 'LICENSE' + +#pragma once + +#include "base/Common.h" + +namespace AE::Base +{ + + // + // Reference Wrapper + // + // Stores the address of an existing object; can be moved but not copied or default-constructed. + template + struct Ref + { + // variables + private: + T * _value; // address of the referenced object, taken from a reference in the constructor + + // methods + public: + Ref () = delete; + Ref (const Ref &) = delete; + + constexpr Ref (Ref &&) __NE___ = default; + constexpr explicit Ref (T& ref) __NE___ : _value{&ref} {} + + constexpr Ref& operator = (const Ref &) = delete; + constexpr Ref& operator = (Ref &&) __NE___ = default; + + constexpr T& operator = (const T& rhs) C_NE___ { return *_value = rhs; } // assigns 'rhs' to the referenced object, the wrapper itself is not rebound + + ND_ constexpr T* operator -> () C_NE___ { return _value; } + ND_ constexpr T* operator & () C_NE___ { return _value; } // address of the referenced object, not of the wrapper + + ND_ constexpr operator T& () C_NE___ { return *_value; } + + ND_ constexpr T& get () C_NE___ { return *_value; } + }; + + + template + Ref (T&) -> Ref; + + +/* +================================================= + ArgRef (similar to std::ref, but the returned wrapper is move-only) +================================================= +*/ + template + ND_ constexpr Ref ArgRef (T &arg) __NE___ + { + return Ref{ arg }; + } + + +} // AE::Base diff --git a/AE/engine/src/base/Time/Timer.h b/AE/engine/src/base/Time/Timer.h index 93137888..74f00da0 100644 --- a/AE/engine/src/base/Time/Timer.h +++ b/AE/engine/src/base/Time/Timer.h @@ -71,6 +71,7 @@ namespace AE::Base ND_ auto Now () C_NE___ { return _lastTick; } ND_ auto Interval () C_NE___ { return _interval; } + ND_ bool IsActive () C_NE___ { return _interval.count() > Zero; } }; @@ -85,7 +86,7 @@ namespace AE::Base _lastTick = lastTick; _interval = TimeCast( interval ); - ASSERT( _interval.count() > 0 ); + ASSERT( IsActive() ); } /* @@ 
-99,7 +100,7 @@ namespace AE::Base _interval = TimeCast( interval ); _lastTick = Clock_t::now() - _interval; - ASSERT( _interval.count() > 0 ); + ASSERT( IsActive() ); } /* @@ -109,7 +110,7 @@ namespace AE::Base */ inline void Timer::Restart (TimePoint_t now) __NE___ { - ASSERT( _interval.count() > 0 ); + ASSERT( IsActive() ); _lastTick = now; } @@ -120,7 +121,7 @@ namespace AE::Base */ inline auto Timer::Tick (TimePoint_t now) __NE___ { - ASSERT( _interval.count() > 0 ); + ASSERT( IsActive() ); const Duration_t dt = now - _lastTick; const bool ok = dt >= _interval; diff --git a/AE/engine/src/ecs-st/CMakeLists.txt b/AE/engine/src/ecs-st/CMakeLists.txt index b5d13c96..5a5f6aea 100644 --- a/AE/engine/src/ecs-st/CMakeLists.txt +++ b/AE/engine/src/ecs-st/CMakeLists.txt @@ -13,5 +13,6 @@ target_link_libraries( "ECS-st" PUBLIC "Serializing" "Threading" ) EnablePCH( "ECS-st" ) EnablePrebuild( "ECS-st" ) +EnableUnitBuild( "ECS-st" ) install( TARGETS "ECS-st" ARCHIVE DESTINATION "lib" ) diff --git a/AE/engine/src/ecs-st/Core/Registry.h b/AE/engine/src/ecs-st/Core/Registry.h index 1702082d..9fb5e1eb 100644 --- a/AE/engine/src/ecs-st/Core/Registry.h +++ b/AE/engine/src/ecs-st/Core/Registry.h @@ -244,7 +244,7 @@ namespace AE::ECS void _WithSingleComponents (Fn &&fn, ArrayView chunks, const Tuple *) __NE___; template - ND_ decltype(auto) _GetSingleComponent () __NE___; + ND_ exact_t _GetSingleComponent () __NE___; template diff --git a/AE/engine/src/ecs-st/Core/Registry.inl.h b/AE/engine/src/ecs-st/Core/Registry.inl.h index 497f9fad..0182e13d 100644 --- a/AE/engine/src/ecs-st/Core/Registry.inl.h +++ b/AE/engine/src/ecs-st/Core/Registry.inl.h @@ -868,7 +868,7 @@ DEBUG_ONLY( struct GetStorageElement { template - static decltype(auto) Get (ChunkType &chunk, usize i) __NE___ + static exact_t Get (ChunkType &chunk, usize i) __NE___ { return chunk.template Get< MapCompType >()[i]; } @@ -1166,7 +1166,7 @@ DEBUG_ONLY( ================================================= */ template - 
decltype(auto) Registry::_GetSingleComponent () __NE___ + exact_t Registry::_GetSingleComponent () __NE___ { if constexpr( IsPointer ) { diff --git a/AE/engine/src/graphics/CMakeLists.txt b/AE/engine/src/graphics/CMakeLists.txt index 850d0cbd..a17adde2 100644 --- a/AE/engine/src/graphics/CMakeLists.txt +++ b/AE/engine/src/graphics/CMakeLists.txt @@ -26,6 +26,9 @@ if (${AE_ENABLE_VULKAN} OR ${AE_ENABLE_METAL} OR ${AE_ENABLE_REMOTE_GRAPHICS}) if (NOT (DEFINED ENGINE_LIBS_PATH)) file( GLOB_RECURSE METAL_CPP_SOURCES "Metal/*.cpp" ) file( GLOB_RECURSE METAL_OBJC_SOURCES "Metal/*.mm" ) + if ( ${CMAKE_VERSION} VERSION_LESS "3.29.0" ) + set_property( SOURCE ${METAL_OBJC_SOURCES} PROPERTY SKIP_UNITY_BUILD_INCLUSION ON ) + endif() endif() endif() @@ -47,6 +50,7 @@ if (${AE_ENABLE_VULKAN} OR ${AE_ENABLE_METAL} OR ${AE_ENABLE_REMOTE_GRAPHICS}) "${PIPELINE_COMPILER_DIR}/Packer/PipelinePackDeserializer.cpp" "${PIPELINE_COMPILER_DIR}/Packer/RenderPassPack.cpp" "${PIPELINE_COMPILER_DIR}/Packer/SamplerPack.cpp" ) + set_property( SOURCE ${PIPELINE_COMPILER_CPP_SRC} PROPERTY SKIP_UNITY_BUILD_INCLUSION ON ) endif() if (${AE_ENABLE_VULKAN}) @@ -160,6 +164,9 @@ if (${AE_ENABLE_VULKAN} OR ${AE_ENABLE_METAL} OR ${AE_ENABLE_REMOTE_GRAPHICS}) EnablePCH( "Graphics" ) EnablePrebuild( "Graphics" ) + EnableUnitBuild( "Graphics" ) + + set_property( SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/Private/Shared.cpp" PROPERTY SKIP_UNITY_BUILD_INCLUSION ON ) install( TARGETS "Graphics" ARCHIVE DESTINATION "lib" ) endif() diff --git a/AE/engine/src/graphics/Private/BufferDesc.cpp b/AE/engine/src/graphics/Private/BufferDesc.cpp.h similarity index 91% rename from AE/engine/src/graphics/Private/BufferDesc.cpp rename to AE/engine/src/graphics/Private/BufferDesc.cpp.h index 35434999..d70903d2 100644 --- a/AE/engine/src/graphics/Private/BufferDesc.cpp +++ b/AE/engine/src/graphics/Private/BufferDesc.cpp.h @@ -18,7 +18,7 @@ namespace AE::Graphics if ( memType == Default ) memType = EMemoryType::DeviceLocal; - if ( not 
AllBits( memType, EMemoryType::DeviceLocal )) + if ( NoBits( memType, EMemoryType::DeviceLocal )) { options &= ~EBufferOpt::SparseResidencyAliased; usage &= ~(EBufferUsage::StorageTexel | EBufferUsage::Storage | EBufferUsage::ShaderAddress | @@ -28,10 +28,10 @@ namespace AE::Graphics if ( usage == Default ) usage = EBufferUsage::Transfer; - if ( not AnyBits( usage, EBufferUsage::Storage | EBufferUsage::StorageTexel )) + if ( NoBits( usage, EBufferUsage::Storage | EBufferUsage::StorageTexel )) options &= ~(EBufferOpt::VertexPplnStore | EBufferOpt::FragmentPplnStore); - if ( not AllBits( usage, EBufferUsage::StorageTexel )) + if ( NoBits( usage, EBufferUsage::StorageTexel )) options &= ~EBufferOpt::StorageTexelAtomic; if ( AnyBits( usage, EBufferUsage::ShaderBindingTable | EBufferUsage::ASBuild_ReadOnly | EBufferUsage::ASBuild_Scratch )) diff --git a/AE/engine/src/graphics/Private/CommandBatch.cpp.h b/AE/engine/src/graphics/Private/CommandBatch.cpp.h index a1a0e55a..64948ef7 100644 --- a/AE/engine/src/graphics/Private/CommandBatch.cpp.h +++ b/AE/engine/src/graphics/Private/CommandBatch.cpp.h @@ -1,5 +1,7 @@ // Copyright (c) Zhirnov Andrey. 
For more information see 'LICENSE' +#pragma once + #if defined(AE_ENABLE_VULKAN) # define SUFFIX V # define CMDBATCH VCommandBatch diff --git a/AE/engine/src/graphics/Private/ContextValidation.cpp b/AE/engine/src/graphics/Private/ContextValidation.cpp index c2db88fa..b6a68e78 100644 --- a/AE/engine/src/graphics/Private/ContextValidation.cpp +++ b/AE/engine/src/graphics/Private/ContextValidation.cpp @@ -106,9 +106,14 @@ namespace return info.HasDepthOrStencil(); } + ND_ static uint3 MipmapDimension (const ImageDesc &desc, MipmapLevel mip) + { + return ImageUtils::MipmapDimension( desc.Dimension(), mip.Get(), EPixelFormat_GetInfo( desc.format ).TexBlockDim() ); + } + static void ValidateImageSubresourceLayers (const ImageDesc &desc, const ImageSubresourceLayers &subres, const uint3 &offset, const uint3 &extent) __Th___ { - const uint3 dim = Max( 1u, desc.dimension >> subres.mipLevel.Get() ); + const uint3 dim = MipmapDimension( desc, subres.mipLevel ); GCTX_CHECK( All( offset < dim )); GCTX_CHECK( All( (offset + extent) <= dim )); @@ -137,6 +142,8 @@ namespace fs.attachmentFragmentShadingRate == True; } + ND_ static bool ViewportWScalingSupported () __NE___ { return _GetFeatureSet().clipSpaceWScalingNV == True; } + #ifdef AE_ENABLE_VULKAN ND_ static auto const& _GetDeviceExtensions () __NE___ { return GraphicsScheduler().GetDevice().GetVExtensions(); @@ -218,7 +225,7 @@ namespace GCTX_CHECK( image != Default ); for (auto& range : ranges) { - GCTX_CHECK( not AnyBits( range.aspectMask, ~(VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT) )); + GCTX_CHECK( NoBits( range.aspectMask, ~(VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT) )); } } # endif @@ -411,12 +418,21 @@ namespace GCTX_CHECK( AllBits( srcImageDesc.usage, EImageUsage::TransferSrc )); GCTX_CHECK( AllBits( dstImageDesc.usage, EImageUsage::TransferDst )); + // TODO: queue transfer granularity + for (auto& range : ranges) { ValidateImageSubresourceLayers( srcImageDesc, range.srcSubres, 
range.srcOffset, range.extent ); ValidateImageSubresourceLayers( dstImageDesc, range.dstSubres, range.dstOffset, range.extent ); GCTX_CHECK( range.srcSubres.aspectMask == range.dstSubres.aspectMask ); // TODO: multi-planar format + + uint2 src_gran {1}; + uint2 dst_gran {1}; + + GCTX_CHECK( EPixelFormat_GetCopyGranularity( srcImageDesc.format, OUT src_gran, dstImageDesc.format, OUT dst_gran )); + + // TODO } } @@ -531,37 +547,21 @@ namespace GCTX_CHECK( IsDeviceMemory( srcImageDesc )); GCTX_CHECK( IsDeviceMemory( dstImageDesc )); - const auto & src_fmt = EPixelFormat_GetInfo( srcImageDesc.format ); - const auto & dst_fmt = EPixelFormat_GetInfo( dstImageDesc.format ); - GCTX_CHECK( AllBits( srcImageDesc.usage, EImageUsage::TransferSrc )); GCTX_CHECK( AllBits( dstImageDesc.usage, EImageUsage::TransferDst )); GCTX_CHECK( AllBits( srcImageDesc.options, EImageOpt::BlitSrc )); GCTX_CHECK( AllBits( dstImageDesc.options, EImageOpt::BlitDst )); GCTX_CHECK( not srcImageDesc.samples.IsEnabled() ); GCTX_CHECK( not dstImageDesc.samples.IsEnabled() ); - - using EType = PixelFormatInfo::EType; - const auto float_flags = EType::SFloat | EType::UFloat | EType::UNorm | EType::SNorm; - - GCTX_CHECK( AnyBits( src_fmt.valueType, float_flags ) == AnyBits( dst_fmt.valueType, float_flags )); - GCTX_CHECK( AllBits( src_fmt.valueType, EType::Int ) == AllBits( dst_fmt.valueType, EType::Int )); - GCTX_CHECK( AllBits( src_fmt.valueType, EType::UInt ) == AllBits( dst_fmt.valueType, EType::UInt )); - GCTX_CHECK( AnyBits( src_fmt.valueType, EType::DepthStencil ) == AnyBits( dst_fmt.valueType, EType::DepthStencil )); - - if ( AnyBits( src_fmt.valueType, EType::DepthStencil )) - { - GCTX_CHECK( srcImageDesc.format == dstImageDesc.format ); - GCTX_CHECK( blitFilter == EBlitFilter::Nearest ); - } + GCTX_CHECK( EPixelFormat_IsBlitSupported( srcImageDesc.format, dstImageDesc.format, blitFilter )); //if ( blitFilter == EBlitFilter::Linear ) // GCTX_CHECK( AllBits( srcImageDesc.options, 
EImageOpt::SampledLinear )); for (auto& range : ranges) { - const uint3 src_dim = Max( 1u, srcImageDesc.dimension >> range.srcSubres.mipLevel.Get() ); - const uint3 dst_dim = Max( 1u, dstImageDesc.dimension >> range.dstSubres.mipLevel.Get() ); + const uint3 src_dim = MipmapDimension( srcImageDesc, range.srcSubres.mipLevel ); + const uint3 dst_dim = MipmapDimension( dstImageDesc, range.dstSubres.mipLevel ); GCTX_CHECK( All( range.srcOffset0 <= src_dim )); GCTX_CHECK( All( range.srcOffset1 <= src_dim )); @@ -601,8 +601,8 @@ namespace for (auto& range : ranges) { - const uint3 src_dim = Max( 1u, srcImageDesc.dimension >> range.srcSubres.mipLevel.Get() ); - const uint3 dst_dim = Max( 1u, dstImageDesc.dimension >> range.dstSubres.mipLevel.Get() ); + const uint3 src_dim = MipmapDimension( srcImageDesc, range.srcSubres.mipLevel ); + const uint3 dst_dim = MipmapDimension( dstImageDesc, range.dstSubres.mipLevel ); GCTX_CHECK( All( range.srcOffset < src_dim )); GCTX_CHECK( All( range.srcOffset + range.extent <= src_dim )); @@ -686,7 +686,7 @@ namespace GCTX_CHECK( stages != Default ); GCTX_CHECK_MSG( layout != Default, "pipeline is not bound" ); GCTX_CHECK( AnyBits( stages, EShaderStages::Compute )); - GCTX_CHECK( not AnyBits( stages, ~EShaderStages::Compute )); + GCTX_CHECK( NoBits( stages, ~EShaderStages::Compute )); } # endif @@ -784,7 +784,7 @@ namespace GCTX_CHECK( stages != Default ); GCTX_CHECK_MSG( layout != Default, "pipeline is not bound" ); GCTX_CHECK( AnyBits( stages, EShaderStages::AllGraphics )); - GCTX_CHECK( not AnyBits( stages, ~EShaderStages::AllGraphics )); + GCTX_CHECK( NoBits( stages, ~EShaderStages::AllGraphics )); } # endif @@ -1288,7 +1288,20 @@ namespace GCTX_CHECK( FragmentShadingRateSupported() ); // GCTX_CHECK( AllBits( dynState, EPipelineDynamicState::FragmentShadingRate )); - GCTX_CHECK( not AnyBits( rate, ~uint(EShadingRate::_SizeMask) )); // only size + GCTX_CHECK( NoBits( rate, ~uint(EShadingRate::_SizeMask) )); // only size + } + +/* 
+================================================= + SetViewportWScaling +================================================= +*/ + void DrawContextValidation::SetViewportWScaling (EPipelineDynamicState dynState, ArrayView scaling) __Th___ + { + GCTX_CHECK( ViewportWScalingSupported() ); + //GCTX_CHECK( AllBits( dynState, EPipelineDynamicState::ViewportWScaling )); + + Unused( scaling ); // TODO } #endif @@ -1624,7 +1637,7 @@ namespace GCTX_CHECK( stages != Default ); GCTX_CHECK_MSG( layout != Default, "pipeline is not bound" ); GCTX_CHECK( AnyBits( stages, EShaderStages::AllRayTracing )); - GCTX_CHECK( not AnyBits( stages, ~EShaderStages::AllRayTracing )); + GCTX_CHECK( NoBits( stages, ~EShaderStages::AllRayTracing )); } # endif diff --git a/AE/engine/src/graphics/Private/ContextValidation.h b/AE/engine/src/graphics/Private/ContextValidation.h index d8e0bf82..e893d9e4 100644 --- a/AE/engine/src/graphics/Private/ContextValidation.h +++ b/AE/engine/src/graphics/Private/ContextValidation.h @@ -225,6 +225,7 @@ namespace AE::Graphics::_hidden_ static void SetStencilReference (EPipelineDynamicState dynState) __Th___; static void SetBlendConstants (EPipelineDynamicState dynState) __Th___; static void SetFragmentShadingRate (EPipelineDynamicState, EShadingRate, EShadingRateCombinerOp, EShadingRateCombinerOp) __Th___; + static void SetViewportWScaling (EPipelineDynamicState, ArrayView scaling) __Th___; #ifdef AE_ENABLE_VULKAN diff --git a/AE/engine/src/graphics/Private/Defines.h b/AE/engine/src/graphics/Private/Defines.h index 919c299b..fb47c6dd 100644 --- a/AE/engine/src/graphics/Private/Defines.h +++ b/AE/engine/src/graphics/Private/Defines.h @@ -1,7 +1,5 @@ // Copyright (c) Zhirnov Andrey. For more information see 'LICENSE' -#pragma once - #if defined(AE_CFG_RELEASE) and not defined(AE_CI_BUILD_TEST) # define AE_DBG_GRAPHICS 0 # define GFX_DBG_ONLY( ... 
) diff --git a/AE/engine/src/graphics/Private/DeviceProperties.cpp b/AE/engine/src/graphics/Private/DeviceProperties.cpp index 5bb39198..77fde807 100644 --- a/AE/engine/src/graphics/Private/DeviceProperties.cpp +++ b/AE/engine/src/graphics/Private/DeviceProperties.cpp @@ -516,15 +516,11 @@ namespace { #ifdef AE_ENABLE_LOGS TRY{ - String str; + //String str; - str << "memory types:"; + // skip 'memTypes' - for (EMemoryType mem : memTypes) { - str << "\n " << ToString( mem ); - } - - AE_LOGI( str ); + //AE_LOGI( str ); } CATCH_ALL() #endif diff --git a/AE/engine/src/graphics/Private/DrawCommandBatch.cpp.h b/AE/engine/src/graphics/Private/DrawCommandBatch.cpp.h index 03526bac..ecd51870 100644 --- a/AE/engine/src/graphics/Private/DrawCommandBatch.cpp.h +++ b/AE/engine/src/graphics/Private/DrawCommandBatch.cpp.h @@ -1,5 +1,7 @@ // Copyright (c) Zhirnov Andrey. For more information see 'LICENSE' +#pragma once + #if defined(AE_ENABLE_VULKAN) # define DRAWCMDBATCH VDrawCommandBatch diff --git a/AE/engine/src/graphics/Private/EnumToString.h b/AE/engine/src/graphics/Private/EnumToString.h index cff3a7ca..b0711101 100644 --- a/AE/engine/src/graphics/Private/EnumToString.h +++ b/AE/engine/src/graphics/Private/EnumToString.h @@ -646,6 +646,8 @@ namespace AE::Base //case EPipelineDynamicState::DepthBounds : str << "DepthBounds"; break; case EPipelineDynamicState::RTStackSize : str << "RTStackSize"; break; case EPipelineDynamicState::FragmentShadingRate : str << "FragmentShadingRate"; break; + case EPipelineDynamicState::ViewportWScaling : str << "ViewportWScaling"; break; + case EPipelineDynamicState::GraphicsPipelineMask : case EPipelineDynamicState::All : case EPipelineDynamicState::_Last : diff --git a/AE/engine/src/graphics/Private/EnumUtils.cpp b/AE/engine/src/graphics/Private/EnumUtils.cpp.h similarity index 92% rename from AE/engine/src/graphics/Private/EnumUtils.cpp rename to AE/engine/src/graphics/Private/EnumUtils.cpp.h index cc71fb34..2aa9a7dc 100644 --- 
a/AE/engine/src/graphics/Private/EnumUtils.cpp +++ b/AE/engine/src/graphics/Private/EnumUtils.cpp.h @@ -108,13 +108,13 @@ namespace AE::Graphics case _EResState::InputDepthStencilAttachment : case _EResState::InputDepthStencilAttachment_RW : CHECK_ERR( AnyBits( shaders, EResourceState::PostRasterizationShaders )); - CHECK_ERR( not AnyBits( shaders, EResourceState::AllShaders & ~EResourceState::PostRasterizationShaders )); + CHECK_ERR( NoBits( shaders, EResourceState::AllShaders & ~EResourceState::PostRasterizationShaders )); break; case _EResState::DepthStencilTest_ShaderSample : case _EResState::DepthTest_DepthSample_StencilRW : CHECK_ERR( AnyBits( shaders, EResourceState::AllGraphicsShaders )); - CHECK_ERR( not AnyBits( shaders, EResourceState::AllShaders & ~EResourceState::AllGraphicsShaders )); + CHECK_ERR( NoBits( shaders, EResourceState::AllShaders & ~EResourceState::AllGraphicsShaders )); break; case _EResState::Unknown : @@ -165,7 +165,7 @@ namespace AE::Graphics for (EShaderStages t = EShaderStages(1 << 0); t < EShaderStages::All; t = EShaderStages(uint(t) << 1)) { - if ( not AllBits( values, t )) + if ( NoBits( values, t )) continue; switch_enum( t ) @@ -1236,6 +1236,199 @@ namespace AE::Graphics return ImageUtils::ImageSize( imageDim, fmt_info.bitsPerBlock, fmt_info.TexBlockDim() ); } } + +/* +================================================= + EPixelFormat_IsCompatible +================================================= +*/ + bool EPixelFormat_IsCompatible (const EPixelFormat aFmt, const EPixelFormat bFmt) __NE___ + { + if ( aFmt == bFmt ) + return true; + + auto& a = EPixelFormat_GetInfo( aFmt ); + auto& b = EPixelFormat_GetInfo( bFmt ); + + // Vulkan docs '49.1.7. Format Compatibility Classes': + // "Uncompressed color formats are compatible with each other if they occupy the same number of bits per texel block." 
+ if ( a.IsColor() and b.IsColor() and not a.IsCompressed() and not b.IsCompressed() ) + { + return a.bitsPerBlock == b.bitsPerBlock and + All( a.blockDim == b.blockDim ); + } + + // Vulkan docs: + // "Compressed color formats are compatible with each other if the only difference between them is + // the numeric format of the uncompressed pixels." + if ( a.IsCompressed() and b.IsCompressed() ) + { + const auto CompressionType = [] (EPixelFormat fmt) + {{ + switch ( fmt ) + { + case EPixelFormat::BC1_RGB8_UNorm : + case EPixelFormat::BC1_sRGB8 : return EPixelFormat::BC1_RGB8_UNorm; + + case EPixelFormat::BC1_RGB8_A1_UNorm : + case EPixelFormat::BC1_sRGB8_A1 : return EPixelFormat::BC1_RGB8_A1_UNorm; + + case EPixelFormat::BC2_RGBA8_UNorm : + case EPixelFormat::BC2_sRGB8 : return EPixelFormat::BC2_RGBA8_UNorm; + + case EPixelFormat::BC3_RGBA8_UNorm : + case EPixelFormat::BC3_sRGB8 : return EPixelFormat::BC3_RGBA8_UNorm; + + case EPixelFormat::BC4_R8_SNorm : + case EPixelFormat::BC4_R8_UNorm : return EPixelFormat::BC4_R8_UNorm; + + case EPixelFormat::BC5_RG8_SNorm : + case EPixelFormat::BC5_RG8_UNorm : return EPixelFormat::BC5_RG8_UNorm; + + case EPixelFormat::BC6H_RGB16F : + case EPixelFormat::BC6H_RGB16UF : return EPixelFormat::BC6H_RGB16F; + + case EPixelFormat::BC7_RGBA8_UNorm : + case EPixelFormat::BC7_sRGB8_A8 : return EPixelFormat::BC7_RGBA8_UNorm; + + case EPixelFormat::ETC2_RGB8_UNorm : + case EPixelFormat::ETC2_sRGB8 : return EPixelFormat::ETC2_RGB8_UNorm; + + case EPixelFormat::ETC2_RGB8_A1_UNorm : + case EPixelFormat::ETC2_sRGB8_A1 : return EPixelFormat::ETC2_RGB8_A1_UNorm; + + case EPixelFormat::ETC2_RGBA8_UNorm : + case EPixelFormat::ETC2_sRGB8_A8 : return EPixelFormat::ETC2_RGBA8_UNorm; + + case EPixelFormat::EAC_R11_SNorm : + case EPixelFormat::EAC_R11_UNorm : return EPixelFormat::EAC_R11_UNorm; + + case EPixelFormat::EAC_RG11_SNorm : + case EPixelFormat::EAC_RG11_UNorm : return EPixelFormat::EAC_RG11_UNorm; + } + + if ( EPixelFormat_IsASTC( fmt )) + 
return EPixelFormat::ASTC_RGBA8_4x4; + + return fmt; + }}; + + return CompressionType( aFmt ) == CompressionType( bFmt ) and + All( a.blockDim == b.blockDim ); + } + + // Vulkan docs: + // "Each depth/stencil format is only compatible with itself." + if ( a.HasDepthOrStencil() or b.HasDepthOrStencil() ) + return false; + + // TODO: multiplanar? + return false; + } + +/* +================================================= + EPixelFormat_IsBlitSupported +================================================= +*/ + bool EPixelFormat_IsBlitSupported (EPixelFormat src, EPixelFormat dst, EBlitFilter blitFilter) __NE___ + { + const auto & src_fmt = EPixelFormat_GetInfo( src ); + const auto & dst_fmt = EPixelFormat_GetInfo( dst ); + + using EType = PixelFormatInfo::EType; + const auto float_flags = EType::SFloat | EType::UFloat | EType::UNorm | EType::SNorm; + + if ( AnyBits( src_fmt.valueType, float_flags ) != AnyBits( dst_fmt.valueType, float_flags ) or + AllBits( src_fmt.valueType, EType::Int ) != AllBits( dst_fmt.valueType, EType::Int ) or + AllBits( src_fmt.valueType, EType::UInt ) != AllBits( dst_fmt.valueType, EType::UInt ) or + AnyBits( src_fmt.valueType, EType::DepthStencil ) != AnyBits( dst_fmt.valueType, EType::DepthStencil )) + { + return false; + } + + if ( AnyBits( src_fmt.valueType, EType::DepthStencil )) + { + if ( src != dst or + blitFilter != EBlitFilter::Nearest ) + return false; + } + + return true; + } + +/* +================================================= + EPixelFormat_GetCopyGranularity +---- + returns 'false' if not compatible +================================================= +*/ + bool EPixelFormat_GetCopyGranularity (const EPixelFormat src, OUT uint2 &srcBlockDim, + const EPixelFormat dst, OUT uint2 &dstBlockDim) __NE___ + { + auto& a = EPixelFormat_GetInfo( src ); + auto& b = EPixelFormat_GetInfo( dst ); + + // Vulkan docs: + // "Color formats with the same texel block size are considered size-compatible" + if ( a.IsColor() and b.IsColor() and 
a.bitsPerBlock == b.bitsPerBlock ) + { + srcBlockDim = a.TexBlockDim(); + dstBlockDim = b.TexBlockDim(); + return true; + } + + return false; + } + + bool EPixelFormat_IsCopySupportedRelaxed (EPixelFormat src, EPixelFormat dst) __NE___ + { + auto& a = EPixelFormat_GetInfo( src ); + auto& b = EPixelFormat_GetInfo( dst ); + + // Vulkan docs: + // "Color formats with the same texel block size are considered size-compatible" + if ( a.IsColor() and b.IsColor() and a.bitsPerBlock == b.bitsPerBlock ) + return true; + + return false; + } + +/* +================================================= + EPixelFormat_IsCopySupported +================================================= +*/ + bool EPixelFormat_IsCopySupported (EPixelFormat src, EPixelFormat dst) __NE___ + { + if ( src == dst ) + return true; + + uint2 a, b; + return EPixelFormat_GetCopyGranularity( src, OUT a, dst, OUT b ) and All( a == b ); + } + + bool EPixelFormat_IsCopySupported (const EPixelFormat src, const uint2 &srcDim, + const EPixelFormat dst, const uint2 &dstDim) __NE___ + { + if ( src == dst and All( srcDim == dstDim )) + return true; + + uint2 src_gran; + uint2 dst_gran; + + if ( EPixelFormat_GetCopyGranularity( src, OUT src_gran, dst, OUT dst_gran )) + { + uint2 s0 = srcDim / src_gran; + uint2 s1 = dstDim / dst_gran; + + return All( s0 == s1 ) and + All( srcDim - s0 == uint2{0} ) and + All( dstDim - s1 == uint2{0} ); + } + return false; + } //----------------------------------------------------------------------------- @@ -1960,8 +2153,10 @@ namespace AE::Graphics case EPixelFormat::G10x6B10x6G10x6R10x6_422_UNorm: return EVideoFormat::Y210; case EPixelFormat::G12x4B12x4G12x4R12x4_422_UNorm: return EVideoFormat::Y212; + case EPixelFormat::sBGR8_A8 : case EPixelFormat::BGRA8_UNorm : return EVideoFormat::BGR0; // or BGRA + case EPixelFormat::sRGB8_A8 : case EPixelFormat::RGBA8_UNorm : return EVideoFormat::RGB0; // or RGBA case EPixelFormat::RGBA16_UNorm : return EVideoFormat::XV36; diff --git 
a/AE/engine/src/graphics/Private/EnumUtils.h b/AE/engine/src/graphics/Private/EnumUtils.h index b1381545..dbf5cded 100644 --- a/AE/engine/src/graphics/Private/EnumUtils.h +++ b/AE/engine/src/graphics/Private/EnumUtils.h @@ -42,7 +42,7 @@ namespace AE::Graphics ND_ inline EShaderStages EShaderStages_FromShader (EShader value) __NE___ { auto result = EShaderStages( 1 << uint(value) ); - ASSERT( not AnyBits( result, ~EShaderStages::All )); + ASSERT( NoBits( result, ~EShaderStages::All )); return result; } //----------------------------------------------------------------------------- @@ -98,8 +98,8 @@ namespace AE::Graphics ND_ constexpr bool EResourceState_IsReadOnly (EResourceState value) __NE___ { - return not EResourceState_HasWriteAccess( value ) and - not AllBits( value, EResourceState::Invalidate ) and + return not EResourceState_HasWriteAccess( value ) and + NoBits( value, EResourceState::Invalidate ) and value != EResourceState::_InvalidState; } @@ -127,7 +127,7 @@ namespace AE::Graphics ND_ constexpr bool EResourceState_IsReadOnly (EResourceState value, EImageAspect mask) __NE___ { return not EResourceState_HasWriteAccess( value, mask ) and - not AllBits( value, EResourceState::Invalidate ) and + NoBits( value, EResourceState::Invalidate ) and value != EResourceState::_InvalidState; } @@ -207,7 +207,7 @@ namespace AE::Graphics }; EType valueType = Default; - ushort bitsPerBlock = 0; // for color and depth (max: 64bit * 4) + ushort bitsPerBlock = 0; // for color and depth (max: 64bit * 4) // TODO: use bytes ubyte bitsPerBlock2 = 0; // for stencil EPixelFormat format = Default; EImageAspect aspectMask = Default; @@ -237,7 +237,7 @@ namespace AE::Graphics ND_ uint2 TexBlockDim () C_NE___ { return uint2{blockDim}; } ND_ bool IsCompressed () C_NE___ { return not All( blockDim == ubyte2{1,1} ); } - ND_ bool IsColor () C_NE___ { return not AnyBits( valueType, EType::DepthStencil ); } + ND_ bool IsColor () C_NE___ { return NoBits( valueType, EType::DepthStencil ); } 
ND_ bool IsDepth () C_NE___ { return valueType == EType::Depth; } ND_ bool IsStencil () C_NE___ { return valueType == EType::Stencil; } ND_ bool IsDepthStencil () C_NE___ { return valueType == EType::DepthStencil; } @@ -413,6 +413,21 @@ namespace AE::Graphics */ ND_ Bytes EPixelFormat_ImageSize (EPixelFormat fmt, const uint2 &dim, Bytes planeAlign = 1_b) __NE___; ND_ Bytes EPixelFormat_ImageSize (EPixelFormat fmt, const uint3 &dim, Bytes planeAlign = 1_b) __NE___; + +/* +================================================= + format compatibility +================================================= +*/ + ND_ bool EPixelFormat_IsCompatible (EPixelFormat, EPixelFormat) __NE___; + ND_ bool EPixelFormat_IsCopySupported (EPixelFormat src, EPixelFormat dst) __NE___; + ND_ bool EPixelFormat_IsCopySupportedRelaxed (EPixelFormat src, EPixelFormat dst) __NE___; + ND_ bool EPixelFormat_IsBlitSupported (EPixelFormat, EPixelFormat, EBlitFilter) __NE___; + + ND_ bool EPixelFormat_IsCopySupported (EPixelFormat src, const uint2 &srcDim, + EPixelFormat dst, const uint2 &dstDim) __NE___; + ND_ bool EPixelFormat_GetCopyGranularity (EPixelFormat src, OUT uint2 &srcBlockDim, + EPixelFormat dst, OUT uint2 &dstBlockDim) __NE___; //----------------------------------------------------------------------------- @@ -480,7 +495,7 @@ namespace AE::Graphics ND_ inline constexpr bool EMemoryType_IsNonCoherent (EMemoryType memType) __NE___ { return AllBits( memType, EMemoryType::HostCached ) and - not AnyBits( memType, EMemoryType::HostCoherent ); + NoBits( memType, EMemoryType::HostCoherent ); } ND_ inline constexpr bool EMemoryType_IsHostVisible (EMemoryType memType) __NE___ diff --git a/AE/engine/src/graphics/Private/FeatureSet.cpp b/AE/engine/src/graphics/Private/FeatureSet.cpp.h similarity index 98% rename from AE/engine/src/graphics/Private/FeatureSet.cpp rename to AE/engine/src/graphics/Private/FeatureSet.cpp.h index 081201ad..8055cd79 100644 --- a/AE/engine/src/graphics/Private/FeatureSet.cpp 
+++ b/AE/engine/src/graphics/Private/FeatureSet.cpp.h @@ -1260,9 +1260,6 @@ namespace */ bool FeatureSet::IsSupported (const ImageDesc &desc) C_NE___ { - StaticAssert( uint(EImageUsage::All) == 0xFF ); - StaticAssert( uint(EImageOpt::All) == 0x1FFFF ); - bool result = true; if ( (desc.format >= EPixelFormat::ASTC_RGBA8_4x4 and desc.format <= EPixelFormat::ASTC_RGBA8_12x12) or @@ -1289,18 +1286,38 @@ namespace result &= (desc.queues == Default) or AllBits( queues.supported, desc.queues ); + const auto CheckFormatUsage = [&desc] (const PixelFormatSet_t &fmtSet) + {{ + bool compat = fmtSet.contains( desc.format ); + if ( not compat and AllBits( desc.options, EImageOpt::ExtendedUsage )) + { + uint count = 0; + for (auto fmt : desc.viewFormats) + { + if ( fmt != Default ) + { + compat |= fmtSet.contains( fmt ); + ++count; + } + } + if ( count == 0 ) + compat = true; + } + return compat; + }}; + for (auto usage : BitfieldIterate( desc.usage )) { switch_enum( usage ) { - case EImageUsage::TransferSrc : break; - case EImageUsage::TransferDst : break; - case EImageUsage::Sampled : break; - case EImageUsage::Storage : result &= storageImageFormats.contains( desc.format ); break; + case EImageUsage::Storage : result &= CheckFormatUsage( storageImageFormats ); break; case EImageUsage::ColorAttachment : case EImageUsage::DepthStencilAttachment : - case EImageUsage::InputAttachment : result &= attachmentFormats.contains( desc.format ); break; + case EImageUsage::InputAttachment : result &= CheckFormatUsage( attachmentFormats ); break; case EImageUsage::ShadingRate : result &= (attachmentFragmentShadingRate == EFeature::RequireTrue); break; + case EImageUsage::TransferSrc : break; + case EImageUsage::TransferDst : break; + case EImageUsage::Sampled : break; case EImageUsage::_Last : case EImageUsage::All : @@ -1324,15 +1341,16 @@ namespace case EImageOpt::SparseAliased : break; case EImageOpt::Alias : break; case EImageOpt::SampleLocationsCompatible : break; // TODO - case 
EImageOpt::StorageAtomic : result &= storageImageAtomicFormats.contains( desc.format ); break; - case EImageOpt::ColorAttachmentBlend : result &= attachmentBlendFormats.contains( desc.format ); break; - case EImageOpt::SampledLinear : result &= linearSampledFormats.contains( desc.format ); break; + case EImageOpt::StorageAtomic : result &= CheckFormatUsage( storageImageAtomicFormats ); break; + case EImageOpt::ColorAttachmentBlend : result &= CheckFormatUsage( attachmentBlendFormats ); break; + case EImageOpt::SampledLinear : result &= CheckFormatUsage( linearSampledFormats ); break; case EImageOpt::SampledMinMax : break; // TODO case EImageOpt::VertexPplnStore : result &= (fragmentStoresAndAtomics == EFeature::RequireTrue); break; case EImageOpt::FragmentPplnStore : result &= (vertexPipelineStoresAndAtomics == EFeature::RequireTrue); break; case EImageOpt::LossyRTCompression : break; // TODO case EImageOpt::BlitSrc : break; case EImageOpt::BlitDst : break; + case EImageOpt::ExtendedUsage : break; // TODO case EImageOpt::_Last : case EImageOpt::SparseResidencyAliased : @@ -1605,16 +1623,6 @@ namespace { return result; } -/* -================================================= - GetHashOfFS_Precalculated -================================================= -*/ - HashVal64 FeatureSet::GetHashOfFS_Precalculated () __NE___ - { - return HashVal64{0x0040013f2f731cb2ull}; - } - /* ================================================= accelerationStructure @@ -1634,5 +1642,15 @@ namespace { return EFeature::RequireFalse; } +/* +================================================= + GetHashOfFS_Precalculated +================================================= +*/ + HashVal64 FeatureSet::GetHashOfFS_Precalculated () __NE___ + { + return HashVal64{0x77f873fcd94a950bull}; + } + } // AE::Graphics diff --git a/AE/engine/src/graphics/Private/ImageDesc.cpp b/AE/engine/src/graphics/Private/ImageDesc.cpp.h similarity index 79% rename from AE/engine/src/graphics/Private/ImageDesc.cpp rename to 
AE/engine/src/graphics/Private/ImageDesc.cpp.h index 1af2cde5..b9aa95ce 100644 --- a/AE/engine/src/graphics/Private/ImageDesc.cpp +++ b/AE/engine/src/graphics/Private/ImageDesc.cpp.h @@ -14,21 +14,21 @@ namespace AE::Graphics */ ImageDesc& ImageDesc::SetDimension (const uint value) __NE___ { - dimension = uint3{ value, 1, 1 }; + dimension = CheckCast(uint3{ value, 1, 1 }); imageDim = (imageDim == Default ? EImageDim_1D : imageDim); return *this; } ImageDesc& ImageDesc::SetDimension (const uint2 &value) __NE___ { - dimension = uint3{ value, 1 }; + dimension = CheckCast(uint3{ value, 1 }); imageDim = (imageDim == Default ? EImageDim_2D : imageDim); return *this; } ImageDesc& ImageDesc::SetDimension (const uint3 &value) __NE___ { - dimension = value; + dimension = CheckCast( value ); imageDim = (imageDim == Default ? EImageDim_3D : imageDim); return *this; } @@ -82,7 +82,7 @@ namespace AE::Graphics for (auto& dst : viewFormats) { - if ( dst == value or dst == Default ) + if_unlikely( dst == value or dst == Default ) { dst = value; return *this; @@ -103,7 +103,7 @@ namespace AE::Graphics ASSERT( format != Default ); ASSERT( imageDim != Default ); - dimension = Max( dimension, uint3{1} ); + dimension = Max( dimension, ushort{1} ); arrayLayers = Max( arrayLayers, 1_layer ); switch_enum( imageDim ) @@ -111,11 +111,11 @@ namespace AE::Graphics case EImageDim_1D : ASSERT( not samples.IsEnabled() ); ASSERT( dimension.y == 1 and dimension.z == 1 ); - ASSERT( not AnyBits( options, EImageOpt::Array2DCompatible | EImageOpt::CubeCompatible )); // this options are not supported for 1D + ASSERT( NoBits( options, EImageOpt::Array2DCompatible | EImageOpt::CubeCompatible )); // this options are not supported for 1D options &= ~(EImageOpt::Array2DCompatible | EImageOpt::CubeCompatible); samples = 1_samples; - dimension = uint3{ dimension.x, 1, 1 }; + dimension = ImageDim_t{ dimension.x, 1, 1 }; break; case EImageDim_2D : @@ -138,7 +138,7 @@ namespace AE::Graphics case EImageDim_3D : 
ASSERT( not samples.IsEnabled() ); ASSERT( arrayLayers == 1_layer ); - ASSERT( not AnyBits( options, EImageOpt::CubeCompatible )); // options are not supported for 1D + ASSERT( NoBits( options, EImageOpt::CubeCompatible )); // options are not supported for 1D options &= ~EImageOpt::CubeCompatible; samples = 1_samples; @@ -152,13 +152,16 @@ namespace AE::Graphics } switch_end + const bool is_comp_fmt = EPixelFormat_IsCompressed( format ); + const bool uncompress = AllBits( options, EImageOpt::BlockTexelViewCompatible ) and is_comp_fmt; + if ( memType == Default ) memType = EMemoryType::DeviceLocal; if ( usage == Default ) usage = EImageUsage::Transfer | EImageUsage::Sampled; - if ( not AllBits( memType, EMemoryType::DeviceLocal )) + if ( NoBits( memType, EMemoryType::DeviceLocal )) { options &= ~(EImageOpt::SparseResidencyAliased); usage &= ~(EImageUsage::ColorAttachment | EImageUsage::DepthStencilAttachment | EImageUsage::Sampled | @@ -171,18 +174,29 @@ namespace AE::Graphics usage &= (EImageUsage::ColorAttachment | EImageUsage::DepthStencilAttachment | EImageUsage::InputAttachment); } - if ( not AllBits( usage, EImageUsage::ColorAttachment )) + if ( NoBits( usage, EImageUsage::ColorAttachment )) options &= ~EImageOpt::ColorAttachmentBlend; - if ( not AllBits( usage, EImageUsage::Storage )) + if ( NoBits( usage, EImageUsage::Storage )) options &= ~(EImageOpt::StorageAtomic | EImageOpt::VertexPplnStore | EImageOpt::FragmentPplnStore); - if ( not AllBits( usage, EImageUsage::Sampled )) + if ( NoBits( usage, EImageUsage::Sampled )) options &= ~(EImageOpt::SampledLinear | EImageOpt::SampledMinMax); - if ( not AllBits( usage, EImageUsage::DepthStencilAttachment )) + if ( NoBits( usage, EImageUsage::DepthStencilAttachment )) options &= ~EImageOpt::SampleLocationsCompatible; + // TODO: BlockTexelViewCompatible requires VK_KHR_maintenance2 + + if ( AllBits( options, EImageOpt::BlockTexelViewCompatible ) and (not is_comp_fmt) ) + options &= 
EImageOpt::BlockTexelViewCompatible; + + if ( AllBits( options, EImageOpt::BlockTexelViewCompatible )) + options |= EImageOpt::MutableFormat; + + if ( ViewFormatListSize() > 1 ) + options |= EImageOpt::MutableFormat; + // validate samples and mipmaps if ( samples.IsEnabled() ) { @@ -192,17 +206,25 @@ namespace AE::Graphics else { samples = 1_samples; - mipLevels = MipmapLevel( Clamp( mipLevels.Get(), 1u, ImageUtils::NumberOfMipmaps( dimension ))); + mipLevels = MipmapLevel( Clamp( mipLevels.Get(), 1u, ImageUtils::NumberOfMipmaps( Dimension() ))); } - /*if ( ViewFormatListSize() > 1 ) + // validate view format list + for (auto& fmt : viewFormats) { - if ( not AllBits( options, EImageOpt::MutableFormat )) + if ( fmt == Default ) + continue; + + bool supported = uncompress ? + EPixelFormat_IsCopySupportedRelaxed( format, fmt ) : + EPixelFormat_IsCompatible( format, fmt ); + + if_unlikely( not supported ) { - DBG_WARNING( "only 1 format can be set in 'viewFormats'" ); - viewFormats.resize( 1 ); + DBG_WARNING( "removed incompatible format from 'viewFormats'" ); + fmt = Default; } - }*/ + } ASSERT( usage != Default ); } @@ -312,23 +334,56 @@ namespace AE::Graphics { baseMipmap = MipmapLevel{Clamp( baseMipmap.Get(), 0u, desc.mipLevels.Get()-1 )}; mipmapCount = CheckCast( Clamp( mipmapCount, 1u, desc.mipLevels.Get() - baseMipmap.Get() )); + dimension = ImageDim_t{ImageUtils::MipmapDimension( desc.Dimension(), baseMipmap.Get(), EPixelFormat_GetInfo( desc.format ).TexBlockDim() )}; // validate format - if ( format == Default ) - { - format = desc.format; - } - else - if ( format != desc.format ) { - if ( not ArrayContains( ArrayView{desc.viewFormats}, format ) and - not AllBits( desc.options, EImageOpt::MutableFormat )) + if ( format == Default ) + format = desc.format; + + const bool is_compat = EPixelFormat_IsCompatible( desc.format, format ); + const bool has_fmt_list = desc.HasViewFormatList(); + bool valid_fmt = false; + + if ( AllBits( desc.options, 
EImageOpt::MutableFormat )) + valid_fmt = is_compat; + + if ( has_fmt_list ) + { + if ( ArrayContains( ArrayView{desc.viewFormats}, format )) + valid_fmt = is_compat; + } + else + if ( format == desc.format ) + valid_fmt = true; + + if ( AllBits( desc.options, EImageOpt::BlockTexelViewCompatible ) and not EPixelFormat_IsCompressed( format )) + { + uint2 gran1, gran2; + if ( valid_fmt = EPixelFormat_GetCopyGranularity( desc.format, OUT gran1, format, OUT gran2 ); valid_fmt ) + dimension = ImageDim_t{ (uint2{dimension} * gran2 + (gran1 - 1u)) / gran1, dimension.z }; + } + + if ( not valid_fmt ) { DBG_WARNING( "can't change format if 'MutableFormat' is not set and 'viewFormats' does not contains this format" ); - format = desc.format; + if ( has_fmt_list ) + format = desc.viewFormats[0]; + else + format = desc.format; } } + if ( AllBits( desc.options, EImageOpt::BlockTexelViewCompatible ) and not EPixelFormat_IsCompressed( format )) + { + ASSERT( mipmapCount == 1 ); + mipmapCount = 1; + + ASSERT( layerCount == 1 or layerCount == UMax ); + layerCount = 1; + } + + // validate aspect mask EImageAspect mask = EPixelFormat_ToImageAspect( format ); aspectMask = (aspectMask == Default ? 
mask : (aspectMask & mask)); diff --git a/AE/engine/src/graphics/Private/ImageMemView.cpp b/AE/engine/src/graphics/Private/ImageMemView.cpp index c7b2c997..63e408ac 100644 --- a/AE/engine/src/graphics/Private/ImageMemView.cpp +++ b/AE/engine/src/graphics/Private/ImageMemView.cpp @@ -515,7 +515,7 @@ namespace ImageMemView::ImageMemView (const BufferMemView& content, const uint3 &off, const uint3 &dim, Bytes rowPitch, Bytes slicePitch, EPixelFormat format, EImageAspect aspect) __NE___ : _content{ content }, _rowPitch{ rowPitch }, _slicePitch{ slicePitch }, - _offset{ CheckCast(off) }, _dimension{ CheckCast(Max( dim, 1u )) }, + _offset{ CheckCast(off) }, _dimension{ CheckCast(Max( dim, 1u )) }, _format{ format }, _aspect{ aspect } { ASSERT( RowPitch() == rowPitch ); diff --git a/AE/engine/src/graphics/Private/PipelinePack.cpp.h b/AE/engine/src/graphics/Private/PipelinePack.cpp.h index 864d525c..bb844422 100644 --- a/AE/engine/src/graphics/Private/PipelinePack.cpp.h +++ b/AE/engine/src/graphics/Private/PipelinePack.cpp.h @@ -1,5 +1,7 @@ // Copyright (c) Zhirnov Andrey. 
For more information see 'LICENSE' +#pragma once + #if defined(AE_ENABLE_VULKAN) # define PPLNPACK VPipelinePack diff --git a/AE/engine/src/graphics/Private/RenderState.cpp b/AE/engine/src/graphics/Private/RenderState.cpp.h similarity index 99% rename from AE/engine/src/graphics/Private/RenderState.cpp rename to AE/engine/src/graphics/Private/RenderState.cpp.h index af0a7049..8946a869 100644 --- a/AE/engine/src/graphics/Private/RenderState.cpp +++ b/AE/engine/src/graphics/Private/RenderState.cpp.h @@ -371,6 +371,7 @@ namespace AE::Graphics case EPipelineDynamicState::RTStackSize : case EPipelineDynamicState::FragmentShadingRate : + case EPipelineDynamicState::ViewportWScaling : break; // ignore case EPipelineDynamicState::Unknown : diff --git a/AE/engine/src/graphics/Private/RenderTaskScheduler.cpp.h b/AE/engine/src/graphics/Private/RenderTaskScheduler.cpp.h index 88d7fabc..222936a8 100644 --- a/AE/engine/src/graphics/Private/RenderTaskScheduler.cpp.h +++ b/AE/engine/src/graphics/Private/RenderTaskScheduler.cpp.h @@ -1,5 +1,7 @@ // Copyright (c) Zhirnov Andrey. 
For more information see 'LICENSE' +#pragma once + #if defined(AE_ENABLE_VULKAN) # define CMDPOOLMNGR VCommandPoolManager @@ -49,7 +51,7 @@ q.pending[ submit_idx ] = batch.GetRC(); const auto old = BitCast( q.bits.fetch_or( bf.value, EMemoryOrder::Release )); - CHECK( not AnyBits( old.packed.pending, bf.packed.pending )); // already exists + CHECK( NoBits( old.packed.pending, bf.packed.pending )); // already exists } switch_enum( mode ) @@ -669,7 +671,7 @@ mask.packed.submitted &= ~ToBitMask( range.first ); const auto old_bits = BitCast( q.bits.fetch_or( mask.value )); // add bits to submitted - CHECK( not AnyBits( old_bits.packed.submitted, mask.packed.submitted )); // already submitted + CHECK( NoBits( old_bits.packed.submitted, mask.packed.submitted )); // already submitted if ( forceFlush ) { @@ -775,9 +777,9 @@ const auto old_bits = BitCast( _queueMap[ uint(desc.queue) ].bits.fetch_or( bf.value )); - CHECK_ERR_MSG( not AnyBits( old_bits.packed.required, bf.packed.required ), "batch with 'submitIdx' is already created" ); - CHECK_ERR_MSG( not AnyBits( old_bits.packed.pending, bf.packed.required ) or - not AnyBits( old_bits.packed.submitted, bf.packed.required ), "batch with 'submitIdx' is marked as unused" ); + CHECK_ERR_MSG( NoBits( old_bits.packed.required, bf.packed.required ), "batch with 'submitIdx' is already created" ); + CHECK_ERR_MSG( NoBits( old_bits.packed.pending, bf.packed.required ) or + NoBits( old_bits.packed.submitted, bf.packed.required ), "batch with 'submitIdx' is marked as unused" ); } uint index; diff --git a/AE/engine/src/graphics/Private/ResourceManager.cpp.h b/AE/engine/src/graphics/Private/ResourceManager.cpp.h index fe58a9b9..a8212ac5 100644 --- a/AE/engine/src/graphics/Private/ResourceManager.cpp.h +++ b/AE/engine/src/graphics/Private/ResourceManager.cpp.h @@ -1,5 +1,7 @@ // Copyright (c) Zhirnov Andrey. 
For more information see 'LICENSE' +#pragma once + #if defined(AE_ENABLE_VULKAN) # define SUFFIX V diff --git a/AE/engine/src/graphics/Private/ResourceValidation.h b/AE/engine/src/graphics/Private/ResourceValidation.h index 34090a14..c980bcaa 100644 --- a/AE/engine/src/graphics/Private/ResourceValidation.h +++ b/AE/engine/src/graphics/Private/ResourceValidation.h @@ -58,7 +58,7 @@ namespace AE::Graphics constexpr auto view_usage = EBufferUsage::UniformTexel | EBufferUsage::StorageTexel; - if_unlikely( not AnyBits( desc.usage, view_usage )) + if_unlikely( NoBits( desc.usage, view_usage )) return false; if_unlikely( view.format == Default or view.format >= EPixelFormat::_Count ) @@ -79,7 +79,8 @@ namespace AE::Graphics template ND_ bool Image_IsSupported (const ResMngr &resMngr, const ImageDesc &desc, Bool imageFormatListSupported) __NE___ { - const auto& res_flags = resMngr.GetDevice().GetResourceFlags(); + const auto& res_flags = resMngr.GetDevice().GetResourceFlags(); + const usize fmt_list_size = desc.ViewFormatListSize(); if_unlikely( desc.imageDim == Default or desc.usage == Default or desc.format == Default or desc.memType == Default ) return false; @@ -93,19 +94,20 @@ namespace AE::Graphics { switch_enum( option ) { - case EImageOpt::BlitSrc : if_unlikely( not AllBits( desc.usage, EImageUsage::TransferSrc )) return false; break; - case EImageOpt::BlitDst : if_unlikely( not AllBits( desc.usage, EImageUsage::TransferDst )) return false; break; - case EImageOpt::BlockTexelViewCompatible : if_unlikely( not EPixelFormat_IsCompressed( desc.format )) return false; break; + case EImageOpt::BlitSrc : if_unlikely( NoBits( desc.usage, EImageUsage::TransferSrc )) return false; break; + case EImageOpt::BlitDst : if_unlikely( NoBits( desc.usage, EImageUsage::TransferDst )) return false; break; + case EImageOpt::BlockTexelViewCompatible : if_unlikely( not EPixelFormat_IsCompressed( desc.format )) return false; break; case EImageOpt::StorageAtomic : case 
EImageOpt::VertexPplnStore : - case EImageOpt::FragmentPplnStore : if_unlikely( not AllBits( desc.usage, EImageUsage::Storage )) return false; break; + case EImageOpt::FragmentPplnStore : if_unlikely( NoBits( desc.usage, EImageUsage::Storage )) return false; break; case EImageOpt::SampledLinear : - case EImageOpt::SampledMinMax : if_unlikely( not AllBits( desc.usage, EImageUsage::Sampled )) return false; break; + case EImageOpt::SampledMinMax : if_unlikely( NoBits( desc.usage, EImageUsage::Sampled )) return false; break; - case EImageOpt::ColorAttachmentBlend : if_unlikely( not AllBits( desc.usage, EImageUsage::ColorAttachment )) return false; break; + case EImageOpt::ColorAttachmentBlend : if_unlikely( NoBits( desc.usage, EImageUsage::ColorAttachment )) return false; break; + case EImageOpt::ExtendedUsage : case EImageOpt::LossyRTCompression : case EImageOpt::SparseAliased : case EImageOpt::SparseResidencyAliased : @@ -130,11 +132,11 @@ namespace AE::Graphics return false; // validate format list - if ( imageFormatListSupported and desc.HasViewFormatList() ) + if ( imageFormatListSupported and fmt_list_size > 0 ) { using EFmtType = PixelFormatInfo::EType; - if_unlikely( not AllBits( desc.options, EImageOpt::MutableFormat ) and desc.ViewFormatListSize() > 1 ) + if_unlikely( NoBits( desc.options, EImageOpt::MutableFormat ) and fmt_list_size > 1 ) return false; const auto& origin_fmt_info = EPixelFormat_GetInfo( desc.format ); @@ -146,27 +148,24 @@ namespace AE::Graphics continue; const auto& fmt_info = EPixelFormat_GetInfo( fmt ); - bool compatible = fmt_info.bitsPerBlock == origin_fmt_info.bitsPerBlock and - fmt_info.bitsPerBlock2 == origin_fmt_info.bitsPerBlock2; + bool compatible = true; if ( uncompress and not fmt_info.IsCompressed() ) { - compatible &= (fmt_info.channels == origin_fmt_info.channels); - compatible &= (fmt_info.valueType & (EFmtType::UNorm | EFmtType::SNorm)) == (origin_fmt_info.valueType & (EFmtType::UNorm | EFmtType::SNorm)); - compatible &= 
(fmt_info.valueType & (EFmtType::SFloat | EFmtType::UFloat)) == (origin_fmt_info.valueType & (EFmtType::SFloat | EFmtType::UFloat)); - compatible &= (fmt_info.valueType & (EFmtType::Int | EFmtType::UInt)) == (origin_fmt_info.valueType & (EFmtType::Int | EFmtType::UInt)); + compatible &= origin_fmt_info.IsColor() and fmt_info.IsColor(); + compatible &= fmt_info.bitsPerBlock == origin_fmt_info.bitsPerBlock; } else { - compatible &= All( fmt_info.blockDim == origin_fmt_info.blockDim ); - compatible &= (fmt_info.IsCompressed() == origin_fmt_info.IsCompressed()); + compatible &= EPixelFormat_IsCompatible( desc.format, fmt ); } + if_unlikely( not compatible ) return false; } } else - if_unlikely( desc.HasViewFormatList() ) + if_unlikely( fmt_list_size > 0 ) { if ( AllBits( desc.options, EImageOpt::MutableFormat )) return true; @@ -189,13 +188,14 @@ namespace AE::Graphics ND_ bool ImageView_IsSupported (const ResMngr &resMngr, const ImageDesc &desc, const ImageViewDesc &view) __NE___ { StaticAssert( uint(EImageUsage::All) == 0xFF ); - StaticAssert( uint(EImageOpt::All) == 0x1FFFF ); + StaticAssert( uint(EImageOpt::All) == 0x3FFFF ); + ASSERT( view.format != Default ); constexpr EImageUsage view_usage = EImageUsage::Sampled | EImageUsage::Storage | EImageUsage::ColorAttachment | EImageUsage::DepthStencilAttachment | EImageUsage::InputAttachment | EImageUsage::ShadingRate; - if_unlikely( not AnyBits( desc.usage, view_usage )) + if_unlikely( NoBits( desc.usage, view_usage )) return false; if ( view.viewType == EImage_CubeArray ) @@ -203,7 +203,7 @@ namespace AE::Graphics if_unlikely( desc.imageDim != EImageDim_2D or (desc.imageDim == EImageDim_3D and AllBits( desc.options, EImageOpt::Array2DCompatible)) ) return false; - if_unlikely( not AllBits( desc.options, EImageOpt::CubeCompatible )) + if_unlikely( NoBits( desc.options, EImageOpt::CubeCompatible )) return false; if_unlikely( not IsMultipleOf( view.layerCount, 6 )) @@ -212,7 +212,7 @@ namespace AE::Graphics if ( 
view.viewType == EImage_Cube ) { - if_unlikely( not AllBits( desc.options, EImageOpt::CubeCompatible )) + if_unlikely( NoBits( desc.options, EImageOpt::CubeCompatible )) return false; if_unlikely( view.layerCount != 6 ) @@ -221,51 +221,37 @@ namespace AE::Graphics if ( desc.imageDim == EImageDim_3D and view.viewType != EImage_3D ) { - if_unlikely( not AllBits( desc.options, EImageOpt::Array2DCompatible )) + if_unlikely( NoBits( desc.options, EImageOpt::Array2DCompatible )) return false; } - if ( desc.HasViewFormatList() ) + // check view format { - if_unlikely( not ArrayContains( ArrayView{desc.viewFormats}, view.format )) - return false; - } + const usize fmt_list_size = desc.ViewFormatListSize(); - if ( view.format != Default and view.format != desc.format ) - { - const auto& required = EPixelFormat_GetInfo( desc.format ); - const auto& origin = EPixelFormat_GetInfo( view.format ); - const bool req_comp = Any( required.TexBlockDim() > 1u ); - const bool orig_comp = Any( origin.TexBlockDim() > 1u ); + if ( fmt_list_size > 0 ) + { + if_unlikely( not ArrayContains( ArrayView{desc.viewFormats}, view.format )) + return false; - if_unlikely( not ArrayContains( ArrayView{desc.viewFormats}, view.format ) and - not AllBits( desc.options, EImageOpt::MutableFormat )) - return false; + if_unlikely( fmt_list_size > 1 and NoBits( desc.options, EImageOpt::MutableFormat )) + return false; + } - // compressed to uncompressed - if ( AllBits( desc.options, EImageOpt::BlockTexelViewCompatible ) and orig_comp and not req_comp ) + if ( AllBits( desc.options, EImageOpt::BlockTexelViewCompatible ) and not EPixelFormat_IsCompressed( view.format )) { - if_unlikely( required.bitsPerBlock != origin.bitsPerBlock ) + if_unlikely( not EPixelFormat_IsCopySupportedRelaxed( desc.format, view.format )) return false; } else { - if_unlikely( req_comp != orig_comp ) + if_unlikely( not EPixelFormat_IsCompatible( desc.format, view.format )) return false; - if_unlikely( Any( required.blockDim != 
origin.blockDim )) + if_unlikely( NoBits( desc.options, EImageOpt::MutableFormat ) and + fmt_list_size == 0 and + view.format != desc.format ) return false; - - if ( view.aspectMask == EImageAspect::Stencil ) - { - if_unlikely( required.bitsPerBlock2 != origin.bitsPerBlock2 ) - return false; - } - else - { - if_unlikely( required.bitsPerBlock != origin.bitsPerBlock ) - return false; - } } } diff --git a/AE/engine/src/graphics/Private/Shared.cpp b/AE/engine/src/graphics/Private/Shared.cpp new file mode 100644 index 00000000..48652371 --- /dev/null +++ b/AE/engine/src/graphics/Private/Shared.cpp @@ -0,0 +1,15 @@ +// Copyright (c) Zhirnov Andrey. For more information see 'LICENSE' +/* + This file used in multiple projects. + Designed to be compatible with unity build. +*/ + +#include "EnumUtils.cpp.h" +#include "BufferDesc.cpp.h" +#include "FeatureSet.cpp.h" +#include "ImageDesc.cpp.h" +#include "RenderState.cpp.h" + +#include "../Vulkan/VEnumCast.cpp.h" + +#include "../Scripting/GraphicsBindings.cpp.h" diff --git a/AE/engine/src/graphics/Private/StagingBufferManager.cpp.h b/AE/engine/src/graphics/Private/StagingBufferManager.cpp.h index a3422755..f8fd87ff 100644 --- a/AE/engine/src/graphics/Private/StagingBufferManager.cpp.h +++ b/AE/engine/src/graphics/Private/StagingBufferManager.cpp.h @@ -1,5 +1,7 @@ // Copyright (c) Zhirnov Andrey. 
For more information see 'LICENSE' +#pragma once + #if defined(AE_ENABLE_VULKAN) # define STBUFMNGR VStagingBufferManager @@ -739,7 +741,7 @@ const auto& fmt_info = EPixelFormat_GetInfo( imgDesc.format ); const uint2 texblock_dim = fmt_info.TexBlockDim(); // TODO: use imageGranularity - const uint3 mip_dim = ImageUtils::MipmapDimension( imgDesc.dimension, uploadDesc.mipLevel.Get(), texblock_dim ); + const uint3 mip_dim = ImageUtils::MipmapDimension( imgDesc.Dimension(), uploadDesc.mipLevel.Get(), texblock_dim ); const uint3 region_dim = Min( mip_dim - uploadDesc.imageOffset, Max( uploadDesc.imageDim, 1u )); const uint texblock_bits = uploadDesc.aspectMask != EImageAspect::Stencil ? fmt_info.bitsPerBlock : fmt_info.bitsPerBlock2; const Bytes row_pitch = Max( uploadDesc.dataRowPitch, Bytes{region_dim.x * texblock_bits + texblock_dim.x-1} / (texblock_dim.x * 8) ); @@ -811,7 +813,7 @@ CHECK_ERRV( EPixelFormat_GetPlaneInfo( imgDesc.format, uploadDesc.aspectMask, OUT plane_fmt, OUT plane_scale )); const auto& fmt_info = EPixelFormat_GetInfo( plane_fmt ); - const uint2 plane_dim = imgDesc.dimension / plane_scale; + const uint2 plane_dim = imgDesc.Dimension2() / plane_scale; const uint2 region_dim = Min( plane_dim, Max( uint2{uploadDesc.imageDim}, 1u )); const Bytes texblock_bytes = fmt_info.BytesPerBlock(); const Bytes row_pitch = Max( uploadDesc.dataRowPitch, Bytes{region_dim.x * texblock_bytes} ); diff --git a/AE/engine/src/graphics/Private/Undef.h b/AE/engine/src/graphics/Private/Undef.h new file mode 100644 index 00000000..33573dc2 --- /dev/null +++ b/AE/engine/src/graphics/Private/Undef.h @@ -0,0 +1,9 @@ +// Copyright (c) Zhirnov Andrey. 
For more information see 'LICENSE' + +// cleanup defines +#undef AE_VALIDATE_GCTX +#undef AE_DBG_GRAPHICS +#undef DBG_GRAPHICS_ONLY +#undef GCTX_CHECK +#undef GCTX_CHECK_MSG +#undef GRES_CHECK diff --git a/AE/engine/src/graphics/Private/Video.cpp b/AE/engine/src/graphics/Private/Video.cpp index 3b374a8d..26a75028 100644 --- a/AE/engine/src/graphics/Private/Video.cpp +++ b/AE/engine/src/graphics/Private/Video.cpp @@ -50,7 +50,7 @@ namespace AE::Graphics queues = desc.queues; } - ASSERT( not AnyBits( videoUsage, EVideoBufferUsage::DecodeDst | EVideoBufferUsage::EncodeSrc )); + ASSERT( NoBits( videoUsage, EVideoBufferUsage::DecodeDst | EVideoBufferUsage::EncodeSrc )); } //----------------------------------------------------------------------------- @@ -66,7 +66,7 @@ namespace AE::Graphics // validate as 'ImageDesc' { ImageDesc desc; - desc.dimension = uint3{ dimension, 1u }; + desc.dimension = ImageDim_t{uint3{ dimension, 1u }}; desc.arrayLayers = arrayLayers; desc.options = options; desc.usage = usage; @@ -75,7 +75,7 @@ namespace AE::Graphics desc.Validate(); - dimension = uint2{desc.dimension}; + dimension = VideoImageDim_t{ desc.dimension }; arrayLayers = desc.arrayLayers; options = desc.options; usage = desc.usage; @@ -83,7 +83,7 @@ namespace AE::Graphics queues = desc.queues; } - ASSERT( not AnyBits( videoUsage, EVideoImageUsage::DecodeSrc | EVideoImageUsage::EncodeDst )); + ASSERT( NoBits( videoUsage, EVideoImageUsage::DecodeSrc | EVideoImageUsage::EncodeDst )); } diff --git a/AE/engine/src/graphics/Public/BufferMemView.h b/AE/engine/src/graphics/Public/BufferMemView.h index 46a562ad..5c84e8cc 100644 --- a/AE/engine/src/graphics/Public/BufferMemView.h +++ b/AE/engine/src/graphics/Public/BufferMemView.h @@ -63,82 +63,80 @@ namespace AE::Graphics // methods private: - explicit BufferMemView (ArrayView parts) __NE___ : _parts{parts} {} + explicit BufferMemView (ArrayView parts) __NE___ : _parts{parts} {} public: - BufferMemView () __NE___ {} - BufferMemView (const 
BufferMemView &) __NE___ = default; - BufferMemView (BufferMemView &&) __NE___ = default; + BufferMemView () __NE___ {} + BufferMemView (const BufferMemView &) __NE___ = default; + BufferMemView (BufferMemView &&) __NE___ = default; - BufferMemView (void* ptr, Bytes size) __NE___ { PushBack( ptr, size ); } + BufferMemView (void* ptr, Bytes size) __NE___ { PushBack( ptr, size ); } template - explicit BufferMemView (Array &arr) __NE___ : BufferMemView{ arr.data(), ArraySizeOf(arr) } {} + explicit BufferMemView (Array &arr) __NE___ : BufferMemView{ arr.data(), ArraySizeOf(arr) } {} - BufferMemView& operator = (const BufferMemView &) __NE___ = default; - BufferMemView& operator = (BufferMemView &&) __NE___ = default; + BufferMemView& operator = (const BufferMemView &) __NE___ = default; + BufferMemView& operator = (BufferMemView &&) __NE___ = default; - ND_ explicit operator Array () C_NE___ { return _ToArray(); } + ND_ explicit operator Array () C_NE___ { return _ToArray(); } - ND_ auto Parts () C_NE___ { return ArrayView{ Cast(_parts.data()), _parts.size() }; } - ND_ auto Parts () __NE___ { return ArrayView{ _parts }; } + ND_ auto Parts () C_NE___ { return ArrayView{ Cast(_parts.data()), _parts.size() }; } + ND_ auto Parts () __NE___ { return ArrayView{ _parts }; } - ND_ auto begin () __NE___ { return Parts().begin(); } - ND_ auto end () __NE___ { return Parts().end(); } + ND_ auto begin () __NE___ { return Parts().begin(); } + ND_ auto end () __NE___ { return Parts().end(); } - ND_ auto begin () C_NE___ { return Parts().begin(); } - ND_ auto end () C_NE___ { return Parts().end(); } + ND_ auto begin () C_NE___ { return Parts().begin(); } + ND_ auto end () C_NE___ { return Parts().end(); } - ND_ bool Empty () C_NE___ { return _parts.empty(); } + ND_ bool Empty () C_NE___ { return _parts.empty(); } - void Clear () __NE___ { _parts.clear(); } + void Clear () __NE___ { _parts.clear(); } - bool PushBack (void* ptr, Bytes size) __NE___; + bool PushBack (void* ptr, Bytes 
size) __NE___; - ND_ Bytes DataSize () C_NE___; + ND_ Bytes DataSize () C_NE___; - ND_ BufferMemView Section (Bytes offset, Bytes size) __NE___; + ND_ BufferMemView Section (Bytes offset, Bytes size) __NE___; template - ND_ bool operator == (ArrayView rhs) C_NE___; + ND_ bool operator == (ArrayView rhs) C_NE___; - ND_ bool operator == (const BufferMemView &rhs) C_NE___ { return Compare( rhs ) == 0_b; } + ND_ bool operator == (const BufferMemView &rhs) C_NE___ { return Compare( rhs ) == 0_b; } // returns how much bytes are different - ND_ Bytes Compare (const BufferMemView &rhs) C_NE___; + ND_ Bytes Compare (const BufferMemView &rhs) C_NE___; // returns number of copied bytes - ND_ Bytes CopyFrom (Bytes dstOffset, const BufferMemView &from) __NE___ { return _Copy( from, 0_b, *this, dstOffset ); } - ND_ Bytes CopyFrom (Bytes dstOffset, const void* src, Bytes size) __NE___ { return _Copy( BufferMemView{ ConstCast(src), size }, 0_b, *this, dstOffset ); } - ND_ Bytes CopyFrom (const BufferMemView &from) __NE___ { return CopyFrom( 0_b, from ); } + ND_ Bytes CopyFrom (const void* src, Bytes size, Bytes dstOffset = 0_b)__NE___ { return _Copy( BufferMemView{ ConstCast(src), size }, 0_b, *this, dstOffset ); } + ND_ Bytes CopyFrom (const BufferMemView &from, Bytes dstOffset = 0_b) __NE___ { return _Copy( from, 0_b, *this, dstOffset ); } template - ND_ Bytes CopyFrom (ArrayView from) __NE___ { return CopyFrom( BufferMemView{ ConstCast(from.data()), ArraySizeOf(from) }); } + ND_ Bytes CopyFrom (ArrayView from, Bytes dstOffset = 0_b) __NE___ { return CopyFrom( from.data(), ArraySizeOf(from), dstOffset ); } template - ND_ Bytes CopyFrom (Array &from) __NE___ { return CopyFrom( BufferMemView{ from.data(), ArraySizeOf(from) }); } + ND_ Bytes CopyFrom (Array &from, Bytes dstOffset = 0_b) __NE___ { return CopyFrom( from.data(), ArraySizeOf(from), dstOffset ); } // returns number of copied bytes - ND_ Bytes CopyTo (OUT void* dst, Bytes size) C_NE___ { return CopyTo( 0_b, OUT dst, size 
); } - ND_ Bytes CopyTo (Bytes srcOffset, OUT void* dst, Bytes size) C_NE___ { BufferMemView tmp{ dst, size }; return _Copy( *this, srcOffset, tmp, 0_b ); } + ND_ Bytes CopyTo (OUT void* dst, Bytes size, Bytes srcOffset = 0_b) C_NE___ { BufferMemView tmp{ dst, size }; return _Copy( *this, srcOffset, tmp, 0_b ); } template - ND_ Bytes CopyTo (OUT T &dst) C_NE___ { return CopyTo( 0_b, OUT &dst, SizeOf ); } + ND_ Bytes CopyTo (OUT T &dst, Bytes srcOffset = 0_b) C_NE___ { return CopyTo( OUT &dst, SizeOf, srcOffset ); } template - ND_ Bytes CopyTo (Bytes srcOffset, OUT T &dst) C_NE___ { return CopyTo( srcOffset, OUT &dst, SizeOf ); } + ND_ Bytes CopyTo (OUT Array &dst, Bytes srcOffset = 0_b) C_NE___ { return CopyTo( OUT dst.data(), ArraySizeOf(dst), srcOffset ); } - ND_ ConstData GetRange (Bytes offset, Bytes size) C_NE___; + ND_ ConstData GetRange (Bytes offset, Bytes size) C_NE___; private: - ND_ Array _ToArray () C_NE___; + ND_ Array _ToArray () C_NE___; ND_ static Bytes _Copy (const BufferMemView &src, Bytes srcOffset, - BufferMemView &dst, Bytes dstOffset) __NE___; + BufferMemView &dst, Bytes dstOffset) __NE___; }; diff --git a/AE/engine/src/graphics/Public/CommandBuffer.h b/AE/engine/src/graphics/Public/CommandBuffer.h index b217101c..4c86dc07 100644 --- a/AE/engine/src/graphics/Public/CommandBuffer.h +++ b/AE/engine/src/graphics/Public/CommandBuffer.h @@ -86,9 +86,10 @@ namespace AE::Graphics // requires: EPipelineDynamicState::StencilWriteMask virtual void SetStencilWriteMask (uint writeMask) __Th___ = 0; virtual void SetStencilWriteMask (uint frontWriteMask, uint backWriteMask) __Th___ = 0; - // requires: EPipelineDynamicState::FragmentShadingRate virtual void SetFragmentShadingRate (EShadingRate, EShadingRateCombinerOp primitiveOp, EShadingRateCombinerOp textureOp) __Th___ = 0; + // requires: EPipelineDynamicState::ViewportWScaling + virtual void SetViewportWScaling (ArrayView scaling) __Th___ = 0; ) // draw commands // diff --git 
a/AE/engine/src/graphics/Public/CommandBufferTypes.h b/AE/engine/src/graphics/Public/CommandBufferTypes.h index 60a2b686..ff0001a0 100644 --- a/AE/engine/src/graphics/Public/CommandBufferTypes.h +++ b/AE/engine/src/graphics/Public/CommandBufferTypes.h @@ -20,6 +20,23 @@ namespace AE::Graphics { + struct ImageSubresourceLayers + { + EImageAspect aspectMask = Default; + MipmapLevel mipLevel; + ImageLayer baseLayer; + ushort layerCount = 1; + + ImageSubresourceLayers () __NE___ = default; + ImageSubresourceLayers (EImageAspect aspectMask) __NE___ : aspectMask{aspectMask} {} + + ImageSubresourceLayers (EImageAspect aspectMask, MipmapLevel mipLevel, + ImageLayer baseLayer, uint layerCount = 1) __NE___ : + aspectMask{aspectMask}, mipLevel{mipLevel}, + baseLayer{baseLayer}, layerCount{ushort(layerCount)} {} + }; + + struct ImageSubresourceRange { EImageAspect aspectMask = Default; @@ -41,23 +58,10 @@ namespace AE::Graphics aspectMask{aspectMask}, baseMipLevel{baseMipLevel}, mipmapCount{ushort(mipmapCount)}, baseLayer{baseLayer}, layerCount{ushort(layerCount)} {} - }; - - - struct ImageSubresourceLayers - { - EImageAspect aspectMask = Default; - MipmapLevel mipLevel; - ImageLayer baseLayer; - ushort layerCount = 1; - - ImageSubresourceLayers () __NE___ = default; - ImageSubresourceLayers (EImageAspect aspectMask) __NE___ : aspectMask{aspectMask} {} - ImageSubresourceLayers (EImageAspect aspectMask, MipmapLevel mipLevel, - ImageLayer baseLayer, uint layerCount = 1) __NE___ : - aspectMask{aspectMask}, mipLevel{mipLevel}, - baseLayer{baseLayer}, layerCount{ushort(layerCount)} {} + explicit ImageSubresourceRange (const ImageSubresourceLayers &other) __NE___ : + aspectMask{other.aspectMask}, baseMipLevel{other.mipLevel}, + baseLayer{other.baseLayer}, layerCount{other.layerCount} {} }; diff --git a/AE/engine/src/graphics/Public/Common.h b/AE/engine/src/graphics/Public/Common.h index 4bb5333c..cc04b3ee 100644 --- a/AE/engine/src/graphics/Public/Common.h +++ 
b/AE/engine/src/graphics/Public/Common.h @@ -28,13 +28,20 @@ namespace AE::Graphics class RenderTask; class DrawTask; + using ImageDim_t = packed_ushort3; + using ImageDim2_t = packed_ushort2; + using VideoImageDim_t = ImageDim2_t; + using MipmapCount_t = ushort; + using LayerCount_t = ushort; + + // // Graphics Config // struct GraphicsConfig final : Noninstanceable { - // Values may be greater than current limit, used only to reserve memory. + // Current device limits may be greater than current limit, used only to reserve memory. // buffer static constexpr uint MaxVertexBuffers = 8; diff --git a/AE/engine/src/graphics/Public/FeatureSet.h b/AE/engine/src/graphics/Public/FeatureSet.h index 9d075790..985c9839 100644 --- a/AE/engine/src/graphics/Public/FeatureSet.h +++ b/AE/engine/src/graphics/Public/FeatureSet.h @@ -161,10 +161,11 @@ namespace AE::Graphics _visitor_( ushort, maxSubgroupSize, )\ _visitor_( EFeature, subgroup, : 2 )\ _visitor_( EFeature, subgroupBroadcastDynamicId, : 2 ) /* GL_ARB_shader_ballot */\ - _visitor_( EFeature, subgroupSizeControl, : 2 )\ + _visitor_( EFeature, subgroupSizeControl, : 2 ) /* VK_EXT_subgroup_size_control */\ _visitor_( EFeature, shaderSubgroupUniformControlFlow, : 2 ) /* GL_EXT_subgroupuniform_qualifier, GL_EXT_subgroup_uniform_control_flow */\ _visitor_( EFeature, shaderMaximalReconvergence, : 2 ) /* GL_EXT_maximal_reconvergence */\ _visitor_( EFeature, shaderQuadControl, : 2 ) /* GL_EXT_shader_quad_control */\ + _visitor_( EFeature, clipSpaceWScalingNV, : 2 ) /* VK_NV_clip_space_w_scaling */\ /* types */\ _visitor_( EFeature, shaderInt8, : 2 ) /* GL_EXT_shader_8bit_storage */\ _visitor_( EFeature, shaderInt16, : 2 ) /* GL_EXT_shader_16bit_storage */\ @@ -338,7 +339,7 @@ namespace AE::Graphics /* compute shader */\ _visitor_( uint, maxComputeSharedMemorySize, )\ _visitor_( uint, maxComputeWorkGroupInvocations, )\ - _visitor_( uint, maxComputeWorkGroupSizeX, ) /* maxComputeWorkGroupCount, local_size_x */\ + _visitor_( uint, 
maxComputeWorkGroupSizeX, ) /* local_size_x, maxComputeWorkGroupCount */\ _visitor_( uint, maxComputeWorkGroupSizeY, ) /* local_size_y */\ _visitor_( uint, maxComputeWorkGroupSizeZ, ) /* local_size_z */\ /* mesh shader */\ diff --git a/AE/engine/src/graphics/Public/ImageDesc.h b/AE/engine/src/graphics/Public/ImageDesc.h index a19e9537..5dceca8e 100644 --- a/AE/engine/src/graphics/Public/ImageDesc.h +++ b/AE/engine/src/graphics/Public/ImageDesc.h @@ -22,14 +22,14 @@ namespace AE::Graphics // variables - uint3 dimension; // width, height, depth + ImageDim_t dimension; // width, height, depth + EImageDim imageDim = Default; ImageLayer arrayLayers = 1_layer; MipmapLevel mipLevels = 1_mipmap; - EImageDim imageDim = Default; EImageOpt options = Default; EImageUsage usage = Default; EPixelFormat format = Default; - MultiSamples samples; // if > 1 then enabled multisampling + MultiSamples samples; // if > 1 then enabled multisampling EMemoryType memType = EMemoryType::DeviceLocal; EQueueMask queues = Default; FormatList_t viewFormats { Default, Default, Default, Default }; // 'imageFormatList' extension @@ -45,7 +45,8 @@ namespace AE::Graphics ND_ bool IsExclusiveSharing () C_NE___ { return queues == Default; } ND_ bool HasViewFormatList () C_NE___ { return ViewFormatListSize() != 0; } ND_ usize ViewFormatListSize () C_NE___; - ND_ uint3 Dimension () C_NE___ { return dimension; } + ND_ uint3 Dimension () C_NE___ { return uint3{dimension}; } + ND_ uint2 Dimension2 () C_NE___ { return uint2{dimension}; } ImageDesc& SetType (EImage value) __NE___; ImageDesc& SetType (EImageDim value) __NE___ { imageDim = value; return *this; } @@ -82,12 +83,14 @@ namespace AE::Graphics EImage viewType = Default; EPixelFormat format = Default; // optional EImageAspect aspectMask = Default; + // 1 byte padding EImageUsage extUsage = Default; // 'maintenance2' extension MipmapLevel baseMipmap; - ushort mipmapCount = UMax; + MipmapCount_t mipmapCount = UMax; ImageLayer baseLayer; - ushort 
layerCount = UMax; + LayerCount_t layerCount = UMax; ImageSwizzle swizzle; + ImageDim_t dimension; // may be different from image (if set 'BlockTexelViewCompatible', if non-zero mipmap, etc) // methods @@ -106,14 +109,17 @@ namespace AE::Graphics void Validate (const ImageDesc &desc) __NE___; - ND_ bool operator == (const ImageViewDesc &rhs) C_NE___; + ND_ bool operator == (const ImageViewDesc &rhs) C_NE___; + + ND_ uint3 Dimension () C_NE___ { return uint3{dimension}; } + ND_ uint2 Dimension2 () C_NE___ { return uint2{dimension}; } ImageViewDesc& SetType (EImage value) __NE___ { viewType = value; return *this; } ImageViewDesc& SetFormat (EPixelFormat value) __NE___ { format = value; return *this; } ImageViewDesc& SetBaseMipmap (uint value) __NE___ { baseMipmap= MipmapLevel{value}; return *this; } - ImageViewDesc& SetMipLevels (uint base, uint count) __NE___ { baseMipmap= MipmapLevel{base}; mipmapCount = CheckCast(count); return *this; } + ImageViewDesc& SetMipLevels (uint base, uint count) __NE___ { baseMipmap= MipmapLevel{base}; mipmapCount = CheckCast(count); return *this; } ImageViewDesc& SetBaseLayer (uint value) __NE___ { baseLayer = ImageLayer{value}; return *this; } - ImageViewDesc& SetArrayLayers (uint base, uint count) __NE___ { baseLayer = ImageLayer{base}; layerCount = CheckCast(count); return *this; } + ImageViewDesc& SetArrayLayers (uint base, uint count) __NE___ { baseLayer = ImageLayer{base}; layerCount = CheckCast(count); return *this; } ImageViewDesc& SetSwizzle (ImageSwizzle value) __NE___ { swizzle = value; return *this; } ImageViewDesc& SetAspect (EImageAspect value) __NE___ { aspectMask= value; return *this; } ImageViewDesc& SetExtUsage (EImageUsage value) __NE___ { extUsage = value; return *this; } @@ -130,7 +136,7 @@ namespace AE::Base template <> struct TMemCopyAvailable< AE::Graphics::ImageViewDesc > : CT_True {}; template <> struct TTriviallySerializable< AE::Graphics::ImageViewDesc >: CT_True {}; - StaticAssert( 
sizeof(AE::Graphics::ImageDesc) == 48 ); - StaticAssert( sizeof(AE::Graphics::ImageViewDesc) == 20 ); + StaticAssert( sizeof(AE::Graphics::ImageDesc) == 28 ); + StaticAssert( sizeof(AE::Graphics::ImageViewDesc) == 24 ); } // AE::Base diff --git a/AE/engine/src/graphics/Public/ImageMemView.h b/AE/engine/src/graphics/Public/ImageMemView.h index a489e709..af3beafc 100644 --- a/AE/engine/src/graphics/Public/ImageMemView.h +++ b/AE/engine/src/graphics/Public/ImageMemView.h @@ -31,8 +31,8 @@ namespace AE::Graphics BufferMemView _content; Bytes32u _rowPitch; Bytes32u _slicePitch; - ushort3 _offset; - ushort3 _dimension; + ImageDim_t _offset; + ImageDim_t _dimension; ushort _bitsPerBlock = 0; ubyte2 _texBlockDim; EPixelFormat _format = Default; diff --git a/AE/engine/src/graphics/Public/PipelineDesc.h b/AE/engine/src/graphics/Public/PipelineDesc.h index 70053d53..e39ee760 100644 --- a/AE/engine/src/graphics/Public/PipelineDesc.h +++ b/AE/engine/src/graphics/Public/PipelineDesc.h @@ -230,7 +230,8 @@ namespace AE::Graphics struct ComputePipelineDesc : BasePipelineDesc { // variables - packed_ushort3 localSize {UndefinedLocalSize}; + packed_ushort3 localSize {UndefinedLocalSize}; + ushort subgroupSize = 0; // methods ComputePipelineDesc () __NE___ {} diff --git a/AE/engine/src/graphics/Public/RenderState.h b/AE/engine/src/graphics/Public/RenderState.h index b455ed3f..93f9405d 100644 --- a/AE/engine/src/graphics/Public/RenderState.h +++ b/AE/engine/src/graphics/Public/RenderState.h @@ -102,11 +102,11 @@ namespace AE::Graphics { // variables EStencilOp failOp = EStencilOp::Keep; // stencil test failed - EStencilOp depthFailOp = EStencilOp::Keep; // depth and stencil tests are passed - EStencilOp passOp = EStencilOp::Keep; // stencil test passed and depth test failed + EStencilOp depthFailOp = EStencilOp::Keep; // stencil test passed and depth test failed + EStencilOp passOp = EStencilOp::Keep; // depth and stencil tests are passed ECompareOp compareOp = ECompareOp::Always; 
ubyte reference = 0; - ubyte writeMask = UMax; + ubyte writeMask = UMax; // which bits will be updated ubyte compareMask = UMax; // methods diff --git a/AE/engine/src/graphics/Public/RenderStateEnums.h b/AE/engine/src/graphics/Public/RenderStateEnums.h index 31f7814a..10c52bc0 100644 --- a/AE/engine/src/graphics/Public/RenderStateEnums.h +++ b/AE/engine/src/graphics/Public/RenderStateEnums.h @@ -9,29 +9,32 @@ namespace AE::Graphics enum class EBlendFactor : ubyte { - // src - from shader - // dst - from render target - // result = srcColor * srcBlend [blendOp] dstColor * dstBlend; - Zero, // 0 - One, // 1 - SrcColor, // src - OneMinusSrcColor, // 1 - src - DstColor, // dst - OneMinusDstColor, // 1 - dst - SrcAlpha, // src.a - OneMinusSrcAlpha, // 1 - src.a - DstAlpha, // dst.a - OneMinusDstAlpha, // 1 - dst.a - ConstColor, // cc - OneMinusConstColor, // 1 - cc - ConstAlpha, // cc.a - OneMinusConstAlpha, // 1 - cc.a - SrcAlphaSaturate, // rgb * min( src.a, dst.a ), a * 1 - - Src1Color, // src1 - OneMinusSrc1Color, // 1 - src1 - Src1Alpha, // src1.a - OneMinusSrc1Alpha, // 1 - src1.a + // S, srcColor - from shader + // D, dstColor - from render target + // S1 - from shader (dual src blend) + // cc - constant color + // result = srcColor * srcBlend [blendOp] dstColor * dstBlend + + Zero, // 0 + One, // 1 + SrcColor, // S + OneMinusSrcColor, // 1 - S + DstColor, // D + OneMinusDstColor, // 1 - D + SrcAlpha, // S.a + OneMinusSrcAlpha, // 1 - S.a + DstAlpha, // D.a + OneMinusDstAlpha, // 1 - D.a + ConstColor, // cc + OneMinusConstColor, // 1 - cc + ConstAlpha, // cc.a + OneMinusConstAlpha, // 1 - cc.a + SrcAlphaSaturate, // rgb * min( S.a, D.a ), a * 1 + + Src1Color, // S1 + OneMinusSrc1Color, // 1 - S1 + Src1Alpha, // S1.a + OneMinusSrc1Alpha, // 1 - S1.a _Count, Unknown = 0xFF, @@ -40,14 +43,15 @@ namespace AE::Graphics enum class EBlendOp : ubyte { - // S - from shader - // D - from render target - // result = srcColor * srcBlend [blendOp] dstColor * dstBlend; - Add, 
// S+D - Sub, // S-D - RevSub, // D-S - Min, // min(S,D) - Max, // max(S,D) + // S, srcColor - from shader + // D, dstColor - from render target + // result = srcColor * srcBlend [blendOp] dstColor * dstBlend + + Add, // S + D + Sub, // S - D + RevSub, // D - S + Min, // min( S, D ) + Max, // max( S, D ) _Count, Unknown = 0xFF, }; @@ -58,6 +62,7 @@ namespace AE::Graphics // S - from shader // D - from render target // result = S [logicOp] D + None, // disabled Clear, // 0 Set, // 1 @@ -97,14 +102,14 @@ namespace AE::Graphics enum class EStencilOp : ubyte { - Keep, // s + Keep, // src Zero, // 0 Replace, // ref - Incr, // min( ++s, 0 ) - IncrWrap, // ++s & maxvalue - Decr, // max( --s, 0 ) - DecrWrap, // --s & maxvalue - Invert, // ~s + Incr, // min( ++src, maxValue ) + IncrWrap, // ++src & maxValue + Decr, // max( --src, 0 ) + DecrWrap, // --src & maxValue + Invert, // ~src _Count, Unknown = 0xFF, }; @@ -173,10 +178,14 @@ namespace AE::Graphics // shading rate FragmentShadingRate = 1 << 7, + // NV + ViewportWScaling = 1 << 8, + _Last, All = ((_Last-1) << 1) - 1, - GraphicsPipelineMask = StencilCompareMask | StencilWriteMask | StencilReference | DepthBias | BlendConstants | FragmentShadingRate, + GraphicsPipelineMask = StencilCompareMask | StencilWriteMask | StencilReference | DepthBias | + BlendConstants | FragmentShadingRate | ViewportWScaling, MeshPipelineMask = GraphicsPipelineMask, ComputePipelineMask = 0, TilePipelineMask = 0, diff --git a/AE/engine/src/graphics/Public/ResourceEnums.h b/AE/engine/src/graphics/Public/ResourceEnums.h index af595245..32b1bdf3 100644 --- a/AE/engine/src/graphics/Public/ResourceEnums.h +++ b/AE/engine/src/graphics/Public/ResourceEnums.h @@ -158,7 +158,8 @@ namespace AE::Graphics VertexPplnStore = 1 << 14, // storage image store and atomic operations in vertex, geometry, tessellation shaders FragmentPplnStore = 1 << 15, // storage image store and atomic operations in fragment shader - LossyRTCompression = 1 << 16, // Metal only, allow to
use hardware lossy compression for the color attachments + LossyRTCompression = 1 << 16, // allow to use hardware lossy compression for the color attachments + ExtendedUsage = 1 << 17, // image may not support all usage flags //DepthComparison // TODO diff --git a/AE/engine/src/graphics/Public/Video.h b/AE/engine/src/graphics/Public/Video.h index 2ed116d3..5d140715 100644 --- a/AE/engine/src/graphics/Public/Video.h +++ b/AE/engine/src/graphics/Public/Video.h @@ -117,7 +117,7 @@ namespace AE::Graphics struct VideoImageDesc { // variables - uint2 dimension; // 0 - min, UMax - max + VideoImageDim_t dimension; // 0 - min, UMax - max ImageLayer arrayLayers = 1_layer; EPixelFormat format = Default; EImageOpt options = Default; @@ -138,8 +138,9 @@ namespace AE::Graphics ND_ bool IsExclusiveSharing () C_NE___ { return queues == Default; } ND_ uint3 Dimension () C_NE___ { return uint3{ dimension, 1u }; } + ND_ uint2 Dimension2 () C_NE___ { return uint2{ dimension }; } - VideoImageDesc& SetDimension (const uint2 &value) __NE___ { dimension = value; return *this; } + VideoImageDesc& SetDimension (const uint2 &value) __NE___ { dimension = CheckCast(value); return *this; } VideoImageDesc& SetDimension (uint w, uint h) __NE___ { return SetDimension( uint2{w,h} ); } VideoImageDesc& SetOptions (EImageOpt value) __NE___ { options = value; return *this; } VideoImageDesc& SetUsage (EImageUsage v1, EVideoImageUsage v2) __NE___ { usage = v1; videoUsage = v2; return *this; } diff --git a/AE/engine/src/graphics/Public/VulkanTypes.h b/AE/engine/src/graphics/Public/VulkanTypes.h index 9dfb1a98..70913bc1 100644 --- a/AE/engine/src/graphics/Public/VulkanTypes.h +++ b/AE/engine/src/graphics/Public/VulkanTypes.h @@ -17,22 +17,22 @@ namespace AE::Graphics // struct VulkanImageDesc { - VkImage image = Default; - VkImageType imageType = VK_IMAGE_TYPE_MAX_ENUM; - VkImageCreateFlagBits flags = Zero; - EImageOpt options = Default; // some options are not defined in 'flags' - VkImageUsageFlagBits 
usage = Zero; - VkFormat format = VK_FORMAT_UNDEFINED; - VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT; - VkImageTiling tiling = VK_IMAGE_TILING_MAX_ENUM; + VkImage image = Default; + VkImageType imageType = VK_IMAGE_TYPE_MAX_ENUM; + VkImageCreateFlagBits flags = Zero; + EImageOpt options = Default; // some options are not defined in 'flags' + VkImageUsageFlagBits usage = Zero; + VkFormat format = VK_FORMAT_UNDEFINED; + VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT; + VkImageTiling tiling = VK_IMAGE_TILING_MAX_ENUM; uint3 dimension; - uint arrayLayers = 0; - uint mipLevels = 0; - EQueueMask queues = Default; - VkMemoryPropertyFlagBits memFlags = Zero; - VkImageAspectFlagBits aspectMask = Zero; - bool canBeDestroyed = false; - bool allocMemory = false; + uint arrayLayers = 0; + uint mipLevels = 0; + EQueueMask queues = Default; + VkMemoryPropertyFlagBits memFlags = Zero; + VkImageAspectFlagBits aspectMask = Zero; + bool canBeDestroyed = false; + bool allocMemory = false; }; @@ -41,13 +41,14 @@ namespace AE::Graphics // struct VulkanImageViewDesc { - VkImageView view = Default; - VkImageViewCreateFlagBits flags = Zero; - VkImageViewType viewType = VK_IMAGE_VIEW_TYPE_MAX_ENUM; - VkFormat format = VK_FORMAT_UNDEFINED; - VkComponentMapping components {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY}; - VkImageSubresourceRange subresourceRange { 0, 0, 0, 0, 0 }; - bool canBeDestroyed = false; + VkImageView view = Default; + VkImageViewCreateFlagBits flags = Zero; + VkImageViewType viewType = VK_IMAGE_VIEW_TYPE_MAX_ENUM; + VkFormat format = VK_FORMAT_UNDEFINED; + VkComponentMapping components {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY}; + VkImageSubresourceRange subresourceRange { 0, 0, 0, 0, 0 }; + uint3 dimension; // if doesn't match with image mip + bool canBeDestroyed = false; }; @@ -56,13 +57,13 @@ 
namespace AE::Graphics // struct VulkanBufferDesc { - VkBuffer buffer = Default; - VkBufferUsageFlagBits usage = VkBufferUsageFlagBits(0); + VkBuffer buffer = Default; + VkBufferUsageFlagBits usage = VkBufferUsageFlagBits(0); Bytes size; - EQueueMask queues = Default; - VkMemoryPropertyFlagBits memFlags = Zero; - bool canBeDestroyed = false; - bool allocMemory = false; + EQueueMask queues = Default; + VkMemoryPropertyFlagBits memFlags = Zero; + bool canBeDestroyed = false; + bool allocMemory = false; }; @@ -71,11 +72,11 @@ namespace AE::Graphics // struct VulkanBufferViewDesc { - VkBufferView view = Default; - VkFormat format = VK_FORMAT_UNDEFINED; + VkBufferView view = Default; + VkFormat format = VK_FORMAT_UNDEFINED; Bytes offset; Bytes range; - bool canBeDestroyed = false; + bool canBeDestroyed = false; }; @@ -84,11 +85,11 @@ namespace AE::Graphics // struct VulkanMemoryObjInfo { - VkDeviceMemory memory = Default; - VkMemoryPropertyFlagBits flags = Zero; + VkDeviceMemory memory = Default; + VkMemoryPropertyFlagBits flags = Zero; Bytes offset; Bytes size; - void * mappedPtr = null; // include 'offset' + void * mappedPtr = null; // include 'offset' }; @@ -97,9 +98,9 @@ namespace AE::Graphics // struct VulkanCmdBatchDependency { - VkSemaphore semaphore = Default; - ulong value = 0; // for timeline semaphore - // VkPipelineStageFlags2KHR stages = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT_KHR; + VkSemaphore semaphore = Default; + ulong value = 0; // for timeline semaphore + // VkPipelineStageFlags2KHR stages = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT_KHR; ND_ explicit operator bool () C_NE___ { return semaphore != Default; } }; diff --git a/AE/engine/src/graphics/Remote/Commands/RBarrierManager.h b/AE/engine/src/graphics/Remote/Commands/RBarrierManager.h index 17435fd3..93b4dd4a 100644 --- a/AE/engine/src/graphics/Remote/Commands/RBarrierManager.h +++ b/AE/engine/src/graphics/Remote/Commands/RBarrierManager.h @@ -137,7 +137,7 @@ namespace AE::Graphics::_hidden_ template ND_ 
auto const& GetResourceDescription (Strong> &id) C_NE___ { return GetResourceDescription( id.Get() ); } \ \ private: \ - template ND_ decltype(auto) _GetResourcesOrThrow (IDs ...ids) __Th___ { return _mngr.GetResourceManager().GetResourcesOrThrow( ids... ); } \ + template ND_ exact_t _GetResourcesOrThrow (IDs ...ids) __Th___ { return _mngr.GetResourceManager().GetResourcesOrThrow( ids... ); } \ \ using EFeature = RDevice::EFeature; \ ND_ bool _HasFeature (EFeature feat) C_NE___ { return GetDevice().HasFeature( feat ); } diff --git a/AE/engine/src/graphics/Remote/Commands/RDrawBarrierManager.h b/AE/engine/src/graphics/Remote/Commands/RDrawBarrierManager.h index 960aa726..e83e13fa 100644 --- a/AE/engine/src/graphics/Remote/Commands/RDrawBarrierManager.h +++ b/AE/engine/src/graphics/Remote/Commands/RDrawBarrierManager.h @@ -34,7 +34,7 @@ namespace AE::Graphics::_hidden_ explicit RDrawBarrierManager (const RPrimaryCmdBufState &primaryState) __Th___; template - ND_ decltype(auto) Get (IDs ...ids) __Th___ { return _resMngr.GetResourcesOrThrow( ids... ); } + ND_ exact_t Get (IDs ...ids) __Th___ { return _resMngr.GetResourcesOrThrow( ids... 
); } ND_ RDevice const& GetDevice () C_NE___ { return _resMngr.GetDevice(); } ND_ RResourceManager& GetResourceManager () C_NE___ { return _resMngr; } diff --git a/AE/engine/src/graphics/Remote/Commands/RDrawContext.cpp b/AE/engine/src/graphics/Remote/Commands/RDrawContext.cpp index 02335ef4..b5aef7f5 100644 --- a/AE/engine/src/graphics/Remote/Commands/RDrawContext.cpp +++ b/AE/engine/src/graphics/Remote/Commands/RDrawContext.cpp @@ -278,6 +278,21 @@ namespace AE::Graphics _cmdbuf->AddCommand( cmd ); } +/* +================================================= + SetViewportWScaling +================================================= +*/ + void RDrawContext::SetViewportWScaling (ArrayView scaling) __Th___ + { + VALIDATE_GCTX( SetViewportWScaling( _GetDynamicStates(), scaling )); + GCTX_CHECK( _HasFeature( EFeature::ViewportWScaling )); + + Msg::CmdBuf_Bake::Draw_SetViewportWScalingCmd cmd; + cmd.scaling = scaling; + _cmdbuf->AddCommand( cmd ); + } + /* ================================================= BindIndexBuffer diff --git a/AE/engine/src/graphics/Remote/Commands/RDrawContext.h b/AE/engine/src/graphics/Remote/Commands/RDrawContext.h index dc21564d..1e468897 100644 --- a/AE/engine/src/graphics/Remote/Commands/RDrawContext.h +++ b/AE/engine/src/graphics/Remote/Commands/RDrawContext.h @@ -65,6 +65,7 @@ namespace AE::Graphics void SetBlendConstants (const RGBA32f &color) __Th_OV; void SetDepthBounds (float minDepthBounds, float maxDepthBounds) __Th_OV; void SetFragmentShadingRate (EShadingRate, EShadingRateCombinerOp primitiveOp, EShadingRateCombinerOp textureOp) __Th_OV; + void SetViewportWScaling (ArrayView scaling) __Th_OV; // draw commands void BindIndexBuffer (BufferID buffer, Bytes offset, EIndex indexType) __Th_OV; @@ -204,7 +205,7 @@ namespace AE::Graphics private: template - ND_ decltype(auto) _GetResourcesOrThrow (IDs ...ids) __Th___ { return this->_mngr.Get( ids... ); } + ND_ exact_t _GetResourcesOrThrow (IDs ...ids) __Th___ { return this->_mngr.Get( ids... 
); } ND_ bool _NoPendingBarriers () C_NE___ { return _mngr.NoPendingBarriers(); } ND_ auto _GetDynamicStates () C_NE___ { return _dynStates; } diff --git a/AE/engine/src/graphics/Remote/RConnection.cpp b/AE/engine/src/graphics/Remote/RConnection.cpp index 4cd77f81..892d5077 100644 --- a/AE/engine/src/graphics/Remote/RConnection.cpp +++ b/AE/engine/src/graphics/Remote/RConnection.cpp @@ -28,8 +28,8 @@ namespace AE::RemoteGraphics bool RConnection::InitServer (ushort port, Ptr factory) { TcpSocket::Config cfg; + cfg.nonBlocking = true; // can be blocking or non-blocking cfg.noDelay = true; - cfg.nonBlocking = true; cfg.reuseAddress = false; cfg.receiveBufferSize = c_BufferSize; cfg.maxConnections = 1; @@ -55,7 +55,7 @@ namespace AE::RemoteGraphics if ( _socket.Accept( _server, OUT addr )) { //AE_LOG_DBG( "RmG server: connected client "s << addr.ToString() ); - _socket.KeepAlive(); + //_socket.KeepAlive(); return true; } ThreadUtils::Sleep_15ms(); @@ -99,7 +99,7 @@ namespace AE::RemoteGraphics Send ================================================= */ - bool RConnection::Send (const void *data, const Bytes dataSize) + bool RConnection::Send (const void* data, const Bytes dataSize) { for (Bytes offset; offset < dataSize;) { @@ -149,7 +149,7 @@ namespace AE::RemoteGraphics Serializing::Serializer enc {FastWStream{ _sentBuffer.Data(), _sentBuffer.End() }}; enc.factory = _factory; - CHECK_ERR( enc( &msg ) and enc.Flush() ); + CHECK_ERR( enc( &msg )); size = _sentBuffer.Size() - enc.stream.RemainingSize(); } diff --git a/AE/engine/src/graphics/Remote/RConnection.h b/AE/engine/src/graphics/Remote/RConnection.h index 6d374faa..6606d57e 100644 --- a/AE/engine/src/graphics/Remote/RConnection.h +++ b/AE/engine/src/graphics/Remote/RConnection.h @@ -8,7 +8,7 @@ namespace AE::RemoteGraphics { struct RmNetConfig { - static constexpr ushort serverPort = 7435; + static constexpr ushort serverPort = 3000; // set your TCP port number static constexpr uint socketCount = 3; }; @@ -44,7 
+44,7 @@ namespace AE::RemoteGraphics ND_ bool WaitForClient (); ND_ bool Send (const Msg::BaseMsg &); - ND_ bool Send (const void *data, Bytes dataSize); + ND_ bool Send (const void* data, Bytes dataSize); ND_ bool Receive () __Th___; ND_ auto Encode () -> RC; diff --git a/AE/engine/src/graphics/Remote/RDevice.cpp b/AE/engine/src/graphics/Remote/RDevice.cpp index 08a506b2..1acb1be8 100644 --- a/AE/engine/src/graphics/Remote/RDevice.cpp +++ b/AE/engine/src/graphics/Remote/RDevice.cpp @@ -98,14 +98,16 @@ namespace { auto& conn = _connArr.Get( lock ); bool ok = conn.Send( msg ); - for (;;) + for (bool loop = true; loop;) { Unused( conn.Receive() ); - if ( auto msg2 = conn.Encode() ) + loop = false; + for (; auto msg2 = conn.Encode(); ) + { _ProcessMessage( conn, RVRef(msg2) ); - else - break; + loop = true; + } } _connArr.Unlock( lock ); @@ -126,14 +128,16 @@ namespace { for (auto* msg : msgs) ok = ok and conn.Send( *msg ); - for (;;) + for (bool loop = true; loop;) { Unused( conn.Receive() ); - if ( auto msg2 = conn.Encode() ) + loop = false; + for (; auto msg2 = conn.Encode(); ) + { _ProcessMessage( conn, RVRef(msg2) ); - else - break; + loop = true; + } } _connArr.Unlock( lock ); diff --git a/AE/engine/src/graphics/Remote/RDevice.h b/AE/engine/src/graphics/Remote/RDevice.h index f67c1dc8..a0845034 100644 --- a/AE/engine/src/graphics/Remote/RDevice.h +++ b/AE/engine/src/graphics/Remote/RDevice.h @@ -184,28 +184,33 @@ namespace AE::Graphics auto lock = _connArr.Lock(); auto& conn = _connArr.Get( lock ); - bool ok = conn.Send( msg ); - for (;ok;) - { - bool ok2 = conn.Receive(); - - if ( auto msg2 = conn.Encode() ) + const auto SendAndWait = [&] () + {{ + bool ok = conn.Send( msg ); + for (;ok;) { - const TypeId type = msg2->GetTypeId(); - if ( type == TypeIdOf() ) + bool ok2 = conn.Receive(); + + for (; auto msg2 = conn.Encode(); ) { - response = Base::Cast(msg2); - break; + if ( msg2->GetTypeId() == TypeIdOf() ) + { + response = Base::Cast(msg2); + return true; + } 
+ + _ProcessMessage( conn, RVRef(msg2) ); + ok2 = true; } - _ProcessMessage( conn, RVRef(msg2) ); - ok2 = true; - } - if ( not ok2 ) - ThreadUtils::Sleep_1us(); - } + if ( not ok2 ) + ThreadUtils::Sleep_1us(); + } + return ok; + }}; + bool ok = SendAndWait(); _connArr.Unlock( lock ); return ok; } @@ -222,31 +227,37 @@ namespace AE::Graphics auto lock = _connArr.Lock(); auto& conn = _connArr.Get( lock ); - bool ok = true; - for (auto* msg : msgs) - ok = ok and conn.Send( *msg ); + const auto SendAndWait = [&] () + {{ + bool ok = true; - for (;ok;) - { - bool ok2 = conn.Receive(); + for (auto* msg : msgs) + ok = ok and conn.Send( *msg ); - if ( auto msg2 = conn.Encode() ) + for (;ok;) { - const TypeId type = msg2->GetTypeId(); - if ( type == TypeIdOf() ) + bool ok2 = conn.Receive(); + + if ( auto msg2 = conn.Encode() ) { - response = Base::Cast(msg2); - break; + if ( msg2->GetTypeId() == TypeIdOf() ) + { + response = Base::Cast(msg2); + return true; + } + + _ProcessMessage( conn, RVRef(msg2) ); + ok2 = true; } - _ProcessMessage( conn, RVRef(msg2) ); - ok2 = true; - } - if ( not ok2 ) - ThreadUtils::Sleep_1us(); - } + if ( not ok2 ) + ThreadUtils::Sleep_1us(); + } + return ok; + }}; + bool ok = SendAndWait(); _connArr.Unlock( lock ); return ok; } diff --git a/AE/engine/src/graphics/Remote/RMessages.cpp b/AE/engine/src/graphics/Remote/RMessages.cpp index 800309f2..b1c50086 100644 --- a/AE/engine/src/graphics/Remote/RMessages.cpp +++ b/AE/engine/src/graphics/Remote/RMessages.cpp @@ -53,6 +53,8 @@ namespace AE::RemoteGraphics::Msg Ser_SwapchainDesc( _desc_.swapchain ) + DECL_SERIALIZER( ProfGeneral_Initialize_Response::SerCpuCluster, name, logicalCores ) + DECL_SERIALIZER( DefaultResponse, ok ) DECL_SERIALIZER( UploadData, size ) // skip 'data' DECL_SERIALIZER( UploadDataAndCopy, size, dst ) // skip 'data' @@ -120,15 +122,15 @@ namespace AE::RemoteGraphics::Msg //----------------------------------------------------------------------------- - StaticAssert64( 
sizeof(ImageDesc) == 48 ); + StaticAssert64( sizeof(ImageDesc) == 28 ); #define Ser_ImageDesc( _desc_ )\ _desc_.dimension, _desc_.arrayLayers, _desc_.mipLevels, _desc_.imageDim, _desc_.options, \ _desc_.usage, _desc_.format, _desc_.samples, _desc_.memType, _desc_.queues, _desc_.viewFormats - StaticAssert64( sizeof(ImageViewDesc) == 20 ); + StaticAssert64( sizeof(ImageViewDesc) == 24 ); #define Ser_ImageViewDesc( _desc_ )\ - _desc_.viewType, _desc_.format, _desc_.aspectMask, _desc_.extUsage, \ - _desc_.baseMipmap, _desc_.mipmapCount, _desc_.baseLayer, _desc_.layerCount, _desc_.swizzle + _desc_.viewType, _desc_.format, _desc_.aspectMask, _desc_.extUsage, _desc_.baseMipmap, \ + _desc_.mipmapCount, _desc_.baseLayer, _desc_.layerCount, _desc_.dimension, _desc_.swizzle StaticAssert64( sizeof(BufferDesc) == 24 ); #define Ser_BufferDesc( _desc_ )\ @@ -360,8 +362,8 @@ namespace AE::RemoteGraphics::Msg DECL_SERIALIZER( ProfPVR_Initialize, required ) DECL_SERIALIZER( ProfPVR_Initialize_Response, ok, enabled ) - DECL_EMPTY_SERIALIZER( ProfPVR_Tick ) - DECL_SERIALIZER( ProfPVR_Tick_Response, timings ) + DECL_EMPTY_SERIALIZER( ProfPVR_GetTiming ) + DECL_SERIALIZER( ProfPVR_GetTiming_Response, timings ) DECL_EMPTY_SERIALIZER( ProfPVR_Sample ) DECL_SERIALIZER( ProfPVR_Sample_Response, counters ) @@ -369,6 +371,11 @@ namespace AE::RemoteGraphics::Msg DECL_SERIALIZER( ProfNVidia_Initialize_Response, ok, enabled ) DECL_EMPTY_SERIALIZER( ProfNVidia_Sample ) DECL_SERIALIZER( ProfNVidia_Sample_Response, counters ) + + DECL_SERIALIZER( ProfGeneral_Initialize, required ) + DECL_SERIALIZER( ProfGeneral_Initialize_Response, ok, cpuClusters ) + DECL_EMPTY_SERIALIZER( ProfGeneral_Sample ) + DECL_SERIALIZER( ProfGeneral_Sample_Response, counters, totalCpuUsage, kernelUsage ) //----------------------------------------------------------------------------- @@ -495,6 +502,7 @@ namespace AE::RemoteGraphics::Msg DECL_SERIALIZER( CmdBuf_Bake::Draw_SetBlendConstantsCmd, color ) DECL_SERIALIZER( 
CmdBuf_Bake::Draw_SetDepthBoundsCmd, minDepthBounds, maxDepthBounds ) DECL_SERIALIZER( CmdBuf_Bake::Draw_SetFragmentShadingRateCmd, rate, primitiveOp, textureOp ) + DECL_SERIALIZER( CmdBuf_Bake::Draw_SetViewportWScalingCmd, scaling ) DECL_SERIALIZER( CmdBuf_Bake::Draw_BindIndexBufferCmd, buffer, offset, indexType ) DECL_SERIALIZER( CmdBuf_Bake::Draw_BindVertexBuffersCmd, firstBinding, buffers, offsets ) DECL_SERIALIZER( CmdBuf_Bake::DrawCmd, vertexCount, instanceCount, firstVertex, firstInstance ) diff --git a/AE/engine/src/graphics/Remote/RMessages.cpp.h b/AE/engine/src/graphics/Remote/RMessages.cpp.h index e3fc858d..12ff3298 100644 --- a/AE/engine/src/graphics/Remote/RMessages.cpp.h +++ b/AE/engine/src/graphics/Remote/RMessages.cpp.h @@ -1,5 +1,7 @@ // Copyright (c) Zhirnov Andrey. For more information see 'LICENSE' +#pragma once + #include "graphics/Remote/RMessages.h" @@ -75,6 +77,7 @@ _reg_( CmdBuf_Bake::Draw_SetBlendConstantsCmd ) and\ _reg_( CmdBuf_Bake::Draw_SetDepthBoundsCmd ) and\ _reg_( CmdBuf_Bake::Draw_SetFragmentShadingRateCmd ) and\ + _reg_( CmdBuf_Bake::Draw_SetViewportWScalingCmd ) and\ _reg_( CmdBuf_Bake::Draw_BindIndexBufferCmd ) and\ _reg_( CmdBuf_Bake::Draw_BindVertexBuffersCmd ) and\ _reg_( CmdBuf_Bake::DrawCmd ) and\ @@ -265,10 +268,12 @@ _regMsg_( ProfAdreno_Initialize ) and\ _regMsg_( ProfAdreno_Sample ) and\ _regMsg_( ProfPVR_Initialize ) and\ - _regMsg_( ProfPVR_Tick ) and\ + _regMsg_( ProfPVR_GetTiming ) and\ _regMsg_( ProfPVR_Sample ) and\ _regMsg_( ProfNVidia_Initialize ) and\ _regMsg_( ProfNVidia_Sample ) and\ + _regMsg_( ProfGeneral_Initialize ) and\ + _regMsg_( ProfGeneral_Sample ) and\ _regResp_( ProfArm_Initialize_Response ) and\ _regResp_( ProfArm_Sample_Response ) and\ _regResp_( ProfMali_Initialize_Response ) and\ @@ -276,10 +281,12 @@ _regResp_( ProfAdreno_Initialize_Response ) and\ _regResp_( ProfAdreno_Sample_Response ) and\ _regResp_( ProfPVR_Initialize_Response ) and\ - _regResp_( ProfPVR_Tick_Response ) and\ + _regResp_( 
ProfPVR_GetTiming_Response ) and\ _regResp_( ProfPVR_Sample_Response ) and\ _regResp_( ProfNVidia_Initialize_Response ) and\ - _regResp_( ProfNVidia_Sample_Response ));\ + _regResp_( ProfNVidia_Sample_Response ) and\ + _regResp_( ProfGeneral_Initialize_Response ) and\ + _regResp_( ProfGeneral_Sample_Response ));\ \ CHECK_ERR(\ _regMsg_( DescUpd_Flush ) and\ diff --git a/AE/engine/src/graphics/Remote/RMessages.h b/AE/engine/src/graphics/Remote/RMessages.h index 7e57c5ac..def570c0 100644 --- a/AE/engine/src/graphics/Remote/RMessages.h +++ b/AE/engine/src/graphics/Remote/RMessages.h @@ -16,11 +16,12 @@ #include "graphics/Public/QueryManager.h" #include "graphics/Public/IDevice.h" -#include "profiler/Utils/ArmProfiler.h" -#include "profiler/Utils/MaliProfiler.h" -#include "profiler/Utils/NVidiaProfiler.h" -#include "profiler/Utils/AdrenoProfiler.h" -#include "profiler/Utils/PowerVRProfiler.h" +#include "profiler/Profilers/ArmProfiler.h" +#include "profiler/Profilers/MaliProfiler.h" +#include "profiler/Profilers/NVidiaProfiler.h" +#include "profiler/Profilers/AdrenoProfiler.h" +#include "profiler/Profilers/PowerVRProfiler.h" +#include "profiler/Profilers/GeneralProfiler.h" #include "PipelineCompilerImpl.h" @@ -127,6 +128,7 @@ namespace AE::RemoteGraphics::Msg DrawIndirectCount, DrawIndexedIndirectCount, DrawMeshTasksIndirectCount, + ViewportWScaling, // ITransferContext (Vulkan) ClearColorImage, @@ -936,9 +938,9 @@ namespace AE::RemoteGraphics::Msg Profiler::PowerVRProfiler::ECounterSet enabled; ) - DECL_MSG( ProfPVR_Tick ) + DECL_MSG( ProfPVR_GetTiming ) - DECL_RESP( ProfPVR_Tick_Response, + DECL_RESP( ProfPVR_GetTiming_Response, Profiler::PowerVRProfiler::TimeScopeArr_t timings; ) @@ -968,6 +970,37 @@ namespace AE::RemoteGraphics::Msg //----------------------------------------------------------------------------- + DECL_MSG( ProfGeneral_Initialize, + Profiler::GeneralProfiler::ECounterSet required; + ) + + DECL_RESP( ProfGeneral_Initialize_Response, + + struct 
SerCpuCluster final : Profiler::GeneralProfiler::CpuCluster, ISerializable + { + SerCpuCluster () __NE___ = default; + SerCpuCluster (const CpuCluster &other) : CpuCluster{other} {} + SerCpuCluster (const SerCpuCluster &) = default; + SerCpuCluster (SerCpuCluster &&) = default; + + bool Serialize (Serializer &) C_NE_OV; + bool Deserialize (Deserializer &) __NE_OV; + }; + + bool ok; + FixedArray< SerCpuCluster, CpuArchInfo::MaxCoreTypes > cpuClusters; + ) + + DECL_MSG( ProfGeneral_Sample ) + + DECL_RESP( ProfGeneral_Sample_Response, + Profiler::GeneralProfiler::Counters_t counters; + ArrayView totalCpuUsage; + ArrayView kernelUsage; + ) +//----------------------------------------------------------------------------- + + struct DescUpd_Flush final : BaseMsg { // types @@ -1592,6 +1625,10 @@ namespace AE::RemoteGraphics::Msg EShadingRateCombinerOp textureOp; ) + DECL_CMD( Draw_SetViewportWScalingCmd, + ArrayView scaling; + ) + DECL_CMD( Draw_BindIndexBufferCmd, RmBufferID buffer; Bytes offset; diff --git a/AE/engine/src/graphics/Remote/RSwapchain.cpp b/AE/engine/src/graphics/Remote/RSwapchain.cpp index 5ac19374..106f8053 100644 --- a/AE/engine/src/graphics/Remote/RSwapchain.cpp +++ b/AE/engine/src/graphics/Remote/RSwapchain.cpp @@ -232,14 +232,14 @@ namespace AE::Graphics if ( res->minimized ) { - _surfaceSize.store( ushort2{res->viewSize} ); + _surfaceSize.store( ImageDim2_t{res->viewSize} ); return true; } CHECK_ERR( res->created ); _desc = res->desc; - _surfaceSize.store( ushort2{res->viewSize} ); + _surfaceSize.store( ImageDim2_t{res->viewSize} ); _imageAvailableSem = res->imageAvailable; _renderFinishedSem = res->renderFinished; @@ -251,7 +251,7 @@ namespace AE::Graphics RemoteImageDesc img_desc; RemoteImageViewDesc view_desc; - img_desc.desc.dimension = uint3{res->viewSize, 1u}; + img_desc.desc.dimension = ImageDim_t{uint3{ res->viewSize, 1u }}; img_desc.desc.imageDim = EImageDim_2D; img_desc.desc.options = _desc.options; img_desc.desc.usage = _desc.usage; diff 
--git a/AE/engine/src/graphics/Remote/RSwapchain.h b/AE/engine/src/graphics/Remote/RSwapchain.h index f3c57a4a..9845bdd2 100644 --- a/AE/engine/src/graphics/Remote/RSwapchain.h +++ b/AE/engine/src/graphics/Remote/RSwapchain.h @@ -49,7 +49,7 @@ namespace AE::Graphics RDevice const* _device = null; mutable SharedMutex _guard; - StructAtomic< ushort2 > _surfaceSize; + StructAtomic< ImageDim2_t > _surfaceSize; StructAtomic< MutableIdxBits > _indices; ImageIDs_t _imageIDs {}; // protected by '_guard' diff --git a/AE/engine/src/graphics/Remote/Resources/RImage.cpp b/AE/engine/src/graphics/Remote/Resources/RImage.cpp index 1c13cf36..235ebcc9 100644 --- a/AE/engine/src/graphics/Remote/Resources/RImage.cpp +++ b/AE/engine/src/graphics/Remote/Resources/RImage.cpp @@ -31,7 +31,7 @@ namespace AE::Graphics { DRC_EXLOCK( _drCheck ); CHECK_ERR( not _imageId ); - CHECK_ERR( All( desc.dimension > uint3{0} )); + CHECK_ERR( All( desc.dimension > ImageDim_t{0} )); CHECK_ERR( desc.imageDim != Default ); CHECK_ERR( desc.arrayLayers > 0_layer ); CHECK_ERR( desc.mipLevels > 0_mipmap ); diff --git a/AE/engine/src/graphics/Remote/Resources/RImage.h b/AE/engine/src/graphics/Remote/Resources/RImage.h index 5a99fa42..4bf8fcce 100644 --- a/AE/engine/src/graphics/Remote/Resources/RImage.h +++ b/AE/engine/src/graphics/Remote/Resources/RImage.h @@ -45,7 +45,7 @@ namespace AE::Graphics ND_ ImageDesc const& Description () C_NE___ { DRC_SHAREDLOCK( _drCheck ); return _desc; } ND_ MemoryID MemoryId () C_NE___ { return Default; } - ND_ uint3 const Dimension () C_NE___ { DRC_SHAREDLOCK( _drCheck ); return _desc.dimension; } + ND_ uint3 const Dimension () C_NE___ { DRC_SHAREDLOCK( _drCheck ); return _desc.Dimension(); } ND_ uint Width () C_NE___ { DRC_SHAREDLOCK( _drCheck ); return _desc.dimension.x; } ND_ uint Height () C_NE___ { DRC_SHAREDLOCK( _drCheck ); return _desc.dimension.y; } ND_ uint Depth () C_NE___ { DRC_SHAREDLOCK( _drCheck ); return _desc.dimension.z; } diff --git 
a/AE/engine/src/graphics/Remote/Resources/RPipelineHelper.cpp.h b/AE/engine/src/graphics/Remote/Resources/RPipelineHelper.cpp.h index cc18132f..27f23ee5 100644 --- a/AE/engine/src/graphics/Remote/Resources/RPipelineHelper.cpp.h +++ b/AE/engine/src/graphics/Remote/Resources/RPipelineHelper.cpp.h @@ -1,5 +1,7 @@ // Copyright (c) Zhirnov Andrey. For more information see 'LICENSE' +#pragma once + # include "graphics/Remote/RResourceManager.h" # include "graphics/Remote/REnumCast.h" diff --git a/AE/engine/src/graphics/RenderGraph/RGCommandContext.h b/AE/engine/src/graphics/RenderGraph/RGCommandContext.h index 1da4db7e..cbb76aac 100644 --- a/AE/engine/src/graphics/RenderGraph/RGCommandContext.h +++ b/AE/engine/src/graphics/RenderGraph/RGCommandContext.h @@ -271,6 +271,7 @@ namespace AE::RG::_hidden_ void SetStencilWriteMask (uint writeMask) __Th_OV { return _ctx.SetStencilWriteMask( writeMask ); } void SetStencilWriteMask (uint frontWriteMask, uint backWriteMask) __Th_OV { return _ctx.SetStencilWriteMask( frontWriteMask, backWriteMask ); } void SetFragmentShadingRate (EShadingRate rate, EShadingRateCombinerOp primitiveOp, EShadingRateCombinerOp textureOp)__Th_OV { return _ctx.SetFragmentShadingRate( rate, primitiveOp, textureOp ); } + void SetViewportWScaling (ArrayView scaling) __Th_OV { return _ctx.SetViewportWScaling( scaling ); } #endif // draw commands // @@ -692,10 +693,29 @@ namespace AE::RG::_hidden_ template void TransferContext::CopyImage (ImageID srcImage, ImageID dstImage, ArrayView ranges) __Th___ { - ResourceState( srcImage, EResourceState::CopySrc ); - ResourceState( dstImage, EResourceState::CopyDst ); - _ctx.CommitBarriers(); - _ctx.CopyImage( srcImage, dstImage, ranges ); + if_likely( srcImage != dstImage ) + { + ResourceState( srcImage, EResourceState::CopySrc ); + ResourceState( dstImage, EResourceState::CopyDst ); + _ctx.CommitBarriers(); + _ctx.CopyImage( srcImage, dstImage, ranges ); + } + else + { + const EResourceState state = 
EResourceState::CopySrc; + ResourceState( srcImage, state ); + + for (auto& range : ranges) { + _ctx.ImageBarrier( dstImage, state, EResourceState::CopyDst, ImageSubresourceRange{range.dstSubres} ); + } + _ctx.CommitBarriers(); + _ctx.CopyImage( srcImage, dstImage, ranges ); + + for (auto& range : ranges) { + _ctx.ImageBarrier( dstImage, EResourceState::CopyDst, state, ImageSubresourceRange{range.dstSubres} ); + } + _ctx.CommitBarriers(); + } } template @@ -887,10 +907,29 @@ namespace AE::RG::_hidden_ template void TransferContext::BlitImage (ImageID srcImage, ImageID dstImage, EBlitFilter filter, ArrayView regions) __Th___ { - ResourceState( srcImage, EResourceState::BlitSrc ); - ResourceState( dstImage, EResourceState::BlitDst ); - _ctx.CommitBarriers(); - _ctx.BlitImage( srcImage, dstImage, filter, regions ); + if_likely( srcImage != dstImage ) + { + ResourceState( srcImage, EResourceState::BlitSrc ); + ResourceState( dstImage, EResourceState::BlitDst ); + _ctx.CommitBarriers(); + _ctx.BlitImage( srcImage, dstImage, filter, regions ); + } + else + { + const EResourceState state = EResourceState::BlitSrc; + ResourceState( srcImage, state ); + + for (auto& range : regions) { + _ctx.ImageBarrier( dstImage, state, EResourceState::BlitDst, ImageSubresourceRange{range.dstSubres} ); + } + _ctx.CommitBarriers(); + _ctx.BlitImage( srcImage, dstImage, filter, regions ); + + for (auto& range : regions) { + _ctx.ImageBarrier( dstImage, EResourceState::BlitDst, state, ImageSubresourceRange{range.dstSubres} ); + } + _ctx.CommitBarriers(); + } } #if defined(AE_ENABLE_VULKAN) or defined(AE_ENABLE_REMOTE_GRAPHICS) @@ -929,10 +968,29 @@ namespace AE::RG::_hidden_ template void TransferContext::ResolveImage (ImageID srcImage, ImageID dstImage, ArrayView regions) __Th___ { - ResourceState( srcImage, EResourceState::BlitSrc ); - ResourceState( dstImage, EResourceState::BlitDst ); - _ctx.CommitBarriers(); - _ctx.ResolveImage( srcImage, dstImage, regions ); + if_likely( srcImage != 
dstImage ) + { + ResourceState( srcImage, EResourceState::BlitSrc ); + ResourceState( dstImage, EResourceState::BlitDst ); + _ctx.CommitBarriers(); + _ctx.ResolveImage( srcImage, dstImage, regions ); + } + else + { + const EResourceState state = EResourceState::BlitSrc; + ResourceState( srcImage, state ); + + for (auto& range : regions) { + _ctx.ImageBarrier( dstImage, state, EResourceState::BlitDst, ImageSubresourceRange{range.dstSubres} ); + } + _ctx.CommitBarriers(); + _ctx.ResolveImage( srcImage, dstImage, regions ); + + for (auto& range : regions) { + _ctx.ImageBarrier( dstImage, EResourceState::BlitDst, state, ImageSubresourceRange{range.dstSubres} ); + } + _ctx.CommitBarriers(); + } } template diff --git a/AE/engine/src/graphics/RenderGraph/RenderGraph.cpp b/AE/engine/src/graphics/RenderGraph/RenderGraph.cpp index 17c6a99c..ff9e9227 100644 --- a/AE/engine/src/graphics/RenderGraph/RenderGraph.cpp +++ b/AE/engine/src/graphics/RenderGraph/RenderGraph.cpp @@ -1,7 +1,8 @@ // Copyright (c) Zhirnov Andrey. For more information see 'LICENSE' -#include "graphics/RenderGraph/RenderGraph.h" #include "platform/Public/OutputSurface.h" +#include "graphics/Private/Defines.h" +#include "graphics/RenderGraph/RenderGraph.h" namespace AE::RG::_hidden_ { diff --git a/AE/engine/src/graphics/RenderGraph/ResStateTracker.cpp b/AE/engine/src/graphics/RenderGraph/ResStateTracker.cpp index 89ca018c..04fbc12d 100644 --- a/AE/engine/src/graphics/RenderGraph/ResStateTracker.cpp +++ b/AE/engine/src/graphics/RenderGraph/ResStateTracker.cpp @@ -1,7 +1,7 @@ // Copyright (c) Zhirnov Andrey. 
For more information see 'LICENSE' -#include "graphics/RenderGraph/ResStateTracker.h" #include "graphics/Private/EnumToString.h" +#include "graphics/RenderGraph/ResStateTracker.h" namespace AE::RG::_hidden_ { @@ -246,7 +246,7 @@ namespace AE::RG::_hidden_ _AddResource (ImageID) ================================================= */ - bool ResStateTracker::_AddResource (ImageID id, EResourceState currentState, EResourceState defaultState, const CommandBatchPtr &batch, EQueueType queue) __NE___ + bool ResStateTracker::_AddResource (ImageID id, EResourceState currentState, EResourceState defaultState, const RC &batch, EQueueType queue) __NE___ { if ( defaultState == Default ) defaultState = EResourceState::General; @@ -276,7 +276,7 @@ namespace AE::RG::_hidden_ _AddResource (BufferID) ================================================= */ - bool ResStateTracker::_AddResource (BufferID id, EResourceState currentState, EResourceState defaultState, const CommandBatchPtr &batch, EQueueType queue) __NE___ + bool ResStateTracker::_AddResource (BufferID id, EResourceState currentState, EResourceState defaultState, const RC &batch, EQueueType queue) __NE___ { if ( defaultState == Default ) defaultState = EResourceState::General; @@ -306,7 +306,7 @@ namespace AE::RG::_hidden_ _AddResource (RTGeometryID) ================================================= */ - bool ResStateTracker::_AddResource (RTGeometryID id, EResourceState currentState, EResourceState defaultState, const CommandBatchPtr &batch, EQueueType queue) __NE___ + bool ResStateTracker::_AddResource (RTGeometryID id, EResourceState currentState, EResourceState defaultState, const RC &batch, EQueueType queue) __NE___ { if ( defaultState == Default ) defaultState = EResourceState::General; @@ -334,7 +334,7 @@ namespace AE::RG::_hidden_ _AddResource (RTSceneID) ================================================= */ - bool ResStateTracker::_AddResource (RTSceneID id, EResourceState currentState, EResourceState defaultState, 
const CommandBatchPtr &batch, EQueueType queue) __NE___ + bool ResStateTracker::_AddResource (RTSceneID id, EResourceState currentState, EResourceState defaultState, const RC &batch, EQueueType queue) __NE___ { if ( defaultState == Default ) defaultState = EResourceState::General; @@ -362,7 +362,7 @@ namespace AE::RG::_hidden_ _AddResource (VideoImageID) ================================================= */ - bool ResStateTracker::_AddResource (VideoImageID id, EResourceState currentState, EResourceState defaultState, const CommandBatchPtr &batch, EQueueType queue) __NE___ + bool ResStateTracker::_AddResource (VideoImageID id, EResourceState currentState, EResourceState defaultState, const RC &batch, EQueueType queue) __NE___ { auto& res_mngr = GraphicsScheduler().GetResourceManager(); auto* res = res_mngr.GetResource( id ); diff --git a/AE/engine/src/graphics/Scripting/GraphicsBindings.cpp b/AE/engine/src/graphics/Scripting/GraphicsBindings.cpp.h similarity index 92% rename from AE/engine/src/graphics/Scripting/GraphicsBindings.cpp rename to AE/engine/src/graphics/Scripting/GraphicsBindings.cpp.h index 63c69c2d..f853dce9 100644 --- a/AE/engine/src/graphics/Scripting/GraphicsBindings.cpp +++ b/AE/engine/src/graphics/Scripting/GraphicsBindings.cpp.h @@ -160,30 +160,36 @@ namespace { EnumBinder binder{ se }; binder.Create(); + binder.Comment( "S, srcColor - from shader\n" + "D, dstColor - from render target\n" + "S1 - from shader (dual src blend)\n" + "cc - constant color\n" + "result = srcColor * srcBlend [blendOp] dstColor * dstBlend\n\n"); + switch_enum( EBlendFactor::Unknown ) { case EBlendFactor::Unknown : case EBlendFactor::_Count : - #define BIND( _name_ ) case EBlendFactor::_name_ : binder.AddValue( AE_TOSTRING(_name_), EBlendFactor::_name_ ); - BIND( Zero ) - BIND( One ) - BIND( SrcColor ) - BIND( OneMinusSrcColor ) - BIND( DstColor ) - BIND( OneMinusDstColor ) - BIND( SrcAlpha ) - BIND( OneMinusSrcAlpha ) - BIND( DstAlpha ) - BIND( OneMinusDstAlpha ) - BIND( 
ConstColor ) - BIND( OneMinusConstColor ) - BIND( ConstAlpha ) - BIND( OneMinusConstAlpha ) - BIND( SrcAlphaSaturate ) - BIND( Src1Color ) - BIND( OneMinusSrc1Color ) - BIND( Src1Alpha ) - BIND( OneMinusSrc1Alpha ) + #define BIND( _name_, _comment_ ) case EBlendFactor::_name_ : binder.Comment( _comment_ ); binder.AddValue( AE_TOSTRING(_name_), EBlendFactor::_name_ ); + BIND( Zero, "0" ) + BIND( One, "1" ) + BIND( SrcColor, "S" ) + BIND( OneMinusSrcColor, "1 - S" ) + BIND( DstColor, "D" ) + BIND( OneMinusDstColor, "1 - D" ) + BIND( SrcAlpha, "S.a" ) + BIND( OneMinusSrcAlpha, "1 - S.a" ) + BIND( DstAlpha, "D.a" ) + BIND( OneMinusDstAlpha, "1 - D.a" ) + BIND( ConstColor, "cc" ) + BIND( OneMinusConstColor, "1 - cc" ) + BIND( ConstAlpha, "cc.a" ) + BIND( OneMinusConstAlpha, "1 - cc.a" ) + BIND( SrcAlphaSaturate, "rgb * min( S.a, 1 - D.a ), a * 1" ) + BIND( Src1Color, "S1" ) + BIND( OneMinusSrc1Color, "1 - S1" ) + BIND( Src1Alpha, "S1.a" ) + BIND( OneMinusSrc1Alpha, "1 - S1.a" ) #undef BIND default : break; } @@ -199,16 +205,20 @@ namespace { EnumBinder binder{ se }; binder.Create(); + binder.Comment( "S, srcColor - from shader\n" + "D, dstColor - from render target\n" + "result = srcColor * srcBlend [blendOp] dstColor * dstBlend\n\n"); + switch_enum( EBlendOp::Unknown ) { case EBlendOp::Unknown : case EBlendOp::_Count : - #define BIND( _name_ ) case EBlendOp::_name_ : binder.AddValue( AE_TOSTRING(_name_), EBlendOp::_name_ ); - BIND( Add ) - BIND( Sub ) - BIND( RevSub ) - BIND( Min ) - BIND( Max ) + #define BIND( _name_, _comment_ ) case EBlendOp::_name_ : binder.Comment( _comment_ ); binder.AddValue( AE_TOSTRING(_name_), EBlendOp::_name_ ); + BIND( Add, "S + D" ) + BIND( Sub, "S - D" ) + BIND( RevSub, "D - S" ) + BIND( Min, "min( S, D )" ) + BIND( Max, "max( S, D )" ) #undef BIND default : break; } @@ -224,28 +234,31 @@ namespace { EnumBinder binder{ se }; binder.Create(); + binder.Comment( "S - from shader\n" + "D - from render target\n" + "result = S [logicOp] D\n\n");
switch_enum( ELogicOp::Unknown ) { case ELogicOp::Unknown : case ELogicOp::_Count : - #define BIND( _name_ ) case ELogicOp::_name_ : binder.AddValue( AE_TOSTRING(_name_), ELogicOp::_name_ ); - BIND( None ) - BIND( Clear ) - BIND( Set ) - BIND( Copy ) - BIND( CopyInverted ) - BIND( NoOp ) - BIND( Invert ) - BIND( And ) - BIND( NotAnd ) - BIND( Or ) - BIND( NotOr ) - BIND( Xor ) - BIND( Equiv ) - BIND( AndReverse ) - BIND( AndInverted ) - BIND( OrReverse ) - BIND( OrInverted ) + #define BIND( _name_, _comment_ ) case ELogicOp::_name_ : binder.Comment( _comment_ ); binder.AddValue( AE_TOSTRING(_name_), ELogicOp::_name_ ); + BIND( None, "disabled" ) + BIND( Clear, "0" ) + BIND( Set, "1" ) + BIND( Copy, "S" ) + BIND( CopyInverted, "~S" ) + BIND( NoOp, "D" ) + BIND( Invert, "~D" ) + BIND( And, "S & D" ) + BIND( NotAnd, "~ ( S & D )" ) + BIND( Or, "S | D" ) + BIND( NotOr, "~ ( S | D )" ) + BIND( Xor, "S ^ D" ) + BIND( Equiv, "~ ( S ^ D )" ) + BIND( AndReverse, "S & ~D" ) + BIND( AndInverted, "~S & D" ) + BIND( OrReverse, "S | ~D" ) + BIND( OrInverted, "~S | D" ) #undef BIND default : break; } @@ -265,15 +278,15 @@ namespace { case EStencilOp::Unknown : case EStencilOp::_Count : - #define BIND( _name_ ) case EStencilOp::_name_ : binder.AddValue( AE_TOSTRING(_name_), EStencilOp::_name_ ); - BIND( Keep ) - BIND( Zero ) - BIND( Replace ) - BIND( Incr ) - BIND( IncrWrap ) - BIND( Decr ) - BIND( DecrWrap ) - BIND( Invert ) + #define BIND( _name_, _comment_ ) case EStencilOp::_name_ : binder.Comment( _comment_ ); binder.AddValue( AE_TOSTRING(_name_), EStencilOp::_name_ ); + BIND( Keep, "src" ) + BIND( Zero, "0" ) + BIND( Replace, "ref" ) + BIND( Incr, "min( ++src, maxValue )" ) + BIND( IncrWrap, "++src & maxValue" ) + BIND( Decr, "max( --src, 0 )" ) + BIND( DecrWrap, "--src & maxValue" ) + BIND( Invert, "~src" ) #undef BIND default : break; } @@ -379,6 +392,7 @@ namespace BIND( BlendConstants ) BIND( RTStackSize ) BIND( FragmentShadingRate ) + BIND( ViewportWScaling ) #undef BIND
default : break; } @@ -1187,6 +1201,7 @@ namespace BIND( VertexPplnStore ) BIND( FragmentPplnStore ) BIND( LossyRTCompression ) + BIND( ExtendedUsage ) #undef BIND default : binder.AddValue( "All", EImageOpt::All ); @@ -1626,25 +1641,34 @@ namespace self.back.compareOp = op; } - static void RenderState_StencilBufferState_Reference (RenderState::StencilBufferState &self, ubyte value) + static void RenderState_StencilBufferState_Reference (RenderState::StencilBufferState &self, uint value) { + CHECK_THROW( value <= MaxValue(), + "Stencil Reference (x"s << ToString<16>( value ) << ") must be <= than 0xFF" ); + self.enabled = true; - self.front.reference = value; - self.back.reference = value; + self.front.reference = ubyte(value); + self.back.reference = ubyte(value); } - static void RenderState_StencilBufferState_WriteMask (RenderState::StencilBufferState &self, ubyte value) + static void RenderState_StencilBufferState_WriteMask (RenderState::StencilBufferState &self, uint value) { + CHECK_THROW( value <= MaxValue(), + "Stencil WriteMask (x"s << ToString<16>( value ) << ") must be <= than 0xFF" ); + self.enabled = true; - self.front.writeMask = value; - self.back.writeMask = value; + self.front.writeMask = ubyte(value); + self.back.writeMask = ubyte(value); } - static void RenderState_StencilBufferState_CompareMask (RenderState::StencilBufferState &self, ubyte value) + static void RenderState_StencilBufferState_CompareMask (RenderState::StencilBufferState &self, uint value) { + CHECK_THROW( value <= MaxValue(), + "Stencil CompareMask (x"s << ToString<16>( value ) << ") must be <= than 0xFF" ); + self.enabled = true; - self.front.compareMask = value; - self.back.compareMask = value; + self.front.compareMask = ubyte(value); + self.back.compareMask = ubyte(value); } static void Bind_RenderState_StencilBufferState (const ScriptEnginePtr &se) __Th___ @@ -1656,13 +1680,29 @@ namespace binder.AddProperty( &RenderState::StencilBufferState::back, "back" ); binder.AddProperty( 
&RenderState::StencilBufferState::enabled, "enabled" ); + binder.Comment( "Stencil test compare operator.\n" + "if '(stencilAttachment & CompareMask) [CompareOp] (Reference & CompareMask)' then sample passed stencil test." ); + binder.AddMethodFromGlobal( &RenderState_StencilBufferState_CompareOp, "CompareOp", {} ); + binder.AddMethodFromGlobal( &RenderState_StencilBufferState_Reference, "Reference", {} ); + binder.AddMethodFromGlobal( &RenderState_StencilBufferState_CompareMask, "CompareMask", {} ); + + binder.Comment( "Action performed on samples that fail the stencil test.\n" + "'stencilValue = FailOp( stencilAttachment )'\n" + "See 'CompareOp', 'Reference' and 'CompareMask' to know how stencil test is performed." ); binder.AddMethodFromGlobal( &RenderState_StencilBufferState_FailOp, "FailOp", {} ); + + binder.Comment( "Action performed on samples that pass the stencil test and fail the depth test.\n" + "'stencilValue = DepthFailOp( stencilAttachment )'\n" + "Depth test happens after stencil test and before stencil update." 
); binder.AddMethodFromGlobal( &RenderState_StencilBufferState_DepthFailOp, "DepthFailOp", {} ); + + binder.Comment( "Action performed on samples that pass both the depth and stencil tests.\n" + "'stencilValue = PassOp( stencilAttachment )'" ); binder.AddMethodFromGlobal( &RenderState_StencilBufferState_PassOp, "PassOp", {} ); - binder.AddMethodFromGlobal( &RenderState_StencilBufferState_CompareOp, "CompareOp", {} ); - binder.AddMethodFromGlobal( &RenderState_StencilBufferState_Reference, "Reference", {} ); + + binder.Comment( "Bitmask that selects which bits of the stencil attachment are replaced by the new stencil value.\n" + "'stencilAttachment = (stencilAttachment & ~WriteMask) | (stencilValue & WriteMask)'" ); binder.AddMethodFromGlobal( &RenderState_StencilBufferState_WriteMask, "WriteMask", {} ); - binder.AddMethodFromGlobal( &RenderState_StencilBufferState_CompareMask, "CompareMask", {} ); } /* diff --git a/AE/engine/src/graphics/Vulkan/Allocators/VBlockMemAllocator.cpp b/AE/engine/src/graphics/Vulkan/Allocators/VBlockMemAllocator.cpp index f3e293ea..616d37d2 100644 --- a/AE/engine/src/graphics/Vulkan/Allocators/VBlockMemAllocator.cpp +++ b/AE/engine/src/graphics/Vulkan/Allocators/VBlockMemAllocator.cpp @@ -309,4 +309,5 @@ namespace AE::Graphics } // AE::Graphics +#undef VGFXALLOC #endif // AE_ENABLE_VULKAN diff --git a/AE/engine/src/graphics/Vulkan/Allocators/VGfxMemAllocatorUtils.cpp.h b/AE/engine/src/graphics/Vulkan/Allocators/VGfxMemAllocatorUtils.cpp.h index 5fadd646..5bec2e13 100644 --- a/AE/engine/src/graphics/Vulkan/Allocators/VGfxMemAllocatorUtils.cpp.h +++ b/AE/engine/src/graphics/Vulkan/Allocators/VGfxMemAllocatorUtils.cpp.h @@ -13,7 +13,7 @@ namespace AE::Graphics { CHECK_ERR( image != Default ); CHECK_ERR( desc.memType != Default ); - ASSERT_MSG( not AnyBits( desc.memType, EMemoryType::Dedicated ), + ASSERT_MSG( NoBits( desc.memType, EMemoryType::Dedicated ), "Dedicated allocation is not supported" ); auto& dev =
GraphicsScheduler().GetDevice(); @@ -56,7 +56,7 @@ namespace AE::Graphics CHECK_ERR( buffer != Default ); CHECK_ERR( desc.memType != Default ); - ASSERT_MSG( not AnyBits( desc.memType, EMemoryType::Dedicated ), + ASSERT_MSG( NoBits( desc.memType, EMemoryType::Dedicated ), "Dedicated allocation is not supported" ); auto& dev = GraphicsScheduler().GetDevice(); diff --git a/AE/engine/src/graphics/Vulkan/Allocators/VLinearMemAllocator.cpp b/AE/engine/src/graphics/Vulkan/Allocators/VLinearMemAllocator.cpp index 67cd7423..d7e7668c 100644 --- a/AE/engine/src/graphics/Vulkan/Allocators/VLinearMemAllocator.cpp +++ b/AE/engine/src/graphics/Vulkan/Allocators/VLinearMemAllocator.cpp @@ -271,4 +271,5 @@ namespace } // AE::Graphics +#undef VGFXALLOC #endif // AE_ENABLE_VULKAN diff --git a/AE/engine/src/graphics/Vulkan/Commands/VBarrierManager.cpp b/AE/engine/src/graphics/Vulkan/Commands/VBarrierManager.cpp index ff2ce763..0b0b1f3c 100644 --- a/AE/engine/src/graphics/Vulkan/Commands/VBarrierManager.cpp +++ b/AE/engine/src/graphics/Vulkan/Commands/VBarrierManager.cpp @@ -124,7 +124,7 @@ namespace AE::Graphics::_hidden_ barrier.srcStageMask |= (barrier.srcStageMask == 0 ? VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT : 0); // same as VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT barrier.dstStageMask |= (barrier.dstStageMask == 0 ? 
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT : 0); // same as VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT - ASSERT( not AnyBits( dstState, EResourceState::Invalidate )); + ASSERT( NoBits( dstState, EResourceState::Invalidate )); ASSERT( barrier.newLayout != VK_IMAGE_LAYOUT_UNDEFINED ); } @@ -567,14 +567,14 @@ namespace AE::Graphics::_hidden_ const uint idx = it->second.Index(); const auto dst_state = att_states[ idx ].initial; const bool req_barrier = EResourceState_RequireImageBarrier( att.initial, dst_state, Bool{att.relaxedStateTransition} ); - const bool is_valid = not AnyBits( dst_state, EResourceState::Invalidate ); + const bool is_valid = NoBits( dst_state, EResourceState::Invalidate ); if ( req_barrier and is_valid ) ImageBarrier( fb_images[ idx ], att.initial, dst_state ); const auto src_state = att_states[ idx ].final; const bool req_barrier2 = EResourceState_RequireImageBarrier( src_state, att.final, Bool{att.relaxedStateTransition} ); - const bool is_valid2 = not AnyBits( att.final, EResourceState::Invalidate ); + const bool is_valid2 = NoBits( att.final, EResourceState::Invalidate ); if ( req_barrier2 and is_valid2 ) finalStates[ idx ] = att.final; diff --git a/AE/engine/src/graphics/Vulkan/Commands/VBarrierManagerUtils.cpp.h b/AE/engine/src/graphics/Vulkan/Commands/VBarrierManagerUtils.cpp.h index c5d8944f..39196ad5 100644 --- a/AE/engine/src/graphics/Vulkan/Commands/VBarrierManagerUtils.cpp.h +++ b/AE/engine/src/graphics/Vulkan/Commands/VBarrierManagerUtils.cpp.h @@ -1,5 +1,7 @@ // Copyright (c) Zhirnov Andrey. 
For more information see 'LICENSE' +#pragma once + namespace AE::Graphics::_hidden_ { namespace diff --git a/AE/engine/src/graphics/Vulkan/Commands/VBaseIndirectContext.cpp b/AE/engine/src/graphics/Vulkan/Commands/VBaseIndirectContext.cpp index 1b40fc88..e869d13c 100644 --- a/AE/engine/src/graphics/Vulkan/Commands/VBaseIndirectContext.cpp +++ b/AE/engine/src/graphics/Vulkan/Commands/VBaseIndirectContext.cpp @@ -580,6 +580,12 @@ namespace AE::Graphics::_hidden_ fn.vkCmdSetFragmentShadingRateKHR( cmdbuf, &frag_size, combiner_ops ); } + static void Fn_SetViewportWScalingCmd (VulkanDeviceFn fn, VkCommandBuffer cmdbuf, const SetViewportWScalingCmd &cmd) __NE___ + { + auto* scaling = Cast( AlignUp( static_cast(&cmd + 1), AlignOf )); + fn.vkCmdSetViewportWScalingNV( cmdbuf, cmd.first, cmd.count, scaling ); + } + static void Fn_BindIndexBufferCmd (VulkanDeviceFn fn, VkCommandBuffer cmdbuf, const BindIndexBufferCmd &cmd) __NE___ { fn.vkCmdBindIndexBuffer( cmdbuf, cmd.buffer, cmd.offset, cmd.indexType ); diff --git a/AE/engine/src/graphics/Vulkan/Commands/VBaseIndirectContext.h b/AE/engine/src/graphics/Vulkan/Commands/VBaseIndirectContext.h index 1ff01284..7e6e947d 100644 --- a/AE/engine/src/graphics/Vulkan/Commands/VBaseIndirectContext.h +++ b/AE/engine/src/graphics/Vulkan/Commands/VBaseIndirectContext.h @@ -325,6 +325,13 @@ namespace AE::Graphics::_hidden_ VkFragmentShadingRateCombinerOpKHR textureOp; }; + struct SetViewportWScalingCmd : BaseCmd + { + ushort first; + ushort count; + //VkViewportWScalingNV scaling; + }; + struct BindIndexBufferCmd : BaseCmd { VkBuffer buffer; @@ -548,6 +555,7 @@ namespace AE::Graphics::_hidden_ _visitor_( SetStencilReferenceCmd )\ _visitor_( SetBlendConstantsCmd )\ _visitor_( SetFragmentShadingRateCmd )\ + _visitor_( SetViewportWScalingCmd )\ _visitor_( BindIndexBufferCmd )\ _visitor_( BindVertexBuffersCmd )\ _visitor_( DrawCmd )\ diff --git a/AE/engine/src/graphics/Vulkan/Commands/VDrawContext.cpp 
b/AE/engine/src/graphics/Vulkan/Commands/VDrawContext.cpp index 4649f26e..910218bf 100644 --- a/AE/engine/src/graphics/Vulkan/Commands/VDrawContext.cpp +++ b/AE/engine/src/graphics/Vulkan/Commands/VDrawContext.cpp @@ -260,6 +260,21 @@ namespace AE::Graphics::_hidden_ cmd.textureOp = textureOp; } +/* +================================================= + _SetViewportWScaling +================================================= +*/ + void _VIndirectDrawCtx::_SetViewportWScaling (ArrayView scaling) __Th___ + { + auto& cmd = _cmdbuf->CreateCmd< SetViewportWScalingCmd, VkViewportWScalingNV >( scaling.size() ); // throw + auto* dst = Cast( AlignUp( static_cast(&cmd + 1), AlignOf )); + + cmd.first = 0; + cmd.count = ushort(scaling.size()); + MemCopy( OUT dst, scaling.data(), ArraySizeOf(scaling) ); + } + /* ================================================= SetStencilCompareMask diff --git a/AE/engine/src/graphics/Vulkan/Commands/VDrawContext.h b/AE/engine/src/graphics/Vulkan/Commands/VDrawContext.h index 5c263f4b..a9fb7bd0 100644 --- a/AE/engine/src/graphics/Vulkan/Commands/VDrawContext.h +++ b/AE/engine/src/graphics/Vulkan/Commands/VDrawContext.h @@ -110,6 +110,7 @@ namespace AE::Graphics::_hidden_ void _SetFragmentShadingRate (const VkExtent2D &fragSize, VkFragmentShadingRateCombinerOpKHR primitiveOp, VkFragmentShadingRateCombinerOpKHR textureOp) __Th___; + void _SetViewportWScaling (ArrayView scaling) __Th___; void _BindVertexBuffers (uint firstBinding, ArrayView buffers, ArrayView offsets) __Th___; @@ -231,6 +232,7 @@ namespace AE::Graphics::_hidden_ void _SetFragmentShadingRate (const VkExtent2D &fragSize, VkFragmentShadingRateCombinerOpKHR primitiveOp, VkFragmentShadingRateCombinerOpKHR textureOp) __Th___; + void _SetViewportWScaling (ArrayView scaling) __Th___; void _BindVertexBuffers (uint firstBinding, ArrayView buffers, ArrayView offsets) __Th___; @@ -314,6 +316,7 @@ namespace AE::Graphics::_hidden_ void SetBlendConstants (const RGBA32f &color) __Th_OV { 
RawCtx::_SetBlendConstants( color ); } void SetDepthBounds (float minDepthBounds, float maxDepthBounds) __Th_OV; void SetFragmentShadingRate (EShadingRate, EShadingRateCombinerOp primitiveOp, EShadingRateCombinerOp textureOp) __Th_OV; + void SetViewportWScaling (ArrayView scaling) __Th_OV; using RawCtx::SetViewport; using RawCtx::SetScissor; @@ -634,6 +637,20 @@ namespace AE::Graphics::_hidden_ RawCtx::_SetFragmentShadingRate( VkExtent2D{size.x, size.y}, VEnumCast(primitiveOp), VEnumCast(textureOp) ); } +/* +================================================= + SetViewportWScaling +================================================= +*/ + template + void _VDrawContextImpl::SetViewportWScaling (ArrayView scaling) __Th___ + { + VALIDATE_GCTX( SetViewportWScaling( this->_GetDynamicStates(), scaling )); + StaticAssert( sizeof(VkViewportWScalingNV) == sizeof(float2) ); + + RawCtx::_SetViewportWScaling( scaling.Cast() ); + } + /* ================================================= SetStencilCompareMask @@ -1090,7 +1107,7 @@ namespace AE::Graphics::_hidden_ ================================================= */ inline void _VDirectDrawCtx::_SetFragmentShadingRate (const VkExtent2D &fragSize, VkFragmentShadingRateCombinerOpKHR primitiveOp, - VkFragmentShadingRateCombinerOpKHR textureOp) + VkFragmentShadingRateCombinerOpKHR textureOp) __Th___ { ASSERT( fragSize.width <= 4 and fragSize.height <= 4 ); @@ -1099,6 +1116,16 @@ namespace AE::Graphics::_hidden_ vkCmdSetFragmentShadingRateKHR( _cmdbuf.Get(), &fragSize, combiner_ops ); } +/* +================================================= + _SetViewportWScaling +================================================= +*/ + inline void _VDirectDrawCtx::_SetViewportWScaling (ArrayView scaling) __Th___ + { + vkCmdSetViewportWScalingNV( _cmdbuf.Get(), 0, uint(scaling.size()), scaling.data() ); + } + /* ================================================= SetStencilCompareMask diff --git a/AE/engine/src/graphics/Vulkan/Commands/VGraphicsContext.cpp 
b/AE/engine/src/graphics/Vulkan/Commands/VGraphicsContext.cpp index 1228bed2..7a9fa15b 100644 --- a/AE/engine/src/graphics/Vulkan/Commands/VGraphicsContext.cpp +++ b/AE/engine/src/graphics/Vulkan/Commands/VGraphicsContext.cpp @@ -97,7 +97,7 @@ namespace ConvertViewports ================================================= */ - void ConvertViewports (ArrayView inViewports, ArrayView inScissors, + void ConvertViewports (ArrayView inViewports, ArrayView inScissors, const int2 &areaSize, OUT VDrawCommandBatch::Viewports_t &outViewports, OUT VDrawCommandBatch::Scissors_t &outScissors) __NE___ { ASSERT( not inViewports.empty() ); @@ -120,10 +120,10 @@ namespace VkRect2D& rect = outScissors.emplace_back(); if ( inScissors.empty() ) { - rect.offset.x = RoundToInt( src.rect.left ); - rect.offset.y = RoundToInt( src.rect.top ); - rect.extent.width = RoundToInt( src.rect.Width() ); - rect.extent.height = RoundToInt( src.rect.Height() ); + rect.offset.x = RoundToInt( Max( src.rect.left, 0.f )); + rect.offset.y = RoundToInt( Max( src.rect.top, 0.f )); + rect.extent.width = Min( RoundToInt( src.rect.Width() ), areaSize.x ); + rect.extent.height = Min( RoundToInt( src.rect.Height() ), areaSize.y ); } else { @@ -185,7 +185,7 @@ namespace { Viewports_t viewports; Scissors_t scissors; - ConvertViewports( desc.viewports, Default, OUT viewports, OUT scissors ); + ConvertViewports( desc.viewports, Default, desc.area.Size(), OUT viewports, OUT scissors ); vkCmdSetViewport( _cmdbuf.Get(), 0, uint(viewports.size()), viewports.data() ); vkCmdSetScissor( _cmdbuf.Get(), 0, uint(scissors.size()), scissors.data() ); @@ -291,7 +291,7 @@ namespace { Viewports_t viewports; Scissors_t scissors; - ConvertViewports( desc.viewports, Default, OUT viewports, OUT scissors ); + ConvertViewports( desc.viewports, Default, desc.area.Size(), OUT viewports, OUT scissors ); // viewports { diff --git a/AE/engine/src/graphics/Vulkan/Commands/VGraphicsContext.h 
b/AE/engine/src/graphics/Vulkan/Commands/VGraphicsContext.h index 0c5233a9..61fb788b 100644 --- a/AE/engine/src/graphics/Vulkan/Commands/VGraphicsContext.h +++ b/AE/engine/src/graphics/Vulkan/Commands/VGraphicsContext.h @@ -142,7 +142,7 @@ namespace AE::Graphics::_hidden_ }; - void ConvertViewports (ArrayView inViewports, ArrayView inScissors, + void ConvertViewports (ArrayView inViewports, ArrayView inScissors, const int2 &areaSize, OUT VDrawCommandBatch::Viewports_t &outViewports, OUT VDrawCommandBatch::Scissors_t &outScissors) __NE___; } // AE::Graphics::_hidden_ diff --git a/AE/engine/src/graphics/Vulkan/Commands/VTransferContext.h b/AE/engine/src/graphics/Vulkan/Commands/VTransferContext.h index 414793ee..0532dc9c 100644 --- a/AE/engine/src/graphics/Vulkan/Commands/VTransferContext.h +++ b/AE/engine/src/graphics/Vulkan/Commands/VTransferContext.h @@ -356,7 +356,7 @@ namespace AE::Graphics::_hidden_ ArrayView mem_view = ArrayView{ Cast(mem_info.mappedPtr + offset), usize(size) }; - if_unlikely( not AllBits( mem_info.flags, VK_MEMORY_PROPERTY_HOST_COHERENT_BIT )) + if_unlikely( NoBits( mem_info.flags, VK_MEMORY_PROPERTY_HOST_COHERENT_BIT )) { GCTX_CHECK( offset + size <= mem_info.size ); this->_mngr.GetStagingManager().AcquireMappedMemory( GetFrameId(), mem_info.memory, mem_info.offset + offset, size ); @@ -414,7 +414,7 @@ namespace AE::Graphics::_hidden_ MemCopy( OUT mem_info.mappedPtr + offset, data, size ); - if_unlikely( not AllBits( mem_info.flags, VK_MEMORY_PROPERTY_HOST_COHERENT_BIT )) + if_unlikely( NoBits( mem_info.flags, VK_MEMORY_PROPERTY_HOST_COHERENT_BIT )) { VkMappedMemoryRange range; range.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; @@ -730,7 +730,7 @@ namespace AE::Graphics::_hidden_ range.mipmapCount = ushort(desc.mipLevels.Get()); VALIDATE_GCTX( GenerateMipmaps( img.Description(), {range} )); - RawCtx::GenerateMipmaps( img.Handle(), desc.dimension, {range}, srcState ); + RawCtx::GenerateMipmaps( img.Handle(), desc.Dimension(), {range}, 
srcState ); } template @@ -740,7 +740,7 @@ namespace AE::Graphics::_hidden_ ImageDesc const& desc = img.Description(); VALIDATE_GCTX( GenerateMipmaps( img.Description(), ranges )); - RawCtx::GenerateMipmaps( img.Handle(), desc.dimension, ranges, srcState ); + RawCtx::GenerateMipmaps( img.Handle(), desc.Dimension(), ranges, srcState ); } /* diff --git a/AE/engine/src/graphics/Vulkan/Descriptors/VDescriptorUpdater.cpp b/AE/engine/src/graphics/Vulkan/Descriptors/VDescriptorUpdater.cpp index 1373e3cb..542095f9 100644 --- a/AE/engine/src/graphics/Vulkan/Descriptors/VDescriptorUpdater.cpp +++ b/AE/engine/src/graphics/Vulkan/Descriptors/VDescriptorUpdater.cpp @@ -189,6 +189,7 @@ namespace default_unlikely : RETURN_ERR( "unknown update mode" ); } + switch_end _Reset(); return true; @@ -359,7 +360,7 @@ namespace CHECK( not is_sp_input or is_sp_input == AllBits( desc.usage, EImageUsage::InputAttachment )); Unused( img_type ); - ASSERT_MSG( PipelineCompiler::EImageType_IsCompatible( img_type, un->image.type ), + CHECK_ERR_MSG( PipelineCompiler::EImageType_IsCompatible( img_type, un->image.type ), "image view '"s << view->GetDebugName() << "' with type (" << ToString( img_type ) << ") is not compatible with sampler '" << name.GetName() << "' type (" << ToString( un->image.type ) << ")" ); ) @@ -479,7 +480,7 @@ namespace CHECK( AllBits( desc.usage, EImageUsage::Sampled )); Unused( img_type ); - ASSERT_MSG( PipelineCompiler::EImageType_IsCompatible( img_type, un->image.type ), + CHECK_ERR_MSG( PipelineCompiler::EImageType_IsCompatible( img_type, un->image.type ), "image view '"s << view->GetDebugName() << "' with type " << ToString( img_type ) << " is not compatible with sampler '" << name.GetName() << "' type " << ToString( un->image.type )); ) @@ -896,7 +897,7 @@ namespace CHECK( not is_storage or is_storage == AllBits( desc.usage, EBufferUsage::StorageTexel )); Unused( img_type ); - ASSERT_MSG( PipelineCompiler::EImageType_IsCompatible( img_type, un->texelBuffer.type ), + 
CHECK_ERR_MSG( PipelineCompiler::EImageType_IsCompatible( img_type, un->texelBuffer.type ), "buffer view '"s << view->GetDebugName() << "' with type " << ToString( img_type ) << " is not compatible with sampler '" << name.GetName() << "' type " << ToString( un->texelBuffer.type )); ) diff --git a/AE/engine/src/graphics/Vulkan/Resources/VBuffer.cpp b/AE/engine/src/graphics/Vulkan/Resources/VBuffer.cpp index 16b9c467..96a551f0 100644 --- a/AE/engine/src/graphics/Vulkan/Resources/VBuffer.cpp +++ b/AE/engine/src/graphics/Vulkan/Resources/VBuffer.cpp @@ -165,7 +165,7 @@ namespace AE::Graphics { DRC_EXLOCK( _drCheck ); - const bool is_internal = not AllBits( _desc.memType, EMemoryType::_External ); + const bool is_internal = NoBits( _desc.memType, EMemoryType::_External ); auto& dev = resMngr.GetDevice(); if ( is_internal and _buffer != Default ) @@ -194,7 +194,7 @@ namespace AE::Graphics desc.size = _desc.size; desc.queues = _desc.queues; desc.memFlags = VEnumCast( _desc.memType ); - desc.canBeDestroyed = not AllBits( _desc.memType, EMemoryType::_External ); + desc.canBeDestroyed = NoBits( _desc.memType, EMemoryType::_External ); return desc; } diff --git a/AE/engine/src/graphics/Vulkan/Resources/VComputePipeline.cpp b/AE/engine/src/graphics/Vulkan/Resources/VComputePipeline.cpp index ff2dae16..162de79e 100644 --- a/AE/engine/src/graphics/Vulkan/Resources/VComputePipeline.cpp +++ b/AE/engine/src/graphics/Vulkan/Resources/VComputePipeline.cpp @@ -48,7 +48,11 @@ namespace AE::Graphics ci.templCI.localSizeSpec.z == UMax or ci.specCI.localSize.z == UMax ? 
ci.templCI.defaultLocalSize.z : ci.specCI.localSize.z }; CHECK_ERR( All( _localSize > Zero )); - VkComputePipelineCreateInfo pipeline_info = {}; + const uint total_threads = Area( uint3{_localSize} ); + CHECK_ERR( total_threads <= dev.GetVProperties().properties.limits.maxComputeWorkGroupInvocations ); + + VkComputePipelineCreateInfo pipeline_info = {}; + VkPipelineShaderStageRequiredSubgroupSizeCreateInfo subgroup_size_ci; // TODO: VkPipelineCreateFlags2CreateInfoKHR (VK_KHR_maintenance5) @@ -65,6 +69,31 @@ namespace AE::Graphics pipeline_info.basePipelineHandle= Default; pipeline_info.basePipelineIndex = -1; + if ( ci.specCI.subgroupSize != 0 ) + { + auto& feats = dev.GetVProperties().subgroupSizeControlFeats; + auto& props = dev.GetVProperties().subgroupSizeControlProps; + + CHECK_ERR( dev.GetVExtensions().subgroupSizeControl ); + CHECK_ERR( ci.specCI.subgroupSize >= props.minSubgroupSize ); + CHECK_ERR( ci.specCI.subgroupSize <= props.maxSubgroupSize ); + CHECK_ERR( DivCeil( total_threads, ci.specCI.subgroupSize ) <= props.maxComputeWorkgroupSubgroups ); + CHECK_ERR( AllBits( props.requiredSubgroupSizeStages, VK_SHADER_STAGE_COMPUTE_BIT )); + CHECK_ERR( IsMultipleOf( _localSize.x, ci.specCI.subgroupSize )); + + // Vulkan docs: + // "VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT and VK_SHADER_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT are effectively deprecated when + // compiling SPIR-V 1.6 shaders, as this behavior is the default for Vulkan with SPIR-V 1.6. This is more aligned with developer expectations, + // and avoids applications unexpectedly breaking in the future." 
+ if ( feats.computeFullSubgroups ) + pipeline_info.stage.flags |= VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT; + + pipeline_info.stage.pNext = &subgroup_size_ci; + subgroup_size_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO; + subgroup_size_ci.pNext = null; + subgroup_size_ci.requiredSubgroupSize = ci.specCI.subgroupSize; + } + const auto AddCustomSpec = [&ci, this] (VkShaderStageFlagBits, VkSpecializationMapEntry* entryArr, uint* dataArr, OUT uint &count) __NE___ {{ count = Sum( ci.templCI.localSizeSpec != UMax ); diff --git a/AE/engine/src/graphics/Vulkan/Resources/VFramebuffer.cpp b/AE/engine/src/graphics/Vulkan/Resources/VFramebuffer.cpp index be4b6d48..9c6f9e9f 100644 --- a/AE/engine/src/graphics/Vulkan/Resources/VFramebuffer.cpp +++ b/AE/engine/src/graphics/Vulkan/Resources/VFramebuffer.cpp @@ -58,7 +58,6 @@ namespace AE::Graphics { DRC_EXLOCK( _drCheck ); CHECK_ERR( not _framebuffer ); - CHECK_ERR( not rpDesc.attachments.empty() ); _renderPassId = resMngr.AcquireResource( rpId ); CHECK_ERR( _renderPassId ); diff --git a/AE/engine/src/graphics/Vulkan/Resources/VGraphicsPipeline.cpp b/AE/engine/src/graphics/Vulkan/Resources/VGraphicsPipeline.cpp index 7c53b194..6dc74ae4 100644 --- a/AE/engine/src/graphics/Vulkan/Resources/VGraphicsPipeline.cpp +++ b/AE/engine/src/graphics/Vulkan/Resources/VGraphicsPipeline.cpp @@ -59,17 +59,18 @@ namespace AE::Graphics subpass = &render_pass->Subpasses()[ subpass_idx ]; } - VkGraphicsPipelineCreateInfo pipeline_info = {}; - VkPipelineInputAssemblyStateCreateInfo input_assembly_info = {}; - VkPipelineColorBlendStateCreateInfo blend_info = {}; - VkPipelineDepthStencilStateCreateInfo depth_stencil_info = {}; - VkPipelineMultisampleStateCreateInfo multisample_info = {}; - VkPipelineRasterizationStateCreateInfo rasterization_info = {}; - VkPipelineTessellationStateCreateInfo tessellation_info = {}; - VkPipelineDynamicStateCreateInfo dynamic_state_info = {}; - 
VkPipelineVertexInputStateCreateInfo vertex_input_info = {}; - VkPipelineViewportStateCreateInfo viewport_info = {}; - VTempLinearAllocator allocator; + VkGraphicsPipelineCreateInfo pipeline_info = {}; + VkPipelineInputAssemblyStateCreateInfo input_assembly_info = {}; + VkPipelineColorBlendStateCreateInfo blend_info = {}; + VkPipelineDepthStencilStateCreateInfo depth_stencil_info = {}; + VkPipelineMultisampleStateCreateInfo multisample_info = {}; + VkPipelineRasterizationStateCreateInfo rasterization_info = {}; + VkPipelineTessellationStateCreateInfo tessellation_info = {}; + VkPipelineDynamicStateCreateInfo dynamic_state_info = {}; + VkPipelineVertexInputStateCreateInfo vertex_input_info = {}; + VkPipelineViewportStateCreateInfo viewport_info = {}; + VkPipelineViewportWScalingStateCreateInfoNV w_scaling = {}; + VTempLinearAllocator allocator; // TODO: VkPipelineCreateFlags2CreateInfoKHR (VK_KHR_maintenance5) @@ -112,6 +113,14 @@ namespace AE::Graphics pipeline_info.pColorBlendState = null; } + if ( AllBits( ci.specCI.dynamicState, EPipelineDynamicState::ViewportWScaling )) + { + w_scaling.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_W_SCALING_STATE_CREATE_INFO_NV; + w_scaling.viewportWScalingEnable = VK_TRUE; + w_scaling.viewportCount = ci.specCI.viewportCount; + viewport_info.pNext = &w_scaling; + } + VK_CHECK_ERR( CreateGraphicsPipelines( dev, ppln_cache, 1, &pipeline_info, null, OUT &_handle )); dev.SetObjectName( _handle, ci.specCI.dbgName, VK_OBJECT_TYPE_PIPELINE ); diff --git a/AE/engine/src/graphics/Vulkan/Resources/VImage.cpp b/AE/engine/src/graphics/Vulkan/Resources/VImage.cpp index 2e05e6a2..d9bad5df 100644 --- a/AE/engine/src/graphics/Vulkan/Resources/VImage.cpp +++ b/AE/engine/src/graphics/Vulkan/Resources/VImage.cpp @@ -38,6 +38,9 @@ namespace { */ ND_ static bool CheckFormatFeatures (const VResourceManager &resMngr, VkFormat format, EImageUsage usage, EImageOpt options, bool optTiling) __NE___ { + if ( AllBits( options, EImageOpt::ExtendedUsage )) + 
return true; + const auto& dev = resMngr.GetDevice(); const auto& fs = resMngr.GetFeatureSet(); @@ -143,7 +146,7 @@ namespace { DRC_EXLOCK( _drCheck ); CHECK_ERR( _image == Default ); CHECK_ERR( _memoryId == Default ); - CHECK_ERR( All( desc.dimension > uint3{0} )); + CHECK_ERR( All( desc.dimension > ImageDim_t{0} )); CHECK_ERR( desc.imageDim != Default ); CHECK_ERR( desc.arrayLayers > 0_layer ); CHECK_ERR( desc.mipLevels > 0_mipmap ); @@ -250,7 +253,7 @@ namespace { _image = desc.image; _desc.imageDim = AEEnumCast( desc.imageType ); _desc.options = AEEnumCast( desc.flags ) | desc.options; - _desc.dimension = desc.dimension; + _desc.dimension = CheckCast( desc.dimension ); _desc.format = AEEnumCast( desc.format ); _desc.arrayLayers = ImageLayer{ desc.arrayLayers }; _desc.mipLevels = MipmapLevel{ desc.mipLevels }; @@ -295,7 +298,7 @@ namespace { { DRC_EXLOCK( _drCheck ); - const bool is_internal = not AllBits( _desc.memType, EMemoryType::_External ); + const bool is_internal = NoBits( _desc.memType, EMemoryType::_External ); auto& dev = resMngr.GetDevice(); if ( is_internal and _image != Default ) @@ -326,13 +329,13 @@ namespace { desc.usage = VEnumCast( _desc.usage, _desc.memType ); desc.format = VEnumCast( _desc.format ); desc.samples = VEnumCast( _desc.samples ); - desc.dimension = _desc.dimension; + desc.dimension = _desc.Dimension(); desc.arrayLayers = _desc.arrayLayers.Get(); desc.mipLevels = _desc.mipLevels.Get(); desc.tiling = AllBits( _desc.memType, EMemoryType::DeviceLocal ) ? 
VK_IMAGE_TILING_OPTIMAL : VK_IMAGE_TILING_LINEAR; desc.queues = _desc.queues; desc.memFlags = VEnumCast( _desc.memType ); - desc.canBeDestroyed = not AllBits( _desc.memType, EMemoryType::_External ); + desc.canBeDestroyed = NoBits( _desc.memType, EMemoryType::_External ); return desc; } @@ -343,8 +346,6 @@ namespace { */ bool VImage::IsSupported (const VResourceManager &resMngr, const ImageDesc &desc) __NE___ { - StaticAssert( uint(EImageOpt::All) == 0x1FFFF ); - const auto& dev = resMngr.GetDevice(); const auto& dev_props = dev.GetVProperties(); const bool opt_tiling = AllBits( desc.memType, EMemoryType::DeviceLocal ); @@ -401,6 +402,7 @@ namespace { case EImageOpt::BlitSrc : case EImageOpt::BlitDst : case EImageOpt::BlockTexelViewCompatible : + case EImageOpt::ExtendedUsage : case EImageOpt::StorageAtomic : case EImageOpt::VertexPplnStore : case EImageOpt::FragmentPplnStore : @@ -439,7 +441,7 @@ namespace { if_unlikely( desc.arrayLayers.Get() > props.maxArrayLayers ) return false; - if_unlikely( not AllBits( props.sampleCounts, desc.samples.Get() )) + if_unlikely( NoBits( props.sampleCounts, desc.samples.Get() )) return false; } diff --git a/AE/engine/src/graphics/Vulkan/Resources/VImage.h b/AE/engine/src/graphics/Vulkan/Resources/VImage.h index e748da84..7b537326 100644 --- a/AE/engine/src/graphics/Vulkan/Resources/VImage.h +++ b/AE/engine/src/graphics/Vulkan/Resources/VImage.h @@ -47,7 +47,7 @@ namespace AE::Graphics ND_ ImageDesc const& Description () C_NE___ { DRC_SHAREDLOCK( _drCheck ); return _desc; } ND_ VkImageAspectFlags AspectMask () C_NE___ { DRC_SHAREDLOCK( _drCheck ); return _aspectMask; } - ND_ uint3 const Dimension () C_NE___ { DRC_SHAREDLOCK( _drCheck ); return _desc.dimension; } + ND_ uint3 const Dimension () C_NE___ { DRC_SHAREDLOCK( _drCheck ); return _desc.Dimension(); } ND_ uint Width () C_NE___ { DRC_SHAREDLOCK( _drCheck ); return _desc.dimension.x; } ND_ uint Height () C_NE___ { DRC_SHAREDLOCK( _drCheck ); return _desc.dimension.y; } ND_ 
uint Depth () C_NE___ { DRC_SHAREDLOCK( _drCheck ); return _desc.dimension.z; } diff --git a/AE/engine/src/graphics/Vulkan/Resources/VImageView.cpp b/AE/engine/src/graphics/Vulkan/Resources/VImageView.cpp index e5816444..ee9f8838 100644 --- a/AE/engine/src/graphics/Vulkan/Resources/VImageView.cpp +++ b/AE/engine/src/graphics/Vulkan/Resources/VImageView.cpp @@ -97,6 +97,7 @@ namespace AE::Graphics _desc.mipmapCount = ushort(desc.subresourceRange.levelCount); _desc.baseLayer = ImageLayer{ desc.subresourceRange.baseArrayLayer }; _desc.layerCount = ushort(desc.subresourceRange.layerCount); + _desc.dimension = CheckCast( desc.dimension ); // TODO: swizzle _canBeDestroyed = desc.canBeDestroyed; diff --git a/AE/engine/src/graphics/Vulkan/Resources/VMeshPipeline.cpp b/AE/engine/src/graphics/Vulkan/Resources/VMeshPipeline.cpp index f09a0789..ce334ef3 100644 --- a/AE/engine/src/graphics/Vulkan/Resources/VMeshPipeline.cpp +++ b/AE/engine/src/graphics/Vulkan/Resources/VMeshPipeline.cpp @@ -72,16 +72,17 @@ namespace AE::Graphics CHECK_ERR( All( _taskLocalSize > Zero )); } - VkGraphicsPipelineCreateInfo pipeline_info = {}; - VkPipelineInputAssemblyStateCreateInfo input_assembly_info = {}; - VkPipelineColorBlendStateCreateInfo blend_info = {}; - VkPipelineDepthStencilStateCreateInfo depth_stencil_info = {}; - VkPipelineMultisampleStateCreateInfo multisample_info = {}; - VkPipelineRasterizationStateCreateInfo rasterization_info = {}; - VkPipelineDynamicStateCreateInfo dynamic_state_info = {}; - VkPipelineVertexInputStateCreateInfo vertex_input_info = {}; - VkPipelineViewportStateCreateInfo viewport_info = {}; - VTempLinearAllocator allocator; + VkGraphicsPipelineCreateInfo pipeline_info = {}; + VkPipelineInputAssemblyStateCreateInfo input_assembly_info = {}; + VkPipelineColorBlendStateCreateInfo blend_info = {}; + VkPipelineDepthStencilStateCreateInfo depth_stencil_info = {}; + VkPipelineMultisampleStateCreateInfo multisample_info = {}; + VkPipelineRasterizationStateCreateInfo 
rasterization_info = {}; + VkPipelineDynamicStateCreateInfo dynamic_state_info = {}; + VkPipelineVertexInputStateCreateInfo vertex_input_info = {}; + VkPipelineViewportStateCreateInfo viewport_info = {}; + VkPipelineViewportWScalingStateCreateInfoNV w_scaling = {}; + VTempLinearAllocator allocator; // TODO: VkPipelineCreateFlags2CreateInfoKHR (VK_KHR_maintenance5) @@ -200,6 +201,14 @@ namespace AE::Graphics pipeline_info.pColorBlendState = null; } + if ( AllBits( ci.specCI.dynamicState, EPipelineDynamicState::ViewportWScaling )) + { + w_scaling.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_W_SCALING_STATE_CREATE_INFO_NV; + w_scaling.viewportWScalingEnable = VK_TRUE; + w_scaling.viewportCount = ci.specCI.viewportCount; + viewport_info.pNext = &w_scaling; + } + auto& dev = resMngr.GetDevice(); VK_CHECK_ERR( CreateGraphicsPipelines( dev, ppln_cache, 1, &pipeline_info, null, OUT &_handle )); diff --git a/AE/engine/src/graphics/Vulkan/Resources/VPipelineHelper.cpp.h b/AE/engine/src/graphics/Vulkan/Resources/VPipelineHelper.cpp.h index edd92ca9..4c91070d 100644 --- a/AE/engine/src/graphics/Vulkan/Resources/VPipelineHelper.cpp.h +++ b/AE/engine/src/graphics/Vulkan/Resources/VPipelineHelper.cpp.h @@ -1,5 +1,7 @@ // Copyright (c) Zhirnov Andrey. 
For more information see 'LICENSE' +#pragma once + #ifdef AE_ENABLE_GLSL_TRACE # include "ShaderTrace.h" #else @@ -303,7 +305,7 @@ namespace outState.pNext = null; outState.flags = 0; outState.polygonMode = VEnumCast( inState.polygonMode ); - outState.lineWidth = 2.f; //inState.lineWidth; + outState.lineWidth = 1.f; //inState.lineWidth; // TODO outState.depthBiasConstantFactor = inState.depthBiasConstFactor; outState.depthBiasClamp = inState.depthBiasClamp; outState.depthBiasSlopeFactor = inState.depthBiasSlopeFactor; @@ -559,7 +561,7 @@ namespace __try { return device.vkCreateComputePipelines( device.GetVkDevice(), pipelineCache, createInfoCount, pCreateInfos, pAllocator, OUT pPipelines ); } - __except ( AnyEqual( GetExceptionCode(), AE_SEH_STACK_OVERFLOW, AE_SEH_ACCESS_VIOLATION ) ? EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH ) + __except( AnyEqual( GetExceptionCode(), AE_SEH_STACK_OVERFLOW, AE_SEH_ACCESS_VIOLATION ) ? EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH ) { if ( GetExceptionCode() == AE_SEH_STACK_OVERFLOW ) _resetstkoflw(); @@ -577,7 +579,7 @@ namespace __try { return device.vkCreateGraphicsPipelines( device.GetVkDevice(), pipelineCache, createInfoCount, pCreateInfos, pAllocator, OUT pPipelines ); } - __except ( AnyEqual( GetExceptionCode(), AE_SEH_STACK_OVERFLOW, AE_SEH_ACCESS_VIOLATION ) ? EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH ) + __except( AnyEqual( GetExceptionCode(), AE_SEH_STACK_OVERFLOW, AE_SEH_ACCESS_VIOLATION ) ? EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH ) { if ( GetExceptionCode() == AE_SEH_STACK_OVERFLOW ) _resetstkoflw(); @@ -595,7 +597,7 @@ namespace __try { return device.vkCreateRayTracingPipelinesKHR( device.GetVkDevice(), deferredOperation, pipelineCache, createInfoCount, pCreateInfos, pAllocator, OUT pPipelines ); } - __except ( AnyEqual( GetExceptionCode(), AE_SEH_STACK_OVERFLOW, AE_SEH_ACCESS_VIOLATION ) ? 
EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH ) + __except( AnyEqual( GetExceptionCode(), AE_SEH_STACK_OVERFLOW, AE_SEH_ACCESS_VIOLATION ) ? EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH ) { if ( GetExceptionCode() == AE_SEH_STACK_OVERFLOW ) _resetstkoflw(); diff --git a/AE/engine/src/graphics/Vulkan/Resources/VQueryManager.cpp b/AE/engine/src/graphics/Vulkan/Resources/VQueryManager.cpp index c4be4cb7..5c55d85c 100644 --- a/AE/engine/src/graphics/Vulkan/Resources/VQueryManager.cpp +++ b/AE/engine/src/graphics/Vulkan/Resources/VQueryManager.cpp @@ -343,7 +343,7 @@ Supported queue types: Graphics / Compute switch_enum( type ) { case_likely EQueryType::Timestamp : - if_unlikely( not AllBits( _timestampAllowed, EQueueMask(0) | queueType )) + if_unlikely( NoBits( _timestampAllowed, EQueueMask(0) | queueType )) return Default; break; @@ -360,7 +360,7 @@ Supported queue types: Graphics / Compute case EQueryType::AccelStructCompactedSize : case EQueryType::AccelStructSize : case EQueryType::AccelStructSerializationSize : - if_unlikely( not AllBits( EQueueMask::Graphics | EQueueMask::AsyncCompute, EQueueMask(0) | queueType )) + if_unlikely( NoBits( EQueueMask::Graphics | EQueueMask::AsyncCompute, EQueueMask(0) | queueType )) return Default; break; diff --git a/AE/engine/src/graphics/Vulkan/Resources/VRTScene.cpp b/AE/engine/src/graphics/Vulkan/Resources/VRTScene.cpp index bdb303f1..fe9386da 100644 --- a/AE/engine/src/graphics/Vulkan/Resources/VRTScene.cpp +++ b/AE/engine/src/graphics/Vulkan/Resources/VRTScene.cpp @@ -244,10 +244,10 @@ namespace AE::Graphics auto* buf = resMngr.GetResource( build.instanceData.id, False{"don't inc ref"}, True{"quiet"} ); Bytes size = build.instanceData.offset + build.instanceData.stride * build.maxInstanceCount; - if_unlikely( buf == null or - build.instanceData.offset >= buf->Size() or - size > buf->Size() or - not AllBits( buf->Description().usage, EBufferUsage::ASBuild_ReadOnly ) or + if_unlikely( buf == null or + 
build.instanceData.offset >= buf->Size() or + size > buf->Size() or + NoBits( buf->Description().usage, EBufferUsage::ASBuild_ReadOnly ) or not IsMultipleOf( ulong(buf->GetDeviceAddress() + build.instanceData.offset), props.instanceDataAlign )) return false; } @@ -255,9 +255,9 @@ namespace AE::Graphics if ( build.scratch.id != Default ) { auto* buf = resMngr.GetResource( build.scratch.id, False{"don't inc ref"}, True{"quiet"} ); - if_unlikely( buf == null or - build.scratch.offset >= buf->Size() or - not AllBits( buf->Description().usage, EBufferUsage::ASBuild_Scratch ) or + if_unlikely( buf == null or + build.scratch.offset >= buf->Size() or + NoBits( buf->Description().usage, EBufferUsage::ASBuild_Scratch ) or not IsMultipleOf( ulong(buf->GetDeviceAddress() + build.scratch.offset), props.scratchBufferAlign )) return false; } diff --git a/AE/engine/src/graphics/Vulkan/Utils/RenderDocApi.cpp b/AE/engine/src/graphics/Vulkan/Utils/RenderDocApi.cpp index b9bf4db4..9493a724 100644 --- a/AE/engine/src/graphics/Vulkan/Utils/RenderDocApi.cpp +++ b/AE/engine/src/graphics/Vulkan/Utils/RenderDocApi.cpp @@ -17,10 +17,7 @@ namespace AE::Graphics { namespace { - using RDocApi_t = RENDERDOC_API_1_6_0; - - static constexpr Version3 min_ver { 1, 4, 0 }; - static constexpr RENDERDOC_Version min_ver2 = RENDERDOC_Version( (min_ver.major * 10000) + (min_ver.minor * 100) + (min_ver.patch) ); + using RDocApi_t = RENDERDOC_API_1_6_0; } /* @@ -30,6 +27,9 @@ namespace */ bool RenderDocApi::Initialize (VkInstance instance, const NativeWindow &wndHandle) __NE___ { + constexpr Version3 min_ver { 1, 4, 0 }; + constexpr RENDERDOC_Version min_ver2 = RENDERDOC_Version( (min_ver.major * 10000) + (min_ver.minor * 100) + (min_ver.patch) ); + if ( _api != null ) return true; diff --git a/AE/engine/src/graphics/Vulkan/Utils/VAMDPerfProfiler.cpp b/AE/engine/src/graphics/Vulkan/Utils/VAMDPerfProfiler.cpp index e5e71d12..f4984395 100644 --- a/AE/engine/src/graphics/Vulkan/Utils/VAMDPerfProfiler.cpp +++ 
b/AE/engine/src/graphics/Vulkan/Utils/VAMDPerfProfiler.cpp @@ -9,6 +9,7 @@ # endif # define DISABLE_GPA 0 +# include "base/Platforms/WindowsHeader.cpp.h" # include "gpu_performance_api/gpu_perf_api.h" # ifdef AE_COMPILER_MSVC @@ -199,7 +200,7 @@ namespace { bool VAMDPerfProfiler::_Initialize (const VDevice &dev, const EDeviceFlags devFlags) __NE___ { - if ( not AnyBits( devFlags, EDeviceFlags::_AmdApiMask )) + if ( NoBits( devFlags, EDeviceFlags::_AmdApiMask )) return false; const auto AMD_VENDOR_ID = 0x01002; @@ -287,6 +288,7 @@ namespace { case kGpaHwGenerationLast : default : break; } + switch_end } } @@ -318,7 +320,7 @@ namespace { { if ( _impl->ctxId != null ) { - _impl->fnTable.GpaCloseContext( _impl->ctxId ); + //_impl->fnTable.GpaCloseContext( _impl->ctxId ); _impl->ctxId = null; } diff --git a/AE/engine/src/graphics/Vulkan/Utils/VAMDPerfProfiler.h b/AE/engine/src/graphics/Vulkan/Utils/VAMDPerfProfiler.h index 3bfc83a6..d19ccce3 100644 --- a/AE/engine/src/graphics/Vulkan/Utils/VAMDPerfProfiler.h +++ b/AE/engine/src/graphics/Vulkan/Utils/VAMDPerfProfiler.h @@ -1,6 +1,8 @@ // Copyright (c) Zhirnov Andrey. For more information see 'LICENSE' /* API for AMD GPU Perf SDK. + + [Performance counters description](https://github.com/azhirnov/cpu-gpu-arch/blob/main/gpu/AMD_PC.md) */ #pragma once diff --git a/AE/engine/src/graphics/Vulkan/Utils/VNvPerfProfiler.h b/AE/engine/src/graphics/Vulkan/Utils/VNvPerfProfiler.h index 65b815b0..79844bdf 100644 --- a/AE/engine/src/graphics/Vulkan/Utils/VNvPerfProfiler.h +++ b/AE/engine/src/graphics/Vulkan/Utils/VNvPerfProfiler.h @@ -1,6 +1,8 @@ // Copyright (c) Zhirnov Andrey. 
For more information see 'LICENSE' /* API for NSight Perf SDK + + [Performance counters description](https://github.com/azhirnov/cpu-gpu-arch/blob/main/gpu/NVidia_PC.md) */ #pragma once diff --git a/AE/engine/src/graphics/Vulkan/VDevice.cpp b/AE/engine/src/graphics/Vulkan/VDevice.cpp index c90de14e..c89a503e 100644 --- a/AE/engine/src/graphics/Vulkan/VDevice.cpp +++ b/AE/engine/src/graphics/Vulkan/VDevice.cpp @@ -43,6 +43,8 @@ namespace # include "vulkan_loader/vkenum_to_str.h" +# ifndef AE_CFG_RELEASE + /* ================================================= VK_DBGUTILS_DBGREPORT_OBJECT_TYPES @@ -234,7 +236,8 @@ namespace switch_end return "unknown"; } -} +# endif // AE_CFG_RELEASE +} // namespace //----------------------------------------------------------------------------- @@ -289,6 +292,7 @@ namespace */ bool VDevice::SetObjectName (ulong id, NtStringView name, VkObjectType type) C_NE___ { + #ifndef AE_CFG_RELEASE if ( name.empty() or id == 0 ) return false; @@ -318,6 +322,9 @@ namespace return true; } + #else + Unused( id, name, type ); + #endif return false; } @@ -334,7 +341,7 @@ namespace for (uint i = 0; (1u << i) <= uint(mask); ++i) { - if ( not AllBits( mask, 1u << i )) + if ( NoBits( mask, 1u << i )) continue; auto q = GetQueue( EQueueType(i) ); @@ -371,10 +378,10 @@ namespace if ( AllBits( memoryTypeBits, 1u << i ) and AllBits( flags, includeFlags )) { - if ( not AnyBits( flags, excludeFlags )) + if ( NoBits( flags, excludeFlags )) { const bool has_inc_opt = AllBits( flags, optIncludeFlags ); - const bool not_exc_opt = not AnyBits( flags, optExcludeFlags ); + const bool not_exc_opt = NoBits( flags, optExcludeFlags ); if ( has_inc_opt and not_exc_opt ) { @@ -974,9 +981,11 @@ namespace VDeviceInitializer::VDeviceInitializer (Bool enableInfoLog) __NE___ : _enableInfoLog{ enableInfoLog } { + #ifndef AE_CFG_RELEASE auto dbg_report = _dbgReport.WriteLock(); NOTHROW( dbg_report->tempObjectDbgInfos.reserve( 16 )); dbg_report->tempString.reserve( 1024 ); + #endif 
} /* @@ -1569,6 +1578,9 @@ namespace { outResFlags.imageOptions = EImageOpt::ColorAttachmentBlend | EImageOpt::SampledLinear | EImageOpt::CubeCompatible | EImageOpt::MutableFormat | EImageOpt::BlitSrc | EImageOpt::BlitDst; + if ( _extensions.maintenance2 ) + outResFlags.imageOptions |= EImageOpt::BlockTexelViewCompatible | EImageOpt::ExtendedUsage; + if ( props.accelerationStructureFeats.accelerationStructure ) outResFlags.bufferUsage |= EBufferUsage::ASBuild_Scratch | EBufferUsage::ASBuild_ReadOnly; @@ -1716,7 +1728,7 @@ namespace { result[ heap_idx ] = memType; } - //ASSERT( not AnyBits( heap_bits, new_heap_bits )); // must not intersects + //ASSERT( NoBits( heap_bits, new_heap_bits )); // must not intersect heap_bits |= new_heap_bits; }}; @@ -2147,27 +2159,7 @@ namespace { if ( devCI.fsToDeviceFeatures != null ) CHECK_ERR( _InitFeaturesAndPropertiesByFeatureSet( *devCI.fsToDeviceFeatures )); - // disable some features - { - _properties.features.robustBufferAccess = VK_FALSE; // this feature affects performance - - _properties.bufferDeviceAddressFeats.bufferDeviceAddressCaptureReplay = VK_FALSE; - _properties.bufferDeviceAddressFeats.bufferDeviceAddressMultiDevice = VK_FALSE; - - _properties.accelerationStructureFeats.accelerationStructureCaptureReplay = VK_FALSE; - _properties.accelerationStructureFeats.accelerationStructureHostCommands = VK_FALSE; - - _properties.rayTracingPipelineFeats.rayTracingPipelineShaderGroupHandleCaptureReplay = VK_FALSE; - _properties.rayTracingPipelineFeats.rayTracingPipelineShaderGroupHandleCaptureReplayMixed = VK_FALSE; - - _properties.cooperativeMatrixFeats.cooperativeMatrixRobustBufferAccess = VK_FALSE; - } - - if ( not IsEnabledDebugCallback() ) - { - // enabled only with env variable 'NV_ALLOW_RAYTRACING_VALIDATION=1' - _properties.rayTracingValidationFeats.rayTracingValidation = VK_FALSE; - } + _SetupFeatures( INOUT _properties ); if ( devCI.disableFeatures != null ) devCI.disableFeatures( devCI.userData, INOUT _properties );
@@ -2277,6 +2269,37 @@ namespace { return true; } +/* +================================================= + _SetupFeatures +================================================= +*/ + void VDeviceInitializer::_SetupFeatures (INOUT VProperties &feats) C_NE___ + { + // disable some features + { + feats.features.robustBufferAccess = VK_FALSE; // this feature affects performance + feats.features.wideLines = VK_FALSE; + + feats.bufferDeviceAddressFeats.bufferDeviceAddressCaptureReplay = VK_FALSE; + feats.bufferDeviceAddressFeats.bufferDeviceAddressMultiDevice = VK_FALSE; + + feats.accelerationStructureFeats.accelerationStructureCaptureReplay = VK_FALSE; + feats.accelerationStructureFeats.accelerationStructureHostCommands = VK_FALSE; + + feats.rayTracingPipelineFeats.rayTracingPipelineShaderGroupHandleCaptureReplay = VK_FALSE; + feats.rayTracingPipelineFeats.rayTracingPipelineShaderGroupHandleCaptureReplayMixed = VK_FALSE; + + feats.cooperativeMatrixFeats.cooperativeMatrixRobustBufferAccess = VK_FALSE; + } + + if ( not IsEnabledDebugCallback() ) + { + // enabled only with env variable 'NV_ALLOW_RAYTRACING_VALIDATION=1' + feats.rayTracingValidationFeats.rayTracingValidation = VK_FALSE; + } + } + /* ================================================= _LogLogicalDevice @@ -2441,9 +2464,9 @@ namespace { qtypes[ uint(vq.type) ] = &vq; } else - if ( AllBits( q.queueFlags, VK_QUEUE_VIDEO_DECODE_BIT_KHR ) and - not AnyBits( q.queueFlags, VK_QUEUE_VIDEO_ENCODE_BIT_KHR ) and - qtypes[ uint(EQueueType::VideoDecode) ] == null ) + if ( AllBits( q.queueFlags, VK_QUEUE_VIDEO_DECODE_BIT_KHR ) and + NoBits( q.queueFlags, VK_QUEUE_VIDEO_ENCODE_BIT_KHR ) and + qtypes[ uint(EQueueType::VideoDecode) ] == null ) { if ( vq.debugName.empty() ) vq.debugName = "VideoDecode"; @@ -2457,9 +2480,9 @@ namespace { qtypes[ uint(vq.type) ] = &vq; } else - if ( AllBits( q.queueFlags, VK_QUEUE_VIDEO_ENCODE_BIT_KHR ) and - not AnyBits( q.queueFlags, VK_QUEUE_VIDEO_DECODE_BIT_KHR ) and - qtypes[ 
uint(EQueueType::VideoEncode) ] == null ) + if ( AllBits( q.queueFlags, VK_QUEUE_VIDEO_ENCODE_BIT_KHR ) and + NoBits( q.queueFlags, VK_QUEUE_VIDEO_DECODE_BIT_KHR ) and + qtypes[ uint(EQueueType::VideoEncode) ] == null ) { if ( vq.debugName.empty() ) vq.debugName = "VideoEncode"; @@ -2853,8 +2876,9 @@ namespace { CreateDebugCallback ================================================= */ - bool VDeviceInitializer::CreateDebugCallback (VkDebugUtilsMessageSeverityFlagsEXT severity, DebugReport_t &&callback) __NE___ + bool VDeviceInitializer::CreateDebugCallback (VkDebugUtilsMessageSeverityFlagsEXT severity, DebugReport_t callback) __NE___ { + #ifndef AE_CFG_RELEASE DRC_EXLOCK( _drCheck ); CHECK_ERR( GetVkInstance() != Default ); @@ -2898,6 +2922,9 @@ namespace { dbg_report->callback = RVRef(callback); return true; } + #else + Unused( severity, callback ); + #endif return false; } @@ -2909,6 +2936,7 @@ namespace { */ void VDeviceInitializer::DestroyDebugCallback () __NE___ { + #ifndef AE_CFG_RELEASE DRC_EXLOCK( _drCheck ); auto dbg_report = _dbgReport.WriteLock(); @@ -2923,6 +2951,7 @@ namespace { dbg_report->debugUtilsMessenger = Default; dbg_report->debugReportCallback = Default; + #endif } /* @@ -2932,11 +2961,17 @@ namespace { */ bool VDeviceInitializer::IsEnabledDebugCallback () C_NE___ { + #ifndef AE_CFG_RELEASE auto dbg_report = _dbgReport.ReadLock(); return dbg_report->debugUtilsMessenger != Default or dbg_report->debugReportCallback != Default; + #else + return false; + #endif } + + +#ifndef AE_CFG_RELEASE /* ================================================= CheckFalsePositive @@ -2992,7 +3027,13 @@ namespace { obj.objectHandle }; } - self->_DebugReport( dbg_report->tempString, + // skip false positive if used 'VAMDPerfProfiler' + if ( self->_amdPerf.IsLoaded() and + (HasSubString( pCallbackData->pMessage, "VkStructureType (1000133005)" ) or + HasSubString( pCallbackData->pMessage, "VkStructureType (1000133001)" ))) + return VK_FALSE; + + self->_DebugReport( INOUT
dbg_report->tempString, dbg_report->breakOnValidationError, dbg_report->callback, { dbg_report->tempObjectDbgInfos, pCallbackData->pMessage, @@ -3075,6 +3116,9 @@ namespace { #endif } +#endif // AE_CFG_RELEASE + + /* ================================================= GetRecommendedInstanceLayers diff --git a/AE/engine/src/graphics/Vulkan/VDevice.h b/AE/engine/src/graphics/Vulkan/VDevice.h index 00cd672f..021e2f40 100644 --- a/AE/engine/src/graphics/Vulkan/VDevice.h +++ b/AE/engine/src/graphics/Vulkan/VDevice.h @@ -199,7 +199,7 @@ namespace AE::Graphics InstanceCreateInfo () __NE___ {} }; - using DisableFeaturesFn_t = void (*) (void* userData, VProperties &); + using DisableFeaturesFn_t = void (*) (void* userData, INOUT VProperties &); struct DeviceCreateInfo { @@ -265,8 +265,10 @@ namespace AE::Graphics // variable private: + #ifndef AE_CFG_RELEASE Synchronized< SharedMutex, DbgReportData > _dbgReport; + #endif bool _enableInfoLog = false; bool _isCopy = false; @@ -292,7 +294,7 @@ namespace AE::Graphics bool DestroyInstance () __NE___; bool CreateDebugCallback (VkDebugUtilsMessageSeverityFlagsEXT severity, - DebugReport_t && callback = Default) __NE___; + DebugReport_t callback = Default) __NE___; void DestroyDebugCallback () __NE___; ND_ bool IsEnabledDebugCallback () C_NE___; @@ -352,6 +354,8 @@ namespace AE::Graphics void _InitQueues (ArrayView props, INOUT Queues_t &queues, INOUT QueueTypes_t &qtypes)C_NE___; void _ValidateQueueStages (INOUT Queues_t &queues) C_NE___; + void _SetupFeatures (INOUT VProperties &) C_NE___; + ND_ bool _InitFeaturesAndPropertiesByFeatureSet (const FeatureSet &fs) __NE___; // new debug api diff --git a/AE/engine/src/graphics/Vulkan/VDeviceFS.cpp b/AE/engine/src/graphics/Vulkan/VDeviceFS.cpp index f7c48533..561713af 100644 --- a/AE/engine/src/graphics/Vulkan/VDeviceFS.cpp +++ b/AE/engine/src/graphics/Vulkan/VDeviceFS.cpp @@ -131,6 +131,9 @@ namespace outFeatureSet.shaderQuadControl = True; } + if ( _extensions.clipSpaceWScalingNV ) + 
outFeatureSet.clipSpaceWScalingNV = True; + if ( _extensions.shaderFloat16Int8 ) { SET_FEAT2( shaderInt8, _properties.shaderFloat16Int8Feats ); @@ -616,23 +619,23 @@ namespace for (auto& props : queue_family_props) { if ( AllBits( props.queueFlags, VK_QUEUE_GRAPHICS_BIT ) and - not AnyBits( props.queueFlags, VK_QUEUE_VIDEO_DECODE_BIT_KHR | VK_QUEUE_VIDEO_ENCODE_BIT_KHR )) + NoBits( props.queueFlags, VK_QUEUE_VIDEO_DECODE_BIT_KHR | VK_QUEUE_VIDEO_ENCODE_BIT_KHR )) outFeatureSet.queues.supported |= EQueueMask::Graphics; if ( AllBits( props.queueFlags, VK_QUEUE_COMPUTE_BIT ) and - not AnyBits( props.queueFlags, VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_VIDEO_DECODE_BIT_KHR | VK_QUEUE_VIDEO_ENCODE_BIT_KHR )) + NoBits( props.queueFlags, VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_VIDEO_DECODE_BIT_KHR | VK_QUEUE_VIDEO_ENCODE_BIT_KHR )) outFeatureSet.queues.supported |= EQueueMask::AsyncCompute; if ( AllBits( props.queueFlags, VK_QUEUE_TRANSFER_BIT ) and - not AnyBits( props.queueFlags, VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_VIDEO_DECODE_BIT_KHR | VK_QUEUE_VIDEO_ENCODE_BIT_KHR )) + NoBits( props.queueFlags, VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_VIDEO_DECODE_BIT_KHR | VK_QUEUE_VIDEO_ENCODE_BIT_KHR )) outFeatureSet.queues.supported |= EQueueMask::AsyncTransfer; if ( AllBits( props.queueFlags, VK_QUEUE_VIDEO_DECODE_BIT_KHR ) and - not AnyBits( props.queueFlags, VK_QUEUE_VIDEO_ENCODE_BIT_KHR )) + NoBits( props.queueFlags, VK_QUEUE_VIDEO_ENCODE_BIT_KHR )) outFeatureSet.queues.supported |= EQueueMask::VideoDecode; if ( AllBits( props.queueFlags, VK_QUEUE_VIDEO_ENCODE_BIT_KHR ) and - not AnyBits( props.queueFlags, VK_QUEUE_VIDEO_DECODE_BIT_KHR )) + NoBits( props.queueFlags, VK_QUEUE_VIDEO_DECODE_BIT_KHR )) outFeatureSet.queues.supported |= EQueueMask::VideoEncode; } } @@ -716,6 +719,9 @@ namespace _extensions.shaderQuadControl = true; } + if ( inFS.clipSpaceWScalingNV == True ) + _extensions.clipSpaceWScalingNV = true; + if ( inFS.subgroupBroadcastDynamicId == 
True ) { CHECK_ERR( _extensions.subgroup ); diff --git a/AE/engine/src/graphics/Vulkan/VEnumCast.cpp b/AE/engine/src/graphics/Vulkan/VEnumCast.cpp.h similarity index 100% rename from AE/engine/src/graphics/Vulkan/VEnumCast.cpp rename to AE/engine/src/graphics/Vulkan/VEnumCast.cpp.h diff --git a/AE/engine/src/graphics/Vulkan/VEnumCast.h b/AE/engine/src/graphics/Vulkan/VEnumCast.h index 84af9477..108846be 100644 --- a/AE/engine/src/graphics/Vulkan/VEnumCast.h +++ b/AE/engine/src/graphics/Vulkan/VEnumCast.h @@ -264,7 +264,7 @@ namespace AE::Graphics { switch_enum( value ) { - case EPipelineDynamicState::StencilCompareMask: return VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK; + case EPipelineDynamicState::StencilCompareMask : return VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK; case EPipelineDynamicState::StencilWriteMask : return VK_DYNAMIC_STATE_STENCIL_WRITE_MASK; case EPipelineDynamicState::StencilReference : return VK_DYNAMIC_STATE_STENCIL_REFERENCE; case EPipelineDynamicState::DepthBias: return VK_DYNAMIC_STATE_DEPTH_BIAS; @@ -272,7 +272,8 @@ namespace AE::Graphics //case EPipelineDynamicState::DepthBounds: return VK_DYNAMIC_STATE_DEPTH_BOUNDS; case EPipelineDynamicState::RTStackSize : return VK_DYNAMIC_STATE_RAY_TRACING_PIPELINE_STACK_SIZE_KHR; - case EPipelineDynamicState::FragmentShadingRate: return VK_DYNAMIC_STATE_FRAGMENT_SHADING_RATE_KHR; + case EPipelineDynamicState::FragmentShadingRate : return VK_DYNAMIC_STATE_FRAGMENT_SHADING_RATE_KHR; + case EPipelineDynamicState::ViewportWScaling : return VK_DYNAMIC_STATE_VIEWPORT_W_SCALING_NV; // TODO: // VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT @@ -388,6 +389,7 @@ namespace AE::Graphics case EImageOpt::MutableFormat : flags |= VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT; break; case EImageOpt::Array2DCompatible : flags |= VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT; break; case EImageOpt::BlockTexelViewCompatible: flags |= VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT; break; + case EImageOpt::ExtendedUsage : flags |= 
VK_IMAGE_CREATE_EXTENDED_USAGE_BIT; break; case EImageOpt::SparseResidency : flags |= VK_IMAGE_CREATE_SPARSE_BINDING_BIT | VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT; break; case EImageOpt::SparseAliased : flags |= VK_IMAGE_CREATE_SPARSE_BINDING_BIT | VK_IMAGE_CREATE_SPARSE_ALIASED_BIT; break; @@ -898,7 +900,7 @@ namespace AE::Graphics { EImageOpt result = Zero; - StaticAssert( uint(EImageOpt::All) == 0x1FFFF ); + StaticAssert( uint(EImageOpt::All) == 0x3FFFF ); for (auto t : BitfieldIterate( values )) { switch_enum( t ) @@ -911,13 +913,13 @@ namespace AE::Graphics case VK_IMAGE_CREATE_ALIAS_BIT : result |= EImageOpt::Alias; break; case VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT : result |= EImageOpt::BlockTexelViewCompatible; break; case VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT : result |= EImageOpt::SampleLocationsCompatible; break; + case VK_IMAGE_CREATE_EXTENDED_USAGE_BIT : result |= EImageOpt::ExtendedUsage; break; case VK_IMAGE_CREATE_DISJOINT_BIT : break; // skip case VK_IMAGE_CREATE_SUBSAMPLED_BIT_EXT : case VK_IMAGE_CREATE_SPARSE_BINDING_BIT : case VK_IMAGE_CREATE_SPLIT_INSTANCE_BIND_REGIONS_BIT : - case VK_IMAGE_CREATE_EXTENDED_USAGE_BIT : case VK_IMAGE_CREATE_PROTECTED_BIT : case VK_IMAGE_CREATE_FLAG_BITS_MAX_ENUM : case VK_IMAGE_CREATE_CORNER_SAMPLED_BIT_NV : diff --git a/AE/engine/src/graphics/Vulkan/VEnumToString.h b/AE/engine/src/graphics/Vulkan/VEnumToString.h index fd22c87d..2903f172 100644 --- a/AE/engine/src/graphics/Vulkan/VEnumToString.h +++ b/AE/engine/src/graphics/Vulkan/VEnumToString.h @@ -114,7 +114,7 @@ namespace AE::Graphics String result; for (VkImageUsageFlags i = 1; i <= value; i <<= 1) { - if ( not AllBits( value, i )) + if ( NoBits( value, i )) continue; if ( result.size() ) diff --git a/AE/engine/src/graphics/Vulkan/VRenderTaskScheduler.cpp b/AE/engine/src/graphics/Vulkan/VRenderTaskScheduler.cpp index 00da5084..99efdc8e 100644 --- a/AE/engine/src/graphics/Vulkan/VRenderTaskScheduler.cpp +++ 
b/AE/engine/src/graphics/Vulkan/VRenderTaskScheduler.cpp @@ -38,7 +38,7 @@ namespace AE::Graphics VDrawCommandBatch::Viewports_t viewports; VDrawCommandBatch::Scissors_t scissors; - Graphics::_hidden_::ConvertViewports( desc.viewports, Default, OUT viewports, OUT scissors ); + Graphics::_hidden_::ConvertViewports( desc.viewports, Default, desc.area.Size(), OUT viewports, OUT scissors ); return rts._CreateDrawBatch( primaryState, viewports, scissors, dbg ); } diff --git a/AE/engine/src/graphics/Vulkan/VResourceManager.cpp b/AE/engine/src/graphics/Vulkan/VResourceManager.cpp index 0818d801..5226c830 100644 --- a/AE/engine/src/graphics/Vulkan/VResourceManager.cpp +++ b/AE/engine/src/graphics/Vulkan/VResourceManager.cpp @@ -167,7 +167,7 @@ namespace AE::Graphics VFramebuffer::Key key{ StructView{ desc.attachments.GetValueArray(), &RenderPassDesc::Attachment::imageView }, rp_id, - uint3{ desc.area.Width(), desc.area.Height(), desc.layerCount.Get() }}; + uint3{ uint(desc.area.Width()), uint(desc.area.Height()), desc.layerCount.Get() }}; // find in cache { diff --git a/AE/engine/src/graphics/Vulkan/VSwapchain.cpp b/AE/engine/src/graphics/Vulkan/VSwapchain.cpp index f04ee703..5253e74c 100644 --- a/AE/engine/src/graphics/Vulkan/VSwapchain.cpp +++ b/AE/engine/src/graphics/Vulkan/VSwapchain.cpp @@ -553,11 +553,12 @@ namespace AE::Graphics { const auto& surf_fmt = surf_formats[i]; - if ( AnyEqual( surf_fmt.format, required_format1, required_format2 ) and + if ( AnyEqual( surf_fmt.format, required_format1, required_format2 ) and surf_fmt.colorSpace == required_colorspace ) { both_match_idx = i; - break; + if ( surf_fmt.format == required_format1 ) + break; } else // separate check @@ -615,7 +616,7 @@ namespace AE::Graphics VK_CHECK( vkGetPhysicalDeviceImageFormatProperties( _device->GetVkPhysicalDevice(), colorFormat, VK_IMAGE_TYPE_2D, VK_IMAGE_TILING_OPTIMAL, colorImageUsage, 0, OUT &image_props )); - if ( not AllBits( image_props.sampleCounts, VK_SAMPLE_COUNT_1_BIT )) + if ( 
NoBits( image_props.sampleCounts, VK_SAMPLE_COUNT_1_BIT )) return false; return true; @@ -713,7 +714,7 @@ namespace AE::Graphics } _vkImages.fill( Default ); - _surfaceSize.store( ushort2{uint2{ swapchain_info.imageExtent.width, swapchain_info.imageExtent.height }}); + _surfaceSize.store( ImageDim2_t{uint2{ swapchain_info.imageExtent.width, swapchain_info.imageExtent.height }}); _device->SetObjectName( _vkSwapchain, dbgName, VK_OBJECT_TYPE_SWAPCHAIN_KHR ); @@ -1001,7 +1002,7 @@ namespace AE::Graphics VkSurfaceCapabilities2KHR surf_caps2; VK_CHECK( vkGetPhysicalDeviceSurfaceCapabilities2KHR( _device->GetVkPhysicalDevice(), &surf_info, OUT &surf_caps2 )); - for (VkBaseInStructure const *iter = reinterpret_cast(&surf_caps2); + for (VkBaseInStructure const* iter = reinterpret_cast(&surf_caps2); iter != null; iter = iter->pNext) { @@ -1029,32 +1030,32 @@ namespace AE::Graphics ASSERT( AllBits( format_props.optimalTilingFeatures, VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT )); if ( AllBits( imageUsage, VK_IMAGE_USAGE_TRANSFER_SRC_BIT ) and - (not AllBits( format_props.optimalTilingFeatures, VK_FORMAT_FEATURE_TRANSFER_SRC_BIT ) or - not AllBits( format_props.optimalTilingFeatures, VK_FORMAT_FEATURE_BLIT_DST_BIT )) ) + (NoBits( format_props.optimalTilingFeatures, VK_FORMAT_FEATURE_TRANSFER_SRC_BIT ) or + NoBits( format_props.optimalTilingFeatures, VK_FORMAT_FEATURE_BLIT_DST_BIT )) ) { imageUsage &= ~VK_IMAGE_USAGE_TRANSFER_SRC_BIT; } if ( AllBits( imageUsage, VK_IMAGE_USAGE_TRANSFER_DST_BIT ) and - not AllBits( format_props.optimalTilingFeatures, VK_FORMAT_FEATURE_TRANSFER_DST_BIT )) + NoBits( format_props.optimalTilingFeatures, VK_FORMAT_FEATURE_TRANSFER_DST_BIT )) { imageUsage &= ~VK_IMAGE_USAGE_TRANSFER_DST_BIT; } if ( AllBits( imageUsage, VK_IMAGE_USAGE_STORAGE_BIT ) and - not AllBits( format_props.optimalTilingFeatures, VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT )) + NoBits( format_props.optimalTilingFeatures, VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT )) { imageUsage &= 
~VK_IMAGE_USAGE_STORAGE_BIT; } if ( AllBits( imageUsage, VK_IMAGE_USAGE_SAMPLED_BIT ) and - not AllBits( format_props.optimalTilingFeatures, VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT )) + NoBits( format_props.optimalTilingFeatures, VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT )) { imageUsage &= ~VK_IMAGE_USAGE_SAMPLED_BIT; } if ( AllBits( imageUsage, VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT ) and - not AllBits( format_props.optimalTilingFeatures, VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT )) + NoBits( format_props.optimalTilingFeatures, VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT )) { imageUsage &= ~VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; } diff --git a/AE/engine/src/graphics/Vulkan/VSwapchain.h b/AE/engine/src/graphics/Vulkan/VSwapchain.h index 4dc7683e..e49731d9 100644 --- a/AE/engine/src/graphics/Vulkan/VSwapchain.h +++ b/AE/engine/src/graphics/Vulkan/VSwapchain.h @@ -69,7 +69,7 @@ namespace AE::Graphics VkSwapchainKHR _vkSwapchain = Default; // protected by '_guard' VkSurfaceKHR _vkSurface = Default; // protected by '_guard' - StructAtomic< ushort2 > _surfaceSize; + StructAtomic< ImageDim2_t > _surfaceSize; StructAtomic< MutableIdxBits > _indices; Images_t _vkImages {}; // protected by '_guard' diff --git a/AE/engine/src/graphics/Vulkan/Video/VVideoImage.cpp b/AE/engine/src/graphics/Vulkan/Video/VVideoImage.cpp index 36b091a1..ce2d9324 100644 --- a/AE/engine/src/graphics/Vulkan/Video/VVideoImage.cpp +++ b/AE/engine/src/graphics/Vulkan/Video/VVideoImage.cpp @@ -464,16 +464,14 @@ namespace return WithVideoProfile( dev, desc.profile, [&] (const VkVideoProfileInfoKHR &profileInfo, const VkVideoCapabilitiesKHR &capabilities) -> bool { - if ( All( desc.dimension == uint2{0} )) { - desc.dimension.x = capabilities.minCodedExtent.width; - desc.dimension.y = capabilities.minCodedExtent.height; - } - if ( All( desc.dimension == UMax )) { - desc.dimension.x = capabilities.maxCodedExtent.width; - desc.dimension.y = capabilities.maxCodedExtent.height; - } - CHECK_ERR( All( 
desc.dimension >= uint2{capabilities.minCodedExtent.width, capabilities.minCodedExtent.height} )); - CHECK_ERR( All( desc.dimension <= uint2{capabilities.maxCodedExtent.width, capabilities.maxCodedExtent.height} )); + if ( All( desc.dimension == ImageDim2_t{0} )) + desc.dimension = CheckCast(uint2{ capabilities.minCodedExtent.width, capabilities.minCodedExtent.height }); + + if ( All( desc.dimension == UMax )) + desc.dimension = CheckCast(uint2{ capabilities.maxCodedExtent.width, capabilities.maxCodedExtent.height }); + + CHECK_ERR( All( desc.Dimension2() >= uint2{capabilities.minCodedExtent.width, capabilities.minCodedExtent.height} )); + CHECK_ERR( All( desc.Dimension2() <= uint2{capabilities.maxCodedExtent.width, capabilities.maxCodedExtent.height} )); pictureAccessGranularity = ushort2{ uint2{ capabilities.pictureAccessGranularity.width, capabilities.pictureAccessGranularity.height }}; @@ -535,7 +533,7 @@ namespace result &= dev.GetVExtensions().samplerYcbcrConversion; - result &= All( IsMultipleOf( desc.dimension, dim_granularity )); + result &= All( IsMultipleOf( desc.Dimension2(), dim_granularity )); return result; } diff --git a/AE/engine/src/graphics/Vulkan/Video/VVideoUtils.cpp.h b/AE/engine/src/graphics/Vulkan/Video/VVideoUtils.cpp.h index 0b258c62..77c76929 100644 --- a/AE/engine/src/graphics/Vulkan/Video/VVideoUtils.cpp.h +++ b/AE/engine/src/graphics/Vulkan/Video/VVideoUtils.cpp.h @@ -1,5 +1,7 @@ // Copyright (c) Zhirnov Andrey. 
For more information see 'LICENSE' +#pragma once + #include "graphics/Vulkan/VEnumCast.h" namespace AE::Graphics diff --git a/AE/engine/src/graphics_hl/CMakeLists.txt b/AE/engine/src/graphics_hl/CMakeLists.txt index 61e16e36..59b0414f 100644 --- a/AE/engine/src/graphics_hl/CMakeLists.txt +++ b/AE/engine/src/graphics_hl/CMakeLists.txt @@ -39,6 +39,7 @@ if (TARGET "Graphics") EnablePCH( "GraphicsHL" ) EnablePrebuild( "GraphicsHL" ) + EnableUnitBuild( "GraphicsHL" ) install( TARGETS "GraphicsHL" ARCHIVE DESTINATION "lib" ) endif() diff --git a/AE/engine/src/graphics_hl/Canvas/VertexTypes.h b/AE/engine/src/graphics_hl/Canvas/VertexTypes.h index fea977dd..3219e566 100644 --- a/AE/engine/src/graphics_hl/Canvas/VertexTypes.h +++ b/AE/engine/src/graphics_hl/Canvas/VertexTypes.h @@ -90,8 +90,8 @@ struct VB_UVs2_SCs1_Col8 ubyte4 Color; VB_UVs2_SCs1_Col8 () {} - VB_UVs2_SCs1_Col8 (const packed_ushort2 &uv, ushort scale, const ubyte4 &col) : UV_Scale{uv, scale, 0}, Color{col} {} - VB_UVs2_SCs1_Col8 (const packed_ushort2 &uv, ushort scale, const RGBA8u &col) : UV_Scale{uv, scale, 0}, Color{col} {} + VB_UVs2_SCs1_Col8 (const packed_ushort2 &uv, ushort scale, const ubyte4 &col) : UV_Scale{uv, scale, ushort(0)}, Color{col} {} + VB_UVs2_SCs1_Col8 (const packed_ushort2 &uv, ushort scale, const RGBA8u &col) : UV_Scale{uv, scale, ushort(0)}, Color{col} {} }; StaticAssert( sizeof(VB_UVs2_SCs1_Col8) == 12 ); diff --git a/AE/engine/src/graphics_hl/ImGui/ImGuiRenderer.cpp b/AE/engine/src/graphics_hl/ImGui/ImGuiRenderer.cpp index 6193e804..b478d48b 100644 --- a/AE/engine/src/graphics_hl/ImGui/ImGuiRenderer.cpp +++ b/AE/engine/src/graphics_hl/ImGui/ImGuiRenderer.cpp @@ -438,7 +438,7 @@ namespace AE::Graphics UploadImageDesc upload; upload.aspectMask = EImageAspect::Color; upload.heapType = EStagingHeapType::Dynamic; - upload.imageDim = uint3{width, height, 1}; + upload.imageDim = int3{ width, height, 1 }; upload.dataRowPitch = Bytes{width * 4 * sizeof(ubyte)}; const Bytes size {width * 
height * 4 * sizeof(ubyte)}; diff --git a/AE/engine/src/graphics_hl/UI/Layout.cpp b/AE/engine/src/graphics_hl/UI/Layout.cpp index c502eede..1105f0ed 100644 --- a/AE/engine/src/graphics_hl/UI/Layout.cpp +++ b/AE/engine/src/graphics_hl/UI/Layout.cpp @@ -493,7 +493,7 @@ namespace AE::UI // if initialized if ( cell->_data != null ) { - count += usize( not AllBits( cell->StyleFlags(), EStyleState::Invisible )); + count += usize( NoBits( cell->StyleFlags(), EStyleState::Invisible )); } else ++count; @@ -800,7 +800,7 @@ namespace for (auto& cs : child_state) { - active_count += usize{ not AllBits( cs.StyleFlags(), EStyleState::Invisible )}; + active_count += usize{ NoBits( cs.StyleFlags(), EStyleState::Invisible )}; } if ( active_count == 0 ) @@ -818,7 +818,7 @@ namespace cd.range.x = step * idx; cd.range.y = step * (idx+1); - idx += usize{ not AllBits( cs.StyleFlags(), EStyleState::Invisible )}; + idx += usize{ NoBits( cs.StyleFlags(), EStyleState::Invisible )}; } ASSERT( active_count == idx ); } diff --git a/AE/engine/src/networking/CMakeLists.txt b/AE/engine/src/networking/CMakeLists.txt index 17e20988..4311edf5 100644 --- a/AE/engine/src/networking/CMakeLists.txt +++ b/AE/engine/src/networking/CMakeLists.txt @@ -14,5 +14,6 @@ target_compile_definitions( "Networking" PUBLIC "AE_ENABLE_NETWORKING" ) EnablePCH( "Networking" ) EnablePrebuild( "Networking" ) +EnableUnitBuild( "Networking" ) install( TARGETS "Networking" ARCHIVE DESTINATION "lib" ) diff --git a/AE/engine/src/networking/HighLevel/DataEncoder.h b/AE/engine/src/networking/HighLevel/DataEncoder.h index aba478f3..15b708ae 100644 --- a/AE/engine/src/networking/HighLevel/DataEncoder.h +++ b/AE/engine/src/networking/HighLevel/DataEncoder.h @@ -33,7 +33,7 @@ namespace AE::Networking ND_ bool IsFull () C_NE___ { return _ser.stream.Empty(); } // optional - ND_ bool Flush () __NE___ { return _ser.Flush(); } + ND_ bool Flush () __NE___ { return true; } template ND_ bool operator () (const Arg0 &arg0, const Args& 
...args) __NE___ { return _ser( arg0, args... ); } diff --git a/AE/engine/src/networking/HighLevel/TcpChannel.cpp b/AE/engine/src/networking/HighLevel/TcpChannel.cpp index 1db538d0..9c0c0d38 100644 --- a/AE/engine/src/networking/HighLevel/TcpChannel.cpp +++ b/AE/engine/src/networking/HighLevel/TcpChannel.cpp @@ -7,7 +7,7 @@ namespace AE::Networking { namespace { - static constexpr auto c_ChannelType = EChannel::Reliable; + static constexpr auto c_TcpChannelType = EChannel::Reliable; } /* @@ -402,7 +402,7 @@ namespace { _ValidateMsgStream ================================================= */ - void TcpChannel::_ValidateMsgStream (const void *ptr, Bytes size) __NE___ + void TcpChannel::_ValidateMsgStream (const void* ptr, Bytes size) __NE___ { Bytes offset; @@ -527,7 +527,7 @@ namespace { _toSend.encoded = 0_b; _received.received = 0_b; - _serverProvider->GetAddress( c_ChannelType, _serverIndex, True{"TCP"}, OUT _serverAddress ); + _serverProvider->GetAddress( c_TcpChannelType, _serverIndex, True{"TCP"}, OUT _serverAddress ); if ( _socket.AsyncConnect( _serverAddress )) { @@ -672,7 +672,7 @@ namespace { if_unlikely( idx < 0 or idx >= int(_maxClients) ) break; // client pool overflow - if ( auto client_id = _listener->OnClientConnected( c_ChannelType, addr ); client_id != Default ) + if ( auto client_id = _listener->OnClientConnected( c_TcpChannelType, addr ); client_id != Default ) { // save client _poolBits.set( idx ); @@ -821,7 +821,7 @@ namespace { { for (uint idx : BitIndexIterate( _poolBits )) { - _listener->OnClientDisconnected( c_ChannelType, _clientPool[idx].id ); + _listener->OnClientDisconnected( c_TcpChannelType, _clientPool[idx].id ); } for (usize i = 0; i < _clientPool.size(); ++i) @@ -906,7 +906,7 @@ namespace { } ASSERT( _uniqueClientId.erase( id )); - _listener->OnClientDisconnected( c_ChannelType, id ); + _listener->OnClientDisconnected( c_TcpChannelType, id ); AE_LOG_DBG( "client ("s << ToString<16>(uint(id)) << ") disconnected" ); } diff --git 
a/AE/engine/src/networking/HighLevel/TcpChannel.h b/AE/engine/src/networking/HighLevel/TcpChannel.h index f048475b..329565d6 100644 --- a/AE/engine/src/networking/HighLevel/TcpChannel.h +++ b/AE/engine/src/networking/HighLevel/TcpChannel.h @@ -100,7 +100,7 @@ namespace AE::Networking ND_ bool _IsValid () C_NE___; - static void _ValidateMsgStream (const void *ptr, Bytes size) __NE___; + static void _ValidateMsgStream (const void* ptr, Bytes size) __NE___; }; //----------------------------------------------------------------------------- diff --git a/AE/engine/src/networking/HighLevel/UdpUnreliable.cpp b/AE/engine/src/networking/HighLevel/UdpUnreliable.cpp index c5ae4b10..75011d5b 100644 --- a/AE/engine/src/networking/HighLevel/UdpUnreliable.cpp +++ b/AE/engine/src/networking/HighLevel/UdpUnreliable.cpp @@ -7,7 +7,7 @@ namespace AE::Networking { namespace { - static constexpr auto c_ChannelType = EChannel::Unreliable; + static constexpr auto c_UdpChannelType = EChannel::Unreliable; } /* @@ -232,7 +232,7 @@ namespace { */ void UdpUnreliableClientChannel::_Reconnect () __NE___ { - _serverProvider->GetAddress( c_ChannelType, _serverIndex, False{"UDP"}, OUT _serverAddress ); + _serverProvider->GetAddress( c_UdpChannelType, _serverIndex, False{"UDP"}, OUT _serverAddress ); ++_serverIndex; } diff --git a/AE/engine/src/networking/LowLevel/PlatformSpecific.cpp.h b/AE/engine/src/networking/LowLevel/PlatformSpecific.cpp.h index 8ea58dab..ac0937ac 100644 --- a/AE/engine/src/networking/LowLevel/PlatformSpecific.cpp.h +++ b/AE/engine/src/networking/LowLevel/PlatformSpecific.cpp.h @@ -1,5 +1,7 @@ // Copyright (c) Zhirnov Andrey. For more information see 'LICENSE' +#pragma once + #include "networking/LowLevel/SocketError.h" #ifdef AE_PLATFORM_WINDOWS @@ -7,8 +9,8 @@ # if AE_PLATFORM_TARGET_VERSION_MAJOR < 6 # error Required at least Windows Vista. 
# endif -# define AE_WINDOWS_SOCKET # include "base/Platforms/WindowsHeader.cpp.h" +# define AE_WINDOWS_SOCKET #elif defined(AE_EMS_NATIVE_SOCKETS) // https://emscripten.org/docs/porting/networking.html diff --git a/AE/engine/src/networking/LowLevel/TcpSocket.cpp b/AE/engine/src/networking/LowLevel/TcpSocket.cpp index 87a7966c..3832aee9 100644 --- a/AE/engine/src/networking/LowLevel/TcpSocket.cpp +++ b/AE/engine/src/networking/LowLevel/TcpSocket.cpp @@ -28,7 +28,7 @@ namespace AE::Networking return false; // no clients // TODO: check errors #ifdef AE_UNIX_SOCKET - // On Linux, the new socket returned by accept() does not inherit file status flags such as O_NONBLOCK and O_ASYNC from the listening socket. + // On Linux, the new socket returned by 'accept()' does not inherit file status flags such as O_NONBLOCK and O_ASYNC from the listening socket. // This behavior differs from the canonical BSD sockets implementation. if ( other.IsNonBlocking() ) CHECK_ERR( _SetNonBlocking() ); diff --git a/AE/engine/src/networking/Utils/MsgAndSync.h b/AE/engine/src/networking/Utils/MsgAndSync.h index 091db6f6..a5a052b4 100644 --- a/AE/engine/src/networking/Utils/MsgAndSync.h +++ b/AE/engine/src/networking/Utils/MsgAndSync.h @@ -32,8 +32,8 @@ namespace AE::Networking::_hidden_ // methods public: MsgWithExtra () __NE___ {} - MsgWithExtra (T* msg, Bytes extraSize) __NE___ : _msg{msg} DEBUG_ONLY(, _extraSize{extraSize}) { Unused(extraSize); } - MsgWithExtra (MsgWithExtra &&other) __NE___ : _msg{other._msg} DEBUG_ONLY(, _extraSize{other.extraSize}) { other._msg = null; } + MsgWithExtra (T* msg, Bytes extraSize) __NE___ : _msg{msg} DEBUG_ONLY(, _extraSize{extraSize}) { Unused(extraSize); } + MsgWithExtra (MsgWithExtra &&other) __NE___ : _msg{other._msg} DEBUG_ONLY(, _extraSize{other.extraSize}) { other._msg = null; } MsgWithExtra& operator = (MsgWithExtra &&rhs) __NE___; @@ -51,20 +51,26 @@ namespace AE::Networking::_hidden_ void* PutExtra (const void* data, Bytes dataSize) __NE___; 
template - BasicStringView PutExtra (BasicStringView str) __NE___ { return BasicStringView{ PutExtra( str.data(), str.size() ), str.size() }; } + auto PutExtra (BasicStringView str) __NE___ { return BasicStringView{ PutExtra( str.data(), str.size() ), str.size() }; } template - ArrayView PutExtra (ArrayView arr) __NE___ { return ArrayView{ PutExtra( arr.data(), arr.size() ), arr.size() }; } + auto PutExtra (ArrayView arr) __NE___ { return ArrayView{ PutExtra( arr.data(), arr.size() ), arr.size() }; } template void Put (M T::*, const void* src, Bytes size) __NE___; template - void Put (M T::*m, BasicStringView str) __NE___; + void Put (M T::*, BasicStringView str) __NE___; template - void Put (M T::*m, ArrayView arr) __NE___; + void Put (M T::*, ArrayView arr) __NE___; + + template + void Put (M T::*dst, C T::*counter, BasicStringView) __NE___; + + template + void Put (M T::*dst, C T::*counter, ArrayView) __NE___; template void Extract (M T::*, OUT void* dst, Bytes size) C_NE___; @@ -183,11 +189,11 @@ namespace AE::Networking::_hidden_ */ template template - void MsgWithExtra::Put (M T::*member, const void* srcData, const Bytes srcDataSize) __NE___ + void MsgWithExtra::Put (M T::*dstMember, const void* srcData, const Bytes srcDataSize) __NE___ { NonNull( _msg ); - void* dst = &(_msg->*member); + void* dst = &(_msg->*dstMember); ASSERT( dst + srcDataSize <= (Cast(_msg) + SizeOf + _extraSize) ); MemCopy_NullCheck( OUT dst, srcData, srcDataSize ); @@ -195,18 +201,48 @@ namespace AE::Networking::_hidden_ template template - void MsgWithExtra::Put (M T::*m, BasicStringView str) __NE___ + void MsgWithExtra::Put (M T::*dstMember, BasicStringView str) __NE___ { StaticAssert( IsSameTypes< RemoveArray, E >); - Put( m, str.data(), StringSizeOf(str) ); + Put( dstMember, str.data(), StringSizeOf(str) ); } template template - void MsgWithExtra::Put (M T::*m, ArrayView arr) __NE___ + void MsgWithExtra::Put (M T::*dstMember, ArrayView arr) __NE___ + { + StaticAssert( IsSameTypes< 
RemoveArray, E >); + Put( dstMember, arr.data(), ArraySizeOf(arr) ); + } + +/* +================================================= + Put +---- + Same as 'Put( dstMember, arr )' but with counter field. + 'counter' will be set to actual element count. +================================================= +*/ + template + template + void MsgWithExtra::Put (M T::*dstMember, C T::*counter, BasicStringView str) __NE___ + { + StaticAssert( IsSameTypes< RemoveArray, E >); + Put( dstMember, str.data(), StringSizeOf(str) ); + + auto& cnt = (_msg->*counter); + cnt = CheckCast( str.size() ); + } + + template + template + void MsgWithExtra::Put (M T::*dstMember, C T::*counter, ArrayView arr) __NE___ { StaticAssert( IsSameTypes< RemoveArray, E >); - Put( m, arr.data(), ArraySizeOf(arr) ); + Put( dstMember, arr.data(), ArraySizeOf(arr) ); + + auto& cnt = (_msg->*counter); + cnt = CheckCast( arr.size() ); } /* diff --git a/AE/engine/src/platform/Android/ApplicationAndroid.cpp b/AE/engine/src/platform/Android/ApplicationAndroid.cpp index 250e8bb5..5ef64dc8 100644 --- a/AE/engine/src/platform/Android/ApplicationAndroid.cpp +++ b/AE/engine/src/platform/Android/ApplicationAndroid.cpp @@ -16,6 +16,9 @@ namespace { static const size_t s_JNI_ptr = size_t(&JNI_OnLoad) + size_t(&JNI_OnUnload); } +namespace AE::Base { + extern bool Android_IsUnderDebugger; +} namespace AE::App { @@ -315,7 +318,7 @@ namespace { native_OnCreate ================================================= */ - void JNICALL ApplicationAndroid::native_OnCreate (JNIEnv* env, jclass, jobject appCtx, jobject assetMngr) __NE___ + void JNICALL ApplicationAndroid::native_OnCreate (JNIEnv* env, jclass, jobject appCtx, jobject assetMngr, jboolean isUnderDebugger) __NE___ { auto& app = GetApp(); DRC_EXLOCK( app._drCheck ); @@ -329,6 +332,11 @@ namespace { app._java.application.Method( "ShowToast", OUT app._methods.showToast ); app._java.application.Method( "IsNetworkConnected", OUT app._methods.isNetworkConnected ); 
//app._java.application.Method( "CreateWindow", OUT app._methods.createWindow ); + + Base::Android_IsUnderDebugger = isUnderDebugger; + + if ( isUnderDebugger ) + StaticLogger::AddLogger( ILogger::CreateBreakOnError() ); } /* @@ -475,6 +483,8 @@ namespace { wnd_class.RegisterStaticMethod( "native_OnTouch", &WindowAndroid::native_OnTouch ); wnd_class.RegisterStaticMethod( "native_OnOrientationChanged", &WindowAndroid::native_OnOrientationChanged ); wnd_class.RegisterStaticMethod( "native_UpdateSensor", &WindowAndroid::native_UpdateSensor ); + wnd_class.RegisterStaticMethod( "native_SendBatteryStat1", &WindowAndroid::native_SendBatteryStat1 ); + wnd_class.RegisterStaticMethod( "native_SendBatteryStat2", &WindowAndroid::native_SendBatteryStat2 ); } CHECK( ApplicationAndroid::_GetAppInstance() != null ); diff --git a/AE/engine/src/platform/Android/ApplicationAndroid.h b/AE/engine/src/platform/Android/ApplicationAndroid.h index 7dc7dcce..7ef140a7 100644 --- a/AE/engine/src/platform/Android/ApplicationAndroid.h +++ b/AE/engine/src/platform/Android/ApplicationAndroid.h @@ -101,14 +101,17 @@ namespace AE::App // called from java private: - static void JNICALL native_OnCreate (JNIEnv*, jclass, jobject app, jobject assetMngr) __NE___; - static void JNICALL native_SetDirectories (JNIEnv*, jclass, jstring, jstring, jstring, jstring) __NE___; + static void JNICALL native_OnCreate (JNIEnv*, jclass, jobject app, + jobject assetMngr, + jboolean isUnderDebugger) __NE___; + static void JNICALL native_SetDirectories (JNIEnv*, jclass, jstring, + jstring, jstring, jstring) __NE___; static void JNICALL native_SetDisplayInfo (JNIEnv*, jclass, - jint minWidth, jint minHeight, - jint maxWidth, jint maxHeight, - float dpi, jint orientation, - float avrLum, float maxLum, float minLum, - jintArray cutoutRects, jint cutoutRectCount) __NE___; + jint minWidth, jint minHeight, + jint maxWidth, jint maxHeight, + float dpi, jint orientation, + float avrLum, float maxLum, float minLum, + jintArray 
cutoutRects, jint cutoutRectCount) __NE___; static void JNICALL native_SetSystemInfo (JNIEnv*, jclass, jstring, jstring) __NE___; static void JNICALL native_EnableCamera (JNIEnv*, jclass) __NE___; diff --git a/AE/engine/src/platform/Android/WindowAndroid.cpp b/AE/engine/src/platform/Android/WindowAndroid.cpp index 3fbe037c..ffef66ee 100644 --- a/AE/engine/src/platform/Android/WindowAndroid.cpp +++ b/AE/engine/src/platform/Android/WindowAndroid.cpp @@ -521,6 +521,23 @@ namespace { } } +/* +================================================= + native_SendBatteryStat +================================================= +*/ + void JNICALL WindowAndroid::native_SendBatteryStat1 (JNIEnv*, jclass, + float current, float capacity, float energy) __NE___ + { + PerformanceStat::_SetBatteryStat1( current, capacity, energy ); + } + + void JNICALL WindowAndroid::native_SendBatteryStat2 (JNIEnv*, jclass, + float level, float temperature, float voltage, jboolean isCharging) __NE___ + { + PerformanceStat::_SetBatteryStat2( level, temperature, voltage, isCharging ); + } + } // AE::App diff --git a/AE/engine/src/platform/Android/WindowAndroid.h b/AE/engine/src/platform/Android/WindowAndroid.h index 23c596ee..a699356f 100644 --- a/AE/engine/src/platform/Android/WindowAndroid.h +++ b/AE/engine/src/platform/Android/WindowAndroid.h @@ -86,6 +86,9 @@ namespace AE::App jint touchCount, jfloatArray touchData) __NE___; static void JNICALL native_OnOrientationChanged (JNIEnv*, jclass, WinID wndId, jint newOrientation) __NE___; static void JNICALL native_UpdateSensor (JNIEnv*, jclass, WinID wndId, int sensor, jfloatArray values) __NE___; + static void JNICALL native_SendBatteryStat1 (JNIEnv*, jclass, float current, float capacity, float) __NE___; + static void JNICALL native_SendBatteryStat2 (JNIEnv*, jclass, float level, float temperature, + float voltage, jboolean isCharging) __NE___; }; diff --git a/AE/engine/src/platform/CMakeLists.txt b/AE/engine/src/platform/CMakeLists.txt index 
7d710dd6..662e7dd6 100644 --- a/AE/engine/src/platform/CMakeLists.txt +++ b/AE/engine/src/platform/CMakeLists.txt @@ -34,9 +34,7 @@ if (NOT APPLE) set_source_files_properties( ${OBJC_SOURCES} PROPERTIES HEADER_FILE_ONLY TRUE ) endif() if (WIN32) - set( ANDROID_SDK_ROOT "$ENV{ANDROID_SDK_ROOT}" ) - set( ANDROID_NDK_INCLUDE "${ANDROID_SDK_ROOT}/ndk/24.0.8215888/toolchains/llvm/prebuilt/windows-x86_64/sysroot/usr" ) - target_include_directories( "Platform" PRIVATE "${ANDROID_NDK_INCLUDE}" ) + target_include_directories( "Platform" PRIVATE "${ANDROID_NDK_INCLUDE}/sysroot/usr" ) set_source_files_properties( ${ANDROID_SOURCES} PROPERTIES HEADER_FILE_ONLY TRUE ) endif() @@ -78,5 +76,6 @@ endif() EnablePCH( "Platform" ) EnablePrebuild( "Platform" ) +EnableUnitBuild( "Platform" ) install( TARGETS "Platform" ARCHIVE DESTINATION "lib" ) diff --git a/AE/engine/src/platform/GLFW/WindowGLFW.cpp b/AE/engine/src/platform/GLFW/WindowGLFW.cpp index d822a29d..713bd547 100644 --- a/AE/engine/src/platform/GLFW/WindowGLFW.cpp +++ b/AE/engine/src/platform/GLFW/WindowGLFW.cpp @@ -548,7 +548,7 @@ namespace AE::App auto* self = Cast( glfwGetWindowUserPointer( wnd )); DRC_EXLOCK( self->_drCheck ); - const uint2 size {w,h}; + const uint2 size {int2{ w, h }}; if ( All( not IsZero( size ))) self->_surface.ResizeSwapchain(); diff --git a/AE/engine/src/platform/Private/VRDeviceEmulator.cpp b/AE/engine/src/platform/Private/VRDeviceEmulator.cpp index 3ef397fa..36e1bff9 100644 --- a/AE/engine/src/platform/Private/VRDeviceEmulator.cpp +++ b/AE/engine/src/platform/Private/VRDeviceEmulator.cpp @@ -52,19 +52,19 @@ namespace AE::App .ImageBarrier( src_rt1.imageId, src_rt1.finalState, EResourceState::BlitSrc ); ImageBlit region; - region.srcOffset0 = uint3{ src_rt0.region.left, src_rt0.region.top, 0u }; - region.srcOffset1 = uint3{ src_rt0.region.right, src_rt0.region.bottom, 1u }; - region.dstOffset0 = uint3{ dst_rt.region.left, dst_rt.region.top, 0u }; - region.dstOffset1 = uint3{ 
dst_rt.region.CenterX(), dst_rt.region.bottom, 1u }; + region.srcOffset0 = int3{ src_rt0.region.left, src_rt0.region.top, 0 }; + region.srcOffset1 = int3{ src_rt0.region.right, src_rt0.region.bottom, 1 }; + region.dstOffset0 = int3{ dst_rt.region.left, dst_rt.region.top, 0 }; + region.dstOffset1 = int3{ dst_rt.region.CenterX(), dst_rt.region.bottom, 1 }; region.srcSubres = { EImageAspect::Color, 0_mipmap, src_rt0.layer, 1u }; region.dstSubres = { EImageAspect::Color, 0_mipmap, dst_rt.layer, 1u }; ctx.BlitImage( src_rt0.imageId, dst_rt.imageId, EBlitFilter::Linear, ArrayView{ ®ion, 1 }); - region.srcOffset0 = uint3{ src_rt1.region.left, src_rt1.region.top, 0u }; - region.srcOffset1 = uint3{ src_rt1.region.right, src_rt1.region.bottom, 1u }; - region.dstOffset0 = uint3{ dst_rt.region.CenterX(), dst_rt.region.top, 0u }; - region.dstOffset1 = uint3{ dst_rt.region.right, dst_rt.region.bottom, 1u }; + region.srcOffset0 = int3{ src_rt1.region.left, src_rt1.region.top, 0 }; + region.srcOffset1 = int3{ src_rt1.region.right, src_rt1.region.bottom, 1 }; + region.dstOffset0 = int3{ dst_rt.region.CenterX(), dst_rt.region.top, 0 }; + region.dstOffset1 = int3{ dst_rt.region.right, dst_rt.region.bottom, 1 }; region.srcSubres = { EImageAspect::Color, 0_mipmap, src_rt1.layer, 1u }; region.dstSubres = { EImageAspect::Color, 0_mipmap, dst_rt.layer, 1u }; diff --git a/AE/engine/src/platform/Private/VRSurface.cpp b/AE/engine/src/platform/Private/VRSurface.cpp index 9134ab82..ea1e8442 100644 --- a/AE/engine/src/platform/Private/VRSurface.cpp +++ b/AE/engine/src/platform/Private/VRSurface.cpp @@ -40,7 +40,7 @@ namespace AE::App ImageDesc img_desc; ImageViewDesc view_desc; - img_desc.dimension = uint3{ desc.dimension, 1u }; + img_desc.dimension = ImageDim_t{uint3{ desc.dimension, 1u }}; img_desc.imageDim = EImageDim_2D; img_desc.usage = desc.usage; img_desc.options = desc.options; diff --git a/AE/engine/src/platform/Private/WindowBase.cpp b/AE/engine/src/platform/Private/WindowBase.cpp 
index 5063455b..4866fbf0 100644 --- a/AE/engine/src/platform/Private/WindowBase.cpp +++ b/AE/engine/src/platform/Private/WindowBase.cpp @@ -167,9 +167,9 @@ namespace ASSERT( info.size() == 1 ); if ( info.size() > 0 and - Any( info[0].dimension != GetSurfaceSize() )) + Any( info[0].Dimension() != GetSurfaceSize() )) { - SetSize( info[0].dimension ); + SetSize( info[0].Dimension() ); } } diff --git a/AE/engine/src/platform/Public/OutputSurface.h b/AE/engine/src/platform/Public/OutputSurface.h index 664995d9..858fe306 100644 --- a/AE/engine/src/platform/Public/OutputSurface.h +++ b/AE/engine/src/platform/Public/OutputSurface.h @@ -26,6 +26,7 @@ namespace AE::App using Graphics::EColorSpace; using Graphics::CommandBatchPtr; using Graphics::SurfaceFormat; + using Graphics::ImageDim2_t; @@ -85,11 +86,14 @@ namespace AE::App struct RenderTargetInfo { - uint2 dimension; - float pixToMm; // pixels to millimeters, used for touch screen, should not be used for VR + ImageDim2_t dimension; + float pixToMm; // pixels to millimeters, used for touch screen, should not be used for VR - RenderTargetInfo () __NE___ {} - RenderTargetInfo (uint2 dim, float pixToMm) __NE___ : dimension{dim}, pixToMm{pixToMm} {} + RenderTargetInfo () __NE___ {} + RenderTargetInfo (ImageDim2_t dim, float pixToMm) __NE___ : dimension{dim}, pixToMm{pixToMm} {} + RenderTargetInfo (uint2 dim, float pixToMm) __NE___ : dimension{dim}, pixToMm{pixToMm} {} + + ND_ uint2 Dimension () C_NE___ { return uint2{dimension}; } }; using RenderTarget = IOutputSurface_RenderTarget; diff --git a/AE/engine/src/platform/Public/VRDevice.h b/AE/engine/src/platform/Public/VRDevice.h index 7aec62ef..b74f0fb5 100644 --- a/AE/engine/src/platform/Public/VRDevice.h +++ b/AE/engine/src/platform/Public/VRDevice.h @@ -9,6 +9,7 @@ namespace AE::App { using Graphics::EImageUsage; using Graphics::EImageOpt; + using Graphics::ImageDim2_t; // @@ -51,7 +52,7 @@ namespace AE::App struct VRImageDesc { - uint2 dimension; + ImageDim2_t dimension; 
EPixelFormat format = Default; EImageUsage usage = EImageUsage::ColorAttachment | EImageUsage::Sampled | EImageUsage::Transfer; EImageOpt options = EImageOpt::BlitDst; diff --git a/AE/engine/src/platform/WinAPI/WindowWinAPI.cpp b/AE/engine/src/platform/WinAPI/WindowWinAPI.cpp index b9831bf0..2336b11f 100644 --- a/AE/engine/src/platform/WinAPI/WindowWinAPI.cpp +++ b/AE/engine/src/platform/WinAPI/WindowWinAPI.cpp @@ -382,6 +382,8 @@ namespace AE::App ::SetWindowPos( BitCast(_wnd), HWND_TOP, // win2000 _windowPos.x, _windowPos.y, 0, 0, SWP_FRAMECHANGED | SWP_NOSIZE | SWP_NOZORDER | SWP_NOCOPYBITS ); + + // TODO: try InvalidateRect() } _UpdateDescription(); break; diff --git a/AE/engine/src/profiler/CMakeLists.txt b/AE/engine/src/profiler/CMakeLists.txt index fdb2b142..58500ffb 100644 --- a/AE/engine/src/profiler/CMakeLists.txt +++ b/AE/engine/src/profiler/CMakeLists.txt @@ -31,6 +31,7 @@ if ( (TARGET "GraphicsHL") AND (${AE_ENABLE_PROFILER}) ) EnablePCH( "Profiler" ) EnablePrebuild( "Profiler" ) + EnableUnitBuild( "Profiler" ) install( TARGETS "Profiler" ARCHIVE DESTINATION "lib" ) endif() diff --git a/AE/engine/src/profiler/ImGui/ImColumnHistoryDiagram.cpp b/AE/engine/src/profiler/ImGui/ImColumnHistoryDiagram.cpp index 8cd85cc2..6d338d4a 100644 --- a/AE/engine/src/profiler/ImGui/ImColumnHistoryDiagram.cpp +++ b/AE/engine/src/profiler/ImGui/ImColumnHistoryDiagram.cpp @@ -61,10 +61,10 @@ namespace AE::Profiler if ( Abs( item_y1 - item_y0 ) > height_threshold ) { - draw_list->AddRectFilled( ImVec2{column_x, item_y0}, ImVec2{column_x + column_width, item_y1}, BitCast(item.color) ); + item_y0 = Max( item_y0, diag_region_pad.top ); + item_y1 = Max( item_y1, diag_region_pad.top ); - ASSERT( item_y0 >= diag_region_pad.top ); - ASSERT( item_y1 >= diag_region_pad.top ); + draw_list->AddRectFilled( ImVec2{column_x, item_y0}, ImVec2{column_x + column_width, item_y1}, BitCast(item.color) ); } } } @@ -211,8 +211,8 @@ namespace AE::Profiler item.begin -= min; item.end -= min; - 
ASSERT( item.begin >= nanosecondsd{0.0} ); - ASSERT( item.begin <= item.end ); + // ASSERT( item.begin >= nanosecondsd{0.0} ); + // ASSERT( item.begin <= item.end ); } std::sort( f.items.begin(), f.items.end(), [](auto& lhs, auto& rhs) { return lhs.begin < rhs.begin; }); diff --git a/AE/engine/src/profiler/ImGui/ImLineGraph.cpp b/AE/engine/src/profiler/ImGui/ImLineGraph.cpp index 06afa083..490cc158 100644 --- a/AE/engine/src/profiler/ImGui/ImLineGraph.cpp +++ b/AE/engine/src/profiler/ImGui/ImLineGraph.cpp @@ -56,7 +56,7 @@ namespace AE::Profiler if ( _lines[l].points.empty() ) desc << '-'; else - desc << ToString2( _lines[l].points.back() ) << _suffix; + desc << ToString2( _lines[l].points.back(), 3 ) << _suffix; } ImGui::SetTooltip( "%s", desc.c_str() ); } @@ -93,12 +93,18 @@ namespace AE::Profiler if ( _invLimits ) { - if ( max_y < _limits.x ) _bgIndex = 0; else - if ( max_y < _limits.y ) _bgIndex = 1; else + // green + // yellow + // red + if ( max_y > _limits.y ) _bgIndex = 0; else + if ( max_y > _limits.x ) _bgIndex = 1; else _bgIndex = 2; } else { + // red + // yellow + // green if ( max_y > _limits.y ) _bgIndex = 2; else if ( max_y > _limits.x ) _bgIndex = 1; else _bgIndex = 0; @@ -106,6 +112,7 @@ namespace AE::Profiler if ( adaptive ) { + _range *= float2{IsFinite( _range )}; _range.x = Min( _range.y * 0.99f, min_y * 0.9f ); _range.y = Max( _range.y * 0.99f, max_y * 1.1f ); _range = Max( _range, 0.f ); @@ -121,7 +128,7 @@ namespace AE::Profiler // min value { - String val_str = ToString2( _range.x ) << _suffix; + String val_str = ToString2( _range.x, 2 ) << _suffix; const float text_width = ImGui::GetTextLineHeight() * (4 + _suffix.size()); const ImVec2 text_pos { line_region.right - text_width - line_region.Width() * 0.01f, border_region.bottom - ImGui::GetTextLineHeightWithSpacing() * 1.1f }; @@ -131,7 +138,7 @@ namespace AE::Profiler // max value { - String val_str = ToString2( _range.y ) << _suffix; + String val_str = ToString2( _range.y, 2 ) << _suffix;
const float text_width = ImGui::GetTextLineHeight() * (4 + _suffix.size()); const ImVec2 text_pos { line_region.right - text_width - line_region.Width() * 0.01f, border_region.top + ImGui::GetTextLineHeightWithSpacing() * 0.1f }; diff --git a/AE/engine/src/profiler/Impl/GraphicsProfiler.cpp b/AE/engine/src/profiler/Impl/GraphicsProfiler.cpp index 76711824..b09965c8 100644 --- a/AE/engine/src/profiler/Impl/GraphicsProfiler.cpp +++ b/AE/engine/src/profiler/Impl/GraphicsProfiler.cpp @@ -223,11 +223,9 @@ namespace AE::Profiler }{ auto task = MakeRCNe< Threading::AsyncTaskFn >( [this]() { - #if defined(AE_ENABLE_REMOTE_GRAPHICS) or defined(AE_ENABLE_PVRCOUNTER) if ( _pvrProfiler and _pvrProfiler->IsInitialized() ) _ReadResultsPVR(); else - #endif _ReadResults(); }, "GraphicsProfiler::ReadResults", @@ -310,19 +308,71 @@ namespace AE::Profiler auto& f = _perFrame[_readIndex]; EXLOCK( f.guard ); + _pvrProfiler->ReadTimingData( OUT _pvrTimings ); + + if ( _pvrTimings.empty() ) + return; + _gpuTime.min = nanosecondsd{MaxValue()}; _gpuTime.max = nanosecondsd{0.0}; - _imHistory.Begin(); + ArrayView timings_view = _pvrTimings; - _pvrProfiler->ReadTimingData( OUT _pvrTimings ); + #if 0 + // find min/max time + nanosecondsd max_dt {0.0}; + nanosecondsd avg_dt {0.0}; + + for (auto& t : timings_view) + { + auto dt = t.end - t.begin; + max_dt = Max( max_dt, dt ); + avg_dt += dt; + } + + avg_dt /= double(timings_view.size()); + const nanosecondsd min_dt = Min( max_dt * 0.1, avg_dt ); + + + // find significant time + FixedArray< PowerVRProfiler::TimeScope, 32 > timings; - for (auto& t : _pvrTimings) + for (auto& t : timings_view) { - ASSERT( t.begin <= t.end ); _gpuTime.min = Min( _gpuTime.min, t.begin ); _gpuTime.max = Max( _gpuTime.max, t.end ); + auto dt = t.end - t.begin; + if ( dt > min_dt ) + timings.try_push_back( t ); + } + timings_view = timings; + + #else + + const nanosecondsd min_dt = secondsd{2.0 / 60.0}; + + for (auto& t : timings_view) + { + auto frame_time = 
_gpuTime.max - _gpuTime.min; + if_unlikely( frame_time > min_dt ) + { + timings_view = ArrayView{ timings_view.data(), &t }; + break; + } + + _gpuTime.min = Min( _gpuTime.min, t.begin ); + _gpuTime.max = Max( _gpuTime.max, t.end ); + } + + #endif + + + // add to graph + _imHistory.Begin(); + + for (auto& t : timings_view) + { StringView name; RGBA8u color; @@ -330,9 +380,9 @@ namespace AE::Profiler switch_enum( t.pass ) { case EPass::Compute : name = "Compute"; color = HtmlColor::Yellow; break; - case EPass::TileAccel : name = "TileAccel"; color = HtmlColor::Blue; break; - case EPass::TBDR : name = "TBDR"; color = HtmlColor::Lime; break; - case EPass::Blit : name = "Blit"; color = HtmlColor::Red; break; + case EPass::Tiler : name = "Tiler"; color = HtmlColor::Blue; break; + case EPass::Renderer : name = "Renderer"; color = HtmlColor::Lime; break; + case EPass::Transfer : name = "Transfer"; color = HtmlColor::Red; break; case EPass::RayTracing : name = "RayTracing"; color = HtmlColor::Violet; break; case EPass::RTASBuild : name = "RTASBuild"; color = HtmlColor::Pink; break; case EPass::Unknown : break; diff --git a/AE/engine/src/profiler/Impl/GraphicsProfiler.h b/AE/engine/src/profiler/Impl/GraphicsProfiler.h index eec82817..7b233697 100644 --- a/AE/engine/src/profiler/Impl/GraphicsProfiler.h +++ b/AE/engine/src/profiler/Impl/GraphicsProfiler.h @@ -3,7 +3,7 @@ #pragma once #include "profiler/Impl/ProfilerUtils.h" -#include "profiler/Utils/PowerVRProfiler.h" +#include "profiler/Profilers/PowerVRProfiler.h" namespace AE::Profiler { diff --git a/AE/engine/src/profiler/Impl/HwpcProfiler.cpp b/AE/engine/src/profiler/Impl/HwpcProfiler.cpp index 888f94b3..1ab502fa 100644 --- a/AE/engine/src/profiler/Impl/HwpcProfiler.cpp +++ b/AE/engine/src/profiler/Impl/HwpcProfiler.cpp @@ -8,6 +8,7 @@ #include "profiler/Remote/RemoteNVidiaProfiler.h" #include "profiler/Remote/RemoteAdrenoProfiler.h" #include "profiler/Remote/RemotePowerVRProfiler.h" +#include 
"profiler/Remote/RemoteGeneralProfiler.h" #include "graphics/GraphicsImpl.h" namespace AE::Profiler @@ -39,19 +40,14 @@ namespace AE::Profiler { CHECK_ERR( (client != null) == (msgProducer != null) ); + _isRemote = bool{client}; + _initialized |= _InitNvProf( client, msgProducer ); _initialized |= _InitArmProf( client, msgProducer ); _initialized |= _InitMaliProf( client, msgProducer ); _initialized |= _InitAdrenoProf( client, msgProducer ); _initialized |= _InitPowerVRProf( client, msgProducer ); - - // CPU usage - { - #if defined(AE_PLATFORM_WINDOWS) and not defined(AE_ENABLE_REMOTE_GRAPHICS) - _cpuUsage.enabled = true; - #endif - _initialized |= _cpuUsage.enabled; - } + _initialized |= _InitGeneralProf( client, msgProducer ); #ifdef AE_ENABLE_IMGUI _InitImGui(); @@ -78,25 +74,27 @@ namespace AE::Profiler Update ================================================= */ - void HwpcProfiler::Update (secondsf, uint frameCount) + void HwpcProfiler::Update (secondsf dt, uint frameCount) { if ( not _initialized ) return; - SampleGraphicsCounters(); - SampleCPUCounters(); + const float inv_dt = 1.f / dt.count(); + const float inv_frames = 1.f / float(frameCount); + const bool per_frame = not _isRemote; + + SampleGraphicsCounters( inv_dt ); + SampleCPUCounters( inv_dt ); #ifdef AE_ENABLE_IMGUI - const float inv_fc = 1.f / float(frameCount); - - _UpdateCpuUsageImGui(); - _UpdateArmCountersImGui( double(inv_fc) ); - _UpdateMaliCountersImGui( double(inv_fc) ); - _UpdateAdrenoCountersImGui( inv_fc ); - _UpdatePowerVRCountersImGui( inv_fc ); - _UpdateNVidiaCountersImGui( inv_fc ); + _UpdateGeneralPerfImGui( per_frame, inv_frames ); + _UpdateArmCountersImGui( per_frame, inv_frames ); + _UpdateMaliCountersImGui( per_frame, inv_frames ); + _UpdateAdrenoCountersImGui( per_frame, inv_frames ); + _UpdatePowerVRCountersImGui( per_frame, inv_frames ); + _UpdateNVidiaCountersImGui( per_frame, inv_frames ); #else - Unused( frameCount ); + Unused( dt, frameCount ); #endif } @@ -196,9 +194,15 
@@ namespace AE::Profiler using ECounter = MaliProfiler::ECounter; _maliProf.requiredCounters = MaliProfiler::ECounterSet{ + // Clock // + #if 1 + ECounter::GPUActiveCy, ECounter::PerCoreActiveCy, ECounter::TilerActiveCy, + ECounter::FragThroughputCy, ECounter::NonFragThroughputCy, + ECounter::ExtMemEnergy, ECounter::CoreEnergy, ECounter::TotalEnergy, + #endif // External memory // #if 1 - ECounter::ExtBusRdBy, ECounter::ExtBusWrBy, + ECounter::ExtBusRdBy, ECounter::ExtBusWrBy, ECounter::ExtBusTotalBy, ECounter::ExtBusRdStallRate, ECounter::ExtBusWrStallRate, ECounter::ExtBusRdLat0, ECounter::ExtBusRdLat128, ECounter::ExtBusRdLat192, ECounter::ExtBusRdLat256, ECounter::ExtBusRdLat320, ECounter::ExtBusRdLat384, ECounter::ExtBusRdOTQ1, ECounter::ExtBusRdOTQ2, ECounter::ExtBusRdOTQ3, ECounter::ExtBusRdOTQ4, @@ -211,7 +215,7 @@ namespace AE::Profiler ECounter::SCBusTileWrBy, ECounter::SCBusLSWrBy, ECounter::SCBusFFEL2RdBy, ECounter::SCBusLSL2RdBy, ECounter::SCBusTexL2RdBy, ECounter::L2CacheFlushCy, - ECounter::FragTileKillRate, + ECounter::TilerPosCacheHitRate, ECounter::TilerVarCacheHitRate, #endif // Binning phase // #if 1 @@ -225,6 +229,10 @@ namespace AE::Profiler ECounter::FragEZSTestQd, ECounter::FragEZSKillQd, ECounter::FragLZSTestQd, ECounter::FragLZSKillQd, ECounter::FragEZSKillRate, ECounter::FragFPKKillRate, ECounter::FragLZSKillRate, ECounter::FragOpaqueQdRate, ECounter::FragOverdraw, + ECounter::GeomTrianglePrim, ECounter::GeomPointPrim, ECounter::GeomLinePrim, + ECounter::FragTile, ECounter::FragTileKill, + ECounter::FragTileKillRate, + // ECounter::FragRastQd, ECounter::FragOpaqueQd, ECounter::FragTransparentQd, ECounter::FragShadedQd, #endif // Functional unit utilization // #if 1 @@ -245,11 +253,19 @@ namespace AE::Profiler ECounter::EngNarrowInstrRate, ECounter::EngFMAPipeUtil, ECounter::EngCVTPipeUtil, ECounter::EngSFUPipeUtil, ECounter::EngDivergedInstrRate, - ECounter::FragWarp, ECounter::NonFragWarp, ECounter::CoreFullQdWarp, 
ECounter::CoreAllRegsWarp, + ECounter::FragWarp, ECounter::NonFragWarp, ECounter::CoreFullWarp, ECounter::CoreAllRegsWarp, ECounter::CoreAllRegsWarpRate, - ECounter::CoreFullQdWarpRate, + ECounter::CoreFullWarpRate, ECounter::FragRastPartQdRate, #endif + // Usage // + #if 0 + ECounter::ExtBusRdStallCy, ECounter::ExtBusWrStallCy, + ECounter::GPUIRQActiveCy, + ECounter::FragQueueActiveCy, ECounter::NonFragQueueActiveCy, + ECounter::FragActiveCy, ECounter::NonFragActiveCy, ECounter::TilerActiveCy, + ECounter::CoreActiveCy, ECounter::LSIssueCy, + #endif }; return _maliProf.profiler.Initialize( _maliProf.requiredCounters ); @@ -284,14 +300,31 @@ namespace AE::Profiler ECounter::GPU_MemoryInterfaceLoad, ECounter::GPU_ClockSpeed, ECounter::Tiler_TriangleRatio, ECounter::Texture_ReadStall, ECounter::Shader_ShaderProcessingLoad, ECounter::GPU_MemoryRead, ECounter::GPU_MemoryWrite, ECounter::VertexShader_RegisterOverload, ECounter::PixelShader_RegisterOverload, - ECounter::Tiler_TrianglesInputPerFrame, ECounter::Tiler_TrianglesOutputPerFrame, ECounter::Renderer_HSR_Efficiency, - ECounter::Renderer_ISP_PixelLoad, ECounter::RendererTimePerFrame, ECounter::GeometryTimePerFrame, - ECounter::TDM_TimePerFrame, ECounter::Shader_CyclesPerComputeKernel, ECounter::Shader_CyclesPerVertex, + ECounter::Renderer_HSR_Efficiency, ECounter::TDM_Active, ECounter::SPM_Active, + ECounter::Renderer_ISP_PixelLoad, ECounter::Shader_CyclesPerComputeKernel, ECounter::Shader_CyclesPerVertex, ECounter::Shader_CyclesPerPixel, ECounter::ComputeShader_ProcessingLoad, ECounter::VertexShader_ProcessingLoad, ECounter::PixelShader_ProcessingLoad, ECounter::RendererActive, ECounter::GeometryActive, - ECounter::TDM_Active, ECounter::SPM_Active + ECounter::Texture_FetchesPerPixel, ECounter::Texture_FilterCyclesPerFetch, ECounter::Texture_FilterInputLoad, + ECounter::Texture_FilterLoad, ECounter::Texture_ReadCyclesPerFetch, ECounter::GPU_MemoryTotal }; + if ( _isRemote ) + { + _pvrProf.requiredCounters.insert( 
ECounter::RendererTime ); + _pvrProf.requiredCounters.insert( ECounter::GeometryTime ); + _pvrProf.requiredCounters.insert( ECounter::TDM_Time ); + _pvrProf.requiredCounters.insert( ECounter::Tiler_TrianglesInputPerSecond ); + _pvrProf.requiredCounters.insert( ECounter::Tiler_TrianglesOutputPerSecond ); + } + else + { + _pvrProf.requiredCounters.insert( ECounter::RendererTimePerFrame ); + _pvrProf.requiredCounters.insert( ECounter::GeometryTimePerFrame ); + _pvrProf.requiredCounters.insert( ECounter::TDM_TimePerFrame ); + _pvrProf.requiredCounters.insert( ECounter::Tiler_TrianglesInputPerFrame ); + _pvrProf.requiredCounters.insert( ECounter::Tiler_TrianglesOutputPerFrame ); + } + return _pvrProf.profiler.Initialize( _pvrProf.requiredCounters ); } @@ -320,11 +353,76 @@ namespace AE::Profiler using ECounter = AdrenoProfiler::ECounter; - _adrenoProf.requiredCounters = AdrenoProfiler::ECounterSet{}.SetAll(); + _adrenoProf.requiredCounters = AdrenoProfiler::ECounterSet{ + // LRZ // + #if 1 + ECounter::LRZ_PrimKilledByMaskGen, ECounter::LRZ_PrimKilledByLRZ, ECounter::LRZ_PrimPassed, + ECounter::LRZ_TileKilled, ECounter::LRZ_TotalPixel, + ECounter::LRZ_Read, ECounter::LRZ_Write, + #endif + // Render backend // + #if 1 + ECounter::RB_Z_Pass, ECounter::RB_Z_Fail, ECounter::RB_S_Fail, ECounter::RB_TotalPass, + ECounter::RB_ZRead, ECounter::RB_ZWrite, + ECounter::RB_CRead, ECounter::RB_CWrite, + // ECounter::RB_AliveCycles2D, + #endif + // CCU // + #if 1 + // ECounter::CCU_PartialBlockRead, ECounter::CCU_2DPixels, + ECounter::CCU_DepthBlocks, ECounter::CCU_ColorBlocks, + ECounter::CCU_GMemRead, ECounter::CCU_GMemWrite, + ECounter::CCU_2dReadReq, ECounter::CCU_2dWriteReq, + #endif + // Rasterizer // + #if 1 + ECounter::RAS_SuperTiles, ECounter::RAS_8x4Tiles, + ECounter::RAS_FullyCoveredSuperTiles, ECounter::RAS_FullyCovered8x4Tiles, + #endif + // Shader/Streaming Processor // + #if 1 + ECounter::SSP_ALUcy, ECounter::SSP_EFUcy, + ECounter::SSP_VS_EFUInst, 
ECounter::SSP_VS_FullALUInst, ECounter::SSP_VS_HalfALUInst, + ECounter::SSP_FS_EFUInst, ECounter::SSP_FS_FullALUInst, ECounter::SSP_FS_HalfALUInst, + ECounter::SSP_L2Read, ECounter::SSP_L2Write, + #endif + // Compression and Decompression // + #if 1 + ECounter::CMP_2dReadData, ECounter::CMP_2dWriteData, + #endif + }; return _adrenoProf.profiler.Initialize( _adrenoProf.requiredCounters ); } +/* +================================================= + _InitGeneralProf +================================================= +*/ + bool HwpcProfiler::_InitGeneralProf (ClientServer_t client, MsgProducer_t msgProducer) + { + #ifdef AE_ENABLE_REMOTE_GRAPHICS + Unused( client, msgProducer ); + #else + // initialize remote profiling + if ( client ) + { + CHECK( msgProducer->GetChannels() == EnumSet{EChannel::Reliable} ); + + auto gen_prof_client = MakeRC( RVRef(msgProducer) ); + + CHECK( client->Add( gen_prof_client->GetMsgConsumer() )); + + CHECK( _genProf.profiler.InitClient( gen_prof_client )); + } + #endif + + _genProf.requiredCounters = GeneralProfiler::ECounterSet{}.SetAll(); + + return _genProf.profiler.Initialize( _genProf.requiredCounters ); + } + /* ================================================= Deinitialize @@ -335,6 +433,7 @@ namespace AE::Profiler _nvProf.profiler.Deinitialize(); _armProf.profiler.Deinitialize(); _pvrProf.profiler.Deinitialize(); + _genProf.profiler.Deinitialize(); _maliProf.profiler.Deinitialize(); _adrenoProf.profiler.Deinitialize(); @@ -353,14 +452,15 @@ namespace AE::Profiler SampleGraphicsCounters ================================================= */ - void HwpcProfiler::SampleGraphicsCounters () + void HwpcProfiler::SampleGraphicsCounters (float invdt) { - const auto Sample = [] (auto& prof) + const auto Sample = [b = not _isRemote, invdt] (auto& prof) {{ - if ( not prof.profiler.IsInitialized() ) + if ( b and not prof.profiler.IsInitialized() ) return; - prof.profiler.Sample( OUT prof.counters ); + prof.invTimeDelta = invdt; + 
prof.profiler.Sample( OUT prof.counters, INOUT prof.invTimeDelta ); }}; Sample( _nvProf ); @@ -374,17 +474,19 @@ namespace AE::Profiler SampleCPUCounters ================================================= */ - void HwpcProfiler::SampleCPUCounters () + void HwpcProfiler::SampleCPUCounters (float invdt) { - const auto Sample = [] (auto& prof) + const auto Sample = [b = not _isRemote, invdt] (auto& prof) {{ - if ( not prof.profiler.IsInitialized() ) + if ( b and not prof.profiler.IsInitialized() ) return; - prof.profiler.Sample( OUT prof.counters ); + prof.invTimeDelta = invdt; + prof.profiler.Sample( OUT prof.counters, INOUT prof.invTimeDelta ); }}; Sample( _armProf ); + Sample( _genProf ); } diff --git a/AE/engine/src/profiler/Impl/HwpcProfiler.h b/AE/engine/src/profiler/Impl/HwpcProfiler.h index 6bfc2709..68ee6a49 100644 --- a/AE/engine/src/profiler/Impl/HwpcProfiler.h +++ b/AE/engine/src/profiler/Impl/HwpcProfiler.h @@ -3,11 +3,12 @@ #pragma once #include "profiler/Impl/ProfilerUtils.h" -#include "profiler/Utils/ArmProfiler.h" -#include "profiler/Utils/MaliProfiler.h" -#include "profiler/Utils/AdrenoProfiler.h" -#include "profiler/Utils/PowerVRProfiler.h" -#include "profiler/Utils/NVidiaProfiler.h" +#include "profiler/Profilers/ArmProfiler.h" +#include "profiler/Profilers/MaliProfiler.h" +#include "profiler/Profilers/AdrenoProfiler.h" +#include "profiler/Profilers/PowerVRProfiler.h" +#include "profiler/Profilers/NVidiaProfiler.h" +#include "profiler/Profilers/GeneralProfiler.h" namespace AE::Profiler { @@ -27,11 +28,13 @@ namespace AE::Profiler // variables private: bool _initialized = false; + bool _isRemote = false; struct { ArmProfiler profiler; ArmProfiler::Counters_t counters; ArmProfiler::ECounterSet requiredCounters; + float invTimeDelta = 0.0f; #ifdef AE_ENABLE_IMGUI ImLineGraphTable graphTable; #endif @@ -41,6 +44,7 @@ namespace AE::Profiler MaliProfiler profiler; MaliProfiler::Counters_t counters; MaliProfiler::ECounterSet requiredCounters; + float 
invTimeDelta = 0.0f; #ifdef AE_ENABLE_IMGUI ImLineGraphTable graphTable; #endif @@ -50,6 +54,7 @@ namespace AE::Profiler PowerVRProfiler profiler; PowerVRProfiler::Counters_t counters; PowerVRProfiler::ECounterSet requiredCounters; + float invTimeDelta = 0.0f; #ifdef AE_ENABLE_IMGUI ImLineGraphTable graphTable; #endif @@ -59,6 +64,7 @@ namespace AE::Profiler AdrenoProfiler profiler; AdrenoProfiler::Counters_t counters; AdrenoProfiler::ECounterSet requiredCounters; + float invTimeDelta = 0.0f; #ifdef AE_ENABLE_IMGUI ImLineGraphTable graphTable; #endif @@ -68,6 +74,7 @@ namespace AE::Profiler NVidiaProfiler profiler; NVidiaProfiler::Counters_t counters; NVidiaProfiler::ECounterSet requiredCounters; + float invTimeDelta = 0.0f; #ifdef AE_ENABLE_IMGUI ImLineGraphTable graphTable; #endif @@ -75,12 +82,16 @@ namespace AE::Profiler struct { + GeneralProfiler profiler; + GeneralProfiler::Counters_t counters; + GeneralProfiler::ECounterSet requiredCounters; + float invTimeDelta = 0.0f; #ifdef AE_ENABLE_IMGUI Array< Unique > coreUsage; - uint corePerLine = 1; + uint corePerLine = 0; + ImLineGraphTable graphTable; #endif - bool enabled = true; - } _cpuUsage; + } _genProf; // methods @@ -90,14 +101,15 @@ namespace AE::Profiler void DrawImGUI (); void Draw (Canvas &canvas); + void Update (secondsf dt, uint frameCount); void Tick (); ND_ bool Initialize (ClientServer_t, MsgProducer_t); void Deinitialize (); - void SampleGraphicsCounters (); - void SampleCPUCounters (); + void SampleGraphicsCounters (float invdt); + void SampleCPUCounters (float invdt); ND_ PowerVRProfiler& GetPowerVRProfiler () { return _pvrProf.profiler; } @@ -107,30 +119,34 @@ namespace AE::Profiler ND_ bool _InitAdrenoProf (ClientServer_t, MsgProducer_t); ND_ bool _InitMaliProf (ClientServer_t, MsgProducer_t); ND_ bool _InitPowerVRProf (ClientServer_t, MsgProducer_t); + ND_ bool _InitGeneralProf (ClientServer_t, MsgProducer_t); private: #ifdef AE_ENABLE_IMGUI void _InitImGui (); + void _InitGeneralPerfImGui 
(const ImLineGraph::ColorStyle &, const ImLineGraph::ColorStyle &); + void _InitCpuUsageImGui (); void _InitArmCountersImGui (const ImLineGraph::ColorStyle &, const ImLineGraph::ColorStyle &); void _InitMaliCountersImGui (const ImLineGraph::ColorStyle &, const ImLineGraph::ColorStyle &); void _InitAdrenoCountersImGui (const ImLineGraph::ColorStyle &, const ImLineGraph::ColorStyle &); void _InitNVidiaCountersImGui (const ImLineGraph::ColorStyle &, const ImLineGraph::ColorStyle &); void _InitPowerVRCountersImGui (const ImLineGraph::ColorStyle &, const ImLineGraph::ColorStyle &); - void _UpdateArmCountersImGui (double invFC); - void _UpdateMaliCountersImGui (double invFC); - void _UpdateAdrenoCountersImGui (float invFC); - void _UpdateNVidiaCountersImGui (float invFC); - void _UpdatePowerVRCountersImGui (float invFC); + // 'perFrame' - if true, counters are scaled by 'invFC', otherwise by 1/dt + // 'invFC' - 1/frameCount + void _UpdateGeneralPerfImGui (bool perFrame, float invFC); + void _UpdateArmCountersImGui (bool perFrame, float invFC); + void _UpdateMaliCountersImGui (bool perFrame, float invFC); + void _UpdateAdrenoCountersImGui (bool perFrame, float invFC); + void _UpdateNVidiaCountersImGui (bool perFrame, float invFC); + void _UpdatePowerVRCountersImGui (bool perFrame, float invFC); - void _DrawCpuUsageImGui (); + void _DrawGeneralPerfImGui (); void _DrawProfilerArmImGui (); void _DrawProfilerMaliImGui (); void _DrawProfilerAdrenoImGui (); void _DrawProfilerNVidiaImGui (); void _DrawProfilerPowerVRImGui (); - - void _UpdateCpuUsageImGui (); #endif }; diff --git a/AE/engine/src/profiler/Impl/HwpcProfiler_ImGui.cpp b/AE/engine/src/profiler/Impl/HwpcProfiler_ImGui.cpp index fe3ee9de..dc51b9f5 100644 --- a/AE/engine/src/profiler/Impl/HwpcProfiler_ImGui.cpp +++ b/AE/engine/src/profiler/Impl/HwpcProfiler_ImGui.cpp @@ -26,9 +26,9 @@ namespace // PowerVR GPU // - static constexpr GraphName PVR_MemoryTraffic {"MemoryTraffic"}; + static constexpr GraphName PVR_MemoryTraffic {"MemoryTrafficPerFrame"}; + static constexpr
GraphName PVR_MemoryTraffic2 {"MemoryTrafficPerSec"}; static constexpr GraphName PVR_MemoryBusLoad {"MemoryBusLoad"}; - static constexpr GraphName PVR_TexReadStall {"TexReadStall"}; static constexpr GraphName PVR_GpuCycles {"GpuCycles"}; static constexpr GraphName PVR_GpuTime {"GpuTime"}; @@ -43,6 +43,11 @@ namespace static constexpr GraphName PVR_ShaderLoad2 {"ShaderLoad2"}; static constexpr GraphName PVR_RegisterOverload {"RegisterOverload"}; + static constexpr GraphName PVR_TexReadStall {"TexReadStall"}; + static constexpr GraphName PVR_TexFetchesPerPixel {"TexFetchesPerPixel"}; + static constexpr GraphName PVR_TexFilterLoad {"TexFilterLoad"}; + static constexpr GraphName PVR_TexCycles {"TexCycles"}; + // Adreno GPU // static constexpr GraphName Adreno_LrzTraffic {"LrzTraffic"}; @@ -50,8 +55,8 @@ namespace static constexpr GraphName Adreno_LrzTileKilled {"LrzTileKilled"}; static constexpr GraphName Adreno_LrzTotalPixel {"LrzTotalPixel"}; - static constexpr GraphName Adreno_RasTiles {"RasTiles"}; - static constexpr GraphName Adreno_RasFullyCoveredTiles {"RasFullyCoveredTiles"}; + static constexpr GraphName Adreno_RasSuperTiles {"RasSuperTiles"}; + static constexpr GraphName Adreno_Ras8x4Tiles {"Ras8x4Tiles"}; static constexpr GraphName Adreno_RbZTraffic {"RbZTraffic"}; static constexpr GraphName Adreno_RbCTraffic {"RbCTraffic"}; @@ -62,10 +67,19 @@ namespace static constexpr GraphName Adreno_CcuPartBlockRd {"CcuPartBlockRd"}; static constexpr GraphName Adreno_CcuGMem {"CcuGMem"}; static constexpr GraphName Adreno_Ccu2DPix {"Ccu2DPix"}; + static constexpr GraphName Adreno_Ccu2dReq {"Ccu2dReq"}; + + static constexpr GraphName Adreno_SspALUCycles {"SspALUCycles"}; + static constexpr GraphName Adreno_SspVSInst {"SspVSInst"}; + static constexpr GraphName Adreno_SspFSInst {"SspFSInst"}; + static constexpr GraphName Adreno_SspL2Traffic {"SspL2Traffic"}; + + static constexpr GraphName Adreno_Cmp2dTraffic {"Cmp2dTraffic"}; // Mali GPU // - static constexpr GraphName 
Mali_ExtMemTraffic {"ExtMemTraffic"}; + static constexpr GraphName Mali_ExtMemTraffic {"ExtMemTrafficPerFrame"}; + static constexpr GraphName Mali_ExtMemTraffic2 {"ExtMemTrafficPerSec"}; static constexpr GraphName Mali_ExtMemStalls {"ExtMemStalls"}; static constexpr GraphName Mali_ExtReadLatency1 {"ExtReadLatency1"}; static constexpr GraphName Mali_ExtReadLatency2 {"ExtReadLatency2"}; @@ -77,17 +91,20 @@ namespace static constexpr GraphName Mali_LSTileWrite {"LS & Tile write"}; static constexpr GraphName Mali_CacheReads {"CacheReads"}; static constexpr GraphName Mali_CacheFlush {"CacheFlush"}; - static constexpr GraphName Mali_FragTileKillRate {"FragTileKillRate"}; static constexpr GraphName Mali_Primitives {"Primitives"}; static constexpr GraphName Mali_PosVarShadingReq {"PosVarShadingRequests"}; static constexpr GraphName Mali_PosVarShadingThread {"PosVarShadingThreads"}; static constexpr GraphName Mali_PosVarShadingPerPrim {"PosVarShadingPerInputPrimitive"}; + static constexpr GraphName Mali_PrimitiveType {"PrimitiveType"}; + static constexpr GraphName Mali_TilerCacheHit {"TilerCacheHit"}; static constexpr GraphName Mali_ZSTest {"ZSTest"}; static constexpr GraphName Mali_ZSTest2 {"ZSTest2"}; static constexpr GraphName Mali_FragOpaqueRate {"FragOpaqueRate"}; static constexpr GraphName Mali_FragOverdraw {"FragOverdraw"}; + static constexpr GraphName Mali_TileCount {"TileCount"}; + static constexpr GraphName Mali_FragTileKillRate {"FragTileKillRate"}; static constexpr GraphName Mali_CoreUtil {"Core Util"}; static constexpr GraphName Mali_VarUtil {"Varying Util"}; @@ -100,6 +117,10 @@ namespace static constexpr GraphName Mali_TilerUtil {"Tiler Util"}; static constexpr GraphName Mali_FPKUtil {"FPK Util"}; static constexpr GraphName Mali_QueueUtil {"QueueUtil"}; + static constexpr GraphName Mali_GPUActiveCy {"GPUActiveCy"}; + static constexpr GraphName Mali_ThroughputCy {"ThroughputCy"}; + static constexpr GraphName Mali_TilerActiveCy {"TilerActiveCy"}; + static 
constexpr GraphName Mali_Power {"Power"}; static constexpr GraphName Mali_ALUUtil {"ALU Util"}; static constexpr GraphName Mali_ArithPipeUtil {"ArithPipeUtil"}; @@ -114,18 +135,164 @@ namespace static constexpr GraphName NV_MemoryUtil {"MemUtil"}; static constexpr GraphName NV_MemoryClock {"MemClock"}; static constexpr GraphName NV_MemUsed {"MemUsed"}; - static constexpr GraphName NV_MemUsedMb {"MemUsedMb"}; + static constexpr GraphName NV_DevMemUsedMb {"DevMemUsedMb"}; + static constexpr GraphName NV_UniMemUsedMb {"UniMemUsedMb"}; static constexpr GraphName NV_GpuUtil {"GpuUtil"}; static constexpr GraphName NV_GraphicsClock {"GraphicsClock"}; - static constexpr GraphName NV_SMClock {"SMClock"}; - static constexpr GraphName NV_VideoClock {"VideoClock"}; static constexpr GraphName NV_GpuTemp {"GpuTemp"}; static constexpr GraphName NV_PowerUsage {"PowerUsage"}; - static constexpr GraphName NV_PerfState {"PerfState"}; +// static constexpr GraphName NV_PerfState {"PerfState"}; static constexpr GraphName NV_FanSpeed {"FanSpeed"}; -} + + + // OS performance counters // + static constexpr GraphName GenPerf_ProcMemUsage {"ProcMemUsage"}; + static constexpr GraphName GenPerf_MemUsage {"MemUsagePercent"}; + static constexpr GraphName GenPerf_PhysMemUsage {"PhysUsageBytes"}; + static constexpr GraphName GenPerf_VirtMemUsage {"VirtUsageBytes"}; + static constexpr GraphName GenPerf_CtxSwitches {"CtxSwitches"}; + static constexpr GraphName GenPerf_FileIO {"FileIO"}; + static constexpr GraphName GenPerf_KernelTime {"KernelTime"}; + + static constexpr GraphName GenPerf_BatteryDischargeTotal{"BatteryDischargeTotal"}; + static constexpr GraphName GenPerf_BatteryDischarge {"BatteryDischarge"}; + static constexpr GraphName GenPerf_BatteryLevel {"BatteryLevel"}; + + static constexpr bool GenProf_BatteryAux = false; + static constexpr GraphName GenPerf_BatteryTemperature {"BatteryTemperature"}; + static constexpr GraphName GenPerf_BatteryCurrent {"BatteryCurrent"}; + static constexpr 
GraphName GenPerf_BatteryVoltage {"BatteryVoltage"}; + + + static constexpr char c_ExtMemInfo [] = + "Calculate bandwidth:\n" + " clock_mhz x bus_width x channels x rate / 8000 = GB/s\n" + " clock_mhz x bus_width x channels x rate / 8 * 1e6/(1<<30) = GiB/s\n" + "\n" + "| name | rate | mJ/GB |\n" + "---------|------|--------|\n" + " LPDDR* | 2 | 80-100 |\n" + " DDR* | 2 | |\n" + " GDDR4 | 2 | |\n" + " GDDR5 | 4 | 72 |\n" + " GDDR5X | 8 | 64 |\n" + " GDDR6 | 8 | 60 |\n" + " GDDR6X | 16 | 58 |\n"; + +/* +================================================= + GetStyle4 +================================================= +*/ + ND_ static ImLineGraph::ColorStyle GetStyle4 () + { + // const RGBA8u text_col {255, 255, 255, 255}; + const RGBA8u text_col {200, 200, 200, 255}; + + ImLineGraph::ColorStyle style4; + style4.lines[0] = RGBA8u{180, 20, 20, 255}; + style4.lines[1] = RGBA8u{ 20, 170, 20, 255}; + style4.lines[2] = RGBA8u{ 70, 70, 255, 255}; + style4.lines[3] = RGBA8u{170, 170, 50, 255}; + style4.background[0]= RGBA8u{ 0, 0, 40, 255}; + style4.background[1]= RGBA8u{ 30, 30, 0, 255}; + style4.background[2]= RGBA8u{ 30, 0, 0, 255}; + style4.border = RGBA8u{200, 200, 255, 255}; + style4.text = text_col; + style4.minMaxValue = text_col; + style4.mode = ImLineGraph::EMode::Line_Adaptive; + + return style4; + } + +/* +================================================= + GetStyle1 +================================================= +*/ + ND_ static ImLineGraph::ColorStyle GetStyle1 () + { + ImLineGraph::ColorStyle style1 = GetStyle4(); + style1.lines[0] = RGBA8u{ 20, 170, 20, 255}; + style1.border = RGBA8u{200, 200, 255, 255}; + return style1; + } + +/* +================================================= + _AddPoint +================================================= +*/ + template + static void _AddPoint1 (P &prof, const C &cnt, E type, GraphName::Ref graphName, const T scale) + { + auto graph = prof.graphTable.Get( graphName ); + CHECK_ERRV( graph ); + ASSERT( 
prof.requiredCounters.contains( type )); + + if ( auto it = cnt.find( type ); it != cnt.end()) graph->AddNonScaled( List{ it->second * scale }); + } + + template + static void _AddPoint2 (P &prof, const C &cnt, E type0, E type1, GraphName::Ref graphName, const T scale) + { + auto graph = prof.graphTable.Get( graphName ); + CHECK_ERRV( graph ); + ASSERT( prof.requiredCounters.contains( type0 )); + ASSERT( prof.requiredCounters.contains( type1 )); + + T value0 = T(0), value1 = T(0); + bool exists = false; + + if ( auto it = cnt.find( type0 ); it != cnt.end()) { value0 = it->second * scale; exists = true; } + if ( auto it = cnt.find( type1 ); it != cnt.end()) { value1 = it->second * scale; exists = true; } + + if ( exists ) graph->AddNonScaled( List{ value0, value1 }); + } + + template + static void _AddPoint3 (P &prof, const C &cnt, E type0, E type1, E type2, GraphName::Ref graphName, const T scale) + { + auto graph = prof.graphTable.Get( graphName ); + CHECK_ERRV( graph ); + ASSERT( prof.requiredCounters.contains( type0 )); + ASSERT( prof.requiredCounters.contains( type1 )); + ASSERT( prof.requiredCounters.contains( type2 )); + + T value0 = T(0), value1 = T(0), value2 = T(0); + bool exists = false; + + if ( auto it = cnt.find( type0 ); it != cnt.end()) { value0 = it->second * scale; exists = true; } + if ( auto it = cnt.find( type1 ); it != cnt.end()) { value1 = it->second * scale; exists = true; } + if ( auto it = cnt.find( type2 ); it != cnt.end()) { value2 = it->second * scale; exists = true; } + + if ( exists ) graph->AddNonScaled( List{ value0, value1, value2 }); + } + + template + static void _AddPoint4 (P &prof, const C &cnt, E type0, E type1, E type2, E type3, GraphName::Ref graphName, const T scale) + { + auto graph = prof.graphTable.Get( graphName ); + CHECK_ERRV( graph ); + ASSERT( prof.requiredCounters.contains( type0 )); + ASSERT( prof.requiredCounters.contains( type1 )); + ASSERT( prof.requiredCounters.contains( type2 )); + ASSERT( 
prof.requiredCounters.contains( type3 )); + + T value0 = T(0), value1 = T(0), value2 = T(0), value3 = T(0); + bool exists = false; + + if ( auto it = cnt.find( type0 ); it != cnt.end()) { value0 = it->second * scale; exists = true; } + if ( auto it = cnt.find( type1 ); it != cnt.end()) { value1 = it->second * scale; exists = true; } + if ( auto it = cnt.find( type2 ); it != cnt.end()) { value2 = it->second * scale; exists = true; } + if ( auto it = cnt.find( type3 ); it != cnt.end()) { value3 = it->second * scale; exists = true; } + + if ( exists ) graph->AddNonScaled( List{ value0, value1, value2, value3 }); + } + +} // namespace //----------------------------------------------------------------------------- @@ -150,8 +317,7 @@ namespace if ( ImGui::Begin( "HWProfiler", null, flags )) { - _DrawCpuUsageImGui(); - + _DrawGeneralPerfImGui(); _DrawProfilerArmImGui(); _DrawProfilerMaliImGui(); _DrawProfilerAdrenoImGui(); @@ -168,59 +334,10 @@ namespace */ void HwpcProfiler::_InitImGui () { - // const RGBA8u text_col {255, 255, 255, 255}; - const RGBA8u text_col {200, 200, 200, 255}; - - ImLineGraph::ColorStyle style4; - style4.lines[0] = RGBA8u{180, 20, 20, 255}; - style4.lines[1] = RGBA8u{ 20, 170, 20, 255}; - style4.lines[2] = RGBA8u{ 70, 70, 255, 255}; - style4.lines[3] = RGBA8u{170, 170, 50, 255}; - style4.background[0]= RGBA8u{ 0, 0, 40, 255}; - style4.background[1]= RGBA8u{ 30, 30, 0, 255}; - style4.background[2]= RGBA8u{ 30, 0, 0, 255}; - style4.border = RGBA8u{200, 200, 255, 255}; - style4.text = text_col; - style4.minMaxValue = text_col; - style4.mode = ImLineGraph::EMode::Line; - - // CPU usage - if ( _cpuUsage.enabled ) - { - const uint capacity1 = 100; - const auto& cpu_info = CpuArchInfo::Get(); - - _cpuUsage.coreUsage.resize( cpu_info.cpu.logicalCoreCount ); - _cpuUsage.corePerLine = 1; - - for (auto& core : cpu_info.cpu.coreTypes) - { - for (uint core_id : BitIndexIterate( core.logicalBits.to_ullong() )) - { - _cpuUsage.coreUsage[core_id] = MakeUnique(); 
- auto& graph = *_cpuUsage.coreUsage[core_id]; - graph.SetCapacity( capacity1, 2 ); - graph.SetColor( style4 ); - graph.SetName( ToString(core_id) ); - graph.SetLabel( "total", 0 ); - graph.SetLabel( "kernel", 1 ); - graph.SetSuffix( "%" ); - graph.SetRange( 0.f, 100.f ); - } - - _cpuUsage.corePerLine = Max( _cpuUsage.corePerLine, core.LogicalCount() ); - } - - if ( cpu_info.cpu.coreTypes.size() == 1 ) - _cpuUsage.corePerLine = Max( 1u, uint( Sqrt( float(cpu_info.cpu.logicalCoreCount) ) + 0.5f )); - } - - style4.mode = ImLineGraph::EMode::Line_Adaptive; - - ImLineGraph::ColorStyle style1 = style4; - style1.lines[0] = RGBA8u{ 20, 170, 20, 255}; - style1.border = RGBA8u{200, 200, 255, 255}; + ImLineGraph::ColorStyle style4 = GetStyle4(); + ImLineGraph::ColorStyle style1 = GetStyle1(); + _InitGeneralPerfImGui( style4, style1 ); _InitArmCountersImGui( style4, style1 ); _InitMaliCountersImGui( style4, style1 ); _InitNVidiaCountersImGui( style4, style1 ); @@ -258,25 +375,20 @@ namespace _UpdateArmCountersImGui ================================================= */ - void HwpcProfiler::_UpdateArmCountersImGui (const double invFC) + void HwpcProfiler::_UpdateArmCountersImGui (const bool perFrame, const float invFC) { using ECounter = ArmProfiler::ECounter; - auto& prof = _armProf; - auto& cnt = prof.counters; + auto& prof = _armProf; + auto& cnt = prof.counters; + const double in_scale = perFrame ? double(invFC) : double(prof.invTimeDelta); if ( cnt.empty() ) return; - const auto AddPoint = [&] (ECounter type, GraphName::Ref graphName, double scale = 0.0) - {{ - auto graph = prof.graphTable.Get( graphName ); - CHECK_ERRV( graph ); - ASSERT( prof.requiredCounters.contains( type )); - - scale = (scale == 0.0 ? 
invFC : scale); - if ( auto it = cnt.find( type ); it != cnt.end()) graph->AddNonScaled( List{ it->second * scale }); - }}; + const auto AddPoint = [&] (ECounter type, GraphName::Ref graphName, double scale = 0.0) { + _AddPoint1( prof, cnt, type, graphName, (scale == 0.0 ? in_scale : scale) ); + }; AddPoint( ECounter::Cycles, ARM_CpuCycles ); AddPoint( ECounter::CacheMisses, ARM_CacheMisses ); @@ -354,82 +466,30 @@ namespace _UpdateMaliCountersImGui ================================================= */ - void HwpcProfiler::_UpdateMaliCountersImGui (const double invFC) + void HwpcProfiler::_UpdateMaliCountersImGui (const bool perFrame, const float invFC) { using ECounter = MaliProfiler::ECounter; - auto& prof = _maliProf; - auto cnt = prof.counters; + auto& prof = _maliProf; + auto cnt = prof.counters; + const double in_scale = perFrame ? double(invFC) : double(prof.invTimeDelta); + const double inv_dt = double(prof.invTimeDelta); if ( cnt.empty() ) return; - const auto AddPoint1 = [&] (ECounter type, GraphName::Ref graphName, double scale = 0.0) - {{ - auto graph = prof.graphTable.Get( graphName ); - CHECK_ERRV( graph ); - ASSERT( prof.requiredCounters.contains( type )); - - scale = (scale == 0.0 ? invFC : scale); - if ( auto it = cnt.find( type ); it != cnt.end()) graph->AddNonScaled( List{ it->second * scale }); - }}; - - const auto AddPoint2 = [&] (ECounter type0, ECounter type1, GraphName::Ref graphName, double scale = 0.0) - {{ - auto graph = prof.graphTable.Get( graphName ); - CHECK_ERRV( graph ); - ASSERT( prof.requiredCounters.contains( type0 ) and - prof.requiredCounters.contains( type1 )); - - scale = (scale == 0.0 ? 
invFC : scale); - double value0 = 0.0, value1 = 0.0; - bool exists = false; - - if ( auto it = cnt.find( type0 ); it != cnt.end()) { value0 = it->second * scale; exists = true; } - if ( auto it = cnt.find( type1 ); it != cnt.end()) { value1 = it->second * scale; exists = true; } - - if ( exists ) graph->AddNonScaled( List{ value0, value1 }); - }}; - - const auto AddPoint3 = [&] (ECounter type0, ECounter type1, ECounter type2, GraphName::Ref graphName, double scale = 0.0) - {{ - auto graph = prof.graphTable.Get( graphName ); - CHECK_ERRV( graph ); - ASSERT( prof.requiredCounters.contains( type0 ) and - prof.requiredCounters.contains( type1 ) and - prof.requiredCounters.contains( type2 )); - - scale = (scale == 0.0 ? invFC : scale); - double value0 = 0.0, value1 = 0.0, value2 = 0.0; - bool exists = false; - - if ( auto it = cnt.find( type0 ); it != cnt.end()) { value0 = it->second * scale; exists = true; } - if ( auto it = cnt.find( type1 ); it != cnt.end()) { value1 = it->second * scale; exists = true; } - if ( auto it = cnt.find( type2 ); it != cnt.end()) { value2 = it->second * scale; exists = true; } - - if ( exists ) graph->AddNonScaled( List{ value0, value1, value2 }); - }}; - - const auto AddPoint4 = [&] (ECounter type0, ECounter type1, ECounter type2, ECounter type3, GraphName::Ref graphName, double scale = 0.0) - {{ - auto graph = prof.graphTable.Get( graphName ); - CHECK_ERRV( graph ); - ASSERT( prof.requiredCounters.contains( type0 ) and - prof.requiredCounters.contains( type1 ) and - prof.requiredCounters.contains( type2 ) and - prof.requiredCounters.contains( type3 )); - - scale = (scale == 0.0 ? 
invFC : scale); - double value0 = 0.0, value1 = 0.0, value2 = 0.0, value3 = 0.0; - bool exists = false; - - if ( auto it = cnt.find( type0 ); it != cnt.end()) { value0 = it->second * scale; exists = true; } - if ( auto it = cnt.find( type1 ); it != cnt.end()) { value1 = it->second * scale; exists = true; } - if ( auto it = cnt.find( type2 ); it != cnt.end()) { value2 = it->second * scale; exists = true; } - if ( auto it = cnt.find( type3 ); it != cnt.end()) { value3 = it->second * scale; exists = true; } - - if ( exists ) graph->AddNonScaled( List{ value0, value1, value2, value3 }); - }}; + const auto AddPoint1 = [&] (ECounter type, GraphName::Ref graphName, double scale = 0.0) { + _AddPoint1( prof, cnt, type, graphName, (scale == 0.0 ? in_scale : scale) ); + }; + const auto AddPoint2 = [&] (ECounter type0, ECounter type1, GraphName::Ref graphName, double scale = 0.0) { + _AddPoint2( prof, cnt, type0, type1, graphName, (scale == 0.0 ? in_scale : scale) ); + }; + const auto AddPoint3 = [&] (ECounter type0, ECounter type1, ECounter type2, GraphName::Ref graphName, double scale = 0.0) { + _AddPoint3( prof, cnt, type0, type1, type2, graphName, (scale == 0.0 ? in_scale : scale) ); + }; + const auto AddPoint4 = [&] (ECounter type0, ECounter type1, ECounter type2, ECounter type3, GraphName::Ref graphName, double scale = 0.0) { + _AddPoint4( prof, cnt, type0, type1, type2, type3, graphName, (scale == 0.0 ? 
in_scale : scale) ); + }; AddPoint1( ECounter::CoreUtil, Mali_CoreUtil, 1.0 ); AddPoint1( ECounter::ALUUtil, Mali_ALUUtil, 1.0 ); @@ -441,37 +501,47 @@ namespace AddPoint1( ECounter::EngNarrowInstrRate, Mali_NarrowArithUtil, 1.0 ); AddPoint1( ECounter::EngDivergedInstrRate, Mali_WarpDivRate, 1.0 ); AddPoint1( ECounter::FragOpaqueQdRate, Mali_FragOpaqueRate, 1.0 ); - AddPoint1( ECounter::FragOverdraw, Mali_FragOverdraw, 1.0 ); + AddPoint1( ECounter::FragOverdraw, Mali_FragOverdraw ); AddPoint1( ECounter::CoreAllRegsWarpRate, Mali_AllRegWarpRate, 1.0 ); - AddPoint1( ECounter::CoreFullQdWarpRate, Mali_FullQuadWarpRate, 1.0 ); + AddPoint1( ECounter::CoreFullWarpRate, Mali_FullQuadWarpRate, 1.0 ); AddPoint1( ECounter::FragRastPartQdRate, Mali_FragRastPartRate, 1.0 ); AddPoint1( ECounter::L2CacheFlushCy, Mali_CacheFlush ); AddPoint1( ECounter::FragTileKillRate, Mali_FragTileKillRate, 1.0 ); AddPoint1( ECounter::TilerUtil, Mali_TilerUtil, 1.0 ); AddPoint1( ECounter::FragFPKBUtil, Mali_FPKUtil, 1.0 ); + AddPoint1( ECounter::TilerActiveCy, Mali_TilerActiveCy, inv_dt ); AddPoint2( ECounter::ExtBusRdStallRate, ECounter::ExtBusWrStallRate, Mali_ExtMemStalls, 1.0 ); - AddPoint2( ECounter::ExtBusRdBy, ECounter::ExtBusWrBy, Mali_ExtMemTraffic ); // AddPoint2( ECounter::GeomPosShadTask, ECounter::GeomVarShadTask, Mali_PosVarShadingReq ); AddPoint2( ECounter::GeomPosShadThread, ECounter::GeomVarShadThread, Mali_PosVarShadingThread ); AddPoint2( ECounter::GeomVarShadThreadPerPrim, ECounter::GeomPosShadThreadPerPrim, Mali_PosVarShadingPerPrim ); AddPoint2( ECounter::NonFragUtil, ECounter::FragUtil, Mali_FragNonFragUtil, 1.0 ); AddPoint2( ECounter::L2CacheRdMissRate, ECounter::L2CacheWrMissRate, Mali_CacheMiss, 1.0 ); AddPoint2( ECounter::SCBusTileWrBy, ECounter::SCBusLSWrBy, Mali_LSTileWrite ); - - AddPoint3( ECounter::ExtBusRdLat0, ECounter::ExtBusRdLat128, ECounter::ExtBusRdLat192, Mali_ExtReadLatency1 ); - AddPoint3( ECounter::ExtBusRdLat256, ECounter::ExtBusRdLat320, 
ECounter::ExtBusRdLat384, Mali_ExtReadLatency2 ); - AddPoint3( ECounter::GeomTotalCullPrim, ECounter::GeomVisiblePrim, ECounter::GeomTotalPrim, Mali_Primitives ); - AddPoint3( ECounter::EngFMAPipeUtil, ECounter::EngCVTPipeUtil, ECounter::EngSFUPipeUtil, Mali_ArithPipeUtil, 1.0 ); - AddPoint3( ECounter::FragEZSKillRate, ECounter::FragFPKKillRate, ECounter::FragLZSKillRate, Mali_ZSTest2, 1.0 ); - AddPoint3( ECounter::SCBusFFEExtRdBy, ECounter::SCBusLSExtRdBy, ECounter::SCBusTexExtRdBy, Mali_ExtReads ); - AddPoint3( ECounter::SCBusFFEL2RdBy, ECounter::SCBusLSL2RdBy, ECounter::SCBusTexL2RdBy, Mali_CacheReads ); - AddPoint3( ECounter::GPUIRQUtil, ECounter::NonFragQueueUtil, ECounter::FragQueueUtil, Mali_QueueUtil, 1.0 ); + AddPoint2( ECounter::FragTile, ECounter::FragTileKill, Mali_TileCount ); + AddPoint2( ECounter::TilerPosCacheHitRate, ECounter::TilerVarCacheHitRate, Mali_TilerCacheHit ); + AddPoint2( ECounter::GPUActiveCy, ECounter::PerCoreActiveCy, Mali_GPUActiveCy, inv_dt ); + AddPoint2( ECounter::FragThroughputCy, ECounter::NonFragThroughputCy, Mali_ThroughputCy, inv_dt ); + + if ( perFrame ) + AddPoint3( ECounter::ExtBusRdBy, ECounter::ExtBusWrBy, ECounter::ExtBusTotalBy, Mali_ExtMemTraffic ); + + AddPoint3( ECounter::ExtBusRdBy, ECounter::ExtBusWrBy, ECounter::ExtBusTotalBy, Mali_ExtMemTraffic2, inv_dt ); + AddPoint3( ECounter::ExtBusRdLat0, ECounter::ExtBusRdLat128, ECounter::ExtBusRdLat192, Mali_ExtReadLatency1 ); + AddPoint3( ECounter::ExtBusRdLat256, ECounter::ExtBusRdLat320, ECounter::ExtBusRdLat384, Mali_ExtReadLatency2 ); + AddPoint3( ECounter::GeomTotalCullPrim, ECounter::GeomVisiblePrim, ECounter::GeomTotalPrim, Mali_Primitives ); + AddPoint3( ECounter::GeomTrianglePrim, ECounter::GeomPointPrim, ECounter::GeomLinePrim, Mali_PrimitiveType ); + AddPoint3( ECounter::EngFMAPipeUtil, ECounter::EngCVTPipeUtil, ECounter::EngSFUPipeUtil, Mali_ArithPipeUtil, 1.0 ); + AddPoint3( ECounter::FragEZSKillRate, ECounter::FragFPKKillRate, ECounter::FragLZSKillRate, 
Mali_ZSTest2, 1.0 ); + AddPoint3( ECounter::SCBusFFEExtRdBy, ECounter::SCBusLSExtRdBy, ECounter::SCBusTexExtRdBy, Mali_ExtReads ); + AddPoint3( ECounter::SCBusFFEL2RdBy, ECounter::SCBusLSL2RdBy, ECounter::SCBusTexL2RdBy, Mali_CacheReads ); + AddPoint3( ECounter::GPUIRQUtil, ECounter::NonFragQueueUtil, ECounter::FragQueueUtil, Mali_QueueUtil, 1.0 ); + AddPoint3( ECounter::ExtMemEnergy, ECounter::CoreEnergy, ECounter::TotalEnergy, Mali_Power, inv_dt ); // J to W AddPoint4( ECounter::ExtBusRdOTQ1, ECounter::ExtBusRdOTQ2, ECounter::ExtBusRdOTQ3, ECounter::ExtBusRdOTQ4, Mali_ExtOutstandingReads ); AddPoint4( ECounter::ExtBusWrOTQ1, ECounter::ExtBusWrOTQ2, ECounter::ExtBusWrOTQ3, ECounter::ExtBusWrOTQ4, Mali_ExtOutstandingWrites ); AddPoint4( ECounter::FragEZSTestQd, ECounter::FragEZSKillQd, ECounter::FragLZSTestQd, ECounter::FragLZSKillQd, Mali_ZSTest ); - AddPoint4( ECounter::NonFragWarp, ECounter::FragWarp, ECounter::CoreFullQdWarp, ECounter::CoreAllRegsWarp, Mali_WrapCount ); + AddPoint4( ECounter::NonFragWarp, ECounter::FragWarp, ECounter::CoreFullWarp, ECounter::CoreAllRegsWarp, Mali_WrapCount ); } /* @@ -485,16 +555,74 @@ namespace auto& prof = _maliProf; { + constexpr SecName sec {"Clock"}; + { + auto& graph = prof.graphTable.Add( sec, Mali_GPUActiveCy ); + graph.SetCapacity( capacity, 2 ); + graph.SetName( "freq" ); + graph.SetLabel( "gpu", 0 ); + graph.SetLabel( "core", 1 ); + graph.SetColor( style4 ); + graph.SetSuffix( "Hz" ); + graph.SetLimits( 500.0e+6f, 800.0e+6f ); + graph.SetDescription( "GPU clock speed.\nLow clock indicates low workload." 
); + }{ + auto& graph = prof.graphTable.Add( sec, Mali_TilerActiveCy ); + graph.SetCapacity( capacity ); + graph.SetName( "tiler" ); + graph.SetColor( style1 ); + graph.SetSuffix( "Hz" ); + }{ + auto& graph = prof.graphTable.Add( sec, Mali_ThroughputCy ); + graph.SetCapacity( capacity, 2 ); + graph.SetName( "throughput" ); + graph.SetLabel( "frag", 0 ); + graph.SetLabel( "non-frag", 1 ); + graph.SetColor( style4 ); + graph.SetSuffix( "cy" ); + graph.SetDescription( "Cycles per thread" ); + }{ + auto& graph = prof.graphTable.Add( sec, Mali_Power ); + graph.SetCapacity( capacity, 3 ); + graph.SetName( "power" ); + graph.SetLabel( "mem", 0 ); + graph.SetLabel( "core", 1 ); + graph.SetLabel( "total", 2 ); + graph.SetColor( style4 ); + graph.SetSuffix( "W" ); + graph.SetDescription( "Power budget 2-3W.\nMemory power consumption calculated as external memory access * 100mW per 1GB/s.\n" ); + } + prof.graphTable.SetCaption( sec, "Clock" ); + }{ + const char read_latency_cy_desc[] = + "This metric shows how many GPU cycles it takes to fetch data from the downstream memory system,\n" + "which is either system cache or external DRAM.\n" + "Each stage should be less than previous to achieve maximum performance."; + const char outstanding_rw_desc[] = + "If each stage has ~1/10 scale then it indicates that all cache levels are utilized.\n" + "If '<100%' if greater than '<25%' it indicates that cache is not used or have high miss rate."; + constexpr SecName sec {"ExternalMemory"}; { auto& graph = prof.graphTable.Add( sec, Mali_ExtMemTraffic ); - graph.SetCapacity( capacity, 2 ); + graph.SetCapacity( capacity, 3 ); graph.SetName( "traffic" ); graph.SetLabel( "read", 0 ); graph.SetLabel( "write", 1 ); + graph.SetLabel( "total", 2 ); graph.SetColor( style4 ); - graph.SetSuffix( "B" ); // bytes - graph.SetDescription( "Read/write external memory. DRAM access costs between 80mW and 100mW per GB/s of bandwidth used." 
); + graph.SetSuffix( "B/f" ); + graph.SetDescription( "Read/write external memory per frame." ); + }{ + auto& graph = prof.graphTable.Add( sec, Mali_ExtMemTraffic2 ); + graph.SetCapacity( capacity, 3 ); + graph.SetName( "traffic" ); + graph.SetLabel( "read", 0 ); + graph.SetLabel( "write", 1 ); + graph.SetLabel( "total", 2 ); + graph.SetColor( style4 ); + graph.SetSuffix( "B/s" ); + graph.SetDescription( "Read/write external memory per second.\n"s + c_ExtMemInfo ); }{ auto& graph = prof.graphTable.Add( sec, Mali_ExtMemStalls ); graph.SetCapacity( capacity, 2 ); @@ -512,7 +640,7 @@ namespace graph.SetLabel( "<191", 1 ); graph.SetLabel( "<255", 2 ); graph.SetColor( style4 ); - graph.SetDescription( "This metric shows how many GPU cycles it takes to fetch data from the downstream memory system,\nwhich is either system cache or external DRAM." ); + graph.SetDescription( read_latency_cy_desc ); }{ auto& graph = prof.graphTable.Add( sec, Mali_ExtReadLatency2 ); graph.SetCapacity( capacity, 3 ); @@ -521,7 +649,7 @@ namespace graph.SetLabel( "<383", 1 ); graph.SetLabel( ">384", 2 ); graph.SetColor( style4 ); - graph.SetDescription( "This metric shows how many GPU cycles it takes to fetch data from the downstream memory system,\nwhich is either system cache or external DRAM." 
); + graph.SetDescription( read_latency_cy_desc ); }{ auto& graph = prof.graphTable.Add( sec, Mali_ExtOutstandingReads ); graph.SetCapacity( capacity, 4 ); @@ -531,6 +659,7 @@ namespace graph.SetLabel( "<75%", 2 ); graph.SetLabel( "<100%", 3 ); graph.SetColor( style4 ); + graph.SetDescription( outstanding_rw_desc ); }{ auto& graph = prof.graphTable.Add( sec, Mali_ExtOutstandingWrites ); graph.SetCapacity( capacity, 4 ); @@ -540,6 +669,7 @@ namespace graph.SetLabel( "<75%", 2 ); graph.SetLabel( "<100%", 3 ); graph.SetColor( style4 ); + graph.SetDescription( outstanding_rw_desc ); }{ auto& graph = prof.graphTable.Add( sec, Mali_ExtReads ); graph.SetCapacity( capacity, 3 ); @@ -563,7 +693,7 @@ namespace graph.SetColor( style4 ); graph.SetSuffix( "%" ); graph.SetLimits( 70.f, 90.f ); - graph.SetDescription( "L2 cache misses" ); + graph.SetDescription( "L2 cache miss / hit" ); }{ auto& graph = prof.graphTable.Add( sec, Mali_LSTileWrite ); graph.SetCapacity( capacity, 2 ); @@ -590,12 +720,14 @@ namespace graph.SetColor( style1 ); graph.SetDescription( "L2 cache flush cycles" ); }{ - auto& graph = prof.graphTable.Add( sec, Mali_FragTileKillRate ); - graph.SetCapacity( capacity ); - graph.SetName( "tile kill" ); - graph.SetColor( style1 ); + auto& graph = prof.graphTable.Add( sec, Mali_TilerCacheHit ); + graph.SetCapacity( capacity, 2 ); + graph.SetName( "tiler cache hit" ); + graph.SetLabel( "pos", 0 ); + graph.SetLabel( "var", 1 ); + graph.SetColor( style4 ); graph.SetSuffix( "%" ); - graph.SetDescription( "Defines the percentage of tiles that are killed by the transaction elimination CRC\ncheck because the content of a tile matches the content already stored in memory." ); + graph.SetDescription( "Position cache hit / miss.\nVarying cache hit / miss." 
); } prof.graphTable.SetCaption( sec, "Cache" ); }{ @@ -631,8 +763,16 @@ namespace graph.SetLabel( "pos", 0 ); graph.SetLabel( "var", 1 ); graph.SetColor( style4 ); - graph.SetLimits( 1.5f, 2.7f ); + graph.SetLimits( 1.6f, 2.7f ); graph.SetDescription( "Efficient meshes with a good vertex reuse have average less than 1.5 vertices shaded per triangle,\nas vertex computation is shared by multiple primitives." ); + }{ + auto& graph = prof.graphTable.Add( sec, Mali_PrimitiveType ); + graph.SetCapacity( capacity, 3 ); + graph.SetName( "prim types" ); + graph.SetLabel( "triangle", 0 ); + graph.SetLabel( "point", 1 ); + graph.SetLabel( "line", 2 ); + graph.SetColor( style4 ); } prof.graphTable.SetCaption( sec, "Binning phase" ); }{ @@ -656,7 +796,7 @@ namespace graph.SetLabel( "late", 2 ); graph.SetColor( style4 ); graph.SetSuffix( "%" ); - graph.SetLimits( 70.f, 90.f ); + graph.SetInvLimits( 70.f, 90.f ); graph.SetDescription( "It is important that as many fragments as possible are early ZS (depth and stencil) tested before shading." ); }{ auto& graph = prof.graphTable.Add( sec, Mali_FragOpaqueRate ); @@ -672,6 +812,20 @@ namespace graph.SetName( "overdraw" ); graph.SetColor( style1 ); graph.SetDescription( "Number of fragments shaded per output pixel.\nGPU processing cost per pixel accumulates with the layer count. High overdraw can build up to a\nsignificant processing cost, especially when rendering to a high-resolution framebuffer. Minimize\noverdraw by rendering opaque objects front-to-back and minimizing use of blended transparent layers." ); + }{ + auto& graph = prof.graphTable.Add( sec, Mali_FragTileKillRate ); + graph.SetCapacity( capacity ); + graph.SetName( "tile kill" ); + graph.SetColor( style1 ); + graph.SetSuffix( "%" ); + graph.SetDescription( "Defines the percentage of tiles that are killed by the transaction elimination\nCRC check because the content of a tile matches the content already stored in memory." 
); + }{ + auto& graph = prof.graphTable.Add( sec, Mali_TileCount ); + graph.SetCapacity( capacity, 2 ); + graph.SetName( "tiles" ); + graph.SetLabel( "pass", 0 ); + graph.SetLabel( "kill", 1 ); + graph.SetColor( style4 ); } prof.graphTable.SetCaption( sec, "Rasterization" ); }{ @@ -731,7 +885,6 @@ namespace graph.SetLabel( "frag", 1 ); graph.SetColor( style4 ); graph.SetSuffix( "%" ); - graph.SetLimits( 70.f, 90.f ); graph.SetDescription( "Utilization of the shader core fragment and non-fragment path." ); }{ auto& graph = prof.graphTable.Add( sec, Mali_TilerUtil ); @@ -747,7 +900,7 @@ namespace graph.SetName( "fpk" ); graph.SetColor( style1 ); graph.SetSuffix( "%" ); - graph.SetLimits( 70.f, 90.f ); + graph.SetInvLimits( 70.f, 90.f ); graph.SetDescription( "Defines the percentage of cycles where the Forward Pixel Kill (FPK) quad buffer\ncontains at least one fragment quad. This buffer is located after early ZS but before the execution core.\n\nDuring fragment shading this counter must be close to 100%. This indicates that the fragment\nfront-end is able to keep up with the shader core shading rate. This counter commonly drops below\n100% for three reasons:\n* The running workload has many empty tiles with no geometry to render. Empty tiles are\ncommon in shadow maps, for any screen region with no shadow casters.\n* The application consists of simple shaders but a high percentage of microtriangles. This\ncombination causes the shader core to complete fragments faster than they are rasterized, so\nthe quad buffer starts to drain.\n* The application consists of layers which stall at early ZS because of a dependency on an earlier\nfragment layer which is still in flight. Stalled layers prevent new fragments entering the quad\nbuffer, so the quad buffer starts to drain." 
); }{ auto& graph = prof.graphTable.Add( sec, Mali_QueueUtil ); @@ -758,7 +911,7 @@ namespace graph.SetLabel( "frag", 2 ); graph.SetColor( style4 ); graph.SetSuffix( "%" ); - graph.SetLimits( 70.f, 90.f ); + graph.SetInvLimits( 50.f, 70.f ); graph.SetDescription( "IRQ pending utilization compared against the GPU active cycles.\nIn a well-functioning system, this expression is ideally less than 3% of the total cycles.\nIf the value is much higher than 3%, a system issue might be preventing the CPU from efficiently handling interrupts.\n\nNon-fragment queue utilization compared against the GPU active cycles.\nFor GPU bound content, it is expected that the GPU queues process work in parallel.\nThe dominant queue must be close to 100% utilized. If no queue is dominant, but the GPU is close to\n100% utilized, then there might be a serialization or dependency problem preventing better overlap\nacross the queues.\n\nFragment queue utilization compared against the GPU active cycles.\nFor GPU bound content, the GPU queues are expected to process work in parallel.\nAim to keep the dominant queue close to 100% utilized. If no queue is dominant, but the GPU is close to\n100% utilized, then there might be a serialization or dependency problem preventing better queue overlap." ); } prof.graphTable.SetCaption( sec, "Functional unit utilization" ); @@ -796,7 +949,7 @@ namespace graph.SetName( "warp divg" ); graph.SetColor( style1 ); graph.SetSuffix( "%" ); - graph.SetInvLimits( 5.f, 25.f ); + graph.SetLimits( 5.f, 25.f ); graph.SetDescription( "This expression defines the percentage of instructions that have\ncontrol flow divergence (false branches) across the warp." 
); }{ auto& graph = prof.graphTable.Add( sec, Mali_WrapCount ); @@ -822,7 +975,7 @@ namespace graph.SetName( "full quad" ); graph.SetColor( style1 ); graph.SetSuffix( "%" ); - graph.SetLimits( 5.f, 25.f ); + graph.SetInvLimits( 25.f, 50.f ); graph.SetDescription( "Warps that have a full thread slot allocation.\nNote that allocated thread slots may not contain a running thread\nif the workload cannot fill the whole allocation.\nFully allocated warps are more likely if:\n* Draw calls avoid late ZS dependency hazards.\n* Draw calls use meshes with a low percentage of tiny primitives.\n* Compute dispatches use work groups that are a multiple of warp size." ); }{ auto& graph = prof.graphTable.Add( sec, Mali_FragRastPartRate ); @@ -830,7 +983,7 @@ namespace graph.SetName( "part quad" ); graph.SetColor( style1 ); graph.SetSuffix( "%" ); - graph.SetInvLimits( 5.f, 25.f ); + graph.SetLimits( 25.f, 35.f ); graph.SetDescription( "Fragment quads that contain samples with no coverage.\nA high percentage can indicate that the content has a high density of small triangles, which are\nexpensive to process. To avoid this, use mesh level-of-detail algorithms to select simpler meshes as\nobjects move further from the camera." ); } prof.graphTable.SetCaption( sec, "Shader core" ); @@ -869,97 +1022,54 @@ namespace _UpdatePowerVRCountersImGui ================================================= */ - void HwpcProfiler::_UpdatePowerVRCountersImGui (const float invFC) + void HwpcProfiler::_UpdatePowerVRCountersImGui (const bool perFrame, const float invFC) { using ECounter = PowerVRProfiler::ECounter; - auto& prof = _pvrProf; - auto& cnt = prof.counters; + auto& prof = _pvrProf; + auto& cnt = prof.counters; + const float in_scale = perFrame ? 
invFC : prof.invTimeDelta; if ( cnt.empty() ) return; - const auto AddPoint = [&] (ECounter type, GraphName::Ref graphName, float scale = 0.f) - {{ - auto graph = prof.graphTable.Get( graphName ); - CHECK_ERRV( graph ); - ASSERT( prof.requiredCounters.contains( type )); - - scale = (scale == 0.f ? invFC : scale); - if ( auto it = cnt.find( type ); it != cnt.end()) graph->AddNonScaled( List{ it->second * scale }); - }}; - - const auto AddPoint2 = [&] (ECounter type0, ECounter type1, GraphName::Ref graphName, float scale = 0.f) - {{ - auto graph = prof.graphTable.Get( graphName ); - CHECK_ERRV( graph ); - ASSERT( prof.requiredCounters.contains( type0 ) and - prof.requiredCounters.contains( type1 )); - - scale = (scale == 0.f ? invFC : scale); - float value0 = 0.f, value1 = 0.f; - bool exists = false; - - if ( auto it = cnt.find( type0 ); it != cnt.end()) { value0 = it->second * scale; exists = true; } - if ( auto it = cnt.find( type1 ); it != cnt.end()) { value1 = it->second * scale; exists = true; } - - if ( exists ) graph->AddNonScaled( List{ value0, value1 }); - }}; - - const auto AddPoint3 = [&] (ECounter type0, ECounter type1, ECounter type2, GraphName::Ref graphName, float scale = 0.f) - {{ - auto graph = prof.graphTable.Get( graphName ); - CHECK_ERRV( graph ); - ASSERT( prof.requiredCounters.contains( type0 ) and - prof.requiredCounters.contains( type1 ) and - prof.requiredCounters.contains( type2 )); - - scale = (scale == 0.f ? 
invFC : scale); - float value0 = 0.f, value1 = 0.f, value2 = 0.f; - bool exists = false; - - if ( auto it = cnt.find( type0 ); it != cnt.end()) { value0 = it->second * scale; exists = true; } - if ( auto it = cnt.find( type1 ); it != cnt.end()) { value1 = it->second * scale; exists = true; } - if ( auto it = cnt.find( type2 ); it != cnt.end()) { value2 = it->second * scale; exists = true; } - - if ( exists ) graph->AddNonScaled( List{ value0, value1, value2 }); - }}; - - const auto AddPoint4 = [&] (ECounter type0, ECounter type1, ECounter type2, ECounter type3, GraphName::Ref graphName, float scale = 0.f) - {{ - auto graph = prof.graphTable.Get( graphName ); - CHECK_ERRV( graph ); - ASSERT( prof.requiredCounters.contains( type0 ) and - prof.requiredCounters.contains( type1 ) and - prof.requiredCounters.contains( type2 ) and - prof.requiredCounters.contains( type3 )); - - scale = (scale == 0.f ? invFC : scale); - float value0 = 0.f, value1 = 0.f, value2 = 0.f, value3 = 0.f; - bool exists = false; - - if ( auto it = cnt.find( type0 ); it != cnt.end()) { value0 = it->second * scale; exists = true; } - if ( auto it = cnt.find( type1 ); it != cnt.end()) { value1 = it->second * scale; exists = true; } - if ( auto it = cnt.find( type2 ); it != cnt.end()) { value2 = it->second * scale; exists = true; } - if ( auto it = cnt.find( type3 ); it != cnt.end()) { value3 = it->second * scale; exists = true; } - - if ( exists ) graph->AddNonScaled( List{ value0, value1, value2, value3 }); - }}; - - AddPoint( ECounter::GPU_MemoryInterfaceLoad, PVR_MemoryBusLoad, 1.f ); // % - AddPoint( ECounter::GPU_ClockSpeed, PVR_GpuClockSpeed, 1.f ); - AddPoint( ECounter::Tiler_TriangleRatio, PVR_TilerTriangleRatio, 1.f ); - AddPoint( ECounter::Texture_ReadStall, PVR_TexReadStall, 1.f ); // % - AddPoint( ECounter::Shader_ShaderProcessingLoad, PVR_ShaderLoad ); - - AddPoint2( ECounter::GPU_MemoryRead, ECounter::GPU_MemoryWrite, PVR_MemoryTraffic ); - AddPoint2( 
ECounter::VertexShader_RegisterOverload, ECounter::PixelShader_RegisterOverload, PVR_RegisterOverload ); - AddPoint2( ECounter::Tiler_TrianglesInputPerFrame, ECounter::Tiler_TrianglesOutputPerFrame, PVR_TilerTrianglesIO, 1.f ); + const auto AddPoint1 = [&] (ECounter type, GraphName::Ref graphName, float scale = 0.f) { + _AddPoint1( prof, cnt, type, graphName, (scale == 0.0f ? in_scale : scale) ); + }; + const auto AddPoint2 = [&] (ECounter type0, ECounter type1, GraphName::Ref graphName, float scale = 0.f) { + _AddPoint2( prof, cnt, type0, type1, graphName, (scale == 0.0f ? in_scale : scale) ); + }; + const auto AddPoint3 = [&] (ECounter type0, ECounter type1, ECounter type2, GraphName::Ref graphName, float scale = 0.f) { + _AddPoint3( prof, cnt, type0, type1, type2, graphName, (scale == 0.0f ? in_scale : scale) ); + }; + const auto AddPoint4 = [&] (ECounter type0, ECounter type1, ECounter type2, ECounter type3, GraphName::Ref graphName, float scale = 0.f) { + _AddPoint4( prof, cnt, type0, type1, type2, type3, graphName, (scale == 0.0f ? 
in_scale : scale) ); + }; + + AddPoint1( ECounter::GPU_MemoryInterfaceLoad, PVR_MemoryBusLoad, 1.f ); // % + AddPoint1( ECounter::GPU_ClockSpeed, PVR_GpuClockSpeed, 1.f ); + AddPoint1( ECounter::Tiler_TriangleRatio, PVR_TilerTriangleRatio, 1.f ); // % + AddPoint1( ECounter::Shader_ShaderProcessingLoad, PVR_ShaderLoad, 1.f ); // % + AddPoint1( ECounter::Texture_ReadStall, PVR_TexReadStall, 1.f ); // % + AddPoint1( ECounter::Texture_FetchesPerPixel, PVR_TexFetchesPerPixel ); + + AddPoint2( ECounter::Texture_FilterInputLoad, ECounter::Texture_FilterLoad, PVR_TexFilterLoad, 1.f ); // % + AddPoint2( ECounter::VertexShader_RegisterOverload, ECounter::PixelShader_RegisterOverload, PVR_RegisterOverload ); // % AddPoint2( ECounter::Renderer_HSR_Efficiency, ECounter::Renderer_ISP_PixelLoad, PVR_ZTest, 1.f ); // % + AddPoint2( ECounter::Texture_FilterCyclesPerFetch, ECounter::Texture_ReadCyclesPerFetch, PVR_TexCycles ); - AddPoint3( ECounter::RendererTimePerFrame, ECounter::GeometryTimePerFrame, ECounter::TDM_TimePerFrame, PVR_GpuTime, 1.f ); + if ( perFrame ) AddPoint2( ECounter::Tiler_TrianglesInputPerFrame, ECounter::Tiler_TrianglesOutputPerFrame, PVR_TilerTrianglesIO, 1.f ); + else AddPoint2( ECounter::Tiler_TrianglesInputPerSecond, ECounter::Tiler_TrianglesOutputPerSecond, PVR_TilerTrianglesIO, 1.f ); + + if ( perFrame ) AddPoint3( ECounter::RendererTimePerFrame, ECounter::GeometryTimePerFrame, ECounter::TDM_TimePerFrame, PVR_GpuTime, 1.f ); + else AddPoint3( ECounter::RendererTime, ECounter::GeometryTime, ECounter::TDM_Time, PVR_GpuTime ); + + if ( perFrame ) + AddPoint3( ECounter::GPU_MemoryRead, ECounter::GPU_MemoryWrite, ECounter::GPU_MemoryTotal, PVR_MemoryTraffic ); + + AddPoint3( ECounter::GPU_MemoryRead, ECounter::GPU_MemoryWrite, ECounter::GPU_MemoryTotal, PVR_MemoryTraffic2, 1.f ); AddPoint3( ECounter::Shader_CyclesPerComputeKernel, ECounter::Shader_CyclesPerVertex, ECounter::Shader_CyclesPerPixel, PVR_GpuCycles ); - AddPoint3( 
ECounter::ComputeShader_ProcessingLoad, ECounter::VertexShader_ProcessingLoad, ECounter::PixelShader_ProcessingLoad, PVR_ShaderLoad2 ); + AddPoint3( ECounter::ComputeShader_ProcessingLoad, ECounter::VertexShader_ProcessingLoad, ECounter::PixelShader_ProcessingLoad, PVR_ShaderLoad2, 1.f ); // % AddPoint4( ECounter::RendererActive, ECounter::GeometryActive, ECounter::TDM_Active, ECounter::SPM_Active, PVR_GpuActive, 1.f ); // % } @@ -978,13 +1088,24 @@ namespace constexpr SecName sec {"Memory"}; { auto& graph = prof.graphTable.Add( sec, PVR_MemoryTraffic ); - graph.SetCapacity( capacity, 2 ); + graph.SetCapacity( capacity, 3 ); graph.SetName( "traffic" ); graph.SetLabel( "read", 0 ); graph.SetLabel( "write", 1 ); + graph.SetLabel( "total", 2 ); graph.SetColor( style4 ); - graph.SetSuffix( "B" ); // bytes - graph.SetDescription( "GPU memory read/write bytes per second" ); + graph.SetSuffix( "B/f" ); + graph.SetDescription( "GPU memory read/write bytes per frame" ); + }{ + auto& graph = prof.graphTable.Add( sec, PVR_MemoryTraffic2 ); + graph.SetCapacity( capacity, 3 ); + graph.SetName( "traffic" ); + graph.SetLabel( "read", 0 ); + graph.SetLabel( "write", 1 ); + graph.SetLabel( "total", 2 ); + graph.SetColor( style4 ); + graph.SetSuffix( "B/s" ); + graph.SetDescription( "GPU memory read/write bytes per second.\n"s + c_ExtMemInfo ); }{ auto& graph = prof.graphTable.Add( sec, PVR_MemoryBusLoad ); graph.SetCapacity( capacity ); @@ -992,13 +1113,6 @@ namespace graph.SetColor( style1 ); graph.SetSuffix( "%" ); graph.SetDescription( "GPU memory interface load.\nShows the total utilization of the GPU memory bus, for both read and write memory operations over the GPU memory interface within the current period." 
); - }{ - auto& graph = prof.graphTable.Add( sec, PVR_TexReadStall ); - graph.SetCapacity( capacity ); - graph.SetName( "tex read stall" ); - graph.SetColor( style1 ); - graph.SetSuffix( "%" ); - graph.SetDescription( "Texture read stall" ); } prof.graphTable.SetCaption( sec, "External memory" ); }{ @@ -1007,30 +1121,31 @@ namespace auto& graph = prof.graphTable.Add( sec, PVR_GpuTime ); graph.SetCapacity( capacity, 3 ); graph.SetName( "time" ); - graph.SetLabel( "gfx", 0 ); - graph.SetLabel( "geom", 1 ); - graph.SetLabel( "tex", 2 ); + graph.SetLabel( "ren", 0 ); + graph.SetLabel( "bin", 1 ); + graph.SetLabel( "tdm", 2 ); graph.SetColor( style4 ); graph.SetSuffix( "s" ); - graph.SetDescription( "Time per frame: Renderer, Geometry, Texture (TDM)" ); + graph.SetDescription( "Time per frame.\nRenderer - rasterization, fragment shader, output.\nBinning - vertex shader and binning on tiles.\nTDM - ?" ); }{ auto& graph = prof.graphTable.Add( sec, PVR_GpuActive ); graph.SetCapacity( capacity, 4 ); graph.SetName( "active" ); - graph.SetLabel( "gfx", 0 ); - graph.SetLabel( "geom", 1 ); + graph.SetLabel( "ren", 0 ); + graph.SetLabel( "bin", 1 ); graph.SetLabel( "tdm", 2 ); graph.SetLabel( "spm", 3 ); graph.SetColor( style4 ); graph.SetSuffix( "%" ); - graph.SetDescription( "Active/inactive cycles in % for: Renderer, Geometry, TDM, SPM.\nRenderer - shows percentage of time that Renderer tasks were active. Renderer time refers to\nany time that is spent processing pixels and shading them. This includes the ISP (Image Synthesis\nProcessor), Texturing and Shader Processor units.\nGeometry - input primitives, tiler ?\nTDM - texture data master ?\nSPM - If the GPU overflows the parameter buffer during vertex processing it will enter smart parameter mode and attempt to grow the parameter buffer.\n" ); + graph.SetDescription( "Active/inactive cycles in % for: Renderer, Binning, TDM, SPM.\nRenderer - shows percentage of time that Renderer tasks were active. 
Renderer time refers to\nany time that is spent processing pixels and shading them. This includes the ISP (Image Synthesis Processor),\n Texturing and Shader Processor units.\nGeometry - input primitives, tiler ?\nTDM - texture data master ?\nSPM - If the GPU overflows the parameter buffer during vertex processing it will enter smart parameter mode and attempt to grow the parameter buffer.\n" ); }{ auto& graph = prof.graphTable.Add( sec, PVR_GpuClockSpeed ); graph.SetCapacity( capacity ); graph.SetName( "clock" ); graph.SetColor( style1 ); graph.SetSuffix( "Hz" ); - graph.SetDescription( "GPU clock speed" ); + graph.SetLimits( 500.0e+6f, 800.0e+6f ); + graph.SetDescription( "GPU clock speed.\nLow clock indicates low workload." ); }{ auto& graph = prof.graphTable.Add( sec, PVR_GpuCycles ); graph.SetCapacity( capacity, 3 ); @@ -1049,6 +1164,7 @@ namespace graph.SetCapacity( capacity ); graph.SetName( "tile tris rate" ); graph.SetColor( style1 ); + graph.SetSuffix( "%" ); graph.SetDescription( "Tiler triangle ratio" ); }{ auto& graph = prof.graphTable.Add( sec, PVR_TilerTrianglesIO ); @@ -1079,12 +1195,14 @@ namespace graph.SetLabel( "vert", 1 ); graph.SetLabel( "frag", 2 ); graph.SetColor( style4 ); + graph.SetSuffix( "%" ); graph.SetDescription( "Shader processing load: compute, vertex, pixel.\nA high value indicates that a large percentage of the Shader's workload has been spent executing shader.\nPixel: Reduce alpha blending and discard/alpha test." ); }{ auto& graph = prof.graphTable.Add( sec, PVR_ShaderLoad ); graph.SetCapacity( capacity ); graph.SetName( "shader load" ); graph.SetColor( style1 ); + graph.SetSuffix( "%" ); graph.SetDescription( "Shader processing load.\nA high value indicates that a large percentage of the Shader's workload has been spent processing vertices, fragments and/or compute kernels." 
); }{ auto& graph = prof.graphTable.Add( sec, PVR_RegisterOverload ); @@ -1093,10 +1211,44 @@ namespace graph.SetLabel( "vert", 0 ); graph.SetLabel( "frag", 1 ); graph.SetColor( style4 ); - graph.SetLimits( 1.f, 10.f ); + graph.SetLimits( 10.f, 25.f ); + graph.SetSuffix( "%" ); graph.SetDescription( "Shader register overload: vertex, pixel.\nThis counter indicates when the hardware is under register pressure.\nThe value should be near 0% or very low in most situations." ); } prof.graphTable.SetCaption( sec, "Shader core" ); + }{ + constexpr SecName sec {"Texture"}; + { + auto& graph = prof.graphTable.Add( sec, PVR_TexReadStall ); + graph.SetCapacity( capacity ); + graph.SetName( "read stall" ); + graph.SetColor( style1 ); + graph.SetSuffix( "%" ); + graph.SetDescription( "Texture read stall" ); + }{ + auto& graph = prof.graphTable.Add( sec, PVR_TexFetchesPerPixel ); + graph.SetCapacity( capacity ); + graph.SetName( "fetch/px" ); + graph.SetColor( style1 ); + graph.SetDescription( "Texture fetches per pixel" ); + }{ + auto& graph = prof.graphTable.Add( sec, PVR_TexFilterLoad ); + graph.SetCapacity( capacity, 2 ); + graph.SetName( "filter" ); + graph.SetLabel( "in", 0 ); + graph.SetLabel( "out", 1 ); + graph.SetColor( style4 ); + graph.SetSuffix( "%" ); + }{ + auto& graph = prof.graphTable.Add( sec, PVR_TexCycles ); + graph.SetCapacity( capacity, 2 ); + graph.SetName( "cycles" ); + graph.SetLabel( "filter", 0 ); + graph.SetLabel( "read", 1 ); + graph.SetColor( style4 ); + graph.SetDescription( "Texture filter cycles per fetch.\nTexture read cycles per fetch." 
); + } + prof.graphTable.SetCaption( sec, "Texture" ); } } #endif @@ -1131,99 +1283,53 @@ namespace _UpdateAdrenoCountersImGui ================================================= */ - void HwpcProfiler::_UpdateAdrenoCountersImGui (const float invFC) + void HwpcProfiler::_UpdateAdrenoCountersImGui (const bool perFrame, const float invFC) { using ECounter = AdrenoProfiler::ECounter; - auto& prof = _adrenoProf; - auto& cnt = prof.counters; + auto& prof = _adrenoProf; + auto& cnt = prof.counters; + const float in_scale = perFrame ? invFC : prof.invTimeDelta; if ( cnt.empty() ) return; - const auto AddPoint1 = [&] (ECounter type, GraphName::Ref graphName, float scale = 0.f) - {{ - auto graph = prof.graphTable.Get( graphName ); - CHECK_ERRV( graph ); - ASSERT( prof.requiredCounters.contains( type )); - - scale = (scale == 0.f ? invFC : scale); - if ( auto it = cnt.find( type ); it != cnt.end()) graph->AddNonScaled( List{ it->second * scale }); - }}; - - const auto AddPoint2 = [&] (ECounter type0, ECounter type1, GraphName::Ref graphName, float scale = 0.f) - {{ - auto graph = prof.graphTable.Get( graphName ); - CHECK_ERRV( graph ); - ASSERT( prof.requiredCounters.contains( type0 ) and - prof.requiredCounters.contains( type1 )); - - scale = (scale == 0.f ? invFC : scale); - float value0 = 0.f, value1 = 0.f; - bool exists = false; - - if ( auto it = cnt.find( type0 ); it != cnt.end()) { value0 = it->second * scale; exists = true; } - if ( auto it = cnt.find( type1 ); it != cnt.end()) { value1 = it->second * scale; exists = true; } - - if ( exists ) graph->AddNonScaled( List{ value0, value1 }); - }}; - - const auto AddPoint3 = [&] (ECounter type0, ECounter type1, ECounter type2, GraphName::Ref graphName, float scale = 0.f) - {{ - auto graph = prof.graphTable.Get( graphName ); - CHECK_ERRV( graph ); - ASSERT( prof.requiredCounters.contains( type0 ) and - prof.requiredCounters.contains( type1 ) and - prof.requiredCounters.contains( type2 )); - - scale = (scale == 0.f ? 
invFC : scale); - float value0 = 0.f, value1 = 0.f, value2 = 0.f; - bool exists = false; - - if ( auto it = cnt.find( type0 ); it != cnt.end()) { value0 = it->second * scale; exists = true; } - if ( auto it = cnt.find( type1 ); it != cnt.end()) { value1 = it->second * scale; exists = true; } - if ( auto it = cnt.find( type2 ); it != cnt.end()) { value2 = it->second * scale; exists = true; } - - if ( exists ) graph->AddNonScaled( List{ value0, value1, value2 }); - }}; - - const auto AddPoint4 = [&] (ECounter type0, ECounter type1, ECounter type2, ECounter type3, GraphName::Ref graphName, float scale = 0.f) - {{ - auto graph = prof.graphTable.Get( graphName ); - CHECK_ERRV( graph ); - ASSERT( prof.requiredCounters.contains( type0 ) and - prof.requiredCounters.contains( type1 ) and - prof.requiredCounters.contains( type2 ) and - prof.requiredCounters.contains( type3 )); - - scale = (scale == 0.f ? invFC : scale); - float value0 = 0.f, value1 = 0.f, value2 = 0.f, value3 = 0.f; - bool exists = false; - - if ( auto it = cnt.find( type0 ); it != cnt.end()) { value0 = it->second * scale; exists = true; } - if ( auto it = cnt.find( type1 ); it != cnt.end()) { value1 = it->second * scale; exists = true; } - if ( auto it = cnt.find( type2 ); it != cnt.end()) { value2 = it->second * scale; exists = true; } - if ( auto it = cnt.find( type3 ); it != cnt.end()) { value3 = it->second * scale; exists = true; } - - if ( exists ) graph->AddNonScaled( List{ value0, value1, value2, value3 }); - }}; + const auto AddPoint1 = [&] (ECounter type, GraphName::Ref graphName, float scale = 0.f) { + _AddPoint1( prof, cnt, type, graphName, (scale == 0.0f ? in_scale : scale) ); + }; + const auto AddPoint2 = [&] (ECounter type0, ECounter type1, GraphName::Ref graphName, float scale = 0.f) { + _AddPoint2( prof, cnt, type0, type1, graphName, (scale == 0.0f ? 
in_scale : scale) ); + }; + const auto AddPoint3 = [&] (ECounter type0, ECounter type1, ECounter type2, GraphName::Ref graphName, float scale = 0.f) { + _AddPoint3( prof, cnt, type0, type1, type2, graphName, (scale == 0.0f ? in_scale : scale) ); + }; + const auto AddPoint4 = [&] (ECounter type0, ECounter type1, ECounter type2, ECounter type3, GraphName::Ref graphName, float scale = 0.f) { + _AddPoint4( prof, cnt, type0, type1, type2, type3, graphName, (scale == 0.0f ? in_scale : scale) ); + }; AddPoint1( ECounter::LRZ_TileKilled, Adreno_LrzTileKilled ); AddPoint1( ECounter::LRZ_TotalPixel, Adreno_LrzTotalPixel ); - AddPoint1( ECounter::RB_AliveCycles2D, Adreno_Rb2DActive ); - AddPoint1( ECounter::CCU_PartialBlockRead, Adreno_CcuPartBlockRd ); - AddPoint1( ECounter::CCU_2DPixels, Adreno_Ccu2DPix ); - - AddPoint2( ECounter::LRZ_Read, ECounter::LRZ_Write, Adreno_LrzTraffic ); - AddPoint2( ECounter::RAS_SuperTiles, ECounter::RAS_8x4Tiles, Adreno_RasTiles ); - AddPoint2( ECounter::RAS_FullyCoveredSuperTiles, ECounter::RAS_FullyCovered8x4Tiles, Adreno_RasFullyCoveredTiles ); - AddPoint2( ECounter::CCU_DepthBlocks, ECounter::CCU_ColorBlocks, Adreno_CcuDCBlocks ); - AddPoint2( ECounter::CCU_GMemRead, ECounter::CCU_GMemWrite, Adreno_CcuGMem ); - AddPoint2( ECounter::RB_ZRead, ECounter::RB_ZWrite, Adreno_RbZTraffic ); - AddPoint2( ECounter::RB_CRead, ECounter::RB_CWrite, Adreno_RbCTraffic ); - - AddPoint3( ECounter::LRZ_PrimKilledByMaskGen, ECounter::LRZ_PrimKilledByLRZ, ECounter::LRZ_PrimPassed, Adreno_LrzPrim ); - AddPoint3( ECounter::RB_Z_Pass, ECounter::RB_Z_Fail, ECounter::RB_S_Fail, Adreno_RbZSPass ); + // AddPoint1( ECounter::RB_AliveCycles2D, Adreno_Rb2DActive ); + // AddPoint1( ECounter::CCU_PartialBlockRead, Adreno_CcuPartBlockRd ); + // AddPoint1( ECounter::CCU_2DPixels, Adreno_Ccu2DPix ); + + AddPoint2( ECounter::SSP_ALUcy, ECounter::SSP_EFUcy, Adreno_SspALUCycles ); + AddPoint2( ECounter::LRZ_Read, ECounter::LRZ_Write, Adreno_LrzTraffic ); + AddPoint2( 
ECounter::RAS_SuperTiles, ECounter::RAS_FullyCoveredSuperTiles, Adreno_RasSuperTiles ); + AddPoint2( ECounter::RAS_8x4Tiles, ECounter::RAS_FullyCovered8x4Tiles, Adreno_Ras8x4Tiles ); + AddPoint2( ECounter::CCU_DepthBlocks, ECounter::CCU_ColorBlocks, Adreno_CcuDCBlocks ); + AddPoint2( ECounter::CCU_GMemRead, ECounter::CCU_GMemWrite, Adreno_CcuGMem ); + AddPoint2( ECounter::RB_ZRead, ECounter::RB_ZWrite, Adreno_RbZTraffic ); + AddPoint2( ECounter::RB_CRead, ECounter::RB_CWrite, Adreno_RbCTraffic ); + AddPoint2( ECounter::SSP_L2Read, ECounter::SSP_L2Write, Adreno_SspL2Traffic ); + AddPoint2( ECounter::CCU_2dReadReq, ECounter::CCU_2dWriteReq, Adreno_Ccu2dReq ); + AddPoint2( ECounter::CMP_2dReadData, ECounter::CMP_2dWriteData, Adreno_Cmp2dTraffic ); + + AddPoint3( ECounter::LRZ_PrimKilledByMaskGen, ECounter::LRZ_PrimKilledByLRZ, ECounter::LRZ_PrimPassed, Adreno_LrzPrim ); + AddPoint3( ECounter::SSP_VS_EFUInst, ECounter::SSP_VS_FullALUInst, ECounter::SSP_VS_HalfALUInst, Adreno_SspVSInst ); + AddPoint3( ECounter::SSP_FS_EFUInst, ECounter::SSP_FS_FullALUInst, ECounter::SSP_FS_HalfALUInst, Adreno_SspFSInst ); + + AddPoint4( ECounter::RB_Z_Pass, ECounter::RB_Z_Fail, ECounter::RB_S_Fail, ECounter::RB_TotalPass, Adreno_RbZSPass ); } /* @@ -1245,6 +1351,7 @@ namespace graph.SetLabel( "read", 0 ); graph.SetLabel( "write", 1 ); graph.SetColor( style4 ); + graph.SetSuffix( "B" ); }{ auto& graph = prof.graphTable.Add( sec, Adreno_LrzPrim ); graph.SetCapacity( capacity, 3 ); @@ -1253,6 +1360,7 @@ namespace graph.SetLabel( "kill", 1 ); graph.SetLabel( "pass", 2 ); graph.SetColor( style4 ); + graph.SetDescription( "maskgen - primitives killed by maskgen.\nkill - primitives killed by LRZ.\npass - passed primitives." 
); }{ auto& graph = prof.graphTable.Add( sec, Adreno_LrzTileKilled ); graph.SetCapacity( capacity ); @@ -1268,18 +1376,19 @@ namespace }{ constexpr SecName sec {"Rasterizer"}; { - auto& graph = prof.graphTable.Add( sec, Adreno_RasTiles ); + auto& graph = prof.graphTable.Add( sec, Adreno_RasSuperTiles ); graph.SetCapacity( capacity, 2 ); - graph.SetName( "tiles" ); - graph.SetLabel( "super", 0 ); - graph.SetLabel( "8x4", 1 ); + graph.SetName( "super tiles" ); + graph.SetLabel( "total", 0 ); + graph.SetLabel( "full", 1 ); graph.SetColor( style4 ); + graph.SetDescription( "Super tile count. Super tile has size from 32x32px to 256x256px or greater.\nFull - number of fully covered tiles." ); }{ - auto& graph = prof.graphTable.Add( sec, Adreno_RasFullyCoveredTiles ); + auto& graph = prof.graphTable.Add( sec, Adreno_Ras8x4Tiles ); graph.SetCapacity( capacity, 2 ); - graph.SetName( "full cov tiles" ); - graph.SetLabel( "super", 0 ); - graph.SetLabel( "8x4", 1 ); + graph.SetName( "8x4 tiles" ); + graph.SetLabel( "total", 0 ); + graph.SetLabel( "full", 1 ); graph.SetColor( style4 ); } prof.graphTable.SetCaption( sec, "Rasterizer" ); @@ -1292,6 +1401,7 @@ namespace graph.SetLabel( "read", 0 ); graph.SetLabel( "write", 1 ); graph.SetColor( style4 ); + graph.SetSuffix( "B" ); }{ auto& graph = prof.graphTable.Add( sec, Adreno_RbCTraffic ); graph.SetCapacity( capacity, 2 ); @@ -1299,20 +1409,23 @@ namespace graph.SetLabel( "read", 0 ); graph.SetLabel( "write", 1 ); graph.SetColor( style4 ); + graph.SetSuffix( "B" ); }{ auto& graph = prof.graphTable.Add( sec, Adreno_RbZSPass ); - graph.SetCapacity( capacity, 3 ); + graph.SetCapacity( capacity, 4 ); graph.SetName( "ZS" ); graph.SetLabel( "Z-pass", 0 ); graph.SetLabel( "Z-fail", 1 ); graph.SetLabel( "S-fail", 2 ); + graph.SetLabel( "pass", 3 ); graph.SetColor( style4 ); - }{ + }/*{ auto& graph = prof.graphTable.Add( sec, Adreno_Rb2DActive ); graph.SetCapacity( capacity ); - graph.SetName( "2d" ); + graph.SetName( "2d cycles" ); 
graph.SetColor( style1 ); - } + graph.SetSuffix( "Hz" ); + }*/ prof.graphTable.SetCaption( sec, "Render backend" ); }{ constexpr SecName sec {"CCU"}; @@ -1323,12 +1436,12 @@ namespace graph.SetLabel( "depth", 0 ); graph.SetLabel( "color", 1 ); graph.SetColor( style4 ); - }{ + }/*{ auto& graph = prof.graphTable.Add( sec, Adreno_CcuPartBlockRd ); graph.SetCapacity( capacity ); graph.SetName( "part block rd" ); graph.SetColor( style1 ); - }{ + }*/{ auto& graph = prof.graphTable.Add( sec, Adreno_CcuGMem ); graph.SetCapacity( capacity, 2 ); graph.SetName( "gmem" ); @@ -1336,13 +1449,64 @@ namespace graph.SetLabel( "write", 1 ); graph.SetColor( style4 ); graph.SetSuffix( "B" ); // bytes - }{ + }/*{ auto& graph = prof.graphTable.Add( sec, Adreno_Ccu2DPix ); graph.SetCapacity( capacity ); graph.SetName( "2d pix" ); graph.SetColor( style1 ); + }*/{ + auto& graph = prof.graphTable.Add( sec, Adreno_Ccu2dReq ); + graph.SetCapacity( capacity, 2 ); + graph.SetName( "2d req" ); + graph.SetLabel( "read", 0 ); + graph.SetLabel( "write", 1 ); + graph.SetColor( style4 ); + }{ + auto& graph = prof.graphTable.Add( sec, Adreno_Cmp2dTraffic ); + graph.SetCapacity( capacity, 2 ); + graph.SetName( "cmp 2d data" ); + graph.SetLabel( "read", 0 ); + graph.SetLabel( "write", 1 ); + graph.SetColor( style4 ); } prof.graphTable.SetCaption( sec, "Cache and Compression Unit" ); + }{ + constexpr SecName sec {"ShaderCore"}; + { + auto& graph = prof.graphTable.Add( sec, Adreno_SspALUCycles ); + graph.SetCapacity( capacity, 2 ); + graph.SetName( "cycles" ); + graph.SetLabel( "alu", 0 ); + graph.SetLabel( "efu", 1 ); + graph.SetColor( style4 ); + graph.SetSuffix( "Hz" ); + }{ + auto& graph = prof.graphTable.Add( sec, Adreno_SspVSInst ); + graph.SetCapacity( capacity, 3 ); + graph.SetName( "VS" ); + graph.SetLabel( "full", 0 ); + graph.SetLabel( "half", 1 ); + graph.SetLabel( "efu", 2 ); + graph.SetColor( style4 ); + }{ + auto& graph = prof.graphTable.Add( sec, Adreno_SspFSInst ); + graph.SetCapacity( 
capacity, 3 ); + graph.SetName( "FS" ); + graph.SetLabel( "full", 0 ); + graph.SetLabel( "half", 1 ); + graph.SetLabel( "efu", 2 ); + graph.SetColor( style4 ); + }{ + auto& graph = prof.graphTable.Add( sec, Adreno_SspL2Traffic ); + graph.SetCapacity( capacity, 2 ); + graph.SetName( "L2" ); + graph.SetLabel( "read", 0 ); + graph.SetLabel( "write", 1 ); + graph.SetColor( style4 ); + graph.SetSuffix( "T/f" ); + graph.SetDescription( "Buffer/image storage load/store operations. One transaction is single texel or group of texels." ); + } + prof.graphTable.SetCaption( sec, " Shader/Streaming Processor" ); } } @@ -1354,86 +1518,311 @@ namespace #if 1 /* ================================================= - _DrawCpuUsageImGui + _InitCpuUsageImGui ================================================= */ - void HwpcProfiler::_DrawCpuUsageImGui () + void HwpcProfiler::_InitCpuUsageImGui () { - if ( not _cpuUsage.enabled ) + ASSERT( _genProf.corePerLine == 0 ); + + const uint capacity = 50; + const auto clusters = _genProf.profiler.GetCpuClusters(); + auto style = GetStyle4(); + style.mode = ImLineGraph::EMode::Line; + usize total_cores = 0; + + for (auto& cluster : clusters) { + total_cores += cluster.logicalCores.count(); + } + + _genProf.coreUsage.resize( total_cores ); + _genProf.corePerLine = 1; + + for (auto& cluster : clusters) + { + for (uint core_id : BitIndexIterate( cluster.logicalCores.to_ullong() )) + { + _genProf.coreUsage[core_id] = MakeUnique(); + auto& graph = *_genProf.coreUsage[core_id]; + graph.SetCapacity( capacity, 2 ); + graph.SetColor( style ); + graph.SetName( ToString(core_id) ); + graph.SetLabel( "total", 0 ); + graph.SetLabel( "kernel", 1 ); + graph.SetSuffix( "%" ); + graph.SetRange( 0.f, 100.f ); + graph.SetLimits( 50.f, 90.f ); + } + + _genProf.corePerLine = Max( _genProf.corePerLine, uint(cluster.logicalCores.count()) ); + } + + if ( clusters.size() == 1 ) + _genProf.corePerLine = Max( 1u, uint( Sqrt( float(total_cores) ) + 0.5f )); + } + +/* 
+================================================= + _InitGeneralPerfImGui +================================================= +*/ + void HwpcProfiler::_InitGeneralPerfImGui (const ImLineGraph::ColorStyle &style4, const ImLineGraph::ColorStyle &style1) + { + const uint capacity = 50; + { + constexpr SecName sec {"Memory"}; + { + auto& graph = _genProf.graphTable.Add( sec, GenPerf_ProcMemUsage ); + graph.SetCapacity( capacity, 2 ); + graph.SetName( "proc mem" ); + graph.SetLabel( "curr", 0 ); + graph.SetLabel( "peak", 1 ); + graph.SetColor( style4 ); + graph.SetSuffix( "B" ); + graph.SetDescription( "Current process RAM usage." ); + }{ + auto style = style4; style.mode = ImLineGraph::EMode::Line; + auto& graph = _genProf.graphTable.Add( sec, GenPerf_MemUsage ); + graph.SetCapacity( capacity, 3 ); + graph.SetName( "mem usage" ); + graph.SetLabel( "proc", 0 ); + graph.SetLabel( "phys", 1 ); + graph.SetLabel( "virt", 2 ); + graph.SetColor( style ); + graph.SetSuffix( "%" ); + graph.SetRange( 0.f, 100.f ); + graph.SetDescription( "proc - current process memory usage.\nphys - physical memory (RAM) usage.\nvirt - virtual memory usage (on disk)." 
); + }{ + auto& graph = _genProf.graphTable.Add( sec, GenPerf_PhysMemUsage ); + graph.SetCapacity( capacity ); + graph.SetName( "RAM" ); + graph.SetColor( style1 ); + graph.SetSuffix( "B" ); + }{ + auto& graph = _genProf.graphTable.Add( sec, GenPerf_VirtMemUsage ); + graph.SetCapacity( capacity ); + graph.SetName( "virt mem" ); + graph.SetColor( style1 ); + graph.SetSuffix( "B" ); + } + _genProf.graphTable.SetCaption( sec, "Memory" ); + }{ + constexpr SecName sec {"Process"}; + { + auto& graph = _genProf.graphTable.Add( sec, GenPerf_CtxSwitches ); + graph.SetCapacity( capacity, 2 ); + graph.SetName( "ctx swch" ); + graph.SetLabel( "io", 0 ); + graph.SetLabel( "hip", 1 ); + graph.SetColor( style4 ); + graph.SetDescription( "Context switches per second:\n* io - context switch when awaiting availability of a resource (IO).\n Use AsyncFile to avoid context switches.\n* hip - higher priority process replace current process.\n Minimize usage of sync primitives, use task dependencies instead." ); + }{ + auto& graph = _genProf.graphTable.Add( sec, GenPerf_FileIO ); + graph.SetCapacity( capacity, 2 ); + graph.SetName( "file" ); + graph.SetLabel( "in", 0 ); + graph.SetLabel( "out", 1 ); + graph.SetColor( style4 ); + graph.SetDescription( "Number of times the filesystem had to perform input/output per second." ); + }{ + auto style = style1; style.mode = ImLineGraph::EMode::Line; + auto& graph = _genProf.graphTable.Add( sec, GenPerf_KernelTime ); + graph.SetCapacity( capacity ); + graph.SetName( "kernel" ); + graph.SetColor( style ); + graph.SetSuffix( "%" ); + graph.SetRange( 0.f, 100.f ); + graph.SetLimits( 20.f, 60.f ); + graph.SetDescription( "Percentage of kernel time.\nCan be large when app has low workload (low CPU usage).\nOn high CPU usage should be low." 
); + } + _genProf.graphTable.SetCaption( sec, "Process" ); + }{ + constexpr SecName sec {"Battery"}; + { + auto& graph = _genProf.graphTable.Add( sec, GenPerf_BatteryDischarge ); + graph.SetCapacity( capacity, 2 ); + graph.SetName( "discharge" ); + graph.SetLabel( "now", 0 ); + graph.SetLabel( "avg", 1 ); + graph.SetColor( style4 ); + graph.SetSuffix( "W" ); + graph.SetLimits( 3.f, 6.f ); // for mobile + graph.SetDescription( "Device power consumption based on battery indicators.\nNow - measure power as current * voltage.\nAvg - measure changes in battery capacity." ); + }{ + auto& graph = _genProf.graphTable.Add( sec, GenPerf_BatteryDischargeTotal ); + graph.SetCapacity( capacity ); + graph.SetName( "energy lost" ); + graph.SetColor( style1 ); + graph.SetSuffix( "J" ); + graph.SetDescription( "Total battery energy lost from profiling start." ); + }{ + auto& graph = _genProf.graphTable.Add( sec, GenPerf_BatteryLevel ); + graph.SetCapacity( capacity ); + graph.SetName( "level" ); + graph.SetColor( style1 ); + graph.SetSuffix( "%" ); + } + + if constexpr( GenProf_BatteryAux ) + { + { + auto& graph = _genProf.graphTable.Add( sec, GenPerf_BatteryTemperature ); + graph.SetCapacity( capacity ); + graph.SetName( "temp" ); + graph.SetColor( style1 ); + graph.SetSuffix( "C" ); + }{ + auto& graph = _genProf.graphTable.Add( sec, GenPerf_BatteryCurrent ); + graph.SetCapacity( capacity ); + graph.SetName( "current" ); + graph.SetColor( style1 ); + graph.SetSuffix( "A" ); + }{ + auto& graph = _genProf.graphTable.Add( sec, GenPerf_BatteryVoltage ); + graph.SetCapacity( capacity ); + graph.SetName( "voltage" ); + graph.SetColor( style1 ); + graph.SetSuffix( "V" ); + } + } + _genProf.graphTable.SetCaption( sec, "Battery" ); + } + } + +/* +================================================= + _DrawGeneralPerfImGui +================================================= +*/ + void HwpcProfiler::_DrawGeneralPerfImGui () + { + if ( not _genProf.profiler.IsInitialized() ) return; const float 
wnd_pos_x = ImGui::GetCursorScreenPos().x; const float x_offset = wnd_pos_x + c_GraphPadding[0]; const ImVec2 wnd_size = ImGui::GetContentRegionAvail(); - if ( ImGui::CollapsingHeader( "CPU usage", 0 )) + if ( not _genProf.coreUsage.empty() ) { - const auto& cpu_info = CpuArchInfo::Get(); - const uint core_per_line = _cpuUsage.corePerLine; - const float graph_width = wnd_size.x / core_per_line; - const float graph_height = 100.f; - float2 left_top = float2{ x_offset, 0.f }; - - for (auto& core : cpu_info.cpu.coreTypes) + if ( ImGui::CollapsingHeader( "CPU usage", 0 )) { - ImGui::Text( "%s (%s)", core.name.c_str(), ToString( core.type ).data() ); - - left_top.x = x_offset; - left_top.y = ImGui::GetCursorScreenPos().y; + const auto clusters = _genProf.profiler.GetCpuClusters(); + const uint core_per_line = _genProf.corePerLine; + const float graph_width = wnd_size.x / core_per_line; + const float graph_height = 100.f; + float2 left_top = float2{ x_offset, 0.f }; - for (ulong bits = core.logicalBits.to_ullong(), i = 1; bits != 0; ++i) + for (auto& cluster : clusters) { - uint core_id = ExtractBitIndex( INOUT bits ); - auto& graph = _cpuUsage.coreUsage[core_id]; - - RectF region; - region.left = left_top.x; - region.right = left_top.x + graph_width; - region.top = left_top.y; - region.bottom = left_top.y + graph_height; - left_top.x = region.right; + ImGui::TextUnformatted( cluster.name.c_str() ); - graph->Draw( INOUT region ); + left_top.x = x_offset; + left_top.y = ImGui::GetCursorScreenPos().y; - if ( i >= core_per_line ) + for (ulong bits = cluster.logicalCores.to_ullong(), i = 1; bits != 0; ++i) { - i = 0; - left_top.x = x_offset; - left_top.y += graph_height; + const uint core_id = ExtractBitIndex( INOUT bits ); + + ASSERT( core_id < _genProf.coreUsage.size() ); + if ( core_id >= _genProf.coreUsage.size() ) + continue; + + if ( i > core_per_line ) + { + i = 0; + left_top.x = x_offset; + left_top.y += graph_height; + } + + auto& graph = 
_genProf.coreUsage[core_id]; + + RectF region; + region.left = left_top.x; + region.right = left_top.x + graph_width; + region.top = left_top.y; + region.bottom = left_top.y + graph_height; + left_top.x = region.right; + + graph->Draw( INOUT region ); } + left_top.y += graph_height; + ImGui::SetCursorScreenPos( ImVec2{ wnd_pos_x, left_top.y }); } - ImGui::SetCursorScreenPos( ImVec2{ wnd_pos_x, left_top.y }); + ImGui::SetCursorScreenPos( ImVec2{ wnd_pos_x, left_top.y + c_GraphPadding[1] }); } - left_top.y += graph_height; - ImGui::SetCursorScreenPos( ImVec2{ wnd_pos_x, left_top.y - (left_top.x == x_offset ? graph_height : 0.f) + c_GraphPadding[1] }); + } + + if ( ImGui::CollapsingHeader( "OS performance counters", ImGuiTreeNodeFlags_DefaultOpen )) + { + _genProf.graphTable.Draw( wnd_size.x, c_GraphHeight, c_GraphPadding, ImGui::IsItemHovered() ); } ImGui::SetCursorScreenPos( ImVec2{ wnd_pos_x, ImGui::GetCursorScreenPos().y }); } /* ================================================= - _UpdateCpuUsageImGui + _UpdateGeneralPerfImGui ================================================= */ - void HwpcProfiler::_UpdateCpuUsageImGui () + void HwpcProfiler::_UpdateGeneralPerfImGui (const bool, const float) { - if ( not _cpuUsage.enabled ) - return; + using ECounter = GeneralProfiler::ECounter; - StaticArray< float, 64 > user, kernel; + auto& prof = _genProf; + auto& cnt = prof.counters; + const double inv_dt = double(prof.invTimeDelta); - CpuPerformance::GetUsage( OUT user.data(), OUT kernel.data(), uint(user.size()) ); - - const auto& cpu_info = CpuArchInfo::Get(); - for (auto& core : cpu_info.cpu.coreTypes) + GeneralProfiler::CpuUsage_t total, kernel; + if ( _genProf.profiler.GetUsage( OUT total, OUT kernel )) { - for (uint core_id : BitIndexIterate( core.logicalBits.to_ullong() )) + if_unlikely( _genProf.corePerLine == 0 ) + _InitCpuUsageImGui(); + + const auto clusters = _genProf.profiler.GetCpuClusters(); + for (auto& cluster : clusters) { - auto& graph = 
_cpuUsage.coreUsage[core_id]; + for (uint core_id : BitIndexIterate( cluster.logicalCores.to_ullong() )) + { + auto& graph = _genProf.coreUsage[core_id]; - graph->Add( List{ (user[core_id] + kernel[core_id]) * 100.f, kernel[core_id] * 100.f }); + graph->Add( List{ total[core_id] * 100.f, kernel[core_id] * 100.f }); + } } } + + if ( cnt.empty() ) + return; + + const auto AddPoint1 = [&] (ECounter type, GraphName::Ref graphName, double scale = 0.0) { + _AddPoint1( prof, cnt, type, graphName, (scale == 0.0 ? inv_dt : scale) ); + }; + const auto AddPoint2 = [&] (ECounter type0, ECounter type1, GraphName::Ref graphName, double scale = 0.0) { + _AddPoint2( prof, cnt, type0, type1, graphName, (scale == 0.0 ? inv_dt : scale) ); + }; + const auto AddPoint3 = [&] (ECounter type0, ECounter type1, ECounter type2, GraphName::Ref graphName, double scale = 0.0) { + _AddPoint3( prof, cnt, type0, type1, type2, graphName, (scale == 0.0 ? inv_dt : scale) ); + }; + + AddPoint1( ECounter::KernelTime, GenPerf_KernelTime, 1.0 ); // % + AddPoint1( ECounter::BatteryLevel, GenPerf_BatteryLevel, 1.0 ); + AddPoint1( ECounter::PhysicalMemoryUsed, GenPerf_PhysMemUsage, 1.0 ); + AddPoint1( ECounter::VirtualMemoryUsed, GenPerf_VirtMemUsage, 1.0 ); + AddPoint1( ECounter::BatteryDischargeTotal, GenPerf_BatteryDischargeTotal, 1.0 ); + + if constexpr( GenProf_BatteryAux ) + { + AddPoint1( ECounter::BatteryTemperature, GenPerf_BatteryTemperature, 1.0 ); + AddPoint1( ECounter::BatteryCurrent, GenPerf_BatteryCurrent, 1.0 ); + AddPoint1( ECounter::BatteryVoltage, GenPerf_BatteryVoltage, 1.0 ); + } + + AddPoint2( ECounter::ProcessMemoryUsed, ECounter::ProcessPeakMemory, GenPerf_ProcMemUsage, 1.0 ); // % + AddPoint2( ECounter::BatteryDischarge, ECounter::BatteryDischargeAvg, GenPerf_BatteryDischarge, 1.0 ); + AddPoint2( ECounter::FSInput, ECounter::FSOutput, GenPerf_FileIO ); + AddPoint2( ECounter::ContextSwitches_IO, ECounter::ContextSwitches_HighPrio, GenPerf_CtxSwitches ); + + AddPoint3( 
ECounter::ProcessMemoryUsage, ECounter::PhysicalMemoryUsage, ECounter::VirtualMemoryUsage, GenPerf_MemUsage, 1.0 ); } #endif //----------------------------------------------------------------------------- @@ -1466,7 +1855,7 @@ namespace _UpdateNVidiaCountersImGui ================================================= */ - void HwpcProfiler::_UpdateNVidiaCountersImGui (const float) + void HwpcProfiler::_UpdateNVidiaCountersImGui (const bool, const float) { using ECounter = NVidiaProfiler::ECounter; @@ -1476,46 +1865,29 @@ namespace if ( cnt.empty() ) return; - const auto AddPoint1 = [&] (ECounter type, GraphName::Ref graphName, float scale = 1.f) - {{ - auto graph = prof.graphTable.Get( graphName ); - CHECK_ERRV( graph ); - ASSERT( prof.requiredCounters.contains( type )); - - if ( auto it = cnt.find( type ); it != cnt.end()) graph->AddNonScaled( List{ it->second * scale }); - }}; - - const auto AddPoint2 = [&] (ECounter type0, ECounter type1, GraphName::Ref graphName, float scale = 1.f) - {{ - auto graph = prof.graphTable.Get( graphName ); - CHECK_ERRV( graph ); - ASSERT( prof.requiredCounters.contains( type0 ) and - prof.requiredCounters.contains( type1 )); - - float value0 = 0.f, value1 = 0.f; - bool exists = false; - - if ( auto it = cnt.find( type0 ); it != cnt.end()) { value0 = it->second * scale; exists = true; } - if ( auto it = cnt.find( type1 ); it != cnt.end()) { value1 = it->second * scale; exists = true; } - - if ( exists ) graph->AddNonScaled( List{ value0, value1 }); - }}; + const auto AddPoint1 = [&] (ECounter type, GraphName::Ref graphName, float scale = 1.f) { + _AddPoint1( prof, cnt, type, graphName, scale ); + }; + const auto AddPoint2 = [&] (ECounter type0, ECounter type1, GraphName::Ref graphName, float scale = 1.f) { + _AddPoint2( prof, cnt, type0, type1, graphName, scale ); + }; + const auto AddPoint3 = [&] (ECounter type0, ECounter type1, ECounter type2, GraphName::Ref graphName, float scale = 1.f) { + _AddPoint3( prof, cnt, type0, type1, type2, 
graphName, scale ); + }; AddPoint1( ECounter::MemoryUtil, NV_MemoryUtil ); AddPoint1( ECounter::MemoryClock, NV_MemoryClock, 1.0e+6f ); - AddPoint1( ECounter::GpuUtil, NV_GpuUtil ); - AddPoint1( ECounter::GraphicsClock, NV_GraphicsClock, 1.0e+6f ); - AddPoint1( ECounter::SMClock, NV_SMClock, 1.0e+6f ); - AddPoint1( ECounter::VideoClock, NV_VideoClock, 1.0e+6f ); - AddPoint1( ECounter::GpuTemperature, NV_GpuTemp ); AddPoint1( ECounter::PowerUsage, NV_PowerUsage ); - AddPoint1( ECounter::PerfState, NV_PerfState ); + // AddPoint1( ECounter::PerfState, NV_PerfState ); AddPoint1( ECounter::FanSpeed, NV_FanSpeed ); + AddPoint1( ECounter::DevMemUsedMb, NV_DevMemUsedMb ); + AddPoint1( ECounter::UnifiedMemUsedMb, NV_UniMemUsedMb ); AddPoint2( ECounter::DevMemUsed, ECounter::UnifiedMemUsed, NV_MemUsed ); - AddPoint2( ECounter::DevMemUsedMb, ECounter::UnifiedMemUsedMb, NV_MemUsedMb ); + + AddPoint3( ECounter::GraphicsClock, ECounter::SMClock, ECounter::VideoClock, NV_GraphicsClock, 1.0e+6f ); } /* @@ -1529,76 +1901,73 @@ namespace auto& prof = _nvProf; { - constexpr SecName sec {"Memory"}; + constexpr SecName sec {"GPU"}; { + auto& graph = prof.graphTable.Add( sec, NV_GpuUtil ); + graph.SetCapacity( capacity ); + graph.SetName( "gpu util" ); + graph.SetColor( style1 ); + graph.SetSuffix( "%" ); + graph.SetLimits( 75.f, 90.f ); + graph.SetDescription( "GPU utilization" ); + }{ auto& graph = prof.graphTable.Add( sec, NV_MemoryUtil ); graph.SetCapacity( capacity ); - graph.SetName( "util" ); + graph.SetName( "mem util" ); graph.SetColor( style1 ); graph.SetSuffix( "%" ); graph.SetLimits( 75.f, 90.f ); graph.SetDescription( "Memory utilization" ); + }{ + auto& graph = prof.graphTable.Add( sec, NV_GraphicsClock ); + graph.SetCapacity( capacity, 3 ); + graph.SetName( "clock" ); + graph.SetLabel( "gpu", 0 ); + graph.SetLabel( "sm", 1 ); + graph.SetLabel( "video", 2 ); + graph.SetColor( style4 ); + graph.SetSuffix( "Hz" ); + graph.SetLimits( 1.6e+9f, 1.9e+9f ); + 
graph.SetDescription( "Graphics clock.\nSM clock.\nVideo encoder/decoder clock" ); }{ auto& graph = prof.graphTable.Add( sec, NV_MemoryClock ); graph.SetCapacity( capacity ); - graph.SetName( "clock" ); + graph.SetName( "mem clock" ); graph.SetColor( style1 ); graph.SetSuffix( "Hz" ); graph.SetDescription( "Memory clock" ); - }{ + } + prof.graphTable.SetCaption( sec, "GPU" ); + }{ + constexpr SecName sec {"Memory"}; + { + auto style = style4; style.mode = ImLineGraph::EMode::Line; auto& graph = prof.graphTable.Add( sec, NV_MemUsed ); graph.SetCapacity( capacity, 2 ); graph.SetName( "used" ); graph.SetLabel( "dev", 0 ); - graph.SetLabel( "host", 1 ); - graph.SetColor( style4 ); + graph.SetLabel( "uni", 1 ); + graph.SetColor( style ); graph.SetSuffix( "%" ); + graph.SetRange( 0.f, 100.f ); graph.SetLimits( 75.f, 90.f ); - graph.SetDescription( "Used memory in %:\nDevice - VRAM\nHost - CPU visible VRAM (unified memory)" ); + graph.SetDescription( "Used memory in %:\nDevice - VRAM\nUnified - CPU visible VRAM" ); }{ - auto& graph = prof.graphTable.Add( sec, NV_MemUsedMb ); - graph.SetCapacity( capacity, 2 ); - graph.SetName( "used" ); - graph.SetLabel( "dev", 0 ); - graph.SetLabel( "host", 1 ); - graph.SetColor( style4 ); - graph.SetSuffix( "b" ); - graph.SetDescription( "Used memory in bytes:\nDevice - VRAM\nHost - CPU visible VRAM (unified memory)" ); - } - prof.graphTable.SetCaption( sec, "Memory" ); - }{ - constexpr SecName sec {"GPU"}; - { - auto& graph = prof.graphTable.Add( sec, NV_GpuUtil ); - graph.SetCapacity( capacity ); - graph.SetName( "util" ); - graph.SetColor( style1 ); - graph.SetSuffix( "%" ); - graph.SetLimits( 75.f, 90.f ); - graph.SetDescription( "GPU utilization" ); - }{ - auto& graph = prof.graphTable.Add( sec, NV_GraphicsClock ); + auto& graph = prof.graphTable.Add( sec, NV_DevMemUsedMb ); graph.SetCapacity( capacity ); - graph.SetName( "graphics" ); + graph.SetName( "device mem" ); graph.SetColor( style1 ); - graph.SetSuffix( "Hz" ); - 
graph.SetDescription( "Graphics clock" ); - }{ - auto& graph = prof.graphTable.Add( sec, NV_SMClock ); - graph.SetCapacity( capacity ); - graph.SetName( "sm" ); - graph.SetColor( style1 ); - graph.SetSuffix( "Hz" ); - graph.SetDescription( "SM clock" ); + graph.SetSuffix( "B" ); + graph.SetDescription( "Used VRAM (device local) memory." ); }{ - auto& graph = prof.graphTable.Add( sec, NV_VideoClock ); + auto& graph = prof.graphTable.Add( sec, NV_UniMemUsedMb ); graph.SetCapacity( capacity ); - graph.SetName( "video" ); + graph.SetName( "unified mem" ); graph.SetColor( style1 ); - graph.SetSuffix( "Hz" ); - graph.SetDescription( "Video encoder/decoder clock" ); + graph.SetSuffix( "B" ); + graph.SetDescription( "Used CPU visible VRAM (unified) memory." ); } - prof.graphTable.SetCaption( sec, "GPU Core" ); + prof.graphTable.SetCaption( sec, "Memory" ); }{ constexpr SecName sec {"Power"}; { @@ -1616,14 +1985,6 @@ namespace graph.SetColor( style1 ); graph.SetSuffix( "W" ); graph.SetDescription( "GPU power usage in Watts" ); - }{ - auto& graph = prof.graphTable.Add( sec, NV_PerfState ); - graph.SetCapacity( capacity ); - graph.SetName( "perf" ); - graph.SetColor( style1 ); - graph.SetSuffix( "%" ); - graph.SetLimits( 75.f, 90.f ); - graph.SetDescription( "Performance state:\n1 - maximum performance\n0 - minimum performance" ); }{ auto& graph = prof.graphTable.Add( sec, NV_FanSpeed ); graph.SetCapacity( capacity ); diff --git a/AE/engine/src/profiler/ProfilerUI.cpp b/AE/engine/src/profiler/ProfilerUI.cpp index 8a4d4327..8247ecf3 100644 --- a/AE/engine/src/profiler/ProfilerUI.cpp +++ b/AE/engine/src/profiler/ProfilerUI.cpp @@ -39,16 +39,24 @@ namespace AE::Profiler } _hwpcProf.reset( new HwpcProfiler{ start_time }); - if ( _hwpcProf->Initialize( client, _msgProducer )) + if ( not _hwpcProf->Initialize( client, _msgProducer )) + _hwpcProf.reset( null ); + + PowerVRProfiler* pvr = null; + if ( _hwpcProf ) { - //_graphics->AddNextFrameListener( [this]() { 
_SampleGraphicsCounters(); }); - //_graphics->SetPowerVRProfiler( _hwpcProf->GetPowerVRProfiler() ); + #if defined(AE_ENABLE_REMOTE_GRAPHICS) or defined(AE_ENABLE_PVRCOUNTER) + const bool enable = true; + #else + const bool enable = false; + #endif + + if ( client or enable ) + pvr = &_hwpcProf->GetPowerVRProfiler(); } - else - _hwpcProf.reset( null ); _task = MakeRC( start_time ); - _graphics = MakeRC( start_time, (_hwpcProf ? &_hwpcProf->GetPowerVRProfiler() : null) ); + _graphics = MakeRC( start_time, pvr ); _memory = MakeRC( start_time ); Scheduler().SetProfiler( _task ); diff --git a/AE/engine/src/profiler/Utils/AdrenoProfiler.cpp b/AE/engine/src/profiler/Profilers/AdrenoProfiler.cpp similarity index 94% rename from AE/engine/src/profiler/Utils/AdrenoProfiler.cpp rename to AE/engine/src/profiler/Profilers/AdrenoProfiler.cpp index 91e7c6ce..df008b1a 100644 --- a/AE/engine/src/profiler/Utils/AdrenoProfiler.cpp +++ b/AE/engine/src/profiler/Profilers/AdrenoProfiler.cpp @@ -2,7 +2,7 @@ #ifdef AE_ENABLE_REMOTE_GRAPHICS # include "profiler/Profiler.pch.h" -# include "profiler/Utils/AdrenoProfiler.h" +# include "profiler/Profilers/AdrenoProfiler.h" namespace AE::Profiler { @@ -22,7 +22,7 @@ namespace AE::Profiler AdrenoProfiler::~AdrenoProfiler () __NE___ {} bool AdrenoProfiler::IsInitialized () C_NE___ { return bool{_impl}; } - void AdrenoProfiler::Deinitialize () __NE___ { _impl.reset( null ); } + void AdrenoProfiler::Deinitialize () __NE___ { _impl.reset(); } AdrenoProfiler::ECounterSet AdrenoProfiler::EnabledCounterSet () C_NE___ { return _impl ? _impl->enabled : Default; } AdrenoProfiler::HWInfo AdrenoProfiler::GetHWInfo () C_NE___ { return _impl ? 
_impl->info : Default; } @@ -59,9 +59,10 @@ namespace AE::Profiler Sample ================================================= */ - void AdrenoProfiler::Sample (OUT Counters_t &result) C_NE___ + void AdrenoProfiler::Sample (OUT Counters_t &result, INOUT float &invdt) C_NE___ { result.clear(); + Unused( invdt ); // keep current if ( not IsInitialized() ) return; @@ -89,53 +90,53 @@ extern "C" #define ADRENO_PROPERTY_DEVICE_INFO 0x1 struct adreno_device_info { - unsigned int device_id; - unsigned int chip_id; - unsigned int mmu_enabled; - unsigned long gmem_gpu_base_address; - unsigned int gpu_id; - size_t gmem_sizebytes; + unsigned int device_id; + unsigned int chip_id; + unsigned int mmu_enabled; + unsigned long gmem_gpu_base_address; + unsigned int gpu_id; + size_t gmem_sizebytes; }; struct adreno_device_get_property { - unsigned int type; - void *value; - size_t num_bytes; + unsigned int type; + void * value; + size_t num_bytes; }; #define ADRENO_IOCTL_DEVICE_GET_PROPERTY \ _IOWR(ADRENO_IOCTL_TYPE, 0x2, struct adreno_device_get_property) struct adreno_counter_get { - unsigned int group_id; - unsigned int countable_selector; - unsigned int regster_offset_low; - unsigned int regster_offset_high; - unsigned int __pad; + unsigned int group_id; + unsigned int countable_selector; + unsigned int regster_offset_low; + unsigned int regster_offset_high; + unsigned int __pad; }; #define ADRENO_IOCTL_COUNTER_GET \ _IOWR(ADRENO_IOCTL_TYPE, 0x38, struct adreno_counter_get) struct adreno_counter_put { - unsigned int group_id; - unsigned int countable_selector; - unsigned int __pad[2]; + unsigned int group_id; + unsigned int countable_selector; + unsigned int __pad[2]; }; #define ADRENO_IOCTL_COUNTER_PUT \ _IOW(ADRENO_IOCTL_TYPE, 0x39, struct adreno_counter_put) struct hpc_gpu_adreno_ioctl_counter_read_counter_t { - uint32_t group_id; - uint32_t countable_selector; - uint64_t value; + uint32_t group_id; + uint32_t countable_selector; + uint64_t value; }; struct adreno_counter_read 
{ struct hpc_gpu_adreno_ioctl_counter_read_counter_t *counters; - unsigned int num_counters; - unsigned int __pad[2]; + unsigned int num_counters; + unsigned int __pad[2]; }; #define ADRENO_IOCTL_COUNTER_READ \ @@ -143,7 +144,7 @@ extern "C" } // extern "C" -# include "profiler/Utils/AdrenoProfiler.h" +# include "profiler/Profilers/AdrenoProfiler.h" namespace AE::Profiler { @@ -175,21 +176,23 @@ namespace // 0x00'xx // RBBM + 0x01'02, // RBBM_TSEbusy 0x01'03, // RBBM_RasterizerBusy + 0x01'09, // RBBM_VBIFbusy 0x01'0A, // RBBM_VSCbusy 0x01'0C, // RBBM_UCHEbusy - 0x01'09, // RBBM_VBIFbusy - 0x01'02, // RBBM_TSEbusy // PC + 0x02'10, // PC_Instances + 0x02'11, // PC_VPCPrimitives 0x02'12, // PC_DeadPrim 0x02'13, // PC_LivePrim + 0x02'14, // PC_VertexHits 0x02'15, // PC_IA_Vertices 0x02'16, // PC_IA_Primitives 0x02'1A, // PC_VS_Invocations 0x02'1E, // PC_DrawCalls3D 0x02'1F, // PC_DrawCalls2D - 0x02'11, // PC_VPCPrimitives // Vertex Fetch and Decode 0x03'17, // VFD_TotalVertices @@ -219,13 +222,23 @@ namespace // 0x09'xx // Shader/Streaming Processor - // 0x0A'xx + 0x0A'01, // SSP_ALUcy + 0x0A'02, // SSP_EFUcy + 0x0A'23, // SSP_VS_EFUInst + 0x0A'24, // SSP_VS_FullALUInst + 0x0A'25, // SSP_VS_HalfALUInst + 0x0A'28, // SSP_FS_EFUInst + 0x0A'29, // SSP_FS_FullALUInst + 0x0A'2A, // SSP_FS_HalfALUInst + 0x0A'2F, // SSP_L2Read + 0x0A'30, // SSP_L2Write // Render backend 0x0B'0D, // RB_ZRead 0x0B'0E, // RB_ZWrite 0x0B'0F, // RB_CRead 0x0B'10, // RB_CWrite + 0x0B'11, // RB_TotalPass 0x0B'12, // RB_Z_Pass 0x0B'13, // RB_Z_Fail 0x0B'14, // RB_S_Fail @@ -243,7 +256,8 @@ namespace 0x18'08, // CCU_PartialBlockRead 0x18'09, // CCU_GMemRead 0x18'0A, // CCU_GMemWrite - 0x18'19, // CCU_2DPixels + 0x18'16, // CCU_2dReadReq + 0x18'17, // CCU_2dWriteReq // low resolution Z pass 0x19'07, // LRZ_Read @@ -255,8 +269,8 @@ namespace 0x19'11, // LRZ_TotalPixel // CMP - // 0x1A'xx - + 0x1A'16, // CMP_2dReadData + 0x1A'17, // CMP_2dWriteData }; StaticAssert( CountOf(values) == 
uint(ECounter::_Count) ); return Unpack( values[ uint(c) ]); @@ -288,21 +302,23 @@ namespace // 0x00'xx // RBBM + 0x01'02, // RBBM_TSEbusy 0x01'03, // RBBM_RasterizerBusy + 0x01'09, // RBBM_VBIFbusy 0x01'0A, // RBBM_VSCbusy 0x01'0C, // RBBM_UCHEbusy - 0x01'09, // RBBM_VBIFbusy - 0x01'02, // RBBM_TSEbusy // PC + 0x02'10, // PC_Instances + 0x02'11, // PC_VPCPrimitives 0x02'12, // PC_DeadPrim 0x02'13, // PC_LivePrim + 0x02'14, // PC_VertexHits 0x02'15, // PC_IA_Vertices 0x02'16, // PC_IA_Primitives 0x02'1A, // PC_VS_Invocations 0x02'1E, // PC_DrawCalls3D 0x02'1F, // PC_DrawCalls2D - 0x02'11, // PC_VPCPrimitives // Vertex Fetch and Decode 0x03'17, // VFD_TotalVertices @@ -332,13 +348,23 @@ namespace // 0x09'xx // Shader/Streaming Processor - // 0x0A'xx + 0x0A'01, // SSP_ALUcy + 0x0A'02, // SSP_EFUcy + 0x0A'22, // SSP_VS_EFUInst + 0x0A'23, // SSP_VS_FullALUInst + 0x0A'24, // SSP_VS_HalfALUInst + 0x0A'27, // SSP_FS_EFUInst + 0x0A'28, // SSP_FS_FullALUInst + 0x0A'29, // SSP_FS_HalfALUInst + 0x0A'2E, // SSP_L2Read + 0x0A'2F, // SSP_L2Write // Render backend 0x0B'0D, // RB_ZRead 0x0B'0E, // RB_ZWrite 0x0B'0F, // RB_CRead 0x0B'10, // RB_CWrite + 0x0B'11, // RB_TotalPass 0x0B'12, // RB_Z_Pass 0x0B'13, // RB_Z_Fail 0x0B'14, // RB_S_Fail @@ -353,7 +379,8 @@ namespace 0x18'08, // CCU_PartialBlockRead 0x18'09, // CCU_GMemRead 0x18'0A, // CCU_GMemWrite - 0x18'19, // CCU_2DPixels + 0x18'1B, // CCU_2dReadReq + 0x18'1C, // CCU_2dWriteReq // low resolution Z pass 0x19'07, // LRZ_Read @@ -365,8 +392,8 @@ namespace 0x19'11, // LRZ_TotalPixel // CMP - // 0x1A'xx - + 0x1A'1C, // CMP_2dReadData + 0x1A'1D, // CMP_2dWriteData }; StaticAssert( CountOf(values) == uint(ECounter::_Count) ); return Unpack( values[ uint(c) ]); @@ -386,7 +413,7 @@ namespace case AdrenoProfiler::EGPUSeries::A5xx : return UnpackGroupAndSelector_A5xx( c ); case AdrenoProfiler::EGPUSeries::A6xx : return UnpackGroupAndSelector_A6xx( c ); case AdrenoProfiler::EGPUSeries::A7xx : - case AdrenoProfiler::EGPUSeries::Unknown 
: break; + case AdrenoProfiler::EGPUSeries::Unknown : break; } switch_end return { ~0u, 0 }; @@ -1736,8 +1763,8 @@ namespace { case AdrenoProfiler::EGPUSeries::A5xx : return GetCounterName_A5xx( group, counter ); case AdrenoProfiler::EGPUSeries::A6xx : return GetCounterName_A6xx( group, counter ); - case AdrenoProfiler::EGPUSeries::A7xx : - case AdrenoProfiler::EGPUSeries::Unknown : break; + case AdrenoProfiler::EGPUSeries::A7xx : + case AdrenoProfiler::EGPUSeries::Unknown : break; } switch_end return "???"; @@ -1773,7 +1800,7 @@ namespace ND_ auto GetCurPrevValues () C_NE___ -> Pair< const ulong*, const ulong* >; - void PrintCounters () C_NE___; + void Print () C_NE___; }; @@ -1799,8 +1826,12 @@ namespace read.num_counters = uint(_counters.size()); read.counters = _counters.data(); - if_unlikely( ::ioctl( _gpuDevice, ADRENO_IOCTL_COUNTER_READ, INOUT &read ) < 0 ) + int err = ::ioctl( _gpuDevice, ADRENO_IOCTL_COUNTER_READ, INOUT &read ); + if ( err == -1 ) + { + UNIX_CHECK_DEV( "failed to read Adreno counters: " ); return false; + } ulong* dst = _values[_valueId].data(); @@ -1824,19 +1855,32 @@ namespace /* ================================================= - Impl::PrintCounters + Impl::Print ================================================= */ - void AdrenoProfiler::Impl::PrintCounters () C_NE___ + void AdrenoProfiler::Impl::Print () C_NE___ { #ifdef AE_ENABLE_LOGS const uint max_group = 0x22; const uint max_counter = 255; - String str = "Adreno perf counters:\n"; + String str = "Adreno GPU info"; + + str << "\nseries: "; + switch_enum( _hwInfo.series ) + { + case EGPUSeries::A5xx : str << "A5xx"; break; + case EGPUSeries::A6xx : str << "A6xx"; break; + case EGPUSeries::A7xx : str << "A7xx"; break; + case EGPUSeries::Unknown : + default : str << "unknown"; break; + } + switch_end + str << "\ngmem: " << ToString( _hwInfo.gmemSize ); + str << "\n\nPerformance counters:\n"; for (uint g = 0; g <= max_group; ++g) { - str << "| - | " << GetGroupName( _hwInfo.series, g ) 
<< " | - |\n"; + str << "| - | **" << GetGroupName( _hwInfo.series, g ) << "** | - |\n"; for (uint c = 0; c <= max_counter; ++c) { @@ -1844,9 +1888,15 @@ namespace cnt.group_id = g; cnt.countable_selector = c; - if ( ::ioctl( _gpuDevice, ADRENO_IOCTL_COUNTER_GET, INOUT &cnt ) >= 0 ) + if ( ::ioctl( _gpuDevice, ADRENO_IOCTL_COUNTER_GET, INOUT &cnt ) != -1 ) { - str << "| " << ToString(g) << " / " << ToString(c) << " | " << GetCounterName( _hwInfo.series, g, c ) << " |\n"; + str << "| " << ToString(g) << ", " << ToString(c) << " | " << GetCounterName( _hwInfo.series, g, c ) << " |\n"; + + // deactivate + ::adreno_counter_put put = {}; + put.group_id = g; + put.countable_selector = c; + ::ioctl( _gpuDevice, ADRENO_IOCTL_COUNTER_PUT, INOUT &put ); } } } @@ -1893,7 +1943,7 @@ namespace prop.value = &dev_info; prop.num_bytes = sizeof(dev_info); - if_unlikely( ::ioctl( impl->_gpuDevice, ADRENO_IOCTL_DEVICE_GET_PROPERTY, INOUT &prop ) < 0 ) + if_unlikely( ::ioctl( impl->_gpuDevice, ADRENO_IOCTL_DEVICE_GET_PROPERTY, INOUT &prop ) == -1 ) return false; uint chip_id = dev_info.chip_id; @@ -1917,25 +1967,27 @@ namespace impl->_hwInfo.series = EGPUSeries::A5xx; } + //impl->Print(); + const auto Activate = [&impl] (ECounter c) -> bool {{ auto [group, selector] = UnpackGroupAndSelector( impl->_hwInfo.series, c ); - ReadCounter read; - read.group_id = group; - read.countable_selector = selector; - read.value = 0; - ::adreno_counter_get cnt = {}; cnt.group_id = group; cnt.countable_selector = selector; - if_unlikely( ::ioctl( impl->_gpuDevice, ADRENO_IOCTL_COUNTER_GET, INOUT &cnt ) < 0 ) + if_unlikely( ::ioctl( impl->_gpuDevice, ADRENO_IOCTL_COUNTER_GET, INOUT &cnt ) == -1 ) { AE_LOG_DBG( "Adreno counter x"s << ToString<16>( (group<<8) | selector ) << " is not supported" ); return false; } + ReadCounter read; + read.group_id = group; + read.countable_selector = selector; + read.value = 0; + impl->_counters.push_back( read ); return true; }}; @@ -1959,8 +2011,6 @@ namespace _impl = 
RVRef(impl); - //impl->PrintCounters(); - AE_LOGI( "Started Adreno GPU profiler" ); return true; } @@ -2012,9 +2062,10 @@ namespace Sample ================================================= */ - void AdrenoProfiler::Sample (OUT Counters_t &outCounters) C_NE___ + void AdrenoProfiler::Sample (OUT Counters_t &outCounters, INOUT float &invdt) C_NE___ { outCounters.clear(); + Unused( invdt ); // keep current if ( not _impl ) return; // not initialized @@ -2039,7 +2090,7 @@ namespace #else // AE_ENABLE_ADRENO_PERFCOUNTER and not AE_ENABLE_REMOTE_GRAPHICS -# include "profiler/Utils/AdrenoProfiler.h" +# include "profiler/Profilers/AdrenoProfiler.h" # include "profiler/Remote/RemoteAdrenoProfiler.h" namespace AE::Profiler @@ -2051,19 +2102,19 @@ namespace AE::Profiler Impl (RC c) __NE___ : client{RVRef(c)} {} }; - AdrenoProfiler::AdrenoProfiler () __NE___ {} - AdrenoProfiler::~AdrenoProfiler () __NE___ {} + AdrenoProfiler::AdrenoProfiler () __NE___ {} + AdrenoProfiler::~AdrenoProfiler () __NE___ {} - bool AdrenoProfiler::Initialize (const ECounterSet &cs) __NE___ { return _impl and _impl->client->Initialize( cs ); } - bool AdrenoProfiler::IsInitialized () C_NE___ { return _impl and _impl->client->IsInitialized(); } + bool AdrenoProfiler::Initialize (const ECounterSet &cs) __NE___ { return _impl and _impl->client->Initialize( cs ); } + bool AdrenoProfiler::IsInitialized () C_NE___ { return _impl and _impl->client->IsInitialized(); } - AdrenoProfiler::ECounterSet AdrenoProfiler::EnabledCounterSet () C_NE___ { return _impl ? _impl->client->EnabledCounterSet() : Default; } - AdrenoProfiler::HWInfo AdrenoProfiler::GetHWInfo () C_NE___ { return _impl ? _impl->client->GetHWInfo() : Default; } + AdrenoProfiler::ECounterSet AdrenoProfiler::EnabledCounterSet () C_NE___ { return _impl ? _impl->client->EnabledCounterSet() : Default; } + AdrenoProfiler::HWInfo AdrenoProfiler::GetHWInfo () C_NE___ { return _impl ? 
_impl->client->GetHWInfo() : Default; } - void AdrenoProfiler::Sample (OUT Counters_t &result) C_NE___ { if (_impl) return _impl->client->Sample( OUT result ); } + void AdrenoProfiler::Sample (OUT Counters_t &result, INOUT float &invdt) C_NE___ { if (_impl) return _impl->client->Sample( OUT result, INOUT invdt ); } - bool AdrenoProfiler::InitClient (RC client) __NE___ + bool AdrenoProfiler::InitClient (RC client) __NE___ { CHECK_ERR( client ); diff --git a/AE/engine/src/profiler/Profilers/AdrenoProfiler.h b/AE/engine/src/profiler/Profilers/AdrenoProfiler.h new file mode 100644 index 00000000..d90aead6 --- /dev/null +++ b/AE/engine/src/profiler/Profilers/AdrenoProfiler.h @@ -0,0 +1,161 @@ +// Copyright (c) Zhirnov Andrey. For more information see 'LICENSE' +/* + API for Adreno GPU hardware performance counters. + + [Performance counters description](https://github.com/azhirnov/cpu-gpu-arch/blob/main/gpu/Adreno_PC.md) +*/ + +#pragma once + +#include "base/Utils/EnumSet.h" +#include "base/Pointers/RefCounter.h" + +namespace AE::Profiler +{ + using namespace AE::Base; + class AdrenoProfilerClient; + + + // + // Adreno GPU Profiler + // + + class AdrenoProfiler + { + // types + public: + enum class EGPUSeries : ubyte + { + Unknown, + A5xx, + A6xx, + A7xx, // TODO + }; + + enum class ECounter : ubyte + { + // RBBM + RBBM_TSEbusy, // Triangle Setup Engine busy cycles? + RBBM_RasterizerBusy, // Rasterizer busy cycles? + RBBM_VBIFbusy, // VBIF busy cycles? + RBBM_VSCbusy, // Visibility Stream Compressor busy cycles? + RBBM_UCHEbusy, // Unified L2 cache busy cycles? + + // PC + PC_Instances, // ? + PC_VPCPrimitives, // Varying/Position Cache primitives + PC_DeadPrim, // ? + PC_LivePrim, // ? + PC_VertexHits, // ? + PC_IA_Vertices, // input vertices ? + PC_IA_Primitives, // input primitives ? + PC_VS_Invocations, // vertex shader invocations + PC_DrawCalls3D, // with DS attachment ? + PC_DrawCalls2D, // blit/copy ? + + // VFD + VFD_TotalVertices, // ? 
+ + // VPC + VPC_BusyCycles, // ? + VPC_WorkingCycles, // ? + + // Rasterizer + RAS_SuperTiles, // number of large tiles (from 32x32 to 256x256) + RAS_8x4Tiles, // number of small tiles + RAS_MaskgenActive, // ? + RAS_FullyCoveredSuperTiles, // ? + RAS_FullyCovered8x4Tiles, // ? + RAS_PrimKilledInvisible, // ? + + // Shader/Streaming Processor + SSP_ALUcy, // ALU cycles (fma?) + SSP_EFUcy, // EFU cycles (1/x, sqrt, length, ...) + SSP_VS_EFUInst, // Vertex EFU instruction count + SSP_VS_FullALUInst, // Vertex full ALU instruction count (dual issue?) + SSP_VS_HalfALUInst, // Vertex half ALU instruction count + SSP_FS_EFUInst, // Fragment EFU instruction count + SSP_FS_FullALUInst, // Fragment full ALU instruction count + SSP_FS_HalfALUInst, // Fragment half ALU instruction count + SSP_L2Read, // Unified L2 cache read transactions + SSP_L2Write, // Unified L2 cache write transactions + + // Render backend + RB_ZRead, // \__ Z buffer bytes? + RB_ZWrite, // / + RB_CRead, // \__ color bytes? + RB_CWrite, // / + RB_TotalPass, // \. + RB_Z_Pass, // -| + RB_Z_Fail, // -|-- depth stencil test (pixels) + RB_S_Fail, // / + RB_AliveCycles2D, // ? + + // Visibility Stream Compressor + VSC_WorkingCycles, // ? + + // Cache and Compression Unit + CCU_DepthBlocks, // ? + CCU_ColorBlocks, // ? + CCU_PartialBlockRead, // ? + CCU_GMemRead, // bytes? + CCU_GMemWrite, // bytes? + CCU_2dReadReq, // ? + CCU_2dWriteReq, // ? + + // low resolution Z pass + LRZ_Read, // ? + LRZ_Write, // ? + LRZ_PrimKilledByMaskGen, // ? + LRZ_PrimKilledByLRZ, // ? + LRZ_PrimPassed, // ? + LRZ_TileKilled, // ? + LRZ_TotalPixel, // ? + + // Compression and Decompression + CMP_2dReadData, // bytes? + CMP_2dWriteData, // bytes? 
+ + _Count + }; + using ECounterSet = EnumSet< ECounter >; + using Counters_t = FlatHashMap< ECounter, ulong >; + + struct HWInfo + { + uint gpuId = 0; + EGPUSeries series = Default; + Bytes32u gmemSize; // on-chip memory + }; + + private: + struct Impl; + + + // variables + private: + Unique _impl; + + + // methods + public: + AdrenoProfiler () __NE___; + ~AdrenoProfiler () __NE___; + + ND_ bool Initialize (const ECounterSet &counterSet) __NE___; + void Deinitialize () __NE___; + ND_ bool IsInitialized () C_NE___; + + ND_ ECounterSet EnabledCounterSet () C_NE___; + ND_ HWInfo GetHWInfo () C_NE___; + + void Sample (OUT Counters_t &, INOUT float &invdt) C_NE___; + + + #ifndef AE_ENABLE_ADRENO_PERFCOUNTER + ND_ bool InitClient (RC) __NE___; + #endif + }; + + +} // AE::Profiler diff --git a/AE/engine/src/profiler/Utils/ArmProfiler.cpp b/AE/engine/src/profiler/Profilers/ArmProfiler.cpp similarity index 91% rename from AE/engine/src/profiler/Utils/ArmProfiler.cpp rename to AE/engine/src/profiler/Profilers/ArmProfiler.cpp index 143470e2..84cc5dd0 100644 --- a/AE/engine/src/profiler/Utils/ArmProfiler.cpp +++ b/AE/engine/src/profiler/Profilers/ArmProfiler.cpp @@ -2,7 +2,7 @@ #ifdef AE_ENABLE_REMOTE_GRAPHICS # include "profiler/Profiler.pch.h" -# include "profiler/Utils/ArmProfiler.h" +# include "profiler/Profilers/ArmProfiler.h" namespace AE::Profiler { @@ -56,9 +56,10 @@ namespace AE::Profiler Sample ================================================= */ - void ArmProfiler::Sample (OUT Counters_t &result) C_NE___ + void ArmProfiler::Sample (OUT Counters_t &result, INOUT float &invdt) C_NE___ { result.clear(); + Unused( invdt ); // keep current if ( not IsInitialized() ) return; @@ -82,7 +83,7 @@ namespace AE::Profiler # include # include -# include "profiler/Utils/ArmProfiler.h" +# include "profiler/Profilers/ArmProfiler.h" // based on https://github.com/ARM-software/HWCPipe/tree/1.x // MIT License @@ -352,9 +353,10 @@ namespace AE::Profiler Sample 
================================================= */ - void ArmProfiler::Sample (OUT Counters_t &outCounters) C_NE___ + void ArmProfiler::Sample (OUT Counters_t &outCounters, INOUT float &invdt) C_NE___ { outCounters.clear(); + Unused( invdt ); // keep current if ( not _impl ) return; // not initialized @@ -372,7 +374,7 @@ namespace AE::Profiler #else // not AE_ENABLE_ARM_PMU and not AE_ENABLE_REMOTE_GRAPHICS -# include "profiler/Utils/ArmProfiler.h" +# include "profiler/Profilers/ArmProfiler.h" # include "profiler/Remote/RemoteArmProfiler.h" namespace AE::Profiler @@ -384,18 +386,18 @@ namespace AE::Profiler Impl (RC c) __NE___ : client{RVRef(c)} {} }; - ArmProfiler::ArmProfiler () __NE___ {} - ArmProfiler::~ArmProfiler () __NE___ {} + ArmProfiler::ArmProfiler () __NE___ {} + ArmProfiler::~ArmProfiler () __NE___ {} - bool ArmProfiler::Initialize (const ECounterSet &cs) __NE___ { return _impl and _impl->client->Initialize( cs ); } - bool ArmProfiler::IsInitialized () C_NE___ { return _impl and _impl->client->IsInitialized(); } + bool ArmProfiler::Initialize (const ECounterSet &cs) __NE___ { return _impl and _impl->client->Initialize( cs ); } + bool ArmProfiler::IsInitialized () C_NE___ { return _impl and _impl->client->IsInitialized(); } - ArmProfiler::ECounterSet ArmProfiler::EnabledCounterSet () C_NE___ { return _impl ? _impl->client->EnabledCounterSet() : Default; } + ArmProfiler::ECounterSet ArmProfiler::EnabledCounterSet () C_NE___ { return _impl ? 
_impl->client->EnabledCounterSet() : Default; } - void ArmProfiler::Sample (OUT Counters_t &result) C_NE___ { if (_impl) return _impl->client->Sample( OUT result ); } + void ArmProfiler::Sample (OUT Counters_t &result, INOUT float &invdt) C_NE___ { if (_impl) return _impl->client->Sample( OUT result, INOUT invdt ); } - bool ArmProfiler::InitClient (RC client) __NE___ + bool ArmProfiler::InitClient (RC client) __NE___ { CHECK_ERR( client ); @@ -405,7 +407,7 @@ namespace AE::Profiler void ArmProfiler::Deinitialize () __NE___ { - _impl.reset( null ); + _impl.reset(); } } // AE::Profiler diff --git a/AE/engine/src/profiler/Utils/ArmProfiler.h b/AE/engine/src/profiler/Profilers/ArmProfiler.h similarity index 95% rename from AE/engine/src/profiler/Utils/ArmProfiler.h rename to AE/engine/src/profiler/Profilers/ArmProfiler.h index 5399491b..bbdcfc92 100644 --- a/AE/engine/src/profiler/Utils/ArmProfiler.h +++ b/AE/engine/src/profiler/Profilers/ArmProfiler.h @@ -76,7 +76,7 @@ namespace AE::Profiler ND_ ECounterSet EnabledCounterSet () C_NE___; - void Sample (OUT Counters_t &) C_NE___; + void Sample (OUT Counters_t &, INOUT float &invdt) C_NE___; #ifndef AE_ENABLE_ARM_PMU diff --git a/AE/engine/src/profiler/Profilers/GeneralProfiler.cpp b/AE/engine/src/profiler/Profilers/GeneralProfiler.cpp new file mode 100644 index 00000000..f2101380 --- /dev/null +++ b/AE/engine/src/profiler/Profilers/GeneralProfiler.cpp @@ -0,0 +1,428 @@ +// Copyright (c) Zhirnov Andrey. 
For more information see 'LICENSE' + +#ifdef AE_ENABLE_REMOTE_GRAPHICS +# include "profiler/Profiler.pch.h" +# include "profiler/Profilers/GeneralProfiler.h" + +namespace AE::Profiler +{ + using namespace AE::RemoteGraphics; + + struct GeneralProfiler::Impl + { + RDevice const& dev; + + CpuClusters_t clusters; + + CpuUsage_t totalUsage; + CpuUsage_t kernelUsage; + uint cpuCoreCount = 0; + + Impl (RDevice const& dev) __NE___ : dev{dev} {} + }; + + GeneralProfiler::GeneralProfiler () __NE___ {} + GeneralProfiler::~GeneralProfiler () __NE___ {} + + bool GeneralProfiler::IsInitialized () C_NE___ { return bool{_impl}; } + void GeneralProfiler::Deinitialize () __NE___ { _impl.reset(); } + + GeneralProfiler::ECounterSet GeneralProfiler::EnabledCounterSet () C_NE___ { return ECounterSet{}.SetAll(); } + GeneralProfiler::CpuClusters_t GeneralProfiler::GetCpuClusters () C_NE___ { CHECK_ERR( _impl ); return _impl->clusters; } + uint GeneralProfiler::GetCpuCoreCount () C_NE___ { CHECK_ERR( _impl ); return _impl->cpuCoreCount; } + +/* +================================================= + Initialize +================================================= +*/ + bool GeneralProfiler::Initialize (const ECounterSet &cs) __NE___ + { + CHECK_ERR( not IsInitialized() ); + CHECK_ERR( cs.Any() ); + + Msg::ProfGeneral_Initialize msg; + RC res; + + msg.required = cs; + + auto& dev = GraphicsScheduler().GetDevice(); + CHECK_ERR( dev.SendAndWait( msg, OUT res )); + + if ( res->ok ) + { + _impl = MakeUnique( dev ); + + _impl->clusters = res->cpuClusters; + _impl->cpuCoreCount = 0; + + for (auto& cluster : _impl->clusters) + _impl->cpuCoreCount += uint(cluster.logicalCores.count()); + } + return res->ok; + } + +/* +================================================= + Sample +================================================= +*/ + void GeneralProfiler::Sample (OUT Counters_t &result, INOUT float &invdt) C_NE___ + { + result.clear(); + Unused( invdt ); // keep current + + if ( not IsInitialized() ) 
return; + + Msg::ProfGeneral_Sample msg; + RC res; + + CHECK_ERRV( _impl->dev.SendAndWait( msg, OUT res )); + + result = RVRef(res->counters); + + const Bytes usage_size = Sizeof(_impl->totalUsage[0]) * Min( _impl->cpuCoreCount, _impl->totalUsage.size() ); + + if ( ArraySizeOf(res->totalCpuUsage) == usage_size ) + MemCopy( OUT _impl->totalUsage.data(), res->totalCpuUsage.data(), usage_size ); + + if ( ArraySizeOf(res->kernelUsage) == usage_size ) + MemCopy( OUT _impl->kernelUsage.data(), res->kernelUsage.data(), usage_size ); + } + +/* +================================================= + GetUsage +================================================= +*/ + bool GeneralProfiler::GetUsage (OUT CpuUsage_t &total, OUT CpuUsage_t &kernel) C_NE___ + { + if ( not IsInitialized() ) return false; + + const Bytes usage_size = Sizeof(_impl->totalUsage[0]) * Min( _impl->cpuCoreCount, _impl->totalUsage.size() ); + + MemCopy( OUT total.data(), _impl->totalUsage.data(), usage_size ); + MemCopy( OUT kernel.data(), _impl->kernelUsage.data(), usage_size ); + + return true; + } + +} // AE::Profiler +//----------------------------------------------------------------------------- + +#else +# include "profiler/Profilers/GeneralProfiler.h" +# include "profiler/Remote/RemoteGeneralProfiler.h" + +namespace AE::Profiler +{ +namespace +{ + static const auto c_PerProcessCounters = GeneralProfiler::ECounterSet{} + .insert( GeneralProfiler::ECounter::ProcessMemoryUsed ) + .insert( GeneralProfiler::ECounter::ProcessPeakMemory ) + .insert( GeneralProfiler::ECounter::PageFaults ) + .insert( GeneralProfiler::ECounter::ProcessMemoryUsage ) + .insert( GeneralProfiler::ECounter::ContextSwitches_HighPrio ) + .insert( GeneralProfiler::ECounter::ContextSwitches_IO ) + .insert( GeneralProfiler::ECounter::KernelTime ) + .insert( GeneralProfiler::ECounter::FSInput ) + .insert( GeneralProfiler::ECounter::FSOutput ); +} + +/* +================================================= + Impl 
+================================================= +*/ + struct GeneralProfiler::Impl + { + // variables + PerformanceStat::PerProcessCounters procCounters; + CpuClusters_t clusters; + + CpuUsage_t totalUsage; + CpuUsage_t kernelUsage; + uint cpuCoreCount = 0; + + ECounterSet enabled; + RC client; + + PerformanceStat::Capacity_t pervCapacity {-1.f}; + Clock pervCapacityClock; + Timer levelUpdateTimer; + PerformanceStat::Power_t batteryPower; + PerformanceStat::Energy_t batteryEnergyLost; + + + // methods + Impl () __NE___ {} + + ND_ bool Initialize (const ECounterSet &) __NE___; + }; + + +/* +================================================= + Impl::Initialize +================================================= +*/ + bool GeneralProfiler::Impl::Initialize (const ECounterSet &counterSet) __NE___ + { + enabled = counterSet; + + Unused( PerformanceStat::GetPerfCounters( OUT &procCounters, null, null )); + + { + const auto& cpu_info = CpuArchInfo::Get(); + + cpuCoreCount = uint(Min( cpu_info.LogicalCoreMask().count(), totalUsage.size() )); + + for (auto& core : cpu_info.cpu.coreTypes) + { + auto& dst = clusters.emplace_back(); + dst.name = String{core.name} << " (" << ToString( core.type ) << ')'; + dst.logicalCores = core.logicalBits; + } + } + return true; + } +//----------------------------------------------------------------------------- + + + +/* +================================================= + constructor / destructor +================================================= +*/ + GeneralProfiler::GeneralProfiler () __NE___ + {} + + GeneralProfiler::~GeneralProfiler () __NE___ + {} + +/* +================================================= + Initialize / Deinitialize +================================================= +*/ + bool GeneralProfiler::Initialize (const ECounterSet &counterSet) __NE___ + { + if ( not _impl ) + { + _impl.reset( new Impl{} ); + return _impl->Initialize( counterSet ); + } + + CHECK_ERR( _impl->client ); + return _impl->client->Initialize( 
counterSet ); + } + + void GeneralProfiler::Deinitialize () __NE___ + { + _impl.reset(); + } + + bool GeneralProfiler::InitClient (RC client) __NE___ + { + CHECK_ERR( client ); + + _impl.reset( new Impl{} ); + _impl->client = RVRef(client); + + return true; + } + +/* +================================================= + IsInitialized +================================================= +*/ + bool GeneralProfiler::IsInitialized () C_NE___ + { + return bool{_impl} and + (not _impl->client or _impl->client->IsInitialized()); + } + +/* +================================================= + Sample +================================================= +*/ + void GeneralProfiler::Sample (OUT Counters_t &outCounters, INOUT float &invdt) C_NE___ + { + outCounters.clear(); + Unused( invdt ); // keep current + + if ( _impl and _impl->client ) // '_impl' is empty until Initialize() / InitClient() + return _impl->client->Sample( OUT outCounters, INOUT invdt ); + + if ( not IsInitialized() ) + return; // not initialized + + // process counters + { + PerformanceStat::MemoryCounters mem_info; + PerformanceStat::PerProcessCounters per_proc; + auto& prev = _impl->procCounters; + + if ( PerformanceStat::GetPerfCounters( OUT &per_proc, null, OUT &mem_info )) + { + PerformanceStat::PerProcessCounters d; + d.userTime = per_proc.userTime - prev.userTime; + d.kernelTime = per_proc.kernelTime - prev.kernelTime; + d.fsInput = per_proc.fsInput - prev.fsInput; + d.fsOutput = per_proc.fsOutput - prev.fsOutput; + d.voluntaryContextSwitches = per_proc.voluntaryContextSwitches - prev.voluntaryContextSwitches; + d.involuntaryContextSwitches = per_proc.involuntaryContextSwitches - prev.involuntaryContextSwitches; + prev = per_proc; + + if ( (_impl->enabled & c_PerProcessCounters).Any() ) + { + outCounters.emplace( ECounter::FSInput, double(d.fsInput) ); + outCounters.emplace( ECounter::FSOutput, double(d.fsOutput) ); + outCounters.emplace( ECounter::KernelTime, double(d.kernelTime.count()) * 100.0 / double((d.userTime + d.kernelTime).count()) ); + + 
outCounters.emplace( ECounter::ContextSwitches_HighPrio, double(d.involuntaryContextSwitches) ); + outCounters.emplace( ECounter::ContextSwitches_IO, double(d.voluntaryContextSwitches) ); + + outCounters.emplace( ECounter::ProcessMemoryUsed, double(ulong{mem_info.currentUsage}) ); + outCounters.emplace( ECounter::ProcessPeakMemory, double(ulong{mem_info.peakUsage}) ); + outCounters.emplace( ECounter::PageFaults, double(ulong{mem_info.pageFaults}) ); + + outCounters.emplace( ECounter::ProcessMemoryUsage, double(ulong{mem_info.currentUsage}) / double(ulong{mem_info.totalPhysical}) ); + } + + Bytes phys_mem_used = mem_info.totalPhysical - mem_info.availablePhysical; + outCounters.emplace( ECounter::PhysicalMemoryUsage, 100.0 * double(ulong{phys_mem_used}) / double(ulong{mem_info.totalPhysical}) ); + outCounters.emplace( ECounter::PhysicalMemoryUsed, double(ulong{phys_mem_used}) ); + + outCounters.emplace( ECounter::VirtualMemoryUsage, 100.0 * double(ulong{mem_info.usedVirtual}) / double(ulong{mem_info.totalVirtual}) ); + outCounters.emplace( ECounter::VirtualMemoryUsed, double(ulong{mem_info.usedVirtual}) ); + } + } + + // battery + { + auto& prev_cap = _impl->pervCapacity; + auto& clock = _impl->pervCapacityClock; + auto& timer = _impl->levelUpdateTimer; + auto& power = _impl->batteryPower; + auto& energy = _impl->batteryEnergyLost; + + PerformanceStat::BatteryStat stat; + if ( PerformanceStat::Battery_Get( OUT stat )) + { + if_unlikely( not timer.IsActive() ) + timer.Start( seconds{30} ); + + outCounters.emplace( ECounter::BatteryTemperature, double(stat.temperature) ); + outCounters.emplace( ECounter::BatteryCapacity, double(stat.capacity.GetScaled()) ); + + // update only when capacity changed + if ( prev_cap > Zero and stat.capacity < prev_cap ) + { + const auto dcap = prev_cap - stat.capacity; + const auto dt = PerformanceStat::Seconds_t{ clock.Tick() }; + const auto dE = dcap * stat.voltage; + power = dE / dt; + energy += dE; + } + + if ( timer.Tick() ) + { + 
outCounters.emplace( ECounter::BatteryLevel, double(stat.level.GetPercent()) ); + + if ( not stat.isCharging ) + outCounters.emplace( ECounter::BatteryDischargeTotal, double(energy.GetNonScaled()) ); + } + + if ( not stat.isCharging ) + { + outCounters.emplace( ECounter::BatteryCurrent, double(stat.current.GetScaled()) ); + outCounters.emplace( ECounter::BatteryVoltage, double(stat.voltage.GetScaled()) ); + outCounters.emplace( ECounter::BatteryDischarge, double(stat.power.GetScaled()) ); + outCounters.emplace( ECounter::BatteryDischargeAvg, double(power.GetScaled()) ); + } + else { + energy = Zero; + } + + if_unlikely( prev_cap < Zero ) + clock.Start(); + + prev_cap = stat.capacity; + } + } + } + +/* +================================================= + GetUsage +================================================= +*/ + bool GeneralProfiler::GetUsage (OUT CpuUsage_t &total, OUT CpuUsage_t &kernel) C_NE___ + { + if ( not IsInitialized() ) + return false; // not initialized + + if ( _impl->client ) + return _impl->client->GetUsage( OUT total, OUT kernel ); + + bool res = PerformanceStat::CPU_GetUsage( OUT total.data(), OUT kernel.data(), _impl->cpuCoreCount ); + + for (uint i = 0; i < _impl->cpuCoreCount; ++i) + total[i] += kernel[i]; + + return res; + } + +/* +================================================= + GetCpuClusters +================================================= +*/ + GeneralProfiler::CpuClusters_t GeneralProfiler::GetCpuClusters () C_NE___ + { + CHECK_ERR( IsInitialized() ); + + if ( _impl->client ) + return _impl->client->GetCpuClusters(); + + return _impl->clusters; + } + +/* +================================================= + EnabledCounterSet +================================================= +*/ + GeneralProfiler::ECounterSet GeneralProfiler::EnabledCounterSet () C_NE___ + { + CHECK_ERR( IsInitialized() ); + + if ( _impl->client ) + return _impl->client->EnabledCounterSet(); + + return _impl->enabled; + } + +/* 
+================================================= + GetCpuCoreCount +================================================= +*/ + uint GeneralProfiler::GetCpuCoreCount () C_NE___ + { + CHECK_ERR( IsInitialized() ); + + if ( _impl->client ) + return _impl->client->GetCpuCoreCount(); + + return _impl->cpuCoreCount; + } + +} // AE::Profiler +//----------------------------------------------------------------------------- +#endif // AE_ENABLE_REMOTE_GRAPHICS diff --git a/AE/engine/src/profiler/Profilers/GeneralProfiler.h b/AE/engine/src/profiler/Profilers/GeneralProfiler.h new file mode 100644 index 00000000..609619cd --- /dev/null +++ b/AE/engine/src/profiler/Profilers/GeneralProfiler.h @@ -0,0 +1,102 @@ +// Copyright (c) Zhirnov Andrey. For more information see 'LICENSE' + +#pragma once + +#include "base/Utils/EnumSet.h" +#include "base/Pointers/RefCounter.h" + +namespace AE::Profiler +{ + using namespace AE::Base; + class GeneralProfilerClient; + + + // + // General Profiler + // + + class GeneralProfiler + { + // types + public: + using CoreBits_t = CpuArchInfo::CoreBits_t; + using CpuUsage_t = StaticArray< float, CpuArchInfo::MaxLogicalCores >; + + struct CpuCluster + { + String name; + CoreBits_t logicalCores; + }; + using CpuClusters_t = FixedArray< CpuCluster, CpuArchInfo::MaxCoreTypes >; + + enum class ECounter : ubyte + { + ProcessMemoryUsed, // bytes, current process + ProcessPeakMemory, // bytes, current process + PageFaults, // count, current process + + ProcessMemoryUsage, // % + + PhysicalMemoryUsage, // % + PhysicalMemoryUsed, // bytes + + VirtualMemoryUsage, // % + VirtualMemoryUsed, // bytes + + ContextSwitches_HighPrio, // higher priority process replace current process + ContextSwitches_IO, // context switch when awaiting availability of a resource (IO) + + KernelTime, // %, for current process + FSInput, // number of times the filesystem had to perform input for current process + FSOutput, // number of times the filesystem had to perform output for 
current process + + BatteryDischarge, // W, calculated as A * V + BatteryDischargeAvg, // W, calculated from capacity + BatteryDischargeTotal, // J + BatteryTemperature, // C + BatteryCapacity, // A*s + BatteryLevel, // % + BatteryCurrent, // A + BatteryVoltage, // V + + _Count + }; + + using ECounterSet = EnumSet< ECounter >; + using Counters_t = FlatHashMap< ECounter, double >; // TODO: float + + + private: + struct Impl; + + + // variables + private: + Unique _impl; + + + // methods + public: + GeneralProfiler () __NE___; + ~GeneralProfiler () __NE___; + + ND_ bool Initialize (const ECounterSet &counterSet) __NE___; + void Deinitialize () __NE___; + ND_ bool IsInitialized () C_NE___; + + ND_ bool InitClient (RC) __NE___; + + void Sample (OUT Counters_t &, INOUT float &invdt) C_NE___; + + // 'total' - sum of user time and kernel time. + // 'kernel' - only kernel time, zero if not supported. + ND_ bool GetUsage (OUT CpuUsage_t &total, + OUT CpuUsage_t &kernel) C_NE___; + + ND_ ECounterSet EnabledCounterSet () C_NE___; + ND_ CpuClusters_t GetCpuClusters () C_NE___; + ND_ uint GetCpuCoreCount () C_NE___; + }; + + +} // AE::Profiler diff --git a/AE/engine/src/profiler/Utils/MaliProfiler.cpp b/AE/engine/src/profiler/Profilers/MaliProfiler.cpp similarity index 61% rename from AE/engine/src/profiler/Utils/MaliProfiler.cpp rename to AE/engine/src/profiler/Profilers/MaliProfiler.cpp index 85c37978..464b4a80 100644 --- a/AE/engine/src/profiler/Utils/MaliProfiler.cpp +++ b/AE/engine/src/profiler/Profilers/MaliProfiler.cpp @@ -2,7 +2,7 @@ #ifdef AE_ENABLE_REMOTE_GRAPHICS # include "profiler/Profiler.pch.h" -# include "profiler/Utils/MaliProfiler.h" +# include "profiler/Profilers/MaliProfiler.h" namespace AE::Profiler { @@ -32,7 +32,7 @@ namespace AE::Profiler Initialize ================================================= */ - bool MaliProfiler::Initialize (const ECounterSet &cs) __NE___ + bool MaliProfiler::Initialize (const ECounterSet cs) __NE___ { CHECK_ERR( not 
IsInitialized() ); CHECK_ERR( cs.Any() ); @@ -59,9 +59,10 @@ namespace AE::Profiler Sample ================================================= */ - void MaliProfiler::Sample (OUT Counters_t &result) C_NE___ + void MaliProfiler::Sample (OUT Counters_t &result, INOUT float &invdt) C_NE___ { result.clear(); + Unused( invdt ); // keep current if ( not IsInitialized() ) return; @@ -83,10 +84,57 @@ namespace AE::Profiler # include # include -# include "profiler/Utils/MaliProfiler.h" +# include "profiler/Profilers/MaliProfiler.h" namespace AE::Profiler { +namespace +{ + static constexpr MaliProfiler::ECounterSet c_CustomCounters = MaliProfiler::ECounterSet{} + .insert( MaliProfiler::ECounter::ExtBusTotalBy ) + .insert( MaliProfiler::ECounter::ExtMemEnergy ) + .insert( MaliProfiler::ECounter::CoreEnergy ) + .insert( MaliProfiler::ECounter::TotalEnergy ) + .insert( MaliProfiler::ECounter::PerCoreActiveCy ); + static constexpr MaliProfiler::ECounterSet c_RequiredForCustomCounters = MaliProfiler::ECounterSet{} + .insert( MaliProfiler::ECounter::ExtBusRdBy ) + .insert( MaliProfiler::ECounter::ExtBusWrBy ) + .insert( MaliProfiler::ECounter::CoreActiveCy ); + + StaticAssert( (uint(MaliProfiler::ECounter::_Count) - uint(MaliProfiler::ECounter::_LastCounter)) == 5 ); + + #define CHECK_COUNTER( _name_ ) StaticAssert( uint(MaliProfiler::ECounter::_name_) == uint(hwcpipe_counter::Mali##_name_) ); + AE_MALI_COUNTERS( CHECK_COUNTER ) + #undef CHECK_COUNTER + +/* +================================================= + ToString2 (ECounter) +================================================= +*/ + ND_ static StringView ToString2 (MaliProfiler::ECounter c) __NE___ + { + switch_enum( c ) + { + #define ENUM( _name_ ) case MaliProfiler::ECounter::_name_ : return #_name_; + AE_MALI_COUNTERS( ENUM ) + #undef ENUM + + case MaliProfiler::ECounter::ExtBusTotalBy : return "ExtBusTotalBy"; + case MaliProfiler::ECounter::ExtMemEnergy : return "ExtMemEnergy"; + case MaliProfiler::ECounter::CoreEnergy : 
return "CoreEnergy"; + case MaliProfiler::ECounter::TotalEnergy : return "TotalEnergy"; + case MaliProfiler::ECounter::PerCoreActiveCy : return "PerCoreActiveCy"; + + case MaliProfiler::ECounter::_Count : break; + } + switch_end + return ""; + } + +} // namespace + + /* ================================================= Impl @@ -128,6 +176,7 @@ namespace AE::Profiler */ void MaliProfiler::Impl::Print () { + #ifdef AE_ENABLE_LOGS if ( not gpu.valid() ) return; @@ -136,7 +185,7 @@ namespace AE::Profiler auto counter_db = hwcpipe::counter_database{}; hwcpipe::counter_metadata meta; - String str; + String str = "ARM Mali GPU info"; str << "\nGPU family: "; switch_enum( gpu.get_gpu_family() ) @@ -187,22 +236,26 @@ namespace AE::Profiler } switch_end - str << "\nBus width: " << ToString( gpu.bus_width() ) << " bits" + str << "\nAXI bus width: " << ToString( gpu.bus_width() ) << " bits" << "\nNum cores: " << ToString( gpu.num_shader_cores() ) - << "\nExe engines: " << ToString( gpu.num_execution_engines() ) - << "\nL2 slices * size: " << ToString( gpu.get_constants().num_l2_slices ) << " * " << ToString( gpu.get_constants().l2_slice_size ) - << "\nTile size: " << ToString( gpu.get_constants().tile_size ) - << "\nWarp width: " << ToString( gpu.get_constants().warp_width ); + << "\nExec engines: " << ToString( gpu.num_execution_engines() ) // per core? 
+ << "\nL2 slices * size: " << ToString( Bytes{ gpu.get_constants().num_l2_slices * gpu.get_constants().l2_slice_size }) + << " (" << ToString( gpu.get_constants().num_l2_slices ) << " * " << ToString( Bytes{ gpu.get_constants().l2_slice_size }) << ")" + << "\nTile size: " << ToString( gpu.get_constants().tile_size ) << 'x' << ToString( gpu.get_constants().tile_size ) << " px" + << "\nWarp width: " << ToString( gpu.get_constants().warp_width ) << " threads"; - str << "\nPerformance counters:"; + str << "\n\nPerformance counters:"; for (auto counter : counter_db.counters_for_gpu( gpu )) { auto ec = counter_db.describe_counter( counter, OUT meta ); if ( not ec ) - str << "\n " << meta.name << " (" << ToString( uint(counter) ) << ")"; + str << "\n| " << ToString2( ECounter(counter) ) << " | " << meta.name << " | " << meta.units << " |"; + else + str << "\n " << ToString2( ECounter(counter) ); } AE_LOGI( str ); + #endif } //----------------------------------------------------------------------------- @@ -224,10 +277,12 @@ namespace AE::Profiler Initialize ================================================= */ - bool MaliProfiler::Initialize (const ECounterSet &counterSet) __NE___ + bool MaliProfiler::Initialize (const ECounterSet inCounterSet) __NE___ { CHECK_ERR( not IsInitialized() ); - CHECK_ERR( counterSet.Any() ); + CHECK_ERR( inCounterSet.Any() ); + + ECounterSet counterSet = inCounterSet; auto impl = MakeUnique(); @@ -235,7 +290,29 @@ namespace AE::Profiler return false; if ( impl->gpu.get_gpu_family() == hwcpipe::device::gpu_family::midgard ) - return false; // TODO: used unavailable counter, it can be fixed in HWCPipe + { + counterSet.erase( ECounter::GPUIRQUtil ); + counterSet.erase( ECounter::FragQueueUtil ); + counterSet.erase( ECounter::NonFragQueueUtil ); + counterSet.erase( ECounter::ExtBusRdStallRate ); + counterSet.erase( ECounter::ExtBusWrStallRate ); + counterSet.erase( ECounter::TilerUtil ); + counterSet.erase( ECounter::GeomTotalPrim ); + 
counterSet.erase( ECounter::GeomTotalCullPrim ); + counterSet.erase( ECounter::NonFragUtil ); + counterSet.erase( ECounter::FragUtil ); + counterSet.erase( ECounter::FragEZSKillRate ); + counterSet.erase( ECounter::FragLZSKillRate ); + counterSet.erase( ECounter::FragOverdraw ); + counterSet.erase( ECounter::ALUUtil ); + counterSet.erase( ECounter::TexUtil ); + counterSet.erase( ECounter::LSUtil ); + counterSet.erase( ECounter::FragFPKBUtil ); + } + + // remove custom + counterSet &= ~c_CustomCounters; + counterSet |= c_RequiredForCustomCounters; //impl->Print(); @@ -266,6 +343,9 @@ namespace AE::Profiler return false; } + // add custom + impl->enabled |= (inCounterSet & c_CustomCounters); + impl->sampler.reset( new hwcpipe::sampler<>{ config }); if ( impl->sampler->start_sampling() ) @@ -339,9 +419,10 @@ namespace AE::Profiler Sample ================================================= */ - void MaliProfiler::Sample (OUT Counters_t &outCounters) C_NE___ + void MaliProfiler::Sample (OUT Counters_t &outCounters, INOUT float &invdt) C_NE___ { outCounters.clear(); + Unused( invdt ); // keep current if ( not _impl ) return; // not initialized @@ -363,9 +444,42 @@ namespace AE::Profiler case hwcpipe::counter_sample::type::float64 : val = sample.value.float64; break; case hwcpipe::counter_sample::type::uint64 : val = double(sample.value.uint64); break; } + switch_end outCounters.emplace( c, val ); } } + + // add custom counters + const auto Get = [&outCounters] (ECounter c) + {{ + if_likely( auto it = outCounters.find( c ); it != outCounters.end() ) + return it->second; + return -1.0; + }}; + + const double extbus_rd = Get( ECounter::ExtBusRdBy ); + const double extbus_wd = Get( ECounter::ExtBusWrBy ); + const double core_cy = Get( ECounter::CoreActiveCy ); + double total_energy = 0.0; + + if_likely( extbus_rd > 0.0 or extbus_wd > 0.0 ) + { + double energy = (extbus_rd + extbus_wd) * 1.0e-10; // 100mW per GB/s + outCounters.emplace( ECounter::ExtBusTotalBy, extbus_rd + 
extbus_wd ); + outCounters.emplace( ECounter::ExtMemEnergy, energy ); + total_energy += energy; + } + + if_likely( core_cy > 0.0 ) + { + double energy = core_cy * 1.0e-9; // approx 1W per 1GHz + outCounters.emplace( ECounter::PerCoreActiveCy, core_cy / _impl->gpu.num_shader_cores() ); + outCounters.emplace( ECounter::CoreEnergy, energy ); + total_energy += energy; + } + + if_likely( total_energy > 0.0 ) + outCounters.emplace( ECounter::TotalEnergy, total_energy ); } } // AE::Profiler @@ -373,7 +487,7 @@ namespace AE::Profiler #else // not AE_ENABLE_MALI_HWCPIPE and not AE_ENABLE_REMOTE_GRAPHICS -# include "profiler/Utils/MaliProfiler.h" +# include "profiler/Profilers/MaliProfiler.h" # include "profiler/Remote/RemoteMaliProfiler.h" namespace AE::Profiler @@ -385,19 +499,19 @@ namespace AE::Profiler Impl (RC c) __NE___ : client{RVRef(c)} {} }; - MaliProfiler::MaliProfiler () __NE___ {} - MaliProfiler::~MaliProfiler () __NE___ {} + MaliProfiler::MaliProfiler () __NE___ {} + MaliProfiler::~MaliProfiler () __NE___ {} - bool MaliProfiler::Initialize (const ECounterSet &cs) __NE___ { return _impl and _impl->client->Initialize( cs ); } - bool MaliProfiler::IsInitialized () C_NE___ { return _impl and _impl->client->IsInitialized(); } + bool MaliProfiler::Initialize (const ECounterSet cs) __NE___ { return _impl and _impl->client->Initialize( cs ); } + bool MaliProfiler::IsInitialized () C_NE___ { return _impl and _impl->client->IsInitialized(); } - MaliProfiler::ECounterSet MaliProfiler::EnabledCounterSet () C_NE___ { return _impl ? _impl->client->EnabledCounterSet() : Default; } - MaliProfiler::HWInfo MaliProfiler::GetHWInfo () C_NE___ { return _impl ? _impl->client->GetHWInfo() : Default; } + MaliProfiler::ECounterSet MaliProfiler::EnabledCounterSet () C_NE___ { return _impl ? _impl->client->EnabledCounterSet() : Default; } + MaliProfiler::HWInfo MaliProfiler::GetHWInfo () C_NE___ { return _impl ? 
_impl->client->GetHWInfo() : Default; } - void MaliProfiler::Sample (OUT Counters_t &result) C_NE___ { if (_impl) return _impl->client->Sample( OUT result ); } + void MaliProfiler::Sample (OUT Counters_t &result, INOUT float &invdt) C_NE___ { if (_impl) return _impl->client->Sample( OUT result, INOUT invdt ); } - bool MaliProfiler::InitClient (RC client) __NE___ + bool MaliProfiler::InitClient (RC client) __NE___ { CHECK_ERR( client ); @@ -405,7 +519,7 @@ namespace AE::Profiler return true; } - void MaliProfiler::Deinitialize () __NE___ + void MaliProfiler::Deinitialize () __NE___ { _impl.reset( null ); } diff --git a/AE/engine/src/profiler/Profilers/MaliProfiler.h b/AE/engine/src/profiler/Profilers/MaliProfiler.h new file mode 100644 index 00000000..ecf98460 --- /dev/null +++ b/AE/engine/src/profiler/Profilers/MaliProfiler.h @@ -0,0 +1,510 @@ +// Copyright (c) Zhirnov Andrey. For more information see 'LICENSE' +/* + API for Mali GPU hardware performance counters. + (x.x.x) - link to counter description in docs. + By default it is a link to the 5th Gen performance counters guide; special keys like V (Valhall), B (Bifrost) are used for links to other architectures. 
+ + [Performance counters description](https://github.com/azhirnov/cpu-gpu-arch/blob/main/gpu/ARM-Mali_PC.md) +*/ + +#pragma once + +#include "base/Utils/EnumSet.h" +#include "base/Pointers/RefCounter.h" + +namespace AE::Profiler +{ + using namespace AE::Base; + class MaliProfilerClient; + + + // + // Mali GPU Profiler + // + + class MaliProfiler + { + // types + public: + enum class ECounter : ushort + { + #define AE_MALI_COUNTERS( _visit_ )\ + _visit_( GPUActiveCy ) /* cycles | GPU active cycles (3.1.1) */\ + _visit_( GPUIRQActiveCy ) /* cycles | GPU interrupt pending cycles (3.1.6) */\ + _visit_( FragQueueJob ) /* jobs | Fragment jobs */\ + _visit_( FragQueueTask ) /* tasks | Fragment tasks */\ + _visit_( FragQueueActiveCy ) /* cycles | Fragment queue active cycles, Main phase queue active cycles (3.1.3) */\ + _visit_( NonFragQueueJob ) /* jobs | Non-fragment jobs */\ + _visit_( NonFragQueueTask ) /* tasks | Non-fragment tasks */\ + _visit_( NonFragQueueActiveCy ) /* cycles | Non-fragment queue active cycles, Binning phase queue active cycles (3.1.2) */\ + _visit_( ResQueueJob ) /* jobs | Reserved queue jobs */\ + _visit_( ResQueueTask ) /* tasks | Reserved queue tasks */\ + _visit_( ResQueueActiveCy ) /* cycles | Reserved active cycles */\ + _visit_( ExtBusWrBt ) /* beats | Output external write beats */\ + _visit_( ExtBusRdBt ) /* beats | Output external read beats */\ + _visit_( ExtBusRdStallCy ) /* cycles | Output external read stall cycles */\ + _visit_( ExtBusWrStallCy ) /* cycles | Output external write stall cycles */\ + _visit_( FragActiveCy ) /* cycles | Fragment active cycles */\ + _visit_( FragRdPrim ) /* primitives| Fragment primitives loaded */\ + _visit_( FragThread ) /* threads | Fragment threads */\ + _visit_( FragHelpThread ) /* threads | Fragment helper threads */\ + _visit_( FragRastQd ) /* quads | Rasterized fine quads */\ + _visit_( FragEZSTestQd ) /* quads | Early ZS tested quads */\ + _visit_( FragEZSKillQd ) /* quads | Early ZS killed 
quads */\ + _visit_( FragLZSTestTd ) /* threads | Late ZS tested threads */\ + _visit_( FragLZSKillTd ) /* threads | Late ZS killed threads */\ + _visit_( FragTile ) /* tiles | Tile count */\ + _visit_( FragTileKill ) /* tiles | Killed unchanged tiles */\ + _visit_( NonFragActiveCy ) /* cycles | Non-fragment active cycles */\ + _visit_( NonFragThread ) /* threads | Non-fragment threads */\ + _visit_( CoreActiveCy ) /* cycles | Execution core active cycles, (sum of all cores?) */\ + _visit_( EngInstr ) /* instructions | Executed instructions */\ + _visit_( LSIssueCy ) /* cycles | Load/store unit issue cycles (9.1.1) */\ + _visit_( TexInstr ) /* requests | Texture instructions */\ + _visit_( TexFiltIssueCy ) /* cycles | Texture filtering cycles (8.1.1) */\ + _visit_( LSRdHitCy ) /* requests | Load/store unit read hits */\ + _visit_( LSWrHitCy ) /* requests | Load/store unit write hits */\ + _visit_( GeomTrianglePrim ) /* primitives| Triangle primitives */\ + _visit_( GeomPointPrim ) /* primitives| Point primitives */\ + _visit_( GeomLinePrim ) /* primitives| Line primitives */\ + _visit_( GeomFrontFacePrim ) /* primitives| Visible front-facing primitives */\ + _visit_( GeomBackFacePrim ) /* primitives| Visible back-facing primitives */\ + _visit_( GeomVisiblePrim ) /* primitives| Visible primitives (4.1.3) */\ + _visit_( GeomFaceXYPlaneCullPrim ) /* primitives| Facing or XY plane test culled primitives */\ + _visit_( GeomZPlaneCullPrim ) /* primitives| Z plane culled primitives */\ + _visit_( TilerActiveCy ) /* cycles | Tiler active cycles (3.1.4) */\ + _visit_( GPUIRQUtil ) /* % | Interrupt pending utilization (3.2.5) */\ + _visit_( FragQueueUtil ) /* % | Fragment queue utilization, Main phase queue utilization (3.2.2) */\ + _visit_( NonFragQueueUtil ) /* % | Non-fragment queue utilization, Binning phase queue utilization (3.2.1) */\ + _visit_( ExtBusRdBy ) /* bytes | Output external read bytes (3.3.1) */\ + _visit_( ExtBusWrBy ) /* bytes | Output external write 
bytes (3.3.2) */\ + _visit_( ExtBusRdStallRate ) /* % | Output external read stall rate (3.4.1) */\ + _visit_( ExtBusWrStallRate ) /* % | Output external write stall rate (3.4.2) */\ + _visit_( TilerUtil ) /* % | Tiler utilization (3.2.3) */\ + _visit_( GeomTotalPrim ) /* primitives| Total input primitives (4.1.1) */\ + _visit_( GeomVisibleRate ) /* % | Visible primitives rate (4.2.1) */\ + _visit_( GeomTotalCullPrim ) /* Total culled primitives (4.1.2) */\ + _visit_( GeomFaceXYPlaneCullRate ) /* Facing or XY plane test cull (4.2.2) */\ + _visit_( GeomZPlaneCullRate ) /* Z plane test cull rate (4.2.3) */\ + _visit_( NonFragUtil ) /* Non-fragment utilization (5.3.2) */\ + _visit_( NonFragThroughputCy ) /* Average cycles per non-fragment thread (5.2.1) */\ + _visit_( FragUtil ) /* Fragment utilization (5.3.3) */\ + _visit_( FragThroughputCy ) /* Average cycles per fragment thread (5.2.2) */\ + _visit_( FragHelpTdRate )\ + _visit_( FragEZSTestRate ) /* Early ZS tested quad percentage (4.5.1) */\ + _visit_( FragEZSKillRate ) /* Early ZS killed quad percentage (4.5.3) */\ + _visit_( FragLZSTestRate ) /* Late ZS tested thread percentage (4.5.5 ?) */\ + _visit_( FragLZSKillRate ) /* Late ZS killed thread percentage (4.5.6 ?) 
*/\ + _visit_( FragOverdraw ) /* Fragments per pixel (4.4.3) */\ + _visit_( FragTileKillRate ) /* Unchanged tile kill rate, transaction elimination(6.3.5) */\ + _visit_( CoreUtil ) /* Execution core utilization (5.3.5) */\ + _visit_( ALUUtil ) /* Arithmetic unit utilization (6.1.1) */\ + _visit_( TexSample ) /* Texture samples */\ + _visit_( TexCPI ) /* Texture filtering cycles per instruction (8.1.2) */\ + _visit_( TexUtil ) /* Texture unit utilization (6.1.3) */\ + _visit_( TexIssueCy ) /* Texture unit issue cycles */\ + _visit_( LSUtil ) /* Load/store unit utilization (6.1.4) */\ + _visit_( GPUPix ) /* total number of pixels that are shaded, assumes that all pixels in task are shaded (32x32, 5Gen: 64x64) (4.4.1) */\ + _visit_( GPUCyPerPix ) /* Average cycles per pixel (4.4.2) */\ + _visit_( FragQueueWaitRdCy ) /* Fragment queue job descriptor read wait cycles */\ + _visit_( FragQueueWaitIssueCy ) /* Fragment queue job issue wait cycles */\ + _visit_( FragQueueWaitDepCy ) /* Fragment queue job dependency wait cycles */\ + _visit_( FragQueueWaitFinishCy ) /* Fragment queue job finish wait cycles */\ + _visit_( NonFragQueueWaitRdCy ) /* Non-fragment queue job descriptor read wait cycles */\ + _visit_( NonFragQueueWaitIssueCy ) /* Non-fragment queue job issue wait cycles */\ + _visit_( NonFragQueueWaitDepCy ) /* Non-fragment queue job dependency wait cycles */\ + _visit_( NonFragQueueWaitFinishCy ) /* Non-fragment queue job finish wait cycles */\ + _visit_( ResQueueWaitRdCy ) /* Reserved queue job descriptor read wait cycles */\ + _visit_( ResQueueWaitIssueCy ) /* Reserved queue job issue wait cycles */\ + _visit_( ResQueueWaitDepCy ) /* Reserved queue job dependency wait cycles */\ + _visit_( ResQueueWaitFinishCy ) /* Reserved queue job finish wait cycles */\ + _visit_( MMUL2Hit ) /* MMU L2 lookup TLB hits */\ + _visit_( MMUL2Rd ) /* MMU L2 table read requests */\ + _visit_( MMULookup ) /* MMU lookup requests */\ + _visit_( L2CacheLookup )\ + _visit_( 
L2CacheRdLookup )\ + _visit_( L2CacheWrLookup )\ + _visit_( FragFPKActiveCy ) /* Forward pixel kill buffer active cycles */\ + _visit_( LSRdCy ) /* Load/store unit read issues */\ + _visit_( LSWrCy ) /* Load/store unit write issues */\ + _visit_( LSAtomic ) /* Load/store unit atomic issues (9.1.6) */\ + _visit_( TilerPosCacheHit ) /* Position cache hit requests */\ + _visit_( TilerPosCacheMiss ) /* Position cache miss requests */\ + _visit_( FragFPKBUtil ) /* Fragment FPK buffer utilization (5.3.4) */\ + _visit_( FragQueueWaitFlushCy ) /* Fragment queue cache flush wait cycles */\ + _visit_( NonFragQueueWaitFlushCy ) /* Non-fragment queue cache flush wait cycles */\ + _visit_( ResQueueWaitFlushCy ) /* Non-fragment queue cache flush wait cycles */\ + _visit_( L2CacheFlush ) /* L2 cache flush requests */\ + _visit_( GeomSampleCullPrim ) /* Sample test culled primitives */\ + _visit_( TilerRdBt ) /* Output internal read beats */\ + _visit_( TilerWrBt )\ + _visit_( GeomPosShadTask ) /* Tiler position shading requests */\ + _visit_( TilerPosShadStallCy ) /* Tiler position shading stall cycles */\ + _visit_( TilerPosShadFIFOFullCy ) /* Tiler position FIFO full cycles */\ + _visit_( TilerVarCacheHit ) /* Varying cache hits */\ + _visit_( TilerVarCacheMiss ) /* Varying cache misses */\ + _visit_( GeomVarShadTask ) /* Tiler varying shading requests */\ + _visit_( TilerVarShadStallCy ) /* Tiler varying shading stall cycles */\ + _visit_( FragRastPrim ) /* Rasterized primitives */\ + _visit_( FragWarp ) /* Fragment warps (5.1.2) */\ + _visit_( FragPartWarp )\ + _visit_( FragEZSUpdateQd ) /* Early ZS updated quads */\ + _visit_( FragLZSTestQd ) /* Late ZS tested quads */\ + _visit_( FragLZSKillQd ) /* Late ZS killed quads */\ + _visit_( FragOpaqueQd ) /* Occluding quads */\ + _visit_( NonFragTask )\ + _visit_( NonFragWarp ) /* Non-fragment warps (5.1.1) */\ + _visit_( EngActiveCy )\ + _visit_( EngDivergedInstr )\ + _visit_( EngStarveCy )\ + _visit_( TexQuads ) /* Texture quads 
*/\ + _visit_( TexQuadPass )\ + _visit_( TexQuadPassDescMiss )\ + _visit_( TexQuadPassMip )\ + _visit_( TexQuadPassTri )\ + _visit_( TexCacheFetch )\ + _visit_( TexCacheCompressFetch )\ + _visit_( TexCacheLookup )\ + _visit_( LSFullRd ) /* Load/store unit full read issues (9.1.2) */\ + _visit_( LSPartRd ) /* Load/store unit partial read issues (9.1.3) */\ + _visit_( LSFullWr ) /* Load/store unit full write issues (9.1.4) */\ + _visit_( LSPartWr ) /* Load/store unit partial write issues (9.1.5) */\ + _visit_( VarInstr ) /* Varying unit instructions */\ + _visit_( Var32IssueSlot ) /* 32-bit interpolation slots */\ + _visit_( Var16IssueSlot ) /* 16-bit interpolation slots */\ + _visit_( AttrInstr ) /* Attribute instructions */\ + _visit_( SCBusFFEL2RdBt ) /* Fragment front-end read beats from L2 cache */\ + _visit_( SCBusFFEExtRdBt ) /* Fragment front-end read beats from external memory */\ + _visit_( SCBusLSL2RdBt ) /* Load/store unit read beats from L2 cache */\ + _visit_( SCBusLSExtRdBt ) /* Load/store unit read beats from external memory */\ + _visit_( SCBusTexL2RdBt ) /* Texture unit read beats from L2 cache */\ + _visit_( SCBusTexExtRdBt ) /* Texture unit read beats from external memory */\ + _visit_( SCBusOtherL2RdBt ) /* Miscellaneous read beats from L2 cache */\ + _visit_( SCBusLSWBWrBt ) /* Load/store unit write-back write beats */\ + _visit_( SCBusTileWrBt ) /* Tile unit write beats to L2 memory system */\ + _visit_( SCBusLSOtherWrBt ) /* Load/store unit other write beats */\ + _visit_( MMUL3Rd ) /* MMU L3 table read requests */\ + _visit_( MMUL3Hit ) /* MMU L3 lookup TLB hits */\ + _visit_( MMUS2Lookup ) /* MMU stage 2 lookup requests */\ + _visit_( MMUS2L3Rd ) /* MMU stage 2 L3 lookup requests */\ + _visit_( MMUS2L2Rd ) /* MMU stage 2 L2 lookup requests */\ + _visit_( MMUS2L3Hit ) /* MMU stage 2 L3 lookup TLB hits */\ + _visit_( MMUS2L2Hit ) /* MMU stage 2 L2 lookup TLB hits */\ + _visit_( L2CacheRd ) /* Input internal read requests */\ + _visit_( 
L2CacheRdStallCy ) /* Input internal read stall cycles */\ + _visit_( L2CacheWr ) /* Input internal write requests */\ + _visit_( L2CacheWrStallCy ) /* Input internal write stall cycles */\ + _visit_( L2CacheSnp ) /* Input internal snoop requests */\ + _visit_( L2CacheSnpStallCy ) /* Input internal snoop stall cycles */\ + _visit_( L2CacheL1Rd ) /* Output internal read requests */\ + _visit_( L2CacheL1RdStallCy ) /* Output internal read stall cycles */\ + _visit_( L2CacheL1Wr ) /* Output internal write requests */\ + _visit_( L2CacheSnpLookup ) /* Input external snoop lookup requests */\ + _visit_( ExtBusRd ) /* Output external read transactions */\ + _visit_( ExtBusRdNoSnoop ) /* Output external ReadNoSnoop transactions */\ + _visit_( ExtBusRdUnique ) /* Output external ReadUnique transactions */\ + _visit_( ExtBusRdOTQ1 ) /* Output external outstanding reads 0-25% */\ + _visit_( ExtBusRdOTQ2 ) /* Output external outstanding reads 25-50% */\ + _visit_( ExtBusRdOTQ3 ) /* Output external outstanding reads 50-75% */\ + _visit_( ExtBusRdLat0 ) /* Output external read latency 0-127 cycles (3.5.1) */\ + _visit_( ExtBusRdLat128 ) /* Output external read latency 128-191 cycles (3.5.2) */\ + _visit_( ExtBusRdLat192 ) /* Output external read latency 192-255 cycles (3.5.3) */\ + _visit_( ExtBusRdLat256 ) /* Output external read latency 256-319 cycles (3.5.4) */\ + _visit_( ExtBusRdLat320 ) /* Output external read latency 320-383 cycles (3.5.5) */\ + _visit_( ExtBusWr ) /* Output external write transactions */\ + _visit_( ExtBusWrNoSnoopFull ) /* Output external WriteNoSnoopFull transactions */\ + _visit_( ExtBusWrNoSnoopPart ) /* Output external WriteNoSnoopPartial transactions */\ + _visit_( ExtBusWrSnoopFull ) /* Output external WriteSnoopFull transactions */\ + _visit_( ExtBusWrSnoopPart ) /* Output external WriteSnoopPartial transactions */\ + _visit_( ExtBusWrOTQ1 ) /* Output external outstanding writes 0-25% */\ + _visit_( ExtBusWrOTQ2 ) /* Output external outstanding 
writes 25-50% */\ + _visit_( ExtBusWrOTQ3 ) /* Output external outstanding writes 50-75% */\ + _visit_( L2CacheIncSnp ) /* Input external snoop transactions */\ + _visit_( L2CacheIncSnpStallCy ) /* Input external snoop stall cycles */\ + _visit_( L2CacheRdMissRate ) /* L2 cache read miss rate */\ + _visit_( L2CacheWrMissRate ) /* L2 cache write miss rate */\ + _visit_( ExtBusRdLat384 ) /* Output external read latency 384+ cycles (3.5.6) */\ + _visit_( ExtBusRdOTQ4 ) /* Output external outstanding reads 75-100% */\ + _visit_( ExtBusWrOTQ4 ) /* Output external outstanding writes 75-100% */\ + _visit_( GeomSampleCullRate ) /* Sample coverage test cull rate (4.2.4) */\ + _visit_( GeomPosShadThread ) /* Position shader thread invocations (4.3.1) */\ + _visit_( GeomPosShadThreadPerPrim ) /* Position threads per input primitive, should be < 1.5 (4.3.3) */\ + _visit_( TilerPosCacheHitRate ) /* Position cache hit rate */\ + _visit_( GeomVarShadThread ) /* Varying shader thread invocations (4.3.2) */\ + _visit_( GeomVarShadThreadPerPrim ) /* Varying threads per input primitive (4.3.4) */\ + _visit_( TilerVarCacheHitRate )\ + _visit_( FragOpaqueQdRate ) /* Occluding quad percentage */\ + _visit_( FragTransparentQd ) /* Non-occluding quads */\ + _visit_( FragShadedQd ) /* Shaded coarse quads */\ + _visit_( FragPartWarpRate )\ + _visit_( FragEZSUpdateRate ) /* Early ZS updated quad percentage (4.5.2) */\ + _visit_( FragFPKKillQd ) /* Forward pixel kill killed quads */\ + _visit_( FragFPKKillRate ) /* Forward pixel kill killed quad percentage (4.5.4) */\ + _visit_( EngDivergedInstrRate ) /* Warp divergence percentage (6.2.2) */\ + _visit_( TexCacheUtil )\ + _visit_( TexMipInstrRate )\ + _visit_( TexCacheCompressFetchRate )\ + _visit_( TexTriInstrRate )\ + _visit_( Var32IssueCy ) /* 32-bit interpolation active cycles (7.1.3) */\ + _visit_( Var16IssueCy ) /* 16-bit interpolation active cycles (7.1.2) */\ + _visit_( VarIssueCy ) /* Varying unit issue cycles (7.1.1) */\ + _visit_( 
VarUtil ) /* Varying unit utilization (6.1.2) */\ + _visit_( SCBusFFEL2RdBy ) /* Front-end unit read bytes from L2 cache (11.1.1) */\ + _visit_( SCBusFFEExtRdBy ) /* Front-end unit read bytes from external memory (11.2.1) */\ + _visit_( SCBusLSL2RdBy ) /* Load/store unit read bytes from L2 cache (11.1.2) */\ + _visit_( SCBusLSL2RdByPerRd ) /* Load/store unit bytes read from L2 per access cycle (9.2.1) */\ + _visit_( SCBusLSExtRdBy ) /* Load/store unit read bytes from external memory (11.2.2) */\ + _visit_( SCBusLSExtRdByPerRd ) /* Load/store unit bytes read from external memory per access cycle (9.2.2) */\ + _visit_( SCBusTexL2RdBy ) /* Texture unit read bytes from L2 cache (11.1.3) */\ + _visit_( SCBusTexL2RdByPerRd ) /* Texture unit bytes read from L2 per texture cycle (8.2.1) */\ + _visit_( SCBusTexExtRdBy ) /* Texture unit read bytes from external memory (11.2.3) */\ + _visit_( SCBusTexExtRdByPerRd ) /* Texture unit bytes read from external memory per texture cycle (8.2.2) */\ + _visit_( SCBusLSWrBt ) /* Load/store unit write beats to L2 memory system */\ + _visit_( SCBusLSWrBy ) /* Load/store unit write bytes (11.3.1) */\ + _visit_( SCBusLSWrByPerWr ) /* Load/store unit bytes written to L2 per access cycle (9.2.3) */\ + _visit_( SCBusTileWrBy ) /* Tile unit write bytes (11.3.2) */\ + _visit_( CoreAllRegsWarp ) /* Warps using more than 32 registers */\ + _visit_( CoreFullWarp ) /* Full warps */\ + _visit_( CoreAllRegsWarpRate ) /* All registers warp rate (6.2.3) */\ + _visit_( CoreFullWarpRate ) /* Full warp rate (6.3.3) */\ + _visit_( TexMipInstr )\ + _visit_( TexCompressInstr )\ + _visit_( Tex3DInstr )\ + _visit_( TexTriInstr )\ + _visit_( TexCoordStallCy )\ + _visit_( TexDataStallCy )\ + _visit_( TexPartDataStallCy )\ + _visit_( SCBusOtherWrBt )\ + _visit_( TexCompressInstrRate )\ + _visit_( Tex3DInstrRate )\ + _visit_( SCBusOtherWrBy )\ + _visit_( FragRastPartQd ) /* Partial rasterized fine quads */\ + _visit_( EngFMAInstr ) /* Arithmetic FMA instructions 
*/\ + _visit_( EngCVTInstr ) /* Arithmetic CVT instructions */\ + _visit_( EngSFUInstr ) /* Arithmetic SFU instructions */\ + _visit_( EngICacheMiss ) /* Instruction cache misses */\ + _visit_( EngSWBlendInstr )\ + _visit_( TexInBt )\ + _visit_( TexDescStallCy ) /* Texture descriptor stall cycles */\ + _visit_( TexDataFetchStallCy )\ + _visit_( TexFiltStallCy )\ + _visit_( TexFullBiFiltCy ) /* Texture filtering cycles using full bilinear (V: 8.1.2) */\ + _visit_( TexFullTriFiltCy ) /* Texture filtering cycles using full trilinear (V: 8.1.3) */\ + _visit_( TexOutMsg )\ + _visit_( TexOutBt )\ + _visit_( FragRastPartQdRate ) /* Partial coverage rate (6.3.1) */\ + _visit_( EngFMAPipeUtil ) /* FMA pipe utilization */\ + _visit_( EngCVTPipeUtil ) /* CVT pipe utilization */\ + _visit_( EngSFUPipeUtil ) /* SFU pipe utilization */\ + _visit_( EngArithInstr ) /* Arithmetic instruction issue cycles */\ + _visit_( EngSWBlendRate ) /* Shader blend percentage (6.2.4) */\ + _visit_( TexInBusUtil ) /* Texture input bus utilization */\ + _visit_( TexOutBusUtil ) /* Texture output bus utilization */\ + _visit_( TexFiltFullRate ) /* Texture full speed filtering percentage */\ + _visit_( AnyActiveCy )\ + _visit_( AnyUtil )\ + _visit_( CSFMCUActiveCy ) /* MCU active cycles (3.1.5) */\ + _visit_( GPUQueueActiveCy )\ + _visit_( GPUIRQ )\ + _visit_( L2CacheFlushCy )\ + _visit_( VertQueuedCy )\ + _visit_( VertQueueJob )\ + _visit_( VertQueueTask )\ + _visit_( VertQueueTotalActiveCy )\ + _visit_( VertQueueIRQActiveCy )\ + _visit_( VertQueueAssignStallCy )\ + _visit_( TilerQueueDrainStallCy )\ + _visit_( CompQueuedCy ) /* Compute queue active cycles (3.1.4) */\ + _visit_( CompQueueJob )\ + _visit_( CompQueueTask )\ + _visit_( CompQueueTotalActiveCy )\ + _visit_( CompQueueIRQActiveCy )\ + _visit_( CompQueueAssignStallCy )\ + _visit_( CompQueueDrainStallCy )\ + _visit_( FragQueuedCy )\ + _visit_( FragQueueTotalActiveCy )\ + _visit_( FragQueueIRQActiveCy )\ + _visit_( FragQueueAssignStallCy )\ 
+ _visit_( CSFCEUActiveCy )\ + _visit_( CSFLSUActiveCy )\ + _visit_( CSFCS0ActiveCy )\ + _visit_( CS0WaitStallCy )\ + _visit_( CSFCS1ActiveCy )\ + _visit_( CS1WaitStallCy )\ + _visit_( CSFCS2ActiveCy )\ + _visit_( CS2WaitStallCy )\ + _visit_( CSFCS3ActiveCy )\ + _visit_( CS3WaitStallCy )\ + _visit_( L2CacheEvict )\ + _visit_( L2CacheCleanUnique )\ + _visit_( VertQueueActiveCy )\ + _visit_( VertQueueUtil )\ + _visit_( CompQueueActiveCy )\ + _visit_( CompQueueUtil ) /* Compute queue utilization (3.2.3) */\ + _visit_( CSFMCUUtil ) /* Microcontroller utilization (3.2.4) */\ + _visit_( CSFLSUUtil )\ + _visit_( CSFCEUUtil )\ + _visit_( GPUActive2Cy )\ + _visit_( EngNarrowInstr ) /* Narrow arithmetic instructions (8/16 bit) */\ + _visit_( FragRastCoarseQd )\ + _visit_( RTUTri )\ + _visit_( RTUBox )\ + _visit_( RTUTriBin1 ) /* Ray tracing triangle batches with 1-4 rays (10.4.4) */\ + _visit_( RTUTriBin5 ) /* Ray tracing triangle batches with 5-8 rays (10.4.3) */\ + _visit_( RTUTriBin9 ) /* Ray tracing triangle batches with 9-12 rays (10.4.2) */\ + _visit_( RTUTriBin13 ) /* Ray tracing triangle batches with 13-16 rays (10.4.1) */\ + _visit_( RTUBoxBin1 ) /* Ray tracing box nodes with 1-4 rays (10.3.4) */\ + _visit_( RTUBoxBin5 ) /* Ray tracing box nodes with 5-8 rays (10.3.3) */\ + _visit_( RTUBoxBin9 ) /* Ray tracing box nodes with 9-12 rays (10.3.2) */\ + _visit_( RTUBoxBin13 ) /* Ray tracing box nodes with 13-16 rays (10.3.1) */\ + _visit_( RTUOpaqueHit ) /* Ray tracing opaque triangle hits (10.2.2) */\ + _visit_( RTUNonOpaqueHit ) /* Ray tracing non-opaque triangle hits (10.2.3) */\ + _visit_( RTUFirstHitTerm ) /* Ray tracing first hit terminations (10.2.5) */\ + _visit_( RTUMiss ) /* Ray tracing ray misses (10.2.4) */\ + _visit_( RTURay ) /* Ray tracing started rays (10.2.1) */\ + _visit_( RTUBoxIssueCy ) /* Ray tracing box tester issue cycles (10.1.1) */\ + _visit_( RTUTriIssueCy ) /* Ray tracing triangle tester issue cycles (10.1.2) */\ + _visit_( GeomFaceCullPrim )\ 
+ _visit_( GeomPlaneCullPrim )\ + _visit_( GeomFaceCullRate )\ + _visit_( GeomPlaneCullRate )\ + _visit_( FragShadRate ) /* Fragment shading rate (4.6.1) (6.3.4) */\ + _visit_( CoreFragWarpOcc ) /* Fragment warp occupancy % (6.3.2) */\ + _visit_( EngNarrowInstrRate ) /* Narrow arithmetic percentage (6.2.1) */\ + _visit_( RTUUtil ) /* Ray tracing unit utilization (6.1.5) */\ + _visit_( BinningQueuedCy ) /* Binning phase queue active cycles (3.1.2) */\ + _visit_( BinningQueueJob )\ + _visit_( BinningQueueTask )\ + _visit_( BinningQueueIRQActiveCy )\ + _visit_( BinningQueueAssignStallCy )\ + _visit_( MainQueuedCy ) /* Main phase queue active cycles (3.1.3) */\ + _visit_( MainQueueJob )\ + _visit_( MainQueueTask )\ + _visit_( MainQueueIRQActiveCy )\ + _visit_( MainQueueAssignStallCy )\ + _visit_( MainActiveCy )\ + _visit_( CompOrBinningActiveCy )\ + _visit_( TexL1CacheLoadCy )\ + _visit_( TexCacheSimpleLoadCy )\ + _visit_( TexL1CacheOutputCy )\ + _visit_( TexL1CacheLookupCy )\ + _visit_( TexOutSingleMsg )\ + _visit_( TexCacheLookupCy )\ + _visit_( TexCacheComplexLoadCy )\ + _visit_( TexIndexCy )\ + _visit_( TexClkStarvedCy )\ + _visit_( TexClkActiveCy )\ + _visit_( GeomScissorCullPrim )\ + _visit_( GeomVisibleDVSPrim )\ + _visit_( MainQueueActiveCy )\ + _visit_( MainQueueUtil )\ + _visit_( BinningQueueActiveCy )\ + _visit_( BinningQueueUtil )\ + _visit_( GeomScissorCullRate )\ + _visit_( CompOrBinningUtil )\ + _visit_( MainUtil )\ + _visit_( CSDoorbellIRQCy )\ + _visit_( MainQueueTotalActiveCy )\ + _visit_( BinningQueueTotalActiveCy )\ + _visit_( CSFCS4ActiveCy )\ + _visit_( CS4WaitStallCy )\ + _visit_( CSFCS5ActiveCy )\ + _visit_( CS5WaitStallCy )\ + _visit_( FragPrim )\ + _visit_( FragPrepassCullPrim )\ + _visit_( FragPrepassPrim )\ + _visit_( FragPrepassUncullPrim )\ + _visit_( FragPrepassEZSUpdateQd )\ + _visit_( FragPrepassTestQd )\ + _visit_( FragPrepassKillQd )\ + _visit_( FragPrepassWarp )\ + _visit_( FragMainPassStallCy )\ + _visit_( DefVertWarp )\ + _visit_( 
EngTexBackpressureCy )\ + _visit_( EngVarBackpressureCy )\ + _visit_( EngBlendBackpressureCy )\ + _visit_( EngZSBackpressureCy )\ + _visit_( EngLSBackpressureCy )\ + _visit_( EngAttrBackpressureCy )\ + _visit_( EngSlot1IssueCy )\ + _visit_( EngSlotAnyIssueCy )\ + _visit_( GeomPosShadPartTask )\ + _visit_( GeomVarShadPartTask )\ + _visit_( FragPrepassKillRate )\ + _visit_( FragMainPassStallRate )\ + _visit_( FragInputPrim )\ + _visit_( FragPrepassPrimRate )\ + _visit_( FragPrepassCullPrimRate )\ + _visit_( FragPrepassUncullPrimRate )\ + _visit_( FragPrepassWarpRate )\ + _visit_( FragPrepassThread )\ + _visit_( FragMainThread )\ + _visit_( EngSlot0IssueCy )\ + _visit_( EngAttrBackpressureRate )\ + _visit_( EngBlendBackpressureRate )\ + _visit_( EngLSBackpressureRate )\ + _visit_( EngTexBackpressureRate )\ + _visit_( EngVarBackpressureRate )\ + _visit_( EngZSBackpressureRate ) + + #define DECL_COUNTER( _name_ ) _name_, + AE_MALI_COUNTERS( DECL_COUNTER ) + #undef DECL_COUNTER + + _LastCounter, + + // custom // + ExtBusTotalBy = _LastCounter, // ExtBusRdBy + ExtBusWrBy + ExtMemEnergy, // (ExtBusRdBy + ExtBusWrBy) * power consumption, in joules + CoreEnergy, // CoreActiveCy * power consumption, in joules + TotalEnergy, // ExtMemEnergy + CoreEnergy, in joules + PerCoreActiveCy, // CoreActiveCy + + _Count + }; + + using ECounterSet = EnumSet< ECounter >; + using Counters_t = FlatHashMap< ECounter, double >; + + struct HWInfo + { + ubyte shaderCoreCount = 0; + ubyte execEngineCount = 0; // per core + ushort busWidth = 0; // bits + ushort l2Slices = 0; + Bytes32u l2SliceSize; + ushort tileSize = 0; // pixels + ushort warpSize = 0; + }; + + private: + struct Impl; + + + // variables + private: + Unique _impl; + + + // methods + public: + MaliProfiler () __NE___; + ~MaliProfiler () __NE___; + + ND_ bool Initialize (ECounterSet counterSet) __NE___; + void Deinitialize () __NE___; + ND_ bool IsInitialized () C_NE___; + + ND_ ECounterSet EnabledCounterSet () C_NE___; + ND_ HWInfo 
GetHWInfo () C_NE___; + + void Sample (OUT Counters_t &, INOUT float &invdt) C_NE___; + + + #ifndef AE_ENABLE_MALI_HWCPIPE + ND_ bool InitClient (RC) __NE___; + #endif + }; + + +} // AE::Profiler diff --git a/AE/engine/src/profiler/Utils/NVidiaProfiler.cpp b/AE/engine/src/profiler/Profilers/NVidiaProfiler.cpp similarity index 91% rename from AE/engine/src/profiler/Utils/NVidiaProfiler.cpp rename to AE/engine/src/profiler/Profilers/NVidiaProfiler.cpp index b0777ae5..e2067054 100644 --- a/AE/engine/src/profiler/Utils/NVidiaProfiler.cpp +++ b/AE/engine/src/profiler/Profilers/NVidiaProfiler.cpp @@ -7,7 +7,7 @@ #ifdef AE_ENABLE_REMOTE_GRAPHICS # include "profiler/Profiler.pch.h" -# include "profiler/Utils/NVidiaProfiler.h" +# include "profiler/Profilers/NVidiaProfiler.h" namespace AE::Profiler { @@ -64,9 +64,10 @@ namespace AE::Profiler Sample ================================================= */ - void NVidiaProfiler::Sample (OUT Counters_t &result) C_NE___ + void NVidiaProfiler::Sample (OUT Counters_t &result, INOUT float &invdt) C_NE___ { result.clear(); + Unused( invdt ); // keep current if ( not IsInitialized() ) return; @@ -85,7 +86,7 @@ namespace AE::Profiler #elif defined(AE_ENABLE_NVML) # include "nvml.h" # include "profiler/Profiler.pch.h" -# include "profiler/Utils/NVidiaProfiler.h" +# include "profiler/Profilers/NVidiaProfiler.h" namespace AE::Profiler { @@ -421,9 +422,10 @@ namespace AE::Profiler Sample ================================================= */ - void NVidiaProfiler::Sample (OUT Counters_t &outCounters) C_NE___ + void NVidiaProfiler::Sample (OUT Counters_t &outCounters, INOUT float &invdt) C_NE___ { outCounters.clear(); + Unused( invdt ); // keep current if ( not _impl ) return; // not initialized @@ -436,7 +438,7 @@ namespace AE::Profiler #else // not AE_ENABLE_NVML -# include "profiler/Utils/NVidiaProfiler.h" +# include "profiler/Profilers/NVidiaProfiler.h" # include "profiler/Remote/RemoteNVidiaProfiler.h" namespace AE::Profiler @@ -448,18 
+450,18 @@ namespace AE::Profiler Impl (RC c) __NE___ : client{RVRef(c)} {} }; - NVidiaProfiler::NVidiaProfiler () __NE___ {} - NVidiaProfiler::~NVidiaProfiler () __NE___ {} + NVidiaProfiler::NVidiaProfiler () __NE___ {} + NVidiaProfiler::~NVidiaProfiler () __NE___ {} - bool NVidiaProfiler::Initialize (const ECounterSet &cs) __NE___ { return _impl and _impl->client->Initialize( cs ); } - bool NVidiaProfiler::IsInitialized () C_NE___ { return _impl and _impl->client->IsInitialized(); } + bool NVidiaProfiler::Initialize (const ECounterSet &cs) __NE___ { return _impl and _impl->client->Initialize( cs ); } + bool NVidiaProfiler::IsInitialized () C_NE___ { return _impl and _impl->client->IsInitialized(); } - NVidiaProfiler::ECounterSet NVidiaProfiler::EnabledCounterSet () C_NE___ { return _impl ? _impl->client->EnabledCounterSet() : Default; } - NVidiaProfiler::HWInfo NVidiaProfiler::GetHWInfo () C_NE___ { return _impl ? _impl->client->GetHWInfo() : Default; } + NVidiaProfiler::ECounterSet NVidiaProfiler::EnabledCounterSet () C_NE___ { return _impl ? _impl->client->EnabledCounterSet() : Default; } + NVidiaProfiler::HWInfo NVidiaProfiler::GetHWInfo () C_NE___ { return _impl ? 
_impl->client->GetHWInfo() : Default; } - void NVidiaProfiler::Sample (OUT Counters_t &result) C_NE___ { if (_impl) return _impl->client->Sample( OUT result ); } + void NVidiaProfiler::Sample (OUT Counters_t &result, INOUT float &invdt) C_NE___ { if (_impl) return _impl->client->Sample( OUT result, INOUT invdt ); } - bool NVidiaProfiler::InitClient (RC client) __NE___ + bool NVidiaProfiler::InitClient (RC client) __NE___ { CHECK_ERR( client ); @@ -467,7 +469,7 @@ namespace AE::Profiler return true; } - void NVidiaProfiler::Deinitialize () __NE___ + void NVidiaProfiler::Deinitialize () __NE___ { _impl.reset( null ); } diff --git a/AE/engine/src/profiler/Utils/NVidiaProfiler.h b/AE/engine/src/profiler/Profilers/NVidiaProfiler.h similarity index 96% rename from AE/engine/src/profiler/Utils/NVidiaProfiler.h rename to AE/engine/src/profiler/Profilers/NVidiaProfiler.h index 2cf09a82..a93c81b7 100644 --- a/AE/engine/src/profiler/Utils/NVidiaProfiler.h +++ b/AE/engine/src/profiler/Profilers/NVidiaProfiler.h @@ -111,7 +111,7 @@ namespace AE::Profiler ND_ ECounterSet EnabledCounterSet () C_NE___; ND_ HWInfo GetHWInfo () C_NE___; - void Sample (OUT Counters_t &) C_NE___; + void Sample (OUT Counters_t &, INOUT float &invdt) C_NE___; #ifndef AE_ENABLE_NVML diff --git a/AE/engine/src/profiler/Utils/PowerVRProfiler.cpp b/AE/engine/src/profiler/Profilers/PowerVRProfiler.cpp similarity index 89% rename from AE/engine/src/profiler/Utils/PowerVRProfiler.cpp rename to AE/engine/src/profiler/Profilers/PowerVRProfiler.cpp index 24521415..d6009be3 100644 --- a/AE/engine/src/profiler/Utils/PowerVRProfiler.cpp +++ b/AE/engine/src/profiler/Profilers/PowerVRProfiler.cpp @@ -2,7 +2,7 @@ #ifdef AE_ENABLE_REMOTE_GRAPHICS # include "profiler/Profiler.pch.h" -# include "profiler/Utils/PowerVRProfiler.h" +# include "profiler/Profilers/PowerVRProfiler.h" namespace AE::Profiler { @@ -16,10 +16,6 @@ namespace AE::Profiler struct PowerVRProfiler::Impl { ECounterSet _enabled; - - Mutex _timingsGuard; - 
TimeScopeArr_t _timings; - RDevice const& dev; Impl (RDevice const& dev) __NE___ : dev{dev} {} @@ -31,6 +27,8 @@ namespace AE::Profiler bool PowerVRProfiler::IsInitialized () C_NE___ { return bool{_impl}; } void PowerVRProfiler::Deinitialize () __NE___ { _impl.reset( null ); } + void PowerVRProfiler::Tick () C_NE___ {} + PowerVRProfiler::ECounterSet PowerVRProfiler::EnabledCounterSet () C_NE___ { return _impl ? _impl->_enabled : Default; } /* @@ -59,32 +57,15 @@ namespace AE::Profiler return res->ok; } -/* -================================================= - Tick -================================================= -*/ - void PowerVRProfiler::Tick () C_NE___ - { - if ( not IsInitialized() ) return; - - Msg::ProfPVR_Tick msg; - RC res; - - CHECK_ERRV( _impl->dev.SendAndWait( msg, OUT res )); - - EXLOCK( _impl->_timingsGuard ); - _impl->_timings = RVRef(res->timings); - } - /* ================================================= Sample ================================================= */ - void PowerVRProfiler::Sample (OUT Counters_t &result) C_NE___ + void PowerVRProfiler::Sample (OUT Counters_t &result, INOUT float &invdt) C_NE___ { result.clear(); + Unused( invdt ); // keep current if ( not IsInitialized() ) return; @@ -107,8 +88,11 @@ namespace AE::Profiler if ( not IsInitialized() ) return; - EXLOCK( _impl->_timingsGuard ); - result = RVRef(_impl->_timings); + Msg::ProfPVR_GetTiming msg; + RC res; + + CHECK_ERRV( _impl->dev.SendAndWait( msg, OUT res )); + result = RVRef(res->timings); } } // AE::Profiler @@ -117,7 +101,7 @@ namespace AE::Profiler #elif defined(AE_ENABLE_PVRCOUNTER) # include "PVRScopeStats.h" -# include "profiler/Utils/PowerVRProfiler.h" +# include "profiler/Profilers/PowerVRProfiler.h" namespace AE::Profiler { @@ -346,6 +330,7 @@ namespace */ void PowerVRProfiler::Impl::Print () { + #ifdef AE_ENABLE_LOGS if ( _counters == null or _numCounter == 0 ) return; @@ -360,6 +345,7 @@ namespace } AE_LOGI( str ); + #endif } /* @@ -415,9 +401,9 @@ namespace 
break; case ePVRScopeEventComputeEnd : pass = EPass::Compute; break; - case ePVRScopeEventTAEnd : pass = EPass::TileAccel; break; - case ePVRScopeEvent3DEnd : pass = EPass::TBDR; break; - case ePVRScopeEvent2DEnd : pass = EPass::Blit; break; + case ePVRScopeEventTAEnd : pass = EPass::Tiler; break; + case ePVRScopeEvent3DEnd : pass = EPass::Renderer; break; + case ePVRScopeEvent2DEnd : pass = EPass::Transfer; break; case ePVRScopeEventRTUEnd : pass = EPass::RayTracing; break; case ePVRScopeEventSHGEnd : pass = EPass::RTASBuild; break; } @@ -508,9 +494,10 @@ namespace Sample ================================================= */ - void PowerVRProfiler::Sample (OUT Counters_t &outCounters) C_NE___ + void PowerVRProfiler::Sample (OUT Counters_t &outCounters, INOUT float &invdt) C_NE___ { outCounters.clear(); + Unused( invdt ); // keep current if ( not _impl ) return; // not initialized @@ -563,7 +550,7 @@ namespace #else // not AE_ENABLE_PVRCOUNTER and not AE_ENABLE_REMOTE_GRAPHICS -# include "profiler/Utils/PowerVRProfiler.h" +# include "profiler/Profilers/PowerVRProfiler.h" # include "profiler/Remote/RemotePowerVRProfiler.h" namespace AE::Profiler @@ -575,20 +562,20 @@ namespace AE::Profiler Impl (RC c) __NE___ : client{RVRef(c)} {} }; - PowerVRProfiler::PowerVRProfiler () __NE___ {} - PowerVRProfiler::~PowerVRProfiler () __NE___ {} + PowerVRProfiler::PowerVRProfiler () __NE___ {} + PowerVRProfiler::~PowerVRProfiler () __NE___ {} - bool PowerVRProfiler::Initialize (const ECounterSet &cs) __NE___ { return _impl and _impl->client->Initialize( cs ); } - bool PowerVRProfiler::IsInitialized () C_NE___ { return _impl and _impl->client->IsInitialized(); } + bool PowerVRProfiler::Initialize (const ECounterSet &cs) __NE___ { return _impl and _impl->client->Initialize( cs ); } + bool PowerVRProfiler::IsInitialized () C_NE___ { return _impl and _impl->client->IsInitialized(); } - PowerVRProfiler::ECounterSet PowerVRProfiler::EnabledCounterSet () C_NE___ { return _impl ? 
_impl->client->EnabledCounterSet() : Default; } + PowerVRProfiler::ECounterSet PowerVRProfiler::EnabledCounterSet () C_NE___ { return _impl ? _impl->client->EnabledCounterSet() : Default; } - void PowerVRProfiler::Tick () C_NE___ { if (_impl) return _impl->client->Tick(); } - void PowerVRProfiler::Sample (OUT Counters_t &result) C_NE___ { if (_impl) return _impl->client->Sample( OUT result ); } - void PowerVRProfiler::ReadTimingData (OUT TimeScopeArr_t &result) C_NE___ { if (_impl) return _impl->client->ReadTimingData( OUT result ); } + void PowerVRProfiler::Tick () C_NE___ { if (_impl) return _impl->client->Tick(); } + void PowerVRProfiler::Sample (OUT Counters_t &result, INOUT float &invdt) C_NE___ { if (_impl) return _impl->client->Sample( OUT result, INOUT invdt ); } + void PowerVRProfiler::ReadTimingData (OUT TimeScopeArr_t &result) C_NE___ { if (_impl) return _impl->client->ReadTimingData( OUT result ); } - bool PowerVRProfiler::InitClient (RC client) __NE___ + bool PowerVRProfiler::InitClient (RC client) __NE___ { CHECK_ERR( client ); @@ -596,7 +583,7 @@ namespace AE::Profiler return true; } - void PowerVRProfiler::Deinitialize () __NE___ + void PowerVRProfiler::Deinitialize () __NE___ { _impl.reset( null ); } diff --git a/AE/engine/src/profiler/Utils/PowerVRProfiler.h b/AE/engine/src/profiler/Profilers/PowerVRProfiler.h similarity index 93% rename from AE/engine/src/profiler/Utils/PowerVRProfiler.h rename to AE/engine/src/profiler/Profilers/PowerVRProfiler.h index 3fc062af..bcbff537 100644 --- a/AE/engine/src/profiler/Utils/PowerVRProfiler.h +++ b/AE/engine/src/profiler/Profilers/PowerVRProfiler.h @@ -4,7 +4,7 @@ (x.x) - link to counter description in docs. 
- [Performance counters description](https://github.com/azhirnov/cpu-gru-arch/blob/main/gpu/PowerVR_PC.md) + [Performance counters description](https://github.com/azhirnov/cpu-gpu-arch/blob/main/gpu/PowerVR_PC.md) */ #pragma once @@ -33,7 +33,7 @@ namespace AE::Profiler GeometryActive = _BXM_Begin, // % input primitives, tiler ? GeometryTimePerFrame, GeometryTime, - GPU_ClockSpeed, // (2.15) + GPU_ClockSpeed, // Hz (2.15) GPU_MemoryInterfaceLoad, // % (2.19) GPU_MemoryRead, // bytes/second (2.16) GPU_MemoryTotal, // bytes/second (2.17) @@ -93,9 +93,9 @@ namespace AE::Profiler { Unknown = 0, Compute, - TileAccel, // binning & VS - TBDR, // rasterization & FS - Blit, // 2D + Tiler, // binning & VS + Renderer, // rasterization & FS + Transfer, // 2D, image blit, image/buffer copy RayTracing, RTASBuild, // SHG }; @@ -130,7 +130,7 @@ namespace AE::Profiler ND_ ECounterSet EnabledCounterSet () C_NE___; void Tick () C_NE___; - void Sample (OUT Counters_t &) C_NE___; // use once per second + void Sample (OUT Counters_t &, INOUT float &invdt) C_NE___; // use once per second void ReadTimingData (OUT TimeScopeArr_t &) C_NE___; // use once per frame #ifndef AE_ENABLE_PVRCOUNTER diff --git a/AE/engine/src/profiler/Remote/Messages.h b/AE/engine/src/profiler/Remote/Messages.h index e2cefcda..2d4ed6bd 100644 --- a/AE/engine/src/profiler/Remote/Messages.h +++ b/AE/engine/src/profiler/Remote/Messages.h @@ -2,11 +2,12 @@ #pragma once -#include "profiler/Utils/ArmProfiler.h" -#include "profiler/Utils/MaliProfiler.h" -#include "profiler/Utils/NVidiaProfiler.h" -#include "profiler/Utils/AdrenoProfiler.h" -#include "profiler/Utils/PowerVRProfiler.h" +#include "profiler/Profilers/ArmProfiler.h" +#include "profiler/Profilers/MaliProfiler.h" +#include "profiler/Profilers/NVidiaProfiler.h" +#include "profiler/Profilers/AdrenoProfiler.h" +#include "profiler/Profilers/PowerVRProfiler.h" +#include "profiler/Profilers/GeneralProfiler.h" #include "pch/Networking.h" namespace AE::Networking @@ 
-23,7 +24,7 @@ namespace AE::Networking DECL_CSMSG( ArmProf_NextSample, Debug, ubyte index; - ushort dtInMs; + float invdt; // seconds ); DECL_CSMSG( ArmProf_Sample, Debug, @@ -37,7 +38,7 @@ namespace AE::Networking CSMSG_ENC_DEC( ArmProf_InitReq, enable, updateInterval ); CSMSG_ENC_DEC( ArmProf_InitRes, ok, enabled ); - CSMSG_ENC_DEC( ArmProf_NextSample, index, dtInMs ); + CSMSG_ENC_DEC( ArmProf_NextSample, index, invdt ); CSMSG_ENC_DEC_EXARRAY( ArmProf_Sample, count, arr, AE_ARGS( index, count )); //-------------------------------------------------------- @@ -68,7 +69,7 @@ namespace AE::Networking DECL_CSMSG( MaliProf_NextSample, Debug, ubyte index; - ushort dtInMs; + float invdt; // seconds ); DECL_CSMSG( MaliProf_Sample, Debug, @@ -82,7 +83,7 @@ namespace AE::Networking CSMSG_ENC_DEC( MaliProf_InitReq, enable, updateInterval ); CSMSG_ENC_DEC( MaliProf_InitRes, ok, enabled, info ); - CSMSG_ENC_DEC( MaliProf_NextSample, index, dtInMs ); + CSMSG_ENC_DEC( MaliProf_NextSample, index, invdt ); CSMSG_ENC_DEC_EXARRAY( MaliProf_Sample, count, arr, AE_ARGS( index, count )); //-------------------------------------------------------- @@ -112,7 +113,7 @@ namespace AE::Networking DECL_CSMSG( PVRProf_NextSample, Debug, ubyte index; - ushort dtInMs; + float invdt; // seconds ); DECL_CSMSG( PVRProf_Sample, Debug, @@ -133,7 +134,7 @@ namespace AE::Networking CSMSG_ENC_DEC( PVRProf_InitReq, enable, updateInterval ); CSMSG_ENC_DEC( PVRProf_InitRes, ok, enabled ); - CSMSG_ENC_DEC( PVRProf_NextSample, index, dtInMs ); + CSMSG_ENC_DEC( PVRProf_NextSample, index, invdt ); CSMSG_ENC_DEC_EXARRAY( PVRProf_Sample, count, arr, AE_ARGS( index, count )); CSMSG_ENC_DEC_EXARRAY( PVRProf_Timing, count, arr, AE_ARGS( index, count )); //-------------------------------------------------------- @@ -166,7 +167,7 @@ namespace AE::Networking DECL_CSMSG( AdrenoProf_NextSample, Debug, ubyte index; - ushort dtInMs; + float invdt; // seconds ); DECL_CSMSG( AdrenoProf_Sample, Debug, @@ -180,7 +181,7 @@ 
namespace AE::Networking CSMSG_ENC_DEC( AdrenoProf_InitReq, enable, updateInterval ); CSMSG_ENC_DEC( AdrenoProf_InitRes, ok, enabled, info ); - CSMSG_ENC_DEC( AdrenoProf_NextSample, index, dtInMs ); + CSMSG_ENC_DEC( AdrenoProf_NextSample, index, invdt ); CSMSG_ENC_DEC_EXARRAY( AdrenoProf_Sample, count, arr, AE_ARGS( index, count )); //-------------------------------------------------------- @@ -211,7 +212,7 @@ namespace AE::Networking DECL_CSMSG( NVidiaProf_NextSample, Debug, ubyte index; - ushort dtInMs; + float invdt; // seconds ); DECL_CSMSG( NVidiaProf_Sample, Debug, @@ -225,7 +226,7 @@ namespace AE::Networking CSMSG_ENC_DEC( NVidiaProf_InitReq, enable, updateInterval ); CSMSG_ENC_DEC( NVidiaProf_InitRes, ok, enabled ); - CSMSG_ENC_DEC( NVidiaProf_NextSample, index, dtInMs ); + CSMSG_ENC_DEC( NVidiaProf_NextSample, index, invdt ); CSMSG_ENC_DEC_EXARRAY( NVidiaProf_Sample, count, arr, AE_ARGS( index, count )); //-------------------------------------------------------- @@ -242,13 +243,76 @@ namespace AE::Networking //============================================================================= + + DECL_CSMSG( GenProf_InitReq, Debug, + Profiler::GeneralProfiler::ECounterSet enable; + secondsf updateInterval; + ); + + DECL_CSMSG( GenProf_InitRes, Debug, + bool ok; + Profiler::GeneralProfiler::ECounterSet enabled; + ); + + DECL_CSMSG( GenProf_CpuCluster, Debug, + ubyte idx; + ubyte length; + uint logicalCores; + char name [1]; + ); + + DECL_CSMSG( GenProf_NextSample, Debug, + ubyte index; + float invdt; // seconds + ); + + DECL_CSMSG( GenProf_Sample, Debug, + using KeyVal = Pair< Profiler::GeneralProfiler::ECounter, float >; + ubyte index; + ubyte count; + KeyVal arr [1]; + ); + + DECL_CSMSG( GenProf_CpuUsage, Debug, + ubyte index; + ubyte count; + ubyte type; // user or kernel + float arr [1]; + ); + //-------------------------------------------------------- + + + CSMSG_ENC_DEC( GenProf_InitReq, enable, updateInterval ); + CSMSG_ENC_DEC( GenProf_InitRes, ok, 
enabled ); + CSMSG_ENC_DEC( GenProf_NextSample, index, invdt ); + CSMSG_ENC_DEC_EXARRAY( GenProf_Sample, count, arr, AE_ARGS( index, count )); + CSMSG_ENC_DEC_EXARRAY( GenProf_CpuCluster, length, name, AE_ARGS( idx, length, logicalCores )); + CSMSG_ENC_DEC_EXARRAY( GenProf_CpuUsage, count, arr, AE_ARGS( index, count, type )); + //-------------------------------------------------------- + + + ND_ inline bool Register_GeneralProfiler (MessageFactory &mf) __NE___ + { + return mf.Register< + CSMsg_GenProf_InitReq, + CSMsg_GenProf_InitRes, + CSMsg_GenProf_NextSample, + CSMsg_GenProf_Sample, + CSMsg_GenProf_CpuCluster, + CSMsg_GenProf_CpuUsage + >( False{} ); + } +//============================================================================= + + ND_ inline bool Register_RemoteProfilers (MessageFactory &mf) __NE___ { return Register_ArmProfiler( mf ) and Register_AdrenoProfiler( mf ) and Register_MaliProfiler( mf ) and Register_PVRProfiler( mf ) and - Register_NVidiaProfiler( mf ); + Register_NVidiaProfiler( mf ) and + Register_GeneralProfiler( mf ); } diff --git a/AE/engine/src/profiler/Remote/RemoteAdrenoProfiler.cpp b/AE/engine/src/profiler/Remote/RemoteAdrenoProfiler.cpp index 872dd24f..5d45f5a4 100644 --- a/AE/engine/src/profiler/Remote/RemoteAdrenoProfiler.cpp +++ b/AE/engine/src/profiler/Remote/RemoteAdrenoProfiler.cpp @@ -6,6 +6,7 @@ namespace AE::Profiler { using namespace AE::Networking; +#ifdef AE_ENABLE_ADRENO_PERFCOUNTER /* ================================================= MsgConsumer::Consume @@ -112,7 +113,9 @@ namespace AE::Profiler if_likely( not dt ) return; - _prof.profiler.Sample( OUT _prof.counters ); + float invdt; + _prof.profiler.Sample( OUT _prof.counters, OUT invdt ); + invdt = 1.f / dt.As().count(); if ( _prof.counters.empty() ) return; @@ -124,7 +127,7 @@ namespace AE::Profiler if ( msg ) { msg->index = _prof.index; - msg->dtInMs = ushort(dt.As().count()); + msg->invdt = invdt; is_sent = _msgProducer->AddMessage( msg ); } @@ -139,12 +142,12 @@ 
namespace AE::Profiler for (auto it = _prof.counters.begin(); it != _prof.counters.end();) { - auto msg = _msgProducer->CreateMsg< CSMsg_AdrenoProf_Sample >( SizeOf * step ); + const usize count = Min( _prof.counters.size() - sent, step+1 ); + + auto msg = _msgProducer->CreateMsg< CSMsg_AdrenoProf_Sample >( SizeOf * (count-1) ); if ( not msg ) break; - const usize count = Min( _prof.counters.size() - sent, step+1 ); - msg->index = _prof.index; msg->count = ubyte(count); @@ -165,6 +168,8 @@ namespace AE::Profiler _prof.index ++; } } + +#endif // AE_ENABLE_ADRENO_PERFCOUNTER //----------------------------------------------------------------------------- @@ -240,9 +245,11 @@ namespace AE::Profiler Sample ================================================= */ - void AdrenoProfilerClient::Sample (OUT Counters_t &result) __NE___ + void AdrenoProfilerClient::Sample (OUT Counters_t &result, INOUT float &invdt) __NE___ { result.clear(); + invdt = 0.f; + EXLOCK( _guard ); if ( _IsNotInitialized() ) @@ -253,6 +260,7 @@ namespace AE::Profiler } auto& curr = _counters[ _countersIdx & 1 ]; + invdt = _invdt[ _countersIdx & 1 ]; std::swap( result, curr ); curr.clear(); @@ -285,9 +293,10 @@ namespace AE::Profiler if ( _IsInitialized() ) { + _invdt[ _countersIdx & 1 ] = msg.invdt; + _countersIdx = (_countersIdx+1) & 1; _pendingIdx = msg.index; - _interval = milliseconds{ msg.dtInMs }; _connectionLostTimer.Restart(); } @@ -304,7 +313,7 @@ namespace AE::Profiler if ( _IsInitialized() and _pendingIdx == msg.index ) { - auto& curr = _counters[ _countersIdx & 1 ]; + auto& curr = _counters[ (_countersIdx+1) & 1 ]; for (uint i = 0, cnt = msg.count; i < cnt; ++i) curr.insert_or_assign( msg.arr[i].first, msg.arr[i].second ); diff --git a/AE/engine/src/profiler/Remote/RemoteAdrenoProfiler.h b/AE/engine/src/profiler/Remote/RemoteAdrenoProfiler.h index fd3b4f2f..c6432b87 100644 --- a/AE/engine/src/profiler/Remote/RemoteAdrenoProfiler.h +++ b/AE/engine/src/profiler/Remote/RemoteAdrenoProfiler.h @@ 
-101,7 +101,7 @@ namespace AE::Profiler Timer _connectionLostTimer {seconds{10}}; ECounterSet _requiredCS; - secondsf _interval; + float _invdt [2] = {}; Counters_t _counters [2]; ECounterSet _enabled; HWInfo _hwInfo; @@ -116,7 +116,7 @@ namespace AE::Profiler void Deinitialize () __NE___; ND_ bool IsInitialized () C_NE___ { SHAREDLOCK( _guard ); return _IsInitialized(); } - void Sample (OUT Counters_t &result) __NE___; + void Sample (OUT Counters_t &result, INOUT float &invdt) __NE___; ND_ ECounterSet EnabledCounterSet () C_NE___ { SHAREDLOCK( _guard ); return _enabled; } ND_ HWInfo GetHWInfo () C_NE___ { SHAREDLOCK( _guard ); return _hwInfo; } diff --git a/AE/engine/src/profiler/Remote/RemoteArmProfiler.cpp b/AE/engine/src/profiler/Remote/RemoteArmProfiler.cpp index 61e356a4..5c35f27e 100644 --- a/AE/engine/src/profiler/Remote/RemoteArmProfiler.cpp +++ b/AE/engine/src/profiler/Remote/RemoteArmProfiler.cpp @@ -6,6 +6,7 @@ namespace AE::Profiler { using namespace AE::Networking; +#ifdef AE_ENABLE_ARM_PMU /* ================================================= MsgConsumer::Consume @@ -90,16 +91,6 @@ namespace AE::Profiler ================================================= */ void ArmProfilerServer::Update () __NE___ - { - _UpdateArmProfiler(); - } - -/* -================================================= - _UpdateArmProfiler -================================================= -*/ - void ArmProfilerServer::_UpdateArmProfiler () __NE___ { if ( _prof.status.load() != EStatus::Initialized ) return; @@ -111,7 +102,9 @@ namespace AE::Profiler if_likely( not dt ) return; - _prof.profiler.Sample( OUT _prof.counters ); + float invdt; + _prof.profiler.Sample( OUT _prof.counters, OUT invdt ); + invdt = 1.f / dt.As().count(); if ( _prof.counters.empty() ) return; @@ -123,7 +116,7 @@ namespace AE::Profiler if ( msg ) { msg->index = _prof.index; - msg->dtInMs = ushort(dt.As().count()); + msg->invdt = invdt; is_sent = _msgProducer->AddMessage( msg ); } @@ -138,12 +131,12 @@ namespace 
AE::Profiler for (auto it = _prof.counters.begin(); it != _prof.counters.end();) { - auto msg = _msgProducer->CreateMsg< CSMsg_ArmProf_Sample >( SizeOf * step ); + const usize count = Min( _prof.counters.size() - sent, step+1 ); + + auto msg = _msgProducer->CreateMsg< CSMsg_ArmProf_Sample >( SizeOf * (count-1) ); if ( not msg ) break; - const usize count = Min( _prof.counters.size() - sent, step+1 ); - msg->index = _prof.index; msg->count = ubyte(count); @@ -164,6 +157,8 @@ namespace AE::Profiler _prof.index ++; } } + +#endif // AE_ENABLE_ARM_PMU //----------------------------------------------------------------------------- @@ -239,9 +234,11 @@ namespace AE::Profiler Sample ================================================= */ - void ArmProfilerClient::Sample (OUT Counters_t &result) __NE___ + void ArmProfilerClient::Sample (OUT Counters_t &result, INOUT float &invdt) __NE___ { result.clear(); + invdt = 0.f; + EXLOCK( _guard ); if ( _IsNotInitialized() ) @@ -252,6 +249,7 @@ namespace AE::Profiler } auto& curr = _counters[ _countersIdx & 1 ]; + invdt = _invdt[ _countersIdx & 1 ]; std::swap( result, curr ); curr.clear(); @@ -283,9 +281,10 @@ namespace AE::Profiler if ( _IsInitialized() ) { + _invdt[ _countersIdx & 1 ] = msg.invdt; + _countersIdx = (_countersIdx+1) & 1; _pendingIdx = msg.index; - _interval = milliseconds{ msg.dtInMs }; _connectionLostTimer.Restart(); } diff --git a/AE/engine/src/profiler/Remote/RemoteArmProfiler.h b/AE/engine/src/profiler/Remote/RemoteArmProfiler.h index c0565bdd..0ad992ab 100644 --- a/AE/engine/src/profiler/Remote/RemoteArmProfiler.h +++ b/AE/engine/src/profiler/Remote/RemoteArmProfiler.h @@ -66,7 +66,6 @@ namespace AE::Profiler private: void _ArmProf_InitReq (Networking::CSMsg_ArmProf_InitReq const &) __NE___; - void _UpdateArmProfiler () __NE___; }; @@ -100,7 +99,7 @@ namespace AE::Profiler Timer _connectionLostTimer {seconds{10}}; ECounterSet _requiredCS; - secondsf _interval; + float _invdt [2] = {}; Counters_t _counters [2]; 
ECounterSet _enabled; EStatus _status = EStatus::NotInitialized; @@ -114,7 +113,7 @@ namespace AE::Profiler void Deinitialize () __NE___; ND_ bool IsInitialized () C_NE___ { SHAREDLOCK( _guard ); return _IsInitialized(); } - void Sample (OUT Counters_t &result) __NE___; + void Sample (OUT Counters_t &result, INOUT float &invdt) __NE___; ND_ ECounterSet EnabledCounterSet () C_NE___ { SHAREDLOCK( _guard ); return _enabled; } diff --git a/AE/engine/src/profiler/Remote/RemoteGeneralProfiler.cpp b/AE/engine/src/profiler/Remote/RemoteGeneralProfiler.cpp new file mode 100644 index 00000000..4edcda16 --- /dev/null +++ b/AE/engine/src/profiler/Remote/RemoteGeneralProfiler.cpp @@ -0,0 +1,454 @@ +// Copyright (c) Zhirnov Andrey. For more information see 'LICENSE' + +#include "profiler/Remote/RemoteGeneralProfiler.h" + +namespace AE::Profiler +{ + using namespace AE::Networking; + +/* +================================================= + MsgConsumer::Consume +================================================= +*/ + void GeneralProfilerServer::MsgConsumer::Consume (ChunkList msgList) __NE___ + { + for (auto& msg : msgList) + { + switch ( msg->UniqueId() ) + { + #define CASE( _name_ ) case CSMsg_ ## _name_::UID : _server._ ## _name_( *msg->As< CSMsg_ ## _name_ >() ); break; + CASE( GenProf_InitReq ) + #undef CASE + } + } + } + +/* +================================================= + Initialize +================================================= +*/ + bool GeneralProfilerServer::Initialize (ClientServer_t &clientServer, RC mp) __NE___ + { + CHECK_ERR( mp ); + + CHECK_ERR( clientServer.Add( _msgConsumer.GetRC() )); + + _msgProducer = RVRef(mp); + + _prof.status.store( EStatus::NotInitialized ); + return true; + } + +/* +================================================= + Deinitialize +================================================= +*/ + void GeneralProfilerServer::Deinitialize () __NE___ + { + _prof.profiler.Deinitialize(); + _prof.status.store( EStatus::NotInitialized ); + + 
_msgProducer = null; + } + +/* +================================================= + _GenProf_InitReq +================================================= +*/ + void GeneralProfilerServer::_GenProf_InitReq (CSMsg_GenProf_InitReq const &inMsg) __NE___ + { + if ( _prof.status.load() != EStatus::NotInitialized ) + return; + + _prof.timer.Start( inMsg.updateInterval ); + _prof.index = 0; + + _prof.profiler.Deinitialize(); + + bool ok = _prof.profiler.Initialize( inMsg.enable ); + + if ( ok ) + _SendCpuClusters(); + + auto msg = _msgProducer->CreateMsg< CSMsg_GenProf_InitRes >(); + if ( msg ) + { + msg->ok = ok; + if ( ok ) { + msg->enabled = _prof.profiler.EnabledCounterSet(); + } + CHECK( _msgProducer->AddMessage( msg )); + } + + _prof.status.store( (ok ? EStatus::Initialized : EStatus::NotSupported), EMemoryOrder::Release ); + } + +/* +================================================= + _SendCpuClusters +================================================= +*/ + void GeneralProfilerServer::_SendCpuClusters () __NE___ + { + const auto src_clusters = _prof.profiler.GetCpuClusters(); + uint idx = 0; + + for (auto& src : src_clusters) + { + if ( auto msg = _msgProducer->CreateMsg< CSMsg_GenProf_CpuCluster >( StringSizeOf(src.name) )) + { + msg->idx = ubyte(idx); + msg->logicalCores = src.logicalCores.to_ulong(); + + msg.Put( &CSMsg_GenProf_CpuCluster::name, &CSMsg_GenProf_CpuCluster::length, StringView{src.name} ); + + CHECK( _msgProducer->AddMessage( msg )); + } + ++idx; + } + + _prof.cpuCoreCount = _prof.profiler.GetCpuCoreCount(); + } + +/* +================================================= + Update +================================================= +*/ + void GeneralProfilerServer::Update () __NE___ + { + if ( _prof.status.load() != EStatus::Initialized ) + return; + + MemoryBarrier( EMemoryOrder::Acquire ); + + const auto dt = _prof.timer.Tick(); + + if_likely( not dt ) + return; + + float invdt; + _prof.profiler.Sample( OUT _prof.counters, OUT invdt ); + invdt = 1.f / 
dt.As().count(); + + CpuUsage_t user, kernel; + const bool has_cpu_usage = _prof.profiler.GetUsage( OUT user, OUT kernel ); + + if ( _prof.counters.empty() and not has_cpu_usage ) + return; + + // send first message + bool is_sent = false; + { + auto msg = _msgProducer->CreateMsg< CSMsg_GenProf_NextSample >(); + if ( msg ) + { + msg->index = _prof.index; + msg->invdt = invdt; + + is_sent = _msgProducer->AddMessage( msg ); + } + } + + if ( not is_sent ) + return; + + // send payload + if ( not _prof.counters.empty() ) + { + using KeyVal = CSMsg_GenProf_Sample::KeyVal; + constexpr usize step = usize((NetConfig::TCP_MaxMsgSize - SizeOf) / SizeOf); + usize sent = 0; + + for (auto it = _prof.counters.begin(); it != _prof.counters.end();) + { + const usize count = Min( _prof.counters.size() - sent, step+1 ); + + auto msg = _msgProducer->CreateMsg< CSMsg_GenProf_Sample >( SizeOf * (count-1) ); + if ( not msg ) + break; + + msg->index = _prof.index; + msg->count = ubyte(count); + + auto it2 = it; + for (usize i = 0; i < count; ++i, ++it2) + { + msg->arr[i].first = it2->first; + msg->arr[i].second = float(it2->second); + } + + if ( not _msgProducer->AddMessage( msg )) + break; + + it = it2; + sent += count; + } + ASSERT( sent == _prof.counters.size() ); + } + + // cpu usage + if ( has_cpu_usage ) + { + const uint core_cnt = _prof.cpuCoreCount; + + if ( auto msg = _msgProducer->CreateMsg< CSMsg_GenProf_CpuUsage >( SizeOf * (core_cnt-1) )) + { + msg->index = _prof.index; + msg->count = ubyte(core_cnt); + msg->type = 0; + + MemCopy( OUT msg->arr, user.data(), SizeOf * core_cnt ); + + Unused( _msgProducer->AddMessage( msg )); + } + + if ( auto msg = _msgProducer->CreateMsg< CSMsg_GenProf_CpuUsage >( SizeOf * (core_cnt-1) )) + { + msg->index = _prof.index; + msg->count = ubyte(core_cnt); + msg->type = 1; + + MemCopy( OUT msg->arr, kernel.data(), SizeOf * core_cnt ); + + Unused( _msgProducer->AddMessage( msg )); + } + } + + _prof.index ++; + } 
+//----------------------------------------------------------------------------- + + + +/* +================================================= + constructor +================================================= +*/ + GeneralProfilerClient::GeneralProfilerClient (RC mp) __NE___ : + _msgProducer{ RVRef(mp) } + {} + +/* +================================================= + Initialize +================================================= +*/ + bool GeneralProfilerClient::Initialize (const ECounterSet &counterSet) __NE___ + { + CHECK_ERR( counterSet.Any() ); + + _requiredCS = counterSet; + return _Initialize( counterSet ); + } + + bool GeneralProfilerClient::_Initialize (const ECounterSet &counterSet) __NE___ + { + auto msg = _msgProducer->CreateMsg< CSMsg_GenProf_InitReq >(); + if ( msg ) + { + msg->enable = counterSet; + msg->updateInterval = secondsf{1.f}; + + if ( _msgProducer->AddMessage( msg )) + return true; + } + return false; + } + +/* +================================================= + Deinitialize +================================================= +*/ + void GeneralProfilerClient::Deinitialize () __NE___ + { + } + +/* +================================================= + _Consume +================================================= +*/ + void GeneralProfilerClient::_Consume (ChunkList msgList) __NE___ + { + for (auto& msg : msgList) + { + ASSERT( msg->GroupId() == CSMessageGroup::Debug ); + switch ( msg->UniqueId() ) + { + #define CASE( _name_ ) case CSMsg_GenProf_ ## _name_::UID : _ ## _name_( *msg->As< CSMsg_GenProf_ ## _name_ >() ); break; + CASE( InitRes ) + CASE( NextSample ) + CASE( Sample ) + CASE( CpuCluster ) + CASE( CpuUsage ) + #undef CASE + } + } + } + +/* +================================================= + Sample +================================================= +*/ + void GeneralProfilerClient::Sample (OUT Counters_t &result, INOUT float &invdt) __NE___ + { + result.clear(); + invdt = 0.f; + + EXLOCK( _guard ); + + if ( _IsNotInitialized() ) + { + 
if_unlikely( _connectionLostTimer.Tick() ) + Unused( _Initialize( _requiredCS )); + return; + } + + auto& curr = _counters[ _countersIdx & 1 ]; + invdt = _invdt[ _countersIdx & 1 ]; + + std::swap( result, curr ); + curr.clear(); + } + +/* +================================================= + GetUsage +================================================= +*/ + bool GeneralProfilerClient::GetUsage (OUT CpuUsage_t &user, OUT CpuUsage_t &kernel) C_NE___ + { + EXLOCK( _guard ); + + const uint idx = _countersIdx & 1; + + if ( _IsNotInitialized() or not _hasCpuUsage[idx] ) + return false; + + const auto& src_user = _userSpace[idx]; + const auto& src_kernel = _kernelSpace[idx]; + + MemCopy( OUT user, src_user ); + MemCopy( OUT kernel, src_kernel ); + + _hasCpuUsage[idx] = false; + return true; + } + +/* +================================================= + _InitRes +================================================= +*/ + inline void GeneralProfilerClient::_InitRes (CSMsg_GenProf_InitRes const& msg) __NE___ + { + EXLOCK( _guard ); + + _status = msg.ok ? 
EStatus::Initialized : EStatus::NotSupported; + _enabled = msg.enabled; + + _connectionLostTimer.Restart(); + } + +/* +================================================= + _NextSample +================================================= +*/ + inline void GeneralProfilerClient::_NextSample (CSMsg_GenProf_NextSample const& msg) __NE___ + { + EXLOCK( _guard ); + + if ( _IsInitialized() ) + { + _invdt[ _countersIdx & 1 ] = msg.invdt; + + _countersIdx = (_countersIdx+1) & 1; + _pendingIdx = msg.index; + + _connectionLostTimer.Restart(); + } + } + +/* +================================================= + _Sample +================================================= +*/ + inline void GeneralProfilerClient::_Sample (CSMsg_GenProf_Sample const& msg) __NE___ + { + EXLOCK( _guard ); + + if ( _IsInitialized() and _pendingIdx == msg.index ) + { + auto& curr = _counters[ (_countersIdx+1) & 1 ]; + + for (uint i = 0, cnt = msg.count; i < cnt; ++i) + curr.insert_or_assign( msg.arr[i].first, msg.arr[i].second ); + } + } + +/* +================================================= + _CpuCluster +================================================= +*/ + inline void GeneralProfilerClient::_CpuCluster (Networking::CSMsg_GenProf_CpuCluster const& msg) __NE___ + { + EXLOCK( _guard ); + CHECK_ERRV( msg.idx < _cpuClusters.capacity() ); + + _cpuClusters.resize( Max( _cpuClusters.size(), msg.idx+1u )); + + auto& dst = _cpuClusters[ msg.idx ]; + dst.name = StringView{ msg.name, msg.length }; + dst.logicalCores = msg.logicalCores; + + _cpuCoreCount = 0; + for (auto& cluster : _cpuClusters) + _cpuCoreCount += uint(cluster.logicalCores.count()); + } + +/* +================================================= + _CpuUsage +================================================= +*/ + inline void GeneralProfilerClient::_CpuUsage (Networking::CSMsg_GenProf_CpuUsage const& msg) __NE___ + { + EXLOCK( _guard ); + + if ( _IsInitialized() and _pendingIdx == msg.index ) + { + const uint idx = (_countersIdx+1) & 1; + auto& curr 
= (msg.type == 0 ? _userSpace[idx] : _kernelSpace[idx]); + + ASSERT_Eq( _cpuCoreCount, msg.count ); + + for (uint i = 0, cnt = msg.count; i < cnt; ++i) + curr[i] = msg.arr[i]; + + _hasCpuUsage[idx] = true; + } + } + +/* +================================================= + GetMsgConsumer +================================================= +*/ + RC GeneralProfilerClient::GetMsgConsumer () __NE___ + { + return MakeRCNe( [this](auto ml){ _Consume( ml ); }, CSMessageGroup::Debug ); + } + + +} // AE::Profiler diff --git a/AE/engine/src/profiler/Remote/RemoteGeneralProfiler.h b/AE/engine/src/profiler/Remote/RemoteGeneralProfiler.h new file mode 100644 index 00000000..61122d37 --- /dev/null +++ b/AE/engine/src/profiler/Remote/RemoteGeneralProfiler.h @@ -0,0 +1,155 @@ +// Copyright (c) Zhirnov Andrey. For more information see 'LICENSE' + +#pragma once + +#include "profiler/Remote/Messages.h" + +namespace AE::Profiler +{ + + // + // Remote General Profiler Server + // + + class GeneralProfilerServer + { + // types + public: + using ECounter = GeneralProfiler::ECounter; + using ECounterSet = GeneralProfiler::ECounterSet; + using Counters_t = GeneralProfiler::Counters_t; + using CpuUsage_t = GeneralProfiler::CpuUsage_t; + private: + using ClientServer_t = Networking::ClientServerBase; + using MsgProducer = Networking::IAsyncCSMessageProducer; + + class MsgConsumer final : public Networking::ICSMessageConsumer + { + private: + GeneralProfilerServer& _server; + public: + MsgConsumer (GeneralProfilerServer &server) __NE___ : _server{server} {} + Networking::CSMessageGroupID GetGroupID () C_NE_OV { return CSMessageGroup::Debug; } + void Consume (ChunkList) __NE_OV; + }; + + friend class GeneralProfilerClient; + enum class EStatus : uint + { + NotInitialized, + NotSupported, + Initialized, + }; + + + // variables + private: + struct { + Atomic status {EStatus::NotInitialized}; + ubyte index = 0; + GeneralProfiler profiler; + GeneralProfiler::Counters_t counters; + Timer timer; + 
uint cpuCoreCount = 0; + } _prof; + + StaticRC _msgConsumer; + RC _msgProducer; + + + // methods + public: + GeneralProfilerServer () __NE___ : _msgConsumer{*this} {} + + ND_ bool Initialize (ClientServer_t &, RC mp) __NE___; + void Deinitialize () __NE___; + + void Update () __NE___; + + private: + void _GenProf_InitReq (Networking::CSMsg_GenProf_InitReq const &) __NE___; + void _SendCpuClusters () __NE___; + void _SendSamples () __NE___; + void _SendCpuUsage () __NE___; + }; + + + + // + // Remote General Profiler Client + // + + class GeneralProfilerClient final : public EnableRC + { + // types + public: + using ECounter = GeneralProfiler::ECounter; + using ECounterSet = GeneralProfiler::ECounterSet; + using Counters_t = GeneralProfiler::Counters_t; + using CpuUsage_t = GeneralProfiler::CpuUsage_t; + using CpuClusters_t = GeneralProfiler::CpuClusters_t; + + private: + using MsgProducer = Networking::IAsyncCSMessageProducer; + using MsgConsumer = Networking::ICSMessageConsumer; + using EStatus = GeneralProfilerServer::EStatus; + + + // variables + private: + mutable Threading::RWSpinLock _guard; + RC _msgProducer; + + ubyte _countersIdx = 0; + ubyte _pendingIdx = 0; + + Timer _connectionLostTimer {seconds{10}}; + ECounterSet _requiredCS; + + mutable bool _hasCpuUsage [2] = {}; + uint _cpuCoreCount = 0; + CpuUsage_t _userSpace [2]; + CpuUsage_t _kernelSpace [2]; + CpuClusters_t _cpuClusters; + + float _invdt [2] = {}; + Counters_t _counters [2]; + ECounterSet _enabled; + EStatus _status = EStatus::NotInitialized; + + + // methods + public: + explicit GeneralProfilerClient (RC mp) __NE___; + + ND_ bool Initialize (const ECounterSet &counterSet) __NE___; + void Deinitialize () __NE___; + ND_ bool IsInitialized () C_NE___ { SHAREDLOCK( _guard ); return _IsInitialized(); } + + void Sample (OUT Counters_t &result, INOUT float &invdt) __NE___; + ND_ bool GetUsage (OUT CpuUsage_t &, OUT CpuUsage_t &) C_NE___; + + ND_ ECounterSet EnabledCounterSet () C_NE___ { SHAREDLOCK( 
_guard ); return _enabled; } + ND_ CpuClusters_t GetCpuClusters () C_NE___ { SHAREDLOCK( _guard ); return _cpuClusters; } + ND_ uint GetCpuCoreCount () C_NE___ { SHAREDLOCK( _guard ); return _cpuCoreCount; } + + ND_ RC GetMsgConsumer () __NE___; + + + private: + ND_ bool _Initialize (const ECounterSet &counterSet) __NE___; + void _Consume (ChunkList) __NE___; + + ND_ bool _IsInitialized () C_NE___ { return _status == EStatus::Initialized; } + ND_ bool _IsNotInitialized () C_NE___ { return _status == EStatus::NotInitialized; } + + private: + void _InitRes (Networking::CSMsg_GenProf_InitRes const&) __NE___; + void _NextSample (Networking::CSMsg_GenProf_NextSample const&) __NE___; + void _Sample (Networking::CSMsg_GenProf_Sample const&) __NE___; + void _CpuCluster (Networking::CSMsg_GenProf_CpuCluster const&) __NE___; + void _CpuUsage (Networking::CSMsg_GenProf_CpuUsage const&) __NE___; + }; + + +} // AE::Profiler diff --git a/AE/engine/src/profiler/Remote/RemoteMaliProfiler.cpp b/AE/engine/src/profiler/Remote/RemoteMaliProfiler.cpp index 0b9b0a21..6240ca59 100644 --- a/AE/engine/src/profiler/Remote/RemoteMaliProfiler.cpp +++ b/AE/engine/src/profiler/Remote/RemoteMaliProfiler.cpp @@ -6,6 +6,7 @@ namespace AE::Profiler { using namespace AE::Networking; +#ifdef AE_ENABLE_MALI_HWCPIPE /* ================================================= MsgConsumer::Consume @@ -112,7 +113,9 @@ namespace AE::Profiler if_likely( not dt ) return; - _prof.profiler.Sample( OUT _prof.counters ); + float invdt; + _prof.profiler.Sample( OUT _prof.counters, OUT invdt ); + invdt = 1.f / dt.As().count(); if ( _prof.counters.empty() ) return; @@ -124,7 +127,7 @@ namespace AE::Profiler if ( msg ) { msg->index = _prof.index; - msg->dtInMs = ushort(dt.As().count()); + msg->invdt = invdt; is_sent = _msgProducer->AddMessage( msg ); } @@ -139,12 +142,12 @@ namespace AE::Profiler for (auto it = _prof.counters.begin(); it != _prof.counters.end();) { - auto msg = _msgProducer->CreateMsg< 
CSMsg_MaliProf_Sample >( SizeOf * step ); + const usize count = Min( _prof.counters.size() - sent, step+1 ); + + auto msg = _msgProducer->CreateMsg< CSMsg_MaliProf_Sample >( SizeOf * (count-1) ); if ( not msg ) break; - const usize count = Min( _prof.counters.size() - sent, step+1 ); - msg->index = _prof.index; msg->count = ubyte(count); @@ -165,6 +168,8 @@ namespace AE::Profiler _prof.index ++; } } + +#endif // AE_ENABLE_MALI_HWCPIPE //----------------------------------------------------------------------------- @@ -240,9 +245,11 @@ namespace AE::Profiler Sample ================================================= */ - void MaliProfilerClient::Sample (OUT Counters_t &result) __NE___ + void MaliProfilerClient::Sample (OUT Counters_t &result, INOUT float &invdt) __NE___ { result.clear(); + invdt = 0.f; + EXLOCK( _guard ); if ( _IsNotInitialized() ) @@ -253,6 +260,7 @@ namespace AE::Profiler } auto& curr = _counters[ _countersIdx & 1 ]; + invdt = _invdt[ _countersIdx & 1 ]; std::swap( result, curr ); curr.clear(); @@ -285,9 +293,10 @@ namespace AE::Profiler if ( _IsInitialized() ) { + _invdt[ _countersIdx & 1 ] = msg.invdt; + _countersIdx = (_countersIdx+1) & 1; _pendingIdx = msg.index; - _interval = milliseconds{ msg.dtInMs }; _connectionLostTimer.Restart(); } diff --git a/AE/engine/src/profiler/Remote/RemoteMaliProfiler.h b/AE/engine/src/profiler/Remote/RemoteMaliProfiler.h index 3c517c09..30b95503 100644 --- a/AE/engine/src/profiler/Remote/RemoteMaliProfiler.h +++ b/AE/engine/src/profiler/Remote/RemoteMaliProfiler.h @@ -101,7 +101,7 @@ namespace AE::Profiler Timer _connectionLostTimer {seconds{10}}; ECounterSet _requiredCS; - secondsf _interval; + float _invdt [2] = {}; Counters_t _counters [2]; ECounterSet _enabled; HWInfo _hwInfo; @@ -116,7 +116,7 @@ namespace AE::Profiler void Deinitialize () __NE___; ND_ bool IsInitialized () C_NE___ { SHAREDLOCK( _guard ); return _IsInitialized(); } - void Sample (OUT Counters_t &result) __NE___; + void Sample (OUT Counters_t 
&result, INOUT float &invdt) __NE___; ND_ ECounterSet EnabledCounterSet () C_NE___ { SHAREDLOCK( _guard ); return _enabled; } ND_ HWInfo GetHWInfo () C_NE___ { SHAREDLOCK( _guard ); return _hwInfo; } diff --git a/AE/engine/src/profiler/Remote/RemoteNVidiaProfiler.cpp b/AE/engine/src/profiler/Remote/RemoteNVidiaProfiler.cpp index c293500c..3bc67dbc 100644 --- a/AE/engine/src/profiler/Remote/RemoteNVidiaProfiler.cpp +++ b/AE/engine/src/profiler/Remote/RemoteNVidiaProfiler.cpp @@ -6,6 +6,7 @@ namespace AE::Profiler { using namespace AE::Networking; +#ifdef AE_ENABLE_NVML /* ================================================= MsgConsumer::Consume @@ -112,7 +113,9 @@ namespace AE::Profiler if_likely( not dt ) return; - _prof.profiler.Sample( OUT _prof.counters ); + float invdt; + _prof.profiler.Sample( OUT _prof.counters, OUT invdt ); + invdt = 1.f / dt.As().count(); if ( _prof.counters.empty() ) return; @@ -124,7 +127,7 @@ namespace AE::Profiler if ( msg ) { msg->index = _prof.index; - msg->dtInMs = ushort(dt.As().count()); + msg->invdt = invdt; is_sent = _msgProducer->AddMessage( msg ); } @@ -139,12 +142,12 @@ namespace AE::Profiler for (auto it = _prof.counters.begin(); it != _prof.counters.end();) { - auto msg = _msgProducer->CreateMsg< CSMsg_NVidiaProf_Sample >( SizeOf * step ); + const usize count = Min( _prof.counters.size() - sent, step+1 ); + + auto msg = _msgProducer->CreateMsg< CSMsg_NVidiaProf_Sample >( SizeOf * (count-1) ); if ( not msg ) break; - const usize count = Min( _prof.counters.size() - sent, step+1 ); - msg->index = _prof.index; msg->count = ubyte(count); @@ -165,6 +168,8 @@ namespace AE::Profiler _prof.index ++; } } + +#endif // AE_ENABLE_NVML //----------------------------------------------------------------------------- @@ -240,9 +245,11 @@ namespace AE::Profiler Sample ================================================= */ - void NVidiaProfilerClient::Sample (OUT Counters_t &result) __NE___ + void NVidiaProfilerClient::Sample (OUT Counters_t 
&result, INOUT float &invdt) __NE___ { result.clear(); + invdt = 0.f; + EXLOCK( _guard ); if ( _IsNotInitialized() ) @@ -253,6 +260,7 @@ namespace AE::Profiler } auto& curr = _counters[ _countersIdx & 1 ]; + invdt = _invdt[ _countersIdx & 1 ]; std::swap( result, curr ); curr.clear(); @@ -285,9 +293,10 @@ namespace AE::Profiler if ( _IsInitialized() ) { + _invdt[ _countersIdx & 1 ] = msg.invdt; + _countersIdx = (_countersIdx+1) & 1; _pendingIdx = msg.index; - _interval = milliseconds{ msg.dtInMs }; _connectionLostTimer.Restart(); } @@ -304,7 +313,7 @@ namespace AE::Profiler if ( _IsInitialized() and _pendingIdx == msg.index ) { - auto& curr = _counters[ _countersIdx & 1 ]; + auto& curr = _counters[ (_countersIdx+1) & 1 ]; for (uint i = 0, cnt = msg.count; i < cnt; ++i) curr.insert_or_assign( msg.arr[i].first, msg.arr[i].second ); diff --git a/AE/engine/src/profiler/Remote/RemoteNVidiaProfiler.h b/AE/engine/src/profiler/Remote/RemoteNVidiaProfiler.h index 8f26380c..bb4f4f11 100644 --- a/AE/engine/src/profiler/Remote/RemoteNVidiaProfiler.h +++ b/AE/engine/src/profiler/Remote/RemoteNVidiaProfiler.h @@ -101,7 +101,7 @@ namespace AE::Profiler Timer _connectionLostTimer {seconds{10}}; ECounterSet _requiredCS; - secondsf _interval; + float _invdt [2] = {}; Counters_t _counters [2]; ECounterSet _enabled; HWInfo _hwInfo; @@ -116,7 +116,7 @@ namespace AE::Profiler void Deinitialize () __NE___; ND_ bool IsInitialized () C_NE___ { SHAREDLOCK( _guard ); return _IsInitialized(); } - void Sample (OUT Counters_t &result) __NE___; + void Sample (OUT Counters_t &result, INOUT float &invdt) __NE___; ND_ ECounterSet EnabledCounterSet () C_NE___ { SHAREDLOCK( _guard ); return _enabled; } ND_ HWInfo GetHWInfo () C_NE___ { SHAREDLOCK( _guard ); return _hwInfo; } diff --git a/AE/engine/src/profiler/Remote/RemotePowerVRProfiler.cpp b/AE/engine/src/profiler/Remote/RemotePowerVRProfiler.cpp index a5a59611..03b5b4db 100644 --- a/AE/engine/src/profiler/Remote/RemotePowerVRProfiler.cpp +++ 
b/AE/engine/src/profiler/Remote/RemotePowerVRProfiler.cpp @@ -6,6 +6,7 @@ namespace AE::Profiler { using namespace AE::Networking; +#ifdef AE_ENABLE_PVRCOUNTER /* ================================================= MsgConsumer::Consume @@ -65,7 +66,8 @@ namespace AE::Profiler return; _prof.timer.Start( inMsg.updateInterval ); - _prof.index = 0; + _prof.samplesIdx = 0; + _prof.timingIdx = 0; _prof.profiler.Deinitialize(); @@ -106,15 +108,27 @@ namespace AE::Profiler MemoryBarrier( EMemoryOrder::Acquire ); - const auto dt = _prof.timer.Tick(); + _prof.profiler.Tick(); + + _SendTimings(); - if_likely( not dt ) + const auto dt = _prof.timer.Tick(); + if ( dt ) { - _prof.profiler.Tick(); - return; + _SendSamples( dt.As() ); } + } - _prof.profiler.Sample( OUT _prof.counters ); +/* +================================================= + _SendSamples +================================================= +*/ + void PowerVRProfilerServer::_SendSamples (milliseconds dt) __NE___ + { + float invdt; + _prof.profiler.Sample( OUT _prof.counters, OUT invdt ); + invdt = 1.f / TimeCast(dt).count(); if ( _prof.counters.empty() ) return; @@ -125,8 +139,8 @@ namespace AE::Profiler auto msg = _msgProducer->CreateMsg< CSMsg_PVRProf_NextSample >(); if ( msg ) { - msg->index = _prof.index; - msg->dtInMs = ushort(dt.As().count()); + msg->index = _prof.samplesIdx; + msg->invdt = invdt; is_sent = _msgProducer->AddMessage( msg ); } @@ -141,13 +155,13 @@ namespace AE::Profiler for (auto it = _prof.counters.begin(); it != _prof.counters.end();) { - auto msg = _msgProducer->CreateMsg< CSMsg_PVRProf_Sample >( SizeOf * step ); + const usize count = Min( _prof.counters.size() - sent, step+1 ); + + auto msg = _msgProducer->CreateMsg< CSMsg_PVRProf_Sample >( SizeOf * (count-1) ); if ( not msg ) break; - const usize count = Min( _prof.counters.size() - sent, step+1 ); - - msg->index = _prof.index; + msg->index = _prof.samplesIdx; msg->count = ubyte(count); auto it2 = it; @@ -164,9 +178,53 @@ namespace 
AE::Profiler } ASSERT( sent == _prof.counters.size() ); - _prof.index ++; + _prof.samplesIdx ++; + } + } + +/* +================================================= + _SendTimings +================================================= +*/ + void PowerVRProfilerServer::_SendTimings () __NE___ + { + _prof.profiler.ReadTimingData( OUT _prof.timing ); + + if ( _prof.timing.empty() ) + return; + + using TimeScope = PowerVRProfiler::TimeScope; + constexpr usize step = usize((NetConfig::TCP_MaxMsgSize - SizeOf) / SizeOf); + usize sent = 0; + + for (; sent < _prof.timing.size();) + { + const usize count = Min( _prof.timing.size() - sent, step+1 ); + + auto msg = _msgProducer->CreateMsg< CSMsg_PVRProf_Timing >( SizeOf * (count-1) ); + if ( not msg ) + break; + + msg->index = _prof.timingIdx; + msg->count = ubyte(count); + + for (usize i = 0; i < count; ++i) + { + msg->arr[i] = _prof.timing[sent+i]; + } + + if ( not _msgProducer->AddMessage( msg )) + break; + + sent += count; } + ASSERT( sent == _prof.timing.size() ); + + _prof.timingIdx ++; } + +#endif // AE_ENABLE_PVRCOUNTER //----------------------------------------------------------------------------- @@ -243,9 +301,11 @@ namespace AE::Profiler Sample ================================================= */ - void PowerVRProfilerClient::Sample (OUT Counters_t &result) __NE___ + void PowerVRProfilerClient::Sample (OUT Counters_t &result, INOUT float &invdt) __NE___ { result.clear(); + invdt = 0.f; + EXLOCK( _guard ); if ( _IsNotInitialized() ) @@ -256,6 +316,7 @@ namespace AE::Profiler } auto& curr = _counters[ _countersIdx & 1 ]; + invdt = _invdt[ _countersIdx & 1 ]; std::swap( result, curr ); curr.clear(); @@ -312,9 +373,10 @@ namespace AE::Profiler if ( _IsInitialized() ) { + _invdt[ _countersIdx & 1 ] = msg.invdt; + _countersIdx = (_countersIdx+1) & 1; _pendingCountersIdx = msg.index; - _interval = milliseconds{ msg.dtInMs }; _connectionLostTimer.Restart(); } diff --git a/AE/engine/src/profiler/Remote/RemotePowerVRProfiler.h 
b/AE/engine/src/profiler/Remote/RemotePowerVRProfiler.h index 531656eb..f88d7e9f 100644 --- a/AE/engine/src/profiler/Remote/RemotePowerVRProfiler.h +++ b/AE/engine/src/profiler/Remote/RemotePowerVRProfiler.h @@ -44,11 +44,13 @@ namespace AE::Profiler // variables private: struct { - Atomic status {EStatus::NotInitialized}; - ubyte index = 0; - PowerVRProfiler profiler; - PowerVRProfiler::Counters_t counters; - Timer timer; + Atomic status {EStatus::NotInitialized}; + ubyte samplesIdx = 0; + ubyte timingIdx = 0; + PowerVRProfiler profiler; + PowerVRProfiler::Counters_t counters; + PowerVRProfiler::TimeScopeArr_t timing; + Timer timer; } _prof; StaticRC _msgConsumer; @@ -67,6 +69,8 @@ namespace AE::Profiler private: void _InitReq (Networking::CSMsg_PVRProf_InitReq const &) __NE___; void _UpdatePVRProfiler () __NE___; + void _SendSamples (milliseconds dt) __NE___; + void _SendTimings () __NE___; }; @@ -103,7 +107,7 @@ namespace AE::Profiler Timer _connectionLostTimer {seconds{10}}; ECounterSet _requiredCS; - secondsf _interval; + float _invdt [2] = {}; Counters_t _counters [2]; TimeScopeArr_t _timings [2]; ECounterSet _enabled; @@ -119,7 +123,7 @@ namespace AE::Profiler ND_ bool IsInitialized () C_NE___ { SHAREDLOCK( _guard ); return _IsInitialized(); } void Tick () C_NE___ {} - void Sample (OUT Counters_t &result) __NE___; + void Sample (OUT Counters_t &result, INOUT float &invdt) __NE___; void ReadTimingData (OUT TimeScopeArr_t &) __NE___; ND_ ECounterSet EnabledCounterSet () C_NE___ { SHAREDLOCK( _guard ); return _enabled; } diff --git a/AE/engine/src/profiler/Utils/AdrenoProfiler.h b/AE/engine/src/profiler/Utils/AdrenoProfiler.h deleted file mode 100644 index fc06748a..00000000 --- a/AE/engine/src/profiler/Utils/AdrenoProfiler.h +++ /dev/null @@ -1,141 +0,0 @@ -// Copyright (c) Zhirnov Andrey. For more information see 'LICENSE' -/* - API for Adreno GPU hardware performance counters. 
- - [Performance counters description](https://github.com/azhirnov/cpu-gru-arch/blob/main/gpu/Adreno_PC.md) -*/ - -#pragma once - -#include "base/Utils/EnumSet.h" -#include "base/Pointers/RefCounter.h" - -namespace AE::Profiler -{ - using namespace AE::Base; - class AdrenoProfilerClient; - - - // - // Adreno GPU Profiler - // - - class AdrenoProfiler - { - // types - public: - enum class EGPUSeries : ubyte - { - Unknown, - A5xx, - A6xx, - A7xx, // TODO - }; - - enum class ECounter : ubyte - { - // RBBM - RBBM_RasterizerBusy, - RBBM_VSCbusy, // Visibility Stream Compressor - RBBM_UCHEbusy, // Unified L2 cache - RBBM_VBIFbusy, // ? - RBBM_TSEbusy, // ? - - // PC - PC_DeadPrim, // ? - PC_LivePrim, // ? - PC_IA_Vertices, // input vertices ? - PC_IA_Primitives, // input primitives ? - PC_VS_Invocations, // vertex shader invocations - PC_DrawCalls3D, // with DS attachment ? - PC_DrawCalls2D, // ? - PC_VPCPrimitives, // Varying/Position Cache primitives - - // VFD - VFD_TotalVertices, // ? - - // VPC - VPC_BusyCycles, - VPC_WorkingCycles, - - // Rasterizer - RAS_SuperTiles, - RAS_8x4Tiles, - RAS_MaskgenActive, - RAS_FullyCoveredSuperTiles, - RAS_FullyCovered8x4Tiles, - RAS_PrimKilledInvisible, - - // Render backend - RB_ZRead, // \__ Z buffer - RB_ZWrite, // / - RB_CRead, // \__ color - RB_CWrite, // / - RB_Z_Pass, // \. 
- RB_Z_Fail, // -|-- depth stencil test - RB_S_Fail, // / - RB_AliveCycles2D, - - // Visibility Stream Compressor - VSC_WorkingCycles, - - // Cache and Compression Unit - CCU_DepthBlocks, - CCU_ColorBlocks, - CCU_PartialBlockRead, - CCU_GMemRead, - CCU_GMemWrite, - CCU_2DPixels, - - // low resolution Z pass - LRZ_Read, - LRZ_Write, - LRZ_PrimKilledByMaskGen, - LRZ_PrimKilledByLRZ, - LRZ_PrimPassed, - LRZ_TileKilled, - LRZ_TotalPixel, - - _Count - }; - using ECounterSet = EnumSet< ECounter >; - using Counters_t = FlatHashMap< ECounter, ulong >; - - struct HWInfo - { - uint gpuId = 0; - EGPUSeries series = Default; - Bytes32u gmemSize; // on-chip memory - }; - - private: - struct Impl; - - - // variables - private: - Unique _impl; - - - // methods - public: - AdrenoProfiler () __NE___; - ~AdrenoProfiler () __NE___; - - ND_ bool Initialize (const ECounterSet &counterSet) __NE___; - void Deinitialize () __NE___; - ND_ bool IsInitialized () C_NE___; - - ND_ ECounterSet EnabledCounterSet () C_NE___; - ND_ HWInfo GetHWInfo () C_NE___; - - void Sample (OUT Counters_t &) C_NE___; - - - #ifndef AE_ENABLE_ADRENO_PERFCOUNTER - ND_ bool InitClient (RC) __NE___; - #endif - }; - - -} // AE::Profiler diff --git a/AE/engine/src/profiler/Utils/MaliProfiler.h b/AE/engine/src/profiler/Utils/MaliProfiler.h deleted file mode 100644 index be4dc638..00000000 --- a/AE/engine/src/profiler/Utils/MaliProfiler.h +++ /dev/null @@ -1,450 +0,0 @@ -// Copyright (c) Zhirnov Andrey. For more information see 'LICENSE' -/* - API for Mali GPU hardware performance counters. - (x.x.x) - link to counter description in docs. - By default it it link to 5th Gen performance counters guide, special key like V (Valhall), B (Bifrost) used for links to other architectures. 
- - [Performance counters description](https://github.com/azhirnov/cpu-gru-arch/blob/main/gpu/ARM-Mali_PC.md) -*/ - -#pragma once - -#include "base/Utils/EnumSet.h" -#include "base/Pointers/RefCounter.h" - -namespace AE::Profiler -{ - using namespace AE::Base; - class MaliProfilerClient; - - - // - // Mali GPU Profiler - // - - class MaliProfiler - { - // types - public: - enum class ECounter : ushort - { - GPUActiveCy, // 0 // (3.1.1) - GPUIRQActiveCy, // GPU interrupt pending cycles (3.1.6) - FragQueueJob, - FragQueueTask, - FragQueueActiveCy, // Fragment queue active cycles, Main phase queue active cycles (3.1.3) - NonFragQueueJob, - NonFragQueueTask, - NonFragQueueActiveCy, // Non-fragment queue active cycles, Binning phase queue active cycles (3.1.2) - ResQueueJob, - ResQueueTask, - ResQueueActiveCy, // 10 - ExtBusWrBt, // Output external write beats - ExtBusRdBt, // Output external read beats - ExtBusRdStallCy, // Output external read stall cycles - ExtBusWrStallCy, // Output external write stall cycles - FragActiveCy, // Fragment active cycles - FragRdPrim, // Fragment primitives loaded - FragThread, // Fragment threads - FragHelpThread, - FragRastQd, // Rasterized fine quads - FragEZSTestQd, // 20 // Early ZS tested quads - FragEZSKillQd, // Early ZS killed quads - FragLZSTestTd, - FragLZSKillTd, - FragTile, // Tile count ??? 
- FragTileKill, // Killed unchanged tiles - NonFragActiveCy, // Non-fragment active cycles - NonFragThread, // Non-fragment threads - CoreActiveCy, // Execution core active cycles - EngInstr, - LSIssueCy, // 30 // Load/store unit issue cycles (9.1.1) - TexInstr, - TexFiltIssueCy, // Texture filtering cycles (8.1.1) - LSRdHitCy, - LSWrHitCy, - GeomTrianglePrim, // Triangle primitives - GeomPointPrim, // Point primitives - GeomLinePrim, // Line primitives - GeomFrontFacePrim, // Visible front-facing primitives - GeomBackFacePrim, // Visible back-facing primitives - GeomVisiblePrim, // 40 // Visible primitives (4.1.3) - GeomFaceXYPlaneCullPrim, // Facing or XY plane test culled primitives - GeomZPlaneCullPrim, // Z plane culled primitives - TilerActiveCy, // Tiler active cycles (3.1.4) - GPUIRQUtil, // Interrupt pending utilization (3.2.5) - FragQueueUtil, // Fragment queue utilization, Main phase queue utilization (3.2.2) - NonFragQueueUtil, // Non-fragment queue utilization, Binning phase queue utilization (3.2.1) - ExtBusRdBy, // Output external read bytes (3.3.1) - ExtBusWrBy, // Output external write bytes (3.3.2) - ExtBusRdStallRate, // Output external read stall rate (3.4.1) - ExtBusWrStallRate, // 50 // Output external write stall rate (3.4.2) - TilerUtil, // Tiler utilization (3.2.3) - GeomTotalPrim, // Total input primitives (4.1.1) - GeomVisibleRate, // Visible primitives rate (4.2.1) - GeomTotalCullPrim, // Total culled primitives (4.1.2) - GeomFaceXYPlaneCullRate, // Facing or XY plane test cull (4.2.2) - GeomZPlaneCullRate, // Z plane test cull rate (4.2.3) - NonFragUtil, // Non-fragment utilization (5.3.2) - NonFragThroughputCy, // Average cycles per non-fragment thread (5.2.1) - FragUtil, // Fragment utilization (5.3.3) - FragThroughputCy, // 60 // Average cycles per fragment thread (5.2.2) - FragHelpTdRate, - FragEZSTestRate, // Early ZS tested quad percentage (4.5.1) - FragEZSKillRate, // Early ZS killed quad percentage (4.5.3) - FragLZSTestRate, // 
Late ZS tested thread percentage (4.5.5 ?) - FragLZSKillRate, // Late ZS killed thread percentage (4.5.6 ?) - FragOverdraw, // Fragments per pixel (4.4.3) - FragTileKillRate, // Unchanged tile kill rate (6.3.5) - CoreUtil, // Execution core utilization (5.3.5) - ALUUtil, // Arithmetic unit utilization (6.1.1) - TexSample, // 70 // Texture samples - TexCPI, // Texture filtering cycles per instruction (8.1.2) - TexUtil, // Texture unit utilization (6.1.3) - TexIssueCy, // Texture unit issue cycles - LSUtil, // Load/store unit utilization (6.1.4) - GPUPix, // total number of pixels that are shaded, assumes that all pixels in task are shaded (32x32, 5Gen: 64x64) (4.4.1) - GPUCyPerPix, // Average cycles per pixel (4.4.2) - FragQueueWaitRdCy, // Fragment queue job descriptor read wait cycles - FragQueueWaitIssueCy, // Fragment queue job issue wait cycles - FragQueueWaitDepCy, // Fragment queue job dependency wait cycles - FragQueueWaitFinishCy, // 80 // Fragment queue job finish wait cycles - NonFragQueueWaitRdCy, // Non-fragment queue job descriptor read wait cycles - NonFragQueueWaitIssueCy, // Non-fragment queue job issue wait cycles - NonFragQueueWaitDepCy, // Non-fragment queue job dependency wait cycles - NonFragQueueWaitFinishCy, // Non-fragment queue job finish wait cycles - ResQueueWaitRdCy, // Reserved queue job descriptor read wait cycles - ResQueueWaitIssueCy, // Reserved queue job issue wait cycles - ResQueueWaitDepCy, // Reserved queue job dependency wait cycles - ResQueueWaitFinishCy, // Reserved queue job finish wait cycles - MMUL2Hit, // MMU L2 lookup TLB hits - MMUL2Rd, // 90 // MMU L2 table read requests - MMULookup, // MMU lookup requests - L2CacheLookup, - L2CacheRdLookup, - L2CacheWrLookup, - FragFPKActiveCy, // Forward pixel kill buffer active cycles - LSRdCy, // Load/store unit read issues - LSWrCy, // Load/store unit write issues - LSAtomic, // Load/store unit atomic issues (9.1.6) - TilerPosCacheHit, // Position cache hit requests - 
TilerPosCacheMiss, // 100 // Position cache miss requests - FragFPKBUtil, // Fragment FPK buffer utilization (5.3.4) - FragQueueWaitFlushCy, // Fragment queue cache flush wait cycles - NonFragQueueWaitFlushCy, // Non-fragment queue cache flush wait cycles - ResQueueWaitFlushCy, // Non-fragment queue cache flush wait cycles - L2CacheFlush, // L2 cache flush requests - GeomSampleCullPrim, // Sample test culled primitives - TilerRdBt, // Output internal read beats - TilerWrBt, - GeomPosShadTask, // Tiler position shading requests - TilerPosShadStallCy, // 110 // Tiler position shading stall cycles - TilerPosShadFIFOFullCy, // Tiler position FIFO full cycles - TilerVarCacheHit, // Varying cache hits - TilerVarCacheMiss, // Varying cache misses - GeomVarShadTask, // Tiler varying shading requests - TilerVarShadStallCy, // Tiler varying shading stall cycles - FragRastPrim, // Rasterized primitives - FragWarp, // Fragment warps (5.1.2) - FragPartWarp, - FragEZSUpdateQd, // Early ZS updated quads - FragLZSTestQd, // 120 // Late ZS tested quads - FragLZSKillQd, // Late ZS killed quads - FragOpaqueQd, // Occluding quads - NonFragTask, - NonFragWarp, // Non-fragment warps (5.1.1) - EngActiveCy, - EngDivergedInstr, - EngStarveCy, - TexQuads, // Texture quads - TexQuadPass, - TexQuadPassDescMiss, // 130 - TexQuadPassMip, - TexQuadPassTri, - TexCacheFetch, - TexCacheCompressFetch, - TexCacheLookup, - LSFullRd, // Load/store unit full read issues (9.1.2) - LSPartRd, // Load/store unit partial read issues (9.1.3) - LSFullWr, // Load/store unit full write issues (9.1.4) - LSPartWr, // Load/store unit partial write issues (9.1.5) - VarInstr, // 140 // Varying unit instructions - Var32IssueSlot, // 32-bit interpolation slots - Var16IssueSlot, // 16-bit interpolation slots - AttrInstr, // Attribute instructions - SCBusFFEL2RdBt, // Fragment front-end read beats from L2 cache - SCBusFFEExtRdBt, // Fragment front-end read beats from external memory - SCBusLSL2RdBt, // Load/store unit 
read beats from L2 cache - SCBusLSExtRdBt, // Load/store unit read beats from external memory - SCBusTexL2RdBt, // Texture unit read beats from L2 cache - SCBusTexExtRdBt, // Texture unit read beats from external memory - SCBusOtherL2RdBt, // 150 // Miscellaneous read beats from L2 cache - SCBusLSWBWrBt, // Load/store unit write-back write beats - SCBusTileWrBt, // Tile unit write beats to L2 memory system - SCBusLSOtherWrBt, // Load/store unit other write beats - MMUL3Rd, // MMU L3 table read requests - MMUL3Hit, // MMU L3 lookup TLB hits - MMUS2Lookup, // MMU stage 2 lookup requests - MMUS2L3Rd, // MMU stage 2 L3 lookup requests - MMUS2L2Rd, // MMU stage 2 L2 lookup requests - MMUS2L3Hit, // MMU stage 2 L3 lookup TLB hits - MMUS2L2Hit, // 160 // MMU stage 2 L2 lookup TLB hits - L2CacheRd, // Input internal read requests - L2CacheRdStallCy, // Input internal read stall cycles - L2CacheWr, // Input internal write requests - L2CacheWrStallCy, // Input internal write stall cycles - L2CacheSnp, // Input internal snoop requests - L2CacheSnpStallCy, // Input internal snoop stall cycles - L2CacheL1Rd, // Output internal read requests - L2CacheL1RdStallCy, // Output internal read stall cycles - L2CacheL1Wr, // Output internal write requests - L2CacheSnpLookup, // 170 // Input external snoop lookup requests - ExtBusRd, // Output external read transactions - ExtBusRdNoSnoop, // Output external ReadNoSnoop transactions - ExtBusRdUnique, // Output external ReadUnique transactions - ExtBusRdOTQ1, // Output external outstanding reads 0-25% - ExtBusRdOTQ2, // Output external outstanding reads 25-50% - ExtBusRdOTQ3, // Output external outstanding reads 50-75% - ExtBusRdLat0, // Output external read latency 0-127 cycles (3.5.1) - ExtBusRdLat128, // Output external read latency 128-191 cycles (3.5.2) - ExtBusRdLat192, // Output external read latency 192-255 cycles (3.5.3) - ExtBusRdLat256, // 180 // Output external read latency 256-319 cycles (3.5.4) - ExtBusRdLat320, // Output 
external read latency 320-383 cycles (3.5.5) - ExtBusWr, // Output external write transactions - ExtBusWrNoSnoopFull, // Output external WriteNoSnoopFull transactions - ExtBusWrNoSnoopPart, // Output external WriteNoSnoopPartial transactions - ExtBusWrSnoopFull, // Output external WriteSnoopFull transactions - ExtBusWrSnoopPart, // Output external WriteSnoopPartial transactions - ExtBusWrOTQ1, // Output external outstanding writes 0-25% - ExtBusWrOTQ2, // Output external outstanding writes 25-50% - ExtBusWrOTQ3, // Output external outstanding writes 50-75% - L2CacheIncSnp, // 190 // Input external snoop transactions - L2CacheIncSnpStallCy, // Input external snoop stall cycles - L2CacheRdMissRate, // L2 cache read miss rate - L2CacheWrMissRate, // L2 cache write miss rate - ExtBusRdLat384, // Output external read latency 384+ cycles (3.5.6) - ExtBusRdOTQ4, // Output external outstanding reads 75-100% - ExtBusWrOTQ4, // Output external outstanding writes 75-100% - GeomSampleCullRate, // Sample test cull rate (4.2.4) - GeomPosShadThread, // Position shader thread invocations (4.3.1) - GeomPosShadThreadPerPrim, // Position threads per input primitive, should be < 1.5 (4.3.3) - TilerPosCacheHitRate, // 200 // Position cache hit rate - GeomVarShadThread, // Varying shader thread invocations (4.3.2) - GeomVarShadThreadPerPrim, // Varying threads per input primitive (4.3.4) - TilerVarCacheHitRate, - FragOpaqueQdRate, // Occluding quad percentage - FragTransparentQd, // Non-occluding quads - FragShadedQd, // Shaded coarse quads - FragPartWarpRate, - FragEZSUpdateRate, // Early ZS updated quad percentage (4.5.2) - FragFPKKillQd, // Forward pixel kill killed quads - FragFPKKillRate, // 210 // Forward pixel kill killed quad percentage (4.5.4) - EngDivergedInstrRate, // Warp divergence percentage (6.2.2) - TexCacheUtil, - TexMipInstrRate, - TexCacheCompressFetchRate, - TexTriInstrRate, - Var32IssueCy, // 32-bit interpolation active cycles (7.1.3) - Var16IssueCy, // 16-bit 
interpolation active cycles (7.1.2) - VarIssueCy, // Varying unit issue cycles (7.1.1) - VarUtil, // Varying unit utilization (6.1.2) - SCBusFFEL2RdBy, // 220 // Front-end unit read bytes from L2 cache (11.1.1) - SCBusFFEExtRdBy, // Front-end unit read bytes from external memory (11.2.1) - SCBusLSL2RdBy, // Load/store unit read bytes from L2 cache (11.1.2) - SCBusLSL2RdByPerRd, // Load/store unit bytes read from L2 per access cycle (9.2.1) - SCBusLSExtRdBy, // Load/store unit read bytes from external memory (11.2.2) - SCBusLSExtRdByPerRd, // Load/store unit bytes read from external memory per access cycle (9.2.2) - SCBusTexL2RdBy, // Texture unit read bytes from L2 cache (11.1.3) - SCBusTexL2RdByPerRd, // Texture unit bytes read from L2 per texture cycle (8.2.1) - SCBusTexExtRdBy, // Texture unit read bytes from external memory (11.2.3) - SCBusTexExtRdByPerRd, // Texture unit bytes read from external memory per texture cycle (8.2.2) - SCBusLSWrBt, // 230 // Load/store unit write beats to L2 memory system - SCBusLSWrBy, // Load/store unit write bytes (11.3.1) - SCBusLSWrByPerWr, // Load/store unit bytes written to L2 per access cycle (9.2.3) - SCBusTileWrBy, // Tile unit write bytes (11.3.2) - CoreAllRegsWarp, // Warps using more than 32 registers - CoreFullQdWarp, // Full quad warps - CoreAllRegsWarpRate, // All registers warp rate (6.2.3) - CoreFullQdWarpRate, // Full quad warp rate (6.3.3) - TexMipInstr, - TexCompressInstr, - Tex3DInstr, // 240 - TexTriInstr, - TexCoordStallCy, - TexDataStallCy, - TexPartDataStallCy, - SCBusOtherWrBt, - TexCompressInstrRate, - Tex3DInstrRate, - SCBusOtherWrBy, - FragRastPartQd, // Partial rasterized fine quads - EngFMAInstr, // 250 // Arithmetic FMA instructions - EngCVTInstr, // Arithmetic CVT instructions - EngSFUInstr, // Arithmetic SFU instructions - EngICacheMiss, // Instruction cache misses - EngSWBlendInstr, - TexInBt, - TexDescStallCy, // Texture descriptor stall cycles - TexDataFetchStallCy, - TexFiltStallCy, - 
TexFullBiFiltCy, // Texture filtering cycles using full bilinear (V: 8.1.2) - TexFullTriFiltCy, // 260 // Texture filtering cycles using full trilinear (V: 8.1.3) - TexOutMsg, - TexOutBt, - FragRastPartQdRate, // Partial coverage rate (6.3.1) - EngFMAPipeUtil, // FMA pipe utilization - EngCVTPipeUtil, // CVT pipe utilization - EngSFUPipeUtil, // SFU pipe utilization - EngArithInstr, // Arithmetic instruction issue cycles - EngSWBlendRate, // Shader blend percentage (6.2.4) - TexInBusUtil, // Texture input bus utilization - TexOutBusUtil, // 270 // Texture output bus utilization - TexFiltFullRate, // Texture full speed filtering percentage - AnyActiveCy, - AnyUtil, - CSFMCUActiveCy, // MCU active cycles (3.1.5) - GPUIterActiveCy, - GPUIRQ, - L2CacheFlushCy, - VertIterQueuedCy, - VertIterJob, - VertIterTask, // 280 - VertIterTotalActiveCy, - VertIterIRQActiveCy, - VertIterAssignStallCy, - TilerIterDrainStallCy, - CompIterQueuedCy, // Compute queue active cycles (3.1.4) - CompIterJob, - CompIterTask, - CompIterTotalActiveCy, - CompIterIRQActiveCy, - CompIterAssignStallCy, // 290 - CompIterDrainStallCy, - FragIterQueuedCy, - FragIterJob, - FragIterTask, - FragIterTotalActiveCy, - FragIterIRQActiveCy, - FragIterAssignStallCy, - CSFCEUActiveCy, - CSFLSUActiveCy, - CSFCS0ActiveCy, // 300 - CS0WaitStallCy, - CSFCS1ActiveCy, - CS1WaitStallCy, - CSFCS2ActiveCy, - CS2WaitStallCy, - CSFCS3ActiveCy, - CS3WaitStallCy, - L2CacheEvict, - L2CacheCleanUnique, - FragIterActiveCy, // 310 - FragIterUtil, - VertIterActiveCy, - VertIterUtil, - CompIterActiveCy, - CompIterUtil, // Compute queue utilization ??? 
(3.2.3) - CSFMCUUtil, // Microcontroller utilization (3.2.4) - CSFLSUUtil, - CSFCEUUtil, - EngNarrowInstr, // Narrow arithmetic instructions (8/16 bit) - FragRastCoarseQd, // 320 - RTUTri, - RTUBox, - RTUTriBin1, // Ray tracing triangle batches with 1-4 rays (10.4.4) - RTUTriBin5, // Ray tracing triangle batches with 5-8 rays (10.4.3) - RTUTriBin9, // Ray tracing triangle batches with 9-12 rays (10.4.2) - RTUTriBin13, // Ray tracing triangle batches with 13-16 rays (10.4.1) - RTUBoxBin1, // Ray tracing box nodes with 1-4 rays (10.3.4) - RTUBoxBin5, // Ray tracing box nodes with 5-8 rays (10.3.3) - RTUBoxBin9, // Ray tracing box nodes with 9-12 rays (10.3.2) - RTUBoxBin13, // 330 // Ray tracing box nodes with 13-16 rays (10.3.1) - RTUOpaqueHit, // Ray tracing opaque triangle hits (10.2.2) - RTUNonOpaqueHit, // Ray tracing non-opaque triangle hits (10.2.3) - RTUFirstHitTerm, // Ray tracing first hit terminations (10.2.5) - RTUMiss, // Ray tracing ray misses (10.2.4) - RTURay, // Ray tracing started rays (10.2.1) - RTUBoxIssueCy, // Ray tracing box tester issue cycles (10.1.1) - RTUTriIssueCy, // Ray tracing triangle tester issue cycles (10.1.2) - GeomFaceCullPrim, - GeomPlaneCullPrim, - GeomFaceCullRate, // 340 - GeomPlaneCullRate, - FragShadRate, // Fragment shading rate (4.6.1) (6.3.4) - CoreFragWarpOcc, // Fragment warp occupancy % (6.3.2) - EngNarrowInstrRate, // Narrow arithmetic percentage (6.2.1) - RTUUtil, // Ray tracing unit utilization (6.1.5) - BinningIterQueuedCy, // Binning phase queue active cycles (3.1.2) - BinningIterJob, - BinningIterTask, - BinningIterIRQActiveCy, - BinningIterAssignStallCy, // 350 - MainIterQueuedCy, // Main phase queue active cycles (3.1.3) - MainIterJob, - MainIterTask, - MainIterIRQActiveCy, - MainIterAssignStallCy, - TexL1CacheLoadCy, - TexCacheSimpleLoadCy, - TexL1CacheOutputCy, - TexL1CacheLookupCy, - TexOutSingleMsg, // 360 - TexCacheLookupCy, - TexCacheComplexLoadCy, - TexIndexCy, - TexClkStarvedCy, - TexClkActiveCy, - 
GeomScissorCullPrim, - GeomVisibleDVSPrim, - MainIterActiveCy, - MainIterUtil, - BinningIterActiveCy, // 370 - BinningIterUtil, - GeomScissorCullRate, - _Count - }; - using ECounterSet = EnumSet< ECounter >; - using Counters_t = FlatHashMap< ECounter, double >; - - struct HWInfo - { - ubyte shaderCoreCount = 0; - ubyte execEngineCount = 0; - ushort busWidth = 0; // bits - ushort l2Slices = 0; - Bytes32u l2SliceSize; - ushort tileSize = 0; // pixels? - ushort warpSize = 0; - }; - - private: - struct Impl; - - - // variables - private: - Unique _impl; - - - // methods - public: - MaliProfiler () __NE___; - ~MaliProfiler () __NE___; - - ND_ bool Initialize (const ECounterSet &counterSet) __NE___; - void Deinitialize () __NE___; - ND_ bool IsInitialized () C_NE___; - - ND_ ECounterSet EnabledCounterSet () C_NE___; - ND_ HWInfo GetHWInfo () C_NE___; - - void Sample (OUT Counters_t &) C_NE___; - - - #ifndef AE_ENABLE_MALI_HWCPIPE - ND_ bool InitClient (RC) __NE___; - #endif - }; - - -} // AE::Profiler diff --git a/AE/engine/src/scripting/Bindings/CoreBindings.h b/AE/engine/src/scripting/Bindings/CoreBindings.h index 0a02345c..7575b1fe 100644 --- a/AE/engine/src/scripting/Bindings/CoreBindings.h +++ b/AE/engine/src/scripting/Bindings/CoreBindings.h @@ -36,6 +36,8 @@ namespace AE::Scripting packed_float4x2, packed_float4x3, packed_float4x4 >; + using QuatTypes = TypeList< PackedQuat >; + // functions private: @@ -46,6 +48,7 @@ namespace AE::Scripting static void BindStdTypes (const ScriptEnginePtr &se) __Th___; static void BindScalarMath (const ScriptEnginePtr &se) __Th___; static void BindVectorMath (const ScriptEnginePtr &se) __Th___; + static void BindQuaternion (const ScriptEnginePtr &se) __Th___; static void BindMatrixMath (const ScriptEnginePtr &se) __Th___; // requires 'Vec', 'Rect' static void BindVectorSwizzle (const ScriptEnginePtr &se) __Th___; // requires 'String' static void BindRect (const ScriptEnginePtr &se) __Th___; @@ -116,6 +119,8 @@ AE_DECL_SCRIPT_OBJ( 
AE::Math::packed_float4x2, "float4x2" ); AE_DECL_SCRIPT_OBJ( AE::Math::packed_float4x3, "float4x3" ); AE_DECL_SCRIPT_OBJ( AE::Math::packed_float4x4, "float4x4" ); +AE_DECL_SCRIPT_OBJ( AE::Math::PackedQuat, "Quat" ); + AE_DECL_SCRIPT_OBJ( AE::Math::VecSwizzle, "VecSwizzle"); AE_DECL_SCRIPT_OBJ( AE::Math::RGBA32f, "RGBA32f" ); diff --git a/AE/engine/src/scripting/Bindings/CoreBindings_BindQuaternion.cpp b/AE/engine/src/scripting/Bindings/CoreBindings_BindQuaternion.cpp new file mode 100644 index 00000000..1951b770 --- /dev/null +++ b/AE/engine/src/scripting/Bindings/CoreBindings_BindQuaternion.cpp @@ -0,0 +1,176 @@ +// Copyright (c) Zhirnov Andrey. For more information see 'LICENSE' + +#include "scripting/Bindings/CoreBindings.h" +#include "scripting/Impl/ClassBinder.h" +#include "scripting/Impl/ScriptEngine.inl.h" + +namespace AE::Scripting +{ +namespace +{ + +/* +================================================= + QuatCtor +================================================= +*/ + template + struct QuatCtor + { + private: + using T = typename Q::Value_t; + using Vec4_t = PackedVec< T, 4 >; + + + static void _Ctor3 (void* mem, T w, T x, T y, T z) + { + PlacementNew< Q >( OUT mem, w, x, y, z ); + } + + + static void _V4Ctor (void* mem, const Q &q) + { + PlacementNew< Vec4_t >( OUT mem, q.x, q.y, q.z, q.w ); + } + + + public: + static void Bind (ClassBinder &binder, ClassBinder &binder2) + { + binder.CreateClassValue(); + + binder.AddConstructor( &_Ctor3, {"w", "x", "y", "z"} ); + + binder.AddProperty( &Q::x, "x" ); + binder.AddProperty( &Q::y, "y" ); + binder.AddProperty( &Q::z, "z" ); + binder.AddProperty( &Q::w, "w" ); + + binder2.AddConstructor( &_V4Ctor, {"quat"} ); + } + }; + +/* +================================================= + QuatFunc +================================================= +*/ + template + struct QuatFunc + { + using Quat_t = Q; + using T = typename Q::Value_t; + using Vec3_t = PackedVec< T, 3 >; + using Vec4_t = PackedVec< T, 4 >; + using Rad_t 
= typename Q::Rad_t; + using Rad3_t = typename Q::Rad3_t; + + + static Quat_t Add_q_q (const Quat_t& lhs, const Quat_t &rhs) { return lhs + rhs; } + static Quat_t Sub_q_q (const Quat_t& lhs, const Quat_t &rhs) { return lhs - rhs; } + static Quat_t Mul_q_q (const Quat_t& lhs, const Quat_t &rhs) { return lhs * rhs; } + static Vec3_t Mul_q_v3 (const Quat_t& lhs, const Vec3_t &rhs) { return lhs * rhs; } + static Vec4_t Mul_q_v4 (const Quat_t& lhs, const Vec4_t &rhs) { return lhs * rhs; } + static Quat_t Mul_q_s (const Quat_t& lhs, T rhs) { return lhs * rhs; } + static Quat_t Div_q_s (const Quat_t& lhs, T rhs) { return lhs / rhs; } + + static Quat_t RotateX (const Quat_t&, T angle) { return Quat_t::RotateX( Rad_t{angle} ); } + static Quat_t RotateY (const Quat_t&, T angle) { return Quat_t::RotateY( Rad_t{angle} ); } + static Quat_t RotateZ (const Quat_t&, T angle) { return Quat_t::RotateZ( Rad_t{angle} ); } + + static Quat_t Rotate (const Quat_t&, T angle, const Vec3_t &axis) { return Quat_t::Rotate( Rad_t{angle}, axis ); } + static Quat_t Rotate1 (const Quat_t&, const Vec3_t &angles) { return Quat_t::Rotate( Rad3_t{angles} ); } + static Quat_t Rotate3 (const Quat_t&, T angleX, T angleY, T angleZ) { return Quat_t::Rotate( Rad3_t{Vec3_t{ angleX, angleY, angleZ }}); } + static Quat_t Rotate2 (const Quat_t&, const Vec3_t &angles) { return Quat_t::Rotate2( Rad3_t{angles} ); } + + static Quat_t LookAt (const Quat_t&, const Vec3_t &dir, const Vec3_t &up) { return Quat_t::LookAt( dir, up ); } + static Quat_t From2Normals (const Quat_t&, const Vec3_t &n1, const Vec3_t &n2) { return Quat_t::From2Normals( n1, n2 ); } + static Quat_t FromAngleAxis (const Quat_t&, T angle, const Vec3_t &axis) { return Quat_t::FromAngleAxis( Rad_t{angle}, axis ); } + }; + +/* +================================================= + BindFloatQuat +================================================= +*/ + template + static void BindFloatQuat (ClassBinder &binder, const ScriptEnginePtr &) + { + StaticAssert( 
IsQuat ); + + using F = QuatFunc; + + binder.Operators() + // .BinaryAssign( EBinaryOperator::Add, &F::Add_am_s ) + .Binary( EBinaryOperator::Add, &F::Add_q_q ) + + // .BinaryAssign( EBinaryOperator::Sub, &F::Sub_am_s ) + .Binary( EBinaryOperator::Sub, &F::Sub_q_q ) + + // .BinaryAssign( EBinaryOperator::Mul, &F::Mul_am_s ) + .Binary( EBinaryOperator::Mul, &F::Mul_q_q ) + .Binary( EBinaryOperator::Mul, &F::Mul_q_v3 ) + .Binary( EBinaryOperator::Mul, &F::Mul_q_v4 ) + .Binary( EBinaryOperator::Mul, &F::Mul_q_s ) + + .Binary( EBinaryOperator::Div, &F::Div_q_s ); + + binder.AddMethodFromGlobal( &F::RotateX, "RotateX", {"angle"} ); + binder.AddMethodFromGlobal( &F::RotateY, "RotateY", {"angle"} ); + binder.AddMethodFromGlobal( &F::RotateZ, "RotateZ", {"angle"} ); + + binder.AddMethodFromGlobal( &F::Rotate, "Rotate", {"angle", "axis"} ); + binder.AddMethodFromGlobal( &F::Rotate1, "Rotate", {"angles"} ); + binder.AddMethodFromGlobal( &F::Rotate2, "Rotate2", {"angles"} ); + binder.AddMethodFromGlobal( &F::Rotate3, "Rotate", {"angleX", "angleY", "angleZ"} ); + + binder.AddMethodFromGlobal( &F::LookAt, "LookAt", {"dir", "up"} ); + binder.AddMethodFromGlobal( &F::From2Normals, "From2Normals", {"norm1", "norm2"} ); + binder.AddMethodFromGlobal( &F::FromAngleAxis, "FromAngleAxis", {"angle", "axis"} ); + } + +/* +================================================= + BindQuat_Func +================================================= +*/ + struct BindQuat_Func + { + ScriptEnginePtr _se; + + explicit BindQuat_Func (const ScriptEnginePtr &se) : _se{se} + {} + + template + void operator () () + { + using Vec4_t = PackedVec< typename T::Value_t, 4 >; + + ClassBinder binder { _se }; + ClassBinder binder2 { _se }; + + QuatCtor::Bind( binder, binder2 ); + + BindFloatQuat( binder, _se ); + } + }; + +} // namespace + + +/* +================================================= + BindQuaternion +================================================= +*/ + void CoreBindings::BindQuaternion (const 
ScriptEnginePtr &se) __Th___ + { + CHECK_THROW( se and se->IsInitialized() ); + + BindQuat_Func func{ se }; + + QuatTypes::VisitTh( func ); + } + + +} // AE::Scripting diff --git a/AE/engine/src/scripting/Bindings/CoreBindings_BindVectorMath.inl.h b/AE/engine/src/scripting/Bindings/CoreBindings_BindVectorMath.inl.h index de501058..0b0e3836 100644 --- a/AE/engine/src/scripting/Bindings/CoreBindings_BindVectorMath.inl.h +++ b/AE/engine/src/scripting/Bindings/CoreBindings_BindVectorMath.inl.h @@ -111,19 +111,19 @@ namespace template static void _Ctor2 (void* mem, const PackedVec &value) { - PlacementNew< PackedVec >( OUT mem, PackedVec{value.x, value.y, B{0}} ); + PlacementNew< PackedVec >( OUT mem, PackedVec{ static_cast(value.x), static_cast(value.y), T{0} }); } template static void _Ctor3 (void* mem, const PackedVec &value) { - PlacementNew< PackedVec >( OUT mem, PackedVec{value} ); + PlacementNew< PackedVec >( OUT mem, PackedVec{ value }); } template static void _Ctor4 (void* mem, const PackedVec &value) { - PlacementNew< PackedVec >( OUT mem, PackedVec{value.x, value.y, value.z} ); + PlacementNew< PackedVec >( OUT mem, PackedVec{ static_cast(value.x), static_cast(value.y), static_cast(value.z) }); } static void _CtorArg2 (void* mem, const PackedVec &xy, T z) @@ -201,13 +201,13 @@ namespace template static void _Ctor2 (void* mem, const PackedVec &value) { - PlacementNew< PackedVec >( OUT mem, PackedVec{value.x, value.y, B{0}, B{0}} ); + PlacementNew< PackedVec >( OUT mem, PackedVec{ static_cast(value.x), static_cast(value.y), T{0}, T{0}} ); } template static void _Ctor3 (void* mem, const PackedVec &value) { - PlacementNew< PackedVec >( OUT mem, PackedVec{value.x, value.y, value.z, B{0}} ); + PlacementNew< PackedVec >( OUT mem, PackedVec{ static_cast(value.x), static_cast(value.y), static_cast(value.z), T{0} }); } template diff --git a/AE/engine/src/scripting/CMakeLists.txt b/AE/engine/src/scripting/CMakeLists.txt index 33e764e9..7bd0bb12 100644 --- 
a/AE/engine/src/scripting/CMakeLists.txt +++ b/AE/engine/src/scripting/CMakeLists.txt @@ -25,6 +25,15 @@ if (${AE_ENABLE_SCRIPTING} AND ${AE_ENABLE_EXCEPTIONS}) EnablePCH( "Scripting" ) EnablePrebuild( "Scripting" ) + EnableUnitBuild( "Scripting" ) + + if (${AE_USE_UNITY_BUILD}) + set( NON_UNITY_BUILD_SRC + "${CMAKE_CURRENT_SOURCE_DIR}/Bindings/CoreBindings_BindVectorMath1.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/Bindings/CoreBindings_BindVectorMath2.cpp" + ) + set_property( SOURCE ${NON_UNITY_BUILD_SRC} PROPERTY SKIP_UNITY_BUILD_INCLUSION ON ) + endif() install( TARGETS "Scripting" ARCHIVE DESTINATION "lib" ) endif() diff --git a/AE/engine/src/scripting/Impl/EnumBinder.h b/AE/engine/src/scripting/Impl/EnumBinder.h index 9f9eeaa5..24258de2 100644 --- a/AE/engine/src/scripting/Impl/EnumBinder.h +++ b/AE/engine/src/scripting/Impl/EnumBinder.h @@ -27,7 +27,10 @@ namespace AE::Scripting String _name; const bool _genHeader = false; + #if AE_SCRIPT_CPP_REFLECTION String _header; + String _header2; + #endif // methods @@ -36,7 +39,7 @@ namespace AE::Scripting ~EnumBinder () __NE___; void Create () __Th___; - ND_ bool IsRegistered () C_NE___; + ND_ bool IsRegistered () C_NE___ { return _engine->IsRegistered( _name ); } void AddValue (StringView name, T value) __Th___; @@ -70,8 +73,10 @@ namespace AE::Scripting template EnumBinder::~EnumBinder () __NE___ { + #if AE_SCRIPT_CPP_REFLECTION if_unlikely( _genHeader ) - _engine->AddCppHeader( RVRef(_name), RVRef(_header), AngelScript::asOBJ_ENUM ); + _engine->AddCppHeader( RVRef(_name), RVRef(_header << _header2), AngelScript::asOBJ_ENUM ); + #endif } /* @@ -89,25 +94,18 @@ namespace AE::Scripting AS_CHECK_THROW( res ); + #if AE_SCRIPT_CPP_REFLECTION if_unlikely( _genHeader ) { const String int_type = "uint"s << ToString(sizeof(T)*8); - _header << "struct " << _name << "\n{\n"; - _header << "\t" << _name << " () {}\n"; - _header << "\t" << _name << " (" << int_type << ") {}\n"; - _header << "\toperator " << int_type << " () const;\n"; 
- } - } + _header << "enum class " << _name << " : " << int_type << "\n{\n"; -/* -================================================= - IsRegistered -================================================= -*/ - template - bool EnumBinder::IsRegistered () C_NE___ - { - return _engine->IsRegistered( _name ); + _header2 << "};\n"; + _header2 << int_type << " operator | (" << _name << " lhs, " << _name << " rhs);\n"; + _header2 << int_type << " operator | (" << int_type << " lhs, " << _name << " rhs);\n"; + _header2 << int_type << " operator | (" << _name << " lhs, " << int_type << " rhs);\n"; + } + #endif } /* @@ -120,14 +118,24 @@ namespace AE::Scripting { ASSERT( slong(value) >= MinValue() and slong(value) <= MaxValue() ); - AS_CHECK_THROW( GetASEngine()->RegisterEnumValue( _name.c_str(), (String{Name()} + '_' + String{valueName}).c_str(), int(value) )); + CHECK_THROW( not valueName.empty() ); + AS_CHECK_THROW( GetASEngine()->RegisterEnumValue( _name.c_str(), (String{Name()} << '_' << valueName).c_str(), int(value) )); + #if AE_SCRIPT_CPP_REFLECTION if_unlikely( _genHeader ) { - _header << "\tstatic constexpr "; - _header << "uint"s << ToString(sizeof(T)*8) << ' '; - _header << valueName << " = " << ToString( ulong(value) ) << ";\n"; + if ( Parser::CPP.IsWordBegin( valueName[0] )) + { + _header << "\t" << valueName << ",\n"; + } + else + { + _header2 << "static constexpr " << _name << ' '; + _header2 << _name << "_" << valueName << " = "; + _header2 << _name << "(" << ToString( ulong(value) ) << ");\n"; + } } + #endif } /* @@ -138,6 +146,7 @@ namespace AE::Scripting template void EnumBinder::Comment (StringView text) __Th___ { + #if AE_SCRIPT_CPP_REFLECTION if_unlikely( _genHeader and not text.empty() ) { _header << '\n'; @@ -148,6 +157,8 @@ namespace AE::Scripting _header << "\t// " << line << '\n'; } } + #endif + Unused( text ); } diff --git a/AE/engine/src/scripting/Impl/ScriptArgList.h b/AE/engine/src/scripting/Impl/ScriptArgList.h index 07e709c0..13384439 100644 --- 
a/AE/engine/src/scripting/Impl/ScriptArgList.h +++ b/AE/engine/src/scripting/Impl/ScriptArgList.h @@ -39,7 +39,7 @@ namespace AE::Scripting // arguments // template - ND_ decltype(auto) Arg (uint argIndex) C_NE___; + ND_ exact_t Arg (uint argIndex) C_NE___; template ND_ bool IsArg (uint argIndex) C_NE___; diff --git a/AE/engine/src/scripting/Impl/ScriptArgList.inl.h b/AE/engine/src/scripting/Impl/ScriptArgList.inl.h index 6dd0d0e5..d1afffc3 100644 --- a/AE/engine/src/scripting/Impl/ScriptArgList.inl.h +++ b/AE/engine/src/scripting/Impl/ScriptArgList.inl.h @@ -9,7 +9,7 @@ namespace AE::Scripting ================================================= */ template - decltype(auto) ScriptArgList::Arg (uint idx) C_NE___ + exact_t ScriptArgList::Arg (uint idx) C_NE___ { using namespace AngelScript; diff --git a/AE/engine/src/scripting/Impl/ScriptEngine.cpp b/AE/engine/src/scripting/Impl/ScriptEngine.cpp index 32581851..2903bbcd 100644 --- a/AE/engine/src/scripting/Impl/ScriptEngine.cpp +++ b/AE/engine/src/scripting/Impl/ScriptEngine.cpp @@ -213,27 +213,10 @@ namespace } } - ND_ inline bool IsNumber (char c) __NE___ - { - return (c >= '0') and (c <= '9'); - } - - ND_ inline bool IsWordBegin (char c) __NE___ - { - return ((c >= 'A') and (c <= 'Z')) or - ((c >= 'a') and (c <= 'z')) or - (c == '_'); - } - - ND_ inline bool IsWord (char c) __NE___ - { - return IsWordBegin( c ) or IsNumber( c ); - } - - ND_ inline bool IsSpaceOrSymb (char c) __NE___ - { - return not IsWord( c ); - } + ND_ forceinline bool IsNumber (char c) __NE___ { return Parser::CPP.IsNumber( c ); } + ND_ forceinline bool IsWordBegin (char c) __NE___ { return Parser::CPP.IsWordBegin( c ); } + ND_ forceinline bool IsWord (char c) __NE___ { return Parser::CPP.IsWord( c ); } + ND_ forceinline bool IsSpaceOrSymb (char c) __NE___ { return not IsWord( c ); } } bool ScriptEngine::_Preprocessor (StringView str, @@ -361,9 +344,13 @@ namespace ++pos; #ifdef AE_DEBUG - if ( a == '\n' and multiline_strings_assert_once ) { + 
if ( a == '\n' and multiline_strings_assert_once ) + { + usize p = pos; + StringView line; + Parser::ReadCurrLine( str, INOUT p, OUT line ); multiline_strings_assert_once = false; - CHECK_MSG( false, "multiline strings are not supported" ); + CHECK_MSG( false, "multiline strings are not supported:\n"s << line ); } #endif Unused( multiline_strings_assert_once ); @@ -709,7 +696,6 @@ namespace str << "using uint8 = std::uint8_t;\n"; str << "using int16 = std::int16_t;\n"; str << "using uint16 = std::uint16_t;\n"; - str << "using int = std::int32_t;\n"; str << "using uint = std::uint32_t;\n"; str << "using int32 = std::int32_t;\n"; str << "using uint32 = std::uint32_t;\n"; @@ -720,21 +706,42 @@ namespace str << "template \nstruct RC;\n\n"; str << "template \nusing array = std::vector;\n\n"; + str << "using namespace std::string_literals;\n\n"; + + str << "template \n" + << "string operator + (const string &lhs, T rhs);\n\n"; + + // forward declaration for (auto& [name, p] : _cppHeaderMap) { if_unlikely( name.empty() ) continue; - if_unlikely( p.second == int(AngelScript::asOBJ_MASK_VALID_FLAGS) ) + if ( p.second == int(AngelScript::asOBJ_MASK_VALID_FLAGS) or + p.second == int(AngelScript::asOBJ_ENUM) ) continue; str << "struct " << name << ";\n"; } - str << "\n"; + // enums + for (auto& hdr : _cppHeaders) + { + if ( StartsWith( hdr, "enum class " )) + { + str << hdr; + hash << CT_Hash( hdr.data(), hdr.length(), 0 ); + str << '\n'; + } + } + + // structs & other for (auto& hdr : _cppHeaders) { + if ( StartsWith( hdr, "enum class " )) + continue; + str << hdr; hash << CT_Hash( hdr.data(), hdr.length(), 0 ); diff --git a/AE/engine/src/scripting/Impl/ScriptTypes.h b/AE/engine/src/scripting/Impl/ScriptTypes.h index db51ae0f..1e043e4e 100644 --- a/AE/engine/src/scripting/Impl/ScriptTypes.h +++ b/AE/engine/src/scripting/Impl/ScriptTypes.h @@ -399,7 +399,6 @@ namespace AE::Scripting template static void Constructor (AngelScript::asIScriptGeneric* gen) { - // TODO: bug in 
AngelScript: address is not aligned PlacementNew( OUT gen->GetObject() ); // throw } diff --git a/AE/engine/src/serializing/CMakeLists.txt b/AE/engine/src/serializing/CMakeLists.txt index 03ac1c62..39f234e8 100644 --- a/AE/engine/src/serializing/CMakeLists.txt +++ b/AE/engine/src/serializing/CMakeLists.txt @@ -14,5 +14,6 @@ target_link_libraries( "Serializing" PUBLIC "Base" ) EnablePCH( "Serializing" ) EnablePrebuild( "Serializing" ) +EnableUnitBuild( "Serializing" ) install( TARGETS "Serializing" ARCHIVE DESTINATION "lib" ) diff --git a/AE/engine/src/serializing/Private/BitDeserializer.inl.h b/AE/engine/src/serializing/Private/BitDeserializer.inl.h new file mode 100644 index 00000000..4b2fbd42 --- /dev/null +++ b/AE/engine/src/serializing/Private/BitDeserializer.inl.h @@ -0,0 +1,59 @@ +// Copyright (c) Zhirnov Andrey. For more information see 'LICENSE' + +#pragma once + +#include "serializing/Public/BitDeserializer.h" + +namespace AE::Serializing +{ + + template + bool BitDeserializer::operator () (INOUT Args& ...args) __NE___ + { + return _RecursiveDeserialize( INOUT args... ); + } + + + template + bool BitDeserializer::_RecursiveDeserialize (INOUT Arg0 &arg0, INOUT Args& ...args) __NE___ + { + StaticAssert( not IsConst ); + + bool res = _Deserialize( INOUT arg0 ); + + if constexpr( CountOf() > 0 ) + return res and _RecursiveDeserialize( INOUT args... 
); + else + return res; + } + + + template + bool BitDeserializer::_Deserialize (INOUT PackedBits &value) __NE___ + { + const uint bit_cnt = value.MaxBitCount(); + BitType bits = _packedBits; + + if_likely( _bitCount >= bit_cnt ) + { + _bitCount -= bit_cnt; + _packedBits >>= bit_cnt; + } + else + { + // read next bits + if_unlikely( not stream.Read( OUT _packedBits )) + return false; + + bits |= (_packedBits << _bitCount); + + _packedBits = SafeRightBitShift( _packedBits, bit_cnt - _bitCount ); + _bitCount += CT_SizeOfInBits - bit_cnt; + } + + value.FromBits( bits ); + return true; + } + + +} // AE::Serializing diff --git a/AE/engine/src/serializing/Private/BitSerializer.inl.h b/AE/engine/src/serializing/Private/BitSerializer.inl.h new file mode 100644 index 00000000..76989d50 --- /dev/null +++ b/AE/engine/src/serializing/Private/BitSerializer.inl.h @@ -0,0 +1,68 @@ +// Copyright (c) Zhirnov Andrey. For more information see 'LICENSE' + +#pragma once + +#include "serializing/Public/BitSerializer.h" + +namespace AE::Serializing +{ + + inline bool BitSerializer::_FlushBits () __NE___ + { + bool res = true; + + if_unlikely( _bitCount > 0 ) + { + DEBUG_ONLY( + dbgUnusedBits += CT_SizeOfInBits - _bitCount; + ) + res = stream.Write( _packedBits ); + + _bitCount = 0; + _packedBits = 0; + } + return res; + } + + + template + bool BitSerializer::operator () (const Args& ...args) __NE___ + { + return _RecursiveSerialize( args... ); + } + + + template + bool BitSerializer::_RecursiveSerialize (const Arg0 &arg0, const Args& ...args) __NE___ + { + bool res = _Serialize( arg0 ); + + if constexpr( CountOf() > 0 ) + return res and _RecursiveSerialize( args... 
); + else + return res; + } + + + template + bool BitSerializer::_Serialize (const PackedBits &value) __NE___ + { + const uint bit_cnt = value.MaxBitCount(); + const BitType bits = value.ToBits(); + bool res = true; + + _packedBits |= (bits << _bitCount); + _bitCount += bit_cnt; + + if_unlikely( _bitCount >= CT_SizeOfInBits ) + { + res = stream.Write( _packedBits ); + + _bitCount &= (CT_SizeOfInBits - 1); + _packedBits = SafeRightBitShift( bits, bit_cnt - _bitCount ); + } + return res; + } + + +} // AE::Serializing diff --git a/AE/engine/src/serializing/Deserializer.inl.h b/AE/engine/src/serializing/Private/Deserializer.inl.h similarity index 91% rename from AE/engine/src/serializing/Deserializer.inl.h rename to AE/engine/src/serializing/Private/Deserializer.inl.h index aa89a052..ee18ebe2 100644 --- a/AE/engine/src/serializing/Deserializer.inl.h +++ b/AE/engine/src/serializing/Private/Deserializer.inl.h @@ -2,7 +2,7 @@ #pragma once -#include "serializing/Deserializer.h" +#include "serializing/Public/Deserializer.h" namespace AE::Serializing { @@ -11,7 +11,9 @@ namespace AE::Serializing bool Deserializer::_DeserializeObj (INOUT T &obj) __NE___ { if constexpr( IsBaseOf< ISerializable, T >) + { return obj.Deserialize( *this ); + } else { if ( factory ) @@ -64,34 +66,6 @@ namespace AE::Serializing } - template - bool Deserializer::_Deserialize (INOUT PackedBits &value) __NE___ - { - const uint bit_cnt = value.BitCount(); - BitType bits = _packedBits; - - if_likely( _bitCount >= bit_cnt ) - { - _bitCount -= bit_cnt; - _packedBits >>= bit_cnt; - } - else - { - // read next bits - if_unlikely( not stream.Read( OUT _packedBits )) - return false; - - bits |= (_packedBits << _bitCount); - - _packedBits >>= (bit_cnt - _bitCount); - _bitCount += CT_SizeOfInBits - bit_cnt; - } - - value.FromBits( bits ); - return true; - } - - template bool Deserializer::_Deserialize (INOUT Pair &value) __NE___ { @@ -102,12 +76,22 @@ namespace AE::Serializing template bool 
Deserializer::_Deserialize (INOUT BitSet &value) __NE___ { - PackedBits<0,N,BitType> temp; - - bool res = _Deserialize( OUT temp ); - value = BitSet{ BitType{temp} }; + StaticAssert( N <= 64 ); - return res; + if constexpr( N <= 32 ) + { + uint bits = 0; + bool res = _Deserialize( INOUT bits ); + value = BitSet{ bits }; + return res; + } + else + { + ulong bits = 0; + bool res = _Deserialize( INOUT bits ); + value = BitSet{ bits }; + return res; + } } @@ -155,8 +139,8 @@ namespace AE::Serializing } - template - bool Deserializer::_Deserialize (INOUT FixedArray &arr) __NE___ + template + bool Deserializer::_Deserialize (INOUT FixedArray &arr) __NE___ { uint count = 0; bool res = stream.Read( OUT count ); @@ -332,18 +316,7 @@ namespace AE::Serializing bool Deserializer::_Deserialize (INOUT Optional &value) __NE___ { bool has_value; - bool res; - - if ( _bitCount > 0 or IsPackedBits ) - { - BoolBit bit; - res = _Deserialize( OUT bit ); - has_value = bool{bit}; - } - else - { - res = stream.Read( OUT has_value ); - } + bool res = stream.Read( OUT has_value ); if ( res and has_value ) return _Deserialize( INOUT value.emplace() ); @@ -461,7 +434,7 @@ namespace AE::Serializing template bool Deserializer::_Deserialize (INOUT ArrayView &arr) __NE___ { - StaticAssert( IsTriviallyDestructible ); + StaticAssert( IsTriviallyDestructible ); // non-trivial destructors require 'Array<>' type CHECK_ERR( allocator ); uint count = 0; diff --git a/AE/engine/src/serializing/Serializer.inl.h b/AE/engine/src/serializing/Private/Serializer.inl.h similarity index 73% rename from AE/engine/src/serializing/Serializer.inl.h rename to AE/engine/src/serializing/Private/Serializer.inl.h index 49bee588..078d8cfc 100644 --- a/AE/engine/src/serializing/Serializer.inl.h +++ b/AE/engine/src/serializing/Private/Serializer.inl.h @@ -2,7 +2,7 @@ #pragma once -#include "serializing/Serializer.h" +#include "serializing/Public/Serializer.h" namespace AE::Serializing { @@ -12,16 +12,14 @@ namespace 
AE::Serializing { if constexpr( IsBaseOf< ISerializable, T >) { - return _FlushBits() and - obj.Serialize( *this ); + return obj.Serialize( *this ); } else { if ( factory ) { // write 'SerializedID' and then serialize - return _FlushBits() and - factory->Serialize( *this, obj ); + return factory->Serialize( *this, obj ); } DBG_WARNING( "unknown type" ); return false; @@ -57,51 +55,12 @@ namespace AE::Serializing bool Serializer::_Serialize (const T &value) __NE___ { if constexpr( IsTriviallySerializable ) - return _FlushBits() and stream.Write( value ); + return stream.Write( value ); else return _SerializeObj( value ); } - inline bool Serializer::_FlushBits () __NE___ - { - bool res = true; - - if_unlikely( _bitCount > 0 ) - { - DEBUG_ONLY( - dbgUnusedBits += CT_SizeOfInBits - _bitCount; - ) - res = stream.Write( _packedBits ); - - _bitCount = 0; - _packedBits = 0; - } - return res; - } - - - template - bool Serializer::_Serialize (const PackedBits &value) __NE___ - { - const uint bit_cnt = value.BitCount(); - const BitType bits = value.ToBits(); - bool res = true; - - _packedBits |= (bits << _bitCount); - _bitCount += bit_cnt; - - if_unlikely( _bitCount >= CT_SizeOfInBits ) - { - res = stream.Write( _packedBits ); - - _bitCount &= (CT_SizeOfInBits - 1); - _packedBits = (bits >> (bit_cnt - _bitCount)); - } - return res; - } - - template bool Serializer::_Serialize (const Pair &value) __NE___ { @@ -113,7 +72,12 @@ namespace AE::Serializing bool Serializer::_Serialize (const BitSet &value) __NE___ { StaticAssert( N <= 64 ); - return _Serialize( PackedBits<0,N,ulong>{ value.to_ullong() }); + + if constexpr( N <= 32 ) + return _Serialize( value.to_ulong() ); + + if constexpr( N <= 64 ) + return _Serialize( value.to_ullong() ); } @@ -122,8 +86,7 @@ namespace AE::Serializing { CHECK_ERR( arr.size() <= MaxArrayLength ); - bool res = _FlushBits() and - stream.Write( CheckCast(arr.size()) ); + bool res = stream.Write( CheckCast(arr.size()) ); if constexpr( 
IsTriviallySerializable ) return res and (arr.empty() or stream.Write( arr.data(), SizeOf * arr.size() )); @@ -141,8 +104,7 @@ namespace AE::Serializing bool Serializer::_Serialize (BasicStringView str) __NE___ { CHECK_ERR( str.length() <= MaxStringLength ); - return _FlushBits() and - stream.Write( CheckCast(str.length()) ) and + return stream.Write( CheckCast(str.length()) ) and (str.empty() or stream.Write( str.data(), StringSizeOf(str) )); } @@ -150,40 +112,35 @@ namespace AE::Serializing template bool Serializer::_Serialize (const TVec &vec) __NE___ { - return _FlushBits() and - stream.Write( &vec.x, SizeOf*I ); + return stream.Write( &vec.x, SizeOf*I ); } template bool Serializer::_Serialize (const Rectangle &rect) __NE___ { - return _FlushBits() and - stream.Write( rect.data(), SizeOf*4 ); + return stream.Write( rect.data(), SizeOf*4 ); } template bool Serializer::_Serialize (const RGBAColor &col) __NE___ { - return _FlushBits() and - stream.Write( col.data(), Sizeof(col) ); + return stream.Write( col.data(), Sizeof(col) ); } template bool Serializer::_Serialize (const HSVColor &col) __NE___ { - return _FlushBits() and - stream.Write( col.data(), Sizeof(col) ); + return stream.Write( col.data(), Sizeof(col) ); } template bool Serializer::_Serialize (const NamedID &id) __NE___ { - return _FlushBits() and - stream.Write( uint{id.GetHash32()} ); + return stream.Write( uint{id.GetHash32()} ); } @@ -191,7 +148,7 @@ namespace AE::Serializing bool Serializer::_Serialize (const NamedID &id) __NE___ { #if AE_SERIALIZE_HASH_ONLY - return _FlushBits() and stream.Write( uint{id.GetHash32()} ); + return stream.Write( uint{id.GetHash32()} ); #else return _Serialize( id.GetName() ); #endif @@ -203,8 +160,7 @@ namespace AE::Serializing { CHECK_ERR( map.size() <= MaxArrayLength ); - bool res = _FlushBits() and - stream.Write( CheckCast(map.size()) ); + bool res = stream.Write( CheckCast(map.size()) ); for (auto iter = map.begin(); (iter != map.end()) and res; ++iter) { @@ 
-219,8 +175,7 @@ namespace AE::Serializing { CHECK_ERR( set.size() <= MaxArrayLength ); - bool res = _FlushBits() and - stream.Write( CheckCast(set.size()) ); + bool res = stream.Write( CheckCast(set.size()) ); for (auto iter = set.begin(); (iter != set.end()) and res; ++iter) { @@ -270,8 +225,7 @@ namespace AE::Serializing bool Serializer::_Serialize (const FixedTupleArray &arr) __NE___ { CHECK_ERR( arr.size() <= MaxArrayLength ); - return _FlushBits() and - stream.Write( CheckCast(arr.size()) ) and + return stream.Write( CheckCast(arr.size()) ) and _RecursiveSerializeTupleArray<0>( arr ); } @@ -294,7 +248,7 @@ namespace AE::Serializing { CHECK_ERR( arr.size() <= MaxArrayLength ); - if_unlikely( not (_FlushBits() and stream.Write( uint(arr.size()) )) ) + if_unlikely( not stream.Write( uint(arr.size()) )) return false; if ( arr.empty() ) @@ -307,11 +261,7 @@ namespace AE::Serializing template bool Serializer::_Serialize (const Optional &value) __NE___ { - bool res; - if ( _bitCount > 0 or IsPackedBits ) - res = _Serialize( BoolBit{ value.has_value() }); - else - res = stream.Write( value.has_value() ); + bool res = stream.Write( value.has_value() ); if ( value.has_value() ) return res and _Serialize( *value ); @@ -323,8 +273,7 @@ namespace AE::Serializing template bool Serializer::_Serialize (const Union &un) __NE___ { - return _FlushBits() and - stream.Write( CheckCast(un.index()) ) and // TODO: use ubyte + return stream.Write( CheckCast(un.index()) ) and // TODO: use ubyte _RecursiveSerializeUnion< Types... >( un ); } diff --git a/AE/engine/src/serializing/Serializing.cpp b/AE/engine/src/serializing/Private/Serializing.cpp similarity index 60% rename from AE/engine/src/serializing/Serializing.cpp rename to AE/engine/src/serializing/Private/Serializing.cpp index 10592bb8..17573ae9 100644 --- a/AE/engine/src/serializing/Serializing.cpp +++ b/AE/engine/src/serializing/Private/Serializing.cpp @@ -1,3 +1,3 @@ // Copyright (c) Zhirnov Andrey. 
For more information see 'LICENSE' -#include "serializing/ObjectFactory.h" +#include "serializing/Public/ObjectFactory.h" diff --git a/AE/engine/src/serializing/Public/BitDeserializer.h b/AE/engine/src/serializing/Public/BitDeserializer.h new file mode 100644 index 00000000..b8062e63 --- /dev/null +++ b/AE/engine/src/serializing/Public/BitDeserializer.h @@ -0,0 +1,51 @@ +// Copyright (c) Zhirnov Andrey. For more information see 'LICENSE' +/* + Thread-safe: no +*/ + +#pragma once + +#include "serializing/Public/BitSerializer.h" + +namespace AE::Serializing +{ + + // + // Bit Deserializer + // + + struct BitDeserializer final : Noncopyable + { + // types + public: + using BitType = PackedBits< 0, 1, int >::BitType; + + + // variables + public: + FastRStream stream; + + private: + BitType _packedBits = 0; + uint _bitCount = 0; + + + // methods + public: + explicit BitDeserializer (FastRStream rstream) __NE___ : stream{ RVRef(rstream) } {} + explicit BitDeserializer (RC rstream) __NE___ : stream{ RVRef(rstream) } {} + + template + ND_ bool operator () (INOUT Args& ...args) __NE___; + + ND_ bool IsEnd () C_NE___ { return stream.Empty(); } + + private: + template + ND_ bool _RecursiveDeserialize (INOUT Arg0 &, INOUT Args& ...) __NE___; + + template + ND_ bool _Deserialize (INOUT PackedBits &) __NE___; + }; + +} // AE::Serializing diff --git a/AE/engine/src/serializing/Public/BitSerializer.h b/AE/engine/src/serializing/Public/BitSerializer.h new file mode 100644 index 00000000..f45e7eb3 --- /dev/null +++ b/AE/engine/src/serializing/Public/BitSerializer.h @@ -0,0 +1,60 @@ +// Copyright (c) Zhirnov Andrey. 
For more information see 'LICENSE' +/* + Thread-safe: no +*/ + +#pragma once + +#include "serializing/Public/Common.h" + +namespace AE::Serializing +{ + + // + // Serializer + // + + struct BitSerializer : Noncopyable + { + // types + public: + using BitType = PackedBits< 0, 1, int >::BitType; + + + // variables + public: + FastWStream stream; + + DEBUG_ONLY( + ulong dbgUnusedBits = 0; + ) + + private: + BitType _packedBits = 0; + uint _bitCount = 0; + + + // methods + public: + explicit BitSerializer (FastWStream wstream) __NE___ : stream{ RVRef(wstream) } {} + explicit BitSerializer (RC wstream) __NE___ : stream{ RVRef(wstream) } {} + + ~BitSerializer () __NE___ { Unused( _FlushBits() ); } + + template + ND_ bool operator () (const Args& ...args) __NE___; + + // optional + ND_ bool Flush () __NE___ { return _FlushBits(); } + + private: + ND_ bool _FlushBits () __NE___; + + template + ND_ bool _RecursiveSerialize (const Arg0 &, const Args& ...) __NE___; + + template + ND_ bool _Serialize (const PackedBits &) __NE___; + }; + +} // AE::Serializing diff --git a/AE/engine/src/serializing/Common.h b/AE/engine/src/serializing/Public/Common.h similarity index 84% rename from AE/engine/src/serializing/Common.h rename to AE/engine/src/serializing/Public/Common.h index 18e4ea1a..6ced7f4f 100644 --- a/AE/engine/src/serializing/Common.h +++ b/AE/engine/src/serializing/Public/Common.h @@ -3,7 +3,7 @@ #pragma once #include "serializing/Serializing.pch.h" -#include "serializing/PackedBits.h" +#include "serializing/Public/PackedBits.h" // for NamedID #define AE_SERIALIZE_HASH_ONLY 1 @@ -19,6 +19,8 @@ namespace AE::Serializing struct Deserializer; class ObjectFactory; + struct BitSerializer; + struct BitDeserializer; // IsTriviallySerializable<> defined in 'base/CompileTime/TypeTraits.h' diff --git a/AE/engine/src/serializing/Deserializer.h b/AE/engine/src/serializing/Public/Deserializer.h similarity index 63% rename from AE/engine/src/serializing/Deserializer.h rename to 
AE/engine/src/serializing/Public/Deserializer.h index d394c143..c8ae9a17 100644 --- a/AE/engine/src/serializing/Deserializer.h +++ b/AE/engine/src/serializing/Public/Deserializer.h @@ -5,7 +5,7 @@ #pragma once -#include "serializing/Serializer.h" +#include "serializing/Public/Serializer.h" namespace AE::Serializing { @@ -21,8 +21,6 @@ namespace AE::Serializing static constexpr uint MaxStringLength = Serializer::MaxStringLength; static constexpr uint MaxArrayLength = Serializer::MaxArrayLength; - using BitType = PackedBits< 0, 1, int >::BitType; - // variables public: @@ -31,9 +29,6 @@ namespace AE::Serializing Ptr allocator; // optional private: - BitType _packedBits = 0; - uint _bitCount = 0; - #if AE_DEBUG_SERIALIZER uint _dbgCounter = 0; #endif @@ -45,13 +40,11 @@ namespace AE::Serializing explicit Deserializer (RC rstream, Ptr alloc = Default) __NE___ : stream{ RVRef(rstream) }, allocator{alloc} {} template - ND_ bool operator () (INOUT Args& ...args) __NE___; + ND_ bool operator () (INOUT Args& ...args) __NE___; - ND_ bool IsEnd () C_NE___ { return stream.Empty(); } + ND_ bool IsEnd () C_NE___ { return stream.Empty(); } private: - ND_ bool _ReadBits () __NE___; - template ND_ bool _RecursiveDeserialize (INOUT Arg0 &arg0, INOUT Args& ...args) __NE___; @@ -65,7 +58,6 @@ namespace AE::Serializing template ND_ bool _Deserialize (INOUT RGBAColor &) __NE___; template ND_ bool _Deserialize (INOUT HSVColor &) __NE___; template ND_ bool _Deserialize (INOUT StaticArray &) __NE___; - template ND_ bool _Deserialize (INOUT FixedArray &) __NE___; template ND_ bool _Deserialize (INOUT Array &) __NE___; // with allocator @@ -77,66 +69,74 @@ namespace AE::Serializing ND_ bool _Deserialize (INOUT AnyTypeRef &); // not defined ND_ bool _Deserialize (INOUT AnyTypeCRef &); // not defined - template - ND_ bool _Deserialize (INOUT PackedBits &) __NE___; + template + ND_ bool _Deserialize (INOUT FixedArray &) __NE___; template - ND_ bool _Deserialize (INOUT TVec &) __NE___; + ND_ 
bool _Deserialize (INOUT TVec &) __NE___; template - ND_ bool _Deserialize (INOUT NamedID &) __NE___; + ND_ bool _Deserialize (INOUT NamedID &) __NE___; template - ND_ bool _Deserialize (INOUT NamedID &)__NE___; + ND_ bool _Deserialize (INOUT NamedID &) __NE___; #ifdef AE_ENABLE_ABSEIL - template ND_ bool _Deserialize (INOUT FlatHashMap &map) __NE___ { return _DeserializeMap( INOUT map, UMax ); } + template + ND_ bool _Deserialize (INOUT FlatHashMap &map) __NE___ { return _DeserializeMap( INOUT map, UMax ); } #endif - template ND_ bool _Deserialize (INOUT HashMap &map) __NE___ { return _DeserializeMap( INOUT map, UMax ); } - template ND_ bool _Deserialize (INOUT FixedMap &map) __NE___ { return _DeserializeMap( INOUT map, uint(S) ); } - template ND_ bool _DeserializeMap (INOUT MapType &map, uint maxCount)__NE___; + template + ND_ bool _Deserialize (INOUT HashMap &map) __NE___ { return _DeserializeMap( INOUT map, UMax ); } + template + ND_ bool _Deserialize (INOUT FixedMap &map) __NE___ { return _DeserializeMap( INOUT map, uint(S) ); } + template + ND_ bool _DeserializeMap (INOUT MapType &map, uint maxCount) __NE___; #ifdef AE_ENABLE_ABSEIL - template ND_ bool _Deserialize (INOUT FlatHashSet &set) __NE___ { return _DeserializeSet( INOUT set, UMax ); } + template + ND_ bool _Deserialize (INOUT FlatHashSet &set) __NE___ { return _DeserializeSet( INOUT set, UMax ); } #endif - template ND_ bool _Deserialize (INOUT HashSet &set) __NE___ { return _DeserializeSet( INOUT set, UMax ); } - template ND_ bool _Deserialize (INOUT FixedSet &set) __NE___ { return _DeserializeSet( INOUT set, uint(S) ); } - template ND_ bool _DeserializeSet (INOUT SetType &set, uint maxCount)__NE___; + template + ND_ bool _Deserialize (INOUT HashSet &set) __NE___ { return _DeserializeSet( INOUT set, UMax ); } + template + ND_ bool _Deserialize (INOUT FixedSet &set) __NE___ { return _DeserializeSet( INOUT set, uint(S) ); } + template + ND_ bool _DeserializeSet (INOUT SetType &set, uint maxCount) 
__NE___; template - ND_ bool _Deserialize (INOUT Optional &) __NE___; + ND_ bool _Deserialize (INOUT Optional &) __NE___; template - ND_ bool _Deserialize (INOUT Tuple &) __NE___; + ND_ bool _Deserialize (INOUT Tuple &) __NE___; template - ND_ bool _Deserialize (INOUT FixedTupleArray &) __NE___; + ND_ bool _Deserialize (INOUT FixedTupleArray &) __NE___; template - ND_ bool _RecursiveDeserializeTuple (INOUT Tuple &) __NE___; + ND_ bool _RecursiveDeserializeTuple (INOUT Tuple &) __NE___; template - ND_ bool _Deserialize (INOUT Union &) __NE___; + ND_ bool _Deserialize (INOUT Union &) __NE___; template - ND_ bool _RecursiveDeserializeUnion (INOUT Union &, usize idx) __NE___; + ND_ bool _RecursiveDeserializeUnion (INOUT Union &, usize) __NE___; template ND_ bool _RecursiveDeserializeTupleArray (INOUT FixedTupleArray &) __NE___; template - ND_ bool _Deserialize (INOUT TupleArrayView &) __NE___; + ND_ bool _Deserialize (INOUT TupleArrayView &) __NE___; template ND_ bool _RecursiveDeserializeTupleArrayView (INOUT TupleArrayView &) __NE___; template - ND_ bool _Deserialize (INOUT PhysicalQuantity &) __NE___; + ND_ bool _Deserialize (INOUT PhysicalQuantity &) __NE___; template - ND_ bool _Deserialize (INOUT PhysicalQuantityVec &) __NE___; + ND_ bool _Deserialize (INOUT PhysicalQuantityVec &) __NE___; - ND_ bool _Deserialize (INOUT Path &path) __NE___; + ND_ bool _Deserialize (INOUT Path &path) __NE___; }; } // AE::Serializing diff --git a/AE/engine/src/serializing/ISerializable.h b/AE/engine/src/serializing/Public/ISerializable.h similarity index 96% rename from AE/engine/src/serializing/ISerializable.h rename to AE/engine/src/serializing/Public/ISerializable.h index f2b6a399..7ce91df2 100644 --- a/AE/engine/src/serializing/ISerializable.h +++ b/AE/engine/src/serializing/Public/ISerializable.h @@ -5,7 +5,7 @@ #pragma once -#include "serializing/Common.h" +#include "serializing/Public/Common.h" namespace AE::Serializing { diff --git 
a/AE/engine/src/serializing/ObjectFactory.h b/AE/engine/src/serializing/Public/ObjectFactory.h similarity index 93% rename from AE/engine/src/serializing/ObjectFactory.h rename to AE/engine/src/serializing/Public/ObjectFactory.h index cba7249b..2a2b00ec 100644 --- a/AE/engine/src/serializing/ObjectFactory.h +++ b/AE/engine/src/serializing/Public/ObjectFactory.h @@ -5,9 +5,9 @@ #pragma once -#include "serializing/ISerializable.h" -#include "serializing/Serializer.h" -#include "serializing/Deserializer.h" +#include "serializing/Public/ISerializable.h" +#include "serializing/Public/Serializer.h" +#include "serializing/Public/Deserializer.h" namespace AE::Serializing { @@ -136,7 +136,7 @@ namespace AE::Serializing ================================================= */ template - bool ObjectFactory::Serialize (Serializer &ser, const T *obj) C_NE___ + bool ObjectFactory::Serialize (Serializer &ser, const T* obj) C_NE___ { StaticAssert( not IsTriviallySerializable, "Can not serialize trivial type" ); @@ -257,5 +257,8 @@ namespace AE::Serializing } // AE::Serializing -#include "serializing/Serializer.inl.h" -#include "serializing/Deserializer.inl.h" +#include "serializing/Private/Serializer.inl.h" +#include "serializing/Private/Deserializer.inl.h" + +#include "serializing/Private/BitSerializer.inl.h" +#include "serializing/Private/BitDeserializer.inl.h" diff --git a/AE/engine/src/serializing/PackedBits.h b/AE/engine/src/serializing/Public/PackedBits.h similarity index 96% rename from AE/engine/src/serializing/PackedBits.h rename to AE/engine/src/serializing/Public/PackedBits.h index be81e999..f036092f 100644 --- a/AE/engine/src/serializing/PackedBits.h +++ b/AE/engine/src/serializing/Public/PackedBits.h @@ -36,12 +36,13 @@ namespace AE::Base constexpr PackedBits () __NE___ {} explicit constexpr PackedBits (T val) __NE___ : _value{val} {} + // for union/bitset/bitfields ND_ T* operator -> () __NE___ { return &_value; } ND_ T const* operator -> () C_NE___ { return &_value; } 
ND_ explicit constexpr operator T () C_NE___ { return _value; } - ND_ static constexpr uint BitCount () __NE___ { return BitCount_v; } + ND_ static constexpr uint MaxBitCount () __NE___ { return BitCount_v; } ND_ constexpr BitType ToBits () C_NE___; constexpr void FromBits (BitType bits) __NE___; }; diff --git a/AE/engine/src/serializing/Serializer.h b/AE/engine/src/serializing/Public/Serializer.h similarity index 70% rename from AE/engine/src/serializing/Serializer.h rename to AE/engine/src/serializing/Public/Serializer.h index eddc9096..659dd204 100644 --- a/AE/engine/src/serializing/Serializer.h +++ b/AE/engine/src/serializing/Public/Serializer.h @@ -5,7 +5,7 @@ #pragma once -#include "serializing/Common.h" +#include "serializing/Public/Common.h" namespace AE::Serializing { @@ -23,22 +23,13 @@ namespace AE::Serializing static constexpr uint MaxStringLength = 0xFFFF; static constexpr uint MaxArrayLength = 0xFFFFFF; // TODO: remove or limit mem size - using BitType = PackedBits< 0, 1, int >::BitType; - // variables public: FastWStream stream; Ptr factory; - DEBUG_ONLY( - ulong dbgUnusedBits = 0; - ) - private: - BitType _packedBits = 0; - uint _bitCount = 0; - #if AE_DEBUG_SERIALIZER uint _dbgCounter = 0; #endif @@ -49,18 +40,13 @@ namespace AE::Serializing explicit Serializer (FastWStream wstream) __NE___ : stream{ RVRef(wstream) } {} explicit Serializer (RC wstream) __NE___ : stream{ RVRef(wstream) } {} - ~Serializer () __NE___ { Unused( _FlushBits() ); } + ~Serializer () __NE___ {} template ND_ bool operator () (const Args& ...args) __NE___; - // optional - ND_ bool Flush () __NE___ { return _FlushBits(); } - private: - ND_ bool _FlushBits () __NE___; - template ND_ bool _RecursiveSerialize (const Arg0 &arg0, const Args& ...args) __NE___; @@ -70,7 +56,6 @@ namespace AE::Serializing template ND_ bool _Serialize (const BitSet &) __NE___; template ND_ bool _Serialize (ArrayView) __NE___; template ND_ bool _Serialize (const Array &v) __NE___ { return 
_Serialize(ArrayView{v}); } - template ND_ bool _Serialize (const FixedArray &arr) __NE___ { return _Serialize(ArrayView{arr}); } template ND_ bool _Serialize (const StaticArray &arr) __NE___ { return _Serialize(ArrayView{arr}); } template ND_ bool _Serialize (BasicStringView) __NE___; template ND_ bool _Serialize (const BasicString &str) __NE___ { return _Serialize(BasicStringView{str}); } @@ -79,9 +64,8 @@ namespace AE::Serializing template ND_ bool _Serialize (const HSVColor &) __NE___; template ND_ bool _Serialize (const TFixedString &str) __NE___ { return _Serialize(BasicStringView{str}); } - - template - ND_ bool _Serialize (const PackedBits &) __NE___; + template + ND_ bool _Serialize (const FixedArray &arr) __NE___ { return _Serialize(ArrayView{arr}); } template ND_ bool _Serialize (const TVec &) __NE___; @@ -93,19 +77,28 @@ namespace AE::Serializing ND_ bool _Serialize (const NamedID &) __NE___; #ifdef AE_ENABLE_ABSEIL - template ND_ bool _Serialize (const FlatHashMap &map) __NE___ { return _SerializeMap( map ); } + template + ND_ bool _Serialize (const FlatHashMap &map) __NE___ { return _SerializeMap( map ); } #endif - template ND_ bool _Serialize (const HashMap &map) __NE___ { return _SerializeMap( map ); } - template ND_ bool _Serialize (const HashMultiMap &map) __NE___ { return _SerializeMap( map ); } - template ND_ bool _Serialize (const FixedMap &map) __NE___ { return _SerializeMap( map ); } - template ND_ bool _SerializeMap (const MapType &) __NE___; + template + ND_ bool _Serialize (const HashMap &map) __NE___ { return _SerializeMap( map ); } + template + ND_ bool _Serialize (const HashMultiMap &map) __NE___ { return _SerializeMap( map ); } + template + ND_ bool _Serialize (const FixedMap &map) __NE___ { return _SerializeMap( map ); } + template + ND_ bool _SerializeMap (const MapType &) __NE___; #ifdef AE_ENABLE_ABSEIL - template ND_ bool _Serialize (const FlatHashSet &set) __NE___ { return _SerializeSet( set ); } + template + ND_ bool _Serialize 
(const FlatHashSet &set) __NE___ { return _SerializeSet( set ); } #endif - template ND_ bool _Serialize (const HashSet &set) __NE___ { return _SerializeSet( set ); } - template ND_ bool _Serialize (const FixedSet &set) __NE___ { return _SerializeSet( set ); } - template ND_ bool _SerializeSet (const SetType &) __NE___; + template + ND_ bool _Serialize (const HashSet &set) __NE___ { return _SerializeSet( set ); } + template + ND_ bool _Serialize (const FixedSet &set) __NE___ { return _SerializeSet( set ); } + template + ND_ bool _SerializeSet (const SetType &) __NE___; template ND_ bool _Serialize (const FixedTupleArray &) __NE___; diff --git a/AE/engine/src/threading/CMakeLists.txt b/AE/engine/src/threading/CMakeLists.txt index b4576195..b3ec8e53 100644 --- a/AE/engine/src/threading/CMakeLists.txt +++ b/AE/engine/src/threading/CMakeLists.txt @@ -46,5 +46,6 @@ endif() EnablePCH( "Threading" ) EnablePrebuild( "Threading" ) +EnableUnitBuild( "Threading" ) install( TARGETS "Threading" ARCHIVE DESTINATION "lib" ) diff --git a/AE/engine/src/threading/DataSource/UnixAsyncDataSource_LinuxAIO.h b/AE/engine/src/threading/DataSource/UnixAsyncDataSource_LinuxAIO.h index bae3fedd..3b622dc0 100644 --- a/AE/engine/src/threading/DataSource/UnixAsyncDataSource_LinuxAIO.h +++ b/AE/engine/src/threading/DataSource/UnixAsyncDataSource_LinuxAIO.h @@ -15,7 +15,7 @@ namespace { - ND_ inline int io_setup (unsigned nr, OUT aio_context_t *ctxp) { + ND_ inline int io_setup (unsigned nr, OUT aio_context_t* ctxp) { return syscall( __NR_io_setup, nr, ctxp ); } @@ -23,7 +23,7 @@ namespace return syscall( __NR_io_destroy, ctx ); } - ND_ inline int io_cancel (aio_context_t ctx, struct iocb *iocb, OUT struct io_event *result) { + ND_ inline int io_cancel (aio_context_t ctx, struct iocb* iocb, OUT struct io_event* result) { return syscall( __NR_io_cancel, ctx, iocb, OUT result ); } @@ -31,7 +31,7 @@ namespace return syscall( __NR_io_submit, ctx, nr, iocbpp); } - ND_ inline int io_getevents 
(aio_context_t ctx, long min_nr, long max_nr, struct io_event *events, struct timespec *timeout) { + ND_ inline int io_getevents (aio_context_t ctx, long min_nr, long max_nr, struct io_event* events, struct timespec* timeout) { return syscall( __NR_io_getevents, ctx, min_nr, max_nr, events, timeout ); } @@ -56,7 +56,7 @@ namespace io_getevents_nonblock ================================================= */ - ND_ inline int io_getevents_nonblock (aio_context_t ctx, const long max_nr, OUT struct io_event *events) + ND_ inline int io_getevents_nonblock (aio_context_t ctx, const long max_nr, OUT struct io_event* events) { using namespace AE::Base; diff --git a/AE/engine/src/threading/Primitives/Synchronized.h b/AE/engine/src/threading/Primitives/Synchronized.h index f49da5b2..4864014b 100644 --- a/AE/engine/src/threading/Primitives/Synchronized.h +++ b/AE/engine/src/threading/Primitives/Synchronized.h @@ -294,6 +294,13 @@ namespace _hidden_ return _values; } + template + exact_t ReadAll (Fn &&fn) CNoExcept(IsNothrowInvocable< Fn, T0&, T1&, Types&... >) + { + SHAREDLOCK( _sync ); + return _values.Apply( FwdArg( fn )); + } + template @@ -308,12 +315,19 @@ namespace _hidden_ } template - void WriteAll (Args&& ...args) NoExcept(AllNothrowCopyCtor) + ____IA void WriteAll (Args&& ...args) NoExcept(AllNothrowCopyCtor) { EXLOCK( _sync ); _values.Set( FwdArg(args)... ); } + // read / write access + template + ____IA exact_t Modify (Fn &&fn) NoExcept(IsNothrowInvocable< Fn, T0&, T1&, Types&... 
>) + { + EXLOCK( _sync ); + return _values.Apply( FwdArg( fn )); + } template (), @@ -520,6 +534,13 @@ namespace _hidden_ return _value; } + template + exact_t Read (Fn &&fn) CNoExcept(IsNothrowInvocable< Fn, T >) + { + SHAREDLOCK( _sync ); + return fn( _value ); + } + void Write (const T &value) __NE___ { @@ -535,6 +556,14 @@ namespace _hidden_ PlacementNew( OUT std::addressof(this->_value), RVRef(value) ); } + // read / write access + template + ____IA exact_t Modify (Fn &&fn) NoExcept(IsNothrowInvocable< Fn, T >) + { + EXLOCK( _sync ); + return fn( _value ); + } + void Reset () __NE___ { diff --git a/AE/engine/src/threading/TaskSystem/Promise.h b/AE/engine/src/threading/TaskSystem/Promise.h index 9107e6b4..a7154472 100644 --- a/AE/engine/src/threading/TaskSystem/Promise.h +++ b/AE/engine/src/threading/TaskSystem/Promise.h @@ -249,7 +249,7 @@ namespace _hidden_ template _InternalImpl (A &&value, bool except, StringView, ETaskQueue, Promise::CompleteValueArg) __NE___; - ND_ decltype(auto) Result () C_NE___ + ND_ exact_t Result () C_NE___ { ASSERT( Status() == EStatus::Completed ); MemoryBarrier( EMemoryOrder::Acquire ); diff --git a/AE/engine/src/threading/TaskSystem/ThreadManager.cpp b/AE/engine/src/threading/TaskSystem/ThreadManager.cpp index cc23f2aa..f9afffb2 100644 --- a/AE/engine/src/threading/TaskSystem/ThreadManager.cpp +++ b/AE/engine/src/threading/TaskSystem/ThreadManager.cpp @@ -111,7 +111,7 @@ namespace { #ifdef AE_DBG_OR_DEV_OR_PROF const uint core_id = ThreadUtils::GetCoreIndex(); - const auto freq_mhz = CpuPerformance::GetFrequency( core_id ); + const auto freq_mhz = PerformanceStat::CPU_GetFrequency( core_id ); const auto* core = CpuArchInfo::Get().GetCore( core_id ); ProfilingInfo info; @@ -211,7 +211,7 @@ namespace info.coreId = _coreId; #ifdef AE_DBG_OR_DEV_OR_PROF - const auto freq_mhz = CpuPerformance::GetFrequency( uint(_coreId) ); + const auto freq_mhz = PerformanceStat::CPU_GetFrequency( uint(_coreId) ); const auto* core = 
CpuArchInfo::Get().GetCore( uint(_coreId) ); info.curFreq = freq_mhz; diff --git a/AE/engine/src/vfs/CMakeLists.txt b/AE/engine/src/vfs/CMakeLists.txt index 7656bfd9..44e2d8e8 100644 --- a/AE/engine/src/vfs/CMakeLists.txt +++ b/AE/engine/src/vfs/CMakeLists.txt @@ -14,5 +14,6 @@ target_link_libraries( "VFS" PUBLIC "Threading" "Networking" ) EnablePCH( "VFS" ) EnablePrebuild( "VFS" ) +EnableUnitBuild( "VFS" ) install( TARGETS "VFS" ARCHIVE DESTINATION "lib" ) diff --git a/AE/engine/src/vfs/Network/NetworkStorageClient.cpp b/AE/engine/src/vfs/Network/NetworkStorageClient.cpp index 406d965e..2fa29007 100644 --- a/AE/engine/src/vfs/Network/NetworkStorageClient.cpp +++ b/AE/engine/src/vfs/Network/NetworkStorageClient.cpp @@ -7,7 +7,7 @@ namespace AE::VFS { using RWReqPromise_t = AsyncDSRequest::Value_t::Promise_t; - INTERNAL_LINKAGE( Ptr s_NetVFS ); + INTERNAL_LINKAGE( Ptr s_NetVFS_Client ); //----------------------------------------------------------------------------- @@ -167,7 +167,7 @@ namespace AE::VFS _memRC = null; _Cleanup(); - s_NetVFS->_readResultPool.Unassign( this ); + s_NetVFS_Client->_readResultPool.Unassign( this ); } //----------------------------------------------------------------------------- @@ -182,7 +182,7 @@ namespace AE::VFS { // is alive { - auto req = s_NetVFS->_GetWriteReq( _id ); + auto req = s_NetVFS_Client->_GetWriteReq( _id ); if_unlikely( not req ) return OnFailure(); } @@ -190,7 +190,7 @@ namespace AE::VFS for (uint i = 0; (i < _maxParts) and (_dataSize > _sent); ++i) { const Bytes size = Min( _dataSize - _sent, _partSize ); - auto msg = s_NetVFS->_CreateMsgOpt< CSMsg_VFS_WritePart >( size-1 ); + auto msg = s_NetVFS_Client->_CreateMsgOpt< CSMsg_VFS_WritePart >( size-1 ); if_likely( msg ) { @@ -199,7 +199,7 @@ namespace AE::VFS msg->index = ushort(_partIdx); msg.Put( &CSMsg_VFS_WritePart::data, _data + _sent, size ); - if_likely( s_NetVFS->_AddMessage( msg )) + if_likely( s_NetVFS_Client->_AddMessage( msg )) { _sent += size; _partIdx ++; 
@@ -216,14 +216,14 @@ namespace AE::VFS _memRC = null; // send completion message - auto msg = s_NetVFS->_CreateMsgOpt< CSMsg_VFS_WriteEnd >(); + auto msg = s_NetVFS_Client->_CreateMsgOpt< CSMsg_VFS_WriteEnd >(); if_likely( msg ) { msg->reqId = _id; msg->hash = XXHash64( _data, usize(_dataSize) ); msg->pos = _pos; - if_likely( s_NetVFS->_AddMessage( msg )) + if_likely( s_NetVFS_Client->_AddMessage( msg )) { return; // complete } @@ -244,19 +244,19 @@ namespace AE::VFS _memRC = null; { - Exclusive req {s_NetVFS->_GetWriteReq( _id )}; + Exclusive req {s_NetVFS_Client->_GetWriteReq( _id )}; if_likely( req ) req->Failed(); } - auto msg = s_NetVFS->_CreateMsg< CSMsg_VFS_WriteEnd >(); + auto msg = s_NetVFS_Client->_CreateMsg< CSMsg_VFS_WriteEnd >(); CHECK_ERRV( msg ); msg->reqId = _id; msg->hash = HashVal64{0}; // error msg->pos = _pos; - CHECK( s_NetVFS->_AddMessage( msg )); + CHECK( s_NetVFS_Client->_AddMessage( msg )); } //----------------------------------------------------------------------------- @@ -393,7 +393,7 @@ namespace AE::VFS ++_generation; _Cleanup(); - s_NetVFS->_writeResultPool.Unassign( this ); + s_NetVFS_Client->_writeResultPool.Unassign( this ); } //----------------------------------------------------------------------------- @@ -419,19 +419,19 @@ namespace AE::VFS { CHECK_ERR( IsOpen() ); - auto msg = s_NetVFS->_CreateMsg< CSMsg_VFS_ReadRequest >(); + auto msg = s_NetVFS_Client->_CreateMsg< CSMsg_VFS_ReadRequest >(); CHECK_ERR( msg ); Index_t idx; - CHECK_ERR( s_NetVFS->_readResultPool.Assign( OUT idx )); + CHECK_ERR( s_NetVFS_Client->_readResultPool.Assign( OUT idx )); - RC req{ &s_NetVFS->_readResultPool[ idx ]}; + RC req{ &s_NetVFS_Client->_readResultPool[ idx ]}; EXLOCK( req->Guard() ); if_unlikely( not req->Init( pos, dataSize, data, RVRef(mem) )) { UNTESTED; - s_NetVFS->_readResultPool.Unassign( idx ); + s_NetVFS_Client->_readResultPool.Unassign( idx ); RETURN_ERR( "failed to init read request" ); } @@ -440,10 +440,10 @@ namespace AE::VFS 
msg->pos = pos; msg->size = dataSize; - if_unlikely( not s_NetVFS->_AddMessage( msg )) + if_unlikely( not s_NetVFS_Client->_AddMessage( msg )) { UNTESTED; - s_NetVFS->_readResultPool.Unassign( idx ); + s_NetVFS_Client->_readResultPool.Unassign( idx ); RETURN_ERR( "failed to add message" ); } @@ -478,12 +478,12 @@ namespace AE::VFS */ bool NetworkStorageClient::NetRDataSource::CancelAllRequests () __NE___ { - auto msg = s_NetVFS->_CreateMsg< CSMsg_VFS_CancelAllReadRequests >(); + auto msg = s_NetVFS_Client->_CreateMsg< CSMsg_VFS_CancelAllReadRequests >(); CHECK_ERR( msg ); msg->fileId = NetDataSourceID{ _index.load(), Generation() }; - CHECK_ERR( s_NetVFS->_AddMessage( msg )); + CHECK_ERR( s_NetVFS_Client->_AddMessage( msg )); return true; } @@ -496,10 +496,10 @@ namespace AE::VFS { // close { - auto msg = s_NetVFS->_CreateMsg< CSMsg_VFS_CloseReadFile >(); + auto msg = s_NetVFS_Client->_CreateMsg< CSMsg_VFS_CloseReadFile >(); if ( msg ) { msg->fileId = ID(); - Unused( s_NetVFS->_AddMessage( msg )); + Unused( s_NetVFS_Client->_AddMessage( msg )); } } @@ -507,7 +507,7 @@ namespace AE::VFS _open.store( EStatus::Initial ); _fileSize.store( 0_b ); - s_NetVFS->_readDSPool.Unassign( _index.load() ); + s_NetVFS_Client->_readDSPool.Unassign( _index.load() ); } //----------------------------------------------------------------------------- @@ -543,13 +543,13 @@ namespace AE::VFS { CHECK_ERR( IsOpen() ); - auto msg = s_NetVFS->_CreateMsg< CSMsg_VFS_WriteBegin >(); + auto msg = s_NetVFS_Client->_CreateMsg< CSMsg_VFS_WriteBegin >(); CHECK_ERR( msg ); Index_t idx; - CHECK_ERR( s_NetVFS->_writeResultPool.Assign( OUT idx )); + CHECK_ERR( s_NetVFS_Client->_writeResultPool.Assign( OUT idx )); - RC req{ &s_NetVFS->_writeResultPool[ idx ]}; + RC req{ &s_NetVFS_Client->_writeResultPool[ idx ]}; EXLOCK( req->Guard() ); auto task = Scheduler().Run( @@ -563,7 +563,7 @@ namespace AE::VFS if_unlikely( not req->Init( RVRef(task), pos )) { UNTESTED; - s_NetVFS->_writeResultPool.Unassign( idx 
); + s_NetVFS_Client->_writeResultPool.Unassign( idx ); RETURN_ERR( "failed to init write request" ); } @@ -571,10 +571,10 @@ namespace AE::VFS msg->reqId = NDSRequestID{ idx, req->Generation() }; msg->size = dataSize; - if_unlikely( not s_NetVFS->_AddMessage( msg )) + if_unlikely( not s_NetVFS_Client->_AddMessage( msg )) { UNTESTED; - s_NetVFS->_writeResultPool.Unassign( idx ); + s_NetVFS_Client->_writeResultPool.Unassign( idx ); RETURN_ERR( "failed to add message" ); } @@ -598,12 +598,12 @@ namespace AE::VFS */ bool NetworkStorageClient::NetWDataSource::CancelAllRequests () __NE___ { - auto msg = s_NetVFS->_CreateMsg< CSMsg_VFS_CancelAllWriteRequests >(); + auto msg = s_NetVFS_Client->_CreateMsg< CSMsg_VFS_CancelAllWriteRequests >(); CHECK_ERR( msg ); msg->fileId = NetDataSourceID{ _index.load(), Generation() }; - CHECK( s_NetVFS->_AddMessage( msg )); + CHECK( s_NetVFS_Client->_AddMessage( msg )); return true; } @@ -616,17 +616,17 @@ namespace AE::VFS { // close { - auto msg = s_NetVFS->_CreateMsg< CSMsg_VFS_CloseWriteFile >(); + auto msg = s_NetVFS_Client->_CreateMsg< CSMsg_VFS_CloseWriteFile >(); if ( msg ) { msg->fileId = ID(); - Unused( s_NetVFS->_AddMessage( msg )); + Unused( s_NetVFS_Client->_AddMessage( msg )); } } _generation.fetch_add( 1 ); _open.store( EStatus::Initial ); - s_NetVFS->_writeDSPool.Unassign( _index.load() ); + s_NetVFS_Client->_writeDSPool.Unassign( _index.load() ); } //----------------------------------------------------------------------------- @@ -644,8 +644,8 @@ namespace AE::VFS NetworkStorageClient::NetworkStorageClient () __NE___ : _msgConsumer{ *this } { - CHECK_FATAL( s_NetVFS == null ); - s_NetVFS = this; + CHECK_FATAL( s_NetVFS_Client == null ); + s_NetVFS_Client = this; } /* @@ -655,7 +655,7 @@ namespace AE::VFS */ NetworkStorageClient::~NetworkStorageClient () __NE___ { - s_NetVFS = null; + s_NetVFS_Client = null; } /* diff --git a/AE/engine/src/vfs/Network/NetworkStorageServer.cpp 
b/AE/engine/src/vfs/Network/NetworkStorageServer.cpp index 3c91fe20..ec574257 100644 --- a/AE/engine/src/vfs/Network/NetworkStorageServer.cpp +++ b/AE/engine/src/vfs/Network/NetworkStorageServer.cpp @@ -5,7 +5,7 @@ namespace AE::VFS { - INTERNAL_LINKAGE( Ptr s_NetVFS ); + INTERNAL_LINKAGE( Ptr s_NetVFS_Server ); //----------------------------------------------------------------------------- @@ -23,7 +23,7 @@ namespace AE::VFS for (uint i = 0; (i < _maxParts) and (res.dataSize > _sent); ++i) { const Bytes size = Min( res.dataSize - _sent, _partSize ); - auto msg = s_NetVFS->_CreateMsgOpt< CSMsg_VFS_ReadResult >( _clientId, size-1 ); + auto msg = s_NetVFS_Server->_CreateMsgOpt< CSMsg_VFS_ReadResult >( _clientId, size-1 ); if_likely( msg ) { @@ -32,7 +32,7 @@ namespace AE::VFS msg->index = ushort(_partIdx); MemCopy( OUT msg->data, res.data + _sent, size ); - if_likely( s_NetVFS->_AddMessage( msg )) + if_likely( s_NetVFS_Server->_AddMessage( msg )) { _sent += size; _partIdx ++; @@ -48,14 +48,14 @@ namespace AE::VFS ASSERT( res.dataSize == _sent ); // complete - auto msg = s_NetVFS->_CreateMsgOpt< CSMsg_VFS_ReadComplete >( _clientId ); + auto msg = s_NetVFS_Server->_CreateMsgOpt< CSMsg_VFS_ReadComplete >( _clientId ); if_likely( msg ) { msg->reqId = _id; msg->size = res.dataSize; msg->hash = XXHash64( res.data, usize(res.dataSize) ); - if_likely( s_NetVFS->_AddMessage( msg )) + if_likely( s_NetVFS_Server->_AddMessage( msg )) { _req = null; return; // complete @@ -77,14 +77,14 @@ namespace AE::VFS _req = null; - auto msg = s_NetVFS->_CreateMsg< CSMsg_VFS_ReadComplete >( _clientId ); + auto msg = s_NetVFS_Server->_CreateMsg< CSMsg_VFS_ReadComplete >( _clientId ); CHECK_ERRV( msg ); msg->reqId = _id; msg->size = 0_b; // error msg->hash = HashVal64{0}; - CHECK( s_NetVFS->_AddMessage( msg )); + CHECK( s_NetVFS_Server->_AddMessage( msg )); } //----------------------------------------------------------------------------- @@ -150,8 +150,8 @@ namespace AE::VFS 
NetworkStorageServer::NetworkStorageServer () __NE___ : _msgConsumer{ *this } { - CHECK_FATAL( s_NetVFS == null ); - s_NetVFS = this; + CHECK_FATAL( s_NetVFS_Server == null ); + s_NetVFS_Server = this; } /* @@ -161,7 +161,7 @@ namespace AE::VFS */ NetworkStorageServer::~NetworkStorageServer () __NE___ { - s_NetVFS = null; + s_NetVFS_Server = null; } /* @@ -488,14 +488,14 @@ namespace AE::VFS { AE_LOGI( "failed to find file" ); - auto msg = s_NetVFS->_CreateMsg< CSMsg_VFS_ReadComplete >( inMsg.ClientId() ); + auto msg = s_NetVFS_Server->_CreateMsg< CSMsg_VFS_ReadComplete >( inMsg.ClientId() ); CHECK_ERRV( msg ); msg->reqId = inMsg.reqId; msg->size = 0_b; // error msg->hash = HashVal64{0}; - CHECK( s_NetVFS->_AddMessage( msg )); + CHECK( s_NetVFS_Server->_AddMessage( msg )); return; } @@ -511,13 +511,13 @@ namespace AE::VFS */ inline void NetworkStorageServer::_WriteRequestComplete (NDSRequestID reqId, Bytes written, EClientLocalID clientId) __NE___ { - auto msg = s_NetVFS->_CreateMsg< CSMsg_VFS_WriteComplete >( clientId ); + auto msg = s_NetVFS_Server->_CreateMsg< CSMsg_VFS_WriteComplete >( clientId ); CHECK_ERRV( msg ); msg->reqId = reqId; msg->size = written; - CHECK( s_NetVFS->_AddMessage( msg )); + CHECK( s_NetVFS_Server->_AddMessage( msg )); } /* @@ -602,13 +602,13 @@ namespace AE::VFS p.Then( [req_id = inMsg.reqId, cid = inMsg.ClientId()] (const AsyncWDataSource::Result_t &res) { - if ( s_NetVFS ) - s_NetVFS->_WriteRequestComplete( req_id, res.dataSize, cid ); + if ( s_NetVFS_Server ) + s_NetVFS_Server->_WriteRequestComplete( req_id, res.dataSize, cid ); }); p.Except( [req_id = inMsg.reqId, cid = inMsg.ClientId()] () { - if ( s_NetVFS ) - s_NetVFS->_WriteRequestFailed( req_id, cid ); + if ( s_NetVFS_Server ) + s_NetVFS_Server->_WriteRequestFailed( req_id, cid ); }); } diff --git a/AE/engine/src/video/CMakeLists.txt b/AE/engine/src/video/CMakeLists.txt index 60421f3e..f975efb5 100644 --- a/AE/engine/src/video/CMakeLists.txt +++ 
b/AE/engine/src/video/CMakeLists.txt @@ -22,5 +22,6 @@ endif() EnablePCH( "Video" ) EnablePrebuild( "Video" ) +EnableUnitBuild( "Video" ) install( TARGETS "Video" ARCHIVE DESTINATION "lib" ) diff --git a/AE/engine/src/video/FFmpeg/FFmpegVideoDecoder.cpp b/AE/engine/src/video/FFmpeg/FFmpegVideoDecoder.cpp index 23623807..0a65d761 100644 --- a/AE/engine/src/video/FFmpeg/FFmpegVideoDecoder.cpp +++ b/AE/engine/src/video/FFmpeg/FFmpegVideoDecoder.cpp @@ -57,7 +57,7 @@ namespace AE::Video ASSERT( outCodec == null ); ASSERT( outCodecCtx == null ); - const auto CreateCodecCtx = [&] (const AVCodec *codec) -> bool + const auto CreateCodecCtx = [&] (const AVCodec* codec) -> bool {{ if ( codec == null ) return false; @@ -326,7 +326,7 @@ namespace AE::Video CHECK_ERR( _avPacket != null ); } - const uint2 src_dim { _video.codecCtx->width, _video.codecCtx->height }; + const uint2 src_dim {int2{ _video.codecCtx->width, _video.codecCtx->height }}; if ( All( IsZero( _config.dstDim )) ) _config.dstDim = src_dim; @@ -905,7 +905,7 @@ namespace AE::Video else { // create codec context to get actual pixel format - const auto UseCodec = [this, &dst, params] (const AVCodec *codec) -> bool + const auto UseCodec = [this, &dst, params] (const AVCodec* codec) -> bool {{ if ( codec == null ) return false; diff --git a/AE/engine/tests/asset_packer/CMakeLists.txt b/AE/engine/tests/asset_packer/CMakeLists.txt index c5f49ccf..a942e132 100644 --- a/AE/engine/tests/asset_packer/CMakeLists.txt +++ b/AE/engine/tests/asset_packer/CMakeLists.txt @@ -17,12 +17,13 @@ if (TARGET "PipelineCompiler-shared" OR TARGET "PipelineCompiler-shared" OR TARG "${PIPELINE_COMPILER_DIR}/Packer/FeatureSetPack.cpp" ) set( GRAPHICS_SRC "${GRAPHICS_DIR}/Public/RenderState.h" - "${GRAPHICS_DIR}/Private/RenderState.cpp" + "${GRAPHICS_DIR}/Private/RenderState.cpp.h" "${GRAPHICS_DIR}/Private/EnumUtils.h" - "${GRAPHICS_DIR}/Private/EnumUtils.cpp" + "${GRAPHICS_DIR}/Private/EnumUtils.cpp.h" "${GRAPHICS_DIR}/Public/FeatureSet.h" - 
"${GRAPHICS_DIR}/Private/FeatureSet.cpp" - "${GRAPHICS_DIR}/Private/ImageDesc.cpp" ) + "${GRAPHICS_DIR}/Private/FeatureSet.cpp.h" + "${GRAPHICS_DIR}/Private/ImageDesc.cpp.h" + "${GRAPHICS_DIR}/Private/Shared.cpp" ) set( PLATFORM_SRC "${PLATFORM_DIR}/Private/SerializableInputActions.h" "${PLATFORM_DIR}/Private/SerializableInputActions.cpp" @@ -90,6 +91,15 @@ if (TARGET "PipelineCompiler-shared" OR TARGET "PipelineCompiler-shared" OR TARG target_sources( "Tests.AssetPacker" PRIVATE "${PipelineCompiler.trigger}" ) source_group( "_gen_" FILES "${PipelineCompiler.trigger}" ) set_property( SOURCE "${PipelineCompiler.trigger}" PROPERTY GENERATED 1 ) + + # same as in PipelineCompiler + if ( (TARGET "Vulkan-lib") OR (EXISTS "${AE_EXTERNAL_SHARED_PATH}/Vulkan/vulkan/vulkan.h") ) + set( HAS_VULKAN_HEADERS ON ) + endif() + if (${HAS_VULKAN_HEADERS}) + target_include_directories( "Tests.AssetPacker" PRIVATE "${AE_EXTERNAL_SHARED_PATH}/Vulkan" ) + target_compile_definitions( "Tests.AssetPacker" PRIVATE "AE_ENABLE_VULKAN" ) + endif() endif() if (TARGET "AssetPacker-shared") diff --git a/AE/engine/tests/asset_packer/pipeline_test/test1_ref.txt b/AE/engine/tests/asset_packer/pipeline_test/test1_ref.txt index 3d08361d..1ecdf894 100644 --- a/AE/engine/tests/asset_packer/pipeline_test/test1_ref.txt +++ b/AE/engine/tests/asset_packer/pipeline_test/test1_ref.txt @@ -655,9 +655,9 @@ RenderTechniques { SpirvShaders { [0] offset = 0 b [1] offset = 8 Kb - [2] offset = 10 Kb + [2] offset = 9 Kb [3] offset = 11 Kb - [4] offset = 13 Kb + [4] offset = 12 Kb [5] offset = 14 Kb [6] offset = 16 Kb [7] offset = 17 Kb @@ -677,8 +677,8 @@ SpirvShaderData { codeSize = 5 Kb withTrace [1] - codeSize = 1948 b - codeHash = dcaeae1cca0a4f63 + codeSize = 1876 b + codeHash = 8d355aae1434549e [2] codeSize = 1820 b codeHash = 549e64ca77d15f57 @@ -695,33 +695,33 @@ SpirvShaderData { codeSize = 1724 b codeHash = 4ec176b7e5df2f6e [7] - codeSize = 1680 b - codeHash = 89aa2f36bc81b37a + codeSize = 1776 b + codeHash 
= abfa56e707eba40f [8] codeSize = 3888 b codeHash = 47bb31a4c181b02e [9] - codeSize = 2284 b - codeHash = f39a6a9ed6b4dcbf + codeSize = 2324 b + codeHash = dc47fb7bbcae1270 [10] codeSize = 3824 b codeHash = 8466dabba1f78b8 [11] codeSize = 4 Kb - codeHash = 8951c441457e013a + codeHash = cbc3b204b9da8fa9 [12] - codeSize = 3032 b - codeHash = 5767bd21d2fb8e23 + codeSize = 2972 b + codeHash = fb68a21ab420a227 [13] - codeSize = 1736 b - codeHash = 1040aba06cf14fa2 + codeSize = 1664 b + codeHash = fc69a217fd1b589f [14] - codeSize = 2224 b - codeHash = f11af4197e504508 + codeSize = 2152 b + codeHash = d1c1d91a11241645 [15] - codeSize = 2504 b - codeHash = fb391321a38d40c2 + codeSize = 2432 b + codeHash = 84ae09077a958ceb [16] - codeSize = 2928 b - codeHash = 3b9bba3e2cbb1d8 + codeSize = 2856 b + codeHash = a1b0a442c9ff56d5 } \ No newline at end of file diff --git a/AE/engine/tests/asset_packer/pipeline_test/vk_types.h b/AE/engine/tests/asset_packer/pipeline_test/vk_types.h index 01212804..8c07e9bc 100644 --- a/AE/engine/tests/asset_packer/pipeline_test/vk_types.h +++ b/AE/engine/tests/asset_packer/pipeline_test/vk_types.h @@ -3,7 +3,7 @@ // size: 12, align: 4 struct VB_3_Pos { - static constexpr auto TypeName = ShaderStructName{HashVal32{0xea1d5fa6u}}; // 'VB_3_Pos' + static constexpr auto TypeName = ShaderStructName{HashVal32{0xea1d5fa6u}}; packed_float3 Position; }; @@ -16,7 +16,7 @@ // size: 16, align: 8 (16) struct PushConst1 { - static constexpr auto TypeName = ShaderStructName{HashVal32{0x7f418efau}}; // 'PushConst1' + static constexpr auto TypeName = ShaderStructName{HashVal32{0x7f418efau}}; float2 scale; float2 bias; @@ -31,7 +31,7 @@ // size: 16, align: 16 struct PushConst2 { - static constexpr auto TypeName = ShaderStructName{HashVal32{0xe648df40u}}; // 'PushConst2' + static constexpr auto TypeName = ShaderStructName{HashVal32{0xe648df40u}}; float4 color; }; @@ -44,7 +44,7 @@ // size: 64, align: 16 struct UBlock { - static constexpr auto TypeName = 
ShaderStructName{HashVal32{0x697977dcu}}; // 'UBlock' + static constexpr auto TypeName = ShaderStructName{HashVal32{0x697977dcu}}; float4x4_storage mvp; }; @@ -57,7 +57,7 @@ // size: 24 (32), align: 16 struct alignas(16) BufferRef { - static constexpr auto TypeName = ShaderStructName{HashVal32{0x87910438u}}; // 'BufferRef' + static constexpr auto TypeName = ShaderStructName{HashVal32{0x87910438u}}; float3 Position; float2 Texcoord; @@ -72,7 +72,7 @@ // size: 16, align: 4 struct vb_input1 { - static constexpr auto TypeName = ShaderStructName{HashVal32{0x892b138u}}; // 'vb_input1' + static constexpr auto TypeName = ShaderStructName{HashVal32{0x892b138u}}; packed_float3 Position; packed_ushort2 Texcoord; @@ -87,7 +87,7 @@ // size: 8, align: 4 struct VB_3_Attribs { - static constexpr auto TypeName = ShaderStructName{HashVal32{0x93789f1bu}}; // 'VB_3_Attribs' + static constexpr auto TypeName = ShaderStructName{HashVal32{0x93789f1bu}}; packed_float2 Texcoord; }; diff --git a/AE/engine/tests/base/UnitTest_CPUInfo.cpp b/AE/engine/tests/base/UnitTest_CPUInfo.cpp index 49671d69..6fe8538a 100644 --- a/AE/engine/tests/base/UnitTest_CPUInfo.cpp +++ b/AE/engine/tests/base/UnitTest_CPUInfo.cpp @@ -12,29 +12,30 @@ namespace } - static void Test_CpuPerformance () + static void Test_PerformanceStat () { const uint core_count = CpuArchInfo::Get().cpu.logicalCoreCount; #ifndef AE_PLATFORM_WINDOWS - const auto core0_freq = CpuPerformance::GetFrequency( 0 ); + const auto core0_freq = PerformanceStat::CPU_GetFrequency( 0 ); //TEST( core0_freq > 0 ); - Array all_core_freq; + Array all_core_freq; all_core_freq.resize( core_count ); - TEST_Eq( CpuPerformance::GetFrequency( OUT all_core_freq.data(), core_count ), core_count ); + TEST_Eq( PerformanceStat::CPU_GetFrequency( OUT all_core_freq.data(), core_count ), core_count ); #endif Array user_fract; user_fract.resize( core_count ); Array kernel_fract; kernel_fract.resize( core_count ); - TEST_Eq( CpuPerformance::GetUsage( OUT 
user_fract.data(), OUT kernel_fract.data(), core_count ), core_count ); + TEST_Eq( PerformanceStat::CPU_GetUsage( OUT user_fract.data(), OUT kernel_fract.data(), core_count ), core_count ); - CpuPerformance::PerProcessCounters per_proc; - CpuPerformance::PerThreadCounters per_thread; + PerformanceStat::PerProcessCounters per_proc; + PerformanceStat::PerThreadCounters per_thread; + PerformanceStat::MemoryCounters mem; - if ( not CpuPerformance::GetPerfCounters( OUT per_proc, OUT per_thread )) + if ( not PerformanceStat::GetPerfCounters( OUT &per_proc, OUT &per_thread, OUT &mem )) { - AE_LOGW( "CpuPerformance::GetPerfCounters() - failed" ); + AE_LOGW( "PerformanceStat::GetPerfCounters() - failed" ); } } } @@ -45,7 +46,7 @@ extern void UnitTest_CPUInfo () Test_CpuArchInfo(); #ifndef AE_PLATFORM_APPLE - Test_CpuPerformance(); + Test_PerformanceStat(); #endif TEST_PASSED(); diff --git a/AE/engine/tests/base/UnitTest_Math.cpp b/AE/engine/tests/base/UnitTest_Math.cpp index 8832f4af..b59d08b7 100644 --- a/AE/engine/tests/base/UnitTest_Math.cpp +++ b/AE/engine/tests/base/UnitTest_Math.cpp @@ -247,6 +247,21 @@ namespace { StaticAssert( not IsScalar ); StaticAssert( IsBytes ); + + struct A { + alignas(8) char c [10]; + } a; + alignas(16) char b [10]; + + StaticAssert( sizeof(a) == 16 ); + StaticAssert( alignof(A) == 8 ); + StaticAssert( sizeof(b) == 10 ); + StaticAssert( alignof(decltype(b)) == 1 ); + + StaticAssert( Sizeof(a) == 16 ); + StaticAssert( Alignof(a) == 8 ); + StaticAssert( Sizeof(b) == 10 ); + StaticAssert( Alignof(b) == 1 ); } diff --git a/AE/engine/tests/base/UnitTest_Math_Vec.cpp b/AE/engine/tests/base/UnitTest_Math_Vec.cpp index 5139bdbb..cad41428 100644 --- a/AE/engine/tests/base/UnitTest_Math_Vec.cpp +++ b/AE/engine/tests/base/UnitTest_Math_Vec.cpp @@ -70,8 +70,10 @@ namespace bool2 a0 = int2(2) > 0; TEST( All( a0 )); bool2 a1 = int2(2) == 2; TEST( All( a1 )); bool3 a2 = int3(1, 2, 3) > int3(2, 1, 4); - bool3 a3 = a2 == bool3(false, true, false); TEST( All( 
a3 )); + bool3 a3 = a2 == bool3{false, true, false}; TEST( All( a3 )); bool3 a4 = uint3(1) < uint3(2); TEST( All( a4 )); + + int2(1) + int2(2); // must be compiler warning } diff --git a/AE/engine/tests/base/UnitTest_TypeTraits.cpp b/AE/engine/tests/base/UnitTest_TypeTraits.cpp index 517df552..de67e1d6 100644 --- a/AE/engine/tests/base/UnitTest_TypeTraits.cpp +++ b/AE/engine/tests/base/UnitTest_TypeTraits.cpp @@ -123,6 +123,13 @@ namespace } + static void Test_Tuple() + { + StaticAssert( IsNoExcept( Tuple{ 1u, 2.2f } )); + StaticAssert( not IsNoExcept( Tuple{ 0.9, -10, "aa"s } )); + } + + static void Test_TupleConcat () { auto t = TupleConcat( Tuple{ 1u, 2.2f }, Tuple{ 0.9, -10, "aa"s }, Tuple{ 9ull }); @@ -298,6 +305,23 @@ namespace using B = Test_IsNothrowInvocable_B; CheckNothrow( IsNothrowInvocable< decltype(&B::template operator()), B& >); } + + + static void Test_Attributes () + { + AE_INLINE_ALL Unused( TupleConcat( Tuple{ 1u, 2.2f }, Tuple{ 0.9, -10, "aa"s }, Tuple{ 9ull })); + AE_INLINE_CALLS Unused( TupleConcat( Tuple{ 1u, 2.2f }, Tuple{ 0.9, -10, "aa"s }, Tuple{ 9ull })); + + AE_INLINE_ALL { + auto t = TupleConcat( Tuple{ 1u, 2.2f }, Tuple{ 0.9, -10, "aa"s }, Tuple{ 9ull }); + Unused( t.Get<0>() ); + } + + AE_INLINE_CALLS { + auto t = TupleConcat( Tuple{ 1u, 2.2f }, Tuple{ 0.9, -10, "aa"s }, Tuple{ 9ull }); + Unused( t.Get<0>() ); + } + } } @@ -306,12 +330,17 @@ extern void UnitTest_TypeTraits () Test_IsSpecializationOf(); Test_IsTrivial(); Test_RemoveAllQualifiers(); + + Test_Tuple(); Test_TupleConcat(); Test_TupleRef(); + Test_Nothrow(); Test_ArrayView(); Test_IsConst(); Test_IsNothrowInvocable(); + Test_Attributes(); + TEST_PASSED(); } diff --git a/AE/engine/tests/ecs-st/UnitTest_Registry.cpp b/AE/engine/tests/ecs-st/UnitTest_Registry.cpp index e555f50e..5ce2c2f3 100644 --- a/AE/engine/tests/ecs-st/UnitTest_Registry.cpp +++ b/AE/engine/tests/ecs-st/UnitTest_Registry.cpp @@ -41,13 +41,13 @@ namespace StaticAssert(( IsSameTypes< typename A::template 
Get<0>, usize > )); using B = typename A::PopFront::type; - return CreateQuery1( reg, (B const*)null ); + return CreateQuery1( reg, Cast< B const >(null) ); } template static void EnqueueWithoutQuery (Registry ®, Fn &&fn) { - QueryID q = CreateQuery2( reg, (typename FunctionInfo::args::template Get<0> *)null ); + QueryID q = CreateQuery2( reg, Cast< typename FunctionInfo::args::template Get<0> >(null) ); reg.Enqueue( q, FwdArg(fn) ); } diff --git a/AE/engine/tests/geometry_tools/UnitTest_SphericalCubeMath.cpp b/AE/engine/tests/geometry_tools/UnitTest_SphericalCubeMath.cpp index 3063ee13..d9153d79 100644 --- a/AE/engine/tests/geometry_tools/UnitTest_SphericalCubeMath.cpp +++ b/AE/engine/tests/geometry_tools/UnitTest_SphericalCubeMath.cpp @@ -16,7 +16,7 @@ namespace for (uint y = 1; y < lod+2; ++y) for (uint x = 1; x < lod+2; ++x) { - const double2 ncoord = ToSNorm( double2{x,y} / (lod+2) ); + const double2 ncoord = ToSNorm( double2{uint2{ x,y }} / (lod+2) ); const double2 fwd_2d = Projection::Forward( ncoord ); const double2 inv_2d = Projection::Inverse( fwd_2d ); diff --git a/AE/engine/tests/graphics/RenderGraph/Test_RG_Debugger4.cpp b/AE/engine/tests/graphics/RenderGraph/Test_RG_Debugger4.cpp index 5d595008..201432fc 100644 --- a/AE/engine/tests/graphics/RenderGraph/Test_RG_Debugger4.cpp +++ b/AE/engine/tests/graphics/RenderGraph/Test_RG_Debugger4.cpp @@ -92,6 +92,7 @@ namespace break; } } + switch_end typename CtxTypes::ASBuild as_ctx{ *this, copy_ctx.ReleaseCommandBuffer() }; diff --git a/AE/engine/tests/graphics/RenderGraph/Test_RG_Debugger5.cpp b/AE/engine/tests/graphics/RenderGraph/Test_RG_Debugger5.cpp index cc419fa2..bb5c79c8 100644 --- a/AE/engine/tests/graphics/RenderGraph/Test_RG_Debugger5.cpp +++ b/AE/engine/tests/graphics/RenderGraph/Test_RG_Debugger5.cpp @@ -91,6 +91,7 @@ namespace break; } } + switch_end typename CtxTypes::ASBuild as_ctx{ *this, copy_ctx.ReleaseCommandBuffer() }; diff --git 
a/AE/engine/tests/graphics/RenderGraph/Test_RG_ImageFormat.cpp b/AE/engine/tests/graphics/RenderGraph/Test_RG_ImageFormat.cpp index 12583a6c..4bc49970 100644 --- a/AE/engine/tests/graphics/RenderGraph/Test_RG_ImageFormat.cpp +++ b/AE/engine/tests/graphics/RenderGraph/Test_RG_ImageFormat.cpp @@ -4,7 +4,7 @@ namespace { - static bool ImageFormat1Test1 () + static bool ImageFormatTest1 () { auto& res_mngr = GraphicsScheduler().GetResourceManager(); @@ -13,39 +13,61 @@ namespace GAutorelease view_id_sampled; GAutorelease view_id_storage; + #ifdef AE_ENABLE_VULKAN + const bool image_fmt_list_supported = res_mngr.GetDevice().GetVExtensions().imageFormatList; + #else + const bool image_fmt_list_supported = false; // TODO + #endif + + // create image { ImageDesc desc; - desc.dimension = uint3{ 128, 128, 1 }; + desc.dimension = ImageDim_t{ 128, 128, 1 }; desc.imageDim = EImageDim_2D; desc.format = EPixelFormat::RGBA8_UNorm; desc.usage = EImageUsage::ColorAttachment | EImageUsage::Sampled | EImageUsage::Storage; desc.AddViewFormat( EPixelFormat::sRGB8_A8 ); - #ifdef AE_ENABLE_VULKAN - if ( res_mngr.GetDevice().GetVExtensions().imageFormatList ) - CHECK_ERR( res_mngr.IsSupported( desc )); - #endif - if ( not res_mngr.IsSupported( desc )) + { + CHECK_ERR( not image_fmt_list_supported ); return true; // not supported + } image_id = res_mngr.CreateImage( desc, Default, gfx_alloc ); CHECK_ERR( image_id ); - }{ + + } + + // create view + { ImageViewDesc desc; desc.viewType = EImage_2D; desc.format = EPixelFormat::sRGB8_A8; desc.extUsage = EImageUsage::Sampled; + CHECK_ERR( res_mngr.IsSupported( image_id.Get(), desc )); + view_id_sampled = res_mngr.CreateImageView( desc, image_id.Get() ); CHECK_ERR( view_id_sampled ); } + // check + { + auto& desc = res_mngr.GetDescription( image_id ); + auto& view = res_mngr.GetDescription( view_id_sampled ); + + if ( image_fmt_list_supported ) + CHECK_ERR( NoBits( desc.options, EImageOpt::MutableFormat )); + + CHECK_ERR( view.format == 
EPixelFormat::sRGB8_A8 ); + } + return true; } - static bool ImageFormat1Test2 () + static bool ImageFormatTest2 () { auto& res_mngr = GraphicsScheduler().GetResourceManager(); @@ -54,24 +76,30 @@ namespace GAutorelease view_id_sampled; GAutorelease view_id_storage; + // create image { ImageDesc desc; - desc.dimension = uint3{ 128, 128, 1 }; + desc.dimension = ImageDim_t{ 128, 128, 1 }; desc.imageDim = EImageDim_2D; desc.format = EPixelFormat::RGBA8_UNorm; desc.usage = EImageUsage::ColorAttachment | EImageUsage::Sampled | EImageUsage::Storage; desc.options = EImageOpt::MutableFormat; desc.AddViewFormat( EPixelFormat::sRGB8_A8 ) - .AddViewFormat( EPixelFormat::RGBA8_UNorm ); + .AddViewFormat( EPixelFormat::RGBA8_UNorm ); // optional image_id = res_mngr.CreateImage( desc, Default, gfx_alloc ); CHECK_ERR( image_id ); - }{ + } + + // create view + { ImageViewDesc desc; desc.viewType = EImage_2D; desc.format = EPixelFormat::sRGB8_A8; desc.extUsage = EImageUsage::Sampled; + CHECK_ERR( res_mngr.IsSupported( image_id.Get(), desc )); + view_id_sampled = res_mngr.CreateImageView( desc, image_id.Get() ); CHECK_ERR( view_id_sampled ); }{ @@ -80,10 +108,149 @@ namespace desc.format = EPixelFormat::RGBA8_UNorm; desc.extUsage = EImageUsage::Storage; + CHECK_ERR( res_mngr.IsSupported( image_id.Get(), desc )); + view_id_storage = res_mngr.CreateImageView( desc, image_id.Get() ); CHECK_ERR( view_id_storage ); } + // check + { + auto& desc = res_mngr.GetDescription( image_id ); + auto& view1 = res_mngr.GetDescription( view_id_sampled ); + auto& view2 = res_mngr.GetDescription( view_id_storage ); + + CHECK_ERR( AllBits( desc.options, EImageOpt::MutableFormat )); + CHECK_ERR( view1.format == EPixelFormat::sRGB8_A8 ); + CHECK_ERR( view2.format == EPixelFormat::RGBA8_UNorm ); + } + + return true; + } + + + static bool ImageFormatTest3 () + { + auto& res_mngr = GraphicsScheduler().GetResourceManager(); + + GfxMemAllocatorPtr gfx_alloc = res_mngr.CreateLinearGfxMemAllocator(); + 
GAutorelease image_id; + GAutorelease view_id_uncomp; + GAutorelease view_id_comp; + + // create image + { + ImageDesc desc; + desc.dimension = ImageDim_t{ 128, 128, 1 }; + desc.imageDim = EImageDim_2D; + desc.format = EPixelFormat::BC1_RGB8_UNorm; + desc.usage = EImageUsage::Sampled | EImageUsage::Transfer; + desc.options = EImageOpt::MutableFormat | EImageOpt::BlockTexelViewCompatible; + + if ( not res_mngr.IsSupported( desc )) + return true; // not supported + + image_id = res_mngr.CreateImage( desc, Default, gfx_alloc ); + CHECK_ERR( image_id ); + } + + // create view + { + ImageViewDesc desc; + desc.viewType = EImage_2D; + desc.format = EPixelFormat::RG32U; + + CHECK_ERR( res_mngr.IsSupported( image_id.Get(), desc )); + + view_id_uncomp = res_mngr.CreateImageView( desc, image_id.Get() ); + CHECK_ERR( view_id_uncomp ); + }{ + ImageViewDesc desc; + desc.viewType = EImage_2D; + desc.format = EPixelFormat::BC1_RGB8_UNorm; + + CHECK_ERR( res_mngr.IsSupported( image_id.Get(), desc )); + + view_id_comp = res_mngr.CreateImageView( desc, image_id.Get() ); + CHECK_ERR( view_id_comp ); + } + + // check + { + const auto& desc = res_mngr.GetDescription( image_id ); + const auto& view1 = res_mngr.GetDescription( view_id_comp ); + const auto& view2 = res_mngr.GetDescription( view_id_uncomp ); + + CHECK_ERR( AllBits( desc.options, EImageOpt::MutableFormat | EImageOpt::BlockTexelViewCompatible )); + CHECK_ERR( view1.format == EPixelFormat::BC1_RGB8_UNorm ); + CHECK_ERR( view2.format == EPixelFormat::RG32U ); + CHECK_ERR( All( view1.Dimension2() == uint2{128} )); + CHECK_ERR( All( view2.Dimension2() == uint2{128/4} )); + } + return true; + } + + + static bool ImageFormatTest4 () + { + auto& res_mngr = GraphicsScheduler().GetResourceManager(); + + GfxMemAllocatorPtr gfx_alloc = res_mngr.CreateLinearGfxMemAllocator(); + GAutorelease image_id; + GAutorelease view_id_uncomp; + GAutorelease view_id_comp; + + // create image + { + ImageDesc desc; + desc.dimension = ImageDim_t{ 128, 
128, 1 }; + desc.imageDim = EImageDim_2D; + desc.format = EPixelFormat::BC1_RGB8_UNorm; + desc.usage = EImageUsage::Sampled | EImageUsage::Storage | EImageUsage::Transfer; + desc.options = EImageOpt::MutableFormat | EImageOpt::BlockTexelViewCompatible | EImageOpt::ExtendedUsage; + desc.AddViewFormat( EPixelFormat::BC1_RGB8_UNorm ) + .AddViewFormat( EPixelFormat::RG32U ); // optional + + if ( not res_mngr.IsSupported( desc )) + return true; // not supported + + image_id = res_mngr.CreateImage( desc, Default, gfx_alloc ); + CHECK_ERR( image_id ); + } + + // create view + { + ImageViewDesc desc; + desc.viewType = EImage_2D; + desc.format = EPixelFormat::RG32U; + desc.extUsage = EImageUsage::Storage; + + CHECK_ERR( res_mngr.IsSupported( image_id.Get(), desc )); + + view_id_uncomp = res_mngr.CreateImageView( desc, image_id.Get() ); + CHECK_ERR( view_id_uncomp ); + }{ + ImageViewDesc desc; + desc.viewType = EImage_2D; + desc.format = EPixelFormat::BC1_RGB8_UNorm; + desc.extUsage = EImageUsage::Sampled; + + CHECK_ERR( res_mngr.IsSupported( image_id.Get(), desc )); + + view_id_comp = res_mngr.CreateImageView( desc, image_id.Get() ); + CHECK_ERR( view_id_comp ); + } + + // check + { + auto& desc = res_mngr.GetDescription( image_id ); + auto& view1 = res_mngr.GetDescription( view_id_comp ); + auto& view2 = res_mngr.GetDescription( view_id_uncomp ); + + CHECK_ERR( AllBits( desc.options, EImageOpt::MutableFormat | EImageOpt::BlockTexelViewCompatible )); + CHECK_ERR( view1.format == EPixelFormat::BC1_RGB8_UNorm ); + CHECK_ERR( view2.format == EPixelFormat::RG32U ); + } return true; } @@ -94,8 +261,10 @@ bool RGTest::Test_ImageFormat () { bool result = true; - RG_CHECK( ImageFormat1Test1() ); - RG_CHECK( ImageFormat1Test2() ); + RG_CHECK( ImageFormatTest1() ); + RG_CHECK( ImageFormatTest2() ); + RG_CHECK( ImageFormatTest3() ); + RG_CHECK( ImageFormatTest4() ); AE_LOGI( TEST_NAME << " - passed" ); return result; diff --git 
a/AE/engine/tests/graphics/RenderGraph/Test_RG_RayQuery1.cpp b/AE/engine/tests/graphics/RenderGraph/Test_RG_RayQuery1.cpp index a31f14a3..984b5530 100644 --- a/AE/engine/tests/graphics/RenderGraph/Test_RG_RayQuery1.cpp +++ b/AE/engine/tests/graphics/RenderGraph/Test_RG_RayQuery1.cpp @@ -89,6 +89,7 @@ namespace break; } } + switch_end typename CtxTypes::ASBuild as_ctx{ *this, copy_ctx.ReleaseCommandBuffer() }; diff --git a/AE/engine/tests/graphics/RenderGraph/Test_RG_RayTracing1.cpp b/AE/engine/tests/graphics/RenderGraph/Test_RG_RayTracing1.cpp index f4184d08..27d73e28 100644 --- a/AE/engine/tests/graphics/RenderGraph/Test_RG_RayTracing1.cpp +++ b/AE/engine/tests/graphics/RenderGraph/Test_RG_RayTracing1.cpp @@ -90,6 +90,7 @@ namespace break; } } + switch_end typename CtxTypes::ASBuild as_ctx{ *this, copy_ctx.ReleaseCommandBuffer() }; diff --git a/AE/engine/tests/graphics/RenderGraph/Test_RG_RayTracing2.cpp b/AE/engine/tests/graphics/RenderGraph/Test_RG_RayTracing2.cpp index 88358d39..39ba8ca1 100644 --- a/AE/engine/tests/graphics/RenderGraph/Test_RG_RayTracing2.cpp +++ b/AE/engine/tests/graphics/RenderGraph/Test_RG_RayTracing2.cpp @@ -91,6 +91,7 @@ namespace break; } } + switch_end ASBuildIndirectCommand ind_cmd = {}; ind_cmd.primitiveCount = 1; // instances diff --git a/AE/engine/tests/graphics/RenderGraph/Test_RG_RayTracing3.cpp b/AE/engine/tests/graphics/RenderGraph/Test_RG_RayTracing3.cpp index b1a4cfc7..9a8f31b8 100644 --- a/AE/engine/tests/graphics/RenderGraph/Test_RG_RayTracing3.cpp +++ b/AE/engine/tests/graphics/RenderGraph/Test_RG_RayTracing3.cpp @@ -90,6 +90,7 @@ namespace break; } } + switch_end typename CtxTypes::ASBuild as_ctx{ *this, copy_ctx.ReleaseCommandBuffer() }; diff --git a/AE/engine/tests/graphics/RenderGraph/cpp/mtl_types.h b/AE/engine/tests/graphics/RenderGraph/cpp/mtl_types.h index a946f736..6afed6bf 100644 --- a/AE/engine/tests/graphics/RenderGraph/cpp/mtl_types.h +++ b/AE/engine/tests/graphics/RenderGraph/cpp/mtl_types.h @@ -3,7 +3,7 @@ // 
size: 12, align: 4 struct Vertex_draw2 { - static constexpr auto TypeName = ShaderStructName{HashVal32{0x208fe808u}}; // 'Vertex_draw2' + static constexpr auto TypeName = ShaderStructName{HashVal32{0x208fe808u}}; packed_float2 Position; packed_ubyte4 Color; @@ -18,7 +18,7 @@ // size: 48, align: 16 struct PC_draw3 { - static constexpr auto TypeName = ShaderStructName{HashVal32{0x83006fc3u}}; // 'PC_draw3' + static constexpr auto TypeName = ShaderStructName{HashVal32{0x83006fc3u}}; StaticArray< float4, 3 > PositionColorArr; }; diff --git a/AE/engine/tests/graphics/RenderGraph/cpp/vk_types.h b/AE/engine/tests/graphics/RenderGraph/cpp/vk_types.h index ed83f0ab..b72717ba 100644 --- a/AE/engine/tests/graphics/RenderGraph/cpp/vk_types.h +++ b/AE/engine/tests/graphics/RenderGraph/cpp/vk_types.h @@ -3,7 +3,7 @@ // size: 12, align: 4 struct Vertex_draw2 { - static constexpr auto TypeName = ShaderStructName{HashVal32{0x208fe808u}}; // 'Vertex_draw2' + static constexpr auto TypeName = ShaderStructName{HashVal32{0x208fe808u}}; packed_float2 Position; packed_ubyte4 Color; @@ -18,7 +18,7 @@ // size: 48, align: 16 struct PC_draw3 { - static constexpr auto TypeName = ShaderStructName{HashVal32{0x83006fc3u}}; // 'PC_draw3' + static constexpr auto TypeName = ShaderStructName{HashVal32{0x83006fc3u}}; StaticArray< float4, 3 > PositionColorArr; }; @@ -31,7 +31,7 @@ // size: 12, align: 4 (16) struct Vertex_VRS { - static constexpr auto TypeName = ShaderStructName{HashVal32{0xa70d7fceu}}; // 'Vertex_VRS' + static constexpr auto TypeName = ShaderStructName{HashVal32{0xa70d7fceu}}; packed_float2 Position; int ShadingRate; diff --git a/AE/engine/tests/graphics/Test_Buffer.cpp b/AE/engine/tests/graphics/Test_Buffer.cpp index d6533886..68136ceb 100644 --- a/AE/engine/tests/graphics/Test_Buffer.cpp +++ b/AE/engine/tests/graphics/Test_Buffer.cpp @@ -6,7 +6,7 @@ namespace { static void BufferView_Test (ResourceManager &resMngr, const BufferDesc &bufDesc) { - if ( not AnyBits( bufDesc.usage, 
EBufferUsage::UniformTexel | EBufferUsage::StorageTexel )) + if ( NoBits( bufDesc.usage, EBufferUsage::UniformTexel | EBufferUsage::StorageTexel )) return; auto buffer = resMngr.CreateBuffer( bufDesc ); diff --git a/AE/engine/tests/graphics/Test_Image.cpp b/AE/engine/tests/graphics/Test_Image.cpp index 760c1b05..f820787e 100644 --- a/AE/engine/tests/graphics/Test_Image.cpp +++ b/AE/engine/tests/graphics/Test_Image.cpp @@ -49,10 +49,10 @@ namespace switch_enum( desc.imageDim ) { - case EImageDim_1D : desc.dimension = uint3{128, 0, 0}; break; - case EImageDim_2D : desc.dimension = uint3{128, 128, 0}; break; - case EImageDim_3D : desc.dimension = uint3{128, 128, 128}; break; - case EImageDim::Unknown : DBG_WARNING( "not supported" ); break; + case EImageDim_1D : desc.dimension = ImageDim_t{128, 0, 0}; break; + case EImageDim_2D : desc.dimension = ImageDim_t{128, 128, 0}; break; + case EImageDim_3D : desc.dimension = ImageDim_t{128, 128, 128}; break; + case EImageDim::Unknown : DBG_WARNING( "not supported" ); break; } switch_end diff --git a/AE/engine/tests/graphics/UnitTest_EResourceState.cpp b/AE/engine/tests/graphics/UnitTest_EResourceState.cpp index 2ff23cf1..a578fe76 100644 --- a/AE/engine/tests/graphics/UnitTest_EResourceState.cpp +++ b/AE/engine/tests/graphics/UnitTest_EResourceState.cpp @@ -78,7 +78,7 @@ namespace if ( req_sh ){ TEST( AnyBits( stage, ShaderStages )); }else{ - TEST( not AnyBits( stage, ShaderStages ) or (i == _EResState::RTShaderBindingTable) ); + TEST( NoBits( stage, ShaderStages ) or (i == _EResState::RTShaderBindingTable) ); } } } diff --git a/AE/engine/tests/graphics/UnitTest_ImageDesc.cpp b/AE/engine/tests/graphics/UnitTest_ImageDesc.cpp index 742b7771..07d24091 100644 --- a/AE/engine/tests/graphics/UnitTest_ImageDesc.cpp +++ b/AE/engine/tests/graphics/UnitTest_ImageDesc.cpp @@ -12,12 +12,12 @@ namespace desc.SetDimension( 2 ); TEST( desc.imageDim == EImageDim_1D ); - TEST( All( desc.dimension == uint3{2, 1, 1} )); + TEST( All( 
desc.dimension == ImageDim_t{2, 1, 1} )); desc.Validate(); TEST( desc.imageDim == EImageDim_1D ); - TEST( All( desc.dimension == uint3{2, 1, 1} )); + TEST( All( desc.dimension == ImageDim_t{2, 1, 1} )); } { ImageDesc desc; @@ -25,12 +25,12 @@ namespace desc.SetDimension({ 2, 3 }); TEST( desc.imageDim == EImageDim_2D ); - TEST( All( desc.dimension == uint3{2, 3, 1} )); + TEST( All( desc.dimension == ImageDim_t{2, 3, 1} )); desc.Validate(); TEST( desc.imageDim == EImageDim_2D ); - TEST( All( desc.dimension == uint3{2, 3, 1} )); + TEST( All( desc.dimension == ImageDim_t{2, 3, 1} )); } { ImageDesc desc; @@ -38,12 +38,12 @@ namespace desc.SetDimension({ 2, 3, 4 }); TEST( desc.imageDim == EImageDim_3D ); - TEST( All( desc.dimension == uint3{2, 3, 4} )); + TEST( All( desc.dimension == ImageDim_t{2, 3, 4} )); desc.Validate(); TEST( desc.imageDim == EImageDim_3D ); - TEST( All( desc.dimension == uint3{2, 3, 4} )); + TEST( All( desc.dimension == ImageDim_t{2, 3, 4} )); } } @@ -160,7 +160,7 @@ namespace TEST( desc.imageDim == EImageDim_2D ); TEST( desc.mipLevels == 5_mipmap ); - TEST( All( desc.dimension == uint3{16,16,1} )); + TEST( All( desc.dimension == ImageDim_t{16,16,1} )); }{ ImageDesc desc; @@ -171,7 +171,7 @@ namespace TEST( desc.imageDim == EImageDim_2D ); TEST( desc.mipLevels == 4_mipmap ); - TEST( All( desc.dimension == uint3{15,15,1} )); + TEST( All( desc.dimension == ImageDim_t{15,15,1} )); }{ ImageDesc desc; desc.format = EPixelFormat::RGBA8_UNorm; @@ -181,7 +181,7 @@ namespace TEST( desc.imageDim == EImageDim_2D ); TEST( desc.mipLevels == 5_mipmap ); - TEST( All( desc.dimension == uint3{17,15,1} )); + TEST( All( desc.dimension == ImageDim_t{17,15,1} )); } } diff --git a/AE/engine/tests/graphics_hl/DrawTests/DrawTestCore.h b/AE/engine/tests/graphics_hl/DrawTests/DrawTestCore.h index 850a079f..1818cc19 100644 --- a/AE/engine/tests/graphics_hl/DrawTests/DrawTestCore.h +++ b/AE/engine/tests/graphics_hl/DrawTests/DrawTestCore.h @@ -48,7 +48,6 @@ class DrawTestCore 
#elif defined(AE_ENABLE_REMOTE_GRAPHICS) RDeviceInitializer _device; - const ushort _serverPort = 3000; #else # error not implemented diff --git a/AE/engine/tests/graphics_hl/DrawTests/cpp/mtl_types.h b/AE/engine/tests/graphics_hl/DrawTests/cpp/mtl_types.h index 429cfb62..e72fe69d 100644 --- a/AE/engine/tests/graphics_hl/DrawTests/cpp/mtl_types.h +++ b/AE/engine/tests/graphics_hl/DrawTests/cpp/mtl_types.h @@ -3,7 +3,7 @@ // size: 8, align: 4 struct VB_Position_f2 { - static constexpr auto TypeName = ShaderStructName{HashVal32{0xa843e002u}}; // 'VB_Position_f2' + static constexpr auto TypeName = ShaderStructName{HashVal32{0xa843e002u}}; packed_float2 Position; }; @@ -16,7 +16,7 @@ // size: 4, align: 1 struct VB_Color8 { - static constexpr auto TypeName = ShaderStructName{HashVal32{0xccba8f8eu}}; // 'VB_Color8' + static constexpr auto TypeName = ShaderStructName{HashVal32{0xccba8f8eu}}; packed_ubyte4 Color; }; @@ -29,7 +29,7 @@ // size: 12, align: 4 struct VB_UVf2_Col8 { - static constexpr auto TypeName = ShaderStructName{HashVal32{0xf5d3da88u}}; // 'VB_UVf2_Col8' + static constexpr auto TypeName = ShaderStructName{HashVal32{0xf5d3da88u}}; packed_float2 UV; packed_ubyte4 Color; diff --git a/AE/engine/tests/graphics_hl/DrawTests/cpp/vk_types.h b/AE/engine/tests/graphics_hl/DrawTests/cpp/vk_types.h index 429cfb62..e72fe69d 100644 --- a/AE/engine/tests/graphics_hl/DrawTests/cpp/vk_types.h +++ b/AE/engine/tests/graphics_hl/DrawTests/cpp/vk_types.h @@ -3,7 +3,7 @@ // size: 8, align: 4 struct VB_Position_f2 { - static constexpr auto TypeName = ShaderStructName{HashVal32{0xa843e002u}}; // 'VB_Position_f2' + static constexpr auto TypeName = ShaderStructName{HashVal32{0xa843e002u}}; packed_float2 Position; }; @@ -16,7 +16,7 @@ // size: 4, align: 1 struct VB_Color8 { - static constexpr auto TypeName = ShaderStructName{HashVal32{0xccba8f8eu}}; // 'VB_Color8' + static constexpr auto TypeName = ShaderStructName{HashVal32{0xccba8f8eu}}; packed_ubyte4 Color; }; @@ -29,7 +29,7 @@ // 
size: 12, align: 4 struct VB_UVf2_Col8 { - static constexpr auto TypeName = ShaderStructName{HashVal32{0xf5d3da88u}}; // 'VB_UVf2_Col8' + static constexpr auto TypeName = ShaderStructName{HashVal32{0xf5d3da88u}}; packed_float2 UV; packed_ubyte4 Color; diff --git a/AE/engine/tests/pipeline_compiler/UnitTest_DSLayout_GLSL.cpp b/AE/engine/tests/pipeline_compiler/UnitTest_DSLayout_GLSL.cpp index 51366a2c..634fc005 100644 --- a/AE/engine/tests/pipeline_compiler/UnitTest_DSLayout_GLSL.cpp +++ b/AE/engine/tests/pipeline_compiler/UnitTest_DSLayout_GLSL.cpp @@ -53,7 +53,7 @@ namespace // state: ShaderSample | FragmentShader layout(set=1, binding=2) uniform usamplerBuffer texBuffer; // state: ShaderStorage_Write | FragmentShader - layout(set=1, binding=3, rgba8) coherent uniform image2D storageImage; + layout(set=1, binding=3, rgba8) coherent writeonly uniform image2D storageImage; // state: ShaderSample | FragmentShader layout(set=1, binding=4) uniform sampler2D colorTex; // immutable sampler diff --git a/AE/engine/tests/pipeline_compiler/UnitTest_PipelineLayout_GLSL.cpp b/AE/engine/tests/pipeline_compiler/UnitTest_PipelineLayout_GLSL.cpp index 2ad33cf6..9152f025 100644 --- a/AE/engine/tests/pipeline_compiler/UnitTest_PipelineLayout_GLSL.cpp +++ b/AE/engine/tests/pipeline_compiler/UnitTest_PipelineLayout_GLSL.cpp @@ -51,7 +51,7 @@ namespace // state: ShaderSample | FragmentShader layout(set=0, binding=2) uniform usamplerBuffer texBuffer; // state: ShaderStorage_Write | FragmentShader - layout(set=0, binding=3, rgba8) coherent uniform image2D storageImage; + layout(set=0, binding=3, rgba8) coherent writeonly uniform image2D storageImage; // state: ShaderSample | FragmentShader layout(set=0, binding=4) uniform texture2D colorTex; // immutable sampler diff --git a/AE/engine/tests/pipeline_compiler/UnitTest_StructType.cpp b/AE/engine/tests/pipeline_compiler/UnitTest_StructType.cpp index 7e8d2b19..7b4c0ecf 100644 --- a/AE/engine/tests/pipeline_compiler/UnitTest_StructType.cpp 
+++ b/AE/engine/tests/pipeline_compiler/UnitTest_StructType.cpp @@ -214,7 +214,7 @@ static_assert( sizeof(StType2) == 96, "size mismatch" ); // size: 32, align: 16 struct StType1 { - static constexpr auto TypeName = ShaderStructName{HashVal32{0xd9e31216u}}; // 'StType1' + static constexpr auto TypeName = ShaderStructName{HashVal32{0xd9e31216u}}; float4 ff; uint2 uu; @@ -231,7 +231,7 @@ static_assert( sizeof(StType2) == 96, "size mismatch" ); // size: 96, align: 16 struct StType2 { - static constexpr auto TypeName = ShaderStructName{HashVal32{0x40ea43acu}}; // 'StType2' + static constexpr auto TypeName = ShaderStructName{HashVal32{0x40ea43acu}}; StType1 st; StaticArray< uint4, 4 > ua; @@ -253,7 +253,7 @@ static_assert( sizeof(StType2) == 96, "size mismatch" ); // size: 32, align: 16 struct StType1 { - static constexpr auto TypeName = ShaderStructName{HashVal32{0xd9e31216u}}; // 'StType1' + static constexpr auto TypeName = ShaderStructName{HashVal32{0xd9e31216u}}; float4 ff; uint2 uu; @@ -270,7 +270,7 @@ static_assert( sizeof(StType2) == 96, "size mismatch" ); // size: 96, align: 16 struct StType2 { - static constexpr auto TypeName = ShaderStructName{HashVal32{0x40ea43acu}}; // 'StType2' + static constexpr auto TypeName = ShaderStructName{HashVal32{0x40ea43acu}}; StType1 st; StaticArray< uint4, 4 > ua; @@ -395,7 +395,7 @@ static_assert( sizeof(StType4) == 72, "size mismatch" ); // size: 32, align: 4 struct StType3 { - static constexpr auto TypeName = ShaderStructName{HashVal32{0x37ed733au}}; // 'StType3' + static constexpr auto TypeName = ShaderStructName{HashVal32{0x37ed733au}}; float f; packed_uint3 u; @@ -414,7 +414,7 @@ static_assert( sizeof(StType4) == 72, "size mismatch" ); // size: 72, align: 4 struct StType4 { - static constexpr auto TypeName = ShaderStructName{HashVal32{0xa989e699u}}; // 'StType4' + static constexpr auto TypeName = ShaderStructName{HashVal32{0xa989e699u}}; packed_half3 h3; StType3 st; @@ -438,7 +438,7 @@ static_assert( sizeof(StType4) == 72, 
"size mismatch" ); // size: 32, align: 4 struct StType3 { - static constexpr auto TypeName = ShaderStructName{HashVal32{0x37ed733au}}; // 'StType3' + static constexpr auto TypeName = ShaderStructName{HashVal32{0x37ed733au}}; float f; packed_uint3 u; @@ -457,7 +457,7 @@ static_assert( sizeof(StType4) == 72, "size mismatch" ); // size: 72, align: 4 struct StType4 { - static constexpr auto TypeName = ShaderStructName{HashVal32{0xa989e699u}}; // 'StType4' + static constexpr auto TypeName = ShaderStructName{HashVal32{0xa989e699u}}; packed_half3 h3; StType3 st; @@ -520,7 +520,7 @@ static_assert( sizeof(StType5) == 36, "size mismatch" ); // size: 36, align: 4 (16) struct StType5 { - static constexpr auto TypeName = ShaderStructName{HashVal32{0xde8ed60fu}}; // 'StType5' + static constexpr auto TypeName = ShaderStructName{HashVal32{0xde8ed60fu}}; packed_float3 Position; packed_float3 Normal; @@ -544,7 +544,7 @@ static_assert( sizeof(StType5) == 36, "size mismatch" ); // size: 36, align: 4 (16) struct StType5 { - static constexpr auto TypeName = ShaderStructName{HashVal32{0xde8ed60fu}}; // 'StType5' + static constexpr auto TypeName = ShaderStructName{HashVal32{0xde8ed60fu}}; packed_float3 Position; packed_float3 Normal; @@ -646,7 +646,7 @@ Buffer { // size: 256, align: 16 struct StType6 { - static constexpr auto TypeName = ShaderStructName{HashVal32{0x478787b5u}}; // 'StType6' + static constexpr auto TypeName = ShaderStructName{HashVal32{0x478787b5u}}; uint Count; StaticArray< packed_float2, 3 > Positions; @@ -678,7 +678,7 @@ Buffer { // size: 256, align: 16 struct StType6 { - static constexpr auto TypeName = ShaderStructName{HashVal32{0x478787b5u}}; // 'StType6' + static constexpr auto TypeName = ShaderStructName{HashVal32{0x478787b5u}}; uint Count; StaticArray< packed_float2, 3 > Positions; @@ -731,7 +731,8 @@ struct StType8 // size: 0, align: 16 struct StType8 { - static constexpr auto TypeName = ShaderStructName{HashVal32{0xa03faab2u}}; // 'StType8' + static constexpr 
auto TypeName = ShaderStructName{HashVal32{0xa03faab2u}}; + static constexpr size_t SizeOf (size_t count) { return (16 * count); } // float4 pos []; }; @@ -749,7 +750,7 @@ struct StType8 // size: 0, align: 16 struct StType8 { - static constexpr auto TypeName = ShaderStructName{HashVal32{0xa03faab2u}}; // 'StType8' + static constexpr auto TypeName = ShaderStructName{HashVal32{0xa03faab2u}}; // float4 pos []; }; @@ -840,7 +841,7 @@ static_assert( sizeof(StType9) == 24, "size mismatch" ); // size: 32, align: 16 struct StType9A { - static constexpr auto TypeName = ShaderStructName{HashVal32{0xef0d42c0u}}; // 'StType9A' + static constexpr auto TypeName = ShaderStructName{HashVal32{0xef0d42c0u}}; float4 pos; float3 norm; @@ -855,7 +856,7 @@ static_assert( sizeof(StType9) == 24, "size mismatch" ); // size: 12 (16), align: 8 (16) struct alignas(8) StType9B { - static constexpr auto TypeName = ShaderStructName{HashVal32{0x7604137au}}; // 'StType9B' + static constexpr auto TypeName = ShaderStructName{HashVal32{0x7604137au}}; float2 a; int b; @@ -870,7 +871,7 @@ static_assert( sizeof(StType9) == 24, "size mismatch" ); // size: 24, align: 8 (16) struct StType9 { - static constexpr auto TypeName = ShaderStructName{HashVal32{0xd7389a24u}}; // 'StType9' + static constexpr auto TypeName = ShaderStructName{HashVal32{0xd7389a24u}}; TDeviceAddress< StType9A > ref; TDeviceAddress< float2 *> arr; @@ -894,7 +895,7 @@ static_assert( sizeof(StType9) == 24, "size mismatch" ); // size: 32, align: 16 struct StType9A { - static constexpr auto TypeName = ShaderStructName{HashVal32{0xef0d42c0u}}; // 'StType9A' + static constexpr auto TypeName = ShaderStructName{HashVal32{0xef0d42c0u}}; float4 pos; float3 norm; @@ -909,7 +910,7 @@ static_assert( sizeof(StType9) == 24, "size mismatch" ); // size: 12 (16), align: 8 (16) struct alignas(8) StType9B { - static constexpr auto TypeName = ShaderStructName{HashVal32{0x7604137au}}; // 'StType9B' + static constexpr auto TypeName = 
ShaderStructName{HashVal32{0x7604137au}}; float2 a; int b; @@ -924,7 +925,7 @@ static_assert( sizeof(StType9) == 24, "size mismatch" ); // size: 24, align: 8 (16) struct StType9 { - static constexpr auto TypeName = ShaderStructName{HashVal32{0xd7389a24u}}; // 'StType9' + static constexpr auto TypeName = ShaderStructName{HashVal32{0xd7389a24u}}; TDeviceAddress< StType9A > ref; TDeviceAddress< float2 *> arr; @@ -967,7 +968,7 @@ static_assert( sizeof(StType_10) == 16, "size mismatch" ); // size: 16, align: 16 struct StType_10 { - static constexpr auto TypeName = ShaderStructName{HashVal32{0xc35deb7u}}; // 'StType_10' + static constexpr auto TypeName = ShaderStructName{HashVal32{0xc35deb7u}}; float4 pos; }; @@ -987,7 +988,7 @@ static_assert( sizeof(StType_10) == 16, "size mismatch" ); // size: 16, align: 16 struct StType_10 { - static constexpr auto TypeName = ShaderStructName{HashVal32{0xc35deb7u}}; // 'StType_10' + static constexpr auto TypeName = ShaderStructName{HashVal32{0xc35deb7u}}; float4 pos; }; @@ -1035,7 +1036,7 @@ static_assert( sizeof(StType11) == 80, "size mismatch" ); // size: 72 (80), align: 16 struct alignas(16) StType11 { - static constexpr auto TypeName = ShaderStructName{HashVal32{0x77d1b9f4u}}; // 'StType11' + static constexpr auto TypeName = ShaderStructName{HashVal32{0x77d1b9f4u}}; float4x4_storage transform; uint meshIdx; @@ -1059,7 +1060,7 @@ static_assert( sizeof(StType11) == 80, "size mismatch" ); // size: 72 (80), align: 16 struct alignas(16) StType11 { - static constexpr auto TypeName = ShaderStructName{HashVal32{0x77d1b9f4u}}; // 'StType11' + static constexpr auto TypeName = ShaderStructName{HashVal32{0x77d1b9f4u}}; float4x4_storage transform; uint meshIdx; @@ -1108,7 +1109,8 @@ struct StType12 // size: 4 (16), align: 16 struct alignas(16) StType12 { - static constexpr auto TypeName = ShaderStructName{HashVal32{0xeed8e84eu}}; // 'StType12' + static constexpr auto TypeName = ShaderStructName{HashVal32{0xeed8e84eu}}; + static constexpr 
size_t SizeOf (size_t count) { return 16 + (16 * count); } uint count; // float4 arr []; @@ -1129,7 +1131,7 @@ struct StType12 // size: 4 (16), align: 16 struct alignas(16) StType12 { - static constexpr auto TypeName = ShaderStructName{HashVal32{0xeed8e84eu}}; // 'StType12' + static constexpr auto TypeName = ShaderStructName{HashVal32{0xeed8e84eu}}; uint count; // float4 arr []; @@ -1213,7 +1215,7 @@ static_assert( sizeof(StType13) == 144, "size mismatch" ); // size: 40 (48), align: 16 struct alignas(16) StType13A { - static constexpr auto TypeName = ShaderStructName{HashVal32{0x5b481bb5u}}; // 'StType13A' + static constexpr auto TypeName = ShaderStructName{HashVal32{0x5b481bb5u}}; float4 pos; float3 norm; @@ -1230,7 +1232,7 @@ static_assert( sizeof(StType13) == 144, "size mismatch" ); // size: 144, align: 8 struct StType13 { - static constexpr auto TypeName = ShaderStructName{HashVal32{0x99dfd8d8u}}; // 'StType13' + static constexpr auto TypeName = ShaderStructName{HashVal32{0x99dfd8d8u}}; TDeviceAddress< DeviceAddress *> untypedAddrArr; StaticArray< TDeviceAddress< DeviceAddress *>, 8 > untypedAddrArrArr; @@ -1258,7 +1260,7 @@ static_assert( sizeof(StType13) == 144, "size mismatch" ); // size: 40 (48), align: 16 struct alignas(16) StType13A { - static constexpr auto TypeName = ShaderStructName{HashVal32{0x5b481bb5u}}; // 'StType13A' + static constexpr auto TypeName = ShaderStructName{HashVal32{0x5b481bb5u}}; float4 pos; float3 norm; @@ -1275,7 +1277,7 @@ static_assert( sizeof(StType13) == 144, "size mismatch" ); // size: 144, align: 8 struct StType13 { - static constexpr auto TypeName = ShaderStructName{HashVal32{0x99dfd8d8u}}; // 'StType13' + static constexpr auto TypeName = ShaderStructName{HashVal32{0x99dfd8d8u}}; TDeviceAddress< DeviceAddress *> untypedAddrArr; StaticArray< TDeviceAddress< DeviceAddress *>, 8 > untypedAddrArrArr; @@ -1337,7 +1339,7 @@ static_assert( sizeof(StType14) == 8, "size mismatch" ); // size: 8, align: 8 struct StType14 { - static 
constexpr auto TypeName = ShaderStructName{HashVal32{0x7bb4d7bu}}; // 'StType14' + static constexpr auto TypeName = ShaderStructName{HashVal32{0x7bb4d7bu}}; TDeviceAddress< packed_float3 *> normals; }; @@ -1357,7 +1359,7 @@ static_assert( sizeof(StType14) == 8, "size mismatch" ); // size: 8, align: 8 struct StType14 { - static constexpr auto TypeName = ShaderStructName{HashVal32{0x7bb4d7bu}}; // 'StType14' + static constexpr auto TypeName = ShaderStructName{HashVal32{0x7bb4d7bu}}; TDeviceAddress< packed_float3 *> normals; }; @@ -1416,7 +1418,7 @@ static_assert( sizeof(StType15) == 256, "size mismatch" ); // size: 20 (32), align: 16 struct alignas(16) StType15A { - static constexpr auto TypeName = ShaderStructName{HashVal32{0xd12bc33u}}; // 'StType15A' + static constexpr auto TypeName = ShaderStructName{HashVal32{0xd12bc33u}}; uint3 a; float b; @@ -1431,7 +1433,7 @@ static_assert( sizeof(StType15) == 256, "size mismatch" ); // size: 256, align: 16 struct StType15 { - static constexpr auto TypeName = ShaderStructName{HashVal32{0x70bc7dedu}}; // 'StType15' + static constexpr auto TypeName = ShaderStructName{HashVal32{0x70bc7dedu}}; StaticArray< StType15A, 8 > arr; }; @@ -1451,7 +1453,7 @@ static_assert( sizeof(StType15) == 256, "size mismatch" ); // size: 20 (32), align: 16 struct alignas(16) StType15A { - static constexpr auto TypeName = ShaderStructName{HashVal32{0xd12bc33u}}; // 'StType15A' + static constexpr auto TypeName = ShaderStructName{HashVal32{0xd12bc33u}}; uint3 a; float b; @@ -1466,7 +1468,7 @@ static_assert( sizeof(StType15) == 256, "size mismatch" ); // size: 256, align: 16 struct StType15 { - static constexpr auto TypeName = ShaderStructName{HashVal32{0x70bc7dedu}}; // 'StType15' + static constexpr auto TypeName = ShaderStructName{HashVal32{0x70bc7dedu}}; StaticArray< StType15A, 8 > arr; }; @@ -1535,7 +1537,7 @@ struct StType16 // size: 12, align: 4 struct StType16A { - static constexpr auto TypeName = ShaderStructName{HashVal32{0x263feff0u}}; // 
'StType16A' + static constexpr auto TypeName = ShaderStructName{HashVal32{0x263feff0u}}; float a; float b; @@ -1552,7 +1554,8 @@ struct StType16 // size: 20 (24), align: 8 struct alignas(8) StType16 { - static constexpr auto TypeName = ShaderStructName{HashVal32{0xe9b52c57u}}; // 'StType16' + static constexpr auto TypeName = ShaderStructName{HashVal32{0xe9b52c57u}}; + static constexpr size_t SizeOf (size_t count) { return 20 + (12 * count); } float2 aa; float2 bb; @@ -1576,7 +1579,7 @@ struct StType16 // size: 12, align: 4 struct StType16A { - static constexpr auto TypeName = ShaderStructName{HashVal32{0x263feff0u}}; // 'StType16A' + static constexpr auto TypeName = ShaderStructName{HashVal32{0x263feff0u}}; float a; float b; @@ -1593,7 +1596,7 @@ struct StType16 // size: 20 (24), align: 8 struct alignas(8) StType16 { - static constexpr auto TypeName = ShaderStructName{HashVal32{0xe9b52c57u}}; // 'StType16' + static constexpr auto TypeName = ShaderStructName{HashVal32{0xe9b52c57u}}; float2 aa; float2 bb; diff --git a/AE/engine/tests/res_loaders/UnitTest_AEImage.cpp b/AE/engine/tests/res_loaders/UnitTest_AEImage.cpp index b7804f08..eb2e43a8 100644 --- a/AE/engine/tests/res_loaders/UnitTest_AEImage.cpp +++ b/AE/engine/tests/res_loaders/UnitTest_AEImage.cpp @@ -7,7 +7,7 @@ #include "res_loaders/AE/AEImageLoader.h" #include "res_loaders/STB/STBImageSaver.h" -#include "graphics/Private/EnumUtils.cpp" +#include "graphics/Private/EnumUtils.cpp.h" #include "graphics/Private/ImageMemView.cpp" using namespace AE::Graphics; diff --git a/AE/engine/tests/serializing/UnitTest_Serialization.cpp b/AE/engine/tests/serializing/UnitTest_Serialization.cpp index e1fd3b91..95c735aa 100644 --- a/AE/engine/tests/serializing/UnitTest_Serialization.cpp +++ b/AE/engine/tests/serializing/UnitTest_Serialization.cpp @@ -127,35 +127,37 @@ namespace const uint u_val = 0x12345678; const float f_val = 2.1345678f; const double d_val = 987.1234567; - const String s_val = 
"kjadsnfaoskldm;asdcnksdjnfkdsl"; - - PackedBits<0, 8, uint> u0 {u_val}; - PackedBits<2, 9, uint> u1 {u_val}; - PackedFloatBits<0> f0 {f_val}; - PackedFloatBits<13> f1 {f_val}; - PackedFloatBits<20> f2 {f_val}; - PackedDoubleBits<0> d0 {d_val}; - PackedDoubleBits<20> d1 {d_val}; - PackedDoubleBits<40> d2 {d_val}; - Optional os {s_val}; + + PackedBits<0, 8, uint> u0 {u_val}; + PackedBits<2, 9, uint> u1 {u_val}; + PackedFloatBits<0> f0 {f_val}; + PackedFloatBits<13> f1 {f_val}; + PackedFloatBits<20> f2 {f_val}; + PackedDoubleBits<0> d0 {d_val}; + PackedDoubleBits<20> d1 {d_val}; + PackedDoubleBits<40> d2 {d_val}; + + PackedBits<0, 64, ulong> u2 {0x080}; + PackedBits<0, 64, ulong> u3 {0x00F}; + PackedBits<0, 64, ulong> u4 {0xC00}; { - Serializer ser{ stream }; + BitSerializer ser{ stream }; TEST( ser( u0, u1 )); TEST( ser( f0, f1, f2 )); TEST( ser( d0, d1, d2 )); - TEST( ser( os )); + TEST( ser( u2, u3, u4 )); DEBUG_ONLY( AE_LOGI( "UnusedBits: "s << ToString( ser.dbgUnusedBits ));) } { auto rstream = MakeRC( stream->GetData() ); - Deserializer des{ rstream }; + BitDeserializer des{ rstream }; TEST( des( u0, u1 )); TEST( des( f0, f1, f2 )); TEST( des( d0, d1, d2 )); - TEST( des( os )); + TEST( des( u2, u3, u4 )); TEST( des.IsEnd() ); } @@ -171,8 +173,9 @@ namespace TEST( BitEqual( double{d1}, d_val, EnabledBitCount(64-20) )); TEST( BitEqual( double{d2}, d_val, EnabledBitCount(64-40) )); - TEST( os.has_value() ); - TEST( os.value() == s_val ); + TEST( u2.ToBits() == 0x080 ); + TEST( u3.ToBits() == 0x00F ); + TEST( u4.ToBits() == 0xC00 ); } @@ -226,6 +229,63 @@ namespace } + static void Serialization_Test4 () + { + struct CpuCluster final : ISerializable + { + String name; + CpuArchInfo::CoreBits_t logicalCores; + + ND_ bool operator == (const CpuCluster &rhs) const { + return name == rhs.name and + logicalCores == rhs.logicalCores; + } + + bool Serialize (Serializer &ser) C_NE_OV { + return ser( name, logicalCores ); + } + + bool Deserialize (Deserializer &des) __NE_OV 
{ + return des( OUT name, OUT logicalCores ); + } + }; + using CpuClusters_t = FixedArray< CpuCluster, CpuArchInfo::MaxCoreTypes >; + + CpuClusters_t clusters; + { + auto& dst = clusters.emplace_back(); + dst.name = "Cortex X1"; + dst.logicalCores.set( 7 ); + }{ + auto& dst = clusters.emplace_back(); + dst.name = "Cortex A78"; + dst.logicalCores.set( 5 ).set( 6 ); + }{ + auto& dst = clusters.emplace_back(); + dst.name = "Cortex A55"; + dst.logicalCores.set( 0 ).set( 1 ).set( 2 ).set( 3 ); + } + + auto stream = MakeRC(); + { + Serializer ser{ stream }; + TEST( ser( clusters )); + } + + { + auto rstream = MakeRC( stream->GetData() ); + Deserializer des{ rstream }; + LinearAllocator<> alloc; + des.allocator = &alloc; + + CpuClusters_t clusters2; + TEST( des( OUT clusters2 )); + + TEST( clusters2 == clusters ); + } + } + + static void SerializationTraits () { StaticAssert( IsTriviallySerializable< int >); @@ -244,6 +304,7 @@ extern void UnitTest_Serialization () Serialization_Test1(); Serialization_Test2(); Serialization_Test3(); + Serialization_Test4(); TEST_PASSED(); } diff --git a/AE/engine/tests/shader_trace/ShaderTrace_Test6.cpp b/AE/engine/tests/shader_trace/ShaderTrace_Test6.cpp index bc4863e2..54e5270f 100644 --- a/AE/engine/tests/shader_trace/ShaderTrace_Test6.cpp +++ b/AE/engine/tests/shader_trace/ShaderTrace_Test6.cpp @@ -135,6 +135,7 @@ static bool CreatePipeline (TestDevice &vulkan, VkShaderModule vertShader, VkSha rasterization.polygonMode = VK_POLYGON_MODE_FILL; rasterization.cullMode = VK_CULL_MODE_NONE; rasterization.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE; + rasterization.lineWidth = 1.f; VkPipelineMultisampleStateCreateInfo multisample = {}; multisample.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; diff --git a/AE/engine/tests/shader_trace/TestDevice.cpp b/AE/engine/tests/shader_trace/TestDevice.cpp index 70ab0bd9..0f74cefd 100644 --- a/AE/engine/tests/shader_trace/TestDevice.cpp +++ 
b/AE/engine/tests/shader_trace/TestDevice.cpp @@ -4,7 +4,7 @@ #include "base/Algorithms/Parser.h" #include "base/DataSource/MemStream.h" #include "base/DataSource/File.h" -#include "serializing/ObjectFactory.h" +#include "serializing/Public/ObjectFactory.h" #include "res_loaders/Intermediate/IntermImage.h" #include "res_loaders/DDS/DDSImageSaver.h" @@ -811,6 +811,7 @@ bool TestDevice::CreateGraphicsPipelineVar1 (VkShaderModule vertShader, VkShade rasterization.polygonMode = VK_POLYGON_MODE_FILL; rasterization.cullMode = VK_CULL_MODE_NONE; rasterization.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE; + rasterization.lineWidth = 1.f; VkPipelineMultisampleStateCreateInfo multisample = {}; multisample.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; @@ -920,6 +921,7 @@ bool TestDevice::CreateGraphicsPipelineVar2 (VkShaderModule vertShader, VkShade rasterization.polygonMode = VK_POLYGON_MODE_FILL; rasterization.cullMode = VK_CULL_MODE_NONE; rasterization.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE; + rasterization.lineWidth = 1.f; VkPipelineMultisampleStateCreateInfo multisample = {}; multisample.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; @@ -1026,6 +1028,7 @@ bool TestDevice::CreateMeshPipelineVar1 (VkShaderModule meshShader, VkShaderMod rasterization.polygonMode = VK_POLYGON_MODE_FILL; rasterization.cullMode = VK_CULL_MODE_NONE; rasterization.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE; + rasterization.lineWidth = 1.f; VkPipelineMultisampleStateCreateInfo multisample = {}; multisample.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; diff --git a/AE/engine/tools/atlas_tools/CMakeLists.txt b/AE/engine/tools/atlas_tools/CMakeLists.txt index 3c097103..e07cd51f 100644 --- a/AE/engine/tools/atlas_tools/CMakeLists.txt +++ b/AE/engine/tools/atlas_tools/CMakeLists.txt @@ -18,3 +18,4 @@ endif() EnablePCH( "AtlasTools" ) EnablePrebuild( "AtlasTools" ) +EnableUnitBuild( "AtlasTools" ) diff --git 
a/AE/engine/tools/atlas_tools/RectPacker.cpp b/AE/engine/tools/atlas_tools/RectPacker.cpp index 2855b1b3..a90d3dc0 100644 --- a/AE/engine/tools/atlas_tools/RectPacker.cpp +++ b/AE/engine/tools/atlas_tools/RectPacker.cpp @@ -4,14 +4,13 @@ namespace AE::AtlasTools { -namespace -{ + /* ================================================= - BestDimensionForArea + _BestDimensionForArea ================================================= */ - ND_ static uint2 BestDimensionForArea (ulong area, const uint2 minSize) + uint2 RectPacker::_BestDimensionForArea (ulong area, const uint2 minSize) { const uint side = Max( 1, IntLog2( area ) / 2 ); uint2 result {1u << side}; @@ -28,7 +27,6 @@ namespace } return result; } -} /* ================================================= @@ -37,7 +35,7 @@ namespace */ bool RectPacker::Pack () { - uint2 size = BestDimensionForArea( _maxArea, _maxSize ); + uint2 size = _BestDimensionForArea( _maxArea, _maxSize ); std::sort( _rects.begin(), _rects.end(), [] (auto& lhs, auto& rhs) diff --git a/AE/engine/tools/atlas_tools/RectPacker.h b/AE/engine/tools/atlas_tools/RectPacker.h index 8a4e64db..d9ac3de5 100644 --- a/AE/engine/tools/atlas_tools/RectPacker.h +++ b/AE/engine/tools/atlas_tools/RectPacker.h @@ -52,6 +52,9 @@ namespace AE::AtlasTools ND_ float PackingRate () const; ND_ ArrayView GetResult () const { return _rects; } + + private: + ND_ static uint2 _BestDimensionForArea (ulong area, const uint2 minSize); }; diff --git a/AE/engine/tools/atlas_tools/RectPackerSTB.cpp b/AE/engine/tools/atlas_tools/RectPackerSTB.cpp index c2f4f243..b8debb28 100644 --- a/AE/engine/tools/atlas_tools/RectPackerSTB.cpp +++ b/AE/engine/tools/atlas_tools/RectPackerSTB.cpp @@ -5,14 +5,13 @@ namespace AE::AtlasTools { -namespace -{ + /* ================================================= - BestDimensionForArea + _BestDimensionForArea ================================================= */ - ND_ static uint2 BestDimensionForArea (ulong area, const uint2 minSize) + uint2 
RectPackerSTB::_BestDimensionForArea (ulong area, const uint2 minSize) { const uint side = Max( 1, IntLog2( area ) / 2 ); uint2 result {1u << side}; @@ -28,7 +27,6 @@ namespace ASSERT( All( minSize <= result )); return result; } -} /* ================================================= @@ -37,7 +35,7 @@ namespace */ bool RectPackerSTB::Pack () { - uint2 size = BestDimensionForArea( _maxArea, _maxSize ); + uint2 size = _BestDimensionForArea( _maxArea, _maxSize ); Array packer_nodes; for (uint i = 0; i < 100; ++i) diff --git a/AE/engine/tools/atlas_tools/RectPackerSTB.h b/AE/engine/tools/atlas_tools/RectPackerSTB.h index e85bfc34..1d6fcb4b 100644 --- a/AE/engine/tools/atlas_tools/RectPackerSTB.h +++ b/AE/engine/tools/atlas_tools/RectPackerSTB.h @@ -38,6 +38,9 @@ namespace AE::AtlasTools ND_ float PackingRate () const; ND_ ArrayView GetResult () const { return _rects; } + + private: + ND_ static uint2 _BestDimensionForArea (ulong area, const uint2 minSize); }; diff --git a/AE/engine/tools/cicd/BaseMachine.cpp b/AE/engine/tools/cicd/BaseMachine.cpp index 534ef0c9..ae1358e1 100644 --- a/AE/engine/tools/cicd/BaseMachine.cpp +++ b/AE/engine/tools/cicd/BaseMachine.cpp @@ -405,6 +405,8 @@ namespace AE::CICD case EType::Unknown : return false; } + switch_end + return true; } diff --git a/AE/engine/tools/cicd/BaseMachine_Vulkan.cpp b/AE/engine/tools/cicd/BaseMachine_Vulkan.cpp index 2a12408e..036eeaa6 100644 --- a/AE/engine/tools/cicd/BaseMachine_Vulkan.cpp +++ b/AE/engine/tools/cicd/BaseMachine_Vulkan.cpp @@ -121,6 +121,4 @@ namespace AE::CICD } // AE::CICD -#include "graphics/Private/EnumUtils.cpp" - #endif // AE_ENABLE_VULKAN diff --git a/AE/engine/tools/cicd/CMakeLists.txt b/AE/engine/tools/cicd/CMakeLists.txt index 136b749b..3a6281a7 100644 --- a/AE/engine/tools/cicd/CMakeLists.txt +++ b/AE/engine/tools/cicd/CMakeLists.txt @@ -5,16 +5,18 @@ if (NOT (DEFINED ENGINE_LIBS_PATH)) file( GLOB_RECURSE CPP_SOURCES "*.cpp" ) endif() +set( EXT_SOURCES 
"${MAIN_SOURCE_DIR}/engine/src/graphics/Private/Shared.cpp" ) + file( GLOB_RECURSE AS_SOURCES "*.as" ) set( SOURCES ${HPP_SOURCES} ${CPP_SOURCES} ${AS_SOURCES} ) if (ANDROID) - add_library( "CICD" SHARED ${SOURCES} ) + add_library( "CICD" SHARED ${SOURCES} ${EXT_SOURCES} ) target_compile_definitions( "CICD" PUBLIC CICD_TEST_MACHINE ) target_link_libraries( "CICD" PUBLIC "Platform" ) elseif (NOT (TARGET "Scripting")) - add_executable( "CICD" MACOSX_BUNDLE ${SOURCES} ) + add_executable( "CICD" MACOSX_BUNDLE ${SOURCES} ${EXT_SOURCES} ) target_compile_definitions( "CICD" PUBLIC CICD_TEST_MACHINE CICD_BUILD_MACHINE CICD_CLIENT ) else() @@ -25,7 +27,7 @@ else() file( GLOB_RECURSE ANDROID_SOURCES "${ANDROID_SRC_PATH}/*.*" ) list( APPEND ANDROID_SOURCES "${ANDROID_SRC_PATH}/../../build.gradle" ) - add_executable( "CICD" ${SOURCES} ${PREBUILD_CPP_FILE} ${ANDROID_SOURCES} ) + add_executable( "CICD" ${SOURCES} ${PREBUILD_CPP_FILE} ${ANDROID_SOURCES} ${EXT_SOURCES} ) source_group( "Apk" FILES ${ANDROID_SOURCES} ) set_source_files_properties( ${ANDROID_SOURCES} PROPERTIES HEADER_FILE_ONLY TRUE ) @@ -48,10 +50,16 @@ if (${AE_ENABLE_VULKAN}) endif() endif() +set_property( SOURCE ${EXT_SOURCES} PROPERTY SKIP_UNITY_BUILD_INCLUSION ON ) +target_compile_definitions( "CICD" PUBLIC AE_GRAPHICS_STRONG_VALIDATION=0 ) + +source_group( "external" FILES ${EXT_SOURCES} ) source_group( TREE ${CMAKE_CURRENT_SOURCE_DIR} FILES ${SOURCES} ) + target_link_libraries( "CICD" PUBLIC "Networking" ) target_include_directories( "CICD" PUBLIC ".." 
"scripts" ) set_property( TARGET "CICD" PROPERTY FOLDER "Engine/ToolApps" ) EnablePCH( "CICD" ) EnablePrebuild( "CICD" ) +EnableUnitBuild( "CICD" ) diff --git a/AE/engine/tools/cicd/NetBase.cpp b/AE/engine/tools/cicd/NetBase.cpp index aae3adb7..83dbc0da 100644 --- a/AE/engine/tools/cicd/NetBase.cpp +++ b/AE/engine/tools/cicd/NetBase.cpp @@ -62,7 +62,7 @@ namespace AE::CICD _Send ================================================= */ - bool NetBase::_Send (const void *data, const Bytes dataSize) + bool NetBase::_Send (const void* data, const Bytes dataSize) { if ( not _isConnected ) { diff --git a/AE/engine/tools/cicd/NetBase.h b/AE/engine/tools/cicd/NetBase.h index 7116d977..b96dfe90 100644 --- a/AE/engine/tools/cicd/NetBase.h +++ b/AE/engine/tools/cicd/NetBase.h @@ -56,7 +56,7 @@ namespace AE::CICD ND_ bool _SendLog (StringView text, uint part); ND_ bool _SendLogGroup (StringView group); - ND_ bool _Send (const void *data, Bytes dataSize); + ND_ bool _Send (const void* data, Bytes dataSize); template )> ND_ bool _Send (T &); @@ -187,7 +187,7 @@ namespace AE::CICD Msg::Serializer enc {FastWStream{ buf, buf + Sizeof(buf) }}; enc.factory = &_factory; - CHECK_ERR( enc( &msg ) and enc.Flush() ); + CHECK_ERR( enc( &msg )); size = Sizeof(buf) - enc.stream.RemainingSize(); } diff --git a/AE/engine/tools/cicd/Server.cpp b/AE/engine/tools/cicd/Server.cpp index a82e2f75..b31a741c 100644 --- a/AE/engine/tools/cicd/Server.cpp +++ b/AE/engine/tools/cicd/Server.cpp @@ -622,6 +622,7 @@ namespace AE::CICD } case ECopyMode::Unknown : break; } + switch_end } _copyCmds.clear(); } diff --git a/AE/engine/tools/feature_set_gen/CMakeLists.txt b/AE/engine/tools/feature_set_gen/CMakeLists.txt index 31789b64..33798237 100644 --- a/AE/engine/tools/feature_set_gen/CMakeLists.txt +++ b/AE/engine/tools/feature_set_gen/CMakeLists.txt @@ -8,7 +8,7 @@ if (${AE_ENABLE_VULKAN} AND WIN32) set_property( TARGET "FeatureSetGen" PROPERTY FOLDER "Engine/ToolApps" ) - target_link_libraries( "FeatureSetGen" 
PRIVATE "Serializing" "Scripting" "Vulkan-lib" ) + target_link_libraries( "FeatureSetGen" PRIVATE "Serializing" "Vulkan-lib" ) target_include_directories( "FeatureSetGen" PRIVATE "." ) target_compile_definitions( "FeatureSetGen" PRIVATE diff --git a/AE/engine/tools/feature_set_gen/FeatureSetUtils.cpp b/AE/engine/tools/feature_set_gen/FeatureSetUtils.cpp index 6dcb23de..afa17897 100644 --- a/AE/engine/tools/feature_set_gen/FeatureSetUtils.cpp +++ b/AE/engine/tools/feature_set_gen/FeatureSetUtils.cpp @@ -1039,10 +1039,10 @@ namespace if ( AllBits( val, VK_QUEUE_GRAPHICS_BIT )) queues.supported |= EQueueMask::Graphics; - if ( AllBits( val, VK_QUEUE_COMPUTE_BIT ) and not AnyBits( val, VK_QUEUE_GRAPHICS_BIT )) + if ( AllBits( val, VK_QUEUE_COMPUTE_BIT ) and NoBits( val, VK_QUEUE_GRAPHICS_BIT )) queues.supported |= EQueueMask::AsyncCompute; - if ( AllBits( val, VK_QUEUE_TRANSFER_BIT ) and not AnyBits( val, VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT )) + if ( AllBits( val, VK_QUEUE_TRANSFER_BIT ) and NoBits( val, VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT )) queues.supported |= EQueueMask::AsyncTransfer; } return true; diff --git a/AE/engine/tools/feature_set_gen/main.cpp b/AE/engine/tools/feature_set_gen/main.cpp index ef5f66b5..f25e5743 100644 --- a/AE/engine/tools/feature_set_gen/main.cpp +++ b/AE/engine/tools/feature_set_gen/main.cpp @@ -13,10 +13,10 @@ #include "FeatureSetUtils.h" #include "base/Algorithms/StringUtils.h" -#include "graphics/Private/ImageDesc.cpp" -#include "graphics/Private/FeatureSet.cpp" -#include "graphics/Private/RenderState.cpp" -#include "graphics/Private/EnumUtils.cpp" +#include "graphics/Private/ImageDesc.cpp.h" +#include "graphics/Private/FeatureSet.cpp.h" +#include "graphics/Private/RenderState.cpp.h" +#include "graphics/Private/EnumUtils.cpp.h" #include "graphics/Metal/MFeatureSet.cpp" using namespace AE; diff --git a/AE/engine/tools/geometry_tools/CMakeLists.txt b/AE/engine/tools/geometry_tools/CMakeLists.txt index e7e900aa..4b1168a1 
100644 --- a/AE/engine/tools/geometry_tools/CMakeLists.txt +++ b/AE/engine/tools/geometry_tools/CMakeLists.txt @@ -22,3 +22,4 @@ endif() EnablePCH( "GeometryTools" ) EnablePrebuild( "GeometryTools" ) +EnableUnitBuild( "GeometryTools" ) diff --git a/AE/engine/tools/geometry_tools/SphericalCube/SphericalCubeGen.cpp b/AE/engine/tools/geometry_tools/SphericalCube/SphericalCubeGen.cpp index df81f1cc..07cf098c 100644 --- a/AE/engine/tools/geometry_tools/SphericalCube/SphericalCubeGen.cpp +++ b/AE/engine/tools/geometry_tools/SphericalCube/SphericalCubeGen.cpp @@ -171,7 +171,7 @@ namespace for (uint y = 0; y < vcount; ++y) for (uint x = 0; x < vcount; ++x) { - double2 ncoord = ToSNorm( double2{x,y} / double(vcount-1) ); + double2 ncoord = ToSNorm( double2{uint2{ x, y }} / double(vcount-1) ); auto& vert = dst_vertices[vert_i++]; double3 pos = ForwardProjection( ncoord, ECubeFace(face) ); double2 proj = VertexProjection_t::Forward( ncoord ); diff --git a/AE/engine/tools/graphics_lib/GraphicsLib.h b/AE/engine/tools/graphics_lib/GraphicsLib.h index 6567b49d..abd3aa36 100644 --- a/AE/engine/tools/graphics_lib/GraphicsLib.h +++ b/AE/engine/tools/graphics_lib/GraphicsLib.h @@ -117,6 +117,7 @@ namespace AE::GraphicsLib void SetStencilWriteMask (uint writeMask) __Th_OV { _ctx->SetStencilWriteMask( writeMask ); } void SetStencilWriteMask (uint frontWriteMask, uint backWriteMask) __Th_OV { _ctx->SetStencilWriteMask( frontWriteMask, backWriteMask ); } void SetFragmentShadingRate (EShadingRate rate, EShadingRateCombinerOp primitiveOp, EShadingRateCombinerOp texOp) __Th_OV { _ctx->SetFragmentShadingRate( rate, primitiveOp, texOp ); } + void SetViewportWScaling (ArrayView scaling) __Th_OV { _ctx->SetViewportWScaling( scaling ); } ) void BindIndexBuffer (BufferID buffer, Bytes offset, EIndex indexType) __Th_OV { _ctx->BindIndexBuffer( buffer, offset, indexType ); } diff --git a/AE/engine/tools/remote_graphics_device/RemoteDevice.cpp 
b/AE/engine/tools/remote_graphics_device/RemoteDevice.cpp index a97ac695..f487689d 100644 --- a/AE/engine/tools/remote_graphics_device/RemoteDevice.cpp +++ b/AE/engine/tools/remote_graphics_device/RemoteDevice.cpp @@ -28,7 +28,7 @@ namespace AE::RemoteGraphics _device._nativeWnd = wnd.GetNative(); CHECK_FATAL( _device._nativeWnd ); - _device._windowSize.store( ushort2{wnd.GetSurfaceSize()} ); + _device._windowSize.store( ImageDim2_t{wnd.GetSurfaceSize()} ); _device._StartServer(); } @@ -46,13 +46,13 @@ namespace AE::RemoteGraphics _device._PrintSelfIP(); case EState::InForeground : - _device._windowSize.store( ushort2{wnd.GetSurfaceSize()} ); + _device._windowSize.store( ImageDim2_t{wnd.GetSurfaceSize()} ); break; case EState::InBackground : case EState::Stopped : case EState::Destroyed : - _device._windowSize.store( ushort2{0} ); break; + _device._windowSize.store( ImageDim2_t{0} ); break; case EState::Unknown : case EState::Created : @@ -152,7 +152,7 @@ namespace AE::RemoteGraphics } if ( _window ) - _windowSize.store( ushort2{_window->GetSurfaceSize()} ); + _windowSize.store( ImageDim2_t{_window->GetSurfaceSize()} ); ThreadUtils::Sleep_15ms(); } @@ -204,15 +204,38 @@ namespace AE::RemoteGraphics _PrintSelfIP(); + const auto& cpu_info = CpuArchInfo::Get(); + auto FindCoreId = [&cpu_info, used = 0ull] () mutable -> uint + {{ + if ( auto* p_core = cpu_info.GetCore( ECoreType::Performance )) + { + int idx = BitScanForward( p_core->physicalBits.to_ullong() & ~used ); + if ( idx >= 0 ) + { + used |= 1ull << idx; + return uint(idx); + } + } + return UMax; + }}; + //ThreadUtils::SetAffinity( FindCoreId() ); + for (usize i = 0; i < _threadArr.size(); ++i) { + uint core_id = FindCoreId(); + _threadArr[i].looping.store( true ); - _threadArr[i].thread = StdThread{ [this, i] () + _threadArr[i].thread = StdThread{ [this, i, core_id] () { auto& looping = _threadArr[i].looping; auto& conn = _threadArr[i].conn; - CHECK_ERRV( conn.InitServer( ushort(RmNetConfig::serverPort + i), 
&_objFactory )); + ThreadUtils::SetName( "RG-Server-"s + ToString(i) ); + + if ( core_id != UMax ) + ThreadUtils::SetAffinity( core_id ); + + CHECK_FATAL( conn.InitServer( ushort(RmNetConfig::serverPort + i), &_objFactory )); _GetThreadData() = &_threadArr[i]; @@ -230,7 +253,7 @@ namespace AE::RemoteGraphics { bool ok = conn.Receive(); - if ( auto msg = conn.Encode() ) + for (; auto msg = conn.Encode(); ) { _ProcessMessage( *msg ); ok = true; @@ -336,6 +359,7 @@ namespace AE::RemoteGraphics #ifdef AE_ENABLE_NVML _profilers.nv.Deinitialize(); #endif + _profilers.gen.Deinitialize(); _swapchain.Destroy(); _swapchain.DestroySurface(); @@ -393,7 +417,7 @@ namespace AE::RemoteGraphics _Send ================================================= */ - void RmGAppListener::_Send (const void *data, Bytes dataSize) __Th___ + void RmGAppListener::_Send (const void* data, Bytes dataSize) __Th___ { auto* td = _GetThreadData(); CHECK_THROW( td != null ); diff --git a/AE/engine/tools/remote_graphics_device/RemoteDevice.h b/AE/engine/tools/remote_graphics_device/RemoteDevice.h index df28929a..44fad51d 100644 --- a/AE/engine/tools/remote_graphics_device/RemoteDevice.h +++ b/AE/engine/tools/remote_graphics_device/RemoteDevice.h @@ -212,7 +212,7 @@ namespace AE::RemoteGraphics WindowPtr _window; Ptr _app; NativeWindow _nativeWnd; - StructAtomic _windowSize; + StructAtomic _windowSize; Serializing::ObjectFactory _objFactory; Serializing::ObjectFactory _cmdFactory; @@ -256,12 +256,12 @@ namespace AE::RemoteGraphics Profiler::AdrenoProfiler adreno; #endif #ifdef AE_ENABLE_PVRCOUNTER - Profiler::PowerVRProfiler pvr; - Profiler::PowerVRProfiler::TimeScopeArr_t pvrTimings; + Profiler::PowerVRProfiler pvr; #endif #ifdef AE_ENABLE_NVML Profiler::NVidiaProfiler nv; #endif + Profiler::GeneralProfiler gen; } _profilers; @@ -341,7 +341,7 @@ namespace AE::RemoteGraphics ND_ static PerThreadData*& _GetThreadData (); void _Send (const Msg::BaseResponse &) __Th___; - void _Send (const void *data, Bytes 
dataSize) __Th___; + void _Send (const void* data, Bytes dataSize) __Th___; void _ReadReceived (OUT void* data, Bytes size) __Th___; void _PushMemStack (RC) __Th___; @@ -458,12 +458,15 @@ namespace AE::RemoteGraphics void _Cb_ProfAdreno_Sample (const Msg::ProfAdreno_Sample &); void _Cb_ProfPVR_Initialize (const Msg::ProfPVR_Initialize &); - void _Cb_ProfPVR_Tick (const Msg::ProfPVR_Tick &); + void _Cb_ProfPVR_GetTiming (const Msg::ProfPVR_GetTiming &); void _Cb_ProfPVR_Sample (const Msg::ProfPVR_Sample &); void _Cb_ProfNVidia_Initialize (const Msg::ProfNVidia_Initialize &); void _Cb_ProfNVidia_Sample (const Msg::ProfNVidia_Sample &); + void _Cb_ProfGeneral_Initialize (const Msg::ProfGeneral_Initialize &); + void _Cb_ProfGeneral_Sample (const Msg::ProfGeneral_Sample &); + void _Cb_DescUpd_Flush (const Msg::DescUpd_Flush &); void _Cb_SBM_GetBufferRanges (const Msg::SBM_GetBufferRanges &); diff --git a/AE/engine/tools/remote_graphics_device/RemoteDevice_Msg.cpp b/AE/engine/tools/remote_graphics_device/RemoteDevice_Msg.cpp index 2a71110b..0a5c45e8 100644 --- a/AE/engine/tools/remote_graphics_device/RemoteDevice_Msg.cpp +++ b/AE/engine/tools/remote_graphics_device/RemoteDevice_Msg.cpp @@ -19,6 +19,8 @@ namespace AE::RemoteGraphics void RmGAppListener::_Cb_Device_Init (const Msg::Device_Init &msg) { + using EFeature = Msg::Device_Init_Response::EFeature; + const auto Create = [this, &msg] () -> bool {{ CHECK_ERR( _app != null ); @@ -49,7 +51,7 @@ namespace AE::RemoteGraphics return true; }}; - using EFeature = Msg::Device_Init_Response::EFeature; + AE_LOGI( CpuArchInfo::Get().Print() ); Msg::Device_Init_Response res; if ( Create() ) @@ -85,33 +87,41 @@ namespace AE::RemoteGraphics auto& fs_feats = _device.GetVProperties().fragShadingRateFeats; auto& ext = _device.GetVExtensions(); - if ( feats.depthBounds ) res.features.insert( EFeature::DepthBounds ); - - res.features.insert( EFeature::StencilCompareMask ); - res.features.insert( EFeature::StencilWriteMask ); - - if ( 
fs_feats.attachmentFragmentShadingRate ) res.features.insert( EFeature::FragmentShadingRate ); - if ( ext.drawIndirectCount ) res.features.insert( EFeature::DrawIndirectCount ); - if ( ext.drawIndirectCount ) res.features.insert( EFeature::DrawIndexedIndirectCount ); - if ( ext.meshShader ) res.features.insert( EFeature::DrawMeshTasksIndirectCount ); - - res.features.insert( EFeature::ClearColorImage ); - res.features.insert( EFeature::ClearDepthStencilImage ); - res.features.insert( EFeature::ResolveImage ); - - if ( rt_feats.rayTracingPipelineTraceRaysIndirect ) res.features.insert( EFeature::TraceRaysIndirect_DevAddr ); - if ( rt1_feats.rayTracingPipelineTraceRaysIndirect2 ) res.features.insert( EFeature::TraceRaysIndirect2_DevAddr ); - - if ( as_feats.accelerationStructure ) res.features.insert( EFeature::SerializeToMemory ); - if ( as_feats.accelerationStructure ) res.features.insert( EFeature::DeserializeFromMemory ); - if ( as_feats.accelerationStructureIndirectBuild ) res.features.insert( EFeature::BuildIndirect ); - - res.features.insert( EFeature::WriteTimestamp ); - - if ( ext.timelineSemaphore ) res.features.insert( EFeature::TimelineSemaphore ); - if ( ext.hostQueryReset ) res.features.insert( EFeature::HostQueryReset ); - - StaticAssert( uint(EFeature::_Count) == 19 ); + for (uint i = 0; i < uint(EFeature::_Count); ++i) + { + const auto t = EFeature(i); + switch_enum( t ) + { + case EFeature::DepthBounds : if ( feats.depthBounds ) res.features.insert( t ); break; + case EFeature::StencilCompareMask : res.features.insert( t ); break; + case EFeature::StencilWriteMask : res.features.insert( t ); break; + case EFeature::FragmentShadingRate : if ( fs_feats.attachmentFragmentShadingRate ) res.features.insert( t ); break; + case EFeature::DrawIndirectCount : if ( ext.drawIndirectCount ) res.features.insert( t ); break; + case EFeature::DrawIndexedIndirectCount : if ( ext.drawIndirectCount ) res.features.insert( t ); break; + case 
EFeature::DrawMeshTasksIndirectCount : if ( ext.meshShader ) res.features.insert( t ); break; + case EFeature::ViewportWScaling : if ( ext.clipSpaceWScalingNV ) res.features.insert( t ); break; + + case EFeature::ClearColorImage : res.features.insert( t ); break; + case EFeature::ClearDepthStencilImage : res.features.insert( t ); break; + case EFeature::ResolveImage : res.features.insert( t ); break; + + case EFeature::TraceRaysIndirect_DevAddr : if ( rt_feats.rayTracingPipelineTraceRaysIndirect ) res.features.insert( t ); break; + case EFeature::TraceRaysIndirect2_DevAddr: if ( rt1_feats.rayTracingPipelineTraceRaysIndirect2 ) res.features.insert( t ); break; + + case EFeature::BuildIndirect : if ( as_feats.accelerationStructureIndirectBuild ) res.features.insert( t ); break; + case EFeature::SerializeToMemory : if ( as_feats.accelerationStructure ) res.features.insert( t ); break; + case EFeature::DeserializeFromMemory : if ( as_feats.accelerationStructure ) res.features.insert( t ); break; + + case EFeature::WriteTimestamp : res.features.insert( t ); break; + + case EFeature::TimelineSemaphore : if ( ext.timelineSemaphore ) res.features.insert( t ); break; + case EFeature::HostQueryReset : if ( ext.hostQueryReset ) res.features.insert( t ); break; + + case EFeature::Unknown : + case EFeature::_Count : break; + } + switch_end + } } #elif defined(AE_ENABLE_METAL) res.api = EGraphicsAPI::Metal; @@ -428,7 +438,7 @@ namespace AE::RemoteGraphics _Send( res ); #ifdef AE_ENABLE_PVRCOUNTER - _profilers.pvr.ReadTimingData( OUT _profilers.pvrTimings ); + _profilers.pvr.Tick(); #endif } @@ -1392,7 +1402,8 @@ namespace AE::RemoteGraphics { Msg::ProfArm_Sample_Response res; #ifdef AE_ENABLE_ARM_PMU - _profilers.arm.Sample( OUT res.counters ); + float invdt; + _profilers.arm.Sample( OUT res.counters, INOUT invdt ); #endif _Send( res ); } @@ -1418,7 +1429,8 @@ namespace AE::RemoteGraphics { Msg::ProfMali_Sample_Response res; #ifdef AE_ENABLE_MALI_HWCPIPE - 
_profilers.mali.Sample( OUT res.counters ); + float invdt; + _profilers.mali.Sample( OUT res.counters, INOUT invdt ); #endif _Send( res ); } @@ -1442,7 +1454,8 @@ namespace AE::RemoteGraphics { Msg::ProfAdreno_Sample_Response res; #ifdef AE_ENABLE_ADRENO_PERFCOUNTER - _profilers.adreno.Sample( OUT res.counters ); + float invdt; + _profilers.adreno.Sample( OUT res.counters, INOUT invdt ); #endif _Send( res ); } @@ -1462,12 +1475,11 @@ namespace AE::RemoteGraphics _Send( res ); } - void RmGAppListener::_Cb_ProfPVR_Tick (const Msg::ProfPVR_Tick &) + void RmGAppListener::_Cb_ProfPVR_GetTiming (const Msg::ProfPVR_GetTiming &) { - Msg::ProfPVR_Tick_Response res; + Msg::ProfPVR_GetTiming_Response res; #ifdef AE_ENABLE_PVRCOUNTER - _profilers.pvr.Tick(); - res.timings = _profilers.pvrTimings; + _profilers.pvr.ReadTimingData( OUT res.timings ); #endif _Send( res ); } @@ -1476,7 +1488,8 @@ namespace AE::RemoteGraphics { Msg::ProfPVR_Sample_Response res; #ifdef AE_ENABLE_PVRCOUNTER - _profilers.pvr.Sample( OUT res.counters ); + float invdt; + _profilers.pvr.Sample( OUT res.counters, INOUT invdt ); #endif _Send( res ); } @@ -1500,10 +1513,40 @@ namespace AE::RemoteGraphics { Msg::ProfNVidia_Sample_Response res; #ifdef AE_ENABLE_NVML - _profilers.nv.Sample( OUT res.counters ); + float invdt; + _profilers.nv.Sample( OUT res.counters, INOUT invdt ); #endif _Send( res ); } + + + void RmGAppListener::_Cb_ProfGeneral_Initialize (const Msg::ProfGeneral_Initialize &msg) + { + Msg::ProfGeneral_Initialize_Response res; + + res.ok = _profilers.gen.Initialize( msg.required ); + if ( res.ok ) + res.cpuClusters = _profilers.gen.GetCpuClusters(); + + _Send( res ); + } + + void RmGAppListener::_Cb_ProfGeneral_Sample (const Msg::ProfGeneral_Sample &) + { + Msg::ProfGeneral_Sample_Response res; + Profiler::GeneralProfiler::CpuUsage_t total, kernel; + const uint count = _profilers.gen.GetCpuCoreCount(); + + float invdt; + _profilers.gen.Sample( OUT res.counters, INOUT invdt ); + + if ( 
_profilers.gen.GetUsage( OUT total, OUT kernel )) + { + res.totalCpuUsage = ArrayView{ total.data(), count }; + res.kernelUsage = ArrayView{ kernel.data(), count }; + } + _Send( res ); + } //----------------------------------------------------------------------------- @@ -2426,6 +2469,13 @@ namespace AE::RemoteGraphics::Msg ctx.draw.SetFragmentShadingRate( rate, primitiveOp, textureOp ); } + void CmdBuf_Bake::Draw_SetViewportWScalingCmd::Execute (void* inCtx) __Th___ + { + auto& ctx = *Cast(inCtx); + CHECK_THROW( ctx.type == EContextType::RenderPass ); + ctx.draw.SetViewportWScaling( scaling ); + } + void CmdBuf_Bake::Draw_BindIndexBufferCmd::Execute (void* inCtx) __Th___ { auto& ctx = *Cast(inCtx); diff --git a/AE/engine/tools/res_loaders/AE/AEImageLoader.cpp b/AE/engine/tools/res_loaders/AE/AEImageLoaderSaver.cpp similarity index 69% rename from AE/engine/tools/res_loaders/AE/AEImageLoader.cpp rename to AE/engine/tools/res_loaders/AE/AEImageLoaderSaver.cpp index 8ef161a9..c4f12b39 100644 --- a/AE/engine/tools/res_loaders/AE/AEImageLoader.cpp +++ b/AE/engine/tools/res_loaders/AE/AEImageLoaderSaver.cpp @@ -1,13 +1,16 @@ // Copyright (c) Zhirnov Andrey. 
For more information see 'LICENSE' #include "res_loaders/AE/AEImageLoader.h" +#include "res_loaders/AE/AEImageSaver.h" #include "res_loaders/Intermediate/IntermImage.h" #include "res_pack/asset_packer/Packer/ImagePacker.h" namespace AE::ResLoader { namespace { +# define AE_BUILD_ASSET_PACKER # include "res_pack/asset_packer/Packer/ImagePacker.cpp.h" +# undef AE_BUILD_ASSET_PACKER } /* @@ -71,5 +74,31 @@ namespace { return true; } +/* +================================================= + SaveImage +================================================= +*/ + bool AEImageSaver::SaveImage (WStream &stream, const IntermImage &image, EImageFormat fileFormat, Bool flipY) __NE___ + { + CHECK( not flipY ); + + if ( not (fileFormat == Default or fileFormat == EImageFormat::AEImg) ) + return false; + + CHECK_ERR( not image.GetData().empty() ); + CHECK_ERR( not image.GetData()[0].empty() ); + + AssetPacker::ImagePacker::Header header; + header.dimension = packed_ushort3{image.GetData()[0][0].dimension}; + header.arrayLayers = ushort(image.GetData()[0].size()); + header.mipmaps = ushort(image.GetData().size()); + header.viewType = image.GetType(); + header.format = image.GetData()[0][0].format; + + CHECK_ERR( ImagePacker_SaveHeader( stream, header )); + CHECK_ERR( ImagePacker_SaveImage( stream, header, image )); + return true; + } } // AE::ResLoader diff --git a/AE/engine/tools/res_loaders/AE/AEImageSaver.cpp b/AE/engine/tools/res_loaders/AE/AEImageSaver.cpp deleted file mode 100644 index 6302de4e..00000000 --- a/AE/engine/tools/res_loaders/AE/AEImageSaver.cpp +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright (c) Zhirnov Andrey. 
For more information see 'LICENSE' - -#include "res_loaders/AE/AEImageSaver.h" -#include "res_loaders/Intermediate/IntermImage.h" -#include "res_pack/asset_packer/Packer/ImagePacker.h" - -namespace AE::ResLoader -{ -namespace { -# define AE_BUILD_ASSET_PACKER -# include "res_pack/asset_packer/Packer/ImagePacker.cpp.h" -# undef AE_BUILD_ASSET_PACKER -} - -/* -================================================= - SaveImage -================================================= -*/ - bool AEImageSaver::SaveImage (WStream &stream, const IntermImage &image, EImageFormat fileFormat, Bool flipY) __NE___ - { - CHECK( not flipY ); - - if ( not (fileFormat == Default or fileFormat == EImageFormat::AEImg) ) - return false; - - CHECK_ERR( not image.GetData().empty() ); - CHECK_ERR( not image.GetData()[0].empty() ); - - AssetPacker::ImagePacker::Header header; - header.dimension = packed_ushort3{image.GetData()[0][0].dimension}; - header.arrayLayers = ushort(image.GetData()[0].size()); - header.mipmaps = ushort(image.GetData().size()); - header.viewType = image.GetType(); - header.format = image.GetData()[0][0].format; - - CHECK_ERR( ImagePacker_SaveHeader( stream, header )); - CHECK_ERR( ImagePacker_SaveImage( stream, header, image )); - return true; - } - - -} // AE::ResLoader - diff --git a/AE/engine/tools/res_loaders/CMakeLists.txt b/AE/engine/tools/res_loaders/CMakeLists.txt index 340f1477..a83974e3 100644 --- a/AE/engine/tools/res_loaders/CMakeLists.txt +++ b/AE/engine/tools/res_loaders/CMakeLists.txt @@ -37,3 +37,4 @@ endif() EnablePCH( "ResourceLoaders" ) EnablePrebuild( "ResourceLoaders" ) +EnableUnitBuild( "ResourceLoaders" ) diff --git a/AE/engine/tools/res_loaders/DDS/DDSUtils.cpp.h b/AE/engine/tools/res_loaders/DDS/DDSUtils.cpp.h index 6758a3f5..0558eaac 100644 --- a/AE/engine/tools/res_loaders/DDS/DDSUtils.cpp.h +++ b/AE/engine/tools/res_loaders/DDS/DDSUtils.cpp.h @@ -4,6 +4,8 @@ https://docs.microsoft.com/en-us/windows/desktop/direct3ddds/dx-graphics-dds-pguide */ 
+#pragma once + #include "res_loaders/Intermediate/IntermImage.h" namespace AE::ResLoader diff --git a/AE/engine/tools/res_loaders/KTX/KTXImageLoader.cpp b/AE/engine/tools/res_loaders/KTX/KTXImageLoader.cpp index 58d561dc..16e52882 100644 --- a/AE/engine/tools/res_loaders/KTX/KTXImageLoader.cpp +++ b/AE/engine/tools/res_loaders/KTX/KTXImageLoader.cpp @@ -165,7 +165,7 @@ namespace Function< bool (int miplevel, int layer, const uint3 mipDim, ArrayView pixels) > _load; ND_ static KTX_error_code Load (int miplevel, int layer, int width, int height, int depth, ktx_uint64_t faceLodSize, void* pixels, void* userdata) { - return Cast(userdata)->_load( uint(miplevel), layer, uint3{width, height, depth}, ArrayView{Cast(pixels), usize(faceLodSize)} ) ? + return Cast(userdata)->_load( uint(miplevel), layer, uint3{int3{ width, height, depth }}, ArrayView{Cast(pixels), usize(faceLodSize)} ) ? KTX_SUCCESS : KTX_FILE_DATA_ERROR; } }; diff --git a/AE/engine/tools/res_loaders/STB/STBImageLoader.cpp b/AE/engine/tools/res_loaders/STB/STBImageLoader.cpp index 71408e59..13c6edf1 100644 --- a/AE/engine/tools/res_loaders/STB/STBImageLoader.cpp +++ b/AE/engine/tools/res_loaders/STB/STBImageLoader.cpp @@ -82,7 +82,7 @@ namespace AE::ResLoader image_data[0].resize( 1 ); IntermImage::Level& image_level = image_data[0][0]; - image_level.dimension = uint3{ x, y, 1 }; + image_level.dimension = uint3{int3{ x, y, 1 }}; image_level.mipmap = 0_mipmap; image_level.layer = 0_layer; image_level.rowPitch = Bytes{uint( comp * x )}; diff --git a/AE/engine/tools/res_loaders/WAV/WaveUtils.cpp.h b/AE/engine/tools/res_loaders/WAV/WaveUtils.cpp.h index dd060169..3c609918 100644 --- a/AE/engine/tools/res_loaders/WAV/WaveUtils.cpp.h +++ b/AE/engine/tools/res_loaders/WAV/WaveUtils.cpp.h @@ -1,5 +1,7 @@ // Copyright (c) Zhirnov Andrey. 
For more information see 'LICENSE' +#pragma once + #include "res_loaders/Public/Common.h" namespace AE::ResLoader diff --git a/AE/engine/tools/res_pack/asset_packer/CMakeLists.txt b/AE/engine/tools/res_pack/asset_packer/CMakeLists.txt index 874c2b51..3029b718 100644 --- a/AE/engine/tools/res_pack/asset_packer/CMakeLists.txt +++ b/AE/engine/tools/res_pack/asset_packer/CMakeLists.txt @@ -9,9 +9,11 @@ set( GRAPHICS_HPP_SRC if (NOT (DEFINED ENGINE_LIBS_PATH)) set( GRAPHICS_CPP_SRC "${GRAPHICS_DIR}/Private/ImageMemView.cpp" - "${GRAPHICS_DIR}/Private/EnumUtils.cpp" - "${GRAPHICS_DIR}/Private/RenderState.cpp" - "${GRAPHICS_DIR}/Scripting/GraphicsBindings.cpp" ) + "${GRAPHICS_DIR}/Private/EnumUtils.cpp.h" + "${GRAPHICS_DIR}/Private/RenderState.cpp.h" + "${GRAPHICS_DIR}/Scripting/GraphicsBindings.cpp.h" + "${GRAPHICS_DIR}/Private/Shared.cpp" ) + set_property( SOURCE ${GRAPHICS_CPP_SRC} PROPERTY SKIP_UNITY_BUILD_INCLUSION ON ) endif() @@ -68,9 +70,13 @@ endif() if (TARGET "AstcEncoder-lib") target_link_libraries( "AssetPacker" PUBLIC "AstcEncoder-lib" ) endif() +#if (TARGET "ETCPACK-lib") +# target_link_libraries( "AssetPacker" PUBLIC "ETCPACK-lib" ) +#endif() EnablePCH( "AssetPacker" ) EnablePrebuild( "AssetPacker" ) +EnableUnitBuild( "AssetPacker" ) add_library( "AssetPacker-shared" SHARED "AssetPacker.h" diff --git a/AE/engine/tools/res_pack/asset_packer/Packer/AssetPacker.cpp b/AE/engine/tools/res_pack/asset_packer/Packer/AssetPacker.cpp index ae5875e7..68214b3f 100644 --- a/AE/engine/tools/res_pack/asset_packer/Packer/AssetPacker.cpp +++ b/AE/engine/tools/res_pack/asset_packer/Packer/AssetPacker.cpp @@ -36,7 +36,7 @@ namespace { auto& item = info->inFiles[i]; - if ( not AnyBits( item.flags, EPathParamsFlags::Folder | EPathParamsFlags::RecursiveFolder )) + if ( NoBits( item.flags, EPathParamsFlags::Folder | EPathParamsFlags::RecursiveFolder )) continue; Path path {item.path}; diff --git a/AE/engine/tools/res_pack/asset_packer/Packer/ImageAtlasPacker.cpp.h 
b/AE/engine/tools/res_pack/asset_packer/Packer/ImageAtlasPacker.cpp.h index 2ed21439..83059e12 100644 --- a/AE/engine/tools/res_pack/asset_packer/Packer/ImageAtlasPacker.cpp.h +++ b/AE/engine/tools/res_pack/asset_packer/Packer/ImageAtlasPacker.cpp.h @@ -1,5 +1,7 @@ // Copyright (c) Zhirnov Andrey. For more information see 'LICENSE' +#pragma once + /* ================================================= IsValid diff --git a/AE/engine/tools/res_pack/asset_packer/Packer/ImagePacker.cpp.h b/AE/engine/tools/res_pack/asset_packer/Packer/ImagePacker.cpp.h index 3a142434..a0f4439e 100644 --- a/AE/engine/tools/res_pack/asset_packer/Packer/ImagePacker.cpp.h +++ b/AE/engine/tools/res_pack/asset_packer/Packer/ImagePacker.cpp.h @@ -1,5 +1,7 @@ // Copyright (c) Zhirnov Andrey. For more information see 'LICENSE' +#pragma once + using ImgPackHeader_t = AssetPacker::ImagePacker::Header; using ImgPackFileHeader_t = AssetPacker::ImagePacker::FileHeader; using ImageUtils_t = Graphics::ImageUtils; diff --git a/AE/engine/tools/res_pack/asset_packer/Packer/ImagePacker.h b/AE/engine/tools/res_pack/asset_packer/Packer/ImagePacker.h index f8f65e29..1c8e12f2 100644 --- a/AE/engine/tools/res_pack/asset_packer/Packer/ImagePacker.h +++ b/AE/engine/tools/res_pack/asset_packer/Packer/ImagePacker.h @@ -39,9 +39,9 @@ namespace AE::AssetPacker struct Header { - packed_ushort3 dimension; - ushort arrayLayers = 0; - ushort mipmaps = 0; + ImageDim_t dimension; + LayerCount_t arrayLayers = 0; + MipmapCount_t mipmaps = 0; EImage viewType = Default; EPixelFormat format = Default; ushort flags = 0; // 0 diff --git a/AE/engine/tools/res_pack/asset_packer/Packer/RasterFontPacker.cpp.h b/AE/engine/tools/res_pack/asset_packer/Packer/RasterFontPacker.cpp.h index a275c94d..90b01a38 100644 --- a/AE/engine/tools/res_pack/asset_packer/Packer/RasterFontPacker.cpp.h +++ b/AE/engine/tools/res_pack/asset_packer/Packer/RasterFontPacker.cpp.h @@ -1,5 +1,7 @@ // Copyright (c) Zhirnov Andrey. 
For more information see 'LICENSE' +#pragma once + /* ================================================= SaveImage diff --git a/AE/engine/tools/res_pack/asset_packer/ScriptObjects/ScriptImageAtlas.cpp b/AE/engine/tools/res_pack/asset_packer/ScriptObjects/ScriptImageAtlas.cpp index aabdf6f9..fa2f8b17 100644 --- a/AE/engine/tools/res_pack/asset_packer/ScriptObjects/ScriptImageAtlas.cpp +++ b/AE/engine/tools/res_pack/asset_packer/ScriptObjects/ScriptImageAtlas.cpp @@ -5,7 +5,7 @@ #include "graphics/Private/EnumUtils.h" -#include "serializing/ObjectFactory.h" +#include "serializing/Public/ObjectFactory.h" #include "scripting/Impl/ClassBinder.h" @@ -210,7 +210,7 @@ namespace { ASSERT( All( texc.Size() == int2(src.region.Size()) )); - CHECK_ERR( dst_view.Blit( uint3{texc.left, texc.top, 0}, uint3{src.region.left, src.region.top, 0}, src_img, uint3(texc.Size(),1) )); + CHECK_ERR( dst_view.Blit( uint3{int3{ texc.left, texc.top, 0 }}, uint3{src.region.left, src.region.top, 0u}, src_img, uint3{int3{ texc.Size(), 1 }} )); // TODO: fill border? 
} diff --git a/AE/engine/tools/res_pack/asset_packer/ScriptObjects/ScriptRasterFont.cpp b/AE/engine/tools/res_pack/asset_packer/ScriptObjects/ScriptRasterFont.cpp index dbd478b7..b85af181 100644 --- a/AE/engine/tools/res_pack/asset_packer/ScriptObjects/ScriptRasterFont.cpp +++ b/AE/engine/tools/res_pack/asset_packer/ScriptObjects/ScriptRasterFont.cpp @@ -307,7 +307,7 @@ namespace { ASSERT( All( img_rect.Size() == int2(src.dimension) )); - CHECK_ERR( dst_view.Blit( uint3{img_rect.left, img_rect.top, 0}, uint3{0}, src_view, uint3(img_rect.Size(),1) )); + CHECK_ERR( dst_view.Blit( uint3{int3{ img_rect.left, img_rect.top, 0 }}, uint3{0}, src_view, uint3{int3{ img_rect.Size(), 1 }} )); const RectI texc = RectI{ int2{r.w, r.h} - _paddingPix*2 } + (int2{r.x, r.y} + _paddingPix); src.texcoord = FloatToUNormShort( RectF{texc} / float2(dst_view.Dimension()) ); @@ -592,7 +592,7 @@ namespace AE::AssetPacker // GlyphData result.symbol = unicodeChar; - result.dimension = uint2{ Ceil((bounds.r - bounds.l) * projScale), Ceil((bounds.t - bounds.b) * projScale) } + border_px*2; + result.dimension = uint2{double2{ Ceil((bounds.r - bounds.l) * projScale), Ceil((bounds.t - bounds.b) * projScale) }} + border_px*2; proj_translate.x = -bounds.l + border_px / projScale; proj_translate.y = -bounds.b + border_px / projScale; diff --git a/AE/engine/tools/res_pack/asset_packer/ScriptObjects/ScriptTexture.cpp b/AE/engine/tools/res_pack/asset_packer/ScriptObjects/ScriptTexture.cpp index 7449c882..0830c451 100644 --- a/AE/engine/tools/res_pack/asset_packer/ScriptObjects/ScriptTexture.cpp +++ b/AE/engine/tools/res_pack/asset_packer/ScriptObjects/ScriptTexture.cpp @@ -300,6 +300,9 @@ namespace { }else if ( EPixelFormat_IsASTC( _dstFormat )) { CHECK_ERR( _CompressASTC( OUT dst_image )); + }else + if ( EPixelFormat_IsEAC( _dstFormat )) { + CHECK_ERR( _CompressEAC( OUT dst_image )); }else RETURN_ERR( "compression is not supported" ); @@ -519,3 +522,63 @@ namespace AE::AssetPacker #endif // 
AE_ENABLE_ASTC_ENCODER //----------------------------------------------------------------------------- + + +#ifdef AE_ENABLE_ETCPACK + +namespace AE::AssetPacker +{ +namespace { +# include "Utils/EacPack.cpp.h" +} + +/* +================================================= + _CompressEAC +================================================= +*/ + bool ScriptTexture::_CompressEAC (OUT IntermImage &dstImage) const + { + CHECK_ERR( not EPixelFormat_IsCompressed( _intermFormat )); + CHECK_ERR( EPixelFormat_IsEAC( _dstFormat )); + + CHECK_ERR( dstImage.Allocate( _imgData->GetType(), _dstFormat, _imgData->Dimension(), ImageLayer{_imgData->ArrayLayers()}, MipmapLevel{_imgData->MipLevels()} )); + + auto& src_img_data = *_imgData->GetMutableData(); + auto& dst_img_data = *dstImage.GetMutableData(); + CHECK_ERR( src_img_data.size() == dst_img_data.size() ); + + for (usize mip = 0; mip < src_img_data.size(); ++mip) + { + auto& src_layers = src_img_data[mip]; + auto& dst_layers = dst_img_data[mip]; + CHECK_ERR( src_layers.size() == dst_layers.size() ); + + for (usize layer = 0; layer < src_layers.size(); ++layer) + { + CHECK_ERR( EacEncode( _imgData->ToView( MipmapLevel{mip}, ImageLayer{layer} ), + dstImage.ToView( MipmapLevel{mip}, ImageLayer{layer} ), + CompressionThreadCount(), + CompressionQuality() + )); + } + } + + Unused( &EacDecode ); + return true; + } + +} // AE::AssetPacker + +#else + +namespace AE::AssetPacker +{ + bool ScriptTexture::_CompressEAC (OUT IntermImage &) const + { + RETURN_ERR( "EAC compression is not supported" ); + } +} + +#endif // AE_ENABLE_ETCPACK +//----------------------------------------------------------------------------- diff --git a/AE/engine/tools/res_pack/asset_packer/ScriptObjects/ScriptTexture.h b/AE/engine/tools/res_pack/asset_packer/ScriptObjects/ScriptTexture.h index 05739f45..3be1d554 100644 --- a/AE/engine/tools/res_pack/asset_packer/ScriptObjects/ScriptTexture.h +++ b/AE/engine/tools/res_pack/asset_packer/ScriptObjects/ScriptTexture.h 
@@ -62,6 +62,7 @@ namespace AE::AssetPacker ND_ bool _Convert (OUT ResLoader::IntermImage &dstImage) const; ND_ bool _CompressBC_ETC2 (OUT ResLoader::IntermImage &dstImage) const; ND_ bool _CompressASTC (OUT ResLoader::IntermImage &dstImage) const; + ND_ bool _CompressEAC (OUT ResLoader::IntermImage &dstImage) const; void _AddLayer (ResLoader::IntermImage &img, uint layer) __Th___; diff --git a/AE/engine/tools/res_pack/asset_packer/ScriptObjects/ScriptUIWidget_Controller.cpp.h b/AE/engine/tools/res_pack/asset_packer/ScriptObjects/ScriptUIWidget_Controller.cpp.h index 0dae6c1e..b2ea4958 100644 --- a/AE/engine/tools/res_pack/asset_packer/ScriptObjects/ScriptUIWidget_Controller.cpp.h +++ b/AE/engine/tools/res_pack/asset_packer/ScriptObjects/ScriptUIWidget_Controller.cpp.h @@ -1,5 +1,7 @@ // Copyright (c) Zhirnov Andrey. For more information see 'LICENSE' +#pragma once + namespace AE::AssetPacker { using EControllerType = UI::IController::EType; diff --git a/AE/engine/tools/res_pack/asset_packer/ScriptObjects/ScriptUIWidget_Drawable.cpp.h b/AE/engine/tools/res_pack/asset_packer/ScriptObjects/ScriptUIWidget_Drawable.cpp.h index 04aa5947..9e0eeeb1 100644 --- a/AE/engine/tools/res_pack/asset_packer/ScriptObjects/ScriptUIWidget_Drawable.cpp.h +++ b/AE/engine/tools/res_pack/asset_packer/ScriptObjects/ScriptUIWidget_Drawable.cpp.h @@ -1,5 +1,7 @@ // Copyright (c) Zhirnov Andrey. For more information see 'LICENSE' +#pragma once + namespace AE::AssetPacker { using EDrawableType = UI::IDrawable::EType; diff --git a/AE/engine/tools/res_pack/asset_packer/ScriptObjects/ScriptUIWidget_Layout.cpp.h b/AE/engine/tools/res_pack/asset_packer/ScriptObjects/ScriptUIWidget_Layout.cpp.h index 1f9ba4cb..59f97e3c 100644 --- a/AE/engine/tools/res_pack/asset_packer/ScriptObjects/ScriptUIWidget_Layout.cpp.h +++ b/AE/engine/tools/res_pack/asset_packer/ScriptObjects/ScriptUIWidget_Layout.cpp.h @@ -1,5 +1,7 @@ // Copyright (c) Zhirnov Andrey. 
For more information see 'LICENSE' +#pragma once + namespace AE::AssetPacker { using ELayoutType = UI::ILayout::EType; diff --git a/AE/engine/tools/res_pack/asset_packer/Utils/AstcEncoder.cpp.h b/AE/engine/tools/res_pack/asset_packer/Utils/AstcEncoder.cpp.h index 0dc9b6ab..94965378 100644 --- a/AE/engine/tools/res_pack/asset_packer/Utils/AstcEncoder.cpp.h +++ b/AE/engine/tools/res_pack/asset_packer/Utils/AstcEncoder.cpp.h @@ -1,5 +1,7 @@ // Copyright (c) Zhirnov Andrey. For more information see 'LICENSE' +#pragma once + #include "MtCompression.h" struct AstcContext diff --git a/AE/engine/tools/res_pack/asset_packer/Utils/Compressonator.cpp.h b/AE/engine/tools/res_pack/asset_packer/Utils/Compressonator.cpp.h index e856dea6..98b54930 100644 --- a/AE/engine/tools/res_pack/asset_packer/Utils/Compressonator.cpp.h +++ b/AE/engine/tools/res_pack/asset_packer/Utils/Compressonator.cpp.h @@ -1,5 +1,7 @@ // Copyright (c) Zhirnov Andrey. For more information see 'LICENSE' +#pragma once + #include "MtCompression.h" /* @@ -114,8 +116,8 @@ }; CHECK_ERR_MSG( srcView.Format() == EPixelFormat::RGBA16F, - "Input image in "s << ToString(srcView.Format()) << - " format, but BC6 format requires input in RGBA16F format" ); + "Input image in '"s << ToString(srcView.Format()) << + "' format, but BC6 format requires input in 'RGBA16F' format" ); BC6Enc bc6_enc { dstView.Format() == EPixelFormat::BC6H_RGB16F, quality }; CHECK_ERR( bc6_enc.enc != null ); @@ -166,8 +168,8 @@ ND_ static bool Compressonator_DecodeBC6 (ImageMemView srcView, ImageMemView dstView, const uint threadCount) { CHECK_ERR_MSG( dstView.Format() == EPixelFormat::RGBA16F, - "Output image in "s << ToString(srcView.Format()) << - " format, but BC6 format requires output in RGBA16F format" ); + "Output image in '"s << ToString(dstView.Format()) << + "' format, but BC6 format requires output in 'RGBA16F' format" ); CMP_BYTE* src_ptr = Cast( srcView.Parts().front().ptr ); half* dst_ptr = Cast( dstView.Parts().front().ptr ); @@ 
-233,8 +235,8 @@ }; CHECK_ERR_MSG( srcView.Format() == EPixelFormat::RGBA8_UNorm, - "Input image in "s << ToString(srcView.Format()) << - " format, but BC7 format requires input in RGBA8_UNorm format" ); + "Input image in '"s << ToString(srcView.Format()) << + "' format, but BC7 format requires input in 'RGBA8_UNorm' format" ); BC7Enc bc7_enc { quality }; CHECK_ERR( bc7_enc.enc != null ); @@ -285,8 +287,8 @@ ND_ static bool Compressonator_DecodeBC7 (ImageMemView srcView, ImageMemView dstView, const uint threadCount) { CHECK_ERR_MSG( dstView.Format() == EPixelFormat::RGBA8_UNorm, - "Output image in "s << ToString(srcView.Format()) << - " format, but BC7 format requires output in RGBA8_UNorm format" ); + "Output image in '"s << ToString(dstView.Format()) << + "' format, but BC7 format requires output in 'RGBA8_UNorm' format" ); CMP_BYTE* src_ptr = Cast( srcView.Parts().front().ptr ); ubyte* dst_ptr = Cast( dstView.Parts().front().ptr ); diff --git a/AE/engine/tools/res_pack/input_actions/CMakeLists.txt b/AE/engine/tools/res_pack/input_actions/CMakeLists.txt index fe8c94b0..eaf3f5d6 100644 --- a/AE/engine/tools/res_pack/input_actions/CMakeLists.txt +++ b/AE/engine/tools/res_pack/input_actions/CMakeLists.txt @@ -31,6 +31,7 @@ target_include_directories( "InputActionsBinding" PUBLIC "." 
) EnablePCH( "InputActionsBinding" ) EnablePrebuild( "InputActionsBinding" ) +EnableUnitBuild( "InputActionsBinding" ) add_library( "InputActionsBinding-shared" SHARED "InputActionsBinding.h" diff --git a/AE/engine/tools/res_pack/pipeline_compiler/CMakeLists.txt b/AE/engine/tools/res_pack/pipeline_compiler/CMakeLists.txt index fc2ceb5a..02913057 100644 --- a/AE/engine/tools/res_pack/pipeline_compiler/CMakeLists.txt +++ b/AE/engine/tools/res_pack/pipeline_compiler/CMakeLists.txt @@ -18,15 +18,17 @@ if (TARGET "GLSLang-lib") "${GRAPHICS_DIR}/Vulkan/VEnumCast.h" ) if (NOT (DEFINED ENGINE_LIBS_PATH)) set( GRAPHICS_CPP_SRC - "${GRAPHICS_DIR}/Scripting/GraphicsBindings.cpp" - "${GRAPHICS_DIR}/Private/FeatureSet.cpp" - "${GRAPHICS_DIR}/Private/RenderState.cpp" - "${GRAPHICS_DIR}/Private/EnumUtils.cpp" - "${GRAPHICS_DIR}/Private/ImageDesc.cpp" - "${GRAPHICS_DIR}/Private/BufferDesc.cpp" ) + "${GRAPHICS_DIR}/Scripting/GraphicsBindings.cpp.h" + "${GRAPHICS_DIR}/Private/FeatureSet.cpp.h" + "${GRAPHICS_DIR}/Private/RenderState.cpp.h" + "${GRAPHICS_DIR}/Private/EnumUtils.cpp.h" + "${GRAPHICS_DIR}/Private/ImageDesc.cpp.h" + "${GRAPHICS_DIR}/Private/BufferDesc.cpp.h" + "${GRAPHICS_DIR}/Private/Shared.cpp" ) if (${HAS_VULKAN_HEADERS}) - set( GRAPHICS_CPP_SRC ${GRAPHICS_CPP_SRC} "${GRAPHICS_DIR}/Vulkan/VEnumCast.cpp" ) + set( GRAPHICS_CPP_SRC ${GRAPHICS_CPP_SRC} "${GRAPHICS_DIR}/Vulkan/VEnumCast.cpp.h" ) endif() + set_property( SOURCE ${GRAPHICS_CPP_SRC} PROPERTY SKIP_UNITY_BUILD_INCLUSION ON ) endif() file( GLOB_RECURSE SOURCES "*.*" ) @@ -73,6 +75,7 @@ if (TARGET "GLSLang-lib") EnablePCH( "PipelineCompiler" ) EnablePrebuild( "PipelineCompiler" ) + EnableUnitBuild( "PipelineCompiler" ) add_library( "PipelineCompiler-shared" SHARED "PipelineCompiler.h" diff --git a/AE/engine/tools/res_pack/pipeline_compiler/Compiler/AEStyleGLSLPreprocessor.cpp b/AE/engine/tools/res_pack/pipeline_compiler/Compiler/AEStyleGLSLPreprocessor.cpp index 00c6f0c0..76e414d3 100644 --- 
a/AE/engine/tools/res_pack/pipeline_compiler/Compiler/AEStyleGLSLPreprocessor.cpp +++ b/AE/engine/tools/res_pack/pipeline_compiler/Compiler/AEStyleGLSLPreprocessor.cpp @@ -4,15 +4,6 @@ namespace AE::PipelineCompiler { -namespace { - ND_ bool IsPartOfWord (const char c) - { - return (c == '_') or - ((c >= 'a') and (c <= 'z')) or - ((c >= 'A') and (c <= 'Z')) or - ((c >= '0') and (c <= '9')); - } -} /* ================================================= @@ -171,6 +162,8 @@ namespace { _typeMap.emplace( "ulong4", "u64vec4" ); _typeMap.emplace( "ulong_vec_t", "u64vec" ); + _typeMap.emplace( "WGShared", "shared" ); + _typeMap.emplace( "gl::SubpassInput", "subpassInput" ); _typeMap.emplace( "gl::SubpassInputMS", "subpassInputMS" ); @@ -612,6 +605,7 @@ namespace { _typeMap.emplace( "gl::Nonuniform", "nonuniformEXT" ); // https://github.com/KhronosGroup/GLSL/blob/master/extensions/khr/GL_KHR_memory_scope_semantics.txt + // https://registry.khronos.org/SPIR-V/specs/1.0/SPIR-V-execution-and-memory-model.pdf /*/ layout _typeMap.emplace( "gl::Coherent", "coherent" ); _typeMap.emplace( "gl::Devicecoherent", "devicecoherent" ); @@ -724,6 +718,8 @@ namespace { */ bool AEStyleGLSLPreprocessor::Process (EShader, const PathAndLine &fileLoc, usize headerLines, StringView inStr, OUT String &outStr) { + const auto IsPartOfWord = [](char c) { return Parser::CPP.IsWord( c ); }; + usize hdr_size = 0; Parser::MoveToLine( inStr, INOUT hdr_size, headerLines ); @@ -735,7 +731,7 @@ namespace { outStr = source; #ifdef AE_CFG_DEBUG - const auto FindAndPrint = [source, &fileLoc] (StringView src, StringView dst) + const auto FindAndPrint = [source, &fileLoc, &IsPartOfWord] (StringView src, StringView dst) {{ if ( dst.size() <= 1 ) return; if ( dst == "uint" ) return; diff --git a/AE/engine/tools/res_pack/pipeline_compiler/Compiler/AEStyleMSLPreprocessor.cpp b/AE/engine/tools/res_pack/pipeline_compiler/Compiler/AEStyleMSLPreprocessor.cpp index 18a257ae..906345c1 100644 --- 
a/AE/engine/tools/res_pack/pipeline_compiler/Compiler/AEStyleMSLPreprocessor.cpp +++ b/AE/engine/tools/res_pack/pipeline_compiler/Compiler/AEStyleMSLPreprocessor.cpp @@ -4,15 +4,6 @@ namespace AE::PipelineCompiler { -namespace { - ND_ bool IsPartOfWord (const char c) - { - return (c == '_') or - ((c >= 'a') and (c <= 'z')) or - ((c >= 'A') and (c <= 'Z')) or - ((c >= '0') and (c <= '9')); - } -} /* ================================================= @@ -31,6 +22,8 @@ namespace { */ bool AEStyleMSLPreprocessor::Process (EShader, const PathAndLine &, usize headerLines, StringView inStr, OUT String &outStr) { + const auto IsPartOfWord = [](char c) { return Parser::CPP.IsWord( c ); }; + usize hdr_size = 0; Parser::MoveToLine( inStr, INOUT hdr_size, headerLines ); diff --git a/AE/engine/tools/res_pack/pipeline_compiler/Compiler/SpirvCompiler.cpp b/AE/engine/tools/res_pack/pipeline_compiler/Compiler/SpirvCompiler.cpp index c3707702..ad257add 100644 --- a/AE/engine/tools/res_pack/pipeline_compiler/Compiler/SpirvCompiler.cpp +++ b/AE/engine/tools/res_pack/pipeline_compiler/Compiler/SpirvCompiler.cpp @@ -735,10 +735,10 @@ namespace AE::PipelineCompiler SpvOptions spv_options; spv::SpvBuildLogger logger; - spv_options.generateDebugInfo = AllBits( options, EShaderOpt::DebugInfo ); + spv_options.generateDebugInfo = AllBits( options, EShaderOpt::DebugInfo ); spv_options.stripDebugInfo = false; - spv_options.disableOptimizer = not AllBits( options, EShaderOpt::Optimize ); - spv_options.optimizeSize = AllBits( options, EShaderOpt::OptimizeSize ); + spv_options.disableOptimizer = NoBits( options, EShaderOpt::Optimize ); + spv_options.optimizeSize = AllBits( options, EShaderOpt::OptimizeSize ); spv_options.disassemble = false; spv_options.validate = false; spv_options.emitNonSemanticShaderDebugInfo = spv_options.generateDebugInfo; diff --git a/AE/engine/tools/res_pack/pipeline_compiler/Packer/HashToName.h b/AE/engine/tools/res_pack/pipeline_compiler/Packer/HashToName.h index 
305a9ec5..9e346ec6 100644 --- a/AE/engine/tools/res_pack/pipeline_compiler/Packer/HashToName.h +++ b/AE/engine/tools/res_pack/pipeline_compiler/Packer/HashToName.h @@ -2,7 +2,7 @@ #pragma once -#include "serializing/ObjectFactory.h" +#include "serializing/Public/ObjectFactory.h" #include "base/Algorithms/StringUtils.h" namespace AE::Base diff --git a/AE/engine/tools/res_pack/pipeline_compiler/Packer/PipelineCompiler.cpp b/AE/engine/tools/res_pack/pipeline_compiler/Packer/PipelineCompiler.cpp index 3891a25b..fc9d2007 100644 --- a/AE/engine/tools/res_pack/pipeline_compiler/Packer/PipelineCompiler.cpp +++ b/AE/engine/tools/res_pack/pipeline_compiler/Packer/PipelineCompiler.cpp @@ -8,7 +8,7 @@ #include "base/Algorithms/StringUtils.h" #include "base/Algorithms/Parser.h" -#include "serializing/Serializer.h" +#include "serializing/Public/Serializer.h" #include "scripting/Impl/EnumBinder.h" #include "scripting/Impl/ScriptFn.h" @@ -52,7 +52,7 @@ namespace { auto& item = info->inPipelines[i]; - if ( not AnyBits( item.flags, EPathParamsFlags::Folder | EPathParamsFlags::RecursiveFolder )) + if ( NoBits( item.flags, EPathParamsFlags::Folder | EPathParamsFlags::RecursiveFolder )) continue; Path path {item.path}; diff --git a/AE/engine/tools/res_pack/pipeline_compiler/Packer/PipelinePack.cpp b/AE/engine/tools/res_pack/pipeline_compiler/Packer/PipelinePack.cpp index febdac36..e25fdb51 100644 --- a/AE/engine/tools/res_pack/pipeline_compiler/Packer/PipelinePack.cpp +++ b/AE/engine/tools/res_pack/pipeline_compiler/Packer/PipelinePack.cpp @@ -3,8 +3,8 @@ #include "PipelinePack.h" #include "base/Algorithms/StringUtils.h" #include "base/DataSource/MemStream.h" -#include "serializing/Serializer.h" -#include "serializing/ObjectFactory.h" +#include "serializing/Public/Serializer.h" +#include "serializing/Public/ObjectFactory.h" #include "graphics/Private/EnumUtils.h" #ifdef AE_ENABLE_GLSL_TRACE @@ -1273,7 +1273,7 @@ namespace { bool result = true; result &= ser( templUID ); result &= 
Serialize_BasePipelineDesc( ser, desc ); - result &= ser( desc.localSize ); + result &= ser( desc.localSize, desc.subgroupSize ); return result; } @@ -1284,7 +1284,7 @@ namespace { */ HashVal SerializableComputePipelineSpec::CalcHash () C_NE___ { - return HashOf( uint(templUID) ) + BasePipelineDesc_Hash( desc ) + HashOf( desc.localSize ); + return HashOf( uint(templUID) ) + BasePipelineDesc_Hash( desc ) + HashOf( desc.localSize ) + HashOf( desc.subgroupSize ); } /* @@ -1296,7 +1296,8 @@ namespace { { return templUID == rhs.templUID and BasePipelineDesc_Compare( desc, rhs.desc ) and - All( desc.localSize == rhs.desc.localSize ); + All( desc.localSize == rhs.desc.localSize ) and + desc.subgroupSize == rhs.desc.subgroupSize; } //----------------------------------------------------------------------------- diff --git a/AE/engine/tools/res_pack/pipeline_compiler/Packer/PipelinePack.h b/AE/engine/tools/res_pack/pipeline_compiler/Packer/PipelinePack.h index a981438d..9a27ad2a 100644 --- a/AE/engine/tools/res_pack/pipeline_compiler/Packer/PipelinePack.h +++ b/AE/engine/tools/res_pack/pipeline_compiler/Packer/PipelinePack.h @@ -138,7 +138,7 @@ namespace AE::PipelineCompiler ND_ bool EImageType_IsCompatible (EImageType lhs, EImageType rhs) __NE___; ND_ EImageType EImageType_FromPixelFormat (EPixelFormat fmt) __NE___; ND_ EImageType EImageType_FromPixelFormatRelaxed (EPixelFormat fmt) __NE___; - ND_ EImageType EImageType_FromImage (EImage type, bool ms) __NE___; + ND_ EImageType EImageType_FromImage (EImage type, bool ms, bool cm = true) __NE___; ND_ String EImageType_ToString (EImageType type) __Th___; ND_ EShaderIO EImageType_ToShaderIO (EImageType type) __NE___; diff --git a/AE/engine/tools/res_pack/pipeline_compiler/Packer/PipelinePackDeserializer.cpp b/AE/engine/tools/res_pack/pipeline_compiler/Packer/PipelinePackDeserializer.cpp index 5fdd0aa6..5751c374 100644 --- a/AE/engine/tools/res_pack/pipeline_compiler/Packer/PipelinePackDeserializer.cpp +++ 
b/AE/engine/tools/res_pack/pipeline_compiler/Packer/PipelinePackDeserializer.cpp @@ -167,7 +167,7 @@ namespace AE::PipelineCompiler EImageType_FromImage ================================================= */ - EImageType EImageType_FromImage (EImage type, bool ms) __NE___ + EImageType EImageType_FromImage (EImage type, bool ms, bool cm) __NE___ { switch_enum( type ) { @@ -176,8 +176,8 @@ namespace AE::PipelineCompiler case EImage_3D : ASSERT( not ms ); return EImageType::Img3D; case EImage_1DArray : ASSERT( not ms ); return EImageType::Img1DArray; case EImage_2DArray : return ms ? EImageType::Img2DMSArray : EImageType::Img2DArray; - case EImage_Cube : ASSERT( not ms ); return EImageType::ImgCube; - case EImage_CubeArray : ASSERT( not ms ); return EImageType::ImgCubeArray; + case EImage_Cube : ASSERT( not ms ); return cm ? EImageType::ImgCube : EImageType::Img2DArray; + case EImage_CubeArray : ASSERT( not ms ); return cm ? EImageType::ImgCubeArray : EImageType::Img2DArray; case EImage::Unknown : case EImage::_Count : default : ASSERT( not ms ); break; @@ -188,7 +188,7 @@ namespace AE::PipelineCompiler /* ================================================= - EImageType_FromImage + EImageType_ToString ================================================= */ String EImageType_ToString (EImageType type) __Th___ @@ -536,7 +536,7 @@ namespace { bool result = true; result &= des( OUT templUID ); result &= Deserialize_BasePipelineDesc( des, OUT desc ); - result &= des( OUT desc.localSize ); + result &= des( OUT desc.localSize, OUT desc.subgroupSize ); return result; } //----------------------------------------------------------------------------- diff --git a/AE/engine/tools/res_pack/pipeline_compiler/Packer/RenderPassPack.cpp b/AE/engine/tools/res_pack/pipeline_compiler/Packer/RenderPassPack.cpp index da4ea5eb..f18aa3ca 100644 --- a/AE/engine/tools/res_pack/pipeline_compiler/Packer/RenderPassPack.cpp +++ b/AE/engine/tools/res_pack/pipeline_compiler/Packer/RenderPassPack.cpp @@ 
-281,6 +281,9 @@ namespace { CHECK_ERR( compat._attachments.size() <= GraphicsConfig::MaxAttachments ); + if ( compat._attachments.empty() ) + return true; + const uint count = uint(compat._attachments.size()); auto* dst_attachments = _allocator.Allocate( count ); CHECK_ERR( dst_attachments != null ); @@ -1632,7 +1635,7 @@ namespace { const auto& att = _mtlAtt[i]; - if ( not att.IsDefined() or not AllBits( att.flags, MtlAttachmentFlags::Color )) + if ( not att.IsDefined() or NoBits( att.flags, MtlAttachmentFlags::Color )) { col_count = i; break; diff --git a/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/BasePipeline.cpp b/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/BasePipeline.cpp index 8f49240e..224056cb 100644 --- a/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/BasePipeline.cpp +++ b/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/BasePipeline.cpp @@ -102,7 +102,7 @@ namespace */ void BasePipelineTmpl::_Define (const String &value) __Th___ { - CHECK_THROW_MSG( not AnyBits( _states, EStateBits::HasShaders ), + CHECK_THROW_MSG( NoBits( _states, EStateBits::HasShaders ), "can not add global definition when one of the shaders is set" ); _defines << '\n' << value; @@ -117,7 +117,7 @@ namespace */ void BasePipelineTmpl::_Include (const String &value) __Th___ { - CHECK_THROW_MSG( not AnyBits( _states, EStateBits::HasShaders ), + CHECK_THROW_MSG( NoBits( _states, EStateBits::HasShaders ), "can not add global include when one of the shaders is set" ); for (auto& inc : _includes) { @@ -143,7 +143,7 @@ namespace CHECK_THROW_MSG( inShader->type != Default ); CHECK_THROW_MSG( not outShader, String{ToString(inShader->type)} << " is already defined" ); - CHECK_THROW_MSG( not AnyBits( _states, EStateBits::HasSpec ), + CHECK_THROW_MSG( NoBits( _states, EStateBits::HasSpec ), "can not add new shader when one of pipeline specializations is added" ); const EShaderStages stage = EShaderStages::Unknown | inShader->type; @@ -180,16 +180,16 
@@ namespace if ( same_stage ) { CHECK_THROW_MSG( (dbg_mode != Default) == dbg_ds.IsDefined(), - "Shader must have EShaderOpt with Trace/FnProfiling/TimeHeatMap or DebugDSLayout is not defined" ); + "Shader must have EShaderOpt with Trace/FnProfiling/TimeHeatMap or DebugDSLayout must not be defined" ); CHECK_THROW_MSG( dbg_mode == dbg_ds.mode, - "shader debug mode is not compatible with DebugDSLayout in pipeline layout" ); + "Shader debug mode is not compatible with DebugDSLayout in pipeline layout" ); } } if ( fragOut.has_value() ) resources << _FragOutputToGLSL( *fragOut ); - ObjectStorage::Instance()->CompileShaderGLSL( INOUT outShader, inShader, version, _defines, resources, _includes, _features, dbg_ds_idx, use_arg_buf ); + ObjectStorage::Instance()->CompileShaderGLSL( INOUT outShader, inShader, version, _defines, RVRef(resources), _includes, _features, dbg_ds_idx, use_arg_buf ); } else if ( AllBits( version, EShaderVersion::_Metal_iOS, EShaderVersion::_Mask ) or @@ -253,7 +253,7 @@ namespace resources << "\n" << BuildMSLEntry( version, inShader, RVRef(entry_res), RVRef(entry_args), _features ); - ObjectStorage::Instance()->CompileShaderMSL( INOUT outShader, inShader, version, _defines, resources, _includes, _features ); + ObjectStorage::Instance()->CompileShaderMSL( INOUT outShader, inShader, version, _defines, RVRef(resources), _includes, _features ); } else { @@ -290,7 +290,7 @@ namespace void BasePipelineTmpl::_SetLayout (PipelineLayoutPtr ptr) __Th___ { - CHECK_THROW_MSG( not AnyBits( _states, EStateBits::HasShaders ), + CHECK_THROW_MSG( NoBits( _states, EStateBits::HasShaders ), "can not set pipeline layout when one of the shaders is set" ); CHECK_THROW_MSG( not _layoutUID.has_value() ); @@ -465,10 +465,10 @@ namespace */ void BasePipelineTmpl::_AddFeatureSet (const String &name) __Th___ { - CHECK_THROW_MSG( not AnyBits( _states, EStateBits::HasShaders ), + CHECK_THROW_MSG( NoBits( _states, EStateBits::HasShaders ), "can not add feature set when shaders 
is already set" ); - CHECK_THROW_MSG( not AnyBits( _states, EStateBits::HasSpec ), + CHECK_THROW_MSG( NoBits( _states, EStateBits::HasSpec ), "can not add feature set when one of pipeline specializations is added" ); _states |= EStateBits::HasFeatures; @@ -565,7 +565,7 @@ namespace if ( not usage.output.IsDefined() ) continue; - CHECK_ERR_MSG( AnyEqual( usage.type, EAttachment::Color ), + CHECK_ERR_MSG( AnyEqual( usage.type, EAttachment::Color, EAttachment::ReadWrite ), "Attachment '"s << storage.GetName( name ) << "' is not a color attachment" ); fragOut.push_back( usage.output ); @@ -609,7 +609,10 @@ namespace } switch_end - str << " " << storage.GetName( fo.name ) << ";\n"; + String name = storage.GetName( fo.name ); + CHECK_THROW_MSG( not name.empty(), "failed to get name for fragment output" ); + + str << " " << name << ";\n"; } str << "\n"; return str; @@ -914,6 +917,7 @@ namespace //case EPipelineDynamicState::DepthBounds : case EPipelineDynamicState::RTStackSize : case EPipelineDynamicState::FragmentShadingRate : + case EPipelineDynamicState::ViewportWScaling : break; // skip case EPipelineDynamicState::Unknown : @@ -1036,13 +1040,13 @@ namespace ================================================= */ void BasePipelineSpec::_CheckDepthStencil (const Graphics::RenderState &rs, const SubpassShaderIO &fragIO, - const CompatRenderPassName::Optimized_t &rpName, const SubpassName::Optimized_t &subpass) + const CompatRenderPassName::Optimized_t &rpName, const SubpassName::Optimized_t &subpass) C_Th___ { auto& storage = *ObjectStorage::Instance(); const bool req_depth = rs.depth.test or rs.depth.write; const bool req_stencil = rs.stencil.enabled; - storage.TestRenderPass( rpName, subpass, fragIO, req_depth, req_stencil ); // throw + storage.TestRenderPass( rpName, subpass, fragIO, req_depth, req_stencil, NameStr() ); // throw } /* diff --git a/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/BasePipeline.h 
b/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/BasePipeline.h index 5755b84d..21a4ebb3 100644 --- a/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/BasePipeline.h +++ b/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/BasePipeline.h @@ -158,9 +158,9 @@ namespace AE::PipelineCompiler static void _SetLocalGroupSize (String prefix, const uint3 &spec, const uint3 &maxSize, uint totalSize, const uint3 &inSize, OUT packed_ushort3 &outSize) __Th___; - static void _CheckDepthStencil (const Graphics::RenderState &, const SubpassShaderIO &, - const CompatRenderPassName::Optimized_t &rpName, - const SubpassName::Optimized_t &subpass) __Th___; + void _CheckDepthStencil (const Graphics::RenderState &, const SubpassShaderIO &, + const CompatRenderPassName::Optimized_t &rpName, + const SubpassName::Optimized_t &subpass) C_Th___; }; diff --git a/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/Common.h b/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/Common.h index d3b983bc..7afc0fd7 100644 --- a/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/Common.h +++ b/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/Common.h @@ -158,7 +158,7 @@ namespace AE::PipelineCompiler Unknown = 0, Coherent, Volatile, - Restrict, + Restrict, // better for performance _MemoryModel, DeviceCoherent, QueueFamilyCoherent, diff --git a/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/ComputePipeline.cpp b/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/ComputePipeline.cpp index 0ecd1c2f..f9d77ac3 100644 --- a/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/ComputePipeline.cpp +++ b/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/ComputePipeline.cpp @@ -197,7 +197,7 @@ namespace auto ds = EPipelineDynamicState(states); CHECK_THROW_MSG( (ds & ~EPipelineDynamicState::ComputePipelineMask) == Default, "unsupported dynamic state for compute pipeline" ); - //desc.dynamicState = ds; + desc.dynamicState = ds; } /* @@ 
-210,16 +210,64 @@ namespace CHECK_THROW_MSG( GetBase() != null and GetBase()->shader, "shader is not compiled" ); const auto& spec = GetBase()->shader->reflection.compute.localGroupSpec; - uint total_size = Max( 1u, GetMaxValueFromFeatures( GetBase()->GetFeatures(), &FeatureSet::maxComputeWorkGroupInvocations )); - uint3 max_threads = uint3{ GetMaxValueFromFeatures( GetBase()->GetFeatures(), &FeatureSet::maxComputeWorkGroupSizeX ), - GetMaxValueFromFeatures( GetBase()->GetFeatures(), &FeatureSet::maxComputeWorkGroupSizeY ), - GetMaxValueFromFeatures( GetBase()->GetFeatures(), &FeatureSet::maxComputeWorkGroupSizeZ )}; - max_threads = Max( max_threads, uint3{1} ); + const uint inv_count = x * y * z; + uint total_size = 0; + uint3 max_threads; + for (auto& feat : GetFeatures()) + { + if ( inv_count <= feat->fs.maxComputeWorkGroupInvocations and + x <= feat->fs.maxComputeWorkGroupSizeX and + y <= feat->fs.maxComputeWorkGroupSizeY and + z <= feat->fs.maxComputeWorkGroupSizeZ ) + { + total_size = feat->fs.maxComputeWorkGroupInvocations; + max_threads = uint3{feat->fs.maxComputeWorkGroupSizeX, feat->fs.maxComputeWorkGroupSizeY, feat->fs.maxComputeWorkGroupSizeZ}; + break; + } + } _SetLocalGroupSize( "compute localSize ", spec, max_threads, total_size, uint3{x,y,z}, OUT desc.localSize ); } +/* +================================================= + SetSubgroupSize +================================================= +*/ + void ComputePipelineSpecScriptBinding::SetSubgroupSize (uint value) __Th___ + { + CHECK_THROW_MSG( GetBase() != null and GetBase()->shader, "shader is not compiled" ); + CHECK_THROW_MSG( All( desc.localSize != Zero ), "Specify subgroup size after workgroup size (local size)" ); + + TEST_FEATURE( GetFeatures(), subgroupSizeControl ); + + const auto& def_size = GetBase()->shader->reflection.compute.localGroupSize; + const auto& spec = GetBase()->shader->reflection.compute.localGroupSpec; + const uint3 local_dim { spec.x == UMax or desc.localSize.x == UMax ? 
def_size.x : desc.localSize.x, + spec.y == UMax or desc.localSize.y == UMax ? def_size.y : desc.localSize.y, + spec.z == UMax or desc.localSize.z == UMax ? def_size.z : desc.localSize.z }; + bool supported = false; + + for (auto& feat : GetFeatures()) + { + if ( value >= feat->fs.minSubgroupSize and + value <= feat->fs.maxSubgroupSize and + AllBits( feat->fs.requiredSubgroupSizeStages, EShaderStages::Compute )) + { + supported = true; + break; + } + } + CHECK_THROW_MSG( supported, + "Subgroup size ("s << ToString(value) << ") must be in range [minSubgroupSize, maxSubgroupSize] in at least one feature set" ); + + CHECK_THROW_MSG( IsMultipleOf( local_dim.x, value ), + "Local size X ("s << ToString( local_dim.x ) << ") must be multiple of subgroup size (" << ToString( value ) << ")" ); + + desc.subgroupSize = ushort(value); + } + /* ================================================= Build @@ -260,6 +308,10 @@ namespace binder.AddMethod( &ComputePipelineSpecScriptBinding::SetSpecValueI, "SetSpecValue", {"name", "value"} ); binder.AddMethod( &ComputePipelineSpecScriptBinding::SetSpecValueF, "SetSpecValue", {"name", "value"} ); + binder.Comment( "Set subgroup size.\n" + "Requires 'subgroupSizeControl' feature, value must be in range [minSubgroupSize, maxSubgroupSize]." ); + binder.AddMethod( &ComputePipelineSpecScriptBinding::SetSubgroupSize, "SubgroupSize", {} ); + binder.Comment( "Set dynamic states (EPipelineDynamicState).\n" "None of the states are supported for compute pipeline." 
); binder.AddMethod( &ComputePipelineSpecScriptBinding::SetDynamicState, "SetDynamicState", {"states"} ); diff --git a/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/ComputePipeline.h b/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/ComputePipeline.h index 01f2fc06..5666b9c0 100644 --- a/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/ComputePipeline.h +++ b/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/ComputePipeline.h @@ -32,6 +32,8 @@ namespace AE::PipelineCompiler void SetLocalGroupSize2 (uint x, uint y) __Th___ { SetLocalGroupSize3( x, y, UMax ); } void SetLocalGroupSize3 (uint x, uint y, uint z) __Th___; + void SetSubgroupSize (uint value) __Th___; + void SetDynamicState (/*EPipelineDynamicState*/uint states) __Th___; void AddToRenderTech (const String &rtech, const String &pass) __Th___ { return BasePipelineSpec::_AddToRenderTech( rtech, pass ); } diff --git a/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/DescriptorSetLayout.cpp b/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/DescriptorSetLayout.cpp index 3c7dbf6c..e7b11b9b 100644 --- a/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/DescriptorSetLayout.cpp +++ b/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/DescriptorSetLayout.cpp @@ -21,25 +21,36 @@ namespace AccessToStr ================================================= */ - ND_ static StringView AccessToStr (EAccessType type) __Th___ + ND_ static String AccessToStr (EAccessType type, EResourceState state) __Th___ { - // requires GL_KHR_memory_scope_semantics + String str; switch_enum( type ) { - case EAccessType::DeviceCoherent : return "devicecoherent"; - case EAccessType::QueueFamilyCoherent : return "queuefamilycoherent"; - case EAccessType::WorkgroupCoherent : return "workgroupcoherent"; - case EAccessType::SubgroupCoherent : return "subgroupcoherent"; - case EAccessType::NonPrivate : return "nonprivate"; - case EAccessType::Volatile : return "volatile"; - case 
EAccessType::Restrict : return "restrict"; - case EAccessType::Coherent : return "coherent"; + // requires GL_KHR_memory_scope_semantics + case EAccessType::DeviceCoherent : str << "devicecoherent"; break; + case EAccessType::QueueFamilyCoherent : str << "queuefamilycoherent"; break; + case EAccessType::WorkgroupCoherent : str << "workgroupcoherent"; break; + case EAccessType::SubgroupCoherent : str << "subgroupcoherent"; break; + case EAccessType::NonPrivate : str << "nonprivate"; break; + + case EAccessType::Volatile : str << "volatile"; break; + case EAccessType::Restrict : str << "restrict"; break; + case EAccessType::Coherent : str << "coherent"; break; + case EAccessType::Unknown : case EAccessType::_MemoryModel : - case EAccessType::_Count : break; + case EAccessType::_Count : + default : + CHECK_THROW_MSG( false, "unknown access type" ); break; } switch_end - CHECK_THROW_MSG( false, "unknown access type" ); + + switch ( ToEResState( state )) + { + case _EResState::ShaderStorage_Read : str << " readonly"; break; + case _EResState::ShaderStorage_Write : str << " writeonly"; break; + } + return str; } /* @@ -315,7 +326,7 @@ namespace CHECK_THROW_MSG( not _dsLayout.uniforms.empty() ); CHECK_THROW_MSG( IsCompatibleWithVulkan() ); - if ( not AnyBits( _dsLayout.stages, stages )) + if ( NoBits( _dsLayout.stages, stages )) return; auto& storage = *ObjectStorage::Instance(); @@ -331,7 +342,7 @@ namespace CHECK_THROW_MSG( not name_str.empty() ); CHECK_THROW_MSG( un.binding.IsVkDefined() ); - if ( not AnyBits( stages, un.stages )) + if ( NoBits( stages, un.stages )) continue; const String idx_str = ToString( un.binding.vkIndex ); @@ -381,7 +392,7 @@ namespace << ", array stride: " << ToString( un.buffer.arrayStride ); if ( un.buffer.HasDynamicOffset() ) str << ", dynamic offset"; - str << "\n layout(set=" << ds_idx << ", binding=" << idx_str << ", std430) " << AccessToStr( aux_info->access ) + str << "\n layout(set=" << ds_idx << ", binding=" << idx_str << ", std430) " 
<< AccessToStr( aux_info->access, un.buffer.state ) << " buffer AE_Type_" << aux_info->type->Typename() << " {\n" << fields << " } " << name_str << ArraySizeToStr( un.arraySize ) << ";\n"; break; @@ -398,7 +409,7 @@ namespace CHECK_THROW_MSG( aux_info != null ); str << " // state: " << ToString( un.texelBuffer.state ) << "\n layout(set=" << ds_idx << ", binding=" << idx_str << ") " - << AccessToStr( aux_info->access ) << " uniform " << ImageToStr( un.texelBuffer.type, "image" ) + << AccessToStr( aux_info->access, un.texelBuffer.state ) << " uniform " << ImageToStr( un.texelBuffer.type, "image" ) << ' ' << name_str << ArraySizeToStr( un.arraySize ) << ";\n"; break; } @@ -409,7 +420,7 @@ namespace << "\n layout(set=" << ds_idx << ", binding=" << idx_str; if ( un.image.format != Default ) str << ", " << FormatToStr( un.image.format ); - str << ") " << AccessToStr( aux_info->access ) + str << ") " << AccessToStr( aux_info->access, un.image.state ) << " uniform " << ImageToStr( un.image.type, "image" ) << ' ' << name_str << ArraySizeToStr( un.arraySize ) << ";\n"; break; @@ -485,7 +496,7 @@ namespace CHECK_THROW_MSG( IsSingleBitSet( stages )); CHECK_THROW_MSG( IsCompatibleWithMetal() ); - if ( not AnyBits( _dsLayout.stages, stages )) + if ( NoBits( _dsLayout.stages, stages )) return; const auto ValTypeToStr = [] (EImageType type) -> StringView @@ -556,7 +567,7 @@ namespace // argument buffer must have same layout if ( not is_argbuf ) { - if ( not AnyBits( stages, un.stages )) + if ( NoBits( stages, un.stages )) continue; index_ptr = un.binding.mtlPerStageIndex.PtrForShader( stages ); @@ -784,7 +795,7 @@ namespace CHECK_ERR( un.arraySize > 0 ); // TODO // argument buffer must have same layout - if ( not is_argbuf and not AnyBits( stages, un.stages )) + if ( not is_argbuf and NoBits( stages, un.stages )) continue; switch_enum( un.type ) @@ -1471,10 +1482,13 @@ namespace _CheckAccessType ================================================= */ - void 
DescriptorSetLayout::_CheckAccessType (EAccessType access) C_Th___ + void DescriptorSetLayout::_CheckAccessType (INOUT EAccessType &access) C_Th___ { CHECK_THROW_MSG( access < EAccessType::_Count ); - CHECK_THROW_MSG( access != EAccessType::Unknown and access != EAccessType::_MemoryModel ); + CHECK_THROW_MSG( access != EAccessType::_MemoryModel ); + + if ( access == EAccessType::Unknown ) + access = EAccessType::Coherent; if ( access > EAccessType::_MemoryModel ) { @@ -1482,6 +1496,21 @@ namespace } } +/* +================================================= + _CheckStateForStorage +================================================= +*/ + void DescriptorSetLayout::_CheckStateForStorage (EResourceState state) C_Th___ + { + switch ( ToEResState( state )) { + case _EResState::ShaderStorage_Read : + case _EResState::ShaderStorage_Write : + case _EResState::ShaderStorage_RW : break; + default : CHECK_THROW_MSG( false, "state must be ShaderStorage_***" ); + } + } + /* ================================================= _CheckStorageFormat @@ -2119,18 +2148,13 @@ namespace */ void DescriptorSetLayout::AddStorageBuffer (EShaderStages stages, const String &name, const ArraySize &arraySize, const String &typeName, EAccessType access, EResourceState state, Bool dynamic) __Th___ { - switch ( ToEResState( state )) { - case _EResState::ShaderStorage_Read : - case _EResState::ShaderStorage_Write : - case _EResState::ShaderStorage_RW : break; - default : CHECK_THROW_MSG( false, "state must be ShaderStorage_***" ); - } CHECK_THROW_MSG( stages != Default ); state |= EResourceState_FromShaders( stages ); + _CheckStateForStorage( state ); _CheckUniformName( name ); _CheckArraySize( arraySize.value ); - _CheckAccessType( access ); + _CheckAccessType( INOUT access ); const auto& st_map = ObjectStorage::Instance()->structTypes; auto st_it = st_map.find( typeName ); @@ -2191,20 +2215,15 @@ namespace */ void DescriptorSetLayout::AddStorageTexelBuffer (EShaderStages stages, const String &name, 
const ArraySize &arraySize, EImageType type, EPixelFormat format, EAccessType access, EResourceState state) __Th___ { - switch ( ToEResState( state )) { - case _EResState::ShaderStorage_Read : - case _EResState::ShaderStorage_Write : - case _EResState::ShaderStorage_RW : break; - default : CHECK_THROW_MSG( false, "state must be ShaderStorage_***" ); - } CHECK_THROW_MSG( stages != Default ); state |= EResourceState_FromShaders( stages ); CHECK_THROW_MSG( (type & EImageType::_TexMask) == EImageType::Buffer ); CHECK_THROW_MSG( (type & EImageType::_ValMask) != Default ); + _CheckStateForStorage( state ); _CheckUniformName( name ); _CheckArraySize( arraySize.value ); - _CheckAccessType( access ); + _CheckAccessType( INOUT access ); _CheckStorageFormat( format, ToEResState(state) == _EResState::ShaderStorage_Read ); Uniform un; @@ -2230,19 +2249,14 @@ namespace */ void DescriptorSetLayout::AddStorageImage (EShaderStages stages, const String &name, const ArraySize &arraySize, EImageType type, EPixelFormat format, EAccessType access, EResourceState state) __Th___ { - switch ( ToEResState( state )) { - case _EResState::ShaderStorage_Read : - case _EResState::ShaderStorage_Write : - case _EResState::ShaderStorage_RW : break; - default : CHECK_THROW_MSG( false, "state must be ShaderStorage_***" ); - } CHECK_THROW_MSG( stages != Default ); state |= EResourceState_FromShaders( stages ); CHECK_THROW_MSG( (type & EImageType::_TexMask) != Default ); + _CheckStateForStorage( state ); _CheckUniformName( name ); _CheckArraySize( arraySize.value ); - _CheckAccessType( access ); + _CheckAccessType( INOUT access ); _CheckStorageFormat( format, ToEResState(state) == _EResState::ShaderStorage_Read ); const EImageType val_flags = EImageType_FromPixelFormat( format ); diff --git a/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/DescriptorSetLayout.h b/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/DescriptorSetLayout.h index da5c091a..6c95661e 100644 --- 
a/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/DescriptorSetLayout.h +++ b/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/DescriptorSetLayout.h @@ -134,9 +134,10 @@ namespace AE::PipelineCompiler void _CheckUniformName (const String &name) __Th___; void _CheckArraySize (uint size) C_Th___; void _CheckSamplerName (const String &name) __Th___; - void _CheckAccessType (EAccessType access) C_Th___; + void _CheckAccessType (INOUT EAccessType &access) C_Th___; void _CheckStorageFormat (EPixelFormat fmt, bool isReadOnly) C_Th___; void _CheckFields (const String &fields) C_Th___; + void _CheckStateForStorage (EResourceState state) C_Th___; }; using DescriptorSetLayoutPtr = ScriptRC< DescriptorSetLayout >; diff --git a/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/GraphicsPipeline.cpp b/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/GraphicsPipeline.cpp index 3ae14709..6e24f307 100644 --- a/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/GraphicsPipeline.cpp +++ b/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/GraphicsPipeline.cpp @@ -173,7 +173,7 @@ namespace SubpassShaderIO frag_io; GetSubpassShaderIO( OUT frag_io ); - ObjectStorage::Instance()->TestRenderPass( compatRP, subpass, frag_io, false, false ); // throw + ObjectStorage::Instance()->TestRenderPass( compatRP, subpass, frag_io, false, false, GetName() ); // throw } /* @@ -356,7 +356,7 @@ namespace void GraphicsPipelineScriptBinding::SetFragmentOutputFromRenderTech (const String &renTechName, const String &passName) __Th___ { CHECK_THROW_MSG( not _fragOutput.has_value() ); - CHECK_THROW_MSG( not AllBits( _states, EStateBits::HasShaders )); + CHECK_THROW_MSG( NoBits( _states, EStateBits::HasShaders )); _fragOutput = FragOutput_t{}; CHECK_THROW_MSG( BasePipelineTmpl::_FragmentOutputFromRenderTech( renTechName, passName, OUT *_fragOutput )); @@ -370,7 +370,7 @@ namespace void GraphicsPipelineScriptBinding::SetFragmentOutputFromRenderPass (const String 
&compatRPassName, const String &subpassName) __Th___ { CHECK_THROW_MSG( not _fragOutput.has_value() ); - CHECK_THROW_MSG( not AllBits( _states, EStateBits::HasShaders )); + CHECK_THROW_MSG( NoBits( _states, EStateBits::HasShaders )); _fragOutput = FragOutput_t{}; CHECK_THROW_MSG( BasePipelineTmpl::_FragmentOutputFromCompatRenderPass( compatRPassName, subpassName, OUT *_fragOutput )); @@ -532,7 +532,7 @@ namespace SubpassShaderIO frag_io; GetBase()->GetSubpassShaderIO( OUT frag_io ); - storage.TestRenderPass( iter->second, subpass, frag_io, false, false ); // throw + storage.TestRenderPass( iter->second, subpass, frag_io, false, false, NameStr() ); // throw desc.renderPass = iter->second; desc.subpass = subpass; @@ -626,6 +626,9 @@ namespace CHECK_THROW_MSG( value > 0 ); CHECK_THROW_MSG( value <= GraphicsConfig::MaxViewports ); + if ( value > 1 ) + TEST_FEATURE( GetBase()->GetFeatures(), multiViewport, ", it required when viewport count ("s << ToString(value) << ") is greater than 1" ); + TestFeature_Min( GetBase()->GetFeatures(), &FeatureSet::maxViewports, value, "maxViewports", "viewportCount" ); desc.viewportCount = CheckCast(value); diff --git a/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/MeshPipeline.cpp b/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/MeshPipeline.cpp index 181166d9..147f39e0 100644 --- a/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/MeshPipeline.cpp +++ b/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/MeshPipeline.cpp @@ -141,7 +141,7 @@ namespace SubpassShaderIO frag_io; GetSubpassShaderIO( OUT frag_io ); - ObjectStorage::Instance()->TestRenderPass( compatRP, subpass, frag_io, false, false ); // throw + ObjectStorage::Instance()->TestRenderPass( compatRP, subpass, frag_io, false, false, GetName() ); // throw } /* @@ -256,7 +256,7 @@ namespace void MeshPipelineScriptBinding::SetFragmentOutputFromRenderTech (const String &renTechName, const String &passName) __Th___ { CHECK_THROW_MSG( not 
_fragOutput.has_value() ); - CHECK_THROW_MSG( not AllBits( _states, EStateBits::HasShaders )); + CHECK_THROW_MSG( NoBits( _states, EStateBits::HasShaders )); _fragOutput = FragOutput_t{}; CHECK_THROW_MSG( BasePipelineTmpl::_FragmentOutputFromRenderTech( renTechName, passName, OUT *_fragOutput )); @@ -270,7 +270,7 @@ namespace void MeshPipelineScriptBinding::SetFragmentOutputFromRenderPass (const String &compatRPassName, const String &subpassName) __Th___ { CHECK_THROW_MSG( not _fragOutput.has_value() ); - CHECK_THROW_MSG( not AllBits( _states, EStateBits::HasShaders )); + CHECK_THROW_MSG( NoBits( _states, EStateBits::HasShaders )); _fragOutput = FragOutput_t{}; CHECK_THROW_MSG( BasePipelineTmpl::_FragmentOutputFromCompatRenderPass( compatRPassName, subpassName, OUT *_fragOutput )); @@ -398,7 +398,7 @@ namespace SubpassShaderIO frag_io; GetBase()->GetSubpassShaderIO( OUT frag_io ); - storage.TestRenderPass( iter->second, subpass, frag_io, false, false ); // throw + storage.TestRenderPass( iter->second, subpass, frag_io, false, false, NameStr() ); // throw desc.renderPass = iter->second; desc.subpass = subpass; diff --git a/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/ObjectStorage.cpp b/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/ObjectStorage.cpp index cc7d0ac6..66baddd8 100644 --- a/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/ObjectStorage.cpp +++ b/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/ObjectStorage.cpp @@ -111,12 +111,12 @@ namespace AE::PipelineCompiler ================================================= */ void ObjectStorage::TestRenderPass (const String &compatRP, const String &subpass, const SubpassShaderIO &fragIO, - bool requireDepth, bool requireStencil) __Th___ + bool requireDepth, bool requireStencil, StringView pplnName) __Th___ { AddName( compatRP ); AddName( subpass ); - return TestRenderPass( CompatRenderPassName{compatRP}, SubpassName{subpass}, fragIO, requireDepth, requireStencil ); // throw + 
return TestRenderPass( CompatRenderPassName{compatRP}, SubpassName{subpass}, fragIO, requireDepth, requireStencil, pplnName ); // throw } /* @@ -127,7 +127,7 @@ namespace AE::PipelineCompiler ================================================= */ void ObjectStorage::TestRenderPass (const CompatRenderPassName::Optimized_t &compatRP, const SubpassName::Optimized_t &subpass, - const SubpassShaderIO &fragIO, bool requireDepth, bool requireStencil) __Th___ + const SubpassShaderIO &fragIO, bool requireDepth, bool requireStencil, StringView pplnName) __Th___ { CHECK( HasHashName( compatRP ) and HasHashName( subpass )); @@ -142,7 +142,7 @@ namespace AE::PipelineCompiler // Metal: no way to extract fragment output from shader, use 'SetFragmentOutputFromRenderPass()' in script to avoid it. CHECK_THROW_MSG( sp_it->second.colorAttachments.size() == fragIO.colorAttachments.size(), "Color attachments in render pass '"s << GetName( compatRP ) << "' subpass '" << - GetName( subpass ) << "' doesn't match with color outputs in shader (" << + GetName( subpass ) << "' doesn't match with color outputs in FS in pipeline '" << pplnName << "' (" << ToString(sp_it->second.colorAttachments.size()) << " != " << ToString(fragIO.colorAttachments.size()) << ") " ); @@ -169,7 +169,7 @@ namespace AE::PipelineCompiler CHECK_THROW_MSG( sp_it->second.inputAttachments.size() == fragIO.inputAttachments.size(), "Input attachments in render pass '"s << GetName( compatRP ) << "' subpass '" << - GetName( subpass ) << "' doesn't match with input attachments in shader (" << + GetName( subpass ) << "' doesn't match with input attachments in FS in pipeline '" << pplnName << "' (" << ToString(sp_it->second.inputAttachments.size()) << " != " << ToString(fragIO.inputAttachments.size()) << ") " ); @@ -576,7 +576,7 @@ namespace AE::PipelineCompiler for (auto& [name, st] : this->structTypes) { - if ( not AnyBits( st->Usage(), EUsage::BufferLayout | EUsage::VertexLayout )) + if ( NoBits( st->Usage(), 
EUsage::BufferLayout | EUsage::VertexLayout )) continue; CHECK_ERR( st->ToCPP( INOUT types, INOUT unique )); diff --git a/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/ObjectStorage.h b/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/ObjectStorage.h index e687414c..86d94430 100644 --- a/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/ObjectStorage.h +++ b/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/ObjectStorage.h @@ -168,6 +168,7 @@ namespace AE::PipelineCompiler EStructLayout defaultLayout = EStructLayout::Compatible_Std140; EPipelineOpt defaultPipelineOpt = Default; String defaultFeatureSet; + String defaultShaderDefines; // methods @@ -179,9 +180,9 @@ namespace AE::PipelineCompiler void Clear (); void TestRenderPass (const String &compatRP, const String &subpass, const SubpassShaderIO &fragIO, - bool requireDepth, bool requireStencil) __Th___; + bool requireDepth, bool requireStencil, StringView pplnName) __Th___; void TestRenderPass (const CompatRenderPassName::Optimized_t &compatRP, const SubpassName::Optimized_t &subpass, - const SubpassShaderIO &fragIO, bool requireDepth, bool requireStencil) __Th___; + const SubpassShaderIO &fragIO, bool requireDepth, bool requireStencil, StringView pplnName) __Th___; ND_ CompatibleRenderPassDescPtr RenderPassExists (const String &rpName, const String &subpass) __Th___; ND_ CompatibleRenderPassDescPtr RenderPassExists (RenderPassName::Ref rpName, SubpassName::Ref subpass) C_Th___; diff --git a/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/ObjectStorage_GLSL.cpp b/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/ObjectStorage_GLSL.cpp index 25d44b02..049a94cd 100644 --- a/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/ObjectStorage_GLSL.cpp +++ b/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/ObjectStorage_GLSL.cpp @@ -161,7 +161,7 @@ namespace AE::PipelineCompiler "#version 460 core\n" "#extension GL_ARB_separate_shader_objects : require\n" 
"#extension GL_ARB_shading_language_420pack : require\n" - "#extension GL_GOOGLE_include_directive : require\n" + "#extension GL_GOOGLE_include_directive : require\n" // or GL_ARB_shading_language_include "#extension GL_GOOGLE_cpp_style_line_directive : require\n" "#extension GL_EXT_control_flow_attributes : require\n" "#extension GL_EXT_control_flow_attributes2 : require\n" @@ -842,7 +842,7 @@ namespace AE::PipelineCompiler key.version = version; key.options = inShader->options; key.include = Array{ include }; - _SetAndSortDefines( OUT key.defines, String{defines} << inShader->GetDefines() ); + _SetAndSortDefines( OUT key.defines, String{defines} << inShader->GetDefines() << this->defaultShaderDefines ); // find in existing shader source { diff --git a/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/ScriptConfig.cpp b/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/ScriptConfig.cpp index 4a4c51bd..0bd6350b 100644 --- a/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/ScriptConfig.cpp +++ b/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/ScriptConfig.cpp @@ -173,6 +173,22 @@ namespace AE::PipelineCompiler storage.defaultFeatureSet = value; } +/* +================================================= + SetShaderDefines +================================================= +*/ + void ScriptConfig::SetShaderDefines (const String &value) __Th___ + { + CHECK_THROW_MSG( not value.empty() ); + + auto& storage = *ObjectStorage::Instance(); + + CHECK( storage.defaultShaderDefines.empty() ); + storage.defaultShaderDefines.clear(); + storage.defaultShaderDefines << '\n' << value; + } + /* ================================================= Bind @@ -207,6 +223,10 @@ namespace AE::PipelineCompiler binder.Comment( "Set FeatureSet which will be added to all resources." 
); binder.AddMethod( &ScriptConfig::SetDefaultFeatureSet, "SetDefaultFeatureSet", {"fsName"} ); + + binder.Comment( "Set defines which will be used in all shaders.\n" + "Format: DEF=1\nDEF2" ); + binder.AddMethod( &ScriptConfig::SetShaderDefines, "SetShaderDefines", {} ); } diff --git a/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/ScriptConfig.h b/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/ScriptConfig.h index 3ebdbc52..9558ab5f 100644 --- a/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/ScriptConfig.h +++ b/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/ScriptConfig.h @@ -27,6 +27,7 @@ namespace AE::PipelineCompiler void SetPipelineOptions (EPipelineOpt value) __Th___; void SetPreprocessor (EShaderPreprocessor value) __Th___; void SetDefaultFeatureSet (const String &value) __Th___; + void SetShaderDefines (const String &value) __Th___; static void Bind (const ScriptEnginePtr &se) __Th___; }; diff --git a/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/ScriptFeatureSet.h b/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/ScriptFeatureSet.h index 44a96940..49e97a32 100644 --- a/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/ScriptFeatureSet.h +++ b/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/ScriptFeatureSet.h @@ -6,8 +6,6 @@ * feature sets are incompatible if feature marked as 'True' and 'False'. * min limit is allowed if one of feature set in array is >= than this limit. * max limit is allowed if one of feature set in array is <= than this limit. 
- - TODO: review all FS checks */ #pragma once diff --git a/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/ScriptRenderPass.cpp b/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/ScriptRenderPass.cpp index 381ff327..e40ef803 100644 --- a/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/ScriptRenderPass.cpp +++ b/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/ScriptRenderPass.cpp @@ -90,14 +90,12 @@ namespace */ static void ShaderIO_Ctor (void* mem, const String &name, Optional type, uint index) { - ObjectStorage::Instance()->AddName( name ); - if ( type.has_value() ) { CHECK_THROW_MSG( *type != Default ); CHECK_THROW_MSG( *type < EShaderIO::_Count ); } - PlacementNew( OUT mem, ShaderIOName{name}, type.value_or(EShaderIO::Unknown), index ); + PlacementNew( OUT mem, name, type.value_or(EShaderIO::Unknown), index ); } static void ShaderIO_Ctor1 (void* mem, const String &name) @@ -132,6 +130,20 @@ namespace //----------------------------------------------------------------------------- +/* +================================================= + ShaderIO ctor +================================================= +*/ + RPAttachment::ShaderIO::ShaderIO (const String &inName, EShaderIO type, uint idx) __Th___ : + name{inName}, type{type}, index{idx} + { + auto& storage = *ObjectStorage::Instance(); + storage.AddName( inName ); + } +//----------------------------------------------------------------------------- + + /* ================================================= AddUsage @@ -221,7 +233,7 @@ namespace } else { - CHECK_THROW_MSG( inVar.has_value() or outVar.has_value(), + CHECK_MSG( inVar.has_value() == outVar.has_value(), "for 'ReadWrite' usage both input and output shader IO must be defined" ); // set default @@ -266,6 +278,12 @@ namespace } switch_end + if ( iter->second.input.IsDefined() ) + CHECK( not storage.GetName( iter->second.input.name ).empty() ); + + if ( iter->second.output.IsDefined() ) + CHECK( not storage.GetName( 
iter->second.output.name ).empty() ); + CHECK_THROW_MSG( format != Default, "pixel format must be defined" ); if ( format < EPixelFormat::_Count ) { CHECK_THROW_MSG( not EPixelFormat_IsCompressed( format ), "unsupported pixel format '"s << Base::ToString(format) << "'" ); @@ -604,7 +622,7 @@ namespace { EResourceState new_state; auto usage = rt->usageMap.find( sp.name ); - const bool has_content = rt_states.empty() ? false : not AllBits( rt_states.back(), EResourceState::Invalidate ); + const bool has_content = rt_states.empty() ? false : NoBits( rt_states.back(), EResourceState::Invalidate ); // if usage for subpass is not defined then content of the attachment may be invalidated if ( rt_states.empty() ) @@ -706,7 +724,7 @@ namespace { if ( EResourceState_IsReadOnly( finalState ) and not rt_states.empty() ) { - CHECK_THROW_MSG( not AllBits( rt_states.back(), EResourceState::Invalidate ), + CHECK_THROW_MSG( NoBits( rt_states.back(), EResourceState::Invalidate ), "Attachment '"s << storage.GetName( _name ) << "' final state (" << Base::ToString( rt_states.back() ) << ") has read-only access, but current content of attachment is invalidated" ); } diff --git a/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/ScriptRenderPass.h b/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/ScriptRenderPass.h index ad01c9d6..218dd6bc 100644 --- a/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/ScriptRenderPass.h +++ b/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/ScriptRenderPass.h @@ -32,7 +32,7 @@ namespace AE::PipelineCompiler uint index = UMax; // color/input attachment index ShaderIO () {} - ShaderIO (ShaderIOName::Ref name, EShaderIO type, uint idx) : name{name}, type{type}, index{idx} {} + ShaderIO (const String &name, EShaderIO type, uint idx) __Th___; ND_ bool IsDefined () const { return name.IsDefined(); } }; diff --git a/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/ShaderStructType.cpp 
b/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/ShaderStructType.cpp index 5eefa923..abfe0b5f 100644 --- a/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/ShaderStructType.cpp +++ b/AE/engine/tools/res_pack/pipeline_compiler/ScriptObjects/ShaderStructType.cpp @@ -2247,8 +2247,21 @@ namespace { src << Typename() << "\n" << "\t{\n" - << "\t\tstatic constexpr auto TypeName = ShaderStructName{HashVal32{0x" - << ToString<16>( uint{ShaderStructName{Typename()}} ) << "u}}; // '" << Typename() << "'\n\n"; + << "\t\tstatic constexpr auto TypeName = ShaderStructName{HashVal32{0x" + << ToString<16>( uint{ShaderStructName{Typename()}} ) << "u}};\n"; + + if ( HasDynamicArray() ) + { + auto& field = _fields.back(); + src << "\t\tstatic constexpr size_t SizeOf (size_t count) { return "; + + if ( _size > 0 ) + src << ToString(usize( AlignUp( _size, field.align ))) << " + "; + + src << "(" << ToString(usize( field.size )) + << " * count); }\n"; + } + src << "\n"; for (auto& field : _fields) { @@ -2290,6 +2303,7 @@ namespace { if ( not field.IsDynamicArray() ) test << "\tStaticAssert( offsetof(" << Typename() << ", " << field.name << ") == " << ToString(usize( field.offset )) << " );\n"; } + src << "\t};\n" << "#endif\n" << test; diff --git a/AE/engine/tools/res_pack/shader_trace/Impl/ShaderTrace.cpp b/AE/engine/tools/res_pack/shader_trace/Impl/ShaderTrace.cpp index cc1e5df5..d8fa3e3b 100644 --- a/AE/engine/tools/res_pack/shader_trace/Impl/ShaderTrace.cpp +++ b/AE/engine/tools/res_pack/shader_trace/Impl/ShaderTrace.cpp @@ -1,7 +1,7 @@ // Copyright (c) Zhirnov Andrey. 
For more information see 'LICENSE' #include "Common.h" -#include "serializing/ObjectFactory.h" +#include "serializing/Public/ObjectFactory.h" namespace AE::PipelineCompiler { diff --git a/AE/engine/tools/res_pack/shader_trace/Impl/TraceRecording.cpp.h b/AE/engine/tools/res_pack/shader_trace/Impl/TraceRecording.cpp.h index 9b0ba927..436fe72c 100644 --- a/AE/engine/tools/res_pack/shader_trace/Impl/TraceRecording.cpp.h +++ b/AE/engine/tools/res_pack/shader_trace/Impl/TraceRecording.cpp.h @@ -1,5 +1,7 @@ // Copyright (c) Zhirnov Andrey. For more information see 'LICENSE' +#pragma once + #include "Common.h" namespace AE::PipelineCompiler diff --git a/AE/engine/tools/res_pack/shader_trace/Public/ShaderTrace.h b/AE/engine/tools/res_pack/shader_trace/Public/ShaderTrace.h index 9deecc8f..77f696a1 100644 --- a/AE/engine/tools/res_pack/shader_trace/Public/ShaderTrace.h +++ b/AE/engine/tools/res_pack/shader_trace/Public/ShaderTrace.h @@ -3,15 +3,15 @@ GLSL Trace project. [old standalone project](https://github.com/azhirnov/glsl_trace) + [new standalone project](https://github.com/azhirnov/glsl_trace/tree/v2023) [new project as part of AsEn](https://github.com/azhirnov/as-en/tree/preview/engine/tools/res_pack/shader_trace) [new project as part of AsEn - mirror](https://gitflic.ru/project/azhirnov/as-en/file?file=engine%2Ftools%2Fres_pack%2Fshader_trace) - [new standalone project](https://github.com/azhirnov/glsl_trace/tree/v2023) */ #pragma once #include "base/FileSystem/Path.h" -#include "serializing/ISerializable.h" +#include "serializing/Public/ISerializable.h" namespace glslang { class TIntermediate; diff --git a/AE/engine/tools/vulkan_header_gen/GenVulkanLoaders.cpp b/AE/engine/tools/vulkan_header_gen/GenVulkanLoaders.cpp index d459edf5..cfa148c7 100644 --- a/AE/engine/tools/vulkan_header_gen/GenVulkanLoaders.cpp +++ b/AE/engine/tools/vulkan_header_gen/GenVulkanLoaders.cpp @@ -627,6 +627,7 @@ namespace AE::Vulkan // { "viewportArrayNV", 
VK_NV_VIEWPORT_ARRAY_2_EXTENSION_NAME, NoVer, {1,0}, {} }, // { "viewportSwizzleNV", VK_NV_VIEWPORT_SWIZZLE_EXTENSION_NAME, NoVer, {1,0}, {} }, // { "linearColorAttachmentNV", VK_NV_LINEAR_COLOR_ATTACHMENT_EXTENSION_NAME, NoVer, {1,0}, {VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME} }, + { "clipSpaceWScalingNV", VK_NV_CLIP_SPACE_W_SCALING_EXTENSION_NAME, NoVer, {1,0}, {} }, // AMD // // { "coherentMemoryAMD", VK_AMD_DEVICE_COHERENT_MEMORY_EXTENSION_NAME, NoVer, {1,0}, {} }, diff --git a/AE/engine/tools/vulkan_image_zcurve/VulkanImageZCurve.cpp b/AE/engine/tools/vulkan_image_zcurve/VulkanImageZCurve.cpp index 5cb2583f..22dade71 100644 --- a/AE/engine/tools/vulkan_image_zcurve/VulkanImageZCurve.cpp +++ b/AE/engine/tools/vulkan_image_zcurve/VulkanImageZCurve.cpp @@ -216,13 +216,13 @@ static void PrintImageZCurve (const VDevice &dev, EPixelFormat fmt, bool printS for (uint i = 0; i < pixel_count; ++i) { - uint2 coord = (uint2(mapped[i], mapped[i] >> 8) & 0xFFu); + uint2 coord = (uint2{ uint(mapped[i]), uint(mapped[i]) >> 8 } & 0xFFu); bits.insert( E( coord.x + coord.y * img_dim.x )); if constexpr( sizeof(T) >= 4 ) { - uint2 coord2 = (~uint2(mapped[i] >> 24, mapped[i] >> 16)) & 0xFFu; + uint2 coord2 = (~uint2{ uint(mapped[i]) >> 24, uint(mapped[i]) >> 16 }) & 0xFFu; CHECK( All( coord == coord2 )); } @@ -256,7 +256,7 @@ static void PrintImageZCurve (const VDevice &dev, EPixelFormat fmt, bool printS for (uint i = 0; i < block_size and i < pixel_count; ++i) { - uint2 coord = (uint2(mapped[i], mapped[i] >> 8) & 0xFFu); + uint2 coord = (uint2{ uint(mapped[i]), uint(mapped[i]) >> 8 } & 0xFFu); str << '[' << FormatAlignedI<10>( coord.x, align.x, ' ' ) << ',' << FormatAlignedI<10>( coord.y, align.y, ' ' ) << ']' diff --git a/AE/samples/demo/_data/cpp/mac_types.h b/AE/samples/demo/_data/cpp/mac_types.h index 5af7ea40..bd237e26 100644 --- a/AE/samples/demo/_data/cpp/mac_types.h +++ b/AE/samples/demo/_data/cpp/mac_types.h @@ -3,7 +3,7 @@ // size: 8, align: 4 struct 
VB_Position_f2 { - static constexpr auto TypeName = ShaderStructName{HashVal32{0xa843e002u}}; // 'VB_Position_f2' + static constexpr auto TypeName = ShaderStructName{HashVal32{0xa843e002u}}; packed_float2 Position; }; @@ -16,7 +16,7 @@ // size: 12, align: 2 struct VB_UVs2_SCs1_Col8 { - static constexpr auto TypeName = ShaderStructName{HashVal32{0x594166a8u}}; // 'VB_UVs2_SCs1_Col8' + static constexpr auto TypeName = ShaderStructName{HashVal32{0x594166a8u}}; packed_ushort4 UV_Scale; packed_ubyte4 Color; @@ -31,7 +31,7 @@ // size: 12, align: 4 struct VB_UVf2_Col8 { - static constexpr auto TypeName = ShaderStructName{HashVal32{0xf5d3da88u}}; // 'VB_UVf2_Col8' + static constexpr auto TypeName = ShaderStructName{HashVal32{0xf5d3da88u}}; packed_float2 UV; packed_ubyte4 Color; @@ -46,7 +46,7 @@ // size: 16, align: 8 (16) struct imgui_ub { - static constexpr auto TypeName = ShaderStructName{HashVal32{0xb41e4542u}}; // 'imgui_ub' + static constexpr auto TypeName = ShaderStructName{HashVal32{0xb41e4542u}}; float2 scale; float2 translate; @@ -61,7 +61,7 @@ // size: 20, align: 4 struct imgui_vertex { - static constexpr auto TypeName = ShaderStructName{HashVal32{0x9e6b2802u}}; // 'imgui_vertex' + static constexpr auto TypeName = ShaderStructName{HashVal32{0x9e6b2802u}}; packed_float2 Position; packed_float2 UV; @@ -73,25 +73,12 @@ StaticAssert( offsetof(imgui_vertex, Color) == 16 ); StaticAssert( sizeof(imgui_vertex) == 20 ); -#ifndef camera3d_ub_DEFINED -# define camera3d_ub_DEFINED - // size: 64, align: 16 - struct camera3d_ub - { - static constexpr auto TypeName = ShaderStructName{HashVal32{0xd53dbd02u}}; // 'camera3d_ub' - - float4x4_storage mvp; - }; -#endif - StaticAssert( offsetof(camera3d_ub, mvp) == 0 ); - StaticAssert( sizeof(camera3d_ub) == 64 ); - #ifndef CubeVertex_DEFINED # define CubeVertex_DEFINED // size: 40, align: 2 struct CubeVertex { - static constexpr auto TypeName = ShaderStructName{HashVal32{0x38ec4b6bu}}; // 'CubeVertex' + static constexpr auto TypeName 
= ShaderStructName{HashVal32{0x38ec4b6bu}}; packed_short4 Position; packed_short4 Texcoord; @@ -107,31 +94,12 @@ StaticAssert( offsetof(CubeVertex, BiTangent) == 32 ); StaticAssert( sizeof(CubeVertex) == 40 ); -#ifndef SphericalCubeVertex_DEFINED -# define SphericalCubeVertex_DEFINED - // size: 32, align: 2 - struct SphericalCubeVertex - { - static constexpr auto TypeName = ShaderStructName{HashVal32{0x35a6eeecu}}; // 'SphericalCubeVertex' - - packed_short4 Position; - packed_short4 Texcoord; - packed_short4 Tangent; - packed_short4 BiTangent; - }; -#endif - StaticAssert( offsetof(SphericalCubeVertex, Position) == 0 ); - StaticAssert( offsetof(SphericalCubeVertex, Texcoord) == 8 ); - StaticAssert( offsetof(SphericalCubeVertex, Tangent) == 16 ); - StaticAssert( offsetof(SphericalCubeVertex, BiTangent) == 24 ); - StaticAssert( sizeof(SphericalCubeVertex) == 32 ); - #ifndef sdf_font_ublock_DEFINED # define sdf_font_ublock_DEFINED // size: 48, align: 16 struct sdf_font_ublock { - static constexpr auto TypeName = ShaderStructName{HashVal32{0x5a07d037u}}; // 'sdf_font_ublock' + static constexpr auto TypeName = ShaderStructName{HashVal32{0x5a07d037u}}; float2 rotation0; float2 rotation1; @@ -149,3 +117,35 @@ StaticAssert( offsetof(sdf_font_ublock, bgColor) == 32 ); StaticAssert( sizeof(sdf_font_ublock) == 48 ); +#ifndef camera3d_ub_DEFINED +# define camera3d_ub_DEFINED + // size: 64, align: 16 + struct camera3d_ub + { + static constexpr auto TypeName = ShaderStructName{HashVal32{0xd53dbd02u}}; + + float4x4_storage mvp; + }; +#endif + StaticAssert( offsetof(camera3d_ub, mvp) == 0 ); + StaticAssert( sizeof(camera3d_ub) == 64 ); + +#ifndef SphericalCubeVertex_DEFINED +# define SphericalCubeVertex_DEFINED + // size: 32, align: 2 + struct SphericalCubeVertex + { + static constexpr auto TypeName = ShaderStructName{HashVal32{0x35a6eeecu}}; + + packed_short4 Position; + packed_short4 Texcoord; + packed_short4 Tangent; + packed_short4 BiTangent; + }; +#endif + StaticAssert( 
offsetof(SphericalCubeVertex, Position) == 0 ); + StaticAssert( offsetof(SphericalCubeVertex, Texcoord) == 8 ); + StaticAssert( offsetof(SphericalCubeVertex, Tangent) == 16 ); + StaticAssert( offsetof(SphericalCubeVertex, BiTangent) == 24 ); + StaticAssert( sizeof(SphericalCubeVertex) == 32 ); + diff --git a/AE/samples/demo/_data/cpp/vk_types.h b/AE/samples/demo/_data/cpp/vk_types.h index 5af7ea40..bd237e26 100644 --- a/AE/samples/demo/_data/cpp/vk_types.h +++ b/AE/samples/demo/_data/cpp/vk_types.h @@ -3,7 +3,7 @@ // size: 8, align: 4 struct VB_Position_f2 { - static constexpr auto TypeName = ShaderStructName{HashVal32{0xa843e002u}}; // 'VB_Position_f2' + static constexpr auto TypeName = ShaderStructName{HashVal32{0xa843e002u}}; packed_float2 Position; }; @@ -16,7 +16,7 @@ // size: 12, align: 2 struct VB_UVs2_SCs1_Col8 { - static constexpr auto TypeName = ShaderStructName{HashVal32{0x594166a8u}}; // 'VB_UVs2_SCs1_Col8' + static constexpr auto TypeName = ShaderStructName{HashVal32{0x594166a8u}}; packed_ushort4 UV_Scale; packed_ubyte4 Color; @@ -31,7 +31,7 @@ // size: 12, align: 4 struct VB_UVf2_Col8 { - static constexpr auto TypeName = ShaderStructName{HashVal32{0xf5d3da88u}}; // 'VB_UVf2_Col8' + static constexpr auto TypeName = ShaderStructName{HashVal32{0xf5d3da88u}}; packed_float2 UV; packed_ubyte4 Color; @@ -46,7 +46,7 @@ // size: 16, align: 8 (16) struct imgui_ub { - static constexpr auto TypeName = ShaderStructName{HashVal32{0xb41e4542u}}; // 'imgui_ub' + static constexpr auto TypeName = ShaderStructName{HashVal32{0xb41e4542u}}; float2 scale; float2 translate; @@ -61,7 +61,7 @@ // size: 20, align: 4 struct imgui_vertex { - static constexpr auto TypeName = ShaderStructName{HashVal32{0x9e6b2802u}}; // 'imgui_vertex' + static constexpr auto TypeName = ShaderStructName{HashVal32{0x9e6b2802u}}; packed_float2 Position; packed_float2 UV; @@ -73,25 +73,12 @@ StaticAssert( offsetof(imgui_vertex, Color) == 16 ); StaticAssert( sizeof(imgui_vertex) == 20 ); -#ifndef 
camera3d_ub_DEFINED -# define camera3d_ub_DEFINED - // size: 64, align: 16 - struct camera3d_ub - { - static constexpr auto TypeName = ShaderStructName{HashVal32{0xd53dbd02u}}; // 'camera3d_ub' - - float4x4_storage mvp; - }; -#endif - StaticAssert( offsetof(camera3d_ub, mvp) == 0 ); - StaticAssert( sizeof(camera3d_ub) == 64 ); - #ifndef CubeVertex_DEFINED # define CubeVertex_DEFINED // size: 40, align: 2 struct CubeVertex { - static constexpr auto TypeName = ShaderStructName{HashVal32{0x38ec4b6bu}}; // 'CubeVertex' + static constexpr auto TypeName = ShaderStructName{HashVal32{0x38ec4b6bu}}; packed_short4 Position; packed_short4 Texcoord; @@ -107,31 +94,12 @@ StaticAssert( offsetof(CubeVertex, BiTangent) == 32 ); StaticAssert( sizeof(CubeVertex) == 40 ); -#ifndef SphericalCubeVertex_DEFINED -# define SphericalCubeVertex_DEFINED - // size: 32, align: 2 - struct SphericalCubeVertex - { - static constexpr auto TypeName = ShaderStructName{HashVal32{0x35a6eeecu}}; // 'SphericalCubeVertex' - - packed_short4 Position; - packed_short4 Texcoord; - packed_short4 Tangent; - packed_short4 BiTangent; - }; -#endif - StaticAssert( offsetof(SphericalCubeVertex, Position) == 0 ); - StaticAssert( offsetof(SphericalCubeVertex, Texcoord) == 8 ); - StaticAssert( offsetof(SphericalCubeVertex, Tangent) == 16 ); - StaticAssert( offsetof(SphericalCubeVertex, BiTangent) == 24 ); - StaticAssert( sizeof(SphericalCubeVertex) == 32 ); - #ifndef sdf_font_ublock_DEFINED # define sdf_font_ublock_DEFINED // size: 48, align: 16 struct sdf_font_ublock { - static constexpr auto TypeName = ShaderStructName{HashVal32{0x5a07d037u}}; // 'sdf_font_ublock' + static constexpr auto TypeName = ShaderStructName{HashVal32{0x5a07d037u}}; float2 rotation0; float2 rotation1; @@ -149,3 +117,35 @@ StaticAssert( offsetof(sdf_font_ublock, bgColor) == 32 ); StaticAssert( sizeof(sdf_font_ublock) == 48 ); +#ifndef camera3d_ub_DEFINED +# define camera3d_ub_DEFINED + // size: 64, align: 16 + struct camera3d_ub + { + static 
constexpr auto TypeName = ShaderStructName{HashVal32{0xd53dbd02u}}; + + float4x4_storage mvp; + }; +#endif + StaticAssert( offsetof(camera3d_ub, mvp) == 0 ); + StaticAssert( sizeof(camera3d_ub) == 64 ); + +#ifndef SphericalCubeVertex_DEFINED +# define SphericalCubeVertex_DEFINED + // size: 32, align: 2 + struct SphericalCubeVertex + { + static constexpr auto TypeName = ShaderStructName{HashVal32{0x35a6eeecu}}; + + packed_short4 Position; + packed_short4 Texcoord; + packed_short4 Tangent; + packed_short4 BiTangent; + }; +#endif + StaticAssert( offsetof(SphericalCubeVertex, Position) == 0 ); + StaticAssert( offsetof(SphericalCubeVertex, Texcoord) == 8 ); + StaticAssert( offsetof(SphericalCubeVertex, Tangent) == 16 ); + StaticAssert( offsetof(SphericalCubeVertex, BiTangent) == 24 ); + StaticAssert( sizeof(SphericalCubeVertex) == 32 ); + diff --git a/AE/samples/res_editor/CMakeLists.txt b/AE/samples/res_editor/CMakeLists.txt index 966c73b9..ed5709d1 100644 --- a/AE/samples/res_editor/CMakeLists.txt +++ b/AE/samples/res_editor/CMakeLists.txt @@ -22,6 +22,14 @@ if ((TARGET "Scripting") AND (TARGET "PipelineCompiler") AND (TARGET "ResourceLo ) EnablePCH( "ResourceEditor" ) + EnableUnitBuild( "ResourceEditor" ) + + if (${AE_USE_UNITY_BUILD}) + set( NON_UNITY_BUILD_SRC + "${CMAKE_CURRENT_SOURCE_DIR}/Scripting/ScriptExe.cpp" + ) + set_property( SOURCE ${NON_UNITY_BUILD_SRC} PROPERTY SKIP_UNITY_BUILD_INCLUSION ON ) + endif() target_link_libraries( "ResourceEditor" PUBLIC # tools diff --git a/AE/samples/res_editor/Changelog.md b/AE/samples/res_editor/Changelog.md index fb7c9fef..c396586a 100644 --- a/AE/samples/res_editor/Changelog.md +++ b/AE/samples/res_editor/Changelog.md @@ -1,3 +1,9 @@ +## 09.2024 +- Tests: GPU performance tests. +- Sample: procedural sphere without geometry. +- Sample: Visibility buffer. + + ## 08.2024 - Tests with sRGB blend/filter. 
- Sample: render scene to cubemap + fisheye projection diff --git a/AE/samples/res_editor/Core/EditorCore.cpp b/AE/samples/res_editor/Core/EditorCore.cpp index e68262cb..f195b2e3 100644 --- a/AE/samples/res_editor/Core/EditorCore.cpp +++ b/AE/samples/res_editor/Core/EditorCore.cpp @@ -74,14 +74,14 @@ namespace // window { cfg.window.title = "ResourceEditor"; - cfg.window.size = {1600, 900}; + cfg.window.size = uint2{ s_REConfig.screenWidth, s_REConfig.screenHeight }; cfg.window.mode = c_WindowMode; } // VR { cfg.enableVR = false; - cfg.vr.dimension = uint2{2048}; + cfg.vr.dimension = ImageDim2_t{2048}; cfg.vr.format = EPixelFormat::BGRA8_UNorm; cfg.vr.usage = EImageUsage::ColorAttachment | EImageUsage::Sampled | EImageUsage::Transfer; // default cfg.vr.options = EImageOpt::BlitDst; @@ -412,6 +412,8 @@ namespace binder.AddMethodFromGlobal( &ResEditorAppConfig_SetRemoteInputServerPort, "RemoteInputServerPort",{} ); binder.AddProperty( &ResEditorAppConfig::setStableGPUClock, "setStableGPUClock" ); binder.AddProperty( &ResEditorAppConfig::enableRenderDoc, "enableRenderDoc" ); + binder.AddProperty( &ResEditorAppConfig::screenWidth, "screenWidth" ); + binder.AddProperty( &ResEditorAppConfig::screenHeight, "screenHeight" ); } ScriptEngine::ModuleSource src; @@ -458,6 +460,7 @@ void main (Config &out cfg) const string local_path = "data/"; const string shader_data_path = "shared_data/"; const string ui_path = "ui"; + const string test_ref_path = "test_ref/"; )"; } else @@ -480,7 +483,8 @@ void main (Config &out cfg) " const string vfs_path = base_path + \"AE-Data/\";\n" " const string local_path = base_path + \"AE/samples/res_editor/_data/\";\n" " const string shader_data_path = base_path + \"AE/engine/shared_data/\";\n" -" const string ui_path = base_path + \"AE-Temp/samples/res_editor\";\n"; +" const string ui_path = base_path + \"AE-Temp/samples/res_editor\";\n" +" const string test_ref_path = vfs_path + \"/samples/res_editor/ref\";\n"; } str << R"( @@ -525,9 +529,10 @@ 
void main (Config &out cfg) cfg.ExportDir( local_path + "../_export" ); // graphics settings // - // NV only: set stable GPU clock for profiling, otherwise driver can move GPU to low power mode. + cfg.screenWidth = 1600; + cfg.screenHeight = 900; + // AMD/NV only: set stable GPU clock for profiling, otherwise driver can move GPU to low power mode. cfg.setStableGPUClock = false; - // on start attach RenderDoc to the app, this will disable some new extensions. cfg.enableRenderDoc = false; @@ -539,14 +544,15 @@ void main (Config &out cfg) str << R"( // remote graphics device // cfg.RemoteDeviceIpAddress( 192, 168, 0, 0 ); - //cfg.GraphicsLibPath( "" ); + cfg.GraphicsLibPath( "GraphicsLib.dll" ); )"; #endif str << R"( // tests // /* - cfg.TestOutput( vfs_path + "/samples/res_editor/ref" ); + // uncomment to run tests on start + cfg.TestOutput( test_ref_path ); cfg.TestFolder( "callable" ); cfg.TestFolder( "games" ); cfg.TestFolder( "samples-2d" ); @@ -1002,7 +1008,7 @@ void main (Config &out cfg) { auto infos = output->GetTargetInfo(); CHECK_ERR( infos.size() == 1 ); - cfg.dynSize->Resize( infos[0].dimension ); + cfg.dynSize->Resize( infos[0].Dimension() ); } auto renderer = _script->Run( scriptPath, cfg ); diff --git a/AE/samples/res_editor/Core/EditorCore.h b/AE/samples/res_editor/Core/EditorCore.h index 700b3f6a..7a55d486 100644 --- a/AE/samples/res_editor/Core/EditorCore.h +++ b/AE/samples/res_editor/Core/EditorCore.h @@ -48,6 +48,8 @@ namespace AE::ResEditor Path exportFolder; // graphics settings + uint screenWidth = 1600; + uint screenHeight = 900; bool setStableGPUClock = false; bool enableRenderDoc = false; diff --git a/AE/samples/res_editor/Core/EditorUI.cpp b/AE/samples/res_editor/Core/EditorUI.cpp index e58f4842..eeef6b74 100644 --- a/AE/samples/res_editor/Core/EditorUI.cpp +++ b/AE/samples/res_editor/Core/EditorUI.cpp @@ -455,7 +455,7 @@ namespace UploadImageDesc upload; upload.aspectMask = EImageAspect::Color; upload.heapType = EStagingHeapType::Dynamic; 
- upload.imageDim = uint3{width, height, 1}; + upload.imageDim = int3{ width, height, 1 }; upload.dataRowPitch = Bytes{width * 4u}; const Bytes data_size = width * height * 4 * SizeOf; @@ -552,6 +552,11 @@ namespace Graphics::DirectCtx::Transfer tctx {*this}; if ( isFirst ) { + #if 1 + // for profiling + tctx.ImageBarrier( rt.imageId, rt.initialState | EResourceState::Invalidate, rt.finalState ); + tctx.CommitBarriers(); + #else tctx.ImageBarrier( rt.imageId, rt.initialState | EResourceState::Invalidate, EResourceState::ClearDst ); tctx.CommitBarriers(); @@ -559,6 +564,7 @@ namespace tctx.ImageBarrier( rt.imageId, EResourceState::ClearDst, rt.finalState ); tctx.CommitBarriers(); + #endif } Execute( tctx ); @@ -696,11 +702,19 @@ namespace if ( ImGui::SliderInt( "##SurfaceScaleSlider", INOUT &scale, -4, 2, SurfaceScaleName( scale )) ) g_mode->dynSize->SetScale( int3{SurfaceScaleFromLog2( scale )} ); - ImGui::Text( "Surface size: %s", ToString( g_mode->dynSize->Dimension2() ).c_str() ); + uint2 dim = g_mode->dynSize->Dimension2(); + ImGui::Text( "Surface size: %ix%i", dim.x, dim.y ); + ImGui::Text( "Mega pix: %0.2f", double(dim.x * dim.y) * 1.0e-6 ); - bool linear = g_mode->filterMode->Get() > 0; + const uint prev_fm = g_mode->filterMode->Get(); + + bool linear = HasBit( prev_fm, 0 ); if ( ImGui::Checkbox( "Linear filter", INOUT &linear )) - g_mode->filterMode->Set( uint{linear} ); + g_mode->filterMode->Set( SetBit( prev_fm, linear, 0 )); + + bool copy = HasBit( prev_fm, 1 ); + if ( ImGui::Checkbox( "Copy instead of blit (if possible)", INOUT &copy )) + g_mode->filterMode->Set( SetBit( prev_fm, copy, 1 )); ImGui::Separator(); } @@ -793,14 +807,15 @@ namespace }}; const auto sp = s_UIInteraction.selectedPixel.Read(); - ImGui::Text( "mouse pos: %s", ToString( sp.pos ).c_str() ); + ImGui::Text( "mouse pos: %s", ToString( sp.pos ).c_str() ); + ImGui::Text( "mouse unorm: %s", ToString( sp.pendingPos ).c_str() ); - ImGui::Text( "raw color: %s", ToString( sp.color, 3 ).c_str() 
); + ImGui::Text( "raw color: %s", ToString( sp.color, 3 ).c_str() ); ImGui::SameLine(); ColoredButton( sp.color ); RGBA32f srgb = RemoveSRGBCurve( Saturate( sp.color )); - ImGui::Text( "sRGB color: %s", ToString( srgb, 3 ).c_str() ); + ImGui::Text( "sRGB color: %s", ToString( srgb, 3 ).c_str() ); ImGui::SameLine(); ColoredButton( srgb ); @@ -1724,7 +1739,9 @@ namespace _CopySliderState(); break; case IA.UI_MouseRBDown : - case IA.UI_ResExport : break; // ignore + case IA.UI_ResExport : + case IA.CustomKey1 : + case IA.Freeze : break; // ignore } switch_end } diff --git a/AE/samples/res_editor/Core/EditorUI.h b/AE/samples/res_editor/Core/EditorUI.h index ce62e692..ad45f560 100644 --- a/AE/samples/res_editor/Core/EditorUI.h +++ b/AE/samples/res_editor/Core/EditorUI.h @@ -75,8 +75,8 @@ namespace AE::ResEditor struct SelectedPixel { FrameUID frame; - uint2 pos; - float2 pendingPos; + uint2 pos; // pixels + float2 pendingPos; // unorm RGBA32f color; // TODO: colors [8] ??? }; diff --git a/AE/samples/res_editor/Dynamic/DynamicDimension.h b/AE/samples/res_editor/Dynamic/DynamicDimension.h index d62fb7a1..7ba2efa1 100644 --- a/AE/samples/res_editor/Dynamic/DynamicDimension.h +++ b/AE/samples/res_editor/Dynamic/DynamicDimension.h @@ -17,6 +17,7 @@ namespace AE::ResEditor // types public: using ERounding = EDynamicVarRounding; + using GetValueFn_t = uint3 (*) (EnableRCBase*); // variables @@ -26,64 +27,74 @@ namespace AE::ResEditor int3 _scale {1,1,1}; ERounding _rounding = ERounding::Round; const EImageDim _numDimensions; - const RC _base; + const RC<> _base; + const GetValueFn_t _getValue = null; // methods public: - explicit DynamicDim (const uint dim) __NE___ : DynamicDim{ uint3{ dim, 0u, 0u }, EImageDim_1D } {} - explicit DynamicDim (const uint2 &dim) __NE___ : DynamicDim{ uint3{ dim, 0u }, EImageDim_2D } {} - explicit DynamicDim (const uint3 &dim, EImageDim imgDim = EImageDim_2D) __NE___; - explicit DynamicDim (RC base) __NE___; - - void Resize (const uint &dim) 
__NE___ { return Resize( uint3{ dim, 1u, 1u }); } - void Resize (const uint2 &dim) __NE___ { return Resize( uint3{ dim, 1u }); } - void Resize (const uint3 &dim) __NE___; - - void SetScale (int3 scale) __NE___ { return SetScale( scale, ERounding::Round ); } - void SetScale (int3, ERounding) __NE___; - - ND_ bool IsChanged (INOUT uint3 &dim) C_NE___; - ND_ bool IsChanged_NonZero (INOUT uint3 &dim) C_NE___; - ND_ bool IsChanged (INOUT float &aspect) C_NE___; - - ND_ float Aspect () C_NE___; - ND_ uint Dimension1 () C_NE___ { ASSERT( _numDimensions == EImageDim_1D ); return Dimension3().x; } - ND_ uint Dimension1_NonZero () C_NE___ { ASSERT( _numDimensions == EImageDim_1D ); return Dimension3_NonZero().x; } - ND_ uint2 Dimension2 () C_NE___ { ASSERT( _numDimensions == EImageDim_2D ); return uint2(Dimension3()); } - ND_ uint2 Dimension2_NonZero () C_NE___ { ASSERT( _numDimensions == EImageDim_2D ); return uint2(Dimension3_NonZero()); } - ND_ uint3 Dimension3 () C_NE___; - ND_ uint3 Dimension3_NonZero () C_NE___ { return Max( Dimension3(), 1u ); } - ND_ int3 Scale () C_NE___ { SHAREDLOCK( _guard ); return _scale; } - ND_ EImageDim NumDimensions () C_NE___ { return _numDimensions; } - ND_ uint3 BaseDimension () C_NE___; - - ND_ uint Area () C_NE___; - ND_ uint Volume () C_NE___; - - ND_ uint Remap (uint src) C_NE___ { return Remap(uint3{ src, 0u, 0u }).x; } - ND_ uint2 Remap (uint2 src) C_NE___ { return uint2{Remap(uint3{ src, 0u })}; } - ND_ uint3 Remap (uint3 src) C_NE___; - - ND_ uint Remap (float src) C_NE___ { return Remap(float3{ src, 0.f, 0.f }).x; } - ND_ uint2 Remap (float2 src) C_NE___ { return uint2{Remap(float3{ src, 0.f })}; } - ND_ uint3 Remap (float3 src) C_NE___; - - ND_ RC GetDynamicX () __NE___; - ND_ RC GetDynamicY () __NE___; - ND_ RC GetDynamicXY () __NE___; - ND_ RC GetDynamicArea () __NE___; - ND_ RC GetDynamicVolume () __NE___; + explicit DynamicDim (const uint dim) __NE___ : DynamicDim{ uint3{ dim, 0u, 0u }, EImageDim_1D } {} + explicit 
DynamicDim (const uint2 &dim) __NE___ : DynamicDim{ uint3{ dim, 0u }, EImageDim_2D } {} + explicit DynamicDim (const uint3 &dim, + EImageDim imgDim = EImageDim_2D) __NE___; + explicit DynamicDim (RC base) __NE___; + explicit DynamicDim (RC<> base, + GetValueFn_t getValue, + EImageDim imgDim = EImageDim_2D) __NE___; + + void Resize (const uint &dim) __NE___ { return Resize( uint3{ dim, 1u, 1u }); } + void Resize (const uint2 &dim) __NE___ { return Resize( uint3{ dim, 1u }); } + void Resize (const uint3 &dim) __NE___; + + void SetScale (int3 scale) __NE___ { return SetScale( scale, ERounding::Round ); } + void SetScale (int3, ERounding) __NE___; + + ND_ bool IsChanged (INOUT uint3 &dim) C_NE___; + ND_ bool IsChanged_NonZero (INOUT uint3 &dim) C_NE___; + ND_ bool IsChanged_NonZero (INOUT ImageDim_t &) C_NE___; + ND_ bool IsChanged (INOUT float &aspect) C_NE___; + + ND_ float Aspect () C_NE___; + ND_ uint Dimension1 () C_NE___ { ASSERT( _numDimensions == EImageDim_1D ); return Dimension3().x; } + ND_ uint Dimension1_NonZero () C_NE___ { ASSERT( _numDimensions == EImageDim_1D ); return Dimension3_NonZero().x; } + ND_ uint2 Dimension2 () C_NE___ { ASSERT( _numDimensions == EImageDim_2D ); return uint2(Dimension3()); } + ND_ uint2 Dimension2_NonZero () C_NE___ { ASSERT( _numDimensions == EImageDim_2D ); return uint2(Dimension3_NonZero()); } + ND_ uint3 Dimension3 () C_NE___; + ND_ uint3 Dimension3_NonZero () C_NE___ { return Max( Dimension3(), 1u ); } + ND_ int3 Scale () C_NE___ { SHAREDLOCK( _guard ); return _scale; } + ND_ EImageDim NumDimensions () C_NE___ { return _numDimensions; } + ND_ uint3 BaseDimension () C_NE___; + + ND_ uint Area () C_NE___; + ND_ uint Volume () C_NE___; + + ND_ uint Remap (uint src) C_NE___ { return Remap(uint3{ src, 0u, 0u }).x; } + ND_ uint2 Remap (uint2 src) C_NE___ { return uint2{Remap(uint3{ src, 0u })}; } + ND_ uint3 Remap (uint3 src) C_NE___; + + ND_ uint Remap (float src) C_NE___ { return Remap(float3{ src, 0.f, 0.f }).x; } + ND_ uint2 
Remap (float2 src) C_NE___ { return uint2{Remap(float3{ src, 0.f })}; } + ND_ uint3 Remap (float3 src) C_NE___; + + ND_ RC GetDynamicX () __NE___; + ND_ RC GetDynamicY () __NE___; + ND_ RC GetDynamicXY () __NE___; + ND_ RC GetDynamicArea () __NE___; + ND_ RC GetDynamicVolume () __NE___; private: - ND_ static uint _ApplyScale (uint, int, ERounding) __NE___; - ND_ static uint _ApplyScale (float, int, ERounding) __NE___; - - ND_ static uint _GetX (EnableRCBase*) __NE___; - ND_ static uint _GetY (EnableRCBase*) __NE___; - ND_ static uint2 _GetXY (EnableRCBase*) __NE___; - ND_ static uint _GetArea (EnableRCBase*) __NE___; - ND_ static uint _GetVolume (EnableRCBase*) __NE___; + ND_ uint3 _BaseDim () C_NE___; + + ND_ static uint _ApplyScale (uint, int, ERounding) __NE___; + ND_ static uint _ApplyScale (float, int, ERounding) __NE___; + + ND_ static uint _GetX (EnableRCBase*) __NE___; + ND_ static uint _GetY (EnableRCBase*) __NE___; + ND_ static uint2 _GetXY (EnableRCBase*) __NE___; + ND_ static uint _GetArea (EnableRCBase*) __NE___; + ND_ static uint _GetVolume (EnableRCBase*) __NE___; + + ND_ static uint3 _GetDim (EnableRCBase*) __NE___; }; @@ -105,9 +116,16 @@ namespace AE::ResEditor } } + inline DynamicDim::DynamicDim (RC<> base, GetValueFn_t getValue, EImageDim imgDim) __NE___ : + _numDimensions{ imgDim }, + _base{ RVRef(base) }, + _getValue{ getValue } + {} + inline DynamicDim::DynamicDim (RC base) __NE___ : _numDimensions{ base ? base->NumDimensions() : EImageDim_2D }, - _base{ RVRef(base) } + _base{ RVRef(base) }, + _getValue{ _base ? &_GetDim : null } {} /* @@ -224,14 +242,25 @@ namespace AE::ResEditor { SHAREDLOCK( _guard ); - uint3 dim = _base ? 
_base->Dimension3() : _dimension; + uint3 dim = _BaseDim(); const bool3 was_zero = (dim == uint3{0}); + + switch ( _numDimensions ) + { + case EImageDim::_1D : + dim.x = _ApplyScale( dim.x, _scale.x, _rounding ); break; - dim.x = _ApplyScale( dim.x, _scale.x, _rounding ); - dim.y = _ApplyScale( dim.y, _scale.y, _rounding ); - dim.z = _ApplyScale( dim.z, _scale.z, _rounding ); - dim *= uint3{not was_zero}; + case EImageDim::_2D : + dim.x = _ApplyScale( dim.x, _scale.x, _rounding ); + dim.y = _ApplyScale( dim.y, _scale.y, _rounding ); break; + case EImageDim::_3D : + dim.x = _ApplyScale( dim.x, _scale.x, _rounding ); + dim.y = _ApplyScale( dim.y, _scale.y, _rounding ); + dim.z = _ApplyScale( dim.z, _scale.z, _rounding ); break; + } + + dim *= uint3{not was_zero}; return dim; } @@ -243,7 +272,12 @@ namespace AE::ResEditor inline uint3 DynamicDim::BaseDimension () C_NE___ { SHAREDLOCK( _guard ); - return _base ? _base->Dimension3() : _dimension; + return _BaseDim(); + } + + inline uint3 DynamicDim::_BaseDim () C_NE___ + { + return _getValue ? 
_getValue( _base.get() ) : _dimension; } /* @@ -263,6 +297,14 @@ namespace AE::ResEditor return false; } + inline bool DynamicDim::IsChanged_NonZero (INOUT ImageDim_t &oldDim) C_NE___ + { + uint3 old_dim {oldDim}; + bool res = IsChanged_NonZero( INOUT old_dim ); + oldDim = ImageDim_t{old_dim}; + return res; + } + /* ================================================= IsChanged @@ -421,5 +463,77 @@ namespace AE::ResEditor return MakeRC( RC<>{GetRC()}, &_GetXY ); } +/* +================================================= + _GetDim +================================================= +*/ + inline uint3 DynamicDim::_GetDim (EnableRCBase* base) __NE___ + { + NonNull( base ); + return Cast(base)->Dimension3(); + } +//----------------------------------------------------------------------------- + + + +/* +================================================= + ToDim2 +================================================= +*/ + template <> + inline uint3 TDynamicScalar::_GetDim2 (EnableRCBase* base) __NE___ + { + return uint3{ uint2{Cast>( base )->Get()}, 1u }; + } + + template <> + inline RC TDynamicScalar::ToDim2 () __NE___ + { + return MakeRC( GetRC(), &_GetDim2, EImageDim_2D ); + } + +/* +================================================= + ToDim3 +================================================= +*/ + template <> + inline uint3 TDynamicScalar::_GetDim3 (EnableRCBase* base) __NE___ + { + return uint3{ Cast>( base )->Get() }; + } + + template <> + inline RC TDynamicScalar::ToDim3 () __NE___ + { + return MakeRC( GetRC(), &_GetDim3, EImageDim_3D ); + } +//----------------------------------------------------------------------------- + + + +/* +================================================= + ToDim +================================================= +*/ + template + uint3 TDynamicVec::_GetDim (EnableRCBase* base) __NE___ + { + auto src = Cast>( base )->Get(); + if constexpr( I == 2 ) + return uint3{ src, 1u }; + if constexpr( I == 3 ) + return src; + } + + template + RC 
TDynamicVec::ToDim () __NE___ + { + return MakeRC( this->GetRC(), &_GetDim, (I == 2 ? EImageDim_2D : EImageDim_3D) ); + } + } // AE::ResEditor diff --git a/AE/samples/res_editor/Dynamic/DynamicScalar.h b/AE/samples/res_editor/Dynamic/DynamicScalar.h index c0b4673d..413fc01e 100644 --- a/AE/samples/res_editor/Dynamic/DynamicScalar.h +++ b/AE/samples/res_editor/Dynamic/DynamicScalar.h @@ -17,17 +17,19 @@ namespace AE::ResEditor enum class EDynamicVarOperator : ubyte { Unknown, - Mul, - Div, - DivNear, - DivCeil, - Add, - Sub, - Pow, + Mul, // x * const + Div, // x / const + DivNear, // (x + const/2) / const + DivCeil, // (x + const - 1) / const + Add, // x + const + Sub, // x - const + Pow, // pow( x, const ) + PowOf2, // const << 2 }; template class TDynamicVec; + class DynamicDim; @@ -73,10 +75,15 @@ namespace AE::ResEditor ND_ RC> ToX1 () __NE___; ND_ RC> ToX11 () __NE___; + ND_ RC ToDim2 () __NE___; + ND_ RC ToDim3 () __NE___; + private: ND_ static T _Get (EnableRCBase*) __NE___; ND_ static Vec _GetX1 (EnableRCBase*) __NE___; ND_ static Vec _GetX11 (EnableRCBase*) __NE___; + ND_ static uint3 _GetDim2 (EnableRCBase*) __NE___; + ND_ static uint3 _GetDim3 (EnableRCBase*) __NE___; }; @@ -137,6 +144,13 @@ namespace AE::ResEditor case EOperator::Add : result += _opValue; break; case EOperator::Sub : result -= _opValue; break; + case EOperator::PowOf2 : + if constexpr( IsFloatPoint ) + result = _opValue * Pow( T(2), result ); + else + result = _opValue << result; + break; + case EOperator::Pow : if constexpr( IsFloatPoint ) result = Pow( result, _opValue ); diff --git a/AE/samples/res_editor/Dynamic/DynamicVec.h b/AE/samples/res_editor/Dynamic/DynamicVec.h index 935f0b67..ad30ebcc 100644 --- a/AE/samples/res_editor/Dynamic/DynamicVec.h +++ b/AE/samples/res_editor/Dynamic/DynamicVec.h @@ -19,12 +19,15 @@ namespace AE::ResEditor using Self = TDynamicVec< T, I >; using Vec_t = Vec< T, I >; using GetValueFn_t = Vec_t (*) (EnableRCBase*); + using EOperator = 
EDynamicVarOperator; // variables private: mutable RWSpinLock _guard; Vec_t _vec; + Vec_t _opValue; + EOperator _op = Default; const RC<> _base; const GetValueFn_t _getValue = null; @@ -35,6 +38,7 @@ namespace AE::ResEditor explicit TDynamicVec (const Vec_t &v) __NE___ : _vec{v} {} TDynamicVec (RC<> base, GetValueFn_t getValue) __NE___ : _base{RVRef(base)}, _getValue{getValue} {} + void SetOp (const Vec_t &, EOperator) __NE___; void Set (const Vec_t &v) __NE___; ND_ Vec_t Get () C_NE___; @@ -47,6 +51,8 @@ namespace AE::ResEditor ND_ RC> GetDynamicZ () __NE___; ND_ RC> GetDynamicW () __NE___; + ND_ RC ToDim () __NE___; + private: ND_ static Vec_t _Get (EnableRCBase*) __NE___; @@ -54,6 +60,7 @@ namespace AE::ResEditor ND_ static T _GetY (EnableRCBase*) __NE___; ND_ static T _GetZ (EnableRCBase*) __NE___; ND_ static T _GetW (EnableRCBase*) __NE___; + ND_ static uint3 _GetDim (EnableRCBase*) __NE___; }; @@ -70,6 +77,19 @@ namespace AE::ResEditor using DynamicUInt4 = TDynamicVec< uint, 4 >; +/* +================================================= + SetOp +================================================= +*/ + template + void TDynamicVec::SetOp (const Vec_t &val, EOperator op) __NE___ + { + EXLOCK( _guard ); + _opValue = val; + _op = op; + } + /* ================================================= Set @@ -98,6 +118,30 @@ namespace AE::ResEditor if_unlikely( _getValue != null ) result = _getValue( _base.get() ); + switch_enum( _op ) + { + case_likely EOperator::Unknown : break; + case EOperator::Mul : result *= _opValue; break; + case EOperator::Div : result /= _opValue; break; + case EOperator::DivNear : result = (result + _opValue / T(2)) / _opValue; break; + case EOperator::DivCeil : result = (result + _opValue - T(1)) / _opValue; break; + case EOperator::Add : result += _opValue; break; + case EOperator::Sub : result -= _opValue; break; + + case EOperator::PowOf2 : + if constexpr( IsFloatPoint ) + result = _opValue * Pow( Vec_t{T(2)}, result ); + else + result = _opValue 
<< result; + break; + + case EOperator::Pow : + if constexpr( IsFloatPoint ) + result = Pow( result, _opValue ); + break; + } + switch_end + return result; } diff --git a/AE/samples/res_editor/GeomSource/ModelGeomSource.cpp b/AE/samples/res_editor/GeomSource/ModelGeomSource.cpp index b5ffd791..0aafb385 100644 --- a/AE/samples/res_editor/GeomSource/ModelGeomSource.cpp +++ b/AE/samples/res_editor/GeomSource/ModelGeomSource.cpp @@ -35,10 +35,11 @@ namespace AE::ResEditor ModelGeomSource::Mesh::Mesh (Renderer &r, RC scene, const Transformation &initialTransform, - RTGeometryTypes_t && rtGeoms) __Th___ : + RTGeometryTypes_t && rtGeoms, + uint instanceCount) __Th___ : IResource{ r }, _temp{new TmpDataForUploading{}}, _intermScene{ RVRef(scene) }, _initialTransform{ initialTransform }, - _rtGeometries{ RVRef(rtGeoms) } + _rtGeometries{ RVRef(rtGeoms) }, _instanceCount{ instanceCount } { _uploadStatus.store( EUploadStatus::InProgress ); @@ -72,7 +73,7 @@ namespace AE::ResEditor "Vertices & Indices", r.ChooseAllocator( False{"static"}, mesh_data_size )); CHECK_THROW( _meshData ); - _temp->nodeDataSize = SizeOf * _temp->nodeCount; + _temp->nodeDataSize = Bytes{ShaderTypes::ModelNode_Array::SizeOf( _temp->nodeCount )}; _nodeBuffer = res_mngr.CreateBuffer( BufferDesc{ _temp->nodeDataSize, usage }, "ModelNodes", r.ChooseAllocator( False{"static"}, _temp->nodeDataSize )); CHECK_THROW( _nodeBuffer ); @@ -95,6 +96,8 @@ namespace AE::ResEditor size += SizeOf * _temp->nodeCount; // materialsPerInstance size = AlignUp( size, alignof(float3x3) ); size += SizeOf * _temp->nodeCount; // normalMatPerInstance + size = AlignUp( size, alignof(float4x4) ); + size += SizeOf * _temp->nodeCount; // modelMatPerInstance _temp->rtInstancesDataSize = size; _rtInstances = res_mngr.CreateBuffer( BufferDesc{ size, usage }, "ModelRTInstances", @@ -225,9 +228,12 @@ namespace AE::ResEditor if ( mem_view.DataSize() < _temp->nodeDataSize ) return false; // out of memory + ShaderTypes::ModelNode_Array 
node_arr; Array nodes; nodes.reserve( usize(_temp->nodeDataSize / SizeOf) ); + node_arr.instanceCount = _instanceCount; + _intermScene->ForEachNode( [this, &nodes] (StringView, const ResLoader::IntermScene::NodeData_t &data, const Transformation &tr) { @@ -259,8 +265,8 @@ namespace AE::ResEditor return true; }); - CHECK_THROW( _temp->nodeDataSize == ArraySizeOf(nodes) ); - CHECK_THROW( mem_view.CopyFrom( nodes ) == _temp->nodeDataSize ); + CHECK_THROW( mem_view.CopyFrom( &node_arr, Sizeof(node_arr) ) == Sizeof(node_arr) ); + CHECK_THROW( mem_view.CopyFrom( nodes, Sizeof(node_arr) ) + Sizeof(node_arr) == _temp->nodeDataSize ); return true; } @@ -391,7 +397,7 @@ namespace AE::ResEditor switch_end } - CHECK_THROW( mem_view.CopyFrom( 0_b, &lights_data, Sizeof(lights_data) ) == SizeOf ); + CHECK_THROW( mem_view.CopyFrom( &lights_data, Sizeof(lights_data) ) == SizeOf ); return true; } @@ -535,10 +541,12 @@ namespace AE::ResEditor StaticArray< Array, uint(ERTGeometryType::_Count) > rt_meshes; StaticArray< Array, uint(ERTGeometryType::_Count) > rt_materials; StaticArray< Array, uint(ERTGeometryType::_Count) > rt_norm_mats; + StaticArray< Array, uint(ERTGeometryType::_Count) > rt_model_mats; StaticAssert( inst_to_mesh.meshesPerInstance.size() == rt_meshes.size() ); StaticAssert( inst_to_mesh.materialsPerInstance.size() == rt_materials.size() ); StaticAssert( inst_to_mesh.normalMatPerInstance.size() == rt_norm_mats.size() ); + StaticAssert( inst_to_mesh.modelMatPerInstance.size() == rt_model_mats.size() ); const DeviceAddress mesh_addr = GraphicsScheduler().GetResourceManager().GetDeviceAddress( _meshData ); const DeviceAddress inst_addr = GraphicsScheduler().GetResourceManager().GetDeviceAddress( _rtInstances ); @@ -558,27 +566,24 @@ namespace AE::ResEditor const bool dual_sided = mtr->GetSettings().cullMode == ECullMode::None; const auto mtr_id = _intermScene->IndexOfMaterial( mtr ); const auto& mesh_info = _temp->meshInfoArr[ _intermScene->IndexOfMesh( mesh ) ]; + const 
auto model_mat = (_initialTransform + tr).ToMatrix(); const auto norm_mat = float3x3{(_initialTransform + tr).orientation.Inversed()}.Transpose(); + const uint idx = translucent ? uint(ERTGeometryType::Translucent) : + dual_sided ? uint(ERTGeometryType::OpaqueDualSided) : + uint(ERTGeometryType::Opaque); ShaderTypes::ModelRTMesh rt_mesh; + rt_mesh.positions = mesh_addr + mesh_info.positions; rt_mesh.normals = mesh_addr + mesh_info.normals; rt_mesh.texcoords = mesh_addr + mesh_info.texcoords; rt_mesh.indices = mesh_addr + mesh_info.indices; - if ( translucent ){ - rt_meshes[ uint(ERTGeometryType::Translucent) ].push_back( rt_mesh ); - rt_materials[ uint(ERTGeometryType::Translucent) ].push_back( mtr_id ); - rt_norm_mats[ uint(ERTGeometryType::Translucent) ].push_back( norm_mat ); - }else - if ( dual_sided ){ - rt_meshes[ uint(ERTGeometryType::OpaqueDualSided) ].push_back( rt_mesh ); - rt_materials[ uint(ERTGeometryType::OpaqueDualSided) ].push_back( mtr_id ); - rt_norm_mats[ uint(ERTGeometryType::OpaqueDualSided) ].push_back( norm_mat ); - }else{ - rt_meshes[ uint(ERTGeometryType::Opaque) ].push_back( rt_mesh ); - rt_materials[ uint(ERTGeometryType::Opaque) ].push_back( mtr_id ); - rt_norm_mats[ uint(ERTGeometryType::Opaque) ].push_back( norm_mat ); - } + rt_meshes[ idx ].push_back( rt_mesh ); + rt_materials[ idx ].push_back( mtr_id ); + rt_norm_mats[ idx ].push_back( norm_mat ); + rt_model_mats[ idx ].push_back( model_mat ); + + // TODO: Volumetric }, [] (const NullUnion &) {} ); @@ -595,7 +600,7 @@ namespace AE::ResEditor if ( rt_meshes[i].empty() ) continue; - CHECK_THROW( mem_view.CopyFrom( offset, rt_meshes[i].data(), ArraySizeOf(rt_meshes[i]) ) == ArraySizeOf(rt_meshes[i]) ); + CHECK_THROW( mem_view.CopyFrom( rt_meshes[i].data(), ArraySizeOf(rt_meshes[i]), offset ) == ArraySizeOf(rt_meshes[i]) ); inst_to_mesh.meshesPerInstance[i] = inst_addr + offset; offset += ArraySizeOf( rt_meshes[i] ); @@ -610,7 +615,7 @@ namespace AE::ResEditor if ( 
rt_materials[i].empty() ) continue; - CHECK_THROW( mem_view.CopyFrom( offset, rt_materials[i].data(), ArraySizeOf(rt_materials[i]) ) == ArraySizeOf(rt_materials[i]) ); + CHECK_THROW( mem_view.CopyFrom( rt_materials[i].data(), ArraySizeOf(rt_materials[i]), offset ) == ArraySizeOf(rt_materials[i]) ); inst_to_mesh.materialsPerInstance[i] = inst_addr + offset; offset += ArraySizeOf( rt_materials[i] ); @@ -623,14 +628,27 @@ namespace AE::ResEditor if ( rt_norm_mats[i].empty() ) continue; - CHECK_THROW( mem_view.CopyFrom( offset, rt_norm_mats[i].data(), ArraySizeOf(rt_norm_mats[i]) ) == ArraySizeOf(rt_norm_mats[i]) ); + CHECK_THROW( mem_view.CopyFrom( rt_norm_mats[i].data(), ArraySizeOf(rt_norm_mats[i]), offset ) == ArraySizeOf(rt_norm_mats[i]) ); inst_to_mesh.normalMatPerInstance[i] = inst_addr + offset; offset += ArraySizeOf( rt_norm_mats[i] ); } + offset = AlignUp( offset, alignof(float4x4) ); + + for (auto i : IndicesOnly( rt_model_mats )) + { + if ( rt_model_mats[i].empty() ) + continue; + + CHECK_THROW( mem_view.CopyFrom( rt_model_mats[i].data(), ArraySizeOf(rt_model_mats[i]), offset ) == ArraySizeOf(rt_model_mats[i]) ); + + inst_to_mesh.modelMatPerInstance[i] = inst_addr + offset; + offset += ArraySizeOf( rt_model_mats[i] ); + } + CHECK( offset == _temp->rtInstancesDataSize ); - CHECK_THROW( mem_view.CopyFrom( 0_b, &inst_to_mesh, Sizeof(inst_to_mesh) ) == Sizeof(inst_to_mesh) ); + CHECK_THROW( mem_view.CopyFrom( &inst_to_mesh, Sizeof(inst_to_mesh) ) == Sizeof(inst_to_mesh) ); return true; } @@ -684,7 +702,7 @@ namespace AE::ResEditor Draw ================================================= */ - void ModelGeomSource::Mesh::Draw (DirectCtx::Draw &ctx, const Material::GPplnGroups_t &drawGroups, uint instanceCount) C_Th___ + void ModelGeomSource::Mesh::Draw (DirectCtx::Draw &ctx, const Material::GPplnGroups_t &drawGroups) C_Th___ { if_unlikely( _drawCalls.empty() ) return; // not uploaded yet @@ -701,10 +719,10 @@ namespace AE::ResEditor DrawIndexedCmd cmd; 
cmd.indexCount = dc.indexCount; - cmd.instanceCount = instanceCount; + cmd.instanceCount = _instanceCount; cmd.firstIndex = dc.firstIndex; cmd.vertexOffset = dc.vertexOffset; - cmd.firstInstance = dc.nodeIdx; + cmd.firstInstance = dc.nodeIdx * _instanceCount; ctx.DrawIndexed( cmd ); } @@ -834,9 +852,8 @@ namespace AE::ResEditor RTGeometryTypes_t && rtGeoms, uint instanceCount) __Th___ : IGeomSource{ r }, - _meshData{ new Mesh{ r, scene, initialTransform, RVRef(rtGeoms) }}, - _textures{ new Textures{ r, scene, texSearchDirs, maxTextures }}, - _instanceCount{ instanceCount } + _meshData{ new Mesh{ r, scene, initialTransform, RVRef(rtGeoms), instanceCount }}, + _textures{ new Textures{ r, scene, texSearchDirs, maxTextures }} { r.GetDataTransferQueue().EnqueueForUpload( _meshData ); r.GetDataTransferQueue().EnqueueForUpload( _textures ); @@ -884,7 +901,7 @@ namespace AE::ResEditor ctx.BindDescriptorSet( mtr.passDSIndex, in.passDS ); ctx.BindDescriptorSet( mtr.mtrDSIndex, mtr_ds ); - _meshData->Draw( ctx, drawGroups, _instanceCount ); + _meshData->Draw( ctx, drawGroups ); }, [] (Material::MPplnGroups_t const &) { CHECK_MSG( false, "mesh pipeline is not supported" ); diff --git a/AE/samples/res_editor/GeomSource/ModelGeomSource.h b/AE/samples/res_editor/GeomSource/ModelGeomSource.h index 59a464ac..58a70b45 100644 --- a/AE/samples/res_editor/GeomSource/ModelGeomSource.h +++ b/AE/samples/res_editor/GeomSource/ModelGeomSource.h @@ -136,13 +136,16 @@ namespace AE::ResEditor RTGeometryTypes_t _rtGeometries; + const uint _instanceCount = 1; + // methods public: Mesh (Renderer &r, RC scene, const Transformation &initialTransform, - RTGeometryTypes_t && rtGeoms) __Th___; + RTGeometryTypes_t && rtGeoms, + uint instanceCount) __Th___; ~Mesh (); ND_ bool BindForGraphics (DescriptorUpdater &updater) C_NE___; @@ -152,12 +155,10 @@ namespace AE::ResEditor void StateTransition (Ctx &) C_Th___; void Draw (DirectCtx::Draw &ctx, - const Material::GPplnGroups_t &drawGroups, - uint 
instanceCount) C_Th___; + const Material::GPplnGroups_t &drawGroups) C_Th___; void Draw (DirectCtx::Draw &ctx, - const Material::MPplnGroups_t &drawGroups, - uint instanceCount) C_Th___; + const Material::MPplnGroups_t &drawGroups) C_Th___; void BindBuffers (DirectCtx::Draw &ctx) C_Th___; diff --git a/AE/samples/res_editor/Passes/ComputePass.cpp b/AE/samples/res_editor/Passes/ComputePass.cpp index 7244b1ba..97fc69f9 100644 --- a/AE/samples/res_editor/Passes/ComputePass.cpp +++ b/AE/samples/res_editor/Passes/ComputePass.cpp @@ -96,7 +96,7 @@ namespace AE::ResEditor ppln = it->second; DirectCtx::Transfer tctx{ pd.rtask, RVRef(pd.cmdbuf) }; - CHECK( pd.dbg.debugger->AllocForCompute( OUT dbg, tctx, ppln, uint3{uint2{pd.dbg.coord * float2{dim} + 0.5f}, 0u })); + CHECK( pd.dbg.debugger->AllocForCompute( OUT dbg, tctx, ppln, uint3{pd.dbg.coord * float2(dim-1u), 0u })); pd.cmdbuf = tctx.ReleaseCommandBuffer(); } } @@ -104,39 +104,42 @@ namespace AE::ResEditor if ( not dbg ) ppln = _pipelines.find( IPass::EDebugMode::Unknown )->second; - DirectCtx::Compute ctx{ pd.rtask, RVRef(pd.cmdbuf), DebugLabel{_dbgName, _dbgColor} }; - DescriptorSetID ds = _descSets[ ctx.GetFrameId().Index() ]; - - _resources.SetStates( ctx, Default ); - ctx.ResourceState( _ubuffer, EResourceState::UniformRead | EResourceState::ComputeShader ); - ctx.CommitBarriers(); + for (uint i = 0, cnt = _GetRepeatCount(); i < cnt; ++i) + { + DirectCtx::Compute ctx{ pd.rtask, RVRef(pd.cmdbuf), DebugLabel{_dbgName, _dbgColor} }; + DescriptorSetID ds = _descSets[ ctx.GetFrameId().Index() ]; - ctx.BindPipeline( ppln ); - ctx.BindDescriptorSet( _dsIndex, ds ); - if ( dbg ) ctx.BindDescriptorSet( dbg.DSIndex(), dbg.DescSet() ); + _resources.SetStates( ctx, Default ); + ctx.ResourceState( _ubuffer, EResourceState::UniformRead | EResourceState::ComputeShader ); + ctx.CommitBarriers(); - ShaderTypes::ComputePassPC pc; - pc.dispatchIndex = 0; + ctx.BindPipeline( ppln ); + ctx.BindDescriptorSet( _dsIndex, ds ); + if ( dbg 
) ctx.BindDescriptorSet( dbg.DSIndex(), dbg.DescSet() ); - for (const auto& it : _iterations) - { - ctx.PushConstant( _pcIndex, pc ); - pc.dispatchIndex++; + ShaderTypes::ComputePassPC pc; + pc.dispatchIndex = 0; - if ( it.indirect ){ - ctx.DispatchIndirect( it.indirect->GetBufferId( ctx.GetFrameId() ), it.indirectOffset ); - }else{ - ctx.Dispatch( it.GroupCount( _localSize )); - } - - if ( not IsLastElement( it, _iterations )) + for (const auto& it : _iterations) { - ctx.ExecutionBarrier( EPipelineScope::Compute, EPipelineScope::Compute ); - ctx.CommitBarriers(); + ctx.PushConstant( _pcIndex, pc ); + pc.dispatchIndex++; + + if ( it.indirect ){ + ctx.DispatchIndirect( it.indirect->GetBufferId( ctx.GetFrameId() ), it.indirectOffset ); + }else{ + ctx.Dispatch( it.GroupCount( _localSize )); + } + + if ( not IsLastElement( it, _iterations )) + { + ctx.ExecutionBarrier( EPipelineScope::Compute, EPipelineScope::Compute ); + ctx.CommitBarriers(); + } } - } - pd.cmdbuf = ctx.ReleaseCommandBuffer(); + pd.cmdbuf = ctx.ReleaseCommandBuffer(); + } return true; } diff --git a/AE/samples/res_editor/Passes/IPass.cpp b/AE/samples/res_editor/Passes/IPass.cpp index 3b51e785..74071418 100644 --- a/AE/samples/res_editor/Passes/IPass.cpp +++ b/AE/samples/res_editor/Passes/IPass.cpp @@ -154,5 +154,18 @@ namespace return true; } +/* +================================================= + _GetRepeatCount +================================================= +*/ + uint IPass::_GetRepeatCount () const + { + if ( not _repeatCount ) + return 1; + + return _repeatCount->Get(); + } + } // AE::ResEditor diff --git a/AE/samples/res_editor/Passes/IPass.h b/AE/samples/res_editor/Passes/IPass.h index 0945b0fb..35aa2edf 100644 --- a/AE/samples/res_editor/Passes/IPass.h +++ b/AE/samples/res_editor/Passes/IPass.h @@ -144,6 +144,8 @@ namespace AE::ResEditor RC _controller; Constants _shConst; + RC _repeatCount; + String _dbgName; RGBA8u _dbgColor; @@ -193,6 +195,7 @@ namespace AE::ResEditor OUT StaticArray &) 
const; ND_ bool _IsEnabled () const; + ND_ uint _GetRepeatCount () const; }; AE_BIT_OPERATORS( IPass::EPassType ); diff --git a/AE/samples/res_editor/Passes/ImageCompression.cpp b/AE/samples/res_editor/Passes/ImageCompression.cpp index 9479528b..a62f6894 100644 --- a/AE/samples/res_editor/Passes/ImageCompression.cpp +++ b/AE/samples/res_editor/Passes/ImageCompression.cpp @@ -111,9 +111,38 @@ namespace AE::ResEditor ================================================= */ ImageCompressionPass::ImageCompressionPass (RC src, RC dst, EPixelFormat dstFormat, StringView dbgName) __NE___ : - IPass{ dbgName }, _src{RVRef(src)}, _dst{RVRef(dst)}, - _srcId{_src->GetImageId()}, _dstId{_dst->GetImageId()} + IPass{ dbgName }, _src{RVRef(src)}, _dst{RVRef(dst)}, _dstFormat{dstFormat} { + #ifdef AE_ENABLE_COMPRESSONATOR + Unused( Compressonator_GetBCLib() ); + #endif + } + +/* +================================================= + destructor +================================================= +*/ + ImageCompressionPass::~ImageCompressionPass () + { + CHECK_Eq( _availableBlocks.ZeroBitCount(), _toUpload->size() ); + } + +/* +================================================= + _Initialize +================================================= +*/ + bool ImageCompressionPass::_Initialize () + { + if_likely( _srcId and _dstId ) + return true; + + if ( _src->GetStatus() != IResource::EUploadStatus::Completed or + _dst->GetStatus() != IResource::EUploadStatus::Completed or + _dst->RequireResize() ) + return false; + const auto src_view = _src->GetViewDesc(); const auto dst_view = _dst->GetViewDesc(); @@ -121,16 +150,30 @@ namespace AE::ResEditor const auto dst_desc = _dst->GetImageDesc(); const auto& src_fmt = EPixelFormat_GetInfo( src_view.format ); - const auto& dst_fmt = EPixelFormat_GetInfo( dstFormat ); + const auto& dst_fmt = EPixelFormat_GetInfo( _dstFormat ); + + CHECK_THROW( not src_fmt.IsCompressed() and dst_fmt.IsCompressed() ); + CHECK_THROW( src_fmt.IsColor() and dst_fmt.IsColor() ); 
+ + CHECK_THROW( All( src_view.Dimension() == dst_view.Dimension() )); + CHECK_THROW( src_view.layerCount == dst_view.layerCount ); + CHECK_THROW( src_view.mipmapCount == dst_view.mipmapCount ); + + CHECK_THROW( All( IsMultipleOf( uint2{dst_view.Dimension2()}, dst_fmt.TexBlockDim() ))); + CHECK_THROW( dst_view.format == src_view.format or _dstFormat == dst_view.format ); + + _srcId = _src->GetImageId(); + _dstId = _dst->GetImageId(); _srcFormat = src_view.format; - _dstFormat = dstFormat; _srcBitsPerBlock = src_fmt.bitsPerBlock; _dstBitsPerBlock = dst_fmt.bitsPerBlock; _decompress = (src_view.format == dst_view.format); _texelBlockDim = dst_fmt.TexBlockDim(); - _imageDim = src_desc.dimension; + _imageDim = src_desc.Dimension(); + _imageLayers = src_desc.arrayLayers.Get(); + _imageMipmaps = src_desc.mipLevels.Get(); _tileDim = AlignUp( uint2{c_TileSize}, _texelBlockDim ); _memBlockSize = SizeOf; @@ -144,19 +187,7 @@ namespace AE::ResEditor if ( _storage.Alloc( _memBlockSize * _BlockCount, Bytes{_BlockAlign}, null )) _availableBlocks.SetRange( 0, _BlockCount ); - #ifdef AE_ENABLE_COMPRESSONATOR - Unused( Compressonator_GetBCLib() ); - #endif - } - -/* -================================================= - destructor -================================================= -*/ - ImageCompressionPass::~ImageCompressionPass () - { - CHECK_Eq( _availableBlocks.ZeroBitCount(), _toUpload->size() ); + return true; } /* @@ -166,36 +197,48 @@ namespace AE::ResEditor */ bool ImageCompressionPass::Execute (SyncPassData &pd) __Th___ { + if_unlikely( not _Initialize() ) + return true; // skip + DirectCtx::Transfer ctx{ pd.rtask, RVRef(pd.cmdbuf), DebugLabel{GetName()} }; // read new block if ( auto* block = _AllocBlock() ) { - const uint3 mip_dim = _imageDim; + const uint3 mip_dim = Max( _imageDim << _mipOffset, 1u ); - block->offset = _imageOffset; - block->dim = Min( _imageOffset + uint3{_tileDim, 1}, mip_dim ) - _imageOffset; + block->offset = _dimOffset; + block->dim = Min( 
_dimOffset + uint3{_tileDim, 1}, mip_dim ) - _dimOffset; block->srcSize = ImageUtils::SliceSize( uint2{block->dim}, _srcBitsPerBlock, uint2{1} ); block->dstSize = ImageUtils::SliceSize( AlignUp( uint2{block->dim}, _texelBlockDim ), _dstBitsPerBlock, _texelBlockDim ); block->arrayLayer = 0; block->mipmap = 0; - _imageOffset.x += block->dim.x; - if ( _imageOffset.x >= mip_dim.x ) + _dimOffset.x += block->dim.x; + if ( _dimOffset.x >= mip_dim.x ) { - _imageOffset.x = 0; - _imageOffset.y += block->dim.y; + _dimOffset.x = 0; + _dimOffset.y += block->dim.y; } - if ( _imageOffset.y >= mip_dim.y ) + if ( _dimOffset.y >= mip_dim.y ) { - _imageOffset.x = 0; - _imageOffset.y = 0; - _imageOffset.z += block->dim.z; + _dimOffset.x = 0; + _dimOffset.y = 0; + _dimOffset.z += block->dim.z; } - if ( _imageOffset.z >= mip_dim.z ) + if ( _dimOffset.z >= mip_dim.z ) { - _imageOffset = uint3{0}; - // TODO: inc arrayLayer and mipmap + _dimOffset = uint3{0}; + ++_layerOffset; + } + if ( _layerOffset >= _imageLayers ) + { + _layerOffset = 0; + ++_mipOffset; + } + if ( _mipOffset >= _imageMipmaps ) + { + _mipOffset = 0; } ReadbackImageDesc read; @@ -302,4 +345,19 @@ namespace AE::ResEditor return q->ExtractFront(); } +/* +================================================= + GetResourcesToResize +================================================= +*/ + void ImageCompressionPass::GetResourcesToResize (INOUT Array> &resources) __NE___ + { + if ( _src->RequireResize() ) + resources.push_back( _src ); + + if ( _dst->RequireResize() ) + resources.push_back( _dst ); + } + + } // AE::ResEditor diff --git a/AE/samples/res_editor/Passes/ImageCompression.h b/AE/samples/res_editor/Passes/ImageCompression.h index 5cac0772..24bec68b 100644 --- a/AE/samples/res_editor/Passes/ImageCompression.h +++ b/AE/samples/res_editor/Passes/ImageCompression.h @@ -52,16 +52,20 @@ namespace AE::ResEditor uint2 _tileDim; uint2 _texelBlockDim; uint3 _imageDim; + uint _imageLayers = 0; + uint _imageMipmaps = 0; uint 
_srcBitsPerBlock; uint _dstBitsPerBlock; bool _decompress; EPixelFormat _srcFormat; - EPixelFormat _dstFormat; + const EPixelFormat _dstFormat; Bytes _memBlockSize; // mutable - uint3 _imageOffset; + uint3 _dimOffset; + uint _layerOffset = 0; + uint _mipOffset = 0; DynUntypedStorage _storage; AvailableBlockBits_t _availableBlocks; // 1 - available block @@ -77,12 +81,14 @@ namespace AE::ResEditor // IPass // EPassType GetType () C_NE_OV { return EPassType::Sync; } bool Execute (SyncPassData &) __Th_OV; - void GetResourcesToResize (INOUT Array> &) __NE_OV {} + void GetResourcesToResize (INOUT Array> &) __NE_OV; private: ND_ Block* _AllocBlock (); void _FreeBlock (Block*); ND_ Block* _GetBlockToUpload (); + + ND_ bool _Initialize (); }; diff --git a/AE/samples/res_editor/Passes/OtherPasses.cpp b/AE/samples/res_editor/Passes/OtherPasses.cpp index cacdbd5d..ac3fb0e6 100644 --- a/AE/samples/res_editor/Passes/OtherPasses.cpp +++ b/AE/samples/res_editor/Passes/OtherPasses.cpp @@ -12,6 +12,17 @@ namespace AE::ResEditor { +/* +================================================= + constructor +================================================= +*/ + Present::Present (Array> src, StringView dbgName, RC dynSize) __NE___ : + IPass{dbgName}, _src{RVRef(src)}, _dynSize{dynSize}, _filterMode{UIInteraction::Instance().GetFilterMode()} + { + _dbgColor = RGBA8u{150}; + } + /* ================================================= PresentAsync @@ -22,7 +33,7 @@ namespace AE::ResEditor const auto infos = pd.surface->GetTargetInfo(); if ( _dynSize and not infos.empty() ) - _dynSize->Resize( infos[0].dimension ); + _dynSize->Resize( infos[0].Dimension() ); { auto& ui = UIInteraction::Instance(); @@ -38,7 +49,14 @@ namespace AE::ResEditor encoder = null; if ( capture.video ) - _videoEncoder.store( _CreateEncoder( capture.bitrate, capture.videoFormat, capture.videoCodec, capture.colorPreset )); + { + encoder = _CreateEncoder( capture.bitrate, capture.videoFormat, capture.videoCodec, 
capture.colorPreset ); + + if ( encoder ) + _videoEncoder.store( RVRef(encoder) ); + else + ui.capture->video = false; + } } } @@ -56,25 +74,43 @@ namespace AE::ResEditor IOutputSurface::RenderTargets_t targets; CHECK_CE( surface.GetTargets( OUT targets )); - auto& src = self->_src[0]; - const auto desc = src->GetImageDesc(); - auto& dst = targets[0]; - RenderTask& rtask = co_await RenderTask_GetRef; - const auto filter = self->_filterMode->Get() == 0 ? EBlitFilter::Nearest : EBlitFilter::Linear; + auto& src = self->_src[0]; + const auto src_desc = src->GetImageDesc(); + const auto src_view = src->GetViewDesc(); + const uint2 src_dim = src_view.Dimension2(); + + auto& dst = targets[0]; + RenderTask& rtask = co_await RenderTask_GetRef; + const auto filter = self->_filterMode->Get() == 0 ? EBlitFilter::Nearest : EBlitFilter::Linear; + const bool copy = self->_filterMode->Get() == 2 and + EPixelFormat_IsCopySupported( src_desc.format, dst.format ) and + All( src_dim == dst.RegionSize() ); - DirectCtx::Transfer ctx{ rtask, Default, DebugLabel{ self->_dbgName, HtmlColor::Blue }}; + DirectCtx::Transfer ctx{ rtask, Default, DebugLabel{ self->_dbgName, self->_dbgColor }}; ctx.AddSurfaceTargets( targets ); - ImageBlit blit; - blit.srcSubres = { EImageAspect::Color, 0_mipmap, 0_layer, 1u }; - blit.srcOffset0 = { 0u, 0u, 0u }; - blit.srcOffset1 = { desc.dimension.x, desc.dimension.y, 1u }; - blit.dstSubres = { EImageAspect::Color, 0_mipmap, 0_layer, 1u }; - blit.dstOffset0 = { dst.region.left, dst.region.top, 0 }; - blit.dstOffset1 = { dst.region.right, dst.region.bottom, 1 }; + if ( copy ) + { + ImageCopy range; + range.extent = uint3{src_dim, 1}; + range.srcSubres.aspectMask = EImageAspect::Color; + range.dstSubres.aspectMask = EImageAspect::Color; - ctx.BlitImage( src->GetImageId(), dst.imageId, filter, ArrayView{ &blit, 1 }); + ctx.CopyImage( src->GetImageId(), dst.imageId, {range} ); + } + else + { + ImageBlit blit; + blit.srcSubres = { EImageAspect::Color, 
src_view.baseMipmap, src_view.baseLayer, 1u }; + blit.srcOffset0 = uint3{ 0 }; + blit.srcOffset1 = uint3{ src_dim.x, src_dim.y, 1u }; + blit.dstSubres = { EImageAspect::Color, 0_mipmap, 0_layer, 1u }; + blit.dstOffset0 = int3{ dst.region.left, dst.region.top, 0 }; + blit.dstOffset1 = int3{ dst.region.right, dst.region.bottom, 1 }; + + ctx.BlitImage( src->GetImageId(), dst.imageId, filter, {blit} ); + } // read pixel color for debugging { @@ -83,8 +119,8 @@ namespace AE::ResEditor ReadbackImageDesc readback; readback.heapType = EStagingHeapType::Static; readback.imageDim = uint3{1}; - readback.imageOffset = uint3{float3{ unorm_pos * float2{desc.dimension}, 0.f }}; - readback.imageOffset = Min( readback.imageOffset, desc.dimension-1u ); + readback.imageOffset = uint3{float3{ unorm_pos * float2{src_dim}, 0.f }}; + readback.imageOffset = Min( readback.imageOffset, uint3{src_dim - 1u, 0u} ); ctx.ReadbackImage( src->GetImageId(), readback ) .Then( [fid = ctx.GetFrameId()] (const ImageMemView &inView) @@ -132,7 +168,7 @@ namespace AE::ResEditor { ReadbackImageDesc readback; readback.heapType = EStagingHeapType::Dynamic; - readback.imageDim = desc.dimension; + readback.imageDim = uint3{src_dim, 1u}; ctx.ReadbackImage( src->GetImageId(), readback ) .Then( [self, capture, encoder = self->_videoEncoder.load()] (const ImageMemView &inView) @@ -243,9 +279,16 @@ namespace AE::ResEditor cfg.framerate = FractionalI{ int(_videoInfo.frameRate) }; cfg.bitrate = Bitrate{ ulong(double(bitrate) * 1024.0) * 1024 }; // TODO cfg.hwAccelerated = EHwAcceleration::Optional; - cfg.targetGPU = GraphicsScheduler().GetFeatureSet().devicesIds.include.First(); cfg.targetCPU = CpuArchInfo::Get().cpu.vendor; + #ifdef AE_ENABLE_REMOTE_GRAPHICS + # if RmG_UI_ON_HOST + if ( auto glib = GraphicsScheduler().GetDevice().GetGraphicsLib() ) + cfg.targetGPU = glib->GetResourceManager()->GetFeatureSet().devicesIds.include.First(); + # endif + #else + cfg.targetGPU = 
GraphicsScheduler().GetFeatureSet().devicesIds.include.First(); + #endif auto result = VideoFactory::CreateFFmpegEncoder(); const auto& video_folder = ResEditorAppConfig::Get().videoFolder; @@ -382,7 +425,7 @@ namespace AE::ResEditor break; case EFlags::Histogram : - img_desc.dimension = uint3{ 1024, 1024, 1 }; + img_desc.dimension = ImageDim_t{ 1024, 1024, 1 }; img_desc.usage = EImageUsage::ColorAttachment | EImageUsage::Sampled; img_desc.format = EPixelFormat::RGBA8_UNorm; // defined in 'histogram.as' img_desc.options = Default; @@ -537,8 +580,8 @@ namespace AE::ResEditor { constexpr auto& RTech = RenderTechs::Histogram_RTech; - const uint2 src_dim = uint2{srcImage.GetImageDesc().dimension}; // TODO: GetViewDimension ? - const uint2 dst_dim = uint2{dstImage.GetImageDesc().dimension}; + const uint2 src_dim = srcImage.GetImageDesc().Dimension2(); // TODO: GetViewDimension ? + const uint2 dst_dim = dstImage.GetImageDesc().Dimension2(); DirectCtx::Transfer copy_ctx { pd.rtask, RVRef(pd.cmdbuf), DebugLabel{"Histogram pass1", HtmlColor::Blue} }; DescriptorSetID comp_ds = _ppln1DS[ copy_ctx.GetFrameId().Index() ]; @@ -664,7 +707,7 @@ namespace AE::ResEditor { constexpr auto& RTech = RenderTechs::LinearDepth_RTech; - const uint2 dst_dim = uint2{dstImage.GetImageDesc().dimension}; // TODO: GetViewDimension ? + const uint2 dst_dim = dstImage.GetImageDesc().Dimension2(); // TODO: GetViewDimension ? DirectCtx::Graphics ctx { pd.rtask, RVRef(pd.cmdbuf), DebugLabel{"ToLinearDepth", HtmlColor::Blue} }; DescriptorSetID ds = _pplnDS[ ctx.GetFrameId().Index() ]; @@ -746,7 +789,7 @@ namespace AE::ResEditor { constexpr auto& RTech = RenderTechs::StencilView_RTech; - const uint2 dst_dim = uint2{dstImage.GetImageDesc().dimension}; // TODO: GetViewDimension ? + const uint2 dst_dim = dstImage.GetImageDesc().Dimension2(); // TODO: GetViewDimension ? 
DirectCtx::Graphics ctx { pd.rtask, RVRef(pd.cmdbuf), DebugLabel{"StencilView", HtmlColor::Blue} }; DescriptorSetID ds = _pplnDS[ ctx.GetFrameId().Index() ]; @@ -812,13 +855,13 @@ namespace AE::ResEditor CopyImagePass::CopyImagePass (RC src, RC dst, StringView dbgName) __Th___ : IPass{dbgName}, _srcImage{RVRef(src)}, _dstImage{RVRef(dst)} { - const auto& src_desc = _srcImage->GetImageDesc(); - const auto& dst_desc = _dstImage->GetImageDesc(); + const auto& src_desc = _srcImage->GetViewDesc(); + const auto& dst_desc = _dstImage->GetViewDesc(); CHECK_THROW( All( src_desc.dimension == dst_desc.dimension )); CHECK_THROW( All( src_desc.format == dst_desc.format )); + CHECK_THROW( src_desc.layerCount == dst_desc.layerCount ); - _dim = src_desc.dimension; _aspect = EPixelFormat_GetInfo( src_desc.format ).aspectMask; } @@ -834,12 +877,17 @@ namespace AE::ResEditor DirectCtx::Transfer ctx{ pd.rtask, RVRef(pd.cmdbuf), DebugLabel{"CopyImage", HtmlColor::Blue} }; + const auto& src_desc = _srcImage->GetViewDesc(); + const auto& dst_desc = _dstImage->GetViewDesc(); + CHECK_THROW( All( src_desc.dimension == dst_desc.dimension )); + CHECK_THROW( src_desc.layerCount == dst_desc.layerCount ); + ImageCopy copy; copy.srcOffset = {}; - copy.srcSubres = { _aspect, 0_mipmap, 0_layer, UMax }; + copy.srcSubres = { _aspect, src_desc.baseMipmap, src_desc.baseLayer, src_desc.layerCount }; copy.dstOffset = {}; - copy.dstSubres = { _aspect, 0_mipmap, 0_layer, UMax }; - copy.extent = _dim; + copy.dstSubres = { _aspect, dst_desc.baseMipmap, dst_desc.baseLayer, dst_desc.layerCount }; + copy.extent = src_desc.Dimension(); ctx.CopyImage( _srcImage->GetImageId(), _dstImage->GetImageId(), {copy} ); @@ -850,6 +898,110 @@ namespace AE::ResEditor +/* +================================================= + constructor +================================================= +*/ + BlitImagePass::BlitImagePass (RC src, RC dst, StringView dbgName) __Th___ : + IPass{dbgName}, _srcImage{RVRef(src)}, 
_dstImage{RVRef(dst)} + { + const auto& src_desc = _srcImage->GetViewDesc(); + const auto& dst_desc = _dstImage->GetViewDesc(); + + CHECK_THROW( EPixelFormat_IsBlitSupported( src_desc.format, dst_desc.format, EBlitFilter::Linear )); + + _aspect = EPixelFormat_GetInfo( src_desc.format ).aspectMask; + } + +/* +================================================= + Execute +================================================= +*/ + bool BlitImagePass::Execute (SyncPassData &pd) __Th___ + { + if_unlikely( not _IsEnabled() ) + return true; + + DirectCtx::Transfer ctx{ pd.rtask, RVRef(pd.cmdbuf), DebugLabel{"BlitImage", HtmlColor::Blue} }; + + const auto& src_desc = _srcImage->GetViewDesc(); + const auto& dst_desc = _dstImage->GetViewDesc(); + CHECK_THROW( src_desc.layerCount == dst_desc.layerCount ); + + ImageBlit blit; + blit.srcSubres = { _aspect, src_desc.baseMipmap, src_desc.baseLayer, src_desc.layerCount }; + blit.srcOffset0 = {}; + blit.srcOffset1 = src_desc.Dimension(); + blit.dstSubres = { _aspect, dst_desc.baseMipmap, dst_desc.baseLayer, dst_desc.layerCount }; + blit.dstOffset0 = {}; + blit.dstOffset1 = dst_desc.Dimension(); + + EBlitFilter filter = EBlitFilter::Linear; + if ( All( src_desc.Dimension() == dst_desc.Dimension() )) + filter = EBlitFilter::Nearest; + + ctx.BlitImage( _srcImage->GetImageId(), _dstImage->GetImageId(), filter, {blit} ); + + pd.cmdbuf = ctx.ReleaseCommandBuffer(); + return true; + } +//----------------------------------------------------------------------------- + + + +/* +================================================= + constructor +================================================= +*/ + ResolveImagePass::ResolveImagePass (RC src, RC dst, StringView dbgName) __Th___ : + IPass{dbgName}, _srcImage{RVRef(src)}, _dstImage{RVRef(dst)} + { + const auto& src_desc = _srcImage->GetViewDesc(); + const auto& dst_desc = _dstImage->GetViewDesc(); + + CHECK_THROW( All( src_desc.dimension == dst_desc.dimension )); + CHECK_THROW( All( src_desc.format == 
dst_desc.format )); + CHECK_THROW( src_desc.layerCount == dst_desc.layerCount ); + + _aspect = EPixelFormat_GetInfo( src_desc.format ).aspectMask; + } + +/* +================================================= + Execute +================================================= +*/ + bool ResolveImagePass::Execute (SyncPassData &pd) __Th___ + { + if_unlikely( not _IsEnabled() ) + return true; + + DirectCtx::Transfer ctx{ pd.rtask, RVRef(pd.cmdbuf), DebugLabel{"CopyImage", HtmlColor::Blue} }; + + const auto& src_desc = _srcImage->GetViewDesc(); + const auto& dst_desc = _dstImage->GetViewDesc(); + CHECK_THROW( All( src_desc.dimension == dst_desc.dimension )); + CHECK_THROW( src_desc.layerCount == dst_desc.layerCount ); + + ImageResolve copy; + copy.srcOffset = {}; + copy.srcSubres = { _aspect, src_desc.baseMipmap, src_desc.baseLayer, src_desc.layerCount }; + copy.dstOffset = {}; + copy.dstSubres = { _aspect, dst_desc.baseMipmap, dst_desc.baseLayer, dst_desc.layerCount }; + copy.extent = src_desc.Dimension(); + + ctx.ResolveImage( _srcImage->GetImageId(), _dstImage->GetImageId(), {copy} ); + + pd.cmdbuf = ctx.ReleaseCommandBuffer(); + return true; + } +//----------------------------------------------------------------------------- + + + /* ================================================= Execute diff --git a/AE/samples/res_editor/Passes/OtherPasses.h b/AE/samples/res_editor/Passes/OtherPasses.h index 74a38cf3..06c40d22 100644 --- a/AE/samples/res_editor/Passes/OtherPasses.h +++ b/AE/samples/res_editor/Passes/OtherPasses.h @@ -38,8 +38,7 @@ namespace AE::ResEditor // methods public: - explicit Present (Array> src, StringView dbgName, RC dynSize, RC filterMode) __NE___ : - IPass{dbgName}, _src{RVRef(src)}, _dynSize{dynSize}, _filterMode{filterMode} {} + explicit Present (Array> src, StringView dbgName, RC dynSize) __NE___; // IPass // EPassType GetType () C_NE_OV { return EPassType::Present; } @@ -223,7 +222,6 @@ namespace AE::ResEditor private: RC _srcImage; RC _dstImage; - 
uint3 _dim; EImageAspect _aspect; @@ -239,6 +237,56 @@ namespace AE::ResEditor + // + // Blit Image pass + // + + class BlitImagePass final : public IPass + { + // variables + private: + RC _srcImage; + RC _dstImage; + EImageAspect _aspect; + + + // methods + public: + explicit BlitImagePass (RC src, RC dst, StringView dbgName) __Th___; + + // IPass // + EPassType GetType () C_NE_OV { return EPassType::Sync; } + bool Execute (SyncPassData &) __Th_OV; + void GetResourcesToResize (INOUT Array> &) __NE_OV {} + }; + + + + // + // Resolve Image pass + // + + class ResolveImagePass final : public IPass + { + // variables + private: + RC _srcImage; + RC _dstImage; + EImageAspect _aspect; + + + // methods + public: + explicit ResolveImagePass (RC src, RC dst, StringView dbgName) __Th___; + + // IPass // + EPassType GetType () C_NE_OV { return EPassType::Sync; } + bool Execute (SyncPassData &) __Th_OV; + void GetResourcesToResize (INOUT Array> &) __NE_OV {} + }; + + + // // Clear Image pass // diff --git a/AE/samples/res_editor/Passes/PassGroup.cpp b/AE/samples/res_editor/Passes/PassGroup.cpp index d1c89d72..476e2cee 100644 --- a/AE/samples/res_editor/Passes/PassGroup.cpp +++ b/AE/samples/res_editor/Passes/PassGroup.cpp @@ -14,7 +14,7 @@ namespace AE::ResEditor void PassGroup::AddPass (RC pass) __Th___ { CHECK_THROW( pass ); - CHECK_THROW( not AnyBits( pass->GetType(), ~(EPassType::Sync | EPassType::Update) )); + CHECK_THROW( NoBits( pass->GetType(), ~(EPassType::Sync | EPassType::Update) )); _passes.push_back( pass ); } diff --git a/AE/samples/res_editor/Passes/Postprocess.cpp b/AE/samples/res_editor/Passes/Postprocess.cpp index f10a7630..3b96bb50 100644 --- a/AE/samples/res_editor/Passes/Postprocess.cpp +++ b/AE/samples/res_editor/Passes/Postprocess.cpp @@ -7,6 +7,22 @@ namespace AE::ResEditor { +/* +================================================= + _GetDimension +================================================= +*/ + uint2 Postprocess::_GetDimension () C_NE___ + { + 
if ( not _renderTargets.empty() ) + return _renderTargets.front().image->GetViewDesc().Dimension2(); + + if ( _dynamicDim ) + return _dynamicDim->Dimension2(); + + RETURN_ERR( "render pass dimension is not specified" ); + } + /* ================================================= Execute @@ -17,11 +33,9 @@ namespace AE::ResEditor if_unlikely( not _IsEnabled() ) return true; - CHECK_ERR( not _renderTargets.empty() ); - ShaderDebugger::Result dbg; GraphicsPipelineID ppln; - const uint2 dim {_renderTargets[0].image->GetViewDimension()}; + const uint2 dim = _GetDimension(); if ( pd.dbg.IsEnabled( this )) { @@ -33,7 +47,7 @@ namespace AE::ResEditor ppln = it->second; DirectCtx::Transfer tctx{ pd.rtask, RVRef(pd.cmdbuf) }; - CHECK( pd.dbg.debugger->AllocForGraphics( OUT dbg, tctx, ppln, uint2{pd.dbg.coord * float2{dim} + 0.5f} )); + CHECK( pd.dbg.debugger->AllocForGraphics( OUT dbg, tctx, ppln, uint2{pd.dbg.coord * float2(dim-1u)} )); pd.cmdbuf = tctx.ReleaseCommandBuffer(); } } @@ -43,31 +57,36 @@ namespace AE::ResEditor DirectCtx::Graphics ctx{ pd.rtask, RVRef(pd.cmdbuf) }; - _resources.SetStates( ctx, Default ); - ctx.ResourceState( _ubuffer, EResourceState::UniformRead | EResourceState::FragmentShader ); - ctx.CommitBarriers(); - - // render pass + for (uint i = 0, cnt = _GetRepeatCount(); i < cnt; ++i) { - DescriptorSetID ds = _descSets[ ctx.GetFrameId().Index() ]; - RenderPassDesc rp_desc = _rpDesc; + _resources.SetStates( ctx, Default ); + ctx.ResourceState( _ubuffer, EResourceState::UniformRead | EResourceState::FragmentShader ); + ctx.CommitBarriers(); - for (auto& rt : _renderTargets) { - rp_desc.AddTarget( rt.name, rt.image->GetViewId(), rt.clear ); - } + // render pass + { + DescriptorSetID ds = _descSets[ ctx.GetFrameId().Index() ]; + RenderPassDesc rp_desc = _rpDesc; + + for (auto& rt : _renderTargets) { + rp_desc.AddTarget( rt.name, rt.image->GetViewId(), rt.clear ); + } - rp_desc.area = RectI{ int2{dim} }; - rp_desc.DefaultViewport( _depthRange.x, 
_depthRange.y ); + rp_desc.area = RectI{ int2{dim} }; + for (auto& vp : rp_desc.viewports) { + vp.rect *= float2{dim}; + } - auto dctx = ctx.BeginRenderPass( rp_desc, DebugLabel{_dbgName, _dbgColor} ); + auto dctx = ctx.BeginRenderPass( rp_desc, DebugLabel{_dbgName, _dbgColor} ); - dctx.BindPipeline( ppln ); - dctx.BindDescriptorSet( _dsIndex, ds ); - if ( dbg ) dctx.BindDescriptorSet( dbg.DSIndex(), dbg.DescSet() ); + dctx.BindPipeline( ppln ); + dctx.BindDescriptorSet( _dsIndex, ds ); + if ( dbg ) dctx.BindDescriptorSet( dbg.DSIndex(), dbg.DescSet() ); - dctx.Draw( 3 ); + dctx.Draw( 3 ); - ctx.EndRenderPass( dctx ); + ctx.EndRenderPass( dctx ); + } } pd.cmdbuf = ctx.ReleaseCommandBuffer(); @@ -81,14 +100,12 @@ namespace AE::ResEditor */ bool Postprocess::Update (TransferCtx_t &ctx, const UpdatePassData &pd) __Th___ { - CHECK_ERR( not _renderTargets.empty() ); - // validate dimensions - const uint2 cur_dim = uint2{ _renderTargets.front().image->GetViewDimension() }; + const uint2 cur_dim = _GetDimension(); { for (auto& rt : _renderTargets) { - const uint2 dim = uint2{ rt.image->GetViewDimension() }; + const uint2 dim = rt.image->GetViewDesc().Dimension2(); CHECK_ERR( All( cur_dim == dim )); } } @@ -96,7 +113,7 @@ namespace AE::ResEditor // update uniform buffer { ShaderTypes::ShadertoyUB ub_data; - ub_data.resolution = float3{cur_dim.x, cur_dim.y, 1}; + ub_data.resolution = float3{ cur_dim, 1.f }; ub_data.time = pd.totalTime.count(); ub_data.timeDelta = pd.frameTime.count(); ub_data.frame = pd.frameId; diff --git a/AE/samples/res_editor/Passes/Postprocess.h b/AE/samples/res_editor/Passes/Postprocess.h index 7801d034..d9e49198 100644 --- a/AE/samples/res_editor/Passes/Postprocess.h +++ b/AE/samples/res_editor/Passes/Postprocess.h @@ -32,7 +32,8 @@ namespace AE::ResEditor protected: RTechInfo _rtech; RenderPassDesc _rpDesc; - float2 _depthRange {0.f, 1.f}; + + RC _dynamicDim; PipelineMap_t _pipelines; PerFrameDescSet_t _descSets; @@ -55,6 +56,9 @@ namespace 
AE::ResEditor bool Execute (SyncPassData &) __Th_OV; bool Update (TransferCtx_t &, const UpdatePassData &) __Th_OV; void GetResourcesToResize (INOUT Array> &) __NE_OV; + + private: + ND_ uint2 _GetDimension () C_NE___; }; diff --git a/AE/samples/res_editor/Passes/RayTracingPass.cpp b/AE/samples/res_editor/Passes/RayTracingPass.cpp index 1eca8031..54dfa9fa 100644 --- a/AE/samples/res_editor/Passes/RayTracingPass.cpp +++ b/AE/samples/res_editor/Passes/RayTracingPass.cpp @@ -80,7 +80,7 @@ namespace AE::ResEditor sbt = it->second.Get<1>(); DirectCtx::Transfer tctx{ pd.rtask, RVRef(pd.cmdbuf) }; - CHECK( pd.dbg.debugger->AllocForRayTracing( OUT dbg, tctx, ppln, uint3{uint2{pd.dbg.coord * float2{dim} + 0.5f}, 0u })); + CHECK( pd.dbg.debugger->AllocForRayTracing( OUT dbg, tctx, ppln, uint3{pd.dbg.coord * float2(dim-1u), 0u })); pd.cmdbuf = tctx.ReleaseCommandBuffer(); } } @@ -92,48 +92,51 @@ namespace AE::ResEditor sbt = it->second.Get<1>(); } - DirectCtx::RayTracing ctx{ pd.rtask, RVRef(pd.cmdbuf), DebugLabel{_dbgName, _dbgColor} }; - DescriptorSetID ds = _descSets[ ctx.GetFrameId().Index() ]; - - _resources.SetStates( ctx, Default ); - ctx.ResourceState( _ubuffer, EResourceState::UniformRead | EResourceState::RayTracingShaders ); - ctx.CommitBarriers(); + for (uint i = 0, cnt = _GetRepeatCount(); i < cnt; ++i) + { + DirectCtx::RayTracing ctx{ pd.rtask, RVRef(pd.cmdbuf), DebugLabel{_dbgName, _dbgColor} }; + DescriptorSetID ds = _descSets[ ctx.GetFrameId().Index() ]; - ctx.BindPipeline( ppln ); - ctx.BindDescriptorSet( _dsIndex, ds ); - if ( dbg ) ctx.BindDescriptorSet( dbg.DSIndex(), dbg.DescSet() ); + _resources.SetStates( ctx, Default ); + ctx.ResourceState( _ubuffer, EResourceState::UniformRead | EResourceState::RayTracingShaders ); + ctx.CommitBarriers(); - // from Vulkan specs: - // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#ray-tracing-pipeline-stack - { - const int max_ray_recursion = _maxRayRecursion ? 
_maxRayRecursion->Get() : 31; - const uint max_call_recursion = _maxCallRecursion ? _maxCallRecursion->Get() : 2; + ctx.BindPipeline( ppln ); + ctx.BindDescriptorSet( _dsIndex, ds ); + if ( dbg ) ctx.BindDescriptorSet( dbg.DSIndex(), dbg.DescSet() ); - const Bytes stack_size = - uint(Min( 1, max_ray_recursion )) * Bytes{Max( _closestHitStackMax, _missStackMax, _intersectionStackMax, _anyHitStackMax )} + - uint(Max( 0, max_ray_recursion-1 )) * Bytes{Max( _closestHitStackMax, _missStackMax )} + - Max( 2u, max_call_recursion ) * Bytes{_callableStackMax} + - _rayGenStackMax; + // from Vulkan specs: + // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#ray-tracing-pipeline-stack + { + const int max_ray_recursion = _maxRayRecursion ? _maxRayRecursion->Get() : 31; + const uint max_call_recursion = _maxCallRecursion ? _maxCallRecursion->Get() : 2; - ctx.SetStackSize( stack_size ); - } + const Bytes stack_size = + uint(Min( 1, max_ray_recursion )) * Bytes{Max( _closestHitStackMax, _missStackMax, _intersectionStackMax, _anyHitStackMax )} + + uint(Max( 0, max_ray_recursion-1 )) * Bytes{Max( _closestHitStackMax, _missStackMax )} + + Max( 2u, max_call_recursion ) * Bytes{_callableStackMax} + + _rayGenStackMax; - for (const auto& it : _iterations) - { - if ( it.indirect ){ - ctx.TraceRaysIndirect( sbt, it.indirect->GetBufferId( ctx.GetFrameId() ), it.indirectOffset ); - }else{ - ctx.TraceRays( it.Dimension(), sbt ); + ctx.SetStackSize( stack_size ); } - if ( not IsLastElement( it, _iterations )) + for (const auto& it : _iterations) { - ctx.ExecutionBarrier( EPipelineScope::RayTracing, EPipelineScope::RayTracing ); - ctx.CommitBarriers(); + if ( it.indirect ){ + ctx.TraceRaysIndirect( sbt, it.indirect->GetBufferId( ctx.GetFrameId() ), it.indirectOffset ); + }else{ + ctx.TraceRays( it.Dimension(), sbt ); + } + + if ( not IsLastElement( it, _iterations )) + { + ctx.ExecutionBarrier( EPipelineScope::RayTracing, EPipelineScope::RayTracing ); + 
ctx.CommitBarriers(); + } } - } - pd.cmdbuf = ctx.ReleaseCommandBuffer(); + pd.cmdbuf = ctx.ReleaseCommandBuffer(); + } return true; } diff --git a/AE/samples/res_editor/Passes/Renderer.cpp b/AE/samples/res_editor/Passes/Renderer.cpp index 99d44fef..0f969c5f 100644 --- a/AE/samples/res_editor/Passes/Renderer.cpp +++ b/AE/samples/res_editor/Passes/Renderer.cpp @@ -166,7 +166,7 @@ namespace AE::ResEditor if ( auto surf = rg.GetSurface() ) { if ( auto infos = surf->GetTargetInfo(); not infos.empty() ) { - surf_size = infos[0].dimension; + surf_size = float2{infos[0].dimension}; pix_to_mm = infos[0].pixToMm; } } diff --git a/AE/samples/res_editor/Passes/Scene.cpp b/AE/samples/res_editor/Passes/Scene.cpp index 7428a01c..d548ac24 100644 --- a/AE/samples/res_editor/Passes/Scene.cpp +++ b/AE/samples/res_editor/Passes/Scene.cpp @@ -19,7 +19,7 @@ namespace AE::ResEditor CHECK_ERR( _scene ); - const uint2 dim {_renderTargets[0].image->GetViewDimension()}; + const uint2 dim = _renderTargets[0].image->GetViewDesc().Dimension2(); const auto& instances = _scene->_geomInstances; Array dbg_result; LinearAllocator<> allocator; @@ -27,7 +27,7 @@ namespace AE::ResEditor if_unlikely( pd.dbg.IsEnabled( this )) { DirectCtx::Transfer tctx { pd.rtask, RVRef(pd.cmdbuf) }; - const uint2 coord = uint2{pd.dbg.coord * float2{dim} + 0.5f}; + const uint2 coord = uint2{pd.dbg.coord * float2(dim-1u)}; dbg_result.resize( instances.size() ); for (usize i = 0; i < instances.size(); ++i) @@ -40,60 +40,75 @@ namespace AE::ResEditor DirectCtx::Graphics ctx { pd.rtask, RVRef(pd.cmdbuf), DebugLabel{_dbgName, _dbgColor} }; - // state transition + for (uint it = 0, cnt = _GetRepeatCount(); it < cnt; ++it) { - for (usize i = 0; i < instances.size(); ++i) { - instances[i].geometry->StateTransition( *_materials[i], ctx ); + // state transition + { + for (usize i = 0; i < instances.size(); ++i) { + instances[i].geometry->StateTransition( *_materials[i], ctx ); + } + _resources.SetStates( ctx, Default ); + 
ctx.ResourceState( _ubuffer, EResourceState::UniformRead | EResourceState::AllGraphicsShaders ); + ctx.CommitBarriers(); } - _resources.SetStates( ctx, Default ); - ctx.ResourceState( _ubuffer, EResourceState::UniformRead | EResourceState::AllGraphicsShaders ); - ctx.CommitBarriers(); - } - // render pass - { - RenderPassDesc rp_desc = _rpDesc; + // render pass + { + Scissors_t scissors; + RenderPassDesc rp_desc = _rpDesc; - for (auto& rt : _renderTargets) { - rp_desc.AddTarget( rt.name, rt.image->GetViewId(), rt.clear ); - } + for (auto& rt : _renderTargets) { + rp_desc.AddTarget( rt.name, rt.image->GetViewId(), rt.clear ); + } - rp_desc.area = RectI{ int2{dim} }; - rp_desc.DefaultViewport( _depthRange.x, _depthRange.y ); + rp_desc.area = RectI{ int2{dim} }; + for (auto& vp : rp_desc.viewports) { + vp.rect *= float2{dim}; + } - DescriptorSetID ds = _descSets[ ctx.GetFrameId().Index() ]; - auto dctx = ctx.BeginRenderPass( rp_desc, DebugLabel{_dbgName, _dbgColor} ); + for (usize i = 0; i < _scissors.size(); ++i) + scissors.push_back( RectI{ _scissors[i] * float2{dim} }); - if ( _shadingRate ) - dctx.SetFragmentShadingRate( _shadingRate.rate, _shadingRate.primitiveOp, _shadingRate.textureOp ); + DescriptorSetID ds = _descSets[ ctx.GetFrameId().Index() ]; + auto dctx = ctx.BeginRenderPass( rp_desc, DebugLabel{_dbgName, _dbgColor} ); - decltype(&IGeomSource::Draw) draw_fn = null; - switch_enum( _renderLayer ) - { - case ERenderLayer::Opaque : - case ERenderLayer::Translucent : draw_fn = &IGeomSource::Draw; break; - case ERenderLayer::PostProcess : draw_fn = &IGeomSource::PostProcess; break; - case ERenderLayer::_Count : break; - } - switch_end + if ( not scissors.empty() ) + dctx.SetScissors( scissors ); - // draw - if ( draw_fn != null ) - { - for (usize i = 0; i < instances.size(); ++i) + if ( _shadingRate ) + dctx.SetFragmentShadingRate( _shadingRate.rate, _shadingRate.primitiveOp, _shadingRate.textureOp ); + + if ( not _wScaling.empty() ) + 
dctx.SetViewportWScaling( _wScaling ); + + decltype(&IGeomSource::Draw) draw_fn = null; + switch_enum( _renderLayer ) { - bool has_dbg_result = (not dbg_result.empty()) and (dbg_result[i] != null); - - CHECK_ERR( ((*instances[i].geometry).*draw_fn)( IGeomSource::DrawData{ - *_materials[i], dctx, ds, - (has_dbg_result ? dbg_result[i] : null), - (has_dbg_result ? pd.dbg.mode : Default), - (has_dbg_result ? pd.dbg.stage : Default) - })); + case ERenderLayer::Opaque : + case ERenderLayer::Translucent : draw_fn = &IGeomSource::Draw; break; + case ERenderLayer::PostProcess : draw_fn = &IGeomSource::PostProcess; break; + case ERenderLayer::_Count : break; + } + switch_end + + // draw + if ( draw_fn != null ) + { + for (usize i = 0; i < instances.size(); ++i) + { + bool has_dbg_result = (not dbg_result.empty()) and (dbg_result[i] != null); + + CHECK_ERR( ((*instances[i].geometry).*draw_fn)( IGeomSource::DrawData{ + *_materials[i], dctx, ds, + (has_dbg_result ? dbg_result[i] : null), + (has_dbg_result ? pd.dbg.mode : Default), + (has_dbg_result ? 
pd.dbg.stage : Default) + })); + } } - } - ctx.EndRenderPass( dctx ); + ctx.EndRenderPass( dctx ); + } } pd.cmdbuf = ctx.ReleaseCommandBuffer(); @@ -112,11 +127,11 @@ namespace AE::ResEditor // validate dimensions { - const uint2 cur_dim = uint2{ _renderTargets.front().image->GetViewDimension() }; + const uint2 cur_dim = _renderTargets.front().image->GetViewDesc().Dimension2(); for (auto& rt : _renderTargets) { - const uint2 dim = uint2{ rt.image->GetViewDimension() }; + const uint2 dim = rt.image->GetViewDesc().Dimension2(); CHECK_ERR( All( cur_dim == dim )); } } @@ -210,41 +225,43 @@ namespace AE::ResEditor CHECK_ERR( _scene ); - uint2 dim; - DirectCtx::RayTracing ctx { pd.rtask, RVRef(pd.cmdbuf), DebugLabel{_dbgName, _dbgColor} }; - const uint fid = ctx.GetFrameId().Index(); - const auto& instances = _scene->_geomInstances; - - // state transition + for (uint i = 0, cnt = _GetRepeatCount(); i < cnt; ++i) { - for (auto& inst : instances) { - inst.geometry->StateTransition( ctx ); - } - _resources.SetStates( ctx, Default ); - ctx.ResourceState( _ubuffer, EResourceState::UniformRead | EResourceState::RayTracingShaders ); - ctx.CommitBarriers(); - } - - ctx.BindPipeline( _pipeline ); - ctx.BindDescriptorSet( _passDSIndex, _passDescSets[fid] ); - ctx.BindDescriptorSet( _objDSIndex, _objDescSets[fid] ); + DirectCtx::RayTracing ctx { pd.rtask, RVRef(pd.cmdbuf), DebugLabel{_dbgName, _dbgColor} }; + const uint fid = ctx.GetFrameId().Index(); + const auto& instances = _scene->_geomInstances; - for (const auto& it : _iterations) - { - if ( it.indirect ){ - ctx.TraceRaysIndirect( _sbt, it.indirect->GetBufferId( fid ), it.indirectOffset ); - }else{ - ctx.TraceRays( it.Dimension(), _sbt ); + // state transition + { + for (auto& inst : instances) { + inst.geometry->StateTransition( ctx ); + } + _resources.SetStates( ctx, Default ); + ctx.ResourceState( _ubuffer, EResourceState::UniformRead | EResourceState::RayTracingShaders ); + ctx.CommitBarriers(); } - if ( not 
IsLastElement( it, _iterations )) + ctx.BindPipeline( _pipeline ); + ctx.BindDescriptorSet( _passDSIndex, _passDescSets[fid] ); + ctx.BindDescriptorSet( _objDSIndex, _objDescSets[fid] ); + + for (const auto& it : _iterations) { - ctx.ExecutionBarrier( EPipelineScope::RayTracing, EPipelineScope::RayTracing ); - ctx.CommitBarriers(); + if ( it.indirect ){ + ctx.TraceRaysIndirect( _sbt, it.indirect->GetBufferId( fid ), it.indirectOffset ); + }else{ + ctx.TraceRays( it.Dimension(), _sbt ); + } + + if ( not IsLastElement( it, _iterations )) + { + ctx.ExecutionBarrier( EPipelineScope::RayTracing, EPipelineScope::RayTracing ); + ctx.CommitBarriers(); + } } - } - pd.cmdbuf = ctx.ReleaseCommandBuffer(); + pd.cmdbuf = ctx.ReleaseCommandBuffer(); + } return true; } diff --git a/AE/samples/res_editor/Passes/Scene.h b/AE/samples/res_editor/Passes/Scene.h index c1b2f8ce..3792ce7a 100644 --- a/AE/samples/res_editor/Passes/Scene.h +++ b/AE/samples/res_editor/Passes/Scene.h @@ -48,8 +48,11 @@ namespace AE::ResEditor // types private: - using Materials_t = Array< RC >; - using PplnToObjID_t = IGeomSource::DebugPrepareData::PplnToObjID_t; + using Materials_t = Array< RC >; + using PplnToObjID_t = IGeomSource::DebugPrepareData::PplnToObjID_t; + using ViewportWScaling_t = FixedArray< packed_float2, GraphicsConfig::MaxViewports >; + using FScissors_t = FixedArray< RectF, GraphicsConfig::MaxViewports >; + using Scissors_t = FixedArray< RectI, GraphicsConfig::MaxViewports >; public: struct ShadingRate @@ -70,8 +73,9 @@ namespace AE::ResEditor Materials_t _materials; RenderPassDesc _rpDesc; - float2 _depthRange {0.f, 1.f}; ERenderLayer _renderLayer; + ViewportWScaling_t _wScaling; + FScissors_t _scissors; ResourceArray _resources; // per pass RenderTargets_t _renderTargets; diff --git a/AE/samples/res_editor/Readme.md b/AE/samples/res_editor/Readme.md index 5b675940..df3ea3e0 100644 --- a/AE/samples/res_editor/Readme.md +++ b/AE/samples/res_editor/Readme.md @@ -65,7 +65,6 @@ Optional: 
download `RemoteGraphicsDevice` from CI to run on any platform: * [MacOS](https://github.com/azhirnov/as-en/actions/workflows/macos.yml) * [Android](https://github.com/azhirnov/as-en/actions/workflows/android.yml) - `rg-device.apk` -Optional but highly recommended: download [`_data` folder](https://github.com/azhirnov/as-en/tree/dev/AE/samples/res_editor/_data) from git. +Optional: download samples from [`_data` folder](https://github.com/azhirnov/as-en/tree/dev/AE/samples/res_editor/_data) from git. -Download resources: -TODO +Optional: download [resources](https://disk.yandex.ru/d/nbdf1UYipPV0CA) for samples. diff --git a/AE/samples/res_editor/Resources/DefaultResources.cpp b/AE/samples/res_editor/Resources/DefaultResources.cpp index fb93bf21..103b1c8f 100644 --- a/AE/samples/res_editor/Resources/DefaultResources.cpp +++ b/AE/samples/res_editor/Resources/DefaultResources.cpp @@ -26,7 +26,8 @@ namespace AE::ResEditor _CreateDummyImage2D( OUT _dummyRes.image2D, _gfxLinearAlloc ); _CreateDummyImage3D( OUT _dummyRes.image3D, _gfxLinearAlloc ); - _CreateDummyImageCube( OUT _dummyRes.imageCube, _gfxLinearAlloc ); + _CreateDummyImage2DArray( OUT _dummyRes.imageCube, _gfxLinearAlloc, True{"CubeMap"} ); + _CreateDummyImage2DArray( OUT _dummyRes.image2DArr, _gfxLinearAlloc, False{} ); if ( rts.GetFeatureSet().accelerationStructure() == FeatureSet::EFeature::RequireTrue ) { @@ -47,6 +48,7 @@ namespace AE::ResEditor res_mngr.ReleaseResources( _dummyRes.image2D.image, _dummyRes.image2D.view ); res_mngr.ReleaseResources( _dummyRes.image3D.image, _dummyRes.image3D.view ); res_mngr.ReleaseResources( _dummyRes.imageCube.image, _dummyRes.imageCube.view ); + res_mngr.ReleaseResources( _dummyRes.image2DArr.image, _dummyRes.image2DArr.view ); res_mngr.ReleaseResources( _dummyRes.rtGeometry, _dummyRes.rtScene ); } @@ -67,18 +69,21 @@ namespace AE::ResEditor StrongImageAndViewID result; - if ( is_cube or is_2darr ) + if ( is_cube ) { result.image = res_mngr.AcquireResource( 
_dummyRes.imageCube.image.Get() ); result.view = res_mngr.AcquireResource( _dummyRes.imageCube.view.Get() ); - } - else + }else + if ( is_2darr ) + { + result.image = res_mngr.AcquireResource( _dummyRes.image2DArr.image.Get() ); + result.view = res_mngr.AcquireResource( _dummyRes.image2DArr.view.Get() ); + }else if ( is_2d ) { result.image = res_mngr.AcquireResource( _dummyRes.image2D.image.Get() ); result.view = res_mngr.AcquireResource( _dummyRes.image2D.view.Get() ); - } - else + }else if ( is_3d ) { result.image = res_mngr.AcquireResource( _dummyRes.image3D.image.Get() ); @@ -168,10 +173,10 @@ namespace AE::ResEditor /* ================================================= - _CreateDummyImageCube + _CreateDummyImage2DArray ================================================= */ - void DefaultResources::_CreateDummyImageCube (OUT StrongImageAndViewID &dst, GfxMemAllocatorPtr gfxAlloc) const + void DefaultResources::_CreateDummyImage2DArray (OUT StrongImageAndViewID &dst, GfxMemAllocatorPtr gfxAlloc, Bool cubemap) const { auto& res_mngr = GraphicsScheduler().GetResourceManager(); ImageDesc desc; @@ -180,15 +185,17 @@ namespace AE::ResEditor desc.SetDimension( uint2{2} ); desc.SetUsage( EImageUsage::Sampled | EImageUsage::TransferSrc ); desc.SetArrayLayers( 6 ); - desc.SetOptions( EImageOpt::CubeCompatible ); - dst.image = res_mngr.CreateImage( desc, "dummy image cube", gfxAlloc ); + if ( cubemap ) + desc.SetOptions( EImageOpt::CubeCompatible ); + + dst.image = res_mngr.CreateImage( desc, (cubemap ? "dummy image cube" : "dummy image 2d array"), gfxAlloc ); CHECK_ERRV( dst.image ); ImageViewDesc view {desc}; view.swizzle = "RRR1"_swizzle; - dst.view = res_mngr.CreateImageView( view, dst.image, "dummy image cube view" ); + dst.view = res_mngr.CreateImageView( view, dst.image, (cubemap ? 
"dummy image cube view" : "dummy image 2d array view") ); CHECK_ERRV( dst.view ); RenderGraph().GetStateTracker().AddResource( dst.image, Default, EResourceState::ShaderSample | EResourceState::AllShaders ); @@ -246,7 +253,7 @@ namespace AE::ResEditor { auto& fmt_info = EPixelFormat_GetInfo( desc.format ); return ChooseAllocator( isDynamic, - ImageUtils::ImageSize( desc.dimension, desc.arrayLayers, desc.mipLevels, desc.samples, fmt_info.bitsPerBlock, fmt_info.TexBlockDim() )); + ImageUtils::ImageSize( desc.Dimension(), desc.arrayLayers, desc.mipLevels, desc.samples, fmt_info.bitsPerBlock, fmt_info.TexBlockDim() )); } GfxMemAllocatorPtr DefaultResources::ChooseAllocator (Bool isDynamic, const VideoImageDesc &desc) C_NE___ @@ -266,10 +273,10 @@ namespace AE::ResEditor if ( aspect != EImageAspect::Color ) { CHECK_ERRV( EPixelFormat_GetPlaneInfo( desc.format, aspect, OUT plane_fmt, OUT plane_scale )); - CHECK_ERRV( All( IsMultipleOf( desc.dimension, plane_scale ))); + CHECK_ERRV( All( IsMultipleOf( desc.Dimension2(), plane_scale ))); } - const uint2 dim = desc.dimension / plane_scale; + const uint2 dim = desc.Dimension2() / plane_scale; auto& plane_info = EPixelFormat_GetInfo( plane_fmt ); size += ImageUtils::ImageSize( uint3{dim,1}, plane_info.bitsPerBlock, plane_info.TexBlockDim() ); diff --git a/AE/samples/res_editor/Resources/DefaultResources.h b/AE/samples/res_editor/Resources/DefaultResources.h index d3ab0a01..5403fa25 100644 --- a/AE/samples/res_editor/Resources/DefaultResources.h +++ b/AE/samples/res_editor/Resources/DefaultResources.h @@ -24,6 +24,7 @@ namespace AE::ResEditor struct { StrongImageAndViewID image2D; StrongImageAndViewID imageCube; + StrongImageAndViewID image2DArr; StrongImageAndViewID image3D; Strong rtGeometry; @@ -59,11 +60,11 @@ namespace AE::ResEditor DefaultResources (); private: - void _CreateDummyImage2D (OUT StrongImageAndViewID &, GfxMemAllocatorPtr) const; - void _CreateDummyImage3D (OUT StrongImageAndViewID &, GfxMemAllocatorPtr) 
const; - void _CreateDummyImageCube (OUT StrongImageAndViewID &, GfxMemAllocatorPtr) const; - void _CreateDummyRTGeometry (OUT Strong &, GfxMemAllocatorPtr) const; - void _CreateDummyRTScene (OUT Strong &, GfxMemAllocatorPtr) const; + void _CreateDummyImage2D (OUT StrongImageAndViewID &, GfxMemAllocatorPtr) const; + void _CreateDummyImage3D (OUT StrongImageAndViewID &, GfxMemAllocatorPtr) const; + void _CreateDummyImage2DArray (OUT StrongImageAndViewID &, GfxMemAllocatorPtr, Bool) const; + void _CreateDummyRTGeometry (OUT Strong &, GfxMemAllocatorPtr) const; + void _CreateDummyRTScene (OUT Strong &, GfxMemAllocatorPtr) const; }; diff --git a/AE/samples/res_editor/Resources/Image.cpp b/AE/samples/res_editor/Resources/Image.cpp index 5113c378..b38f5c61 100644 --- a/AE/samples/res_editor/Resources/Image.cpp +++ b/AE/samples/res_editor/Resources/Image.cpp @@ -296,7 +296,7 @@ namespace { if ( _outDynSize ) { ASSERT( imageDesc.imageDim == _outDynSize->NumDimensions() ); - _outDynSize->Resize( imageDesc.dimension ); + _outDynSize->Resize( imageDesc.Dimension() ); } auto derived = _derived.ReadLock(); @@ -498,9 +498,9 @@ namespace { CHECK_ERR( mem ); auto& hdr = PlacementNew
( mem->Data() )->hdr; - hdr.dimension = packed_ushort3{ImageUtils::MipmapDimension( img_desc.dimension, view_desc.baseMipmap.Get(), EPixelFormat_GetInfo(view_desc.format).TexBlockDim() )}; - hdr.arrayLayers = ushort(view_desc.layerCount); - hdr.mipmaps = ushort(view_desc.mipmapCount); + hdr.dimension = view_desc.dimension; + hdr.arrayLayers = view_desc.layerCount; + hdr.mipmaps = view_desc.mipmapCount; hdr.viewType = view_desc.viewType; hdr.format = view_desc.format; @@ -541,7 +541,7 @@ namespace { const auto layer = view_desc.baseLayer + op.curLayer; ReadbackImageDesc read; - read.imageDim = ImageUtils::MipmapDimension( img_desc.dimension, mipmap.Get(), fmt_info.TexBlockDim() ); + read.imageDim = ImageUtils::MipmapDimension( img_desc.Dimension(), mipmap.Get(), fmt_info.TexBlockDim() ); read.arrayLayer = layer; read.mipLevel = mipmap; read.heapType = EStagingHeapType::Dynamic; @@ -556,12 +556,12 @@ namespace { const auto mipmap = view_desc.baseMipmap + cur_mipmap; const auto layer = view_desc.baseLayer + cur_layer; const auto& fmt_info = EPixelFormat_GetInfo( view_desc.format ); - const uint3 mip_dim = ImageUtils::MipmapDimension( img_desc.dimension, mipmap.Get(), fmt_info.TexBlockDim() ); + const uint3 mip_dim = ImageUtils::MipmapDimension( img_desc.Dimension(), mipmap.Get(), fmt_info.TexBlockDim() ); AssetPacker::ImagePacker::Header header; - header.dimension = packed_ushort3{ImageUtils::MipmapDimension( img_desc.dimension, view_desc.baseMipmap.Get(), fmt_info.TexBlockDim() )}; - header.arrayLayers = ushort(view_desc.layerCount); - header.mipmaps = ushort(view_desc.mipmapCount); + header.dimension = view_desc.dimension; + header.arrayLayers = view_desc.layerCount; + header.mipmaps = view_desc.mipmapCount; header.viewType = view_desc.viewType; header.format = view_desc.format; @@ -584,7 +584,7 @@ namespace { Unused( file->WriteBlock( off + SizeOf, memView.ContentSize(), RVRef(mem) )); }, "Image::Readback", - ETaskQueue::Background + ETaskQueue::PerFrame ); if ( 
op.stream.IsCompleted() ) @@ -724,7 +724,7 @@ namespace { ImageViewDesc view_desc = GetViewDesc(); CHECK_ERR( CompareImageTypes( desc, intermImg )); - desc.dimension = intermImg.Dimension() << baseMipmap.Get(); + desc.dimension = CheckCast( intermImg.Dimension() << baseMipmap.Get() ); desc.arrayLayers = ImageLayer{ intermImg.ArrayLayers() + baseLayer.Get() }; desc.mipLevels = MipmapLevel{ intermImg.MipLevels() + baseMipmap.Get() }; desc.imageDim = intermImg.GetImageDim(); @@ -828,16 +828,5 @@ namespace { return null; } -/* -================================================= - GetViewDimension -================================================= -*/ - uint3 Image::GetViewDimension () C_NE___ - { - auto [desc, view] = _imageDesc.ReadAll(); - return Max( 1u, desc.dimension >> view.baseMipmap.Get() ); - } - } // AE::ResEditor diff --git a/AE/samples/res_editor/Resources/Image.h b/AE/samples/res_editor/Resources/Image.h index 094d63c2..d56110b3 100644 --- a/AE/samples/res_editor/Resources/Image.h +++ b/AE/samples/res_editor/Resources/Image.h @@ -135,8 +135,6 @@ namespace AE::ResEditor ND_ ImageViewDesc GetViewDesc () C_NE___ { return _imageDesc.Read<1>(); } ND_ StringView GetName () C_NE___ { return _dbgName; } - ND_ uint3 GetViewDimension () C_NE___; - ND_ RC CreateView (const ImageViewDesc &, StringView dbgName) __NE___; diff --git a/AE/samples/res_editor/Resources/RTScene.cpp b/AE/samples/res_editor/Resources/RTScene.cpp index c57b53fb..11d79dc6 100644 --- a/AE/samples/res_editor/Resources/RTScene.cpp +++ b/AE/samples/res_editor/Resources/RTScene.cpp @@ -518,6 +518,8 @@ namespace AE::ResEditor break; } } + switch_end + CHECK_ERR( mem_view.CopyFrom( instances ) == size ); } diff --git a/AE/samples/res_editor/Resources/VideoImage.cpp b/AE/samples/res_editor/Resources/VideoImage.cpp index 6e2d52e1..c1784bbd 100644 --- a/AE/samples/res_editor/Resources/VideoImage.cpp +++ b/AE/samples/res_editor/Resources/VideoImage.cpp @@ -48,7 +48,7 @@ namespace AE::ResEditor CHECK_THROW( 
inDesc.format == config.dstFormat ); ImageDesc desc = inDesc; - desc.dimension = uint3{ props.videoStream.dimension, 1u }; + desc.dimension = ImageDim_t{uint3{ props.videoStream.dimension, 1u }}; _dimension = props.videoStream.dimension; auto& res_mngr = GraphicsScheduler().GetResourceManager(); @@ -73,7 +73,7 @@ namespace AE::ResEditor _DtTrQueue().EnqueueImageTransition( _ids[i] ); } - _allocator.SetBlockSize( EPixelFormat_ImageSize( desc.format, desc.dimension )); + _allocator.SetBlockSize( EPixelFormat_ImageSize( desc.format, desc.Dimension() )); for (usize i = 0; i < _imageMemView.size(); ++i) { CHECK_THROW( Video::IVideoDecoder::AllocMemView( config, OUT _imageMemView[i], _allocator )); @@ -315,7 +315,7 @@ namespace AE::ResEditor void VideoImage::_Validate (const States s) __NE___ { Unused( s ); - ASSERT( not AnyBits( s.emptyBits, s.decodedBits )); + ASSERT( NoBits( s.emptyBits, s.decodedBits )); ASSERT( (s.emptyBits | s.decodedBits) == ToBitMask(_MaxCpuImages) ); ASSERT( s.decodedBits == 0 or HasBit( s.decodedBits, s.pos )); } diff --git a/AE/samples/res_editor/Resources/VideoImage2.cpp b/AE/samples/res_editor/Resources/VideoImage2.cpp index 46cb608f..74c570e0 100644 --- a/AE/samples/res_editor/Resources/VideoImage2.cpp +++ b/AE/samples/res_editor/Resources/VideoImage2.cpp @@ -51,7 +51,7 @@ namespace AE::ResEditor CHECK_THROW( inDesc.format == config.dstFormat ); // ycbcr sampler created for format 'inDesc.format' VideoImageDesc desc; - desc.dimension = uint2{inDesc.dimension}; + desc.dimension = ImageDim2_t{inDesc.dimension}; desc.arrayLayers = 1_layer; desc.format = config.dstFormat; desc.options = inDesc.options; @@ -339,7 +339,7 @@ namespace AE::ResEditor void VideoImage2::_Validate (const States s) __NE___ { Unused( s ); - ASSERT( not AnyBits( s.emptyBits, s.decodedBits )); + ASSERT( NoBits( s.emptyBits, s.decodedBits )); ASSERT( (s.emptyBits | s.decodedBits) == ToBitMask(_MaxCpuImages) ); ASSERT( s.decodedBits == 0 or HasBit( s.decodedBits, s.pos )); } 
diff --git a/AE/samples/res_editor/Resources/VideoImage2.h b/AE/samples/res_editor/Resources/VideoImage2.h index 854d5577..ec4537e8 100644 --- a/AE/samples/res_editor/Resources/VideoImage2.h +++ b/AE/samples/res_editor/Resources/VideoImage2.h @@ -56,7 +56,7 @@ namespace AE::ResEditor RC _outDynSize; // triggered when current image has been resized RC _decoder; - uint2 _dimension; + VideoImageDim_t _dimension; StreamArr_t _streamArr; AsyncTask _lastDecoding; diff --git a/AE/samples/res_editor/Scripting/PipelineCompiler.inl.h b/AE/samples/res_editor/Scripting/PipelineCompiler.inl.h index 9128b9b0..2e454741 100644 --- a/AE/samples/res_editor/Scripting/PipelineCompiler.inl.h +++ b/AE/samples/res_editor/Scripting/PipelineCompiler.inl.h @@ -18,19 +18,20 @@ namespace AE::ResEditor { using namespace AE::Base; + using AE::PipelineCompiler::EImageType; /* ================================================= GetDescriptorImageType ================================================= */ - ND_ inline PipelineCompiler::EImageType GetDescriptorImageType (const Graphics::EPixelFormat fmt, const Graphics::EImage type, bool multisampling) + ND_ inline EImageType GetDescriptorImageType (const Graphics::EPixelFormat fmt, const Graphics::EImage type, bool multisampling) { return PipelineCompiler::EImageType_FromPixelFormat( fmt ) | PipelineCompiler::EImageType_FromImage( type, multisampling ); } - ND_ inline PipelineCompiler::EImageType GetDescriptorImageType (const Graphics::ImageDesc &desc) + ND_ inline EImageType GetDescriptorImageType (const Graphics::ImageDesc &desc) { CHECK_ERR( desc.imageDim != Default ); CHECK_ERR( desc.format != Default ); @@ -41,7 +42,7 @@ namespace AE::ResEditor return GetDescriptorImageType( desc.format, view.viewType, desc.samples.IsEnabled() ); } - ND_ inline PipelineCompiler::EImageType GetDescriptorImageType (const Graphics::ImageDesc &desc, const Graphics::ImageViewDesc &view) + ND_ inline EImageType GetDescriptorImageType (const Graphics::ImageDesc &desc, 
const Graphics::ImageViewDesc &view) { CHECK_ERR( desc.imageDim != Default ); CHECK_ERR( view.format != Default ); @@ -54,13 +55,13 @@ namespace AE::ResEditor GetDescriptorImageTypeRelaxed ================================================= */ - ND_ inline PipelineCompiler::EImageType GetDescriptorImageTypeRelaxed (const Graphics::EPixelFormat fmt, const Graphics::EImage type, bool multisampling) + ND_ inline EImageType GetDescriptorImageTypeRelaxed (const Graphics::EPixelFormat fmt, const Graphics::EImage type, Bool multisampling, Bool cubemap) { return PipelineCompiler::EImageType_FromPixelFormatRelaxed( fmt ) | - PipelineCompiler::EImageType_FromImage( type, multisampling ); + PipelineCompiler::EImageType_FromImage( type, multisampling, cubemap ); } - ND_ inline Tuple< PipelineCompiler::EImageType, PipelineCompiler::EImageType > GetDescriptorImageTypeRelaxed (const Graphics::ImageDesc &desc) + ND_ inline Tuple< EImageType, EImageType > GetDescriptorImageTypeRelaxed (const Graphics::ImageDesc &desc) { CHECK_ERR( desc.imageDim != Default ); CHECK_ERR( desc.format != Default ); @@ -68,14 +69,14 @@ namespace AE::ResEditor Graphics::ImageViewDesc view {desc}; view.Validate( desc ); - auto t0 = GetDescriptorImageTypeRelaxed( desc.format, view.viewType, desc.samples.IsEnabled() ); + auto t0 = GetDescriptorImageTypeRelaxed( desc.format, view.viewType, Bool{desc.samples.IsEnabled()}, False{} ); auto t1 = t0; if ( AllBits( desc.options, Graphics::EImageOpt::CubeCompatible )) { view.viewType = desc.arrayLayers.Get() > 6 ? 
Graphics::EImage::CubeArray : Graphics::EImage::Cube; - t1 = GetDescriptorImageTypeRelaxed( desc.format, view.viewType, desc.samples.IsEnabled() ); + t1 = GetDescriptorImageTypeRelaxed( desc.format, view.viewType, Bool{desc.samples.IsEnabled()}, True{} ); } return Tuple{ t0, t1 }; } diff --git a/AE/samples/res_editor/Scripting/ScriptBasePass.cpp b/AE/samples/res_editor/Scripting/ScriptBasePass.cpp index 30a2da04..d778fdeb 100644 --- a/AE/samples/res_editor/Scripting/ScriptBasePass.cpp +++ b/AE/samples/res_editor/Scripting/ScriptBasePass.cpp @@ -40,7 +40,6 @@ namespace AE::ResEditor ================================================= */ ScriptBasePass::ScriptBasePass () __Th___ : - _dynamicDim{ new ScriptDynamicDim{ MakeRC( uint2{1} )}}, _args{ [this](ScriptPassArgs::Argument &arg) { _OnAddArg( arg ); }} {} @@ -391,6 +390,17 @@ namespace AE::ResEditor } } +/* +================================================= + _Dimension +================================================= +*/ + ScriptDynamicDim* ScriptBasePass::_Dimension () __Th___ + { + CHECK_THROW_MSG( _HasCustomDynamicDimension() ); + return ScriptDynamicDimPtr{_dynamicDim}.Detach(); + } + /* ================================================= _SetDynamicDimension @@ -400,15 +410,28 @@ namespace AE::ResEditor { CHECK_THROW_MSG( dynDim and dynDim->Get() ); - if ( _dynamicDim == dynDim or _dynamicDim->Get() == dynDim->Get() ) - return; + if ( _HasCustomDynamicDimension() ) + { + if ( _dynamicDim == dynDim or _dynamicDim->Get() == dynDim->Get() ) + return; + } - CHECK_THROW_MSG( _dynamicDim.UseCount() == 1 and _dynamicDim->Get().use_count() == 2, + CHECK_THROW_MSG( not _HasCustomDynamicDimension(), "Previous dynamic dimension is already used" ); _dynamicDim = dynDim; } +/* +================================================= + _HasCustomDynamicDimension +================================================= +*/ + bool ScriptBasePass::_HasCustomDynamicDimension () C_Th___ + { + return bool{_dynamicDim}; + } + /* 
================================================= _SetConstDimension @@ -598,6 +621,19 @@ namespace AE::ResEditor } } +/* +================================================= + SetRepeatCount +================================================= +*/ + void ScriptBasePass::SetRepeatCount (const ScriptDynamicUIntPtr &value) __Th___ + { + CHECK_THROW_MSG( value ); + CHECK_THROW_MSG( not _repeatCount, "Repeat count is already set" ); + + _repeatCount = value; + } + /* ================================================= EnableIf* @@ -672,6 +708,9 @@ namespace AE::ResEditor dst._enablePass.op = this->_enablePass.op; } + if ( this->_repeatCount ) + dst._repeatCount = this->_repeatCount->Get(); + AE_LOGI( "Compiled: "s << this->_dbgName ); } diff --git a/AE/samples/res_editor/Scripting/ScriptBasePass.cpp.h b/AE/samples/res_editor/Scripting/ScriptBasePass.cpp.h index 552471dc..3127fba9 100644 --- a/AE/samples/res_editor/Scripting/ScriptBasePass.cpp.h +++ b/AE/samples/res_editor/Scripting/ScriptBasePass.cpp.h @@ -1,5 +1,7 @@ // Copyright (c) Zhirnov Andrey. For more information see 'LICENSE' +#pragma once + #include "res_editor/Scripting/ScriptCommon.h" namespace AE::ResEditor @@ -135,6 +137,9 @@ namespace classBinder.AddMethod( &ScriptBasePass::EnableIfGreater, "EnableIfGreater", {"dynamic", "refValue"} ); classBinder.AddMethod( &ScriptBasePass::EnableIfAnyBit, "EnableIfAnyBit", {"dynamic", "refValue"} ); + classBinder.Comment( "Repeat pass multiple times.\nCan be used for performance tests." 
); + classBinder.AddMethod( &ScriptBasePass::SetRepeatCount, "Repeat", {} ); + if ( withArgs ) { classBinder.Comment( "Add resource to all shaders in the current pass.\n" @@ -173,7 +178,7 @@ namespace ================================================= */ template - void ScriptBaseRenderPass::_BindBaseRenderPass (B &classBinder, Bool withBlending) __Th___ + void ScriptBaseRenderPass::_BindBaseRenderPass (B &classBinder, Bool withBlending, Bool withRWAtt) __Th___ { using C = typename B::Class_t; @@ -291,8 +296,22 @@ namespace classBinder.template AddGenericMethod< void (const String &, const ScriptImagePtr &, const ImageLayer &, uint, const MipmapLevel &, EBlendFactor, EBlendFactor, EBlendOp, EBlendFactor, EBlendFactor, EBlendOp) >( &ScriptBaseRenderPass::_OutputBlend, "OutputBlend", {"name", "image", "baseLayer", "layerCount", "mipmap", "srcRGB", "dstRGB", "opRGB", "srcA", "dstA", "opA"} ); } + // read/write input attachment + if ( withRWAtt ) + { + classBinder.Comment( "Used instead of 'Output()' to define image as input attachment & color attachment (read/write input attachment)." 
); + classBinder.AddMethod( &ScriptBaseRenderPass::_InOut, "InOut", {"inName", "outName", "image"} ); + } + // depth classBinder.AddMethod( &ScriptBaseRenderPass::_SetDepthRange, "DepthRange", {"min", "max"} ); + + // viewports + classBinder.AddMethod( &ScriptBaseRenderPass::_AddViewport0, "AddViewport", {"rect", "minDepth", "maxDepth", "scissor", "wScale"} ); + classBinder.AddMethod( &ScriptBaseRenderPass::_AddViewport1, "AddViewport", {"rect", "minDepth", "maxDepth"} ); + classBinder.AddMethod( &ScriptBaseRenderPass::_AddViewport2, "AddViewport", {"rect"} ); + classBinder.AddMethod( &ScriptBaseRenderPass::_AddViewport3, "AddViewport", {"left", "top", "right", "bottom"} ); + classBinder.AddMethod( &ScriptBaseRenderPass::_AddViewport4, "AddViewport", {"rect", "minDepth", "maxDepth", "scissor"} ); } diff --git a/AE/samples/res_editor/Scripting/ScriptBasePass.h b/AE/samples/res_editor/Scripting/ScriptBasePass.h index 9e8da109..4943a18a 100644 --- a/AE/samples/res_editor/Scripting/ScriptBasePass.h +++ b/AE/samples/res_editor/Scripting/ScriptBasePass.h @@ -94,6 +94,7 @@ namespace AE::ResEditor RGBA8u _dbgColor = HtmlColor::Red; ScriptDynamicDimPtr _dynamicDim; + ScriptDynamicUIntPtr _repeatCount; ScriptBaseControllerPtr _controller; ScriptPassArgs _args; @@ -188,6 +189,8 @@ namespace AE::ResEditor void ConstantDD (const String &name, const ScriptDynamicDimPtr &value) __Th___; + void SetRepeatCount (const ScriptDynamicUIntPtr &) __Th___; + void EnableIfEqual (const ScriptDynamicUIntPtr &dyn, uint ref) __Th___; void EnableIfLess (const ScriptDynamicUIntPtr &dyn, uint ref) __Th___; void EnableIfGreater (const ScriptDynamicUIntPtr &dyn, uint ref) __Th___; @@ -237,10 +240,12 @@ namespace AE::ResEditor void _Init (IPass &dst, const ScriptBaseControllerPtr &defaultController) C_Th___; - ND_ ScriptDynamicDim* _Dimension () __Th___ { return ScriptDynamicDimPtr{_dynamicDim}.Detach(); } + ND_ ScriptDynamicDim* _Dimension () __Th___; void _SetDynamicDimension (const 
ScriptDynamicDimPtr &) __Th___; void _SetConstDimension (const uint3 &dim) __Th___; + ND_ bool _HasCustomDynamicDimension () C_Th___; + template static void _BindBase (B &binder, Bool withArgs) __Th___; diff --git a/AE/samples/res_editor/Scripting/ScriptBaseRenderPass.cpp b/AE/samples/res_editor/Scripting/ScriptBaseRenderPass.cpp index cc25e80e..0e78077e 100644 --- a/AE/samples/res_editor/Scripting/ScriptBaseRenderPass.cpp +++ b/AE/samples/res_editor/Scripting/ScriptBaseRenderPass.cpp @@ -183,5 +183,81 @@ namespace AE::ResEditor _depthRange = float2{min, max}; } +/* +================================================= + _AddViewport +================================================= +*/ + void ScriptBaseRenderPass::_AddViewport0 (const RectF &rect, float minDepth, float maxDepth, const RectF &scissor, const packed_float2 &wScale) __Th___ + { + CHECK_THROW_MSG( _viewports.size() == _wScaling.size() ); + CHECK_THROW_MSG( _viewports.size() == _scissors.size() ); + CHECK_THROW_MSG( GraphicsScheduler().GetFeatureSet().clipSpaceWScalingNV == FeatureSet::EFeature::RequireTrue, + "'clipSpaceWScalingNV' feature is not supported" ); + + auto& vp = _viewports.emplace_back(); + vp.rect = rect; + vp.minDepth = minDepth; + vp.maxDepth = maxDepth; + + _wScaling.push_back( wScale ); + _scissors.push_back( scissor ); + } + + void ScriptBaseRenderPass::_AddViewport1 (const RectF &rect, float minDepth, float maxDepth) __Th___ + { + auto& vp = _viewports.emplace_back(); + vp.rect = rect; + vp.minDepth = minDepth; + vp.maxDepth = maxDepth; + } + + void ScriptBaseRenderPass::_AddViewport2 (const RectF &rect) __Th___ + { + _AddViewport1( rect, _depthRange.x, _depthRange.y ); + } + + void ScriptBaseRenderPass::_AddViewport3 (float left, float top, float right, float bottom) __Th___ + { + _AddViewport1( RectF{left, top, right, bottom}, _depthRange.x, _depthRange.y ); + } + + void ScriptBaseRenderPass::_AddViewport4 (const RectF &rect, float minDepth, float maxDepth, const RectF &scissor) 
__Th___ + { + CHECK_THROW_MSG( _viewports.size() == _scissors.size() ); + + auto& vp = _viewports.emplace_back(); + vp.rect = rect; + vp.minDepth = minDepth; + vp.maxDepth = maxDepth; + + _scissors.push_back( scissor ); + } + +/* +================================================= + _InOut +================================================= +*/ + void ScriptBaseRenderPass::_InOut (const String &inName, const String& outName, const ScriptImagePtr &rt) __Th___ + { + CHECK_THROW_MSG( rt ); + CHECK_THROW_MSG( not inName.empty() ); + CHECK_THROW_MSG( not outName.empty() ); + CHECK_THROW_MSG( inName != outName ); + + auto& dst = _output.emplace_back(); + + dst.name = outName; + dst.inName = inName; + dst.rt = rt; + + rt->AddUsage( rt->IsDepthOrStencil() ? EResourceUsage::DepthStencil : EResourceUsage::ColorAttachment ); + rt->AddUsage( EResourceUsage::InputAttachment ); + + if ( rt->IsMutableDimension() ) + _SetDynamicDimension( rt->DimensionRC() ); + } + } // AE::ResEditor diff --git a/AE/samples/res_editor/Scripting/ScriptBaseRenderPass.h b/AE/samples/res_editor/Scripting/ScriptBaseRenderPass.h index 85d5d16f..231babf4 100644 --- a/AE/samples/res_editor/Scripting/ScriptBaseRenderPass.h +++ b/AE/samples/res_editor/Scripting/ScriptBaseRenderPass.h @@ -15,11 +15,15 @@ namespace AE::ResEditor { // types protected: - using ClearValue_t = RenderPassDesc::ClearValue_t; + using ClearValue_t = RenderPassDesc::ClearValue_t; + using Viewports_t = FixedArray< Viewport, GraphicsConfig::MaxViewports >; + using ViewportWScaling_t = FixedArray< packed_float2, GraphicsConfig::MaxViewports >; + using Scissors_t = FixedArray< RectF, GraphicsConfig::MaxViewports >; struct Output { - String name; + String name; // only for color attachment + String inName; // only for input attachment ScriptImagePtr rt; ImageLayer layer; uint layerCount = UMax; @@ -41,21 +45,34 @@ namespace AE::ResEditor protected: Array _output; float2 _depthRange {0.f, 1.f}; + Viewports_t _viewports; + ViewportWScaling_t 
_wScaling; + Scissors_t _scissors; // methods protected: template - static void _BindBaseRenderPass (B &binder, Bool withBlending) __Th___; + static void _BindBaseRenderPass (B &binder, Bool withBlending, Bool withRWAtt) __Th___; - static void _Output (Scripting::ScriptArgList args) __Th___; - static void _OutputBlend (Scripting::ScriptArgList args) __Th___; + static void _Output (Scripting::ScriptArgList args) __Th___; + static void _OutputBlend (Scripting::ScriptArgList args) __Th___; private: - void _Output2 (Scripting::ScriptArgList args) __Th___; - void _OutputBlend2 (Scripting::ScriptArgList args) __Th___; + void _Output2 (Scripting::ScriptArgList args) __Th___; + void _OutputBlend2 (Scripting::ScriptArgList args) __Th___; - void _SetDepthRange (float min, float max) __Th___; + void _InOut (const String &, const String&, const ScriptImagePtr &) __Th___; + + void _SetDepthRange (float min, float max) __Th___; + + void _AddViewport0 (const RectF &rect, float minDepth, float maxDepth, + const RectF &scissor, const packed_float2 &wScale) __Th___; + void _AddViewport1 (const RectF &rect, float minDepth, float maxDepth) __Th___; + void _AddViewport2 (const RectF &rect) __Th___; + void _AddViewport3 (float left, float top, float right, float bottom) __Th___; + void _AddViewport4 (const RectF &rect, float minDepth, float maxDepth, + const RectF &scissor) __Th___; }; diff --git a/AE/samples/res_editor/Scripting/ScriptBuffer.cpp b/AE/samples/res_editor/Scripting/ScriptBuffer.cpp index 7bdee36f..d0190327 100644 --- a/AE/samples/res_editor/Scripting/ScriptBuffer.cpp +++ b/AE/samples/res_editor/Scripting/ScriptBuffer.cpp @@ -202,15 +202,15 @@ namespace */ void ScriptBuffer::_ValidateResourceUsage (const EResourceUsage usage) __Th___ { - CHECK_THROW_MSG( not AnyBits( usage, EResourceUsage::ColorAttachment )); - CHECK_THROW_MSG( not AnyBits( usage, EResourceUsage::DepthStencil )); - CHECK_THROW_MSG( not AnyBits( usage, EResourceUsage::Sampled )); - CHECK_THROW_MSG( not 
AnyBits( usage, EResourceUsage::GenMipmaps )); + CHECK_THROW_MSG( NoBits( usage, EResourceUsage::ColorAttachment )); + CHECK_THROW_MSG( NoBits( usage, EResourceUsage::DepthStencil )); + CHECK_THROW_MSG( NoBits( usage, EResourceUsage::Sampled )); + CHECK_THROW_MSG( NoBits( usage, EResourceUsage::GenMipmaps )); if ( AllBits( usage, EResourceUsage::UploadedData )) { - CHECK_THROW_MSG( not AnyBits( usage, EResourceUsage::ComputeWrite )); - CHECK_THROW_MSG( not AnyBits( usage, EResourceUsage::ShaderAddress )); + CHECK_THROW_MSG( NoBits( usage, EResourceUsage::ComputeWrite )); + CHECK_THROW_MSG( NoBits( usage, EResourceUsage::ShaderAddress )); } auto& fs = ScriptExe::ScriptResourceApi::GetFeatureSet(); @@ -969,6 +969,9 @@ namespace binder.Comment( "Dynamic array size, can be used for draw call." ); binder.AddMethod( &ScriptBuffer::ArraySize, "ArraySize", {} ); + binder.Comment( "Constant array size, can be used for draw call." ); + binder.AddMethod( &ScriptBuffer::ConstArraySize, "ConstArraySize", {} ); + binder.Comment( "Build buffer data layout with initial content.\n" "Returns offset in bytes where data is begin." 
); @@ -1103,6 +1106,7 @@ namespace case EResourceUsage::DepthStencil : case EResourceUsage::ComputeRW : case EResourceUsage::Present : + case EResourceUsage::InputAttachment : default : RETURN_ERR( "unsupported usage" ); } switch_end @@ -1365,6 +1369,8 @@ namespace ScriptDynamicUIntPtr result; if ( _inDynCount ) result = _inDynCount; if ( _outDynCount ) result = _outDynCount; + + ASSERT_MSG( _staticCount == 0, "use ConstArraySize() instead" ); return result; } diff --git a/AE/samples/res_editor/Scripting/ScriptBuffer.h b/AE/samples/res_editor/Scripting/ScriptBuffer.h index b6d741aa..af814400 100644 --- a/AE/samples/res_editor/Scripting/ScriptBuffer.h +++ b/AE/samples/res_editor/Scripting/ScriptBuffer.h @@ -55,7 +55,7 @@ namespace AE::ResEditor BufferViewDesc _viewDesc; EBufferType _type = Default; EResourceUsage _resUsage = Default; - uint _texbufType = 0; // PipelineCompiler::EImageType + uint _texbufType = 0; // EImageType String _dbgName; ScriptDynamicUIntPtr _inDynCount; // array size depends on this dynamic variable @@ -97,12 +97,13 @@ namespace AE::ResEditor ND_ String GetTypeName () C_NE___; ND_ uint TexelBufferType () C_NE___ { ASSERT( not HasLayout() ); return _texbufType; } ND_ bool IsDynamicSize () C_NE___ { return _inDynCount != null; } - ND_ ulong GetDeviceAddress () __Th___; // TODO: return object with address and ref to buffer - ND_ EPixelFormat GetViewFormat () C_Th___; // TODO + ND_ ulong GetDeviceAddress () __Th___; + // ND_ EPixelFormat GetViewFormat () C_Th___; // TODO ND_ bool WithHistory () C_NE___ { return AllBits( _resUsage, EResourceUsage::WithHistory ); } ND_ ScriptDynamicUInt* ArraySize () C_Th___; ND_ ScriptDynamicUIntPtr ArraySizeRC () C_Th___; + ND_ uint ConstArraySize () C_Th___ { return _staticCount; } ND_ Bytes GetFieldOffset (const String &name) __Th___; ND_ uint GetFieldType (const String &name) __Th___; // PipelineCompiler::EValueType diff --git a/AE/samples/res_editor/Scripting/ScriptCommon.h 
b/AE/samples/res_editor/Scripting/ScriptCommon.h index a3623250..935efaa6 100644 --- a/AE/samples/res_editor/Scripting/ScriptCommon.h +++ b/AE/samples/res_editor/Scripting/ScriptCommon.h @@ -20,6 +20,8 @@ namespace AE::ResEditor using AE::Scripting::ScriptEnginePtr; + using AE::PipelineCompiler::EImageType; + using EnableScriptRC = AE::Scripting::AngelScriptHelper::SimpleRefCounter; template @@ -93,6 +95,8 @@ namespace AE::ResEditor WithHistory = 1 << 13, Transfer = 1 << 14, + + InputAttachment = 1 << 15, }; AE_BIT_OPERATORS( EResourceUsage ); diff --git a/AE/samples/res_editor/Scripting/ScriptComputePass.cpp b/AE/samples/res_editor/Scripting/ScriptComputePass.cpp index ed22ea94..0b2f66f3 100644 --- a/AE/samples/res_editor/Scripting/ScriptComputePass.cpp +++ b/AE/samples/res_editor/Scripting/ScriptComputePass.cpp @@ -272,6 +272,9 @@ namespace binder.AddMethod( &ScriptComputePass::LocalSize2v, "LocalSize", {} ); binder.AddMethod( &ScriptComputePass::LocalSize3v, "LocalSize", {} ); + binder.Comment( "Set subgroup size." ); + binder.AddMethod( &ScriptComputePass::SubgroupSize, "SubgroupSize", {} ); + binder.Comment( "Execute compute shader with number of the workgroups.\n" "Total number of threads is 'groupCount * localSize'." 
); binder.AddMethod( &ScriptComputePass::DispatchGroups1, "DispatchGroups", {"groupCountX"} ); @@ -414,7 +417,7 @@ namespace AE::ResEditor ShaderStructTypePtr st{ new ShaderStructType{"ComputePassUB"}}; st->Set( EStructLayout::Std140, R"#( float time; // shader playback time (in seconds) - float timeDelta; // render time (in seconds) + float timeDelta; // frame render time (in seconds), max value: 1/30s uint frame; // shader playback frame, global frame counter uint passFrameId; // current pass frame index uint seed; // unique value, updated on each shader reloading @@ -500,7 +503,7 @@ namespace AE::ResEditor ds_layout->AddUniformBuffer( stage, "un_PerPass", ArraySize{1}, "ComputePassUB", EResourceState::ShaderUniform, False{} ); } - _args.ArgsToDescSet( stage, ds_layout, ArraySize{1}, EAccessType::Coherent ); // throw + _args.ArgsToDescSet( stage, ds_layout, ArraySize{1} ); // throw uint cs_line = 0; @@ -583,6 +586,9 @@ namespace AE::ResEditor ppln_spec->AddToRenderTech( "rtech", "Compute" ); ppln_spec->SetOptions( pplnOpt ); + if ( _subgroupSize != 0 ) + ppln_spec->SetSubgroupSize( _subgroupSize ); + // if successfully compiled ppln_spec->Enable(); } diff --git a/AE/samples/res_editor/Scripting/ScriptComputePass.h b/AE/samples/res_editor/Scripting/ScriptComputePass.h index 407d151f..2a605f6e 100644 --- a/AE/samples/res_editor/Scripting/ScriptComputePass.h +++ b/AE/samples/res_editor/Scripting/ScriptComputePass.h @@ -36,7 +36,8 @@ namespace AE::ResEditor private: const Path _pplnPath; - uint3 _localSize {0}; + uint3 _localSize {0}; + uint _subgroupSize = 0; Iterations_t _iterations; @@ -51,6 +52,8 @@ namespace AE::ResEditor void LocalSize2v (const packed_uint2 &v) __Th___ { return LocalSize3v({ v, 1u }); } void LocalSize3v (const packed_uint3 &v) __Th___; + void SubgroupSize (uint value) __Th___ { _subgroupSize = value; } + void DispatchGroups1 (uint groupCountX) __Th___ { return DispatchGroups3v({ groupCountX, 1u, 1u }); } void DispatchGroups2 (uint groupCountX, 
uint groupCountY) __Th___ { return DispatchGroups3v({ groupCountX, groupCountY, 1u }); } void DispatchGroups3 (uint groupCountX, uint groupCountY, uint groupCountZ) __Th___ { return DispatchGroups3v({ groupCountX, groupCountY, groupCountZ }); } diff --git a/AE/samples/res_editor/Scripting/ScriptDynamicVars.cpp b/AE/samples/res_editor/Scripting/ScriptDynamicVars.cpp index ea47aaaf..c66069c3 100644 --- a/AE/samples/res_editor/Scripting/ScriptDynamicVars.cpp +++ b/AE/samples/res_editor/Scripting/ScriptDynamicVars.cpp @@ -150,37 +150,48 @@ namespace */ void ScriptDynamicDim::Bind (const ScriptEnginePtr &se) __Th___ { - ClassBinder binder{ se }; - binder.CreateRef( 0, False{"no ctor"} ); - - binder.AddFactoryCtor( &ScriptDynamicT_Ctor< ScriptDynamicDim, DynamicDim, packed_uint3 >, {} ); - - binder.AddMethod( &ScriptDynamicDim::Mul1, "opMul", {} ); - binder.AddMethod( &ScriptDynamicDim::Div1, "opDiv", {} ); - binder.AddMethod( &ScriptDynamicDim::Mul1, "Mul", {} ); - binder.AddMethod( &ScriptDynamicDim::Div1, "Div", {} ); - binder.AddMethod( &ScriptDynamicDim::DivRound1, "DivRound", {} ); - binder.AddMethod( &ScriptDynamicDim::DivCeil1, "DivCeil", {} ); - - binder.AddMethod( &ScriptDynamicDim::Mul2, "opMul", {} ); - binder.AddMethod( &ScriptDynamicDim::Div2, "opDiv", {} ); - binder.AddMethod( &ScriptDynamicDim::Mul2, "Mul", {} ); - binder.AddMethod( &ScriptDynamicDim::Div2, "Div", {} ); - binder.AddMethod( &ScriptDynamicDim::DivRound2, "DivRound", {} ); - binder.AddMethod( &ScriptDynamicDim::DivCeil2, "DivCeil", {} ); - - binder.AddMethod( &ScriptDynamicDim::Mul3, "opMul", {} ); - binder.AddMethod( &ScriptDynamicDim::Div3, "opDiv", {} ); - binder.AddMethod( &ScriptDynamicDim::Mul3, "Mul", {} ); - binder.AddMethod( &ScriptDynamicDim::Div3, "Div", {} ); - binder.AddMethod( &ScriptDynamicDim::DivRound3, "DivRound", {} ); - binder.AddMethod( &ScriptDynamicDim::DivCeil3, "DivCeil", {} ); - - binder.AddMethod( &ScriptDynamicDim::X, "X", {} ); - binder.AddMethod( 
&ScriptDynamicDim::Y, "Y", {} ); - binder.AddMethod( &ScriptDynamicDim::XY, "XY", {} ); - binder.AddMethod( &ScriptDynamicDim::Area, "Area", {} ); - binder.AddMethod( &ScriptDynamicDim::Volume, "Volume", {} ); + { + ClassBinder binder{ se }; + binder.CreateRef( 0, False{"no ctor"} ); + + binder.AddFactoryCtor( &ScriptDynamicT_Ctor< ScriptDynamicDim, DynamicDim, packed_uint3 >, {} ); + + binder.AddMethod( &ScriptDynamicDim::Mul1, "opMul", {} ); + binder.AddMethod( &ScriptDynamicDim::Div1, "opDiv", {} ); + binder.AddMethod( &ScriptDynamicDim::Mul1, "Mul", {} ); + binder.AddMethod( &ScriptDynamicDim::Div1, "Div", {} ); + binder.AddMethod( &ScriptDynamicDim::DivRound1, "DivRound", {} ); + binder.AddMethod( &ScriptDynamicDim::DivCeil1, "DivCeil", {} ); + + binder.AddMethod( &ScriptDynamicDim::Mul2, "opMul", {} ); + binder.AddMethod( &ScriptDynamicDim::Div2, "opDiv", {} ); + binder.AddMethod( &ScriptDynamicDim::Mul2, "Mul", {} ); + binder.AddMethod( &ScriptDynamicDim::Div2, "Div", {} ); + binder.AddMethod( &ScriptDynamicDim::DivRound2, "DivRound", {} ); + binder.AddMethod( &ScriptDynamicDim::DivCeil2, "DivCeil", {} ); + + binder.AddMethod( &ScriptDynamicDim::Mul3, "opMul", {} ); + binder.AddMethod( &ScriptDynamicDim::Div3, "opDiv", {} ); + binder.AddMethod( &ScriptDynamicDim::Mul3, "Mul", {} ); + binder.AddMethod( &ScriptDynamicDim::Div3, "Div", {} ); + binder.AddMethod( &ScriptDynamicDim::DivRound3, "DivRound", {} ); + binder.AddMethod( &ScriptDynamicDim::DivCeil3, "DivCeil", {} ); + + binder.AddMethod( &ScriptDynamicDim::X, "X", {} ); + binder.AddMethod( &ScriptDynamicDim::Y, "Y", {} ); + binder.AddMethod( &ScriptDynamicDim::XY, "XY", {} ); + binder.AddMethod( &ScriptDynamicDim::Area, "Area", {} ); + binder.AddMethod( &ScriptDynamicDim::Volume, "Volume", {} ); + } + { + ClassBinder binder{ se }; + binder.AddMethod( &ScriptDynamicUInt::ToDim2, "Dimension2", {} ); + binder.AddMethod( &ScriptDynamicUInt::ToDim3, "Dimension3", {} ); + } + { + ClassBinder binder{ se }; + 
binder.AddMethod( &ScriptDynamicUInt2::ToDim, "Dimension", {} ); + } } //----------------------------------------------------------------------------- @@ -284,6 +295,44 @@ namespace return result.Detach(); } +/* +================================================= + ScriptDynamicUInt::PowOf2 +================================================= +*/ + ScriptDynamicUInt* ScriptDynamicUInt::PowOf2a () __Th___ + { + return PowOf2b( 1 ); + } + + ScriptDynamicUInt* ScriptDynamicUInt::PowOf2b (uint value) __Th___ + { + auto du = _value->Clone(); + du->SetOp( value, EDynamicVarOperator::PowOf2 ); + + ScriptDynamicUIntPtr result{ new ScriptDynamicUInt{ RVRef(du) }}; + return result.Detach(); + } + +/* +================================================= + ScriptDynamicUInt::ToDim* +================================================= +*/ + ScriptDynamicDim* ScriptDynamicUInt::ToDim2 () __Th___ + { + auto ds = _value->ToDim2(); + ScriptDynamicDimPtr result{ new ScriptDynamicDim{ RVRef(ds) }}; + return result.Detach(); + } + + ScriptDynamicDim* ScriptDynamicUInt::ToDim3 () __Th___ + { + auto ds = _value->ToDim3(); + ScriptDynamicDimPtr result{ new ScriptDynamicDim{ RVRef(ds) }}; + return result.Detach(); + } + /* ================================================= ScriptDynamicUInt::Bind @@ -294,13 +343,15 @@ namespace ClassBinder binder{ se }; binder.CreateRef(); binder.AddFactoryCtor( &ScriptDynamicT_Ctor< ScriptDynamicUInt, DynamicUInt, uint >, {} ); - binder.AddMethod( &ScriptDynamicUInt::Mul, "Mul", {} ); - binder.AddMethod( &ScriptDynamicUInt::Div, "Div", {} ); - binder.AddMethod( &ScriptDynamicUInt::DivNear, "DivNear", {} ); - binder.AddMethod( &ScriptDynamicUInt::DivCeil, "DivCeil", {} ); - binder.AddMethod( &ScriptDynamicUInt::Add, "Add", {} ); - binder.AddMethod( &ScriptDynamicUInt::Sub, "Sub", {} ); - binder.AddMethod( &ScriptDynamicUInt::Pow, "Pow", {} ); + binder.AddMethod( &ScriptDynamicUInt::Mul, "Mul", {} ); + binder.AddMethod( &ScriptDynamicUInt::Div, "Div", {} ); +
binder.AddMethod( &ScriptDynamicUInt::DivNear, "DivNear", {} ); + binder.AddMethod( &ScriptDynamicUInt::DivCeil, "DivCeil", {} ); + binder.AddMethod( &ScriptDynamicUInt::Add, "Add", {} ); + binder.AddMethod( &ScriptDynamicUInt::Sub, "Sub", {} ); + binder.AddMethod( &ScriptDynamicUInt::Pow, "Pow", {} ); + binder.AddMethod( &ScriptDynamicUInt::PowOf2a, "PowOf2", {} ); + binder.AddMethod( &ScriptDynamicUInt::PowOf2b, "PowOf2", {} ); } //----------------------------------------------------------------------------- @@ -328,6 +379,32 @@ namespace return result.Detach(); } +/* +================================================= + PowOf2 +================================================= +*/ + ScriptDynamicUInt2* ScriptDynamicUInt2::PowOf2 () __Th___ + { + auto du = _value->Clone(); + du->SetOp( uint2{1}, EDynamicVarOperator::PowOf2 ); + + ScriptDynamicUInt2Ptr result{ new ScriptDynamicUInt2{ RVRef(du) }}; + return result.Detach(); + } + +/* +================================================= + ToDim +================================================= +*/ + ScriptDynamicDim* ScriptDynamicUInt2::ToDim () __Th___ + { + auto ds = _value->ToDim(); + ScriptDynamicDimPtr result{ new ScriptDynamicDim{ RVRef(ds) }}; + return result.Detach(); + } + /* ================================================= ScriptDynamicUInt2::Bind @@ -338,8 +415,9 @@ namespace ClassBinder binder{ se }; binder.CreateRef(); binder.AddFactoryCtor( &ScriptDynamicT_Ctor< ScriptDynamicUInt2, DynamicUInt2, const packed_uint2& >, {} ); - binder.AddMethod( &ScriptDynamicUInt2::X, "X", {} ); - binder.AddMethod( &ScriptDynamicUInt2::Y, "Y", {} ); + binder.AddMethod( &ScriptDynamicUInt2::X, "X", {} ); + binder.AddMethod( &ScriptDynamicUInt2::Y, "Y", {} ); + binder.AddMethod( &ScriptDynamicUInt2::PowOf2, "PowOf2", {} ); } //----------------------------------------------------------------------------- diff --git a/AE/samples/res_editor/Scripting/ScriptDynamicVars.h b/AE/samples/res_editor/Scripting/ScriptDynamicVars.h 
index 32a52bd6..4c99ceb7 100644 --- a/AE/samples/res_editor/Scripting/ScriptDynamicVars.h +++ b/AE/samples/res_editor/Scripting/ScriptDynamicVars.h @@ -104,6 +104,10 @@ namespace AE::ResEditor ND_ ScriptDynamicUInt* Add (uint value) __Th___; ND_ ScriptDynamicUInt* Sub (uint value) __Th___; ND_ ScriptDynamicUInt* Pow (uint value) __Th___; + ND_ ScriptDynamicUInt* PowOf2a () __Th___; + ND_ ScriptDynamicUInt* PowOf2b (uint value) __Th___; + ND_ ScriptDynamicDim* ToDim2 () __Th___; + ND_ ScriptDynamicDim* ToDim3 () __Th___; static void Bind (const ScriptEnginePtr &se) __Th___; }; @@ -130,6 +134,9 @@ namespace AE::ResEditor ND_ ScriptDynamicUInt* X () C_Th___; ND_ ScriptDynamicUInt* Y () C_Th___; + ND_ ScriptDynamicUInt2* PowOf2 () __Th___; + ND_ ScriptDynamicDim* ToDim () __Th___; + static void Bind (const ScriptEnginePtr &se) __Th___; }; diff --git a/AE/samples/res_editor/Scripting/ScriptExe.cpp b/AE/samples/res_editor/Scripting/ScriptExe.cpp index dd1db26f..5cca7f17 100644 --- a/AE/samples/res_editor/Scripting/ScriptExe.cpp +++ b/AE/samples/res_editor/Scripting/ScriptExe.cpp @@ -60,9 +60,7 @@ namespace { src.push_back( img ); - auto fmode = UIInteraction::Instance().GetFilterMode(); - - return MakeRCTh( RVRef(src), "Present", dynSize, fmode ); + return MakeRCTh( RVRef(src), "Present", dynSize ); } //----------------------------------------------------------------------------- @@ -167,6 +165,54 @@ namespace { + // + // Blit Image Pass + // + class ScriptExe::ScriptBlitImage final : public ScriptBasePass + { + private: + ScriptImagePtr src; + ScriptImagePtr dst; + + public: + ScriptBlitImage (const ScriptImagePtr &src, const ScriptImagePtr &dst) : + src{src}, dst{dst} {} + + void _OnAddArg (INOUT ScriptPassArgs::Argument &) C_Th_OV {} + + RC ToPass () C_Th_OV + { + return MakeRCTh( src->ToResource(), dst->ToResource(), "BlitImage" ); + } + }; +//----------------------------------------------------------------------------- + + + + // + // Resolve Image Pass + // + class 
ScriptExe::ScriptResolveImage final : public ScriptBasePass + { + private: + ScriptImagePtr src; + ScriptImagePtr dst; + + public: + ScriptResolveImage (const ScriptImagePtr &src, const ScriptImagePtr &dst) : + src{src}, dst{dst} {} + + void _OnAddArg (INOUT ScriptPassArgs::Argument &) C_Th_OV {} + + RC ToPass () C_Th_OV + { + return MakeRCTh( src->ToResource(), dst->ToResource(), "ResolveImage" ); + } + }; +//----------------------------------------------------------------------------- + + + // // Clear Image Pass // @@ -1158,20 +1204,28 @@ namespace { CHECK_THROW_MSG( src ); CHECK_THROW_MSG( dst ); - CHECK_THROW_MSG( not src->IsMutableDimension() ); - CHECK_THROW_MSG( not dst->IsMutableDimension() ); - - CHECK_THROW_MSG( All( src->Dimension3() == dst->Dimension3() )); - CHECK_THROW_MSG( src->ArrayLayers() == dst->ArrayLayers() ); - CHECK_THROW_MSG( src->MipmapCount() == dst->MipmapCount() ); - auto& src_fmt = EPixelFormat_GetInfo( src->PixelFormat() ); auto& dst_fmt = EPixelFormat_GetInfo( dstFormat ); CHECK_THROW_MSG( not src_fmt.IsCompressed() and dst_fmt.IsCompressed() ); - CHECK_THROW_MSG( dst->PixelFormat() == src->PixelFormat() or dstFormat == dst->PixelFormat() ); CHECK_THROW_MSG( src_fmt.IsColor() and dst_fmt.IsColor() ); - CHECK_THROW_MSG( All( IsMultipleOf( uint2{dst->Dimension2()}, dst_fmt.TexBlockDim() ))); + + CHECK_THROW_MSG( src->ArrayLayers() == dst->ArrayLayers() ); + CHECK_THROW_MSG( src->MipmapCount() == dst->MipmapCount() ); + + if ( src->IsMutableDimension() and dst->IsMutableDimension() ) + { + CHECK_THROW_MSG( src->DimensionRC() == dst->DimensionRC() ); + } + else + { + CHECK_THROW_MSG( not src->IsMutableDimension() ); + CHECK_THROW_MSG( not dst->IsMutableDimension() ); + CHECK_THROW_MSG( All( src->Dimension3() == dst->Dimension3() )); + + CHECK_THROW_MSG( All( IsMultipleOf( uint2{dst->Dimension2()}, dst_fmt.TexBlockDim() ))); + CHECK_THROW_MSG( dst->PixelFormat() == src->PixelFormat() or dstFormat == dst->PixelFormat() ); + } 
src->AddUsage( EResourceUsage::WillReadback ); dst->AddUsage( EResourceUsage::UploadedData ); @@ -1199,6 +1253,40 @@ namespace { data.passGroup->Add( ScriptBasePassPtr{ new ScriptCopyImage{ src, dst }}); } +/* +================================================= + _BlitImage +================================================= +*/ + void ScriptExe::_BlitImage (const ScriptImagePtr &src, const ScriptImagePtr &dst) __Th___ + { + CHECK_THROW_MSG( src and dst ); + src->AddUsage( EResourceUsage::Transfer ); + dst->AddUsage( EResourceUsage::GenMipmaps ); + + auto& data = _GetTempData(); + CHECK_THROW_MSG( data.passGroup ); + + data.passGroup->Add( ScriptBasePassPtr{ new ScriptBlitImage{ src, dst }}); + } + +/* +================================================= + _ResolveImage +================================================= +*/ + void ScriptExe::_ResolveImage (const ScriptImagePtr &src, const ScriptImagePtr &dst) __Th___ + { + CHECK_THROW_MSG( src and dst ); + src->AddUsage( EResourceUsage::Transfer ); + dst->AddUsage( EResourceUsage::Transfer ); + + auto& data = _GetTempData(); + CHECK_THROW_MSG( data.passGroup ); + + data.passGroup->Add( ScriptBasePassPtr{ new ScriptResolveImage{ src, dst }}); + } + /* ================================================= _ClearImage* @@ -1645,6 +1733,21 @@ namespace { } } +/* +================================================= + _SpectrumToLinear +================================================= +*/ + void ScriptExe::_SpectrumToLinear (INOUT ScriptArray &wlToRGB) __Th___ + { + for (uint i = 0; i < wlToRGB.size(); ++i) + { + wlToRGB[i].y = RemoveSRGBCurve( wlToRGB[i].y ); + wlToRGB[i].z = RemoveSRGBCurve( wlToRGB[i].z ); + wlToRGB[i].w = RemoveSRGBCurve( wlToRGB[i].w ); + } + } + /* ================================================= _WhiteColorSpectrum3 @@ -1669,6 +1772,7 @@ namespace { wlToRGB.push_back( float4( 635.f, 1.0f, 0.0f, 0.0f )); wlToRGB.push_back( float4( 720.f, 0.5f, 0.0f, 0.0f )); + _SpectrumToLinear( INOUT wlToRGB ); if (
normalized ) _NormalizeSpectrum( INOUT wlToRGB ); } @@ -1689,6 +1793,7 @@ namespace { wlToRGB.push_back( float4( 650.f, 0.9f, 0.0f, 0.00f )); wlToRGB.push_back( float4( 700.f, 0.3f, 0.0f, 0.00f )); + _SpectrumToLinear( INOUT wlToRGB ); if ( normalized ) _NormalizeSpectrum( INOUT wlToRGB ); } @@ -1712,6 +1817,7 @@ namespace { wlToRGB.push_back( float4( 650.f, 0.83f, 0.00f, 0.00f )); #endif + _SpectrumToLinear( INOUT wlToRGB ); if ( normalized ) _NormalizeSpectrum( INOUT wlToRGB ); } @@ -1800,6 +1906,7 @@ namespace { CoreBindings::BindStdTypes( se ); CoreBindings::BindScalarMath( se ); CoreBindings::BindVectorMath( se ); + CoreBindings::BindQuaternion( se ); CoreBindings::BindRect( se ); CoreBindings::BindMatrixMath( se ); CoreBindings::BindColor( se ); @@ -1898,6 +2005,8 @@ namespace { se->AddFunction( &ScriptExe::_GenMipmaps, "GenMipmaps", {}, "Pass which generates mipmaps for image." ); se->AddFunction( &ScriptExe::_CopyImage, "CopyImage", {}, "Pass which copy image content to another image." ); + se->AddFunction( &ScriptExe::_BlitImage, "BlitImage", {}, "Pass which blits image to another image." ); + se->AddFunction( &ScriptExe::_ResolveImage, "ResolveImage", {}, "Pass which resolve multisample image to another single-sampled image." ); se->AddFunction( &ScriptExe::_CompressImage, "CompressImage", {"src", "dst"}, "Pass which compress image on CPU or GPU." ); se->AddFunction( &ScriptExe::_CompressImage2, "CompressImage", {"src", "dst", "dstFormat"}, "Pass which compress image on CPU or GPU.\n'dstFormat' may not be supported by current GPU, but may be used for software decoding.\n'dst' image must be compatible with 'dstFormat'." 
); @@ -1930,7 +2039,7 @@ namespace { se->AddFunction( &ScriptExe::_GetCylinder1, "GetCylinder", {"segmentCount", "isInner", "positions", "texcoords", "indices"}, "Returns cylinder" ); se->AddFunction( &ScriptExe::_GetCylinder2, "GetCylinder", {"segmentCount", "isInner", "positions", "normals", "tangents", "bitangents", "texcoords", "indices"}, "Returns cylinder" ); - se->AddFunction( &ScriptExe::_GetSphericalCube1, "GetSphericalCube", {"lod", "positions", "indices"}, "Returns spherical cube without projection and rotation" ); + se->AddFunction( &ScriptExe::_GetSphericalCube1, "GetSphericalCube", {"lod", "positions", "indices"}, "Returns spherical cube without projection and face rotation.\nIn 'positions': xy - pos on face, z - face index." ); se->AddFunction( &ScriptExe::_IndicesToPrimitives, "IndicesToPrimitives", {"indices", "primitives"}, "Helper function to convert array of indices to array of uint3 indices per triangle" ); se->AddFunction( &ScriptExe::_GetFrustumPlanes, "GetFrustumPlanes", {"viewProj", "outPlanes"}, "Helper function to convert matrix to 6 planes of the frustum." ); @@ -1980,10 +2089,10 @@ namespace { se->AddFunction( &ScriptExe::_SliderF3a, "Slider", {"dyn", "name", "min", "max", "initial"} ); se->AddFunction( &ScriptExe::_SliderF4a, "Slider", {"dyn", "name", "min", "max", "initial"} ); - se->AddFunction( &ScriptExe::_WhiteColorSpectrum3, "WhiteColorSpectrum3", {"wavelengthToRGB"}, "Returns array with 3 elements, where x - wavelength in nm, yzw - RGB color." ); - se->AddFunction( &ScriptExe::_WhiteColorSpectrum7, "WhiteColorSpectrum7", {"wavelengthToRGB", "normalized"}, "Returns array with 7 elements, where x - wavelength in nm, yzw - RGB color.\nnormalized - sum of colors will be 1." 
); - se->AddFunction( &ScriptExe::_WhiteColorSpectrumStep100nm, "WhiteColorSpectrumStep100nm", {"wavelengthToRGB", "normalized"}, "Returns array 4 elements with visible light spectrum with step 100nm, where x - wavelength in nm, yzw - RGB color.\nnormalized - sum of colors will be 1." ); - se->AddFunction( &ScriptExe::_WhiteColorSpectrumStep50nm, "WhiteColorSpectrumStep50nm", {"wavelengthToRGB", "normalized"}, "Returns array 7 elements with visible light spectrum with step 50nm, where x - wavelength in nm, yzw - RGB color.\nnormalized - sum of colors will be 1." ); + se->AddFunction( &ScriptExe::_WhiteColorSpectrum3, "WhiteColorSpectrum3", {"wavelengthToRGB"}, "Returns array with 3 elements, where x - wavelength in nm, yzw - RGB color in linear space." ); + se->AddFunction( &ScriptExe::_WhiteColorSpectrum7, "WhiteColorSpectrum7", {"wavelengthToRGB", "normalized"}, "Returns array with 7 elements, where x - wavelength in nm, yzw - RGB color in linear space.\nnormalized - sum of colors will be 1." ); + se->AddFunction( &ScriptExe::_WhiteColorSpectrumStep100nm, "WhiteColorSpectrumStep100nm", {"wavelengthToRGB", "normalized"}, "Returns array 4 elements with visible light spectrum with step 100nm, where x - wavelength in nm, yzw - RGB color in linear space.\nnormalized - sum of colors will be 1." ); + se->AddFunction( &ScriptExe::_WhiteColorSpectrumStep50nm, "WhiteColorSpectrumStep50nm", {"wavelengthToRGB", "normalized"}, "Returns array 7 elements with visible light spectrum with step 50nm, where x - wavelength in nm, yzw - RGB color in linear space.\nnormalized - sum of colors will be 1." ); se->AddFunction( &ScriptExe::_CM_CubeSC_Forward, "CM_CubeSC_Forward", {"snormCoord_cubeFace"}, "Convert 2D regular grid on cube face to 3D position on cube." ); se->AddFunction( &ScriptExe::_CM_IdentitySC_Forward, "CM_IdentitySC_Forward", {"snormCoord_cubeFace"}, "Convert 2D regular grid on cube face to 3D position on sphere using identity projection (normalization)." 
); @@ -2432,6 +2541,14 @@ namespace { obj_storage.pplnStorage = &ppln_storage; obj_storage.shaderFolders = _GetTempData().cfg.shaderDirs; obj_storage.defaultFeatureSet = "DefaultFS"; + obj_storage.defaultShaderDefines = "\n" + "AE_LICENSE_MIT\n" + "AE_LICENSE_BSD2\n" + "AE_LICENSE_BSD3\n" + "AE_LICENSE_APACHE_2\n" + "AE_LICENSE_UNLICENSE\n" + "AE_LICENSE_CC_BY_NC_SA_3\n" + "AE_ENABLE_UNKNOWN_LICENSE\n"; obj_storage.spirvCompiler = MakeUnique( _GetTempData().cfg.includeDirs ); obj_storage.spirvCompiler->SetDefaultResourceLimits(); @@ -2781,8 +2898,10 @@ namespace { bool CompareImageTypes (const Graphics::ImageDesc &lhs, const ResLoader::IntermImage &rhs) { const auto [lhs_t0, lhs_t1] = GetDescriptorImageTypeRelaxed( lhs ); - const auto rhs_t = GetDescriptorImageTypeRelaxed( rhs.PixelFormat(), rhs.GetType(), false ); - return lhs_t0 == rhs_t or lhs_t1 == rhs_t; + const auto rhs_t0 = GetDescriptorImageTypeRelaxed( rhs.PixelFormat(), rhs.GetType(), False{"non-MS"}, False{"non-CubeMap"} ); + const auto rhs_t1 = GetDescriptorImageTypeRelaxed( rhs.PixelFormat(), rhs.GetType(), False{"non-MS"}, True{"CubeMap"} ); + return lhs_t0 == rhs_t0 or lhs_t1 == rhs_t0 or + lhs_t0 == rhs_t1 or lhs_t1 == rhs_t1; } } // AE::ResEditor diff --git a/AE/samples/res_editor/Scripting/ScriptExe.h b/AE/samples/res_editor/Scripting/ScriptExe.h index d2841691..8f7bb486 100644 --- a/AE/samples/res_editor/Scripting/ScriptExe.h +++ b/AE/samples/res_editor/Scripting/ScriptExe.h @@ -81,6 +81,8 @@ namespace AE::ResEditor class ScriptPassGroup; class ScriptGenMipmaps; class ScriptCopyImage; + class ScriptBlitImage; + class ScriptResolveImage; class ScriptClearImage; class ScriptClearBuffer; class ScriptBuildRTGeometry; @@ -183,6 +185,8 @@ namespace AE::ResEditor static void _GenMipmaps (const ScriptImagePtr &rt) __Th___; static void _CopyImage (const ScriptImagePtr &src, const ScriptImagePtr &dst) __Th___; + static void _BlitImage (const ScriptImagePtr &src, const ScriptImagePtr &dst) __Th___; + static 
void _ResolveImage (const ScriptImagePtr &src, const ScriptImagePtr &dst) __Th___; static void _CompressImage (const ScriptImagePtr &src, const ScriptImagePtr &dst) __Th___; static void _CompressImage2 (const ScriptImagePtr &src, const ScriptImagePtr &dst, EPixelFormat dstFormat) __Th___; @@ -315,6 +319,7 @@ namespace AE::ResEditor INOUT ScriptArray &positions, INOUT ScriptArray &indices) __Th___; + static void _SpectrumToLinear (INOUT ScriptArray &) __Th___; static void _NormalizeSpectrum (INOUT ScriptArray &) __Th___; static void _WhiteColorSpectrum3 (OUT ScriptArray &) __Th___; static void _WhiteColorSpectrum7 (OUT ScriptArray &, bool) __Th___; diff --git a/AE/samples/res_editor/Scripting/ScriptExe_MeshGen.cpp b/AE/samples/res_editor/Scripting/ScriptExe_MeshGen.cpp index 4160f2c8..4e14ac06 100644 --- a/AE/samples/res_editor/Scripting/ScriptExe_MeshGen.cpp +++ b/AE/samples/res_editor/Scripting/ScriptExe_MeshGen.cpp @@ -729,7 +729,7 @@ namespace AE::ResEditor for (uint y = 0; y < vcount; ++y) for (uint x = 0; x < vcount; ++x) { - float2 ncoord = ToSNorm( float2{x,y} / float(vcount-1) ); + float2 ncoord = ToSNorm( float2{uint2{ x, y }} / float(vcount-1) ); positions.push_back(float3{ ncoord, float(face) }); ++vert_i; diff --git a/AE/samples/res_editor/Scripting/ScriptGeomSource.cpp b/AE/samples/res_editor/Scripting/ScriptGeomSource.cpp index a959d369..d822de90 100644 --- a/AE/samples/res_editor/Scripting/ScriptGeomSource.cpp +++ b/AE/samples/res_editor/Scripting/ScriptGeomSource.cpp @@ -2387,6 +2387,9 @@ namespace { CHECK_THROW( _geomSrc ); CHECK_THROW( rtech ); + CHECK_THROW_MSG( GraphicsScheduler().GetFeatureSet().drawIndirectFirstInstance == FeatureSet::EFeature::RequireTrue, + "ModelGeomSource requires 'drawIndirectFirstInstance' feature which is not supported" ); + switch_enum( layer ) { case ERenderLayer::Opaque : break; diff --git a/AE/samples/res_editor/Scripting/ScriptImage.cpp b/AE/samples/res_editor/Scripting/ScriptImage.cpp index 2cf32190..9856dd7d 
100644 --- a/AE/samples/res_editor/Scripting/ScriptImage.cpp +++ b/AE/samples/res_editor/Scripting/ScriptImage.cpp @@ -7,8 +7,8 @@ namespace AE::ResEditor { namespace { - static ScriptImage* ScriptImage_Ctor1 (PipelineCompiler::EImageType imageType, const String &filename) { - return ScriptImagePtr{ new ScriptImage{ uint(imageType), filename, Default }}.Detach(); + static ScriptImage* ScriptImage_Ctor1 (EImageType imageType, const String &filename) { + return ScriptImagePtr{ new ScriptImage{ imageType, filename, Default }}.Detach(); } static ScriptImage* ScriptImage_Ctor2 (EPixelFormat format, const packed_uint2 &dim) { @@ -51,8 +51,8 @@ namespace return ScriptImagePtr{ new ScriptImage{ format, ds, layers, mipmaps }}.Detach(); } - static ScriptImage* ScriptImage_Ctor12 (PipelineCompiler::EImageType imageType, const String &filename, ScriptImage::ELoadOpFlags flags) { - return ScriptImagePtr{ new ScriptImage{ uint(imageType), filename, flags }}.Detach(); + static ScriptImage* ScriptImage_Ctor12 (EImageType imageType, const String &filename, ScriptImage::ELoadOpFlags flags) { + return ScriptImagePtr{ new ScriptImage{ imageType, filename, flags }}.Detach(); } } // namespace @@ -63,13 +63,11 @@ namespace constructor ================================================= */ - ScriptImage::ScriptImage (uint imageType, const String &filename, ELoadOpFlags flags) __Th___ : + ScriptImage::ScriptImage (EImageType imageType, const String &filename, ELoadOpFlags flags) __Th___ : _imageType{imageType} { - using PipelineCompiler::EImageType; - _desc.imageDim = EImageDim_2D; - switch ( EImageType(imageType) & EImageType::_TexMask ) + switch ( imageType & EImageType::_TexMask ) { case EImageType::Img1D : case EImageType::Img1DArray : _desc.imageDim = EImageDim_1D; break; @@ -85,7 +83,7 @@ namespace // set similar format _desc.format = EPixelFormat::RGBA8_UNorm; - switch ( EImageType(imageType) & EImageType::_ValMask ) + switch ( imageType & EImageType::_ValMask ) { case 
EImageType::Float : _desc.format = EPixelFormat::RGBA32F; break; case EImageType::Half : _desc.format = EPixelFormat::RGBA16F; break; @@ -111,7 +109,8 @@ namespace _desc.format = format; _desc.imageDim = _inDynSize->Get()->NumDimensions(); - _imageType = uint(GetDescriptorImageType( _desc )); + _desc.dimension = ImageDim_t{uint3{ EPixelFormat_GetInfo( format ).TexBlockDim(), 1u }}; + _imageType = GetDescriptorImageType( _desc ); } ScriptImage::ScriptImage (EPixelFormat format, const packed_uint3 &dim) __Th___ : @@ -129,10 +128,10 @@ namespace _desc.arrayLayers = layers; _desc.mipLevels = mipmaps; _desc.imageDim = dim.z > 1 ? EImageDim_3D : EImageDim_2D; - _desc.dimension = uint3(dim); + _desc.dimension = CheckCast(dim); _desc.Validate(); - _imageType = uint(GetDescriptorImageType( _desc )); + _imageType = GetDescriptorImageType( _desc ); } ScriptImage::ScriptImage (EPixelFormat format, const ScriptDynamicDimPtr &ds, const ImageLayer &layers, const MipmapLevel &mipmaps) __Th___ : @@ -144,11 +143,12 @@ namespace _desc.format = format; _desc.imageDim = _inDynSize->Get()->NumDimensions(); + _desc.dimension = ImageDim_t{uint3{ EPixelFormat_GetInfo( format ).TexBlockDim(), 1u }}; _desc.arrayLayers = layers; _desc.mipLevels = mipmaps; _desc.Validate(); - _imageType = uint(GetDescriptorImageType( _desc )); + _imageType = GetDescriptorImageType( _desc ); } /* @@ -169,9 +169,7 @@ namespace */ bool ScriptImage::IsDepthOrStencil () C_NE___ { - using PipelineCompiler::EImageType; - - switch ( EImageType(_imageType) & EImageType::_ValMask ) + switch ( _imageType & EImageType::_ValMask ) { case EImageType::Depth : case EImageType::Stencil : @@ -183,9 +181,7 @@ namespace bool ScriptImage::HasDepth () C_NE___ { - using PipelineCompiler::EImageType; - - switch ( EImageType(_imageType) & EImageType::_ValMask ) + switch ( _imageType & EImageType::_ValMask ) { case EImageType::Depth : case EImageType::DepthStencil : @@ -196,9 +192,7 @@ namespace bool ScriptImage::HasStencil () C_NE___ 
{ - using PipelineCompiler::EImageType; - - switch ( EImageType(_imageType) & EImageType::_ValMask ) + switch ( _imageType & EImageType::_ValMask ) { case EImageType::Stencil : case EImageType::DepthStencil : @@ -251,14 +245,14 @@ namespace { if ( AllBits( usage, EResourceUsage::UploadedData )) { - CHECK_THROW_MSG( not AnyBits( usage, EResourceUsage::ColorAttachment )); - CHECK_THROW_MSG( not AnyBits( usage, EResourceUsage::DepthStencil )); - CHECK_THROW_MSG( not AnyBits( usage, EResourceUsage::ComputeWrite )); + CHECK_THROW_MSG( NoBits( usage, EResourceUsage::ColorAttachment )); + CHECK_THROW_MSG( NoBits( usage, EResourceUsage::DepthStencil )); + CHECK_THROW_MSG( NoBits( usage, EResourceUsage::ComputeWrite )); } if ( AllBits( usage, EResourceUsage::WithHistory )) { - CHECK_THROW_MSG( not AnyBits( usage, EResourceUsage::UploadedData )); + CHECK_THROW_MSG( NoBits( usage, EResourceUsage::UploadedData )); } CHECK_THROW_MSG( not AllBits( usage, EResourceUsage::ColorAttachment | EResourceUsage::DepthStencil ), @@ -308,7 +302,7 @@ namespace return _base->Dimension3(); CHECK_THROW_MSG( not IsMutableDimension() ); - return _desc.dimension; + return packed_uint3{_desc.dimension}; } /* @@ -441,7 +435,7 @@ namespace result->_viewDesc = ImageViewDesc{ viewType, format, baseMipmap, mipmapCount, baseLayer, layerCount }; result->_viewDesc.Validate( _desc ); - result->_imageType = uint(GetDescriptorImageType( _desc, result->_viewDesc )); + result->_imageType = GetDescriptorImageType( _desc, result->_viewDesc ); return result.Detach(); } @@ -485,7 +479,7 @@ namespace _GetImageType ================================================= */ - using PCImageType = PipelineCompiler::EImageType; + using PCImageType = EImageType; auto ScriptImage::_GetImageType () C_Th___ { return PCImageType(ImageType()); } @@ -664,15 +658,24 @@ namespace case EResourceUsage::ColorAttachment : _desc.usage |= EImageUsage::ColorAttachment | EImageUsage::TransferSrc; break; case EResourceUsage::DepthStencil : 
_desc.usage |= EImageUsage::DepthStencilAttachment; break; + case EResourceUsage::InputAttachment : _desc.usage |= EImageUsage::InputAttachment; break; case EResourceUsage::UploadedData : _desc.usage |= EImageUsage::TransferDst; break; case EResourceUsage::WillReadback : _desc.usage |= EImageUsage::TransferSrc; break; case EResourceUsage::Sampled : _desc.usage |= EImageUsage::Sampled; break; - case EResourceUsage::GenMipmaps : _desc.usage |= EImageUsage::Transfer; _desc.options |= (EImageOpt::BlitSrc | EImageOpt::BlitDst); break; - case EResourceUsage::Present : _desc.usage |= EImageUsage::TransferSrc; _desc.options |= EImageOpt::BlitSrc; break; case EResourceUsage::Transfer : _desc.usage |= EImageUsage::Transfer; break; + case EResourceUsage::GenMipmaps : + _desc.usage |= EImageUsage::Transfer; + _desc.options |= (EImageOpt::BlitSrc | EImageOpt::BlitDst); + break; + + case EResourceUsage::Present : + _desc.usage |= EImageUsage::TransferSrc; + _desc.options |= EImageOpt::BlitSrc; + break; + case EResourceUsage::Unknown : case EResourceUsage::ShaderAddress : case EResourceUsage::ComputeRW : @@ -692,7 +695,7 @@ namespace return _resource; } - if ( AllBits( _desc.usage, EImageUsage::TransferSrc ) and not AnyBits( _desc.usage, EImageUsage::DepthStencilAttachment )) + if ( AllBits( _desc.usage, EImageUsage::TransferSrc ) and NoBits( _desc.usage, EImageUsage::DepthStencilAttachment )) _desc.options |= EImageOpt::BlitSrc; auto& res_mngr = GraphicsScheduler().GetResourceManager(); @@ -722,9 +725,9 @@ namespace else { if ( _inDynSize ) { - _desc.dimension = _inDynSize->Get()->Dimension3_NonZero(); + _desc.dimension = ImageDim_t{_inDynSize->Get()->Dimension3_NonZero()}; }else{ - CHECK_THROW_MSG( All( _desc.dimension > uint3{0} ), "failed to create image '"s << _dbgName << "'" ); + CHECK_THROW_MSG( All( _desc.dimension > ImageDim_t{0} ), "failed to create image '"s << _dbgName << "'" ); } CHECK_THROW_MSG( res_mngr.IsSupported( _desc ), diff --git 
a/AE/samples/res_editor/Scripting/ScriptImage.h b/AE/samples/res_editor/Scripting/ScriptImage.h index 87c47ae8..f3a7bedd 100644 --- a/AE/samples/res_editor/Scripting/ScriptImage.h +++ b/AE/samples/res_editor/Scripting/ScriptImage.h @@ -16,14 +16,6 @@ namespace AE::ResEditor { // types public: - enum class EImageType : uint - { - Unknown, - ConstDataFromFile, - Storage, - RenderTarget, - }; - using ELoadOpFlags = Image::ELoadOpFlags; private: @@ -37,7 +29,7 @@ namespace AE::ResEditor const bool _descDefined = false; EResourceUsage _resUsage = Default; - uint _imageType = 0; // PipelineCompiler::EImageType + EImageType _imageType = Default; LoadOps_t _loadOps; String _dbgName; @@ -51,7 +43,7 @@ namespace AE::ResEditor // methods public: ScriptImage () = delete; - ScriptImage (uint imageType, const String &filename, ELoadOpFlags flags) __Th___; + ScriptImage (EImageType imageType, const String &filename, ELoadOpFlags flags) __Th___; ScriptImage (EPixelFormat format, const ScriptDynamicDimPtr &ds) __Th___; ScriptImage (EPixelFormat format, const packed_uint3 &dim) __Th___; ScriptImage (EPixelFormat format, const packed_uint3 &dim, @@ -84,7 +76,7 @@ namespace AE::ResEditor ND_ ImageDesc Description () C_NE___; ND_ ImageViewDesc ViewDescription () C_NE___ { return _viewDesc; } - ND_ uint ImageType () C_NE___ { return _imageType; } + ND_ EImageType ImageType () C_NE___ { return _imageType; } ND_ EPixelFormat PixelFormat () C_NE___ { return _viewDesc.format == Default ? 
_desc.format : _viewDesc.format; } ND_ bool IsColor () C_NE___ { return not IsDepthOrStencil(); } diff --git a/AE/samples/res_editor/Scripting/ScriptPassArgs.h b/AE/samples/res_editor/Scripting/ScriptPassArgs.h index 9d039eb8..e99c4301 100644 --- a/AE/samples/res_editor/Scripting/ScriptPassArgs.h +++ b/AE/samples/res_editor/Scripting/ScriptPassArgs.h @@ -68,8 +68,8 @@ namespace AE::ResEditor void ArgImageArrOut (const String &name, Array arr) __Th___; void ArgImageArrInOut (const String &name, Array arr) __Th___; - template - void ArgsToDescSet (EShaderStages stages, DSL &dsLayout, AS arraySize, AT accessType) C_Th___; + template + void ArgsToDescSet (EShaderStages stages, DSL &dsLayout, AS arraySize) C_Th___; void InitResources (OUT ResourceArray &resources, PipelinePackID packId) C_Th___; void ValidateArgs () C_Th___; void AddLayoutReflection () C_Th___; @@ -92,43 +92,43 @@ namespace AE::ResEditor ArgsToDescSet ================================================= */ - template - void ScriptPassArgs::ArgsToDescSet (const EShaderStages stages, DSL &dsLayout, AS, AT accessType) C_Th___ + template + void ScriptPassArgs::ArgsToDescSet (const EShaderStages stages, DSL &dsLayout, const AS arraySize) C_Th___ { - const AS array_size {1}; + ASSERT( arraySize.value == 1 ); for (auto& arg : _args) { Visit( arg.res, [&] (ScriptBufferPtr buf) { if ( buf->HasLayout() ){ - dsLayout->AddStorageBuffer( stages, arg.name, array_size, buf->GetTypeName(), accessType, arg.state, False{} ); + dsLayout->AddStorageBuffer( stages, arg.name, arraySize, buf->GetTypeName(), Default, arg.state, False{} ); }else{ // TODO - // dsLayout->AddStorageTexelBuffer( stages, arg.name, array_size, PipelineCompiler::EImageType(buf->TexelBufferType()), - // buf->GetViewFormat(), accessType, arg.state ); + // dsLayout->AddStorageTexelBuffer( stages, arg.name, arraySize, EImageType(buf->TexelBufferType()), + // buf->GetViewFormat(), Default, arg.state ); } }, [&] (ScriptImagePtr tex) { - const auto type = 
PipelineCompiler::EImageType(tex->ImageType()); + const auto type = EImageType(tex->ImageType()); if ( not arg.samplerName.empty() ) dsLayout->AddCombinedImage_ImmutableSampler( stages, arg.name, type, arg.state, arg.samplerName ); else if ( AllBits( arg.state, EResourceState::ShaderSample )) - dsLayout->AddSampledImage( stages, arg.name, array_size, type, arg.state ); + dsLayout->AddSampledImage( stages, arg.name, arraySize, type, arg.state ); else - dsLayout->AddStorageImage( stages, arg.name, array_size, type, tex->PixelFormat(), accessType, arg.state ); + dsLayout->AddStorageImage( stages, arg.name, arraySize, type, tex->PixelFormat(), Default, arg.state ); }, [&] (ScriptVideoImagePtr video) { String sampler = (video->HasYcbcrSampler() ? video->GetSamplerName() : arg.samplerName); - dsLayout->AddCombinedImage_ImmutableSampler( stages, arg.name, PipelineCompiler::EImageType(video->ImageType()), arg.state, sampler ); + dsLayout->AddCombinedImage_ImmutableSampler( stages, arg.name, EImageType(video->ImageType()), arg.state, sampler ); }, [&] (ScriptRTScenePtr) { - dsLayout->AddRayTracingScene( stages, arg.name, array_size ); + dsLayout->AddRayTracingScene( stages, arg.name, arraySize ); }, [&] (const Array &arr) { - const auto type = PipelineCompiler::EImageType(arr[0]->ImageType()); + const auto type = EImageType(arr[0]->ImageType()); if ( not arg.samplerName.empty() ) { Array samplers; samplers.resize( arr.size(), arg.samplerName ); @@ -137,7 +137,7 @@ namespace AE::ResEditor if ( AllBits( arg.state, EResourceState::ShaderSample )) { dsLayout->AddSampledImage( stages, arg.name, AS{uint(arr.size())}, type, arg.state ); }else{ - dsLayout->AddStorageImage( stages, arg.name, AS{uint(arr.size())}, type, arr[0]->PixelFormat(), accessType, arg.state ); + dsLayout->AddStorageImage( stages, arg.name, AS{uint(arr.size())}, type, arr[0]->PixelFormat(), Default, arg.state ); } }, [] (NullUnion) { diff --git a/AE/samples/res_editor/Scripting/ScriptPostprocess.cpp 
b/AE/samples/res_editor/Scripting/ScriptPostprocess.cpp index 21eab38f..8225c210 100644 --- a/AE/samples/res_editor/Scripting/ScriptPostprocess.cpp +++ b/AE/samples/res_editor/Scripting/ScriptPostprocess.cpp @@ -100,7 +100,7 @@ namespace binder.CreateRef( 0, False{"no ctor"} ); _BindBase( binder, True{"withArgs"} ); - _BindBaseRenderPass( binder, True{"withBlending"} ); + _BindBaseRenderPass( binder, True{"withBlending"}, True{"withRWAttachment"} ); binder.Comment( "Set path to fragment shader, empty - load current file." ); binder.AddFactoryCtor( &ScriptPostprocess_Ctor0, {} ); @@ -109,6 +109,9 @@ namespace binder.AddFactoryCtor( &ScriptPostprocess_Ctor3, {"postprocessFlags"} ); binder.AddFactoryCtor( &ScriptPostprocess_Ctor4, {"postprocessFlags", "defines"} ); binder.AddFactoryCtor( &ScriptPostprocess_Ctor5, {"shaderPath", "defines"} ); + + binder.Comment( "Can be used only if pass hasn't attachments." ); + binder.AddMethod( &ScriptBasePass::_SetDynamicDimension, "SetDimension", {} ); } } @@ -138,7 +141,6 @@ namespace Bytes ub_size; result->_rtech = _CompilePipeline( OUT ub_size ); // throw - result->_depthRange = this->_depthRange; EnumSet dbg_modes; @@ -175,6 +177,16 @@ namespace CHECK_THROW( res_mngr.CreateDescriptorSets( OUT result->_dsIndex, OUT result->_descSets.data(), max_frames, ppln, DescriptorSetName{"ds0"}, null, _dbgName )); _args.InitResources( OUT result->_resources, result->_rtech.packId ); // throw + + for (auto [out, i] : WithIndex(_output)) + { + if ( out.inName.empty() ) + continue; + + result->_resources.Add( UniformName{out.inName}, out.rt->ToResource(), + (out.rt->IsDepthOrStencil() ? 
EResourceState::InputDepthStencilAttachment_RW : EResourceState::InputColorAttachment_RW) + | EResourceState::FragmentShader ); + } } uint min_layer_count = UMax; @@ -188,24 +200,35 @@ namespace AssignMin( INOUT min_layer_count, count ); } + if ( _output.empty() ) + min_layer_count = 1; + CHECK_THROW( min_layer_count > 0 ); result->_rpDesc.renderPassName = RenderPassName{"rp"}; result->_rpDesc.subpassName = SubpassName{"main"}; result->_rpDesc.packId = result->_rtech.packId; result->_rpDesc.layerCount = ImageLayer{min_layer_count}; + result->_rpDesc.area = RectI{0,0,1,1}; + result->_rpDesc.viewports = this->_viewports; + result->_dynamicDim = this->_dynamicDim ? this->_dynamicDim->Get() : null; - for (usize i = 0; i < _output.size(); ++i) + if ( result->_rpDesc.viewports.empty() ) + result->_rpDesc.AddViewport( RectF{0.f, 0.f, 1.f, 1.f}, _depthRange.x, _depthRange.y ); + + for (auto [src, i] : WithIndex(_output)) { - auto& src = _output[i]; auto rt = src.rt->ToResource(); CHECK_THROW( rt ); // validate - for (auto& [name, res, state] : result->_resources.Get()) + if ( src.inName.empty() ) { - if ( auto* tex = UnionGet< RC >( res )) - CHECK_THROW_MSG( tex->get() != rt.get(), "Image '"s << rt->GetName() << "' used as input and output" ); + for (auto& [name, res, state] : result->_resources.Get()) + { + if ( auto* tex = UnionGet< RC >( res )) + CHECK_THROW_MSG( tex->get() != rt.get(), "Image '"s << rt->GetName() << "' used as input and output, use 'InOut()' instead." 
); + } } ImageViewDesc view; @@ -223,7 +246,6 @@ namespace dst.image = rt; dst.clear = src.clear; } - CHECK_THROW( not result->_renderTargets.empty() ); _Init( *result, null ); UIInteraction::Instance().AddPassDbgInfo( result.get(), dbg_modes, EShaderStages::Fragment ); @@ -259,7 +281,7 @@ namespace AE::ResEditor st->Set( EStructLayout::Std140, R"#( float3 resolution; // viewport resolution (in pixels) float time; // shader playback time (in seconds) - float timeDelta; // render time (in seconds) + float timeDelta; // frame render time (in seconds), max value: 1/30s uint frame; // shader playback frame, global frame counter uint passFrameId; // current pass frame index uint seed; // unique value, updated on each shader reloading @@ -313,7 +335,9 @@ namespace AE::ResEditor _args.ValidateArgs(); - CHECK_THROW( not _output.empty() ); + if ( _output.empty() ) + CHECK_THROW_MSG( _HasCustomDynamicDimension(), "If pass hasn't attachments (Output()) use SetDimension() to set framebuffer dimension." ); + for (auto& out : _output) { CHECK_THROW_MSG( out.rt ); @@ -324,34 +348,47 @@ namespace AE::ResEditor CompatibleRenderPassDescPtr compat_rp{ new CompatibleRenderPassDesc{ "compat.rp" }}; compat_rp->AddSubpass( subpass ); { - for (usize i = 0; i < _output.size(); ++i) + for (auto [out, i] : WithIndex(_output)) { - RPAttachmentPtr att = compat_rp->AddAttachment2( _output[i].name ); - auto rt = _output[i].rt; - const auto desc = rt->ToResource()->GetImageDesc(); + RPAttachmentPtr att = compat_rp->AddAttachment2( out.name ); + const auto desc = out.rt->ToResource()->GetImageDesc(); + EAttachment type = (out.rt->IsDepthOrStencil() ? EAttachment::DepthStencil : EAttachment::Color); att->format = desc.format; att->samples = desc.samples; - att->AddUsage( subpass, (rt->IsDepthOrStencil() ? 
EAttachment::DepthStencil : EAttachment::Color) ); + if ( not out.inName.empty() ) + { + att->AddUsage3( subpass, EAttachment::ReadWrite, + RPAttachment::ShaderIO{ out.inName, Default, uint(i) }, + RPAttachment::ShaderIO{ out.name, Default, uint(i) }); + }else + att->AddUsage( subpass, type ); } }{ RenderPassSpecPtr rp_spec = compat_rp->AddSpecialization2( "rp" ); const auto ds_state = EResourceState::DepthStencilAttachment_RW | EResourceState::DSTestAfterFS; - for (usize i = 0; i < _output.size(); ++i) + for (auto [out, i] : WithIndex(_output)) { - RPAttachmentSpecPtr att = rp_spec->AddAttachment2( _output[i].name ); + RPAttachmentSpecPtr att = rp_spec->AddAttachment2( out.name ); att->loadOp = EAttachmentLoadOp::Load; att->storeOp = EAttachmentStoreOp::Store; - if ( _output[i].HasClearValue() ) + if ( out.HasClearValue() ) { att->loadOp = EAttachmentLoadOp::Clear; att->AddLayout( "ExternalIn", EResourceState::Invalidate ); } - att->AddLayout( subpass, (_output[i].rt->IsDepthOrStencil() ? ds_state : EResourceState::ColorAttachment) ); + EResourceState state = (out.rt->IsDepthOrStencil() ? ds_state : EResourceState::ColorAttachment); + if ( not out.inName.empty() ) + { + CHECK( not out.HasClearValue() ); + state = (out.rt->IsDepthOrStencil() ? EResourceState::InputDepthStencilAttachment_RW : EResourceState::InputColorAttachment_RW) + | EResourceState::FragmentShader; + } + att->AddLayout( subpass, state ); } } @@ -370,8 +407,16 @@ namespace AE::ResEditor ubSize = st->StaticSize(); ds_layout->AddUniformBuffer( stage, "un_PerPass", ArraySize{1}, "ShadertoyUB", EResourceState::ShaderUniform, False{} ); + + for (auto [out, i] : WithIndex(_output)) + { + if ( out.inName.empty() ) continue; + ds_layout->AddSubpassInput( stage, out.inName, uint(i), out.rt->ImageType(), + (out.rt->IsDepthOrStencil() ? 
EResourceState::InputDepthStencilAttachment_RW : EResourceState::InputColorAttachment_RW) + | EResourceState::FragmentShader ); + } } - _args.ArgsToDescSet( stage, ds_layout, ArraySize{1}, EAccessType::Coherent ); // throw + _args.ArgsToDescSet( stage, ds_layout, ArraySize{1} ); // throw uint fs_line = 0; diff --git a/AE/samples/res_editor/Scripting/ScriptRayTracingPass.cpp b/AE/samples/res_editor/Scripting/ScriptRayTracingPass.cpp index 493022ad..a4696a78 100644 --- a/AE/samples/res_editor/Scripting/ScriptRayTracingPass.cpp +++ b/AE/samples/res_editor/Scripting/ScriptRayTracingPass.cpp @@ -611,7 +611,7 @@ namespace AE::ResEditor ShaderStructTypePtr st{ new ShaderStructType{"RayTracingPassUB"}}; st->Set( EStructLayout::Std140, R"#( float time; // shader playback time (in seconds) - float timeDelta; // render time (in seconds) + float timeDelta; // frame render time (in seconds), max value: 1/30s uint frame; // shader playback frame, global frame counter uint passFrameId; // current pass frame index uint seed; // unique value, updated on each shader reloading @@ -672,7 +672,7 @@ namespace AE::ResEditor ds_layout->AddUniformBuffer( stage, "un_PerPass", ArraySize{1}, "RayTracingPassUB", EResourceState::ShaderUniform, False{} ); } - _args.ArgsToDescSet( stage, ds_layout, ArraySize{1}, EAccessType::Coherent ); // throw + _args.ArgsToDescSet( stage, ds_layout, ArraySize{1} ); // throw String header; diff --git a/AE/samples/res_editor/Scripting/ScriptScene.cpp b/AE/samples/res_editor/Scripting/ScriptScene.cpp index fb0115c0..5aa12527 100644 --- a/AE/samples/res_editor/Scripting/ScriptScene.cpp +++ b/AE/samples/res_editor/Scripting/ScriptScene.cpp @@ -274,7 +274,6 @@ namespace AE::ResEditor dbg_modes |= mtr->GetDebugModeBits(); } - result->_depthRange = this->_depthRange; result->_renderLayer = this->_renderLayer; result->_shadingRate = this->_shadingRate; @@ -298,10 +297,17 @@ namespace AE::ResEditor } CHECK_THROW( min_layer_count > 0 ); + result->_wScaling = _wScaling; + 
result->_scissors = _scissors; result->_rpDesc.renderPassName = RenderPassName{"rp"}; result->_rpDesc.subpassName = SubpassName{"main"}; result->_rpDesc.packId = result->_rtech.packId; result->_rpDesc.layerCount = ImageLayer{min_layer_count}; + result->_rpDesc.area = RectI{0,0,1,1}; + result->_rpDesc.viewports = _viewports; + + if ( result->_rpDesc.viewports.empty() ) + result->_rpDesc.AddViewport( RectF{0.f, 0.f, 1.f, 1.f}, _depthRange.x, _depthRange.y ); for (usize i = 0; i < _output.size(); ++i) { @@ -354,7 +360,7 @@ namespace AE::ResEditor binder.CreateRef( 0, False{"no ctor"} ); _BindBase( binder, True{"with args"} ); - _BindBaseRenderPass( binder, False{"without blending"} ); + _BindBaseRenderPass( binder, False{"without blending"}, False{"without RWAttachment"} ); binder.Comment( "Add path to single pipeline or folder with pipelines.\n" "Scene geometry will be linked with compatible pipeline or error will be generated." ); @@ -363,6 +369,9 @@ namespace AE::ResEditor binder.AddMethod( &ScriptSceneGraphicsPass::SetLayer, "Layer", {} ); binder.AddMethod( &ScriptSceneGraphicsPass::SetFragmentShadingRate, "FragmentShadingRate", {"rate", "primitiveOp", "textureOp"} ); + + binder.Comment( "Can be used only if pass hasn't attachments." 
); + binder.AddMethod( &ScriptBasePass::_SetDynamicDimension, "SetDimension", {} ); } } @@ -386,7 +395,7 @@ namespace AE::ResEditor // view // float2 resolution; // viewport resolution (in pixels) float time; // shader playback time (in seconds) - float timeDelta; // render time (in seconds) + float timeDelta; // frame render time (in seconds), max value: 1/30s uint frame; // shader playback frame, global frame counter uint seed; // unique value, updated on each shader reloading @@ -484,7 +493,7 @@ namespace AE::ResEditor const auto stage = EShaderStages::AllGraphics; ds_layout->AddUniformBuffer( EShaderStages::AllGraphics, "un_PerPass", ArraySize{1}, "SceneGraphicsPassUB", EResourceState::ShaderUniform, False{} ); - _args.ArgsToDescSet( stage, ds_layout, ArraySize{1}, EAccessType::Coherent ); // throw + _args.ArgsToDescSet( stage, ds_layout, ArraySize{1} ); // throw String str; _AddSlidersAsMacros( OUT str ); @@ -740,7 +749,7 @@ namespace AE::ResEditor st->Set( EStructLayout::Std140, R"#( // view // float time; // shader playback time (in seconds) - float timeDelta; // render time (in seconds) + float timeDelta; // frame render time (in seconds), max value: 1/30s uint frame; // shader playback frame, global frame counter uint seed; // unique value, updated on each shader reloading @@ -801,7 +810,7 @@ namespace AE::ResEditor ds_layout->AddUniformBuffer( stage, "un_PerPass", ArraySize{1}, "SceneRayTracingPassUB", EResourceState::ShaderUniform, False{} ); ds_layout->AddRayTracingScene( stage, "un_RtScene", ArraySize{1} ); - _args.ArgsToDescSet( stage, ds_layout, ArraySize{1}, EAccessType::Coherent ); // throw + _args.ArgsToDescSet( stage, ds_layout, ArraySize{1} ); // throw String str; _AddSlidersAsMacros( OUT str ); diff --git a/AE/samples/res_editor/Scripting/ScriptVideoImage.cpp b/AE/samples/res_editor/Scripting/ScriptVideoImage.cpp index fe4b8305..ad00edaa 100644 --- a/AE/samples/res_editor/Scripting/ScriptVideoImage.cpp +++ 
b/AE/samples/res_editor/Scripting/ScriptVideoImage.cpp @@ -27,7 +27,7 @@ namespace */ ScriptVideoImage::ScriptVideoImage (EPixelFormat format, const String &filename) __Th___ : _format{ format }, - _imageType{uint( PipelineCompiler::EImageType::Img2D | PipelineCompiler::EImageType::Float )}, + _imageType{uint( EImageType::Img2D | EImageType::Float )}, _videoFile{ filename } { CHECK_THROW_MSG( GetVFS().Exists( _videoFile ), @@ -211,7 +211,7 @@ namespace ImageDesc desc; desc.imageDim = EImageDim_2D; desc.format = _format; - desc.dimension = uint3{ _dim, 1 }; + desc.dimension = ImageDim_t{uint3{ _dim, 1u }}; CHECK_ERR( _resUsage != Default ); for (auto usage : BitfieldIterate( _resUsage )) @@ -238,6 +238,7 @@ namespace case EResourceUsage::IndirectBuffer : case EResourceUsage::ASBuild : case EResourceUsage::WithHistory : + case EResourceUsage::InputAttachment : default : RETURN_ERR( "unsupported usage" ); } switch_end diff --git a/AE/samples/res_editor/Scripting/ScriptVideoImage.h b/AE/samples/res_editor/Scripting/ScriptVideoImage.h index d27ad88d..64b0e2b6 100644 --- a/AE/samples/res_editor/Scripting/ScriptVideoImage.h +++ b/AE/samples/res_editor/Scripting/ScriptVideoImage.h @@ -25,7 +25,7 @@ namespace AE::ResEditor private: EResourceUsage _resUsage = Default; EPixelFormat _format = Default; - uint _imageType = 0; // PipelineCompiler::EImageType + uint _imageType = 0; // EImageType uint2 _dim; VFS::FileName _videoFile; String _dbgName; diff --git a/AE/samples/res_editor/_data/CMakeLists.txt b/AE/samples/res_editor/_data/CMakeLists.txt index c0cf51ea..2d6e312a 100644 --- a/AE/samples/res_editor/_data/CMakeLists.txt +++ b/AE/samples/res_editor/_data/CMakeLists.txt @@ -15,7 +15,7 @@ else() endif() set( SOURCES ${DATA_FILES} ${SHADERS_SHARED} ${SHADERS_3PARTY} ${CONFIGS} ) -add_library( "ResourceEditor.Data" STATIC EXCLUDE_FROM_ALL ${SOURCES} ${PREBUILD_CPP_FILE} ) +add_library( "ResourceEditor.Data" STATIC EXCLUDE_FROM_ALL ${SOURCES} ) set_property( TARGET 
"ResourceEditor.Data" PROPERTY LINKER_LANGUAGE CXX ) if (TARGET "ResourceEditor") @@ -44,3 +44,16 @@ target_include_directories( "ResourceEditor.Data" PUBLIC "${AE_ENGINE_SHARED_DATA}/scripts" "${AE_ENGINE_SHARED_DATA}/shaders" ) + +# optional: allow to compile scripts & shaders +set_source_files_properties( ${SOURCES} PROPERTIES LANGUAGE CXX ) + +target_compile_definitions( "ResourceEditor.Data" PUBLIC + "AE_LICENSE_MIT" + "AE_LICENSE_BSD2" + "AE_LICENSE_BSD3" + "AE_LICENSE_APACHE_2" + "AE_LICENSE_UNLICENSE" + "AE_LICENSE_CC_BY_NC_SA_3" + "AE_ENABLE_UNKNOWN_LICENSE" +) diff --git a/AE/samples/res_editor/_data/cpp/vk_types.h b/AE/samples/res_editor/_data/cpp/vk_types.h index 604a250e..4c3d07cc 100644 --- a/AE/samples/res_editor/_data/cpp/vk_types.h +++ b/AE/samples/res_editor/_data/cpp/vk_types.h @@ -1,10 +1,10 @@ -//e0b8bb51 +//5ca9e2b9 #ifndef CameraData_DEFINED # define CameraData_DEFINED // size: 384, align: 16 struct CameraData { - static constexpr auto TypeName = ShaderStructName{HashVal32{0x8142e66cu}}; // 'CameraData' + static constexpr auto TypeName = ShaderStructName{HashVal32{0x8142e66cu}}; float4x4_storage viewProj; float4x4_storage invViewProj; @@ -31,7 +31,7 @@ // size: 1200, align: 16 struct ShadertoyUB { - static constexpr auto TypeName = ShaderStructName{HashVal32{0xa31fc14bu}}; // 'ShadertoyUB' + static constexpr auto TypeName = ShaderStructName{HashVal32{0xa31fc14bu}}; float3 resolution; float time; @@ -80,7 +80,7 @@ // size: 1088, align: 16 struct ComputePassUB { - static constexpr auto TypeName = ShaderStructName{HashVal32{0xaba36a57u}}; // 'ComputePassUB' + static constexpr auto TypeName = ShaderStructName{HashVal32{0xaba36a57u}}; float time; float timeDelta; @@ -117,7 +117,7 @@ // size: 4, align: 4 (16) struct ComputePassPC { - static constexpr auto TypeName = ShaderStructName{HashVal32{0xa1d3ae84u}}; // 'ComputePassPC' + static constexpr auto TypeName = ShaderStructName{HashVal32{0xa1d3ae84u}}; uint dispatchIndex; }; @@ -130,7 +130,7 @@ // 
size: 1088, align: 16 struct RayTracingPassUB { - static constexpr auto TypeName = ShaderStructName{HashVal32{0x1539319au}}; // 'RayTracingPassUB' + static constexpr auto TypeName = ShaderStructName{HashVal32{0x1539319au}}; float time; float timeDelta; @@ -167,7 +167,7 @@ // size: 1056, align: 16 struct SceneGraphicsPassUB { - static constexpr auto TypeName = ShaderStructName{HashVal32{0x1f8a4833u}}; // 'SceneGraphicsPassUB' + static constexpr auto TypeName = ShaderStructName{HashVal32{0x1f8a4833u}}; float2 resolution; float time; @@ -200,7 +200,7 @@ // size: 1040, align: 16 struct SceneRayTracingPassUB { - static constexpr auto TypeName = ShaderStructName{HashVal32{0xd09ba9b0u}}; // 'SceneRayTracingPassUB' + static constexpr auto TypeName = ShaderStructName{HashVal32{0xd09ba9b0u}}; float time; float timeDelta; @@ -231,7 +231,7 @@ // size: 112, align: 16 struct SphericalCubeMaterialUB { - static constexpr auto TypeName = ShaderStructName{HashVal32{0xeb01110au}}; // 'SphericalCubeMaterialUB' + static constexpr auto TypeName = ShaderStructName{HashVal32{0xeb01110au}}; float4x4_storage transform; float3x3_storage normalMat; @@ -246,7 +246,7 @@ // size: 112, align: 16 struct UnifiedGeometryMaterialUB { - static constexpr auto TypeName = ShaderStructName{HashVal32{0x6940ef36u}}; // 'UnifiedGeometryMaterialUB' + static constexpr auto TypeName = ShaderStructName{HashVal32{0x6940ef36u}}; float4x4_storage transform; float3x3_storage normalMat; diff --git a/AE/samples/res_editor/_data/pipeline_inc/Model.as b/AE/samples/res_editor/_data/pipeline_inc/ModelTypes.as similarity index 93% rename from AE/samples/res_editor/_data/pipeline_inc/Model.as rename to AE/samples/res_editor/_data/pipeline_inc/ModelTypes.as index 4396a89c..f7269aa1 100644 --- a/AE/samples/res_editor/_data/pipeline_inc/Model.as +++ b/AE/samples/res_editor/_data/pipeline_inc/ModelTypes.as @@ -111,8 +111,7 @@ void InitRayTracingPipelineLayout () // descriptor set { - const uint stages = 
EShaderStages::RayClosestHit | EShaderStages::RayCallable; - const uint stages2 = stages | EShaderStages::RayGen; + const uint stages = EShaderStages::RayClosestHit | EShaderStages::RayCallable | EShaderStages::RayGen; RC ds = DescriptorSetLayout( "rt-model.mtr.ds" ); ds.StorageBuffer( stages, "un_RTInstances", "ModelRTInstances", EResourceState::ShaderStorage_Read ); @@ -148,7 +147,9 @@ void BufferTypes (bool withFS, bool hasRT) }{ RC st = ShaderStructType( "ModelNode_Array" ); st.Set( EStructLayout::Compatible_Std430, + "uint instanceCount;" + "ModelNode elements [];" ); + st.AddUsage( ShaderStructTypeUsage::BufferLayout ); // enable c++ reflection } // mesh for ray tracing @@ -158,16 +159,19 @@ void BufferTypes (bool withFS, bool hasRT) RC st = ShaderStructType( "ModelRTMesh" ); if ( withFS ) st.AddFeatureSet( "MinRecursiveRayTracing" ); st.Set( EStructLayout::Std430, - "packed_float3 * normals;" + - "float2 * texcoords;" + - "uint * indices;" ); + "packed_float3 * positions;" + // [index_count] + "packed_float3 * normals;" + // [index_count] + "float2 * texcoords;" + // [index_count] + "uint * indices;" ); // [primitive_count * 3] st.AddUsage( ShaderStructTypeUsage::BufferLayout ); // enable c++ reflection }{ + // only 4 instance types, see [ERTGeometryType](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/GeomSource/IGeomSource.h) RC st = ShaderStructType( "ModelRTInstances" ); st.Set( EStructLayout::Std430, "ModelRTMesh & meshesPerInstance [4];" + // address to ModelRTMesh[] "uint * materialsPerInstance [4];" + - "float3x3 * normalMatPerInstance [4];" ); + "float3x3 * normalMatPerInstance [4];" + + "float4x4 * modelMatPerInstance [4];" ); st.AddUsage( ShaderStructTypeUsage::BufferLayout ); // enable c++ reflection } } diff --git a/AE/samples/res_editor/_data/pipelines/ModelReflection.as b/AE/samples/res_editor/_data/pipelines/ModelReflection.as index c0e14e06..bbbb7626 100644 --- a/AE/samples/res_editor/_data/pipelines/ModelReflection.as +++ 
b/AE/samples/res_editor/_data/pipelines/ModelReflection.as @@ -1,5 +1,5 @@ // Copyright (c) Zhirnov Andrey. For more information see 'LICENSE' -#include "Model.as" +#include "ModelTypes.as" void ASmain () { diff --git a/AE/samples/res_editor/_data/pipelines/ModelShared.as b/AE/samples/res_editor/_data/pipelines/ModelShared.as index eefb736d..0f2c995c 100644 --- a/AE/samples/res_editor/_data/pipelines/ModelShared.as +++ b/AE/samples/res_editor/_data/pipelines/ModelShared.as @@ -2,7 +2,7 @@ /* Always included for GraphicsPass */ -#include "Model.as" +#include "ModelTypes.as" void ASmain () { diff --git a/AE/samples/res_editor/_data/pipelines/perf/Subgroups-2a.as b/AE/samples/res_editor/_data/pipelines/perf/Subgroups-2a.as new file mode 100644 index 00000000..ad915c93 --- /dev/null +++ b/AE/samples/res_editor/_data/pipelines/perf/Subgroups-2a.as @@ -0,0 +1,140 @@ +// Copyright (c) Zhirnov Andrey. For more information see 'LICENSE' +#ifdef __INTELLISENSE__ +# include +# include +#endif +//----------------------------------------------------------------------------- +#ifdef SCRIPT + + void ASmain () + { + { + RC st = ShaderStructType( "io" ); + st.Set( EStructLayout::InternalIO, + "float instanceId;" ); + }{ + RC ds = DescriptorSetLayout( "mtr.ds" ); + ds.UniformBuffer( EShaderStages::Vertex, "un_PerObject", "UnifiedGeometryMaterialUB" ); + ds.StorageBuffer( EShaderStages::Vertex, "un_VBuffer", "VBuffer", EResourceState::ShaderStorage_Read ); // external + }{ + RC pl = PipelineLayout( "pl" ); + pl.DSLayout( "pass", 0, "pass.ds" ); + pl.DSLayout( "material", 1, "mtr.ds" ); + } + + { + RC ppln = GraphicsPipeline( "tmpl" ); + ppln.SetLayout( "pl" ); + ppln.SetFragmentOutputFromRenderTech( "rtech", "main" ); + ppln.SetShaderIO( EShader::Vertex, EShader::Fragment, "io" ); + + { + RC vs = Shader(); + vs.LoadSelf(); + ppln.SetVertexShader( vs ); + }{ + RC fs = Shader(); + fs.LoadSelf(); + ppln.SetFragmentShader( fs ); + } + + // specialization + { + RC spec = 
ppln.AddSpecialization( "spec" ); + spec.AddToRenderTech( "rtech", "main" ); // in ScriptSceneGraphicsPass + + RenderState rs; + + rs.inputAssembly.topology = EPrimitive::TriangleList; + + rs.rasterization.frontFaceCCW = false; + rs.rasterization.cullMode = ECullMode::None; + + spec.SetRenderState( rs ); + } + } + } + +#endif +//----------------------------------------------------------------------------- +#ifdef SH_VERT + + void Main () + { + float2 pos = un_VBuffer.vertices[gl.VertexIndex]; + pos = gl.InstanceIndex == 0 ? pos : -pos + (iWithOffset == 1 ? 0.01 : 0.0); + + pos *= float(1u << iScale); + + gl.Position = float4(pos, 0.0, 1.0); + Out.instanceId = float(gl.InstanceIndex); + } + +#endif +//----------------------------------------------------------------------------- +#ifdef SH_FRAG + #include "Hash.glsl" + #include "Color.glsl" + #include "GlobalIndex.glsl" + #include "CodeTemplates.glsl" + + float4 QuadGroupId () + { + const float4 colors [] = { + float4( 1.0, 0.0, 0.0, 1.0 ), // red (0,0) + float4( 1.0, 1.0, 0.0, 1.0 ), // yellow (1,0) + float4( 0.0, 0.0, 1.0, 1.0 ), // blue (0,1) + float4( 1.0, 0.0, 1.0, 1.0 ) // pink (1,1) + }; + return colors[ gl.subgroup.Index & 3 ]; + } + + float4 SubgroupId () + { + return Rainbow( float(gl.subgroup.Index) / float(gl.subgroup.Size-1) ); + } + + float4 UniqueSubgroup () + { + float3 sum = gl.subgroup.Add(float3( gl.FragCoord.xy, In.instanceId )); + return Rainbow( DHash13( sum * iHash )); + } + + float4 HelperInvocationCount () + { + uint i = HelperInvocationCountPerQuad(); + return Rainbow( float(i) / 3.0 ); + } + + float4 FullQuad () + { + float val = DHash12( (Floor( gl.FragCoord.xy / 4.0 ) * 10.0 + 10.0) * iHash ); + float sum = gl.quadGroup.Broadcast( val, 0 ) + + gl.quadGroup.Broadcast( val, 1 ) + + gl.quadGroup.Broadcast( val, 2 ) + + gl.quadGroup.Broadcast( val, 3 ); + return Rainbow( 1.0 - sum / (val * 4.0) ); + } + + float4 FullSubgroup () + { + float val = 1.0; + float sum = gl.subgroup.Add( val ); + 
return Rainbow( 1.0 - sum / gl.subgroup.Size ); + } + + void Main () + { + switch ( iMode ) + { + case 0 : out_Color = QuadGroupId(); break; + case 1 : out_Color = SubgroupId(); break; + case 2 : out_Color = UniqueSubgroup(); break; + case 3 : out_Color = FullSubgroup(); break; + case 4 : out_Color = HelperInvocationCount(); break; + case 5 : out_Color = FullQuad(); break; + } + } + +#endif +//----------------------------------------------------------------------------- diff --git a/AE/samples/res_editor/_data/pipelines/perf/Subgroups-2b.as b/AE/samples/res_editor/_data/pipelines/perf/Subgroups-2b.as new file mode 100644 index 00000000..cba1fd1c --- /dev/null +++ b/AE/samples/res_editor/_data/pipelines/perf/Subgroups-2b.as @@ -0,0 +1,146 @@ +// Copyright (c) Zhirnov Andrey. For more information see 'LICENSE' +#ifdef __INTELLISENSE__ +# include +# include +#endif +//----------------------------------------------------------------------------- +#ifdef SCRIPT + + void ASmain () + { + { + RC st = ShaderStructType( "io" ); + st.Set( EStructLayout::InternalIO, + "float instanceId;" + + "float2 uv;" ); + }{ + RC ds = DescriptorSetLayout( "mtr.ds" ); + ds.UniformBuffer( EShaderStages::Vertex, "un_PerObject", "UnifiedGeometryMaterialUB" ); + ds.StorageBuffer( EShaderStages::Vertex, "un_VBuffer", "VBuffer", EResourceState::ShaderStorage_Read ); // external + }{ + RC pl = PipelineLayout( "pl" ); + pl.DSLayout( "pass", 0, "pass.ds" ); + pl.DSLayout( "material", 1, "mtr.ds" ); + } + + { + RC ppln = GraphicsPipeline( "tmpl" ); + ppln.SetLayout( "pl" ); + ppln.SetFragmentOutputFromRenderTech( "rtech", "main" ); + ppln.SetShaderIO( EShader::Vertex, EShader::Fragment, "io" ); + + { + RC vs = Shader(); + vs.LoadSelf(); + ppln.SetVertexShader( vs ); + }{ + RC fs = Shader(); + fs.LoadSelf(); + ppln.SetFragmentShader( fs ); + } + + // specialization + { + RC spec = ppln.AddSpecialization( "spec" ); + spec.AddToRenderTech( "rtech", "main" ); // in ScriptSceneGraphicsPass + + RenderState 
rs; + + rs.inputAssembly.topology = EPrimitive::TriangleList; + + rs.rasterization.frontFaceCCW = false; + rs.rasterization.cullMode = ECullMode::None; + + spec.SetRenderState( rs ); + } + } + } + +#endif +//----------------------------------------------------------------------------- +#ifdef SH_VERT + #include "Math.glsl" + + void Main () + { + float2 pos = un_VBuffer.vertices[gl.VertexIndex]; + pos = gl.InstanceIndex == 0 ? pos : -pos + (iWithOffset == 1 ? 0.01 : 0.0); + + pos *= float(1u << iScale); + + gl.Position = float4(pos, 0.0, 1.0); + Out.instanceId = float(gl.InstanceIndex); + Out.uv = ToUNorm( pos ); + } + +#endif +//----------------------------------------------------------------------------- +#ifdef SH_FRAG + #include "Hash.glsl" + #include "Color.glsl" + #include "GlobalIndex.glsl" + #include "CodeTemplates.glsl" + + float4 QuadGroupId () + { + const float4 colors [] = { + float4( 1.0, 0.0, 0.0, 1.0 ), // red (0,0) + float4( 1.0, 1.0, 0.0, 1.0 ), // yellow (1,0) + float4( 0.0, 0.0, 1.0, 1.0 ), // blue (0,1) + float4( 1.0, 0.0, 1.0, 1.0 ) // pink (1,1) + }; + return colors[ gl.subgroup.Index & 3 ]; + } + + float4 SubgroupId () + { + return Rainbow( float(gl.subgroup.Index) / float(gl.subgroup.Size-1) ); + } + + float4 UniqueSubgroup () + { + float3 sum = gl.subgroup.Add(float3( gl.FragCoord.xy, In.instanceId )); + return Rainbow( DHash13( sum * iHash )); + } + + float4 HelperInvocationCount () + { + uint i = HelperInvocationCountPerQuad(); + return Rainbow( float(i) / 3.0 ); + } + + float4 FullQuad () + { + float val = DHash12( (Floor( gl.FragCoord.xy / 4.0 ) * 10.0 + 10.0) * iHash ); + float sum = gl.quadGroup.Broadcast( val, 0 ) + + gl.quadGroup.Broadcast( val, 1 ) + + gl.quadGroup.Broadcast( val, 2 ) + + gl.quadGroup.Broadcast( val, 3 ); + return Rainbow( 1.0 - sum / (val * 4.0) ); + } + + float4 FullSubgroup () + { + float val = 1.0; + float sum = gl.subgroup.Add( val ); + return Rainbow( 1.0 - sum / gl.subgroup.Size ); + } + + + void Main () + { 
+ out_Color = gl.texture.Sample( un_Texture, In.uv ) * 0.001; + + switch ( iMode ) + { + case 0 : out_Color += QuadGroupId(); break; + case 1 : out_Color += SubgroupId(); break; + case 2 : out_Color += UniqueSubgroup(); break; + case 3 : out_Color += FullSubgroup(); break; + case 4 : out_Color += HelperInvocationCount(); break; + case 5 : out_Color += FullQuad(); break; + } + } + +#endif +//----------------------------------------------------------------------------- diff --git a/AE/samples/res_editor/_data/pipelines/perf/Subgroups-2c.as b/AE/samples/res_editor/_data/pipelines/perf/Subgroups-2c.as new file mode 100644 index 00000000..d5de564f --- /dev/null +++ b/AE/samples/res_editor/_data/pipelines/perf/Subgroups-2c.as @@ -0,0 +1,133 @@ +// Copyright (c) Zhirnov Andrey. For more information see 'LICENSE' +#ifdef __INTELLISENSE__ +# include +# include +#endif +//----------------------------------------------------------------------------- +#ifdef SCRIPT + + void ASmain () + { + { + RC ds = DescriptorSetLayout( "mtr.ds" ); + ds.UniformBuffer( EShaderStages::Vertex, "un_PerObject", "UnifiedGeometryMaterialUB" ); + ds.StorageBuffer( EShaderStages::Vertex, "un_VBuffer", "VBuffer", EResourceState::ShaderStorage_Read ); // external + }{ + RC pl = PipelineLayout( "pl" ); + pl.DSLayout( "pass", 0, "pass.ds" ); + pl.DSLayout( "material", 1, "mtr.ds" ); + } + + { + RC ppln = GraphicsPipeline( "tmpl" ); + ppln.SetLayout( "pl" ); + ppln.SetFragmentOutputFromRenderTech( "rtech", "main" ); + + { + RC vs = Shader(); + vs.LoadSelf(); + ppln.SetVertexShader( vs ); + }{ + RC fs = Shader(); + fs.LoadSelf(); + ppln.SetFragmentShader( fs ); + } + + // specialization + { + RC spec = ppln.AddSpecialization( "spec" ); + spec.AddToRenderTech( "rtech", "main" ); // in ScriptSceneGraphicsPass + + RenderState rs; + + rs.inputAssembly.topology = EPrimitive::TriangleList; + + rs.rasterization.frontFaceCCW = false; + rs.rasterization.cullMode = ECullMode::None; + + spec.SetRenderState( rs ); + } 
+ } + } + +#endif +//----------------------------------------------------------------------------- +#ifdef SH_VERT + + void Main () + { + float2 pos = un_VBuffer.vertices[gl.VertexIndex]; + + pos *= float(1u << iScale); + + gl.Position = float4(pos, 0.0, 1.0); + } + +#endif +//----------------------------------------------------------------------------- +#ifdef SH_FRAG + #include "Hash.glsl" + #include "Color.glsl" + #include "GlobalIndex.glsl" + #include "CodeTemplates.glsl" + + float4 QuadGroupId () + { + const float4 colors [] = { + float4( 1.0, 0.0, 0.0, 1.0 ), // red (0,0) + float4( 1.0, 1.0, 0.0, 1.0 ), // yellow (1,0) + float4( 0.0, 0.0, 1.0, 1.0 ), // blue (0,1) + float4( 1.0, 0.0, 1.0, 1.0 ) // pink (1,1) + }; + return colors[ gl.subgroup.Index & 3 ]; + } + + float4 SubgroupId () + { + return Rainbow( float(gl.subgroup.Index) / float(gl.subgroup.Size-1) ); + } + + float4 UniqueSubgroup () + { + float2 sum = gl.subgroup.Add( gl.FragCoord.xy ); + return Rainbow( DHash12( sum * iHash )); + } + + float4 HelperInvocationCount () + { + uint i = HelperInvocationCountPerQuad(); + return Rainbow( float(i) / 3.0 ); + } + + float4 FullQuad () + { + float val = DHash12( (Floor( gl.FragCoord.xy / 4.0 ) * 10.0 + 10.0) * iHash ); + float sum = gl.quadGroup.Broadcast( val, 0 ) + + gl.quadGroup.Broadcast( val, 1 ) + + gl.quadGroup.Broadcast( val, 2 ) + + gl.quadGroup.Broadcast( val, 3 ); + return Rainbow( 1.0 - sum / (val * 4.0) ); + } + + float4 FullSubgroup () + { + float val = 1.0; + float sum = gl.subgroup.Add( val ); + return Rainbow( 1.0 - sum / gl.subgroup.Size ); + } + + void Main () + { + switch ( iMode ) + { + case 0 : out_Color = QuadGroupId(); break; + case 1 : out_Color = SubgroupId(); break; + case 2 : out_Color = UniqueSubgroup(); break; + case 3 : out_Color = FullSubgroup(); break; + case 4 : out_Color = HelperInvocationCount(); break; + case 5 : out_Color = FullQuad(); break; + } + } + +#endif 
+//----------------------------------------------------------------------------- diff --git a/AE/samples/res_editor/_data/pipelines/perf/TexLookup-1a.as b/AE/samples/res_editor/_data/pipelines/perf/TexLookup-1a.as new file mode 100644 index 00000000..2a4c35be --- /dev/null +++ b/AE/samples/res_editor/_data/pipelines/perf/TexLookup-1a.as @@ -0,0 +1,84 @@ +// Copyright (c) Zhirnov Andrey. For more information see 'LICENSE' +#ifdef __INTELLISENSE__ +# include +# include +#endif +//----------------------------------------------------------------------------- +#ifdef SCRIPT + + void ASmain () + { + { + RC st = ShaderStructType( "io" ); + st.Set( EStructLayout::InternalIO, + "float2 uv;" ); + }{ + RC ds = DescriptorSetLayout( "mtr.ds" ); + ds.UniformBuffer( EShaderStages::Vertex, "un_PerObject", "UnifiedGeometryMaterialUB" ); + ds.StorageBuffer( EShaderStages::Vertex, "un_VBuffer", "VBuffer", EResourceState::ShaderStorage_Read ); // external + ds.CombinedImage( EShaderStages::Fragment, "un_Texture", EImageType::FImage2D, Sampler_LinearRepeat ); // external + }{ + RC pl = PipelineLayout( "pl" ); + pl.DSLayout( "pass", 0, "pass.ds" ); + pl.DSLayout( "material", 1, "mtr.ds" ); + } + + { + RC ppln = GraphicsPipeline( "tmpl" ); + ppln.SetLayout( "pl" ); + ppln.SetFragmentOutputFromRenderTech( "rtech", "main" ); + ppln.SetShaderIO( EShader::Vertex, EShader::Fragment, "io" ); + + { + RC vs = Shader(); + vs.LoadSelf(); + ppln.SetVertexShader( vs ); + }{ + RC fs = Shader(); + fs.LoadSelf(); + ppln.SetFragmentShader( fs ); + } + + // specialization + { + RC spec = ppln.AddSpecialization( "spec" ); + spec.AddToRenderTech( "rtech", "main" ); // in ScriptSceneGraphicsPass + + RenderState rs; + + rs.inputAssembly.topology = EPrimitive::TriangleList; + + rs.rasterization.frontFaceCCW = false; + rs.rasterization.cullMode = ECullMode::Back; + + spec.SetRenderState( rs ); + } + } + } + +#endif +//----------------------------------------------------------------------------- +#ifdef SH_VERT 
+ #include "Math.glsl" + + void Main () + { + float2 pos = un_VBuffer.vertices[gl.VertexIndex]; + + gl.Position = float4(ToSNorm(pos), 0.0, 1.0); + Out.uv = pos; + } + +#endif +//----------------------------------------------------------------------------- +#ifdef SH_FRAG + #include "Math.glsl" + + void Main () + { + float2 uv = In.uv * iScale + iBias; + out_Color = gl.texture.Sample( un_Texture, uv ); + } + +#endif +//----------------------------------------------------------------------------- diff --git a/AE/samples/res_editor/_data/pipelines/perf/TexLookup-1b.as b/AE/samples/res_editor/_data/pipelines/perf/TexLookup-1b.as new file mode 100644 index 00000000..72aa2e44 --- /dev/null +++ b/AE/samples/res_editor/_data/pipelines/perf/TexLookup-1b.as @@ -0,0 +1,84 @@ +// Copyright (c) Zhirnov Andrey. For more information see 'LICENSE' +#ifdef __INTELLISENSE__ +# include +# include +#endif +//----------------------------------------------------------------------------- +#ifdef SCRIPT + + void ASmain () + { + { + RC st = ShaderStructType( "io" ); + st.Set( EStructLayout::InternalIO, + "float2 uv;" ); + }{ + RC ds = DescriptorSetLayout( "mtr.ds" ); + ds.UniformBuffer( EShaderStages::Vertex, "un_PerObject", "UnifiedGeometryMaterialUB" ); + ds.StorageBuffer( EShaderStages::Vertex, "un_VBuffer", "VBuffer", EResourceState::ShaderStorage_Read ); // external + ds.CombinedImage( EShaderStages::Fragment, "un_Texture", EImageType::FImage2D, Sampler_LinearRepeat ); // external + }{ + RC pl = PipelineLayout( "pl" ); + pl.DSLayout( "pass", 0, "pass.ds" ); + pl.DSLayout( "material", 1, "mtr.ds" ); + } + + { + RC ppln = GraphicsPipeline( "tmpl" ); + ppln.SetLayout( "pl" ); + ppln.SetFragmentOutputFromRenderTech( "rtech", "main" ); + ppln.SetShaderIO( EShader::Vertex, EShader::Fragment, "io" ); + + { + RC vs = Shader(); + vs.LoadSelf(); + ppln.SetVertexShader( vs ); + }{ + RC fs = Shader(); + fs.LoadSelf(); + ppln.SetFragmentShader( fs ); + } + + // specialization + { + RC spec = 
ppln.AddSpecialization( "spec" ); + spec.AddToRenderTech( "rtech", "main" ); // in ScriptSceneGraphicsPass + + RenderState rs; + + rs.inputAssembly.topology = EPrimitive::TriangleList; + + rs.rasterization.frontFaceCCW = false; + rs.rasterization.cullMode = ECullMode::Back; + + spec.SetRenderState( rs ); + } + } + } + +#endif +//----------------------------------------------------------------------------- +#ifdef SH_VERT + #include "Math.glsl" + + void Main () + { + float2 pos = un_VBuffer.vertices[gl.VertexIndex]; + + gl.Position = float4(ToSNorm(pos), 0.0, 1.0); + Out.uv = pos; + } + +#endif +//----------------------------------------------------------------------------- +#ifdef SH_FRAG + #include "Hash.glsl" + + void Main () + { + float2 uv = HEHash22( (int2(gl.FragCoord.xy) >> iStep) + iOffset ) + In.uv; + out_Color = gl.texture.Sample( un_Texture, uv ); + } + +#endif +//----------------------------------------------------------------------------- diff --git a/AE/samples/res_editor/_data/pipelines/samples/Cubemap.as b/AE/samples/res_editor/_data/pipelines/samples/Cubemap.as index 605493d2..81540c87 100644 --- a/AE/samples/res_editor/_data/pipelines/samples/Cubemap.as +++ b/AE/samples/res_editor/_data/pipelines/samples/Cubemap.as @@ -4,7 +4,7 @@ */ #ifdef __INTELLISENSE__ # include -# include +# include #endif //----------------------------------------------------------------------------- #ifdef SCRIPT diff --git a/AE/samples/res_editor/_data/pipelines/samples/DeferredTexturing-pass1.as b/AE/samples/res_editor/_data/pipelines/samples/DeferredTexturing-pass1.as index b0cff310..e6e5392d 100644 --- a/AE/samples/res_editor/_data/pipelines/samples/DeferredTexturing-pass1.as +++ b/AE/samples/res_editor/_data/pipelines/samples/DeferredTexturing-pass1.as @@ -19,7 +19,7 @@ } { RC ppln = GraphicsPipeline( prefix+"t" ); - ppln.SetLayout( "model.pl" ); // [InitPipelineLayout()](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/pipeline_inc/Model.as) + 
ppln.SetLayout( "model.pl" ); // [InitPipelineLayout()](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/pipeline_inc/ModelTypes.as) ppln.SetVertexInput( "VB{Posf3, Normf3, UVf2}" ); ppln.SetFragmentOutputFromRenderTech( "rtech", "main" ); ppln.SetShaderIO( EShader::Vertex, EShader::Fragment, prefix+"io.vs-fs" ); diff --git a/AE/samples/res_editor/_data/pipelines/samples/DeferredTexturing-pass2.as b/AE/samples/res_editor/_data/pipelines/samples/DeferredTexturing-pass2.as index c77596bc..189be126 100644 --- a/AE/samples/res_editor/_data/pipelines/samples/DeferredTexturing-pass2.as +++ b/AE/samples/res_editor/_data/pipelines/samples/DeferredTexturing-pass2.as @@ -12,7 +12,7 @@ { RC ppln = GraphicsPipeline( prefix+"t" ); - ppln.SetLayout( "model.pl" ); // [InitPipelineLayout()](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/pipeline_inc/Model.as) + ppln.SetLayout( "model.pl" ); // [InitPipelineLayout()](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/pipeline_inc/ModelTypes.as) ppln.SetFragmentOutputFromRenderTech( "rtech", "main" ); { diff --git a/AE/samples/res_editor/_data/pipelines/samples/Dispersion2D-area.as b/AE/samples/res_editor/_data/pipelines/samples/Dispersion2D-area.as index edf484cd..18d3e578 100644 --- a/AE/samples/res_editor/_data/pipelines/samples/Dispersion2D-area.as +++ b/AE/samples/res_editor/_data/pipelines/samples/Dispersion2D-area.as @@ -119,7 +119,7 @@ for (uint i = 0; i < un_Constants.wavelengthToRGB.length(); ++i) { if ( wl == un_Constants.wavelengthToRGB[i].x ) - col = un_Constants.wavelengthToRGB[i].yzw; + col = un_Constants.wavelengthToRGB[i].yzw; // linear space } const float3 v [] = { diff --git a/AE/samples/res_editor/_data/pipelines/samples/FSBarycentric.as b/AE/samples/res_editor/_data/pipelines/samples/FSBarycentric.as index 1d36c302..1c22e129 100644 --- a/AE/samples/res_editor/_data/pipelines/samples/FSBarycentric.as +++ 
b/AE/samples/res_editor/_data/pipelines/samples/FSBarycentric.as @@ -82,7 +82,7 @@ #else const float thickness = 1.5; // pixels const float falloff = 6.0; // pixels - const float wireframe = FSBarycentricWireframe( thickness, falloff ); + const float wireframe = FSBarycentricWireframe( thickness, falloff ).x; #endif out_Color = float4( wireframe ); diff --git a/AE/samples/res_editor/_data/pipelines/samples/MaterialDepthBuffer-HiPerf/Mtr-1.as b/AE/samples/res_editor/_data/pipelines/samples/MaterialDepthBuffer-HiPerf/Mtr-1.as index 1d5fc1be..c6299661 100644 --- a/AE/samples/res_editor/_data/pipelines/samples/MaterialDepthBuffer-HiPerf/Mtr-1.as +++ b/AE/samples/res_editor/_data/pipelines/samples/MaterialDepthBuffer-HiPerf/Mtr-1.as @@ -80,7 +80,7 @@ #ifdef SH_FRAG #include "Color.glsl" #include "Noise.glsl" - #include "FragHelper.glsl" + #include "CodeTemplates.glsl" void Main () { diff --git a/AE/samples/res_editor/_data/pipelines/samples/MaterialDepthBuffer-HiPerf/Mtr-2.as b/AE/samples/res_editor/_data/pipelines/samples/MaterialDepthBuffer-HiPerf/Mtr-2.as index b7cbdc81..998a806c 100644 --- a/AE/samples/res_editor/_data/pipelines/samples/MaterialDepthBuffer-HiPerf/Mtr-2.as +++ b/AE/samples/res_editor/_data/pipelines/samples/MaterialDepthBuffer-HiPerf/Mtr-2.as @@ -80,7 +80,7 @@ #ifdef SH_FRAG #include "Color.glsl" #include "Noise.glsl" - #include "FragHelper.glsl" + #include "CodeTemplates.glsl" FBM_NOISE_Hash( PerlinNoise ) TURBULENCE_FBM_Hash( PerlinNoiseFBM ) diff --git a/AE/samples/res_editor/_data/pipelines/samples/MaterialDepthBuffer-HiPerf/Mtr-3.as b/AE/samples/res_editor/_data/pipelines/samples/MaterialDepthBuffer-HiPerf/Mtr-3.as index 92634d78..089ff3f1 100644 --- a/AE/samples/res_editor/_data/pipelines/samples/MaterialDepthBuffer-HiPerf/Mtr-3.as +++ b/AE/samples/res_editor/_data/pipelines/samples/MaterialDepthBuffer-HiPerf/Mtr-3.as @@ -86,7 +86,7 @@ #ifdef SH_FRAG #include "Color.glsl" #include "Hash.glsl" - #include "FragHelper.glsl" + #include 
"CodeTemplates.glsl" void RandomTexID (float scale, float bias, uint intBias, out uint texId, out float2 uv) { diff --git a/AE/samples/res_editor/_data/pipelines/samples/MaterialDepthBuffer-LowPerf/Mtr-1.as b/AE/samples/res_editor/_data/pipelines/samples/MaterialDepthBuffer-LowPerf/Mtr-1.as index 1bf3052a..308052bd 100644 --- a/AE/samples/res_editor/_data/pipelines/samples/MaterialDepthBuffer-LowPerf/Mtr-1.as +++ b/AE/samples/res_editor/_data/pipelines/samples/MaterialDepthBuffer-LowPerf/Mtr-1.as @@ -80,7 +80,7 @@ #ifdef SH_FRAG #include "Color.glsl" #include "Noise.glsl" - #include "FragHelper.glsl" + #include "CodeTemplates.glsl" void Main () { diff --git a/AE/samples/res_editor/_data/pipelines/samples/MaterialDepthBuffer-LowPerf/Mtr-2.as b/AE/samples/res_editor/_data/pipelines/samples/MaterialDepthBuffer-LowPerf/Mtr-2.as index 82a1779c..06cd175c 100644 --- a/AE/samples/res_editor/_data/pipelines/samples/MaterialDepthBuffer-LowPerf/Mtr-2.as +++ b/AE/samples/res_editor/_data/pipelines/samples/MaterialDepthBuffer-LowPerf/Mtr-2.as @@ -80,7 +80,7 @@ #ifdef SH_FRAG #include "Color.glsl" #include "Noise.glsl" - #include "FragHelper.glsl" + #include "CodeTemplates.glsl" FBM_NOISE_Hash( PerlinNoise ) diff --git a/AE/samples/res_editor/_data/pipelines/samples/MaterialDepthBuffer-LowPerf/Mtr-3.as b/AE/samples/res_editor/_data/pipelines/samples/MaterialDepthBuffer-LowPerf/Mtr-3.as index 2be97cea..9ae9bdc0 100644 --- a/AE/samples/res_editor/_data/pipelines/samples/MaterialDepthBuffer-LowPerf/Mtr-3.as +++ b/AE/samples/res_editor/_data/pipelines/samples/MaterialDepthBuffer-LowPerf/Mtr-3.as @@ -86,7 +86,7 @@ #ifdef SH_FRAG #include "Color.glsl" #include "Hash.glsl" - #include "FragHelper.glsl" + #include "CodeTemplates.glsl" void Main () { diff --git a/AE/samples/res_editor/_data/pipelines/samples/MaterialDepthBuffer-MedPerf/Mtr-1.as b/AE/samples/res_editor/_data/pipelines/samples/MaterialDepthBuffer-MedPerf/Mtr-1.as index 1d5fc1be..c6299661 100644 --- 
a/AE/samples/res_editor/_data/pipelines/samples/MaterialDepthBuffer-MedPerf/Mtr-1.as +++ b/AE/samples/res_editor/_data/pipelines/samples/MaterialDepthBuffer-MedPerf/Mtr-1.as @@ -80,7 +80,7 @@ #ifdef SH_FRAG #include "Color.glsl" #include "Noise.glsl" - #include "FragHelper.glsl" + #include "CodeTemplates.glsl" void Main () { diff --git a/AE/samples/res_editor/_data/pipelines/samples/MaterialDepthBuffer-MedPerf/Mtr-2.as b/AE/samples/res_editor/_data/pipelines/samples/MaterialDepthBuffer-MedPerf/Mtr-2.as index 761cdc78..6ef6f278 100644 --- a/AE/samples/res_editor/_data/pipelines/samples/MaterialDepthBuffer-MedPerf/Mtr-2.as +++ b/AE/samples/res_editor/_data/pipelines/samples/MaterialDepthBuffer-MedPerf/Mtr-2.as @@ -80,7 +80,7 @@ #ifdef SH_FRAG #include "Color.glsl" #include "Noise.glsl" - #include "FragHelper.glsl" + #include "CodeTemplates.glsl" FBM_NOISE_Hash( PerlinNoise ) TURBULENCE_FBM_Hash( PerlinNoiseFBM ) diff --git a/AE/samples/res_editor/_data/pipelines/samples/MaterialDepthBuffer-MedPerf/Mtr-3.as b/AE/samples/res_editor/_data/pipelines/samples/MaterialDepthBuffer-MedPerf/Mtr-3.as index e0b6c595..19ece0ae 100644 --- a/AE/samples/res_editor/_data/pipelines/samples/MaterialDepthBuffer-MedPerf/Mtr-3.as +++ b/AE/samples/res_editor/_data/pipelines/samples/MaterialDepthBuffer-MedPerf/Mtr-3.as @@ -86,7 +86,7 @@ #ifdef SH_FRAG #include "Color.glsl" #include "Hash.glsl" - #include "FragHelper.glsl" + #include "CodeTemplates.glsl" void RandomTexID (float scale, float bias, uint intBias, out uint texId, out float2 uv) { diff --git a/AE/samples/res_editor/_data/pipelines/samples/MeshShader-Cubes.as b/AE/samples/res_editor/_data/pipelines/samples/MeshShader-Cubes.as index 6b4a7da0..25246730 100644 --- a/AE/samples/res_editor/_data/pipelines/samples/MeshShader-Cubes.as +++ b/AE/samples/res_editor/_data/pipelines/samples/MeshShader-Cubes.as @@ -97,7 +97,7 @@ #include "Frustum.glsl" #include "Transform.glsl" - shared uint s_TaskCount; + WGShared uint s_TaskCount; ND_ bool 
IsVisible (float3 center, float radius) { return Frustum_TestSphere( un_PerPass.camera.frustum, center, radius ); diff --git a/AE/samples/res_editor/_data/pipelines/samples/Model-Cubemap.as b/AE/samples/res_editor/_data/pipelines/samples/Model-Cubemap.as index 526a8a18..53b864a7 100644 --- a/AE/samples/res_editor/_data/pipelines/samples/Model-Cubemap.as +++ b/AE/samples/res_editor/_data/pipelines/samples/Model-Cubemap.as @@ -89,8 +89,16 @@ void Main () { + #if 1 + uint idx = gl.InstanceIndex / un_Nodes.instanceCount; + int face = int(gl.InstanceIndex - idx * un_Nodes.instanceCount); // remainder of the division above, used as the cube face index + #else + // requires AE_shader_draw_parameters + uint idx = gl.BaseInstance; int face = gl.InstanceIndex - gl.BaseInstance; - ModelNode node = un_Nodes.elements[ gl.BaseInstance ]; + #endif + + ModelNode node = un_Nodes.elements[idx]; float4 world_pos = LocalPosToWorldSpace( node.transform * float4( in_Position, 1.0f )); gl.Position = un_CBuf.cubemapViewProj[face] * world_pos; diff --git a/AE/samples/res_editor/_data/pipelines/samples/Model-RT-1.as b/AE/samples/res_editor/_data/pipelines/samples/Model-RT.as similarity index 100% rename from AE/samples/res_editor/_data/pipelines/samples/Model-RT-1.as rename to AE/samples/res_editor/_data/pipelines/samples/Model-RT.as diff --git a/AE/samples/res_editor/_data/pipelines/samples/Model-1.as b/AE/samples/res_editor/_data/pipelines/samples/Model.as similarity index 99% rename from AE/samples/res_editor/_data/pipelines/samples/Model-1.as rename to AE/samples/res_editor/_data/pipelines/samples/Model.as index d75945d1..89b0ad4e 100644 --- a/AE/samples/res_editor/_data/pipelines/samples/Model-1.as +++ b/AE/samples/res_editor/_data/pipelines/samples/Model.as @@ -22,7 +22,7 @@ } { RC ppln = GraphicsPipeline( prefix+"t" ); - ppln.SetLayout( "model.pl" ); // [InitPipelineLayout()](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/pipeline_inc/Model.as) + ppln.SetLayout( "model.pl" ); //
[InitPipelineLayout()](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/pipeline_inc/ModelTypes.as) if ( withUV and withNorm ) ppln.SetVertexInput( "VB{Posf3, Normf3, UVf2}" ); else if ( withNorm ) ppln.SetVertexInput( "VB{Posf3, Normf3}" ); else diff --git a/AE/samples/res_editor/_data/pipelines/samples/VisibilityBuffer-pass1.as b/AE/samples/res_editor/_data/pipelines/samples/VisibilityBuffer-pass1.as new file mode 100644 index 00000000..9913205a --- /dev/null +++ b/AE/samples/res_editor/_data/pipelines/samples/VisibilityBuffer-pass1.as @@ -0,0 +1,95 @@ +// Copyright (c) Zhirnov Andrey. For more information see 'LICENSE' +#ifdef __INTELLISENSE__ +# include +# define SH_RAY_GEN +# include +#endif +//----------------------------------------------------------------------------- +#ifdef SCRIPT + + void ASmain () + { + RC ppln = RayTracingPipeline( "templ" ); + ppln.SetLayout( "rt-model.pl" ); + + // general + { + RC rg = Shader(); + rg.type = EShader::RayGen; + rg.LoadSelf(); + ppln.AddGeneralShader( "Main", rg ); + } + + // specialization + { + RC spec = ppln.AddSpecialization( "spec" ); + + spec.AddToRenderTech( "rtech", "main" ); // in SceneRayTracingPass + + // shader binding table + { + RC sbt = RayTracingShaderBinding( spec, "sbt" ); + + sbt.BindRayGen( "Main" ); + } + } + } + +#endif +//----------------------------------------------------------------------------- +#ifdef SH_RAY_GEN + #include "GlobalIndex.glsl" + #include "HWRayTracing.glsl" + + uint3 UnpackID (uint id) + { + return uint3( + id >> 31, // instanceId + (id >> 20) & 0x7FF, // geometryId + id & 0x000FFFFF ); // primitiveId + } + + uint PackID (uint instanceId, uint geometryId, uint primitiveId) + { + uint r = (instanceId << 31) | + ((geometryId & 0x7FF) << 20) | + (primitiveId & 0x000FFFFF); + + //if ( ! 
AllEqual( UnpackID( r ), uint3( instanceId, geometryId, primitiveId ))) + // return 0; + return r; + } + + void Main () + { + const int2 coord = GetGlobalCoord().xy; + float3 view_dir = Ray_From( un_PerPass.camera.invViewProj, float3(0.0), 0.f, GetGlobalCoordUNorm().xy ).dir; + + gl::RayQuery ray_query; + gl.rayQuery.Initialize( ray_query, un_RtScene, gl::RayFlags::Opaque, + 0xFF, un_PerPass.camera.pos, un_PerPass.camera.clipPlanes.x, + view_dir, un_PerPass.camera.clipPlanes.y ); + + if ( gl.rayQuery.Proceed( ray_query )) + { + if ( GetCandidateIntersectionType( ray_query ) == gl::RayQueryCandidateIntersection::Triangle ) + gl.rayQuery.ConfirmIntersection( ray_query ); + } + + uint id = ~0u; + float depth = 0.f; + + if ( GetCommittedIntersectionType( ray_query ) != gl::RayQueryCommittedIntersection::None ) + { + depth = GetCommittedIntersectionT( ray_query ); + id = PackID( GetCommittedIntersectionInstanceId( ray_query ), + GetCommittedIntersectionGeometryIndex( ray_query ), + GetCommittedIntersectionPrimitiveIndex( ray_query )); + } + + gl.image.Store( un_IDBuffer, coord, uint4(id) ); // R32U + gl.image.Store( un_Depth, coord, float4(depth) ); // R32F + } + +#endif +//----------------------------------------------------------------------------- diff --git a/AE/samples/res_editor/_data/pipelines/samples/VisibilityBuffer-pass2.as b/AE/samples/res_editor/_data/pipelines/samples/VisibilityBuffer-pass2.as new file mode 100644 index 00000000..7ad94b7e --- /dev/null +++ b/AE/samples/res_editor/_data/pipelines/samples/VisibilityBuffer-pass2.as @@ -0,0 +1,300 @@ +// Copyright (c) Zhirnov Andrey. 
For more information see 'LICENSE' +#ifdef __INTELLISENSE__ +# include +# define SH_RAY_GEN +# include +#endif +//----------------------------------------------------------------------------- +#ifdef SCRIPT + + void ASmain () + { + RC ppln = RayTracingPipeline( "templ" ); + ppln.SetLayout( "rt-model.pl" ); + + // general + { + RC rg = Shader(); + rg.type = EShader::RayGen; + rg.LoadSelf(); + ppln.AddGeneralShader( "Main", rg ); + } + + // specialization + { + RC spec = ppln.AddSpecialization( "spec" ); + + spec.AddToRenderTech( "rtech", "main" ); // in SceneRayTracingPass + + // shader binding table + { + RC sbt = RayTracingShaderBinding( spec, "sbt" ); + + sbt.BindRayGen( "Main" ); + } + } + } + +#endif +//----------------------------------------------------------------------------- +#ifdef SH_RAY_GEN + #include "GlobalIndex.glsl" + #include "HWRayTracing.glsl" + #include "ModelMaterial.glsl" + + #define PositionsRef packed_float3_AEPtr + #define NormalsRef packed_float3_AEPtr + #define TexcoordsRef float2_AEPtr + #define IndicesRef uint_AEPtr + + struct MeshAndMaterial + { + float3 pos; // world space + // float3 surfNormal; // world space // from triangle positions + float3 smoothNormal; // world space // from vertex attributes + float2 uv0; + float2 uv0_dx; + float2 uv0_dy; + ModelMaterial mtr; + }; + + + uint3 UnpackID (uint id) + { + return uint3( + id >> 31, // instanceId + (id >> 20) & 0x7FF, // geometryId + id & 0x000FFFFF ); // primitiveId + } + + float3 LocalPosToWorldSpace (float4x4 mat, float3 pos) { + return (mat * float4(pos, 1.0f)).xyz - un_PerPass.camera.pos; + } + + float4 LocalPosToClipSpace (float4x4 mat, float3 pos) { + return un_PerPass.camera.viewProj * float4(LocalPosToWorldSpace( mat, pos ), 1.0); + } + + float4 WorldPosToClipSpace (float3 pos) { + return un_PerPass.camera.viewProj * float4(pos, 1.0); + } + + + // from 
https://github.com/ConfettiFX/The-Forge/blob/master/Common_3/Renderer/VisibilityBuffer/Shaders/FSL/vb_shading_utilities.h.fsl + // Apache-2.0 license + //>>>> + #if 1 + #define rcp(VALUE) (1.0f / (VALUE)) + #define mul(x,y) ((x) * (y)) + + struct GradientInterpolationResults + { + float2 interp; + float2 dx; + float2 dy; + }; + + struct BarycentricDeriv + { + float3 m_lambda; + float3 m_ddx; + float3 m_ddy; + }; + + float3 rayTriangleIntersection (float3 p0, float3 p1, float3 p2, float3 o, float3 d) + { + float3 v0v1 = p1-p0; + float3 v0v2 = p2-p0; + float3 pvec = cross(d,v0v2); + float det = dot(v0v1,pvec); + float invDet = 1/det; + float3 tvec = o - p0; + float u = dot(tvec,pvec) * invDet; + float3 qvec = cross(tvec,v0v1); + float v = dot(d,qvec) *invDet; + float w = 1.0f - v - u; + return float3(w,u,v); + } + + BarycentricDeriv CalcRayBary (float3 pt0, float3 pt1, float3 pt2, float3 pixelNdc, float3 rayOrigin, float4x4 viewInv, float4x4 projInv, float2 twoOverScreenSize) + { + BarycentricDeriv ret; + + // On the near plane, calculate the NDC of two nearby pixels in X and Y directions + float3 ndcPos = pixelNdc; + float3 ndcDx = pixelNdc + float3(twoOverScreenSize.x, 0, 0); + float3 ndcDy = pixelNdc - float3(0, twoOverScreenSize.y, 0); + + // Inverse projection transform into view space + float4 viewPos = mul(projInv, float4(ndcPos, 1.0)); + float4 viewDx = mul(projInv, float4(ndcDx, 1.0)); + float4 viewDy = mul(projInv, float4(ndcDy, 1.0)); + + // Inverse view transform into world space + // By setting homogeneous coordinate W to 0, this directly generates ray directions + float3 rayDir = normalize(mul(viewInv, float4(viewPos.xyz, 0)).xyz); + float3 rayDirDx = normalize(mul(viewInv, float4(viewDx.xyz, 0)).xyz); + float3 rayDirDy = normalize(mul(viewInv, float4(viewDy.xyz, 0)).xyz); + + // Ray-triangle intersection for barycentric coordinates + float3 lambda = rayTriangleIntersection(pt0, pt1, pt2, rayOrigin, rayDir); + float3 lambdaDx = 
rayTriangleIntersection(pt0, pt1, pt2, rayOrigin, rayDirDx); + float3 lambdaDy = rayTriangleIntersection(pt0, pt1, pt2, rayOrigin, rayDirDy); + + // Derivatives + ret.m_lambda = lambda; + ret.m_ddx = lambdaDx - lambda; + ret.m_ddy = lambdaDy - lambda; + return ret; + } + // Computes the barycentric coordinates of 'pixelNdc' inside the triangle given by homogeneous points pt0..pt2, plus their derivatives along screen X and Y. + BarycentricDeriv CalcFullBary (float4 pt0, float4 pt1, float4 pt2, float2 pixelNdc, float2 two_over_windowsize) + { + BarycentricDeriv ret; + float3 invW = rcp(float3(pt0.w, pt1.w, pt2.w)); + //Project points on screen to calculate post projection positions in 2D + float2 ndc0 = pt0.xy * invW.x; + float2 ndc1 = pt1.xy * invW.y; + float2 ndc2 = pt2.xy * invW.z; + + // Computing partial derivatives and perspective-correct attribute interpolation with barycentric coordinates + // Equation for calculation taken from Appendix A of DAIS paper: + // https://cg.ivd.kit.edu/publications/2015/dais/DAIS.pdf + + // Calculating inverse of determinant (reciprocal of twice the signed area of the projected triangle). + float invDet = rcp(determinant(float2x2(ndc2 - ndc1, ndc0 - ndc1))); + + //determining the partial derivatives + // ddx[i] = (y[i+1] - y[i-1])/Determinant + ret.m_ddx = float3(ndc1.y - ndc2.y, ndc2.y - ndc0.y, ndc0.y - ndc1.y) * invDet * invW; + ret.m_ddy = float3(ndc2.x - ndc1.x, ndc0.x - ndc2.x, ndc1.x - ndc0.x) * invDet * invW; + // sum of partial derivatives. + float ddxSum = dot(ret.m_ddx, float3(1,1,1)); + float ddySum = dot(ret.m_ddy, float3(1,1,1)); + + // Delta vector from pixel's screen position to vertex 0 of the triangle. + float2 deltaVec = pixelNdc - ndc0; + + // Calculating interpolated W at point. + float interpInvW = invW.x + deltaVec.x*ddxSum + deltaVec.y*ddySum; + float interpW = rcp(interpInvW); + // The barycentric co-ordinate (m_lambda) is determined by perspective-correct interpolation. + // Equation taken from DAIS paper.
+ ret.m_lambda.x = interpW * (invW[0] + deltaVec.x*ret.m_ddx.x + deltaVec.y*ret.m_ddy.x); + ret.m_lambda.y = interpW * (0.0f + deltaVec.x*ret.m_ddx.y + deltaVec.y*ret.m_ddy.y); + ret.m_lambda.z = interpW * (0.0f + deltaVec.x*ret.m_ddx.z + deltaVec.y*ret.m_ddy.z); + + //Scaling from NDC to pixel units + ret.m_ddx *= two_over_windowsize.x; + ret.m_ddy *= two_over_windowsize.y; + ddxSum *= two_over_windowsize.x; + ddySum *= two_over_windowsize.y; + + ret.m_ddy *= -1.0f; + ddySum *= -1.0f; + + // This part fixes the derivatives error happening for the projected triangles. + // Instead of calculating the derivatives constantly across the 2D triangle we use a projected version + // of the gradients, this is more accurate and closely matches GPU raster behavior. + // Final gradient equation: ddx = (((lambda/w) + ddx) / (w+|ddx|)) - lambda + + // Calculating interpW at partial derivatives position sum. + float interpW_ddx = 1.0f / (interpInvW + ddxSum); + float interpW_ddy = 1.0f / (interpInvW + ddySum); + + // Calculating perspective projected derivatives. + ret.m_ddx = interpW_ddx*(ret.m_lambda*interpInvW + ret.m_ddx) - ret.m_lambda; + ret.m_ddy = interpW_ddy*(ret.m_lambda*interpInvW + ret.m_ddy) - ret.m_lambda; + + return ret; + } + + GradientInterpolationResults Interpolate2DWithDeriv (BarycentricDeriv deriv, float2 uv0, float2 uv1, float2 uv2) + { + float3 attr0 = float3(uv0.x, uv1.x, uv2.x); // u + float3 attr1 = float3(uv0.y, uv1.y, uv2.y); // v + + GradientInterpolationResults result; + // independently interpolate x and y attributes. + result.interp.x = dot(deriv.m_lambda, attr0); + result.interp.y = dot(deriv.m_lambda, attr1); + + // Calculate attributes' dx and dy (for texture sampling). 
+ result.dx.x = dot(attr0, deriv.m_ddx); + result.dx.y = dot(attr1, deriv.m_ddx); + result.dy.x = dot(attr0, deriv.m_ddy); + result.dy.y = dot(attr1, deriv.m_ddy); + return result; + } + #endif + //<<<<< + + // Rebuilds the shaded surface point for an unpacked visibility ID (x: instance, y: geometry, z: primitive): UV with gradients, smooth normal, position and material. + MeshAndMaterial UnpackTriangle (uint3 id) + { + ModelRTMesh_AERef mesh = un_RTInstances.meshesPerInstance[ id.x ][ id.y ]; + PositionsRef pos_addr = PositionsRef( mesh.positions ); + NormalsRef norm_addr = NormalsRef( mesh.normals ); + IndicesRef idx_addr = IndicesRef( mesh.indices ); + TexcoordsRef uv_addr = TexcoordsRef( mesh.texcoords ); + const uint3 idx = uint3( idx_addr.data[ id.z*3+0 ], idx_addr.data[ id.z*3+1 ], idx_addr.data[ id.z*3+2 ]); + float4x4 model_mat = un_RTInstances.modelMatPerInstance[ id.x ].data[ id.y ]; + float3x3 norm_mat = un_RTInstances.normalMatPerInstance[ id.x ].data[ id.y ]; + MeshAndMaterial result; + + float3 wpos0 = LocalPosToWorldSpace( model_mat, Cast( pos_addr.data[ idx.x ]) ); + float3 wpos1 = LocalPosToWorldSpace( model_mat, Cast( pos_addr.data[ idx.y ]) ); + float3 wpos2 = LocalPosToWorldSpace( model_mat, Cast( pos_addr.data[ idx.z ]) ); + + # if 0 + BarycentricDeriv deriv = CalcRayBary( wpos0, wpos1, wpos2, + float3(GetGlobalCoordSNorm().xy, 0.0), float3(0.0), + MatInverse(un_PerPass.camera.view), MatInverse(un_PerPass.camera.proj), + 2.0 / float2(GetGlobalSize().xy) ); + # else + BarycentricDeriv deriv = CalcFullBary( WorldPosToClipSpace( wpos0 ), + WorldPosToClipSpace( wpos1 ), + WorldPosToClipSpace( wpos2 ), + GetGlobalCoordSNorm().xy, + 2.0 / float2(GetGlobalSize().xy) ); + # endif + GradientInterpolationResults uv_res = Interpolate2DWithDeriv( deriv, uv_addr.data[idx.x], uv_addr.data[idx.y], uv_addr.data[idx.z] ); + + result.uv0 = uv_res.interp; + result.uv0_dx = uv_res.dx; + result.uv0_dy = uv_res.dy; + + result.smoothNormal = Normalize( norm_mat * + BaryLerp( Cast( norm_addr.data[ idx.x ]), + Cast( norm_addr.data[ idx.y ]), + Cast( norm_addr.data[ idx.z ]), + deriv.m_lambda )); + result.pos = BaryLerp( wpos0,
wpos1, wpos2, deriv.m_lambda ); + + result.mtr = un_Materials.elements[ un_RTInstances.materialsPerInstance[ id.x ].data[ id.y ]]; + + return result; + } + + + void Main () + { + const int2 coord = GetGlobalCoord().xy; + const uint id = gl.image.Load( un_IDBuffer, coord ).r; + + if ( id == ~0u ) + { + gl.image.Store( un_ColorBuf, coord, float4(0.0f, 1.f, 1.f, 1.f) ); + return; + } + + MeshAndMaterial mm = UnpackTriangle( UnpackID( id )); + float4 albedo = SampleGradAlbedo( mm.mtr, mm.uv0, mm.uv0_dx, mm.uv0_dy ); + + albedo *= CalcLighting( mm.pos, mm.smoothNormal ); + + gl.image.Store( un_ColorBuf, coord, albedo ); + } + +#endif +//----------------------------------------------------------------------------- diff --git a/AE/samples/res_editor/_data/pipelines/sphere/SphericalCube-1.as b/AE/samples/res_editor/_data/pipelines/sphere/SphericalCube-1.as index 97c4f07d..df77c8c3 100644 --- a/AE/samples/res_editor/_data/pipelines/sphere/SphericalCube-1.as +++ b/AE/samples/res_editor/_data/pipelines/sphere/SphericalCube-1.as @@ -145,7 +145,7 @@ const float thickness = 1.0; // pixels const float falloff = 1.0; // pixels - out_Color.rgb *= FSBarycentricWireframe( thickness, falloff ); + out_Color.rgb *= FSBarycentricWireframe( thickness, falloff ).x; } // screen space quad/circle diff --git a/AE/samples/res_editor/_data/pipelines/sphere/SphericalCube-2.as b/AE/samples/res_editor/_data/pipelines/sphere/SphericalCube-2.as index f6bd2f57..7b967836 100644 --- a/AE/samples/res_editor/_data/pipelines/sphere/SphericalCube-2.as +++ b/AE/samples/res_editor/_data/pipelines/sphere/SphericalCube-2.as @@ -106,7 +106,7 @@ const float thickness = 1.0; // pixels const float falloff = 1.0; // pixels - out_Color.rgb *= FSBarycentricWireframe( thickness, falloff ); + out_Color.rgb *= FSBarycentricWireframe( thickness, falloff ).x; } out_Color.a = 1.0; } diff --git a/AE/samples/res_editor/_data/pipelines/sphere/SphericalCube-3.as b/AE/samples/res_editor/_data/pipelines/sphere/SphericalCube-3.as 
index ceff6011..ef491892 100644 --- a/AE/samples/res_editor/_data/pipelines/sphere/SphericalCube-3.as +++ b/AE/samples/res_editor/_data/pipelines/sphere/SphericalCube-3.as @@ -112,7 +112,7 @@ const float thickness = 1.0; // pixels const float falloff = 1.0; // pixels - out_Color.rgb *= FSBarycentricWireframe( thickness, falloff ); + out_Color.rgb *= FSBarycentricWireframe( thickness, falloff ).x; }*/ // screen space circle diff --git a/AE/samples/res_editor/_data/pipelines/sphere/SphericalCube-4.as b/AE/samples/res_editor/_data/pipelines/sphere/SphericalCube-4.as index a56e5120..dab4036b 100644 --- a/AE/samples/res_editor/_data/pipelines/sphere/SphericalCube-4.as +++ b/AE/samples/res_editor/_data/pipelines/sphere/SphericalCube-4.as @@ -121,7 +121,7 @@ const float thickness = 1.0; // pixels const float falloff = 1.0; // pixels - out_Color.rgb = Lerp( float3(0.0, 0.5, 1.0), out_Color.rgb, FSBarycentricWireframe( thickness, falloff )); + out_Color.rgb = Lerp( float3(0.0, 0.5, 1.0), out_Color.rgb, FSBarycentricWireframe( thickness, falloff ).x); } break; } diff --git a/AE/samples/res_editor/_data/pipelines/sphere/SphericalCube-5a.as b/AE/samples/res_editor/_data/pipelines/sphere/SphericalCube-5a.as index 2cc4c0d3..332d0ffc 100644 --- a/AE/samples/res_editor/_data/pipelines/sphere/SphericalCube-5a.as +++ b/AE/samples/res_editor/_data/pipelines/sphere/SphericalCube-5a.as @@ -74,7 +74,7 @@ const float2 coord = un_Geometry.positions[idx].xy; const int face = int(un_Geometry.positions[idx].z); - if ( iMode == 0 ) + if ( iProjInFS == 0 ) { switch ( iProj ) { @@ -133,7 +133,7 @@ void Main () { - if ( iMode == 0 ) + if ( iProjInFS == 0 ) out_Color = float4(Normalize(In.normal), 1.0); else out_Color = float4(SphereNormal( In.normal.xy, ECubeFace(In.normal.z) ), 1.0); diff --git a/AE/samples/res_editor/_data/pipelines/sphere/SphericalCube-5b.as b/AE/samples/res_editor/_data/pipelines/sphere/SphericalCube-5b.as index a56e5120..dab4036b 100644 --- 
a/AE/samples/res_editor/_data/pipelines/sphere/SphericalCube-5b.as +++ b/AE/samples/res_editor/_data/pipelines/sphere/SphericalCube-5b.as @@ -121,7 +121,7 @@ const float thickness = 1.0; // pixels const float falloff = 1.0; // pixels - out_Color.rgb = Lerp( float3(0.0, 0.5, 1.0), out_Color.rgb, FSBarycentricWireframe( thickness, falloff )); + out_Color.rgb = Lerp( float3(0.0, 0.5, 1.0), out_Color.rgb, FSBarycentricWireframe( thickness, falloff ).x); } break; } diff --git a/AE/samples/res_editor/_data/pipelines/tests/ProceduralGrid.as b/AE/samples/res_editor/_data/pipelines/tests/ProceduralGrid.as index f5d521d0..bbb53633 100644 --- a/AE/samples/res_editor/_data/pipelines/tests/ProceduralGrid.as +++ b/AE/samples/res_editor/_data/pipelines/tests/ProceduralGrid.as @@ -35,7 +35,7 @@ rs.inputAssembly.topology = EPrimitive::TriangleStrip; rs.rasterization.frontFaceCCW = true; - rs.rasterization.cullMode = ECullMode::None; + rs.rasterization.cullMode = ECullMode::Back; spec.SetRenderState( rs ); } @@ -103,7 +103,7 @@ case 1 : #ifdef AE_fragment_shader_barycentric - wireframe = FSBarycentricWireframe( 0.0, 1.0 ); + wireframe = FSBarycentricWireframe( 0.0, 1.0 ).x; #endif break; } diff --git a/AE/samples/res_editor/_data/pipelines/tests/TriangleBarycentrics.as b/AE/samples/res_editor/_data/pipelines/tests/TriangleBarycentrics.as new file mode 100644 index 00000000..c9ea8301 --- /dev/null +++ b/AE/samples/res_editor/_data/pipelines/tests/TriangleBarycentrics.as @@ -0,0 +1,275 @@ +// Copyright (c) Zhirnov Andrey. 
For more information see 'LICENSE' +#ifdef __INTELLISENSE__ +# include +# include +#endif +//----------------------------------------------------------------------------- +#ifdef SCRIPT + + void ASmain () + { + { + RC ds = DescriptorSetLayout( "mtr.ds" ); + ds.UniformBuffer( EShaderStages::Vertex, "un_PerObject", "UnifiedGeometryMaterialUB" ); + ds.StorageBuffer( EShaderStages::Vertex | EShaderStages::Fragment, "un_VBuffer", "VBuffer", EResourceState::ShaderStorage_Read ); + }{ + RC pl = PipelineLayout( "pl" ); + pl.DSLayout( "pass", 0, "pass.ds" ); + pl.DSLayout( "material", 1, "mtr.ds" ); + } + + { + RC ppln = GraphicsPipeline( "tmpl" ); + ppln.SetLayout( "pl" ); + ppln.SetFragmentOutputFromRenderTech( "rtech", "main" ); + + { + RC vs = Shader(); + vs.LoadSelf(); + ppln.SetVertexShader( vs ); + }{ + RC fs = Shader(); + fs.LoadSelf(); + ppln.SetFragmentShader( fs ); + } + + // specialization + { + RC spec = ppln.AddSpecialization( "spec" ); + spec.AddToRenderTech( "rtech", "main" ); // in ScriptSceneGraphicsPass + + RenderState rs; + + rs.inputAssembly.topology = EPrimitive::TriangleList; + + rs.rasterization.frontFaceCCW = true; + rs.rasterization.cullMode = ECullMode::None; + + spec.SetRenderState( rs ); + } + } + } + +#endif +//----------------------------------------------------------------------------- +#ifdef SH_VERT + + void Main () + { + const uint idx = iShape*3 + gl.VertexIndex; + const float4x4 proj = un_VBuffer.projection[iProj]; + + gl.Position = proj * float4( un_VBuffer.vertices[idx] + iCameraPos, 1.f ); + } + +#endif +//----------------------------------------------------------------------------- +#ifdef SH_FRAG + #include "Matrix.glsl" + #include "GlobalIndex.glsl" + + int3 GetGlobalSize() { + return int3( un_PerPass.resolution.xy, 1 ); + } + + float4 LocalToClipSpace (float3 v) { + return un_VBuffer.projection[iProj] * float4(v + iCameraPos, 1.0); + } + + float3 LocalToWorldSpace (float3 v) { + return v + iCameraPos; + } + + float3 CalcCurrentPos 
() + { + return float3( GetGlobalCoordSNorm().xy, gl.FragCoord.z ); + } + + + float3 CalcBarycentrics (float4 p0, float4 p1, float4 p2, float3 p) + { + float3 v0 = p1.xyz - p0.xyz; + float3 v1 = p2.xyz - p0.xyz; + float3 v2 = p - p0.xyz; + + // determinant + float d00 = Dot( v0, v0 ); + float d01 = Dot( v0, v1 ); + float d11 = Dot( v1, v1 ); + float d20 = Dot( v2, v0 ); + float d21 = Dot( v2, v1 ); + float denom = d00 * d11 - d01 * d01; + + float3 bar; + bar.y = (d11 * d20 - d01 * d21) / denom; + bar.z = (d00 * d21 - d01 * d20) / denom; + bar.x = 1.0 - bar.y - bar.z; + return bar; + } + + + // from https://github.com/ConfettiFX/The-Forge/blob/master/Common_3/Renderer/VisibilityBuffer/Shaders/FSL/vb_shading_utilities.h.fsl + // Apache-2.0 license + //>>>> + #define rcp(VALUE) (1.0f / (VALUE)) + #define mul(x,y) ((x) * (y)) + + struct BarycentricDeriv + { + float3 m_lambda; + float3 m_ddx; + float3 m_ddy; + }; + + float3 rayTriangleIntersection (float3 p0, float3 p1, float3 p2, float3 o, float3 d) + { + float3 v0v1 = p1-p0; + float3 v0v2 = p2-p0; + float3 pvec = cross(d,v0v2); + float det = dot(v0v1,pvec); + float invDet = 1/det; + float3 tvec = o - p0; + float u = dot(tvec,pvec) * invDet; + float3 qvec = cross(tvec,v0v1); + float v = dot(d,qvec) *invDet; + float w = 1.0f - v - u; + return float3(w,u,v); + } + + BarycentricDeriv CalcRayBary (float3 pt0, float3 pt1, float3 pt2, float3 pixelNdc, float3 rayOrigin, float4x4 viewInv, float4x4 projInv, float2 twoOverScreenSize) + { + BarycentricDeriv ret; + + // On the near plane, calculate the NDC of two nearby pixels in X and Y directions + float3 ndcPos = pixelNdc; + float3 ndcDx = pixelNdc + float3(twoOverScreenSize.x, 0, 0); + float3 ndcDy = pixelNdc - float3(0, twoOverScreenSize.y, 0); + + // Inverse projection transform into view space + float4 viewPos = mul(projInv, float4(ndcPos, 1.0)); + float4 viewDx = mul(projInv, float4(ndcDx, 1.0)); + float4 viewDy = mul(projInv, float4(ndcDy, 1.0)); + + // Inverse view 
transform into world space + // By setting homogeneous coordinate W to 0, this directly generates ray directions + float3 rayDir = normalize(mul(viewInv, float4(viewPos.xyz, 0)).xyz); + float3 rayDirDx = normalize(mul(viewInv, float4(viewDx.xyz, 0)).xyz); + float3 rayDirDy = normalize(mul(viewInv, float4(viewDy.xyz, 0)).xyz); + + // Ray-triangle intersection for barycentric coordinates + float3 lambda = rayTriangleIntersection(pt0, pt1, pt2, rayOrigin, rayDir); + float3 lambdaDx = rayTriangleIntersection(pt0, pt1, pt2, rayOrigin, rayDirDx); + float3 lambdaDy = rayTriangleIntersection(pt0, pt1, pt2, rayOrigin, rayDirDy); + + // Derivatives + ret.m_lambda = lambda; + ret.m_ddx = lambdaDx - lambda; + ret.m_ddy = lambdaDy - lambda; + return ret; + } + + BarycentricDeriv CalcFullBary (float4 pt0, float4 pt1, float4 pt2, float2 pixelNdc, float2 two_over_windowsize) + { + BarycentricDeriv ret; + float3 invW = rcp(float3(pt0.w, pt1.w, pt2.w)); + //Project points on screen to calculate post projection positions in 2D + float2 ndc0 = pt0.xy * invW.x; + float2 ndc1 = pt1.xy * invW.y; + float2 ndc2 = pt2.xy * invW.z; + + // Computing partial derivatives and prospective correct attribute interpolation with barycentric coordinates + // Equation for calculation taken from Appendix A of DAIS paper: + // https://cg.ivd.kit.edu/publications/2015/dais/DAIS.pdf + + // Calculating inverse of determinant(rcp of area of triangle). + float invDet = rcp(determinant(float2x2(ndc2 - ndc1, ndc0 - ndc1))); + + //determining the partial derivatives + // ddx[i] = (y[i+1] - y[i-1])/Determinant + ret.m_ddx = float3(ndc1.y - ndc2.y, ndc2.y - ndc0.y, ndc0.y - ndc1.y) * invDet * invW; + ret.m_ddy = float3(ndc2.x - ndc1.x, ndc0.x - ndc2.x, ndc1.x - ndc0.x) * invDet * invW; + // sum of partial derivatives. + float ddxSum = dot(ret.m_ddx, float3(1,1,1)); + float ddySum = dot(ret.m_ddy, float3(1,1,1)); + + // Delta vector from pixel's screen position to vertex 0 of the triangle. 
+ float2 deltaVec = pixelNdc - ndc0; + + // Calculating interpolated W at point. + float interpInvW = invW.x + deltaVec.x*ddxSum + deltaVec.y*ddySum; + float interpW = rcp(interpInvW); + // The barycentric co-ordinate (m_lambda) is determined by perspective-correct interpolation. + // Equation taken from DAIS paper. + ret.m_lambda.x = interpW * (invW[0] + deltaVec.x*ret.m_ddx.x + deltaVec.y*ret.m_ddy.x); + ret.m_lambda.y = interpW * (0.0f + deltaVec.x*ret.m_ddx.y + deltaVec.y*ret.m_ddy.y); + ret.m_lambda.z = interpW * (0.0f + deltaVec.x*ret.m_ddx.z + deltaVec.y*ret.m_ddy.z); + + //Scaling from NDC to pixel units + ret.m_ddx *= two_over_windowsize.x; + ret.m_ddy *= two_over_windowsize.y; + ddxSum *= two_over_windowsize.x; + ddySum *= two_over_windowsize.y; + + ret.m_ddy *= -1.0f; + ddySum *= -1.0f; + + // This part fixes the derivatives error happening for the projected triangles. + // Instead of calculating the derivatives constantly across the 2D triangle we use a projected version + // of the gradients, this is more accurate and closely matches GPU raster behavior. + // Final gradient equation: ddx = (((lambda/w) + ddx) / (w+|ddx|)) - lambda + + // Calculating interpW at partial derivatives position sum. + float interpW_ddx = 1.0f / (interpInvW + ddxSum); + float interpW_ddy = 1.0f / (interpInvW + ddySum); + + // Calculating perspective projected derivatives. 
+ ret.m_ddx = interpW_ddx*(ret.m_lambda*interpInvW + ret.m_ddx) - ret.m_lambda; + ret.m_ddy = interpW_ddy*(ret.m_lambda*interpInvW + ret.m_ddy) - ret.m_lambda; + + return ret; + } + //<<<<< + + + void Main () + { + // transform triangle to clip space + float4 p0 = LocalToClipSpace( un_VBuffer.vertices[iShape*3+0] ); + float4 p1 = LocalToClipSpace( un_VBuffer.vertices[iShape*3+1] ); + float4 p2 = LocalToClipSpace( un_VBuffer.vertices[iShape*3+2] ); + float3 ip = CalcCurrentPos(); + + float3 ref_bar = gl.BaryCoord; + + // TODO: works only for ortho projection + float3 bar1 = CalcBarycentrics( p0, p1, p2, ip ); + + float3 bar2 = CalcFullBary( p0, p1, p2, GetGlobalCoordSNorm().xy, 2.0 / un_PerPass.resolution.xy ).m_lambda; + + // only for perspective projection + float3 bar3 = CalcRayBary( LocalToWorldSpace( un_VBuffer.vertices[iShape*3+0] ), + LocalToWorldSpace( un_VBuffer.vertices[iShape*3+1] ), + LocalToWorldSpace( un_VBuffer.vertices[iShape*3+2] ), + float3(GetGlobalCoordSNorm().xy, 0.0), float3(0.0), + f4x4_Identity(), MatInverse(un_VBuffer.projection[iProj]), + 2.0 / un_PerPass.resolution.xy ).m_lambda; + float scale = Exp10( float(iScale) ); + + float3 bar; + switch ( iMode ) + { + case 0 : bar = ref_bar; break; + case 1 : bar = bar1; break; + case 2 : bar = bar2; break; + case 3 : bar = bar3; break; + } + switch ( iCmp ) + { + case 0 : out_Color = float4(bar, 1.0); break; + case 1 : out_Color = float4(Abs( ref_bar - bar ) * scale, 1.0); break; + case 2 : out_Color = float4(Distance( ref_bar, bar ) * scale); break; + } + } + +#endif +//----------------------------------------------------------------------------- diff --git a/AE/samples/res_editor/_data/pipelines/tests/Triangulation.as b/AE/samples/res_editor/_data/pipelines/tests/Triangulation.as index 164083fa..2a95833c 100644 --- a/AE/samples/res_editor/_data/pipelines/tests/Triangulation.as +++ b/AE/samples/res_editor/_data/pipelines/tests/Triangulation.as @@ -118,7 +118,7 @@ const float thickness = 1.5; // 
pixels const float falloff = 6.0; // pixels - out_Color *= FSBarycentricWireframe( thickness, falloff ); + out_Color *= FSBarycentricWireframe( thickness, falloff ).x; } // normals diff --git a/AE/samples/res_editor/_data/scripts/callable/GenPlanet.as b/AE/samples/res_editor/_data/scripts/callable/GenPlanet.as index e249723d..79621806 100644 --- a/AE/samples/res_editor/_data/scripts/callable/GenPlanet.as +++ b/AE/samples/res_editor/_data/scripts/callable/GenPlanet.as @@ -127,7 +127,7 @@ // positions with 1 pixel border for normals calculation - shared float3 s_Positions[ gl.WorkGroupSize.x * gl.WorkGroupSize.y ]; + WGShared float3 s_Positions[ gl.WorkGroupSize.x * gl.WorkGroupSize.y ]; float3 ReadPosition (int2 local) @@ -177,8 +177,8 @@ FBM_NOISE_Hash( PerlinNoise ) TURBULENCE_FBM_Hash( PerlinNoiseFBM ) - shared float3 s_Positions[ gl.WorkGroupSize.x * gl.WorkGroupSize.y ]; - shared float3 s_Normals [ gl.WorkGroupSize.x * gl.WorkGroupSize.y ]; + WGShared float3 s_Positions[ gl.WorkGroupSize.x * gl.WorkGroupSize.y ]; + WGShared float3 s_Normals [ gl.WorkGroupSize.x * gl.WorkGroupSize.y ]; void Main () diff --git a/AE/samples/res_editor/_data/scripts/perf/BufferStorage.as b/AE/samples/res_editor/_data/scripts/perf/BufferStorage.as new file mode 100644 index 00000000..04c37ec5 --- /dev/null +++ b/AE/samples/res_editor/_data/scripts/perf/BufferStorage.as @@ -0,0 +1,185 @@ +// Copyright (c) Zhirnov Andrey. 
For more information see 'LICENSE' +/* + Same as [Storage](Storage.as) but with tunable data size per thread +*/ +#ifdef __INTELLISENSE__ +# include +# include +# define MODE 0 +# define GEN_NOISE +#endif +//----------------------------------------------------------------------------- +#ifdef SCRIPT + + void ASmain () + { + // initialize + RC tex_dim = DynamicUInt(); + RC dim = tex_dim.Mul( 1024 ).Dimension2(); + const uint2 local = uint2(8); + + RC buf16 = Buffer(); + RC buf32 = Buffer(); + RC buf64 = Buffer(); + RC buf128 = Buffer(); + + RC buf16_size = dim.Area(); + RC buf32_size = buf16_size.Div( 2 ); + RC buf64_size = buf16_size.Div( 4 ); + RC buf128_size = buf16_size.Div( 8 ); + + RC mode = DynamicUInt(); + RC count = DynamicUInt(); + RC gen_tex = DynamicUInt(); + + buf16.ArrayLayout( + "SBuf16", + "float4 data;", + buf16_size ); + + buf32.ArrayLayout( + "SBuf32", + "float2x4 data;", + buf32_size ); + + buf64.ArrayLayout( + "SBuf64", + "float4x4 data;", + buf64_size ); + + buf128.ArrayLayout( + "SBuf128", + "float4 data[8];", + buf128_size ); + + Slider( tex_dim, "TexDim", 1, 8, 2 ); + Slider( mode, "Mode", 0, 3 ); + Slider( count, "Repeat", 1, 32 ); + Slider( gen_tex, "GenTex", 0, 1, 1 ); + + // render loop + { + RC pass = ComputePass( "", "GEN_NOISE" ); + pass.ArgOut( "un_Buffer16", buf16 ); + pass.ArgOut( "un_Buffer32", buf32 ); + pass.ArgOut( "un_Buffer64", buf64 ); + pass.ArgOut( "un_Buffer128", buf128 ); + pass.LocalSize( local ); + pass.DispatchThreads( dim ); + pass.EnableIfEqual( gen_tex, 1 ); + } + //-------------------- + { + RC pass = ComputePass( "", "MODE=0" ); + pass.ArgInOut( "un_Buffer", buf16 ); + pass.LocalSize( local.x*local.y ); + pass.DispatchThreads( buf16_size ); + pass.EnableIfEqual( mode, 0 ); + pass.Repeat( count ); + }{ + RC pass = ComputePass( "", "MODE=1" ); + pass.ArgInOut( "un_Buffer", buf32 ); + pass.LocalSize( local.x*local.y ); + pass.DispatchThreads( buf32_size ); + pass.EnableIfEqual( mode, 1 ); + pass.Repeat( count ); + 
}{ + RC pass = ComputePass( "", "MODE=2" ); + pass.ArgInOut( "un_Buffer", buf64 ); + pass.LocalSize( local.x*local.y ); + pass.DispatchThreads( buf64_size ); + pass.EnableIfEqual( mode, 2 ); + pass.Repeat( count ); + }{ + RC pass = ComputePass( "", "MODE=3" ); + pass.ArgInOut( "un_Buffer", buf128 ); + pass.LocalSize( local.x*local.y ); + pass.DispatchThreads( buf128_size ); + pass.EnableIfEqual( mode, 3 ); + pass.Repeat( count ); + } + } + +#endif +//----------------------------------------------------------------------------- +#ifdef MODE + #include "GlobalIndex.glsl" + #include "CodeTemplates.glsl" + + float4 Update (float4 val) + { + return val * val + 0.001; + } + + void Main () + { + const uint idx = GetGlobalIndex(); + + #if MODE == 0 + { + float4 data = un_Buffer.elements[ idx ].data; + data = Update( data ); + un_Buffer.elements[ idx ].data = data; + } + #elif MODE == 1 + { + float2x4 data = un_Buffer.elements[ idx ].data; + data[0] = Update( data[0] ); + data[1] = Update( data[1] ); + un_Buffer.elements[ idx ].data = data; + } + #elif MODE == 2 + { + float4x4 data = un_Buffer.elements[ idx ].data; + data[0] = Update( data[0] ); + data[1] = Update( data[1] ); + data[2] = Update( data[2] ); + data[3] = Update( data[3] ); + un_Buffer.elements[ idx ].data = data; + } + #elif MODE == 3 + { + float4 data[8] = un_Buffer.elements[ idx ].data; + + [[unroll]] for (uint i = 0; i < 8; ++i) + data[i] = Update( data[i] ); + + un_Buffer.elements[ idx ].data = data; + } + #else + # error unsupported MODE + #endif + } + +#endif +//----------------------------------------------------------------------------- +#ifdef GEN_NOISE + #include "GlobalIndex.glsl" + #include "Hash.glsl" + #include "Color.glsl" + + void Main () + { + float2 uv = float2(GetGlobalCoord().xy); + float4 col0 = Rainbow( Hash_Uniform( uv, 0.11 )); + float4 col1 = Rainbow( Hash_Uniform( uv, 0.22 )); + float4 col2 = Rainbow( Hash_Uniform( uv, 0.33 )); + float4 col3 = Rainbow( Hash_Uniform( uv, 0.44 )); + 
uint idx = GetGlobalIndex(); + + un_Buffer16.elements[ idx ].data = col0; + un_Buffer32.elements[ idx ].data = float2x4( col0, col1 ); + un_Buffer64.elements[ idx ].data = float4x4( col0, col1, col2, col3 ); + + un_Buffer128.elements[ idx ].data[0] = col0; + un_Buffer128.elements[ idx ].data[1] = col1; + un_Buffer128.elements[ idx ].data[2] = col2; + un_Buffer128.elements[ idx ].data[3] = col3; + un_Buffer128.elements[ idx ].data[4] = col0 + col1; + un_Buffer128.elements[ idx ].data[5] = col1 + col2; + un_Buffer128.elements[ idx ].data[6] = col2 + col3; + un_Buffer128.elements[ idx ].data[7] = col0 + col2; + } + +#endif +//----------------------------------------------------------------------------- diff --git a/AE/samples/res_editor/_data/scripts/perf/ImageStorage-1.as b/AE/samples/res_editor/_data/scripts/perf/ImageStorage-1.as new file mode 100644 index 00000000..d0e4caba --- /dev/null +++ b/AE/samples/res_editor/_data/scripts/perf/ImageStorage-1.as @@ -0,0 +1,141 @@ +// Copyright (c) Zhirnov Andrey. 
For more information see 'LICENSE' +#ifdef __INTELLISENSE__ +# include +# include +# define MODE +# define GEN_NOISE +#endif +//----------------------------------------------------------------------------- +#ifdef SCRIPT + + void ASmain () + { + // initialize + RC tex_dim = DynamicUInt(); + RC dim = tex_dim.Mul( 1024 ).Dimension2(); + + RC rt0 = Image( EPixelFormat::RGBA8_UNorm, dim ); + RC rt1 = Image( EPixelFormat::RGBA8_UNorm, dim ); + RC rt2 = Image( EPixelFormat::RGBA8_UNorm, dim ); + RC rt3 = Image( EPixelFormat::RGBA8_UNorm, dim ); + + RC mode = DynamicUInt(); + RC count = DynamicUInt(); + RC gen_tex = DynamicUInt(); + RC noise_step = DynamicUInt(); + + Slider( tex_dim, "TexDim", 1, 8, 2 ); + Slider( mode, "Mode", 0, 1 ); + Slider( count, "Repeat", 1, 32 ); + Slider( gen_tex, "GenTex", 0, 1, 1 ); + Slider( noise_step, "NoiseStep", 0, 4 ); + + // render loop + { + RC pass = Postprocess( "", "GEN_NOISE" ); + pass.Output( "out_Color0", rt0 ); + pass.Output( "out_Color1", rt1 ); + pass.Output( "out_Color2", rt2 ); + pass.Output( "out_Color3", rt3 ); + pass.Constant( "iNoiseStep", noise_step ); + pass.EnableIfEqual( gen_tex, 1 ); + }{ + RC pass = Postprocess( "", "MODE=0" ); + pass.InOut( "in_Color0", "out_Color0", rt0 ); + pass.InOut( "in_Color1", "out_Color1", rt1 ); + pass.InOut( "in_Color2", "out_Color2", rt2 ); + pass.InOut( "in_Color3", "out_Color3", rt3 ); + pass.EnableIfEqual( mode, 0 ); + pass.Repeat( count ); + }{ + RC pass = Postprocess( "", "MODE=1" ); + pass.InOut( "in_Color0", "out_Color0", rt0 ); + pass.InOut( "in_Color1", "out_Color1", rt1 ); + pass.EnableIfEqual( mode, 1 ); + pass.Repeat( count ); + } + } + +#endif +//----------------------------------------------------------------------------- +#ifdef SH_FRAG + + float Encode (float4 color) + { + return uintBitsToFloat( packUnorm4x8( color )); + } + + float4 Decode (float value) + { + return unpackUnorm4x8( floatBitsToUint( value )); + } + +#endif 
+//----------------------------------------------------------------------------- +#ifdef MODE + #include "GlobalIndex.glsl" + #include "CodeTemplates.glsl" + + float4 Update (float4 val) + { + return val * val + 0.001; + } + + float2 Update (float2 val) + { + return val * val + 0.001; + } + + void Main () + { + #if MODE == 0 + { + float4 data; + data.r = Encode( gl.subpass.Load( in_Color0 )); + data.g = Encode( gl.subpass.Load( in_Color1 )); + data.b = Encode( gl.subpass.Load( in_Color2 )); + data.a = Encode( gl.subpass.Load( in_Color3 )); + + data = Update( data ); + + out_Color0 = Decode( data.r ); + out_Color1 = Decode( data.g ); + out_Color2 = Decode( data.b ); + out_Color3 = Decode( data.a ); + } + #elif MODE == 1 + { + float2 data; + data.r = Encode( gl.subpass.Load( in_Color0 )); + data.g = Encode( gl.subpass.Load( in_Color1 )); + + data = Update( data ); + + out_Color0 = Decode( data.r ); + out_Color1 = Decode( data.g ); + } + #else + # error unsupported MODE + #endif + } + +#endif +//----------------------------------------------------------------------------- +#ifdef GEN_NOISE + #include "GlobalIndex.glsl" + #include "Hash.glsl" + #include "Color.glsl" + + void Main () + { + float2 uv = float2(GetGlobalCoord().xy >> iNoiseStep); + float4 col = Rainbow( Hash_Uniform( uv, 0.111 )); + + out_Color0 = Decode( col.r ); + out_Color1 = Decode( col.g ); + out_Color2 = Decode( col.b ); + out_Color3 = Decode( col.a ); + } + +#endif +//----------------------------------------------------------------------------- diff --git a/AE/samples/res_editor/_data/scripts/perf/ImageStorage-2.as b/AE/samples/res_editor/_data/scripts/perf/ImageStorage-2.as new file mode 100644 index 00000000..2640b3b5 --- /dev/null +++ b/AE/samples/res_editor/_data/scripts/perf/ImageStorage-2.as @@ -0,0 +1,101 @@ +// Copyright (c) Zhirnov Andrey. 
For more information see 'LICENSE' +#ifdef __INTELLISENSE__ +# include +# include +# define PROCESS +# define GEN_NOISE +#endif +//----------------------------------------------------------------------------- +#ifdef SCRIPT + + void ASmain () + { + // initialize + RC tex_dim = DynamicUInt(); + RC dim = tex_dim.Mul( 1024 ).Dimension2(); + + RC rt0 = Image( EPixelFormat::RGBA16_UNorm, dim ); + RC rt1 = Image( EPixelFormat::RGBA16_UNorm, dim ); + + RC count = DynamicUInt(); + RC gen_tex = DynamicUInt(); + RC noise_step = DynamicUInt(); + + Slider( tex_dim, "TexDim", 1, 8, 2 ); + Slider( count, "Repeat", 1, 32 ); + Slider( gen_tex, "GenTex", 0, 1, 1 ); + Slider( noise_step, "NoiseStep", 0, 4 ); + + // render loop + { + RC pass = Postprocess( "", "GEN_NOISE" ); + pass.Output( "out_Color0", rt0 ); + pass.Output( "out_Color1", rt1 ); + pass.Constant( "iNoiseStep", noise_step ); + pass.EnableIfEqual( gen_tex, 1 ); + } + { + RC pass = Postprocess( "", "PROCESS" ); + pass.InOut( "in_Color0", "out_Color0", rt0 ); + pass.InOut( "in_Color1", "out_Color1", rt1 ); + pass.Repeat( count ); + } + } + +#endif +//----------------------------------------------------------------------------- +#ifdef SH_FRAG + + float2 Encode (float4 color) + { + return float2( uintBitsToFloat( packUnorm2x16( color.xy )), + uintBitsToFloat( packUnorm2x16( color.zw )) ); + } + + float4 Decode (float2 value) + { + return float4( unpackUnorm2x16( floatBitsToUint( value.x )), + unpackUnorm2x16( floatBitsToUint( value.y )) ); + } + +#endif +//----------------------------------------------------------------------------- +#ifdef PROCESS + #include "GlobalIndex.glsl" + #include "CodeTemplates.glsl" + + float4 Update (float4 val) + { + return val * val + 0.001; + } + + void Main () + { + float4 data; + data.rg = Encode( gl.subpass.Load( in_Color0 )); + data.ba = Encode( gl.subpass.Load( in_Color1 )); + + data = Update( data ); + + out_Color0 = Decode( data.rg ); + out_Color1 = Decode( data.ba ); + } + +#endif 
+//----------------------------------------------------------------------------- +#ifdef GEN_NOISE + #include "GlobalIndex.glsl" + #include "Hash.glsl" + #include "Color.glsl" + + void Main () + { + float2 uv = float2(GetGlobalCoord().xy >> iNoiseStep); + float4 col = Rainbow( Hash_Uniform( uv, 0.111 )); + + out_Color0 = Decode( col.rg ); + out_Color1 = Decode( col.ba ); + } + +#endif +//----------------------------------------------------------------------------- diff --git a/AE/samples/res_editor/_data/scripts/perf/ImageStorage-3.as b/AE/samples/res_editor/_data/scripts/perf/ImageStorage-3.as new file mode 100644 index 00000000..0e28ac69 --- /dev/null +++ b/AE/samples/res_editor/_data/scripts/perf/ImageStorage-3.as @@ -0,0 +1,78 @@ +// Copyright (c) Zhirnov Andrey. For more information see 'LICENSE' +#ifdef __INTELLISENSE__ +# include +# include +# define PROCESS +# define GEN_NOISE +#endif +//----------------------------------------------------------------------------- +#ifdef SCRIPT + + void ASmain () + { + // initialize + RC tex_dim = DynamicUInt(); + RC dim = tex_dim.Mul( 1024 ).Dimension2(); + + RC rt0 = Image( EPixelFormat::R32F, dim ); + + RC count = DynamicUInt(); + RC gen_tex = DynamicUInt(); + RC noise_step = DynamicUInt(); + + Slider( tex_dim, "TexDim", 1, 8, 2 ); + Slider( count, "Repeat", 1, 32 ); + Slider( gen_tex, "GenTex", 0, 1, 1 ); + Slider( noise_step, "NoiseStep", 0, 4 ); + + // render loop + { + RC pass = Postprocess( "", "GEN_NOISE" ); + pass.Output( "out_Color0", rt0 ); + pass.Constant( "iNoiseStep", noise_step ); + pass.EnableIfEqual( gen_tex, 1 ); + } + { + RC pass = Postprocess( "", "PROCESS" ); + pass.InOut( "in_Color0", "out_Color0", rt0 ); + pass.Repeat( count ); + } + } + +#endif +//----------------------------------------------------------------------------- +#ifdef PROCESS + #include "GlobalIndex.glsl" + #include "CodeTemplates.glsl" + + float Update (float val) + { + return val * val + 0.001; + } + + void Main () + { + float data; 
+ data = gl.subpass.Load( in_Color0 ).r; + + data = Update( data ); + + out_Color0.r = data; + } + +#endif +//----------------------------------------------------------------------------- +#ifdef GEN_NOISE + #include "GlobalIndex.glsl" + #include "Hash.glsl" + #include "Color.glsl" + + void Main () + { + float2 uv = float2(GetGlobalCoord().xy >> iNoiseStep); + + out_Color0.r = Hash_Uniform( uv, 0.5 ); + } + +#endif +//----------------------------------------------------------------------------- diff --git a/AE/samples/res_editor/_data/scripts/perf/ImageStorage-Reorder.as b/AE/samples/res_editor/_data/scripts/perf/ImageStorage-Reorder.as new file mode 100644 index 00000000..0279cfef --- /dev/null +++ b/AE/samples/res_editor/_data/scripts/perf/ImageStorage-Reorder.as @@ -0,0 +1,131 @@ +// Copyright (c) Zhirnov Andrey. For more information see 'LICENSE' +/* + https://developer.nvidia.com/blog/optimizing-compute-shaders-for-l2-locality-using-thread-group-id-swizzling/ +*/ +#ifdef __INTELLISENSE__ +# include +# include +# define MODE 0 +# define GEN_NOISE +#endif +//----------------------------------------------------------------------------- +#ifdef SCRIPT + + void ASmain () + { + // initialize + RC tex_dim = DynamicUInt(); + RC dim = tex_dim.Mul( 1024 ).Dimension2(); + const uint2 local = uint2(16); + + RC image = Image( EPixelFormat::RGBA32F, dim ); + RC mode = DynamicUInt(); + RC count = DynamicUInt(); + RC gen_tex = DynamicUInt(); + RC tile_size = DynamicUInt(); + const uint max_mode = 4; + + Slider( tex_dim, "TexDim", 1, 8, 2 ); + Slider( mode, "Mode", 0, max_mode ); + Slider( count, "Repeat", 1, 32 ); + Slider( gen_tex, "GenTex", 0, 1, 1 ); + Slider( tile_size, "TileSize", 3, 6, 4 ); + + // render loop + { + RC pass = ComputePass( "", "GEN_NOISE" ); + pass.ArgOut( "un_Image", image ); + pass.LocalSize( local ); + pass.DispatchThreads( dim ); + pass.EnableIfEqual( gen_tex, 1 ); + } + + for (uint i = 0; i <= max_mode; ++i) + { + RC pass = ComputePass( "", 
"MODE="+i+";DIMX="+local.x+";DIMY="+local.y ); + pass.ArgInOut( "un_Image", image ); + pass.Constant( "iTileSizePOT", tile_size ); + pass.LocalSize( local ); + pass.DispatchThreads( dim ); + pass.EnableIfEqual( mode, i ); + pass.Repeat( count ); + } + + Present( image ); + } + +#endif +//----------------------------------------------------------------------------- +#ifdef MODE + #include "GlobalIndex.glsl" + #include "CodeTemplates.glsl" + + float4 Update (float4 val) + { + return val * val + 0.001; + } + + + void Main () + { + #if MODE == 0 + // implementation defined order + const int2 pos = GetGlobalCoord().xy; + + #elif MODE == 1 + // row major + uint2 upos = gl.WorkGroupID.xy * uint2(DIMX, DIMY); + upos.x += gl.LocalInvocationID.x; + upos.y += gl.LocalInvocationID.y; + const int2 pos = int2(upos); + + #elif MODE == 2 + // column major + uint2 upos = gl.WorkGroupID.xy * uint2(DIMX, DIMY); + upos.y += gl.LocalInvocationID.x; + upos.x += gl.LocalInvocationID.y; + const int2 pos = int2(upos); + + #elif MODE == 3 || MODE == 4 + const uint tile_pot = iTileSizePOT; + const uint tile_mask = (1u << tile_pot) - 1; + const uint gi = gl.WorkGroupID.x + gl.WorkGroupID.y * gl.NumWorkGroups.x; + uint2 up; + up.x = (gi & tile_mask) + (((gi >> tile_pot) / gl.NumWorkGroups.y) << tile_pot); + up.y = (gi >> tile_pot) % gl.NumWorkGroups.y; + + #if MODE == 3 + // row major + up = up * uint2(DIMX, DIMY) + gl.LocalInvocationID.xy; + #else + // column major + up = up * uint2(DIMX, DIMY) + gl.LocalInvocationID.yx; + #endif + const int2 pos = int2(up); + + #else + # error unsupported MODE + #endif + + float4 data = gl.image.Load( un_Image, pos ); + data = Update( data ); + gl.image.Store( un_Image, pos, data ); + } + +#endif +//----------------------------------------------------------------------------- +#ifdef GEN_NOISE + #include "GlobalIndex.glsl" + #include "Hash.glsl" + #include "Color.glsl" + + void Main () + { + float2 uv = float2(GetGlobalCoord().xy); + float4 col = Rainbow( 
Hash_Uniform( uv, 0.111 )); + + gl.image.Store( un_Image, GetGlobalCoord().xy, col ); + } + +#endif +//----------------------------------------------------------------------------- diff --git a/AE/samples/res_editor/_data/scripts/perf/Inst-fp16.as b/AE/samples/res_editor/_data/scripts/perf/Inst-fp16.as new file mode 100644 index 00000000..f7a00b1a --- /dev/null +++ b/AE/samples/res_editor/_data/scripts/perf/Inst-fp16.as @@ -0,0 +1,402 @@ +// Copyright (c) Zhirnov Andrey. For more information see 'LICENSE' +/* + Mali dim: 1<<7 .. 1<<10 + PowerVR dim: 1<<10 +*/ +#ifdef __INTELLISENSE__ +# include +# include +# define MODE 0 +#endif +//----------------------------------------------------------------------------- +#ifdef SCRIPT + + void ASmain () + { + // initialize + RC rt = Image( EPixelFormat::RGBA8_UNorm, IsDiscreteGPU() ? uint2(4<<10) : uint2(1<<10) ); + RC count = DynamicUInt(); + RC mode = DynamicUInt(); + const array mode_str = { + "V4_ADD", "V4_ADD1", "V4_MUL", "V4_MUL1", + "V4_MUL_ADD", "V2_MUL_ADD", "S_MUL_ADD", + "V4_FMA", "V2_FMA", "S_FMA" + }; + + Slider( mode, "iMode", 0, mode_str.size()-1, 0 ); + Slider( count, "Repeat", 1, 32 ); + + // render loop + for (uint i = 0; i < mode_str.size(); ++i) + { + #if 1 + RC pass = ComputePass( "", "MODE="+mode_str[i] ); + pass.ArgOut( "un_Image", rt ); + pass.LocalSize( 16, 16 ); + pass.DispatchThreads( rt.Dimension2() ); + #else + RC pass = Postprocess( "", "MODE="+mode_str[i] ); + pass.Output( "out_Color", rt, RGBA32f(0.0) ); + #endif + pass.EnableIfEqual( mode, i ); + pass.Repeat( count ); + } + // Present( rt ); + } + +#endif +//----------------------------------------------------------------------------- +#ifdef MODE + #include "GlobalIndex.glsl" + #include "CodeTemplates.glsl" + + #define NONE 0 + #define V4_ADD 1 + #define V4_MUL 2 + #define V4_ADD1 3 + #define V4_MUL1 4 + #define V4_FMA 5 + #define V4_MUL_ADD 6 + #define S_FMA 7 + #define V2_FMA 8 + #define V2_MUL_ADD 9 + #define S_MUL_ADD 10 + + #define 
UNROLL1 //[[unroll]] // too slow during pipeline creation + #define UNROLL2 [[unroll]] + + #define type half + #define type2 half2 + #define type4 half4 + #define itype sshort + + #if defined(AE_Qualcomm_Adreno_GPU) || defined(AE_Intel_GPU) || defined(AE_NVidia_GPU) + # define FOR() [[unroll]] for (itype i = itype(0), cnt = itype(COUNT1*COUNT2); i < cnt; ++i) // NV: must be <= 1024, unroll is too slow + #elif defined(AE_ARM_Mali_GPU) || defined(AE_IMG_PowerVR_GPU) + # define FOR() for (itype i = itype(0), cnt = itype(COUNT1*COUNT2); i < cnt; ++i) + + #elif 0 + # define FOR() UNROLL1 for (type i = type(0.0), cnt = type(COUNT1); i < cnt; ++i) UNROLL2 for (type j = type(0.0); j < type(COUNT2); ++j) + #elif 0 + # define FOR() UNROLL1 for (itype i = itype(0); i < itype(COUNT1); ++i) UNROLL2 for (itype j = itype(0); j < itype(COUNT2); ++j) + #elif 0 + //# define FOR() UNROLL2 for (itype i = itype(0), cnt = itype(COUNT1*COUNT2); i < cnt; ++i) + # define FOR() UNROLL2 for (type i = type(0), cnt = type(COUNT1*COUNT2); i < cnt; ++i) + #endif + + #ifdef SH_COMPUTE + # if defined(AE_ARM_Mali_GPU) + # define OUTPUT(x) gl.image.Store( un_Image, GetGlobalCoord().xy, float4(x) ) + # else + # define OUTPUT(x) if ( AllLess( x, half4(-1800.0hf) )) gl.image.Store( un_Image, GetGlobalCoord().xy, float4(x) ) + # endif + #else + # if defined(AE_ARM_Mali_GPU) + # define OUTPUT(x) out_Color = float4(x); + # else + # define OUTPUT(x) out_Color = float4(x); if ( AllGreater( x, half4(-1800.0hf) )) Discard(); + # endif + #endif + + #ifdef AE_NVidia_GPU + # define DIM (4<<10) + # define COUNT1 (1<<3) + # define COUNT2 (1<<3) + #elif defined(AE_Qualcomm_Adreno_GPU) + # define DIM (2<<10) + # define COUNT1 (1<<2) + # define COUNT2 (1<<2) + #elif defined(AE_ARM_Mali_GPU) || defined(AE_IMG_PowerVR_GPU) + # define DIM (1<<10) + # define COUNT1 (1<<3) + # define COUNT2 (1<<2) + #elif defined(AE_Intel_GPU) + # define DIM (1<<10) + # define COUNT1 (1<<3) + # define COUNT2 (1<<2) + #endif + // total: 
RTSize * COUNT1 * COUNT2 * 16 * 4 + // NV: 68.7 TOp/ms + // Adreno: 4295 GOp/ms + // Mali,PVR: 2147 GOp/ms + + + void Main () + { + const type4 p = type4(1.0 + GetGlobalCoord().xyyx / DIM * float4(0.5, 0.75, 0.4, 0.9) * 0.1); + const type4 t = p.xwzy * type4(0.944, 1.209, 0.97, 1.08); + + #if MODE == NONE + OUTPUT(p); + + #elif MODE == V4_ADD1 + type4 a = t; + + // 16 adds + FOR() + { + a += p; a -= t; + a += p; a -= t; + a += p; a -= t; + a += p; a -= t; + a += p; a -= t; + a += p; a -= t; + a += p; a -= t; + a += p; a -= t; + } + OUTPUT(a); + + #elif MODE == V4_MUL1 + type4 a = type4(1.0); + + // 16 muls + FOR() + { + a *= p; a *= a; + a *= t; a *= a; + a *= p; a *= a; + a *= t; a *= a; + a *= p; a *= a; + a *= t; a *= a; + a *= p; a *= a; + a *= t; a *= a; + } + OUTPUT(a); + + #elif MODE == V4_ADD + type4 a = t; + type4 b = t * type(0.111); + type4 c = t * type(0.222); + type4 d = t * type(0.333); + type4 e = t * type(0.444); + type4 f = t * type(0.555); + type4 g = t * type(0.666); + type4 h = t * type(0.777); + + // 16 adds + FOR() + { + a += p; + b += t; + c += p; + d += t; + e += p; + f += t; + g += p; + h += t; + + a += b; + b += c; + c += d; + d += e; + e += f; + f += g; + g += h; + h += a; + } + OUTPUT(h); + + #elif MODE == V4_MUL + type4 a = t; + type4 b = t * type(0.111); + type4 c = t * type(0.222); + type4 d = t * type(0.333); + type4 e = t * type(0.444); + type4 f = t * type(0.555); + type4 g = t * type(0.666); + type4 h = t * type(0.777); + + // 16 muls + FOR() + { + a *= p; + b *= t; + c *= p; + d *= t; + e *= p; + f *= t; + g *= p; + h *= t; + + a *= b; + b *= c; + c *= d; + d *= e; + e *= f; + f *= g; + g *= h; + h *= a; + } + OUTPUT(h); + + #elif MODE == V4_MUL_ADD + type4 a = type4(1.0); + + // 16 muls, 16 adds + FOR() + { + a = (a * p) + t; + a = (a * p) + t; + a = (a * t) + p; + a = (a * t) + p; + a = (a * p) + t; + a = (a * p) + t; + a = (a * t) + p; + a = (a * t) + p; + + a = (a * p) + t; + a = (a * p) + t; + a = (a * t) + p; + a = (a * t) 
+ p; + a = (a * p) + t; + a = (a * p) + t; + a = (a * t) + p; + a = (a * t) + p; + } + OUTPUT(a); + + #elif MODE == V2_MUL_ADD + const type2 q = p.xx; + const type2 w = t.zw; + type2 a = type2(1.0); + + // 16 muls, 16 adds + FOR() + { + a = (a * q) + w; + a = (a * q) + w; + a = (a * w) + q; + a = (a * w) + q; + a = (a * q) + w; + a = (a * q) + w; + a = (a * w) + q; + a = (a * w) + q; + + a = (a * q) + w; + a = (a * q) + w; + a = (a * w) + q; + a = (a * w) + q; + a = (a * q) + w; + a = (a * q) + w; + a = (a * w) + q; + a = (a * w) + q; + } + OUTPUT(type4(a,a)); + + #elif MODE == S_MUL_ADD + const type q = p.x; + const type w = t.y; + type a = type(1.0); + + // 16 muls, 16 adds + FOR() + { + a = (a * q) + w; + a = (a * q) + w; + a = (a * w) + q; + a = (a * w) + q; + a = (a * q) + w; + a = (a * q) + w; + a = (a * w) + q; + a = (a * w) + q; + + a = (a * q) + w; + a = (a * q) + w; + a = (a * w) + q; + a = (a * w) + q; + a = (a * q) + w; + a = (a * q) + w; + a = (a * w) + q; + a = (a * w) + q; + } + OUTPUT(type4(a)); + + #elif MODE == V4_FMA + type4 a = type4(1.0); + + // 16 fma + FOR() + { + a = fma( a, p, t ); + a = fma( a, p, t ); + a = fma( a, t, p ); + a = fma( a, t, p ); + a = fma( a, p, t ); + a = fma( a, p, t ); + a = fma( a, t, p ); + a = fma( a, t, p ); + + a = fma( a, p, t ); + a = fma( a, p, t ); + a = fma( a, t, p ); + a = fma( a, t, p ); + a = fma( a, p, t ); + a = fma( a, p, t ); + a = fma( a, t, p ); + a = fma( a, t, p ); + } + OUTPUT(a); + + #elif MODE == S_FMA + const type q = p.x; + const type w = t.y; + type a = type(1.0); + + // 16 fma + FOR() + { + a = fma( a, q, w ); + a = fma( a, w, q ); + a = fma( a, q, w ); + a = fma( a, w, q ); + a = fma( a, q, w ); + a = fma( a, w, q ); + a = fma( a, q, w ); + a = fma( a, w, q ); + + a = fma( a, q, w ); + a = fma( a, w, q ); + a = fma( a, q, w ); + a = fma( a, w, q ); + a = fma( a, q, w ); + a = fma( a, w, q ); + a = fma( a, q, w ); + a = fma( a, w, q ); + } + OUTPUT(type4(a)); + + #elif MODE == V2_FMA + const 
type2 q = p.xx; + const type2 w = t.zw; + type2 a = type2(1.0); + + // 16 fma + FOR() + { + a = fma( a, q, w ); + a = fma( a, q, w ); + a = fma( a, w, q ); + a = fma( a, w, q ); + a = fma( a, q, w ); + a = fma( a, q, w ); + a = fma( a, w, q ); + a = fma( a, w, q ); + + a = fma( a, q, w ); + a = fma( a, q, w ); + a = fma( a, w, q ); + a = fma( a, w, q ); + a = fma( a, q, w ); + a = fma( a, q, w ); + a = fma( a, w, q ); + a = fma( a, w, q ); + } + OUTPUT(type4(a,a)); + + #else + # error + #endif + } + +#endif +//----------------------------------------------------------------------------- diff --git a/AE/samples/res_editor/_data/scripts/perf/Inst-fp32.as b/AE/samples/res_editor/_data/scripts/perf/Inst-fp32.as new file mode 100644 index 00000000..b8b470bc --- /dev/null +++ b/AE/samples/res_editor/_data/scripts/perf/Inst-fp32.as @@ -0,0 +1,352 @@ +// Copyright (c) Zhirnov Andrey. For more information see 'LICENSE' +/* + Mali dim: 1<<7 + PowerVR dim: 1<<10 +*/ +#ifdef __INTELLISENSE__ +# include +# include +# define MODE 0 +#endif +//----------------------------------------------------------------------------- +#ifdef SCRIPT + + void ASmain () + { + // initialize + RC rt = Image( EPixelFormat::RGBA8_UNorm, IsDiscreteGPU() ? 
uint2(4<<10) : uint2(1<<10) ); + RC count = DynamicUInt(); + RC mode = DynamicUInt(); + const array mode_str = { + "NONE", "ADD", "ADD1", "MUL", "MUL1", "MUL_ADD", "MUL_ADD1", "FMA", "FMA1" + }; + + Slider( mode, "Mode", 0, mode_str.size()-1, 1 ); + Slider( count, "Repeat", 1, 32 ); + + // render loop + for (uint i = 0; i < mode_str.size(); ++i) + { + #if 1 + RC pass = ComputePass( "", "MODE="+mode_str[i] ); + pass.ArgOut( "un_Image", rt ); + pass.LocalSize( 16, 16 ); + pass.DispatchThreads( rt.Dimension2() ); + #else + RC pass = Postprocess( "", "MODE="+mode_str[i] ); + pass.Output( "out_Color", rt, RGBA32f(0.0) ); + #endif + pass.EnableIfEqual( mode, i ); + pass.Repeat( count ); + } + // Present( rt ); + } + +#endif +//----------------------------------------------------------------------------- +#ifdef MODE + //precision mediump float; + + #include "GlobalIndex.glsl" + #include "CodeTemplates.glsl" + + #define NONE 0 + #define ADD 1 + #define MUL 2 + #define ADD1 3 + #define MUL1 4 + #define FMA 5 + #define FMA1 6 + #define MUL_ADD 7 + #define MUL_ADD1 8 + + #define UNROLL1 //[[unroll]] // too slow during pipeline creation + #define UNROLL2 [[unroll]] + + #define type float + #define type4 float4 + + #if defined(AE_Qualcomm_Adreno_GPU) || defined(AE_Intel_GPU) || defined(AE_NVidia_GPU) + # define FOR() [[unroll]] for (int i = 0, cnt = COUNT1*COUNT2; i < cnt; ++i) // NV: must be <= 1024, unroll is too slow + #elif defined(AE_ARM_Mali_GPU) || defined(AE_IMG_PowerVR_GPU) + # define FOR() for (int i = 0, cnt = COUNT1*COUNT2; i < cnt; ++i) + + #elif 0 + # define FOR() UNROLL1 for (type i = type(0.0), cnt = type(COUNT1); i < cnt; ++i) UNROLL2 for (type j = type(0.0); j < type(COUNT2); ++j) + #elif 0 + # define FOR() UNROLL1 for (int i = 0; i < COUNT1; ++i) UNROLL2 for (int j = 0; j < COUNT2; ++j) + #elif 0 + # define FOR() UNROLL2 for (int i = 0; i < COUNT1*COUNT2; ++i) + //# define FOR() UNROLL2 for (type i = type(0.0), cnt = type(COUNT1*COUNT2); i < cnt; ++i) + 
#endif + + #ifdef SH_COMPUTE + # if defined(AE_ARM_Mali_GPU) + # define OUTPUT(x) gl.image.Store( un_Image, GetGlobalCoord().xy, float4(x) ) + # else + # define OUTPUT(x) if ( AllLess( x, float4(-1.e+20) )) gl.image.Store( un_Image, GetGlobalCoord().xy, float4(x) ) + # endif + #else + # if defined(AE_ARM_Mali_GPU) + # define OUTPUT(x) out_Color = float4(x); + # else + # define OUTPUT(x) out_Color = float4(x); if ( AllGreater( x, float4(-1.e+20) )) Discard(); + # endif + #endif + + #ifdef AE_NVidia_GPU + # define DIM (4<<10) + # define COUNT1 (1<<3) + # define COUNT2 (1<<3) + #elif defined(AE_Qualcomm_Adreno_GPU) + # define DIM (1<<8) + # define COUNT1 (1<<5) + # define COUNT2 (1<<5) + #elif defined(AE_ARM_Mali_GPU) || defined(AE_IMG_PowerVR_GPU) + # define DIM (1<<10) + # define COUNT1 (1<<2) + # define COUNT2 (1<<2) + #elif defined(AE_Intel_GPU) + # define DIM (1<<9) + # define COUNT1 (1<<3) + # define COUNT2 (1<<3) + #endif + // total: RTSize * COUNT1 * COUNT2 * 16 * 4 + // NV: 68.7 TOp/ms + // Adreno: 4290 GOp/ms + // Mali,PVR: 1073 GOp/ms + + + void Main () + { + const type4 p = type4(1.0 + GetGlobalCoord().xyyx / DIM * float4(0.5, 0.75, 0.4, 0.9) * 0.1); + const type4 t = p.xwzy * type4(0.944, 1.209, 0.97, 1.08); + + #if MODE == NONE + OUTPUT(p); + + #elif MODE == ADD1 + type4 a = t; + + // 16 adds + FOR() + { + a += p; a -= t; + a += p; a -= t; + a += p; a -= t; + a += p; a -= t; + a += p; a -= t; + a += p; a -= t; + a += p; a -= t; + a += p; a -= t; + } + OUTPUT(a); + + #elif MODE == MUL1 + type4 a = type4(1.0); + + // 16 muls + FOR() + { + a *= p; a *= a; + a *= t; a *= a; + a *= p; a *= a; + a *= t; a *= a; + a *= p; a *= a; + a *= t; a *= a; + a *= p; a *= a; + a *= t; a *= a; + } + OUTPUT(a); + + #elif MODE == ADD + type4 a = t; + type4 b = t * type(0.111); + type4 c = t * type(0.222); + type4 d = t * type(0.333); + type4 e = t * type(0.444); + type4 f = t * type(0.555); + type4 g = t * type(0.666); + type4 h = t * type(0.777); + + // 16 adds + FOR() + { 
+ a += p; + b += t; + c -= p; + d -= t; + e += p; + f += t; + g -= p; + h -= t; + + a -= b; + b += c; + c -= d; + d += e; + e -= f; + f += g; + g -= h; + h += a; + } + OUTPUT(h); + + #elif MODE == MUL + type4 a = t; + type4 b = t * type(0.111); + type4 c = t * type(0.222); + type4 d = t * type(0.333); + type4 e = t * type(0.444); + type4 f = t * type(0.555); + type4 g = t * type(0.666); + type4 h = t * type(0.777); + + // 16 muls, 6 ops latency + FOR() + { + a *= p; + b *= p; + c *= t; + d *= t; + e *= p; + f *= p; + g *= t; + h *= t; + + a *= t; + b *= t; + c *= p; + d *= p; + e *= t; + f *= t; + g *= p; + h *= p; + } + OUTPUT(a+b-c+d-e+f-g+h); + + #elif MODE == MUL_ADD + type4 a = t; + type4 b = t * type(0.111); + type4 c = t * type(0.222); + type4 d = t * type(0.333); + type4 e = t * type(0.444); + type4 f = t * type(0.555); + type4 g = t * type(0.666); + type4 h = t * type(0.777); + + // 16 muls, 16 adds + FOR() + { + a = (a * p) + t; + b = (b * p) + t; + c = (c * t) + p; + d = (d * t) + p; + e = (e * p) + t; + f = (f * p) + t; + g = (g * t) + p; + h = (h * t) + p; + + a = (a * t) + p; + b = (b * t) + p; + c = (c * p) + t; + d = (d * p) + t; + e = (e * t) + p; + f = (f * t) + p; + g = (g * p) + t; + h = (h * p) + t; + } + OUTPUT(a+b-c+d-e+f-g+h); + + #elif MODE == MUL_ADD1 + type4 a = type4(1.0); + + // 16 muls, 16 adds + FOR() + { + a = (a * p) + t; + a = (a * p) + t; + a = (a * p) + t; + a = (a * p) + t; + a = (a * p) + t; + a = (a * p) + t; + a = (a * p) + t; + a = (a * p) + t; + + a = (a * p) + t; + a = (a * p) + t; + a = (a * p) + t; + a = (a * p) + t; + a = (a * p) + t; + a = (a * p) + t; + a = (a * p) + t; + a = (a * p) + t; + } + OUTPUT(a); + + #elif MODE == FMA + type4 a = t; + type4 b = t * type(0.111); + type4 c = t * type(0.222); + type4 d = t * type(0.333); + type4 e = t * type(0.444); + type4 f = t * type(0.555); + type4 g = t * type(0.666); + type4 h = t * type(0.777); + + // 16 fma + FOR() + { + a = fma( a, p, t ); + b = fma( b, p, t ); + c = 
fma( c, t, p ); + d = fma( d, t, p ); + e = fma( e, p, t ); + f = fma( f, p, t ); + g = fma( g, t, p ); + h = fma( h, t, p ); + + a = fma( a, t, p ); + b = fma( b, t, p ); + c = fma( c, p, t ); + d = fma( d, p, t ); + e = fma( e, t, p ); + f = fma( f, t, p ); + g = fma( g, p, t ); + h = fma( h, p, t ); + } + OUTPUT(a+b-c+d-e+f-g+h); + + #elif MODE == FMA1 + type4 a = type4(1.0); + + // 16 fma + FOR() + { + a = fma( a, p, t ); + a = fma( a, p, t ); + a = fma( a, t, p ); + a = fma( a, t, p ); + a = fma( a, p, t ); + a = fma( a, p, t ); + a = fma( a, t, p ); + a = fma( a, t, p ); + + a = fma( a, p, t ); + a = fma( a, p, t ); + a = fma( a, t, p ); + a = fma( a, t, p ); + a = fma( a, p, t ); + a = fma( a, p, t ); + a = fma( a, t, p ); + a = fma( a, t, p ); + } + OUTPUT(a); + + #else + # error + #endif + } + +#endif +//----------------------------------------------------------------------------- diff --git a/AE/samples/res_editor/_data/scripts/perf/RTCompression.as b/AE/samples/res_editor/_data/scripts/perf/RTCompression.as new file mode 100644 index 00000000..c9abba2d --- /dev/null +++ b/AE/samples/res_editor/_data/scripts/perf/RTCompression.as @@ -0,0 +1,269 @@ +// Copyright (c) Zhirnov Andrey. For more information see 'LICENSE' +/* + Check render target lossless compression. + Possible names: + * Delta Color Compression (DCC) + * ARM FrameBuffer Compression (AFBC) + + RT compression decreases memory traffic, which reduces memory bandwidth usage and memory access stalls.
+*/ +#ifdef __INTELLISENSE__ +# include +# include +# define GEN_SOLID +# define GEN_GRADIENT +# define GEN_NOISE +# define READ +# define READ_CS +#endif +//----------------------------------------------------------------------------- +#ifdef SCRIPT + + void ASmain () + { + // initialize + const EPixelFormat fmt = EPixelFormat::RGBA8_UNorm; + RC tex_dim = DynamicUInt(); + RC dim = tex_dim.Mul( 1024 ).Dimension2(); + const uint2 local_size = uint2(8,16); + RC dim2 = dim.Mul( 2 ); + RC rt2 = Image( fmt, dim2 ); + RC rt3;// = Image( fmt, dim2 ); + RC rt4;// = Image( fmt, dim2 ); + RC rt = Image( fmt, dim ); + RC gen_color = DynamicUInt(); + RC mode = DynamicUInt(); + RC count = DynamicUInt(); + + Slider( tex_dim, "TexDim", 1, 8, 2 ); // NV: max 3 for RGBA32, max 5 for RGBA16, max 7 for RGBA8 + Slider( gen_color, "Pattern", 0, 7, 1 ); + Slider( mode, "Mode", 0, 3, 3 ); + Slider( count, "Repeat", 1, 16 ); + + // with DCC + if ( @rt2 != null ) + { + { + RC pass = Postprocess( "", "GEN_SOLID" ); + pass.Output( "out_Color", rt2 ); + pass.EnableIfEqual( gen_color, 1 ); + }{ + RC pass = Postprocess( "", "GEN_GRADIENT" ); + pass.Output( "out_Color", rt2 ); + pass.EnableIfEqual( gen_color, 2 ); + } + for (uint i = 0; i < 5; ++i) + { + RC pass = Postprocess( "", "SIZE=" + (4-i) + ";GEN_NOISE" ); + pass.Output( "out_Color", rt2 ); + pass.EnableIfEqual( gen_color, 3+i ); + } + for (uint i = 0; i < 3; ++i) + { + RC pass = Postprocess( "", "READ;MODE=" + i ); + pass.ArgIn( "un_RT", rt2, (i == 2 ? 
Sampler_LinearClamp : Sampler_NearestClamp) ); + pass.Output( "out_Color", rt, RGBA32f(0.0) ); + pass.EnableIfEqual( mode, 1+i ); + pass.Repeat( count ); + } + } + + // without DCC + if ( @rt3 != null ) + { + { + RC pass = ComputePass( "", "GEN_SOLID" ); + pass.ArgOut( "un_OutImage", rt3 ); + pass.LocalSize( local_size ); + pass.DispatchThreads( rt3.Dimension() ); + pass.EnableIfEqual( gen_color, 1 ); + }{ + RC pass = ComputePass( "", "GEN_GRADIENT" ); + pass.ArgOut( "un_OutImage", rt3 ); + pass.LocalSize( local_size ); + pass.DispatchThreads( rt3.Dimension() ); + pass.EnableIfEqual( gen_color, 2 ); + } + for (uint i = 0; i < 5; ++i) + { + RC pass = ComputePass( "", "SIZE=" + (4-i) + ";GEN_NOISE" ); + pass.ArgOut( "un_OutImage", rt3 ); + pass.LocalSize( local_size ); + pass.DispatchThreads( rt3.Dimension() ); + pass.EnableIfEqual( gen_color, 3+i ); + } + for (uint i = 0; i < 3; ++i) + { + RC pass = ComputePass( "", "READ_CS;MODE="+i ); + if ( i == 0 ) pass.ArgIn( "un_RT", rt3, Sampler_LinearClamp ); + if ( i == 1 ) pass.ArgIn( "un_RT", rt3, Sampler_NearestClamp ); + if ( i == 2 ) pass.ArgIn( "un_RT", rt3 ); + pass.ArgOut( "un_OutImage", rt ); + pass.LocalSize( local_size ); + pass.DispatchThreads( rt.Dimension() ); + pass.EnableIfEqual( mode, 1+i ); + pass.Repeat( count ); + } + } + + // with DCC + if ( @rt4 != null ) + { + { + RC pass = Postprocess( "", "GEN_SOLID" ); + pass.Output( "out_Color", rt4 ); + pass.EnableIfEqual( gen_color, 1 ); + }{ + RC pass = Postprocess( "", "GEN_GRADIENT" ); + pass.Output( "out_Color", rt4 ); + pass.EnableIfEqual( gen_color, 2 ); + } + for (uint i = 0; i < 5; ++i) + { + RC pass = Postprocess( "", "SIZE=" + (4-i) + ";GEN_NOISE" ); + pass.Output( "out_Color", rt4 ); + pass.EnableIfEqual( gen_color, 3+i ); + } + + for (uint i = 0; i < 3; ++i) + { + RC pass = ComputePass( "", "READ_CS;MODE="+i ); + if ( i == 0 ) pass.ArgIn( "un_RT", rt4, Sampler_LinearClamp ); + if ( i == 1 ) pass.ArgIn( "un_RT", rt4, Sampler_NearestClamp ); + if ( i 
== 2 ) pass.ArgIn( "un_RT", rt4 ); + pass.ArgOut( "un_OutImage", rt ); + pass.LocalSize( local_size ); + pass.DispatchThreads( rt.Dimension() ); + pass.EnableIfEqual( mode, 1+i ); + pass.Repeat( count ); + } + } + + //Present( rt ); + } + +#endif +//----------------------------------------------------------------------------- +#if defined(SH_FRAG) || defined(SH_COMPUTE) + #include "GlobalIndex.glsl" + #include "Hash.glsl" + #include "Color.glsl" +#endif +//----------------------------------------------------------------------------- +#ifdef GEN_SOLID + + void Main () + { + float4 col = float4(0.0); + + #ifdef SH_COMPUTE + gl.image.Store( un_OutImage, GetGlobalCoord().xy, col ); + #else + out_Color = col; + #endif + } + +#endif +//----------------------------------------------------------------------------- +#ifdef GEN_GRADIENT + + void Main () + { + float2 uv = GetGlobalCoordUNorm().xy; + + float3 col = float3(uv, 1.0 - uv.x * uv.y); + + #ifdef SH_COMPUTE + gl.image.Store( un_OutImage, GetGlobalCoord().xy, float4(col, 1.0) ); + #else + out_Color = float4(col, 1.0); + #endif + } + +#endif +//----------------------------------------------------------------------------- +#ifdef GEN_NOISE + + void Main () + { + float2 uv = float2(GetGlobalCoord().xy >> SIZE); + + float4 col = Rainbow( Hash_Uniform( uv, 0.111 )); + + #ifdef SH_COMPUTE + gl.image.Store( un_OutImage, GetGlobalCoord().xy, col ); + #else + out_Color = col; + #endif + } + +#endif +//----------------------------------------------------------------------------- +#ifdef READ + + void Main () + { + const int dim = 2; + + #if MODE == 0 + float4 col = float4(0.0); + const int2 coord = GetGlobalCoord().xy * dim; + + [[unroll]] for (int y = 0; y < dim; ++y) + [[unroll]] for (int x = 0; x < dim; ++x) + col += gl.texture.Fetch( un_RT, coord + int2(x,y), 0 ); + + out_Color = col / float(dim * dim); + + #elif MODE == 1 + + float4 col = float4(0.0); + const float2 step = 1.0 / float2(GetGlobalSize().xy * dim); + const 
float2 coord = GetGlobalCoord().xy * dim; + + [[unroll]] for (int y = 0; y < dim; ++y) + [[unroll]] for (int x = 0; x < dim; ++x) + col += gl.texture.Sample( un_RT, (coord + float2(x,y)) * step ); + + out_Color = col / float(dim * dim); + #else + + out_Color = gl.texture.Sample( un_RT, GetGlobalCoordUNorm().xy ); + #endif + } + +#endif +//----------------------------------------------------------------------------- +#ifdef READ_CS + + void Main () + { + float4 col = float4(0.0); + const int dim = 2; + const int2 coord = GetGlobalCoord().xy * dim; + + #if MODE == 0 + + col = gl.texture.Sample( un_RT, GetGlobalCoordUNorm().xy ); + + #elif MODE == 1 + + [[unroll]] for (int y = 0; y < dim; ++y) + [[unroll]] for (int x = 0; x < dim; ++x) + col += gl.texture.Fetch( un_RT, coord + int2(x,y), 0 ); + + col /= float(dim * dim); + #else + + [[unroll]] for (int y = 0; y < dim; ++y) + [[unroll]] for (int x = 0; x < dim; ++x) + col += gl.image.Load( un_RT, coord + int2(x,y) ); + + col /= float(dim * dim); + #endif + + gl.image.Store( un_OutImage, GetGlobalCoord().xy, col ); + } + +#endif +//----------------------------------------------------------------------------- diff --git a/AE/samples/res_editor/_data/scripts/perf/Storage.as b/AE/samples/res_editor/_data/scripts/perf/Storage.as new file mode 100644 index 00000000..0004aae4 --- /dev/null +++ b/AE/samples/res_editor/_data/scripts/perf/Storage.as @@ -0,0 +1,170 @@ +// Copyright (c) Zhirnov Andrey. 
For more information see 'LICENSE' +#ifdef __INTELLISENSE__ +# include +# include +# define MODE 0 +# define GEN_NOISE +#endif +//----------------------------------------------------------------------------- +#ifdef SCRIPT + + void ASmain () + { + // initialize + RC tex_dim = DynamicUInt(); + RC dim = tex_dim.Mul( 1024 ).Dimension2(); + const uint2 local = uint2(8); + + RC rt = Image( EPixelFormat::RGBA32F, dim ); + RC rt2 = Image( EPixelFormat::RGBA32F, dim ); + RC image = Image( EPixelFormat::RGBA32F, dim ); + RC buf = Buffer(); + RC buf_size = dim.Area(); + RC mode = DynamicUInt(); + RC count = DynamicUInt(); + RC gen_tex = DynamicUInt(); + RC noise_step = DynamicUInt(); + + buf.ArrayLayout( + "SBuf", + "float4 data;", + buf_size ); + + Slider( tex_dim, "TexDim", 1, 8, 2 ); + Slider( mode, "Mode", 0, 5 ); + Slider( count, "Repeat", 1, 32 ); + Slider( gen_tex, "GenTex", 0, 1, 1 ); + Slider( noise_step, "NoiseStep", 0, 4 ); + + // render loop + { + RC pass = Postprocess( "", "GEN_NOISE" ); + pass.Output( "out_Color", rt ); + pass.Constant( "iNoiseStep", noise_step ); // for RT compression + pass.EnableIfEqual( gen_tex, 1 ); + }{ + RC pass = ComputePass( "", "GEN_NOISE" ); + pass.ArgOut( "un_Image", image ); + pass.ArgOut( "un_Buffer", buf ); + pass.Constant( "iNoiseStep", noise_step ); + pass.LocalSize( local ); + pass.DispatchThreads( dim ); + pass.EnableIfEqual( gen_tex, 1 ); + } + //-------------------- + { + RC pass = ComputePass( "", "LOAD_STORE;MODE=0" ); + pass.ArgInOut( "un_Image", image ); + pass.LocalSize( local ); + pass.DispatchThreads( dim ); + pass.EnableIfEqual( mode, 0 ); + pass.Repeat( count ); + }{ + RC pass = Postprocess( "", "RW_ATTACH;MODE=1" ); + pass.InOut( "in_Color", "out_Color", rt ); + pass.EnableIfEqual( mode, 1 ); + pass.Repeat( count ); + }{ + RC pass = Postprocess( "", "DB_FETCH;MODE=2" ); + pass.Output( "out_Color", rt2, RGBA32f(0.0) ); + pass.ArgIn( "un_RT", rt, Sampler_NearestClamp ); + pass.EnableIfEqual( mode, 2 ); + 
pass.Repeat( count ); + }{ + RC pass = Postprocess( "", "DB_SAMPLE;MODE=3" ); + pass.Output( "out_Color", rt2, RGBA32f(0.0) ); + pass.ArgIn( "un_RT", rt, Sampler_NearestClamp ); + pass.EnableIfEqual( mode, 3 ); + pass.Repeat( count ); + } + //-------------------- + { + RC pass = ComputePass( "", "BUF_LOAD_STORE_CS;MODE=4" ); + pass.ArgInOut( "un_Buffer", buf ); + pass.LocalSize( local.x*local.y ); + pass.DispatchThreads( buf_size ); + pass.EnableIfEqual( mode, 4 ); + pass.Repeat( count ); + }{ + RC pass = Postprocess( "", "BUF_LOAD_STORE_FS;MODE=5" ); + pass.SetDimension( dim ); + pass.ArgInOut( "un_Buffer", buf ); + pass.EnableIfEqual( mode, 5 ); + pass.Repeat( count ); + } + } + +#endif +//----------------------------------------------------------------------------- +#ifdef MODE + #include "GlobalIndex.glsl" + #include "CodeTemplates.glsl" + + float4 Update (float4 val) + { + return val * val + 0.001; + } + + void Main () + { + #if MODE == 0 + { + int2 pos = GetGlobalCoord().xy; + float4 data = gl.image.Load( un_Image, pos ); + data = Update( data ); + gl.image.Store( un_Image, pos, data ); + } + #elif MODE == 1 + { + float4 data = gl.subpass.Load( in_Color ); + out_Color = Update( data ); + } + #elif MODE == 2 + { + float4 data = gl.texture.Fetch( un_RT, GetGlobalCoord().xy, 0 ); + out_Color = Update( data ); + } + #elif MODE == 3 + { + float4 data = gl.texture.Sample( un_RT, GetGlobalCoordUNorm().xy, 0 ); + out_Color = Update( data ); + } + #elif MODE == 4 + { + float4 data = un_Buffer.elements[ GetGlobalIndex() ].data; + data = Update( data ); + un_Buffer.elements[ GetGlobalIndex() ].data = data; + } + #elif MODE == 5 + { + float4 data = un_Buffer.elements[ GetGlobalIndex() ].data; + data = Update( data ); + un_Buffer.elements[ GetGlobalIndex() ].data = data; + } + #else + # error unsupported MODE + #endif + } + +#endif +//----------------------------------------------------------------------------- +#ifdef GEN_NOISE + #include "GlobalIndex.glsl" + #include 
"Hash.glsl" + #include "Color.glsl" + + void Main () + { + float2 uv = float2(GetGlobalCoord().xy >> iNoiseStep); + float4 col = Rainbow( Hash_Uniform( uv, 0.111 )); + + #ifdef SH_COMPUTE + un_Buffer.elements[ GetGlobalIndex() ].data = col; + gl.image.Store( un_Image, GetGlobalCoord().xy, col ); + #else + out_Color = col; + #endif + } + +#endif +//----------------------------------------------------------------------------- diff --git a/AE/samples/res_editor/_data/scripts/samples-2d/Subgroups.as b/AE/samples/res_editor/_data/scripts/perf/Subgroups-1.as similarity index 99% rename from AE/samples/res_editor/_data/scripts/samples-2d/Subgroups.as rename to AE/samples/res_editor/_data/scripts/perf/Subgroups-1.as index c3e904e1..711b7a74 100644 --- a/AE/samples/res_editor/_data/scripts/samples-2d/Subgroups.as +++ b/AE/samples/res_editor/_data/scripts/perf/Subgroups-1.as @@ -1,7 +1,4 @@ // Copyright (c) Zhirnov Andrey. For more information see 'LICENSE' -/* - -*/ #ifdef __INTELLISENSE__ # include # include diff --git a/AE/samples/res_editor/_data/scripts/perf/Subgroups-2.as b/AE/samples/res_editor/_data/scripts/perf/Subgroups-2.as new file mode 100644 index 00000000..c5a6939b --- /dev/null +++ b/AE/samples/res_editor/_data/scripts/perf/Subgroups-2.as @@ -0,0 +1,158 @@ +// Copyright (c) Zhirnov Andrey. For more information see 'LICENSE' +/* + Mode: + + 0. Quads. + 1. Threads in subgroup. + + 2. Unique subgroup. + Each subgroup has random color, use 'Hash' slider to change color. + Use it to check if GPU can: + - fill multiple triangles by a single subgroup + - fill triangles with different gl_InstanceIndex + + 3. Full subgroup. + Red color indicates that all threads in subgroup are executed. + Green/blue/violet colors indicates that some threads in subgroup are not executed, + this happens when: + - helper invocation allocates thread but not executed (optimized) + - triangle in tile is too small + - multiple triangles can not be filled with a single subgroup + + 4. 
Helper invocations per quad. + Red color indicates that none of the threads in the quad are helper invocations. + Violet color indicates that 3 threads in the quad are helper invocations. + The absence of violet colors indicates that the driver optimizes quads. + + 5. Full quad. + Red color indicates that all threads in the quad are executed. + Green/blue/violet colors indicate that some threads in the quad are not executed. +*/ +#ifdef __INTELLISENSE__ +# include +# include +#endif +//----------------------------------------------------------------------------- +#ifdef SCRIPT + + void ASmain () + { + // initialize + RC rt = Image( EPixelFormat::RGBA8_UNorm, SurfaceSize()/4 ); rt.Name( "RT" ); + RC tex = Image( EImageType::FImage2D, "shadertoy/BlueNoise.png" ); + RC scene = Scene(); + RC scene2 = Scene(); + RC draw_mode = DynamicUInt(); + RC mode = DynamicUInt(); + RC with_offset = DynamicUInt(); + RC scale = DynamicUInt(); + RC hash = DynamicFloat(); + const float size = 0.085; + + { + RC geometry = UnifiedGeometry(); + RC vbuf = Buffer(); + array vertices; + + vertices.reserve( 10 * 10 * 3 ); + for (uint y = 0; y < 10; ++y) + for (uint x = 0; x < 10; ++x) + { + float2 pos = (float2(x,y) / 5.0) - 1.0 + 0.1; + vertices.push_back( float2(-1.0, -1.0) * size + pos ); + vertices.push_back( float2(-1.0, 1.0) * size + pos ); + vertices.push_back( float2( 1.0, -1.0) * size + pos ); + } + vbuf.FloatArray( "vertices", vertices ); + vbuf.LayoutName( "VBuffer" ); + + UnifiedGeometry_Draw cmd; + cmd.vertexCount = vertices.size(); + cmd.instanceCount = 2; + cmd.firstInstance = 0; + geometry.Draw( cmd ); + + geometry.ArgIn( "un_VBuffer", vbuf ); + + scene.Add( geometry ); + } + { + RC geometry = UnifiedGeometry(); + RC vbuf = Buffer(); + array vertices; + array indices; + + vertices.reserve( 10 * 10 * 6 ); + for (uint y = 0; y < 10; ++y) + for (uint x = 0; x < 10; ++x) + { + uint idx = vertices.size(); + float2 pos = (float2(x,y) / 5.0) - 1.0 + 0.1; + vertices.push_back( float2(-1.0, -1.0) * size + pos ); + vertices.push_back( float2(-1.0, 1.0) * size + pos ); + vertices.push_back( float2( 1.0, -1.0) * size + pos ); + vertices.push_back( float2( 1.0, 1.0) * size +
pos ); + vertices.push_back( float2(-1.0, 1.0) * size + pos ); + vertices.push_back( float2( 1.0, -1.0) * size + pos ); + vertices.push_back( float2( 1.0, 1.0) * size + pos ); + + indices.push_back( idx+0 ); + indices.push_back( idx+1 ); + indices.push_back( idx+2 ); + + indices.push_back( idx+1 ); + indices.push_back( idx+2 ); + indices.push_back( idx+3 ); + } + vbuf.FloatArray( "vertices", vertices ); + vbuf.UIntArray( "indices", indices ); + vbuf.LayoutName( "VBuffer" ); + + UnifiedGeometry_DrawIndexed cmd; + cmd.indexCount = indices.size(); + cmd.IndexBuffer( vbuf, "indices" ); + geometry.Draw( cmd ); + + geometry.ArgIn( "un_VBuffer", vbuf ); + + scene2.Add( geometry ); + } + + Slider( draw_mode, "DrawMode", 0, 2 ); + Slider( mode, "Mode", 0, 5, 2 ); + Slider( with_offset, "WithOffset", 0, 1, 0 ); + Slider( scale, "Scale", 0, 2, 2 ); + Slider( hash, "Hash", 1.f, 16.f ); + + // render loop + { + RC pass = scene.AddGraphicsPass( "draw" ); + pass.AddPipeline( "perf/Subgroups-2a.as" ); // [src](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/pipelines/perf/Subgroups-2a.as) + pass.Output( "out_Color", rt, RGBA32f(0.0) ); + pass.Constant( "iMode", mode ); + pass.Constant( "iWithOffset", with_offset ); + pass.Constant( "iScale", scale ); + pass.Constant( "iHash", hash ); + pass.EnableIfEqual( draw_mode, 0 ); + }{ + RC pass = scene.AddGraphicsPass( "draw" ); + pass.AddPipeline( "perf/Subgroups-2b.as" ); // [src](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/pipelines/perf/Subgroups-2b.as) + pass.Output( "out_Color", rt, RGBA32f(0.0) ); + pass.Constant( "iMode", mode ); + pass.Constant( "iWithOffset", with_offset ); + pass.Constant( "iScale", scale ); + pass.Constant( "iHash", hash ); + pass.ArgIn( "un_Texture", tex, Sampler_NearestClamp ); + pass.EnableIfEqual( draw_mode, 1 ); + }{ + RC pass = scene2.AddGraphicsPass( "draw" ); + pass.AddPipeline( "perf/Subgroups-2c.as" ); // 
[src](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/pipelines/perf/Subgroups-2c.as) + pass.Output( "out_Color", rt, RGBA32f(0.0) ); + pass.Constant( "iMode", mode ); + pass.Constant( "iScale", scale ); + pass.Constant( "iHash", hash ); + pass.ArgIn( "un_Texture", tex, Sampler_NearestClamp ); + pass.EnableIfEqual( draw_mode, 2 ); + } + Present( rt ); + } + +#endif +//----------------------------------------------------------------------------- diff --git a/AE/samples/res_editor/_data/scripts/perf/TexCache.as b/AE/samples/res_editor/_data/scripts/perf/TexCache.as new file mode 100644 index 00000000..f791b954 --- /dev/null +++ b/AE/samples/res_editor/_data/scripts/perf/TexCache.as @@ -0,0 +1,103 @@ +// Copyright (c) Zhirnov Andrey. For more information see 'LICENSE' +#ifdef __INTELLISENSE__ +# include +# include +# define GEN_TEX +# define TEST_TEX_CACHE +#endif +//----------------------------------------------------------------------------- +#ifdef SCRIPT + + void ASmain () + { + // initialize + RC scale = DynamicFloat(); + RC hash = DynamicFloat(); + RC count = DynamicUInt(); + RC step = DynamicUInt(); + RC tex_pot = DynamicUInt2(); + RC tex_dim = tex_pot.PowOf2().Dimension(); + RC gen_tex = DynamicUInt(); + + RC rt = Image( EPixelFormat::RGBA8_UNorm, SurfaceSize() ); rt.Name( "RT" ); + RC tex = Image( EPixelFormat::RGBA8_UNorm, tex_dim ); tex.Name( "Texture" ); + + Slider( gen_tex, "GenTex", 0, 1, 1 ); + Slider( tex_pot, "TexDim", uint2(1), uint2(14), uint2(4) ); + Slider( count, "Repeat", 1, 32, 1 ); + Slider( step, "UVStep", 0, 3, 0 ); + Slider( scale, "Scale", 0.f, 4.f, 1.f ); + Slider( hash, "Hash", 0.1f, 2.f, 1.f ); + + // render loop + { + RC pass = ComputePass( "", "GEN_TEX" ); + pass.ArgOut( "un_OutImage", tex ); + pass.LocalSize( 8, 8 ); + pass.DispatchThreads( tex.Dimension() ); + pass.EnableIfEqual( gen_tex, 1 ); + }{ + RC pass = ComputePass( "", "TEST_TEX_CACHE" ); + pass.ArgOut( "un_OutImage", rt ); + pass.ArgIn( "un_Texture", 
tex, Sampler_LinearRepeat ); + pass.Constant( "iScale", scale ); + pass.Constant( "iHash", hash ); + pass.Constant( "iStep", step ); + pass.LocalSize( 8, 8 ); + pass.DispatchThreads( rt.Dimension() ); + pass.Repeat( count ); + } + + // Present( rt ); + } + +#endif +//----------------------------------------------------------------------------- +#ifdef GEN_TEX + #include "GlobalIndex.glsl" + #include "Hash.glsl" + #include "Color.glsl" + + void Main () + { + float2 uv = float2(GetGlobalCoord().xy); + float4 col = Rainbow( Hash_Uniform( uv, 0.f )); + gl.image.Store( un_OutImage, GetGlobalCoord().xy, col ); + } + +#endif +//----------------------------------------------------------------------------- +#ifdef TEST_TEX_CACHE + #include "GlobalIndex.glsl" + #include "Hash.glsl" + + void Main () + { + // float2 uv0 = HEHash22( GetGlobalCoord().xy ); + float2 uv0 = DHash22( float2(GetGlobalCoord().xy >> iStep) * iHash ) + GetGlobalCoordUNorm().xy * 0.1; + + float2 uv1 = uv0 + float2(0.1, 0.5) * iScale; + float2 uv2 = uv0 + float2(0.2, 0.6) * iScale; + float2 uv3 = uv0 + float2(0.3, 0.7) * iScale; + float2 uv4 = uv0 + float2(0.4, 0.8) * iScale; + float2 uv5 = uv0 + float2(0.5, 0.9) * iScale; + float2 uv6 = uv0 + float2(0.6, 0.1) * iScale; + float2 uv7 = uv0 + float2(0.7, 0.2) * iScale; + + float4 col = + gl.texture.Sample( un_Texture, uv0 ) + + gl.texture.Sample( un_Texture, uv1 ) + + gl.texture.Sample( un_Texture, uv2 ) + + gl.texture.Sample( un_Texture, uv3 ) + + gl.texture.Sample( un_Texture, uv4 ) + + gl.texture.Sample( un_Texture, uv5 ) + + gl.texture.Sample( un_Texture, uv6 ) + + gl.texture.Sample( un_Texture, uv7 ); + col /= 8.0; + + if ( AllLess( col, float4(-1.e+20) )) + gl.image.Store( un_OutImage, GetGlobalCoord().xy, col ); + } + +#endif +//----------------------------------------------------------------------------- diff --git a/AE/samples/res_editor/_data/scripts/perf/TexLookup.as b/AE/samples/res_editor/_data/scripts/perf/TexLookup.as new file mode 100644 
index 00000000..b6a1c907 --- /dev/null +++ b/AE/samples/res_editor/_data/scripts/perf/TexLookup.as @@ -0,0 +1,123 @@ +// Copyright (c) Zhirnov Andrey. For more information see 'LICENSE' +/* + 1. Uses 'uv * scale + bias' to access texture. + * change 'bias' to check cost of uv offset. + * change 'scale' to check cost of uv scale, which increases cache misses. + + 2. Uses '(noise >> step + offset) + uv' to access texture. + * step=0 is 1x1 noise + * step=1 is 2x2 noise, texture coords change linearly inside a single quad. + * step=1 offset=1 is 2x2 noise where a single screen quad has independent coords but neighboring quads have close coords. + * step=2 is 4x4 noise + * step=3 is 8x8 noise, this is close to the subgroup size. + * step=4 is 16x16 noise, this is greater than the subgroup size. +*/ +#ifdef __INTELLISENSE__ +# include +# include +# define GEN_NOISE +#endif +//----------------------------------------------------------------------------- +#ifdef SCRIPT + + void ASmain () + { + // initialize + RC tex_dim = DynamicUInt(); + RC dim = tex_dim.Mul( 1024 ).Dimension2(); + RC dim2 = dim.Mul( 2 ); + + RC rt = Image( EPixelFormat::RGBA8_UNorm, dim ); rt.Name( "RT" ); + RC noise = Image( EPixelFormat::RGBA8_UNorm, dim2 ); noise.Name( "Noise" ); + RC scene = Scene(); + RC gen_tex = DynamicUInt(); + RC mode = DynamicUInt(); + RC count = DynamicUInt(); + + { + RC geometry = UnifiedGeometry(); + RC vbuf = Buffer(); + array vertices; + array indices; + + GetGrid( 64, vertices, indices ); + + vbuf.FloatArray( "vertices", vertices ); + vbuf.UIntArray( "indices", indices ); + vbuf.LayoutName( "VBuffer" ); + + UnifiedGeometry_DrawIndexed cmd; + cmd.indexCount = indices.size(); + cmd.IndexBuffer( vbuf, "indices" ); + geometry.Draw( cmd ); + + geometry.ArgIn( "un_VBuffer", vbuf ); + geometry.ArgIn( "un_Texture", noise, Sampler_LinearRepeat ); + + scene.Add( geometry ); + } + + Slider( tex_dim, "TexDim", 1, 8, 4 ); + Slider( gen_tex, "GenTex", 0, 1, 1 ); + Slider( mode, "UV Noise", 0, 1 ); +
Slider( count, "Repeat", 1, 32 ); + + // render loop + if ( false ){ + { + RC pass = Postprocess( "", "GEN_NOISE" ); + pass.Output( "out_Color", noise ); + pass.EnableIfEqual( gen_tex, 1 ); + } + }else{ + { + RC pass = ComputePass( "", "GEN_NOISE" ); + pass.ArgOut( "un_OutImage", noise ); + pass.LocalSize( 8, 8 ); + pass.DispatchThreads( dim2 ); + pass.EnableIfEqual( gen_tex, 1 ); + } + } + + { + RC pass = scene.AddGraphicsPass( "sequential lookup" ); + pass.AddPipeline( "perf/TexLookup-1a.as" ); // [src](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/pipelines/perf/TexLookup-1a.as) + pass.Output( "out_Color", rt, RGBA32f(0.0) ); + pass.Slider( "iScale", 0.25f, 2.f, 1.f ); + pass.Slider( "iBias", 0.f, 1.f, 0.f ); + pass.EnableIfEqual( mode, 0 ); + pass.Repeat( count ); + }{ + RC pass = scene.AddGraphicsPass( "UV noise" ); + pass.AddPipeline( "perf/TexLookup-1b.as" ); // [src](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/pipelines/perf/TexLookup-1b.as) + pass.Output( "out_Color", rt, RGBA32f(0.0) ); + pass.Slider( "iStep", 0, 4, 4 ); + pass.Slider( "iOffset", 0, 1, 0 ); + pass.EnableIfEqual( mode, 1 ); + pass.Repeat( count ); + } + + // Present( rt ); + } + +#endif +//----------------------------------------------------------------------------- +#ifdef GEN_NOISE + #include "GlobalIndex.glsl" + #include "Hash.glsl" + #include "Color.glsl" + + void Main () + { + float2 uv = float2(GetGlobalCoord().xy); + float4 col = Rainbow( Hash_Uniform( uv, 0.111f )); + + #ifdef SH_COMPUTE + gl.image.Store( un_OutImage, GetGlobalCoord().xy, col ); + #else + out_Color = col; + #endif + } + +#endif +//----------------------------------------------------------------------------- diff --git a/AE/samples/res_editor/_data/scripts/samples-2d/AA-Grid.as b/AE/samples/res_editor/_data/scripts/samples-2d/AA-Grid.as index d2268d43..759d91cc 100644 --- a/AE/samples/res_editor/_data/scripts/samples-2d/AA-Grid.as +++ 
b/AE/samples/res_editor/_data/scripts/samples-2d/AA-Grid.as @@ -107,7 +107,7 @@ // 2D circles case 1 : { - float2 pos = GetGlobalCoordFloat().xy + iOffset; + float2 pos = GetGlobalCoordSF().xy + iOffset; float dist = Length( pos ); pos = Normalize( pos ); float md = AA_Lines( dist, 1.0/scale2d, Thickness() ); diff --git a/AE/samples/res_editor/_data/scripts/samples-2d/MaterialDepthBuffer.as b/AE/samples/res_editor/_data/scripts/samples-2d/MaterialDepthBuffer.as index 1d47300c..c2558584 100644 --- a/AE/samples/res_editor/_data/scripts/samples-2d/MaterialDepthBuffer.as +++ b/AE/samples/res_editor/_data/scripts/samples-2d/MaterialDepthBuffer.as @@ -35,14 +35,14 @@ array> images; for (uint i = 0; i < 4; ++i) { - images.push_back( Image( EImageType::FImage2D, "shadertoy/Abstract_1.jpg" )); - images.push_back( Image( EImageType::FImage2D, "shadertoy/Abstract_3.jpg" )); - images.push_back( Image( EImageType::FImage2D, "shadertoy/BlueNoise.png" )); - images.push_back( Image( EImageType::FImage2D, "shadertoy/Lichen.jpg" )); - images.push_back( Image( EImageType::FImage2D, "shadertoy/Organic_1.jpg" )); - images.push_back( Image( EImageType::FImage2D, "shadertoy/Organic_2.jpg" )); - images.push_back( Image( EImageType::FImage2D, "shadertoy/Organic_3.jpg" )); - images.push_back( Image( EImageType::FImage2D, "shadertoy/Organic_4.jpg" )); + images.push_back( Image( EImageType::FImage2D, "shadertoy/Abstract_1.jpg" )); + images.push_back( Image( EImageType::FImage2D, "shadertoy/Abstract_3.jpg" )); + images.push_back( Image( EImageType::FImage2D, "shadertoy/BlueNoise.png" )); + images.push_back( Image( EImageType::FImage2D, "shadertoy/Lichen.jpg" )); + images.push_back( Image( EImageType::FImage2D, "shadertoy/Organic_1.jpg" )); + images.push_back( Image( EImageType::FImage2D, "shadertoy/Organic_2.jpg" )); + images.push_back( Image( EImageType::FImage2D, "shadertoy/Organic_3.jpg" )); + images.push_back( Image( EImageType::FImage2D, "shadertoy/Organic_4.jpg" )); } cbuffer.FloatArray( 
"ids", mtr_ids ); diff --git a/AE/samples/res_editor/_data/scripts/samples-3d/DeferredTexturing.as b/AE/samples/res_editor/_data/scripts/samples-3d/DeferredTexturing.as index c6bc1e88..04726566 100644 --- a/AE/samples/res_editor/_data/scripts/samples-3d/DeferredTexturing.as +++ b/AE/samples/res_editor/_data/scripts/samples-3d/DeferredTexturing.as @@ -1,4 +1,18 @@ // Copyright (c) Zhirnov Andrey. For more information see 'LICENSE' +/* + First pass: + * draw scene to the G-Buffer (RGBA32U + depth) + * R channel - material ID and normal.z + * G channel - normal.xy + * B channel - texture UV + * A channel - UV derivatives + + Second pass: + * draw fullscreen triangle + * unpack G-Buffer + * apply material using the material ID, texture UV and UV derivatives + * apply lighting using the normal +*/ #ifdef __INTELLISENSE__ # include #endif diff --git a/AE/samples/res_editor/_data/scripts/samples-3d/Model-1.as b/AE/samples/res_editor/_data/scripts/samples-3d/Model.as similarity index 83% rename from AE/samples/res_editor/_data/scripts/samples-3d/Model-1.as rename to AE/samples/res_editor/_data/scripts/samples-3d/Model.as index 4541a6ff..651b2906 100644 --- a/AE/samples/res_editor/_data/scripts/samples-3d/Model-1.as +++ b/AE/samples/res_editor/_data/scripts/samples-3d/Model.as @@ -40,13 +40,13 @@ // render loop { RC draw_pass = scene.AddGraphicsPass( "opaque" ); - draw_pass.AddPipeline( "samples/Model-1.as" ); // [src](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/pipelines/samples/Model-1.as) + draw_pass.AddPipeline( "samples/Model.as" ); // [src](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/pipelines/samples/Model.as) draw_pass.Output( "out_Color", rt, RGBA32f(0.0f, 1.f, 1.f, 1.f) ); draw_pass.Output( ds, DepthStencil(1.f, 0) ); draw_pass.Layer( ERenderLayer::Opaque ); }{ RC draw_pass = scene.AddGraphicsPass( "translucent" ); - draw_pass.AddPipeline( "samples/Model-1.as" ); //
[src](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/pipelines/samples/Model-1.as) + draw_pass.AddPipeline( "samples/Model.as" ); // [src](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/pipelines/samples/Model.as) draw_pass.Output( "out_Color", rt ); draw_pass.Output( ds ); draw_pass.Layer( ERenderLayer::Translucent ); diff --git a/AE/samples/res_editor/_data/scripts/samples-3d/Particles-1.as b/AE/samples/res_editor/_data/scripts/samples-3d/Particles-1.as index a09c0dde..415ecc59 100644 --- a/AE/samples/res_editor/_data/scripts/samples-3d/Particles-1.as +++ b/AE/samples/res_editor/_data/scripts/samples-3d/Particles-1.as @@ -135,31 +135,47 @@ if ( (i % Mode_Count == Mode_GS) and not Supports_GeometryShader() ) continue; - RC draw_pass = scenes[i].AddGraphicsPass( "draw" ); - draw_pass.SetDebugLabel( "draw", RGBA8u(200, 200, 0, 255) ); - draw_pass.EnableIfEqual( draw_mode, i ); - draw_pass.Constant( "iSize", particle_size ); + RC pass = scenes[i].AddGraphicsPass( "draw" ); + pass.EnableIfEqual( draw_mode, i ); + pass.Constant( "iSize", particle_size ); switch ( i ) { case Mode_GS : - draw_pass.AddPipeline( "particles/Rays-gs.as" ); break; // [src](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/pipelines/particles/Rays-gs.as) + pass.SetDebugLabel( "Rays, GS", RGBA8u(200, 200, 0, 255) ); + pass.AddPipeline( "particles/Rays-gs.as" ); // [src](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/pipelines/particles/Rays-gs.as) + break; + case Mode_GS + Mode_Count : - draw_pass.AddPipeline( "particles/Dots-gs.as" ); break; // [src](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/pipelines/particles/Dots-gs.as) + pass.SetDebugLabel( "Dots, GS", RGBA8u(200, 200, 0, 255) ); + pass.AddPipeline( "particles/Dots-gs.as" ); // [src](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/pipelines/particles/Dots-gs.as) + break; + case Mode_Instancing : - 
draw_pass.AddPipeline( "particles/Rays-i.as" ); break; // [src](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/pipelines/particles/Rays-i.as) + pass.SetDebugLabel( "Rays, instancing", RGBA8u(200, 200, 0, 255) ); + pass.AddPipeline( "particles/Rays-i.as" ); // [src](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/pipelines/particles/Rays-i.as) + break; + case Mode_Instancing + Mode_Count : - draw_pass.AddPipeline( "particles/Dots-i.as" ); break; // [src](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/pipelines/particles/Dots-i.as) + pass.SetDebugLabel( "Dots, instancing", RGBA8u(200, 200, 0, 255) ); + pass.AddPipeline( "particles/Dots-i.as" ); // [src](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/pipelines/particles/Dots-i.as) + break; + case Mode_TriList : - draw_pass.AddPipeline( "particles/Rays-tl.as" ); break; // [src](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/pipelines/particles/Rays-tl.as) + pass.SetDebugLabel( "Rays, trilist", RGBA8u(200, 200, 0, 255) ); + pass.AddPipeline( "particles/Rays-tl.as" ); // [src](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/pipelines/particles/Rays-tl.as) + break; + case Mode_TriList + Mode_Count : - draw_pass.AddPipeline( "particles/Dots-tl.as" ); break; // [src](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/pipelines/particles/Dots-tl.as) + pass.SetDebugLabel( "Dots, trilist", RGBA8u(200, 200, 0, 255) ); + pass.AddPipeline( "particles/Dots-tl.as" ); // [src](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/pipelines/particles/Dots-tl.as) + break; // case Mode_MS : - // draw_pass.AddPipeline( "particles/Rays-ms.as" ); break; // [src](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/pipelines/particles/Rays-ms.as) + // pass.AddPipeline( "particles/Rays-ms.as" ); break; // 
[src](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/pipelines/particles/Rays-ms.as) // case Mode_MS + Mode_Count : - // draw_pass.AddPipeline( "particles/Dots-ms.as" ); break; // [src](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/pipelines/particles/Dots-ms.as) + // pass.AddPipeline( "particles/Dots-ms.as" ); break; // [src](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/pipelines/particles/Dots-ms.as) } - draw_pass.Output( "out_Color", rt, RGBA32f(0.0) ); + pass.Output( "out_Color", rt, RGBA32f(0.0) ); } Present( rt ); } diff --git a/AE/samples/res_editor/_data/scripts/samples-3d/Particles-2.as b/AE/samples/res_editor/_data/scripts/samples-3d/Particles-2.as index 94e16dc1..0aa978f4 100644 --- a/AE/samples/res_editor/_data/scripts/samples-3d/Particles-2.as +++ b/AE/samples/res_editor/_data/scripts/samples-3d/Particles-2.as @@ -135,33 +135,49 @@ if ( (i % Mode_Count == Mode_GS) and not Supports_GeometryShader() ) continue; - RC draw_pass = scenes[i].AddGraphicsPass( "draw" ); - draw_pass.SetDebugLabel( "draw", RGBA8u(200, 200, 0, 255) ); - draw_pass.EnableIfEqual( draw_mode, i ); - draw_pass.Constant( "iSize", particle_size ); + RC pass = scenes[i].AddGraphicsPass( "draw" ); + pass.EnableIfEqual( draw_mode, i ); + pass.Constant( "iSize", particle_size ); switch ( i ) { case Mode_GS : - draw_pass.AddPipeline( "particles/Rays-gs-fp16.as" ); break; // [src](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/pipelines/particles/Rays-gs-fp16.as) + pass.SetDebugLabel( "Rays, GS", RGBA8u(200, 200, 0, 255) ); + pass.AddPipeline( "particles/Rays-gs-fp16.as" ); // [src](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/pipelines/particles/Rays-gs-fp16.as) + break; + case Mode_GS + Mode_Count : - draw_pass.AddPipeline( "particles/Dots-gs-fp16.as" ); break; // 
[src](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/pipelines/particles/Dots-gs-fp16.as) + pass.SetDebugLabel( "Dots, GS", RGBA8u(200, 200, 0, 255) ); + pass.AddPipeline( "particles/Dots-gs-fp16.as" ); // [src](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/pipelines/particles/Dots-gs-fp16.as) + break; + case Mode_Instancing : - draw_pass.AddPipeline( "particles/Rays-i-fp16.as" ); break; // [src](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/pipelines/particles/Rays-i-fp16.as) + pass.SetDebugLabel( "Rays, instancing", RGBA8u(200, 200, 0, 255) ); + pass.AddPipeline( "particles/Rays-i-fp16.as" ); // [src](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/pipelines/particles/Rays-i-fp16.as) + break; + case Mode_Instancing + Mode_Count : - draw_pass.AddPipeline( "particles/Dots-i-fp16.as" ); break; // [src](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/pipelines/particles/Dots-i-fp16.as) + pass.SetDebugLabel( "Dots, instancing", RGBA8u(200, 200, 0, 255) ); + pass.AddPipeline( "particles/Dots-i-fp16.as" ); // [src](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/pipelines/particles/Dots-i-fp16.as) + break; + case Mode_TriList : - draw_pass.AddPipeline( "particles/Rays-tl-fp16.as" ); break; // [src](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/pipelines/particles/Rays-tl-fp16.as) + pass.SetDebugLabel( "Rays, trilist", RGBA8u(200, 200, 0, 255) ); + pass.AddPipeline( "particles/Rays-tl-fp16.as" ); // [src](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/pipelines/particles/Rays-tl-fp16.as) + break; + case Mode_TriList + Mode_Count : - draw_pass.AddPipeline( "particles/Dots-tl-fp16.as" ); break; // [src](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/pipelines/particles/Dots-tl-fp16.as) + pass.SetDebugLabel( "Dots, trilist", RGBA8u(200, 200, 0, 255) ); + 
pass.AddPipeline( "particles/Dots-tl-fp16.as" ); // [src](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/pipelines/particles/Dots-tl-fp16.as) + break; // case Mode_MS : - // draw_pass.AddPipeline( "particles/Rays-ms-fp16.as" ); break; // [src](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/pipelines/particles/Rays-ms-fp16.as) + // pass.AddPipeline( "particles/Rays-ms-fp16.as" ); break; // [src](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/pipelines/particles/Rays-ms-fp16.as) // case Mode_MS + Mode_Count : - // draw_pass.AddPipeline( "particles/Dots-ms-fp16.as" ); break; // [src](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/pipelines/particles/Dots-ms-fp16.as) + // pass.AddPipeline( "particles/Dots-ms-fp16.as" ); break; // [src](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/pipelines/particles/Dots-ms-fp16.as) } - draw_pass.Output( "out_Color", rt, RGBA32f(0.0) ); + pass.Output( "out_Color", rt, RGBA32f(0.0) ); } Present( rt ); } diff --git a/AE/samples/res_editor/_data/scripts/samples-3d/Planet-1.as b/AE/samples/res_editor/_data/scripts/samples-3d/Planet-1.as index 8d78f817..d9b6d5bf 100644 --- a/AE/samples/res_editor/_data/scripts/samples-3d/Planet-1.as +++ b/AE/samples/res_editor/_data/scripts/samples-3d/Planet-1.as @@ -48,7 +48,7 @@ void ASmain () planet.ArgIn( "un_NormalMap", normal_view, Sampler_LinearMipmapRepeat ); planet.ArgIn( "un_AlbedoMap", albedo_view, Sampler_LinearMipmapRepeat ); planet.ArgIn( "un_EmissionMap", emission_view, Sampler_LinearMipmapRepeat ); - planet.DetailLevel( 0, 9 ); + planet.DetailLevel( 9 ); scene.Add( planet ); } diff --git a/AE/samples/res_editor/_data/scripts/samples-3d/Planet-2.as b/AE/samples/res_editor/_data/scripts/samples-3d/Planet-2.as index 509da8c2..37f2e21a 100644 --- a/AE/samples/res_editor/_data/scripts/samples-3d/Planet-2.as +++ b/AE/samples/res_editor/_data/scripts/samples-3d/Planet-2.as @@ 
-53,7 +53,7 @@ void ASmain () planet.ArgIn( "un_NormalMap", normal_view, Sampler_LinearMipmapRepeat ); planet.ArgIn( "un_AlbedoMap", albedo_view, Sampler_LinearMipmapRepeat ); planet.ArgIn( "un_EmissionMap", emission_view, Sampler_LinearMipmapRepeat ); - planet.DetailLevel( 0, 9 ); + planet.DetailLevel( 9 ); scene.Add( planet, float3(0.f, 0.f, 2.f) ); } diff --git a/AE/samples/res_editor/_data/scripts/samples-3d/RenderToCubemap.as b/AE/samples/res_editor/_data/scripts/samples-3d/RenderToCubemap.as index 690c0e33..00c35d9c 100644 --- a/AE/samples/res_editor/_data/scripts/samples-3d/RenderToCubemap.as +++ b/AE/samples/res_editor/_data/scripts/samples-3d/RenderToCubemap.as @@ -41,9 +41,9 @@ array vp; vp.resize( 6 ); vp[0] = proj * float4x4().RotateY(ToRad( -90.f )); // +X - vp[1] = proj * float4x4().RotateY(ToRad( 90.f )); // -X + vp[1] = proj * float4x4().RotateY(ToRad( 90.f )); // -X vp[2] = proj * float4x4().RotateX(ToRad( -90.f )); // +Y - vp[3] = proj * float4x4().RotateX(ToRad( 90.f )); // -Y + vp[3] = proj * float4x4().RotateX(ToRad( 90.f )); // -Y vp[4] = proj; // +Z vp[5] = proj * float4x4().RotateX(ToRad( -180.f )) * float4x4().RotateZ(ToRad( -180.f )); // -Z @@ -92,8 +92,7 @@ void Main () { Ray ray; - float2 uv = GetGlobalCoordUNorm().xy; //uv.y = 1.0 - uv.y; - const float ipd = 64.0e-3f; // meters + float2 uv = GetGlobalCoordUNorm().xy; const float z_near = 0.1f; const float2 screen_dim = un_PerPass.resolution.xy; const float pix_to_m = un_PerPass.pixToMm * 0.001f; diff --git a/AE/samples/res_editor/_data/scripts/samples-3d/VisibilityBuffer.as b/AE/samples/res_editor/_data/scripts/samples-3d/VisibilityBuffer.as new file mode 100644 index 00000000..9f4e296b --- /dev/null +++ b/AE/samples/res_editor/_data/scripts/samples-3d/VisibilityBuffer.as @@ -0,0 +1,66 @@ +// Copyright (c) Zhirnov Andrey. For more information see 'LICENSE' +/* + Visibility buffer prototype. + - Implemented using ray tracing pass which has storage buffer for vertices. 
+ - Derivative calculations copy pasted from 'The Forge' engine. + - Without clusterization and other improvements. +*/ +#ifdef __INTELLISENSE__ +# include +#endif +//----------------------------------------------------------------------------- +#ifdef SCRIPT + + void ASmain () + { + RC idbuf = Image( EPixelFormat::R32U, SurfaceSize() ); idbuf.Name( "ID-Buffer" ); + RC rt = Image( EPixelFormat::RGBA8_UNorm, SurfaceSize() ); rt.Name( "RT-Color" ); + RC ds = Image( EPixelFormat::R32F, SurfaceSize() ); ds.Name( "RT-Depth" ); + RC scene = Scene(); + + // setup camera + { + RC camera = FPVCamera(); + + camera.ClipPlanes( 0.1f, 100.f ); + camera.FovY( 60.f ); + + const float s = 1.0f; + camera.ForwardBackwardScale( s*2.0f, s ); + camera.UpDownScale( s ); + camera.SideMovementScale( s ); + + scene.Set( camera ); + } + + // setup model + { + RC model = Model( "res/models/Sponza/Sponza.gltf" ); + + model.InitialTransform( float3(0.f, -1.f, 0.f), float3(0.f, ToRad(90.f), ToRad(180.f)), 100.f ); + + model.AddOmniLight( float3(0.f, -5.f, 0.f), float3(0.f, 0.f, 0.05f), RGBA32f(1.f) ); + + scene.Add( model ); + } + + // render loop + { + RC pass = scene.AddRayTracingPass( "fill VisBuffer" ); + pass.SetPipeline( "samples/VisibilityBuffer-pass1.as" ); // [src](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/pipelines/samples/VisibilityBuffer-pass1.as) + pass.ArgOut( "un_IDBuffer", idbuf ); + pass.ArgOut( "un_Depth", ds ); + pass.Dispatch( rt.Dimension() ); + }{ + RC pass = scene.AddRayTracingPass( "resolve VisBuffer" ); + pass.SetPipeline( "samples/VisibilityBuffer-pass2.as" ); // [src](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/pipelines/samples/VisibilityBuffer-pass2.as) + pass.ArgOut( "un_ColorBuf", rt ); + pass.ArgIn( "un_IDBuffer", idbuf ); + pass.ArgIn( "un_Depth", ds ); + pass.Dispatch( rt.Dimension() ); + } + Present( rt ); + } + +#endif +//----------------------------------------------------------------------------- diff 
--git a/AE/samples/res_editor/_data/scripts/samples-3d/Volumetric-1.as b/AE/samples/res_editor/_data/scripts/samples-3d/Volumetric.as similarity index 100% rename from AE/samples/res_editor/_data/scripts/samples-3d/Volumetric-1.as rename to AE/samples/res_editor/_data/scripts/samples-3d/Volumetric.as diff --git a/AE/samples/res_editor/_data/scripts/samples-rt/Dispersion2D.as b/AE/samples/res_editor/_data/scripts/samples-rt/Dispersion2D.as index b7153bb4..a6121959 100644 --- a/AE/samples/res_editor/_data/scripts/samples-rt/Dispersion2D.as +++ b/AE/samples/res_editor/_data/scripts/samples-rt/Dispersion2D.as @@ -241,8 +241,8 @@ // sphere case 2 : - cone.dir0 = GetDirection2D( snorm_pos0 * Pi2() ); - cone.dir1 = GetDirection2D( snorm_pos1 * Pi2() ); + cone.dir0 = GetDirection2D( snorm_pos0 * float_Pi2 ); + cone.dir1 = GetDirection2D( snorm_pos1 * float_Pi2 ); cone.origin0 = iLightPos * map_scale; cone.origin1 = cone.origin0; break; @@ -529,16 +529,17 @@ //----------------------------------------------------------------------------- #ifdef TONEMAPPING #include "ToneMapping.glsl" + #include "ColorSpaceUtility.glsl" void Main () { - float4 hdr = gl.texture.Fetch( un_HDR, int2(gl.FragCoord.xy), 0 ) / iScale; + float4 hdr = gl.texture.Fetch( un_HDR, int2(gl.FragCoord.xy), 0 ) / iScale; // linear space switch ( iTonemapping ) { case 1 : out_Color = float4(ToneMap_Unreal( hdr.rgb ), 1.0); break; case 2 : out_Color = float4(Tonemap_Lottes( hdr.rgb ), 1.0); break; - default : out_Color = hdr; break; + default : out_Color = ApplySRGBCurve( hdr ); break; } } diff --git a/AE/samples/res_editor/_data/scripts/samples-rt/Dispersion2DLayered.as b/AE/samples/res_editor/_data/scripts/samples-rt/Dispersion2DLayered.as index b271bf2d..ac89d26b 100644 --- a/AE/samples/res_editor/_data/scripts/samples-rt/Dispersion2DLayered.as +++ b/AE/samples/res_editor/_data/scripts/samples-rt/Dispersion2DLayered.as @@ -283,8 +283,8 @@ // sphere case 2 : - cone.dir0 = GetDirection2D( snorm_pos0 * Pi2() ); 
- cone.dir1 = GetDirection2D( snorm_pos1 * Pi2() ); + cone.dir0 = GetDirection2D( snorm_pos0 * float_Pi2 ); + cone.dir1 = GetDirection2D( snorm_pos1 * float_Pi2 ); cone.origin0 = iLightPos * map_scale; cone.origin1 = cone.origin0; break; @@ -624,16 +624,17 @@ //----------------------------------------------------------------------------- #ifdef TONEMAPPING #include "ToneMapping.glsl" + #include "ColorSpaceUtility.glsl" void Main () { - float4 hdr = gl.texture.Fetch( un_HDR, int2(gl.FragCoord.xy), 0 ) / iScale; + float4 hdr = gl.texture.Fetch( un_HDR, int2(gl.FragCoord.xy), 0 ) / iScale; // linear space switch ( iTonemapping ) { case 1 : out_Color = float4(ToneMap_Unreal( hdr.rgb ), 1.0); break; case 2 : out_Color = float4(Tonemap_Lottes( hdr.rgb ), 1.0); break; - default : out_Color = hdr; break; + default : out_Color = ApplySRGBCurve( hdr ); break; } } diff --git a/AE/samples/res_editor/_data/scripts/samples-rt/RT-Model-1.as b/AE/samples/res_editor/_data/scripts/samples-rt/RT-Model.as similarity index 87% rename from AE/samples/res_editor/_data/scripts/samples-rt/RT-Model-1.as rename to AE/samples/res_editor/_data/scripts/samples-rt/RT-Model.as index 9e1fa2f9..aa8c16e9 100644 --- a/AE/samples/res_editor/_data/scripts/samples-rt/RT-Model-1.as +++ b/AE/samples/res_editor/_data/scripts/samples-rt/RT-Model.as @@ -43,7 +43,7 @@ // render loop { RC draw = scene.AddGraphicsPass( "draw opaque" ); - draw.AddPipeline( "samples/Model-1.as" ); // [src](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/pipelines/samples/Model-1.as) + draw.AddPipeline( "samples/Model.as" ); // [src](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/pipelines/samples/Model.as) draw.Output( "out_Color", rt, RGBA32f(0.0f, 1.f, 1.f, 1.f) ); draw.Output( ds, DepthStencil(1.f, 0) ); draw.Layer( ERenderLayer::Opaque ); @@ -51,7 +51,7 @@ } { RC pass = scene.AddRayTracingPass( "rtrace" ); - pass.SetPipeline( "samples/Model-RT-1.as" ); // 
[src](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/pipelines/samples/Model-RT-1.as) + pass.SetPipeline( "samples/Model-RT.as" ); // [src](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/pipelines/samples/Model-RT.as) pass.ArgOut( "un_OutImage", rt ); pass.Dispatch( rt.Dimension() ); pass.EnableIfEqual( view_mode, 1 ); diff --git a/AE/samples/res_editor/_data/scripts/samples-rt/RT-MultiBounce-1.as b/AE/samples/res_editor/_data/scripts/samples-rt/RT-MultiBounce.as similarity index 99% rename from AE/samples/res_editor/_data/scripts/samples-rt/RT-MultiBounce-1.as rename to AE/samples/res_editor/_data/scripts/samples-rt/RT-MultiBounce.as index b2eadc74..0592aa0f 100644 --- a/AE/samples/res_editor/_data/scripts/samples-rt/RT-MultiBounce-1.as +++ b/AE/samples/res_editor/_data/scripts/samples-rt/RT-MultiBounce.as @@ -172,7 +172,7 @@ layout(std430, buffer_reference) buffer readonly IndicesRef { uint indices []; PrimaryRay.depth = 0.0; PrimaryRay.recursion = recursion; - if ( recursion > iMaxRecursion ) + if ( recursion >= iMaxRecursion ) { PrimaryRay.color = float3(0.95, 0.18, 0.95); // for debugging return; @@ -228,7 +228,7 @@ layout(std430, buffer_reference) buffer readonly IndicesRef { uint indices []; void RayTrace (float3 objNormal, const uint recursion) { - const float err = Epsilon(); + const float err = float_epsilon; float absorption = 0.1; HWRay hwray = HWRay_Create(); diff --git a/AE/samples/res_editor/_data/scripts/sphere/SphericalCube-3.as b/AE/samples/res_editor/_data/scripts/sphere/SphericalCube-3.as index 4ab87d4a..41ff475c 100644 --- a/AE/samples/res_editor/_data/scripts/sphere/SphericalCube-3.as +++ b/AE/samples/res_editor/_data/scripts/sphere/SphericalCube-3.as @@ -67,8 +67,7 @@ pass.Slider( "iDistanceOnSphere", 0, 1 ); GenMipmaps( cubemap_view ); - } - { + }{ RC draw = scene.AddGraphicsPass( "draw sphere" ); draw.AddPipeline( "sphere/SphericalCube-3.as" ); // 
[src](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/pipelines/sphere/SphericalCube-3.as) draw.Output( "out_Color", rt, RGBA32f(0.0) ); @@ -134,8 +133,7 @@ { const float2 pos_on_face = Floor( scale * ncoord ) + float2(x,y) + 0.5; const float3 obj_pos = ProjectToSphere( pos_on_face / scale ); - const float d = SDF_Sphere( pos_on_sphere - obj_pos, 0.06 ); - // const float d = Distance2( pos_on_sphere, obj_pos ) - 0.06; + const float d = Distance2( pos_on_sphere, obj_pos ) - 0.06; dist = Min( dist, d ); } diff --git a/AE/samples/res_editor/_data/scripts/sphere/SphericalCube-5.as b/AE/samples/res_editor/_data/scripts/sphere/SphericalCube-5.as index c4510c02..07a0f890 100644 --- a/AE/samples/res_editor/_data/scripts/sphere/SphericalCube-5.as +++ b/AE/samples/res_editor/_data/scripts/sphere/SphericalCube-5.as @@ -65,7 +65,7 @@ draw.AddPipeline( "sphere/SphericalCube-5a.as" ); // [src](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/pipelines/sphere/SphericalCube-5a.as) draw.Output( "out_Color", cubemap, RGBA32f(0.2) ); draw.Constant( "iProj", proj_type ); - draw.Slider( "iMode", 0, 1 ); + draw.Slider( "iProjInFS", 0, 1 ); GenMipmaps( cubemap_view ); } diff --git a/AE/samples/res_editor/_data/scripts/sphere/UVSphere-1.as b/AE/samples/res_editor/_data/scripts/sphere/UVSphere-1.as new file mode 100644 index 00000000..ee63b76e --- /dev/null +++ b/AE/samples/res_editor/_data/scripts/sphere/UVSphere-1.as @@ -0,0 +1,150 @@ +// Copyright (c) Zhirnov Andrey. 
For more information see 'LICENSE' +#ifdef __INTELLISENSE__ +# include +# include +#endif +//----------------------------------------------------------------------------- +#ifdef SCRIPT + + void ASmain () + { + RC rt = Image( EPixelFormat::RGBA8_UNorm, SurfaceSize() ); + RC cubemap = Image( EPixelFormat::RGBA8_UNorm, uint2(1024), ImageLayer(6), MipmapLevel(~0) ); cubemap.Name( "Cubemap tex" ); + RC cubemap_view = cubemap.CreateView( EImage::Cube ); + + RC proj_type = DynamicInt(); + Slider( proj_type, "iProj", 0, 5, 1 ); + + // render loop + { + RC pass = ComputePass(); + pass.ArgInOut( "un_OutImage", cubemap_view ); + pass.LocalSize( 8, 8 ); + pass.DispatchThreads( cubemap_view.Dimension2_Layers() ); + pass.Constant( "iProj", proj_type ); + pass.Slider( "iRadius", 0.f, 0.1f, 0.06f ); + + GenMipmaps( cubemap_view ); + }{ + RC pass = Postprocess(); + pass.Output( "out_Color", rt, RGBA32f(0.0) ); + pass.ArgIn( "un_CubeMap", cubemap_view, Sampler_LinearRepeat ); + pass.Slider( "iRotation", float2(-180.f, -90.f), float2(180.f, 90.f), float2(0.f) ); + pass.Slider( "iRotation2", float2(-2.f), float2(2.f), float2(0.f) ); + pass.Slider( "iScale", 0.25f, 1.1f, 1.1f ); + pass.Slider( "iFov", 0.f, 90.f, 60.f ); + pass.Slider( "iRadius", 0.0f, 0.9f, 0.5f ); // used to check circle distortion + pass.Constant( "iProj", proj_type ); + } + Present( rt ); + } + +#endif +//----------------------------------------------------------------------------- +#ifdef SH_FRAG + #include "SDF.glsl" + #include "CubeMap.glsl" + #include "Geometry.glsl" + #include "Quaternion.glsl" + #include "GlobalIndex.glsl" + + float3 Project (float3 n) + { + switch ( iProj ) + { + case 0 : n = CM_IdentitySC_Inverse( n ); break; + case 1 : n = CM_TangentialSC_Inverse( n ); break; + case 2 : n = CM_EverittSC_Inverse( n ); break; + case 3 : n = CM_5thPolySC_Inverse( n ); break; + case 4 : n = CM_COBE_SC_Inverse( n ); break; + case 5 : n = CM_ArvoSC_Inverse( n ); break; + } + return CM_IdentitySC_Forward( n.xy, 
ECubeFace(n.z) ); + } + + void Main () + { + float2 uv = GetGlobalCoordSNormCorrected2() * iScale; + float4 norm = UVtoSphereNormal( uv, ToRad(iFov) ); + float3 uvw = norm.xyz; + + uvw = QMul( QRotationY(ToRad( iRotation.x + iRotation2.x )), uvw ); + uvw = QMul( QRotationX(ToRad( iRotation.y + iRotation2.y )), uvw ); + uvw = Project( uvw ); + + out_Color.rgb = gl.texture.Sample( un_CubeMap, uvw ).rgb * SmoothStep( norm.w, 0.0, 0.01 ); + + // screen space circle + { + float d = SDF2_Circle( uv, 0.23 * iRadius ); + d = SDF_OpSub( d, SDF2_Circle( uv, 0.2 * iRadius )); + + float a = SmoothStep( -d*4.0/iRadius, 0.02, 0.025 ); + float b = SmoothStep( -d*2.5/iRadius, 0.02, 0.025 ); + + out_Color.rgb *= (1.0 - a); + out_Color.rgb = Lerp( out_Color.rgb, float3(0.8, 0.0, 1.0), b ); + } + + out_Color.a = 1.0; + } + +#endif +//----------------------------------------------------------------------------- +#ifdef SH_COMPUTE + #include "SDF.glsl" + #include "Color.glsl" + #include "CubeMap.glsl" + #include "Geometry.glsl" + #include "GlobalIndex.glsl" + + + int FaceIdx () { + return int(gl.WorkGroupID.z); + } + + float3 ProjectToSphere (const float2 snormCoord) + { + switch ( iProj ) + { + case 0 : return CM_IdentitySC_Forward( snormCoord, FaceIdx() ); + case 1 : return CM_TangentialSC_Forward( snormCoord, FaceIdx() ); + case 2 : return CM_EverittSC_Forward( snormCoord, FaceIdx() ); + case 3 : return CM_5thPolySC_Forward( snormCoord, FaceIdx() ); + case 4 : return CM_COBE_SC_Forward( snormCoord, FaceIdx() ); + case 5 : return CM_ArvoSC_Forward( snormCoord, FaceIdx() ); + } + return float3(0.0); + } + + void Main () + { + const float lod = 4.0; + const float2 size = float2(GetGlobalSize().xy); + const float2 ncoord = GetGlobalCoordSNorm().xy; + const float3 pos_on_sphere = ProjectToSphere( ncoord ); // == normal + float4 color = float4(0.0); + + color.b = AA_QuadGrid( size * ncoord, lod/size, 12.5 ) * 0.6; + + const float2 scale = float2(lod); + float dist = 1.0e+10; + + for (int 
y = -1; y <= 1; ++y) + for (int x = -1; x <= 1; ++x) + { + const float2 pos_on_face = Floor( scale * ncoord ) + float2(x,y) + 0.5; + const float3 obj_pos = ProjectToSphere( pos_on_face / scale ); + const float d = Distance( pos_on_sphere, obj_pos ) - iRadius; + + dist = Min( dist, d ); + } + + color.g = Saturate( 1.0 - SmoothStep( dist, -0.005, 0.005 )); + color.b *= (1.0 - color.g); + + gl.image.Store( un_OutImage, GetGlobalCoord(), color ); + } + +#endif +//----------------------------------------------------------------------------- diff --git a/AE/samples/res_editor/_data/scripts/tests/CubeMapTest-1.as b/AE/samples/res_editor/_data/scripts/tests/CubeMapTest-1.as index f28d167d..456c1e76 100644 --- a/AE/samples/res_editor/_data/scripts/tests/CubeMapTest-1.as +++ b/AE/samples/res_editor/_data/scripts/tests/CubeMapTest-1.as @@ -19,6 +19,8 @@ pass.ArgOut( "un_OutImage", rt ); pass.LocalSize( 8, 8 ); pass.DispatchThreads( rt.Dimension() ); + + pass.Slider( "iScale", 0, 8, 3 ); } Present( rt ); } @@ -32,6 +34,9 @@ float3 CubeFaceToNormal (ECubeFace face) { + // same + //return CM_RotateVec( float3(0.0, 0.0, 1.0), face ); + switch ( face ) { case ECubeFace_XPos : return float3(+1.0, 0.0, 0.0 ); @@ -49,7 +54,7 @@ float4 col = float4(0.0); const float y_max = 7.0; const float y = Floor( GetGroupCoordUNorm().y * y_max ); - const float scale = 1000.0; + const float scale = Exp10( float(iScale) ); float2 uv = GetGlobalCoordUNorm().xy; uv.y = (uv.y - y/y_max) * y_max; float3 dir = Ray_PlaneTo360( float3(0.0), 0.1, uv ).dir; diff --git a/AE/samples/res_editor/_data/scripts/tests/CubeMapTest-2.as b/AE/samples/res_editor/_data/scripts/tests/CubeMapTest-2.as deleted file mode 100644 index f28d167d..00000000 --- a/AE/samples/res_editor/_data/scripts/tests/CubeMapTest-2.as +++ /dev/null @@ -1,106 +0,0 @@ -// Copyright (c) Zhirnov Andrey. 
For more information see 'LICENSE' -#ifdef __INTELLISENSE__ -# define SH_COMPUTE -# include -# include -#endif -//----------------------------------------------------------------------------- -#ifdef SCRIPT - - void ASmain () - { - // initialize - RC rt = Image( EPixelFormat::RGBA8_UNorm, SurfaceSize() ); rt.Name( "RT" ); - - // render loop - { - RC pass = ComputePass(); - - pass.ArgOut( "un_OutImage", rt ); - pass.LocalSize( 8, 8 ); - pass.DispatchThreads( rt.Dimension() ); - } - Present( rt ); - } - -#endif -//----------------------------------------------------------------------------- -#ifdef SH_COMPUTE - #include "GlobalIndex.glsl" - #include "CubeMap.glsl" - #include "Ray.glsl" - - float3 CubeFaceToNormal (ECubeFace face) - { - switch ( face ) - { - case ECubeFace_XPos : return float3(+1.0, 0.0, 0.0 ); - case ECubeFace_XNeg : return float3(-1.0, 0.0, 0.0 ); - case ECubeFace_YPos : return float3( 0.0, +1.0, 0.0 ); - case ECubeFace_YNeg : return float3( 0.0, -1.0, 0.0 ); - case ECubeFace_ZPos : return float3( 0.0, 0.0, +1.0 ); - case ECubeFace_ZNeg : return float3( 0.0, 0.0, -1.0 ); - } - return float3(0.0); - } - - void Main () - { - float4 col = float4(0.0); - const float y_max = 7.0; - const float y = Floor( GetGroupCoordUNorm().y * y_max ); - const float scale = 1000.0; - - float2 uv = GetGlobalCoordUNorm().xy; uv.y = (uv.y - y/y_max) * y_max; - float3 dir = Ray_PlaneTo360( float3(0.0), 0.1, uv ).dir; - - switch ( int(y) ) - { - case 0 : { - float3 uv_f = CM_IdentitySC_Inverse( dir ); - float3 dir2 = CM_IdentitySC_Forward( uv_f.xy, int(uv_f.z) ); - col.rgb = Abs( dir - dir2 ) * scale; - break; - } - case 1 : { - float3 uv_f = CM_TangentialSC_Inverse( dir ); - float3 dir2 = CM_TangentialSC_Forward( uv_f.xy, int(uv_f.z) ); - col.rgb = Abs( dir - dir2 ) * scale; - break; - } - case 2 : { - float3 uv_f = CM_EverittSC_Inverse( dir ); - float3 dir2 = CM_EverittSC_Forward( uv_f.xy, int(uv_f.z) ); - col.rgb = Abs( dir - dir2 ) * scale; - break; - } - case 3 : { - 
float3 uv_f = CM_5thPolySC_Inverse( dir ); - float3 dir2 = CM_5thPolySC_Forward( uv_f.xy, int(uv_f.z) ); - col.rgb = Abs( dir - dir2 ) * scale; - break; - } - case 4 : { - float3 uv_f = CM_COBE_SC_Inverse( dir ); - float3 dir2 = CM_COBE_SC_Forward( uv_f.xy, int(uv_f.z) ); - col.rgb = Abs( dir - dir2 ) * scale; - break; - } - case 5 : { - float3 uv_f = CM_ArvoSC_Inverse( dir ); - float3 dir2 = CM_ArvoSC_Forward( uv_f.xy, int(uv_f.z) ); - col.rgb = Abs( dir - dir2 ) * scale; - break; - } - case 6 : { - float3 uv_f = CM_IdentitySC_Inverse( dir ); - col.rgb = Abs( CubeFaceToNormal( ECubeFace(uv_f.z) ) - dir ); - break; - } - } - - gl.image.Store( un_OutImage, GetGlobalCoord().xy, col ); - } - -#endif -//----------------------------------------------------------------------------- diff --git a/AE/samples/res_editor/_data/scripts/tests/Derivatives.as b/AE/samples/res_editor/_data/scripts/tests/Derivatives.as index 5aaf318f..4c80427f 100644 --- a/AE/samples/res_editor/_data/scripts/tests/Derivatives.as +++ b/AE/samples/res_editor/_data/scripts/tests/Derivatives.as @@ -1,6 +1,7 @@ // Copyright (c) Zhirnov Andrey. For more information see 'LICENSE' /* - + Compare dFdxFine and dFdxCoarse. + Emulate dFd* in compute shader. 
*/ #ifdef __INTELLISENSE__ # include diff --git a/AE/samples/res_editor/_data/scripts/tests/ImageExport-CubeMap.as b/AE/samples/res_editor/_data/scripts/tests/ImageExport-CubeMap.as index 462b31d6..06bcab17 100644 --- a/AE/samples/res_editor/_data/scripts/tests/ImageExport-CubeMap.as +++ b/AE/samples/res_editor/_data/scripts/tests/ImageExport-CubeMap.as @@ -15,8 +15,8 @@ RC ds = Image( EPixelFormat::Depth32F, SurfaceSize() ); ds.Name( "RT-Depth" ); const int2 tile_size = int2(128); - const int tile_count_pot = 5; // 2^X - const int2 cubemap_dim = tile_size << tile_count_pot; + const int tile_count_pot = 5; // 2^X + const int2 cubemap_dim = tile_size << tile_count_pot; // 4096 RC cubemap = Image( EPixelFormat::RGBA8_UNorm, uint2(cubemap_dim), ImageLayer(6), MipmapLevel(~0) ); cubemap.Name( "Cubemap tex" ); RC cubemap_view = cubemap.CreateView( EImage::Cube ); diff --git a/AE/samples/res_editor/_data/scripts/tests/LightModels.as b/AE/samples/res_editor/_data/scripts/tests/LightModels.as index 5339cc94..f9b168ee 100644 --- a/AE/samples/res_editor/_data/scripts/tests/LightModels.as +++ b/AE/samples/res_editor/_data/scripts/tests/LightModels.as @@ -34,13 +34,14 @@ #ifdef SH_FRAG #include "PBR.glsl" #include "Normal.glsl" + #include "Geometry.glsl" #include "GlobalIndex.glsl" #include "ColorSpaceUtility.glsl" - float3 Sphere (float2 uv, float2 duv) + float4 Sphere (float2 uv, float2 duv) { uv = ToSNorm( uv ) * (duv.yx / duv.x); - return float3( uv, Saturate( 1.0 - LengthSq( uv )) ); + return UVtoSphereNormal( uv ); } @@ -50,8 +51,8 @@ const float3 spec_col = RemoveSRGBCurve( iSpecular.rgb ); const float3 light_col = RemoveSRGBCurve( iLightCol.rgb ); - const float3 pos = Sphere( uv, duv ); - const float3 norm = -ComputeNormalInWS_dxdy( pos ); + const float4 pos = Sphere( uv, duv ); + const float3 norm = -ComputeNormalInWS_dxdy( pos.xyz ); const float3 light = Normalize( iLightDir ); const float3 view = Normalize( float3( ToSNorm(uv) * (duv.yx / duv.x), 0.7 )); const float 
f0 = 0.8; @@ -76,8 +77,8 @@ } } - lr.diffuse *= light_col * Step( 0.0001, pos.z ); - lr.specular *= light_col * Step( 0.0001, pos.z ); + lr.diffuse *= light_col * SmoothStep( pos.w, 0.0, 0.01 ); + lr.specular *= light_col * SmoothStep( pos.w, 0.0, 0.01 ); return lr; } diff --git a/AE/samples/res_editor/_data/scripts/tests/NaN.as b/AE/samples/res_editor/_data/scripts/tests/NaN.as index 7a23d3ff..0910a5e8 100644 --- a/AE/samples/res_editor/_data/scripts/tests/NaN.as +++ b/AE/samples/res_editor/_data/scripts/tests/NaN.as @@ -50,8 +50,12 @@ switch ( int(GetGroupCoordUNorm().y * 7.0) ) { + // specs: Which operand is the result is undefined if one of the operands is a NaN. case 0 : col = float4( right ? Min( nan, 0.5f ) : Min( 1.0f, nan )); break; + + // specs: Which operand is the result is undefined if one of the operands is a NaN. case 1 : col = float4( right ? Max( nan, 0.5f ) : Max( 1.0f, nan )); break; + case 2 : col = float4( Saturate( nan )); break; case 3 : col = float4( right ? Clamp( 0.5f, nan, 1.f ) : Clamp( 0.5f, 1.f, nan )); break; case 4 : col = float4( bool(nan) ? 
0.5f : 1.f ); break; @@ -61,6 +65,8 @@ if ( Any(IsNaN( col )) or Any(IsInfinity( col )) ) col = float4(1.0, 0.0, 0.0, 1.0); + else + col *= float4(0.0, 0.75, 0.0, 1.0); gl.image.Store( un_OutImage, GetGlobalCoord().xy, col ); } diff --git a/AE/samples/res_editor/_data/scripts/tests/NormalPacking.as b/AE/samples/res_editor/_data/scripts/tests/NormalPacking.as index a50b2e22..0429b862 100644 --- a/AE/samples/res_editor/_data/scripts/tests/NormalPacking.as +++ b/AE/samples/res_editor/_data/scripts/tests/NormalPacking.as @@ -24,9 +24,11 @@ RC p_shape = DynamicUInt(); RC p_cmp = DynamicUInt(); RC p_diff = DynamicUInt(); + RC p_mode = DynamicUInt(); - Slider( p_shape, "Shape", 0, 1 ); - Slider( p_cmp, "Cmp", 0, 7 ); + Slider( p_shape, "Shape", 0, 4 ); + Slider( p_mode, "Format", 0, 3 ); + Slider( p_cmp, "Cmp", 0, 2 ); Slider( p_diff, "Diff", 0, 8, 2 ); // render loop @@ -46,6 +48,7 @@ pass.ArgIn( "un_NormalUn8", norm_un8, Sampler_NearestClamp ); pass.Constant( "iShape", p_shape ); pass.Constant( "iCmp", p_cmp ); + pass.Constant( "iFormat", p_mode ); pass.Constant( "iDiff", p_diff ); } Present( rt ); @@ -56,12 +59,13 @@ #ifdef SH_FRAG #include "Normal.glsl" #include "GBuffer.glsl" + #include "Geometry.glsl" #include "GlobalIndex.glsl" float3 Sphere (float2 uv, float2 duv) { uv = ToSNorm( uv ) * (duv.yx / duv.x); - return float3( uv, Saturate( 1.0 - LengthSq( uv )) ); + return UVtoSphereNormal( uv ).xyz; } float4 CalcNormalAndIndex () @@ -77,6 +81,9 @@ { case 0 : return float4( -ComputeNormalInWS_dxdy( Sphere( uv2, duv2 )), float(idx) ); case 1 : return float4( ComputeNormalInWS_dxdy( Sphere( uv2, duv2 )), float(idx) ); + case 2 : return float4( ComputeNormalInWS_dxdy( Sphere( uv2, duv2 )) * float3(-1.0, 1.0, -1.0), float(idx) ); + case 3 : return float4( ComputeNormalInWS_dxdy( Sphere( uv2, duv2 )) * float3(-1.0, -1.0, 1.0), float(idx) ); + case 4 : return float4( -ComputeNormalInWS_dxdy( Sphere( uv2, duv2 )).zxy, float(idx) ); } } @@ -88,6 +95,7 @@ case 1 : return 
float4( ToUNorm( CryTeck_EncodeNormal( norm )), 0.f, 0.f ); case 2 : return float4( Stalker_EncodeNormal( norm ), 0.f, 0.f ); case 3 : return float4( ToUNorm( Octahedron_EncodeNormal( norm )), 0.f, 0.f ); + case 4 : return float4( SigOctahedron_EncodeNormal( norm ), 0.f ); case 5 : return float4( ToUNorm( Stereo_EncodeNormal( norm )), 0.f, 0.f ); case 6 : return float4( Spheremap_EncodeNormal( norm ), 0.f, 0.f ); @@ -103,6 +111,7 @@ case 1 : return CryTeck_DecodeNormal( ToSNorm( packed.xy )); case 2 : return Stalker_DecodeNormal( packed.xy ); case 3 : return Octahedron_DecodeNormal( ToSNorm( packed.xy )); + case 4 : return SigOctahedron_DecodeNormal( packed.xyz ); case 5 : return Stereo_DecodeNormal( ToSNorm( packed.xy )); case 6 : return Spheremap_DecodeNormal( packed.xy ); @@ -131,31 +140,34 @@ void Main () { - float4 n_idx = CalcNormalAndIndex(); - float4 packed1 = gl.texture.Fetch( un_NormalFp32, int2(gl.FragCoord.xy), 0 ); - float4 packed2 = gl.texture.Fetch( un_NormalFp16, int2(gl.FragCoord.xy), 0 ); - float4 packed3 = gl.texture.Fetch( un_NormalUn16, int2(gl.FragCoord.xy), 0 ); - float4 packed4 = gl.texture.Fetch( un_NormalUn8, int2(gl.FragCoord.xy), 0 ); - - float3 norm1 = DecodeNormal( uint(n_idx.w), packed1 ); - float3 norm2 = DecodeNormal( uint(n_idx.w), packed2 ); - float3 norm3 = DecodeNormal( uint(n_idx.w), packed3 ); - float3 norm4 = DecodeNormal( uint(n_idx.w), packed4 ); - float diff = Pow( 10.f, float(iDiff) ); + float4 n_idx = CalcNormalAndIndex(); + float4 packed_fp32 = gl.texture.Fetch( un_NormalFp32, int2(gl.FragCoord.xy), 0 ); + float4 packed_fp16 = gl.texture.Fetch( un_NormalFp16, int2(gl.FragCoord.xy), 0 ); + float4 packed_un16 = gl.texture.Fetch( un_NormalUn16, int2(gl.FragCoord.xy), 0 ); + float4 packed_un8 = gl.texture.Fetch( un_NormalUn8, int2(gl.FragCoord.xy), 0 ); + + float3 norm_fp32 = DecodeNormal( uint(n_idx.w), packed_fp32 ); + float3 norm_fp16 = DecodeNormal( uint(n_idx.w), packed_fp16 ); + float3 norm_un16 = DecodeNormal( 
uint(n_idx.w), packed_un16 ); + float3 norm_un8 = DecodeNormal( uint(n_idx.w), packed_un8 ); + float diff = Pow( 10.f, float(iDiff) ); + float3 norm; + + switch ( iFormat ) + { + case 0 : norm = norm_fp32; break; + case 1 : norm = norm_fp16; break; + case 2 : norm = norm_un16; break; + case 3 : norm = norm_un8; break; + } out_Color = float4(1.0); switch ( iCmp ) { - case 0 : out_Color.rgb = norm1; break; - case 1 : out_Color.rgb = norm2; break; - case 2 : out_Color.rgb = norm3; break; - case 3 : out_Color.rgb = norm4; break; - - case 4 : out_Color.rgb = Abs( norm1 - n_idx.xyz ) * diff; break; - case 5 : out_Color.rgb = Abs( norm2 - n_idx.xyz ) * diff; break; - case 6 : out_Color.rgb = Abs( norm3 - n_idx.xyz ) * diff; break; - case 7 : out_Color.rgb = Abs( norm4 - n_idx.xyz ) * diff; break; + case 0 : out_Color.rgb = norm; break; + case 1 : out_Color.rgb = Abs( norm - n_idx.xyz ) * diff; break; + case 2 : out_Color.rgb = float3(Length( norm - n_idx.xyz )) * diff; break; } } diff --git a/AE/samples/res_editor/_data/scripts/tests/RG8toFloat.as b/AE/samples/res_editor/_data/scripts/tests/RG8toFloat.as new file mode 100644 index 00000000..64a62518 --- /dev/null +++ b/AE/samples/res_editor/_data/scripts/tests/RG8toFloat.as @@ -0,0 +1,187 @@ +// Copyright (c) Zhirnov Andrey. For more information see 'LICENSE' +/* + Some old GPUs support RT compression only for RGBA8_UNorm types. 
+*/ +#ifdef __INTELLISENSE__ +# include +# include +# define ENCODE +# define DECODE +#endif +//----------------------------------------------------------------------------- +#ifdef SCRIPT + + void ASmain () + { + // initialize + RC packed = Image( EPixelFormat::RGBA8_UNorm, SurfaceSize() ); + RC reference = Image( EPixelFormat::RG32F, SurfaceSize() ); + RC rt = Image( EPixelFormat::RGBA8_UNorm, SurfaceSize() ); + RC mode = DynamicUInt(); + RC cmp = DynamicUInt(); + + Slider( mode, "Mode", 0, 3 ); + + // render loop + { + RC pass = Postprocess( "", "ENCODE" ); + pass.Output( "out_Color0", packed ); + pass.Output( "out_Color1", reference ); + pass.Constant( "iMode", mode ); + }{ + RC pass = Postprocess( "", "DECODE" ); + pass.Output( "out_Color", rt ); + pass.ArgIn( "un_Packed", packed, Sampler_NearestClamp ); + pass.ArgIn( "un_Ref", reference, Sampler_NearestClamp ); + pass.Constant( "iMode", mode ); + pass.Slider( "iCmp", 0, 4 ); + pass.Slider( "iCmpScale", 0, 9, 2 ); + } + + Present( rt ); + } + +#endif +//----------------------------------------------------------------------------- +#ifdef SH_FRAG + #include "GlobalIndex.glsl" + + + // based on code from https://stackoverflow.com/questions/34963366/encode-floating-point-data-in-a-rgba-texture + float4 EncodeV2 (float2 value) + { + const float2 bitSh = float2( 256.0, 1.0 ); + const float bitMsk = 1.0 / 256.0; + + float4 comp = Fract( value.xxyy * bitSh.xyxy ); + comp.yw -= comp.xz * bitMsk; + return comp; + } + + // error: 1.98e-3 + float2 DecodeV2 (float4 value) + { + const float2 bitShifts = float2( 1.0 / 256.0, 1.0 ); + return float2( Dot( value.xy, bitShifts ), Dot( value.zw, bitShifts )); + } + //--------------------------------- + + + float4 EncodeV3 (float2 value) + { + return unpackUnorm4x8( packUnorm2x16( value )); + } + + // error: 1.0e-5 + float2 DecodeV3 (float4 color) + { + return unpackUnorm2x16( packUnorm4x8( color )); + } + //--------------------------------- + + + // based on code from 
https://aras-p.info/blog/2009/07/30/encoding-floats-to-rgba-the-final/ + float4 EncodeV4 (float2 value) + { + const float2 bitSh = float2( 255.0, 1.0 ); + const float bitMsk = 1.0 / 255.0; + + float4 comp = Fract( value.xxyy * bitSh.xyxy ); + comp.yw -= comp.xz * bitMsk; + return comp; + } + + // error: 1.0e-5 + float2 DecodeV4 (float4 value) + { + const float2 bitShifts = float2( 1.0 / 255.0, 1.0 ); + return float2( Dot( value.xy, bitShifts ), Dot( value.zw, bitShifts )); + } + //--------------------------------- + + + // based on code from https://gamedev.ru/flame/forum/?id=248801&page=225&m=5967042#m3363 + float4 EncodeV5 (float2 value) + { + const float2 bitSh = float2( 256.0, 1.0 ); + const float bitMsk = 1.0 / 256.0; + + float4 comp = Fract( value.xxyy * bitSh.xyxy ); + comp.yw -= comp.xz * bitMsk; + return comp * (256.0 / 255.0); + } + + // error: 1.0e-5 + float2 DecodeV5 (float4 value) + { + const float2 bitShifts = float2( 255.0 / (256.0 * 256.0), + 255.0 / 256.0 ); + return float2( Dot( value.xy, bitShifts ), Dot( value.zw, bitShifts )); + } + +#endif +//----------------------------------------------------------------------------- +#ifdef ENCODE + + void Main () + { + float2 unpacked = GetGlobalCoordUNorm().xy; + float4 packed; + + switch ( iMode ) + { + case 0 : packed = EncodeV2( unpacked ); break; + case 1 : packed = EncodeV3( unpacked ); break; + case 2 : packed = EncodeV4( unpacked ); break; + case 3 : packed = EncodeV5( unpacked ); break; + } + + out_Color0 = packed; + out_Color1.rg = unpacked; + } + +#endif +//----------------------------------------------------------------------------- +#ifdef DECODE + + void Main () + { + float4 packed = gl.texture.Fetch( un_Packed, GetGlobalCoord().xy, 0 ); + float2 reference = gl.texture.Fetch( un_Ref, GetGlobalCoord().xy, 0 ).rg; + float2 unpacked; + + switch ( iMode ) + { + case 0 : unpacked = DecodeV2( packed ); break; + case 1 : unpacked = DecodeV3( packed ); break; + case 2 : unpacked = DecodeV4( packed ); 
break; + case 3 : unpacked = DecodeV5( packed ); break; + } + + switch ( iCmp ) + { + case 0 : out_Color = float4(unpacked, 0.0, 1.0); break; + case 1 : out_Color = float4(reference, 0.0, 1.0); break; + case 2 : + { + float a = Distance( unpacked, reference ) * Exp10( float(iCmpScale) ); + out_Color = a > 1.0 ? float4(1.0, 0.0, 0.0, 1.0) : float4(a); + break; + } + case 3 : + { + float a = Abs( unpacked.x - reference.x ) * Exp10( float(iCmpScale) ); + out_Color = a > 1.0 ? float4(0.0, 1.0, 0.0, 1.0) : float4(a); + break; + } + case 4 : + { + float a = Abs( unpacked.y - reference.y ) * Exp10( float(iCmpScale) ); + out_Color = a > 1.0 ? float4(0.0, 1.0, 0.0, 1.0) : float4(a); + break; + } + } + } + +#endif +//----------------------------------------------------------------------------- diff --git a/AE/samples/res_editor/_data/scripts/tests/RGBA8toFloat.as b/AE/samples/res_editor/_data/scripts/tests/RGBA8toFloat.as new file mode 100644 index 00000000..22999574 --- /dev/null +++ b/AE/samples/res_editor/_data/scripts/tests/RGBA8toFloat.as @@ -0,0 +1,227 @@ +// Copyright (c) Zhirnov Andrey. For more information see 'LICENSE' +/* + Some old GPUs support RT compression only for RGBA8_UNorm types. 
+*/ +#ifdef __INTELLISENSE__ +# include +# include +# define ENCODE +# define DECODE +#endif +//----------------------------------------------------------------------------- +#ifdef SCRIPT + + void ASmain () + { + // initialize + RC packed = Image( EPixelFormat::RGBA8_UNorm, SurfaceSize() ); + RC reference = Image( EPixelFormat::R32F, SurfaceSize() ); + RC rt = Image( EPixelFormat::RGBA8_UNorm, SurfaceSize() ); + RC mode = DynamicUInt(); + RC cmp = DynamicUInt(); + + Slider( mode, "Mode", 0, 4 ); + + // render loop + { + RC pass = Postprocess( "", "ENCODE" ); + pass.Output( "out_Color0", packed ); + pass.Output( "out_Color1", reference ); + pass.Constant( "iMode", mode ); + }{ + RC pass = Postprocess( "", "DECODE" ); + pass.Output( "out_Color", rt ); + pass.ArgIn( "un_Packed", packed, Sampler_NearestClamp ); + pass.ArgIn( "un_Ref", reference, Sampler_NearestClamp ); + pass.Constant( "iMode", mode ); + pass.Slider( "iCmp", 0, 2 ); + pass.Slider( "iCmpScale", 0, 16, 2 ); + } + + Present( rt ); + } + +#endif +//----------------------------------------------------------------------------- +#ifdef SH_FRAG + #include "GlobalIndex.glsl" + + + // from http://www.gamedev.net/forums/topic/684158-rgba-to-float-percision/5321388/ + float4 EncodeV1 (float value) + { + int rgba = floatBitsToInt(value); + float r = float((rgba >> 24) & 0xff) / 255.0; // mask: '>>' on a signed int sign-extends, negative inputs would give r < 0 + float g = float((rgba & 0x00ff0000) >> 16) / 255.0; + float b = float((rgba & 0x0000ff00) >> 8) / 255.0; + float a = float(rgba & 0x000000ff) / 255.0; + return float4(r, g, b, a); + } + + // range: whole + // error: 0 + float DecodeV1 (float4 color) + { + int rgba = (int(color.x * 255.0 + 0.5) << 24) + (int(color.y * 255.0 + 0.5) << 16) + (int(color.z * 255.0 + 0.5) << 8) + int(color.w * 255.0 + 0.5); // +0.5: round to nearest byte instead of truncating + return intBitsToFloat(rgba); + } + //--------------------------------- + + + // from https://stackoverflow.com/questions/34963366/encode-floating-point-data-in-a-rgba-texture + float4 EncodeV2 (float value) + { + const float4 bitSh = float4( 256.0 * 256.0 * 256.0, 
+ 256.0 * 256.0, + 256.0, + 1.0 ); + const float4 bitMsk = float4(0.0, + 1.0 / 256.0, + 1.0 / 256.0, + 1.0 / 256.0 ); + float4 comp = Fract( value * bitSh ); + comp -= comp.xxyz * bitMsk; + return comp; + } + + // range: 0..1 + // error: 1.98e-3 + float DecodeV2 (float4 value) + { + const float4 bitShifts = float4( 1.0 / (256.0 * 256.0 * 256.0), + 1.0 / (256.0 * 256.0), + 1.0 / 256.0, + 1.0 ); + return Dot( value, bitShifts ); + } + //--------------------------------- + + + float4 EncodeV3 (float value) + { + return unpackUnorm4x8( floatBitsToUint( value )); + } + + // range: whole + // error: 0 + float DecodeV3 (float4 color) + { + return uintBitsToFloat( packUnorm4x8( color )); + } + //--------------------------------- + + + // from https://aras-p.info/blog/2009/07/30/encoding-floats-to-rgba-the-final/ + float4 EncodeV4 (float value) + { + const float4 bitSh = float4( 255.0 * 255.0 * 255.0, + 255.0 * 255.0, + 255.0, + 1.0 ); + const float4 bitMsk = float4(0.0, + 1.0 / 255.0, + 1.0 / 255.0, + 1.0 / 255.0 ); + float4 comp = Fract( value * bitSh ); + comp -= comp.xxyz * bitMsk; + return comp; + } + + // range: 0..1 + // error: 0.0 .. 
-1.0e-5 + float DecodeV4 (float4 value) + { + const float4 bitShifts = float4( 1.0 / (255.0 * 255.0 * 255.0), + 1.0 / (255.0 * 255.0), + 1.0 / 255.0, + 1.0 ); + return Dot( value, bitShifts ); + } + //--------------------------------- + + + // from https://gamedev.ru/flame/forum/?id=248801&page=225&m=5967042#m3363 + float4 EncodeV5 (float value) + { + const float4 bitSh = float4( 256.0 * 256.0 * 256.0, + 256.0 * 256.0, + 256.0, + 1.0 ); + const float4 bitMsk = float4(0.0, + 1.0 / 256.0, + 1.0 / 256.0, + 1.0 / 256.0 ); + float4 comp = Fract( value * bitSh ); + comp -= comp.xxyz * bitMsk; + return comp * (256.0 / 255.0); + } + + // range: 0..1 + // error: 1.0e-10 for value < 0.01 + // error: 0 for value > 0.01 + float DecodeV5 (float4 value) + { + const float4 bitShifts = float4( 255.0 / (256.0 * 256.0 * 256.0 * 256.0), + 255.0 / (256.0 * 256.0 * 256.0), + 255.0 / (256.0 * 256.0), + 255.0 / 256.0 ); + return Dot( value, bitShifts ); + } + +#endif +//----------------------------------------------------------------------------- +#ifdef ENCODE + + void Main () + { + float unpacked = GetGlobalCoordUNorm().x; + float4 packed; + + switch ( iMode ) + { + case 0 : packed = EncodeV1( unpacked ); break; + case 1 : packed = EncodeV2( unpacked ); break; + case 2 : packed = EncodeV3( unpacked ); break; + case 3 : packed = EncodeV4( unpacked ); break; + case 4 : packed = EncodeV5( unpacked ); break; + } + + out_Color0 = packed; + out_Color1.r = unpacked; + } + +#endif +//----------------------------------------------------------------------------- +#ifdef DECODE + + void Main () + { + float4 packed = gl.texture.Fetch( un_Packed, GetGlobalCoord().xy, 0 ); + float reference = gl.texture.Fetch( un_Ref, GetGlobalCoord().xy, 0 ).r; + float unpacked; + + switch ( iMode ) + { + case 0 : unpacked = DecodeV1( packed ); break; + case 1 : unpacked = DecodeV2( packed ); break; + case 2 : unpacked = DecodeV3( packed ); break; + case 3 : unpacked = DecodeV4( packed ); break; + case 4 : unpacked 
= DecodeV5( packed ); break; + } + + switch ( iCmp ) + { + case 0 : out_Color = float4(unpacked); break; + case 1 : out_Color = float4(reference); break; + + case 2 : + { + float a = Abs( unpacked - reference ) * Exp10( float(iCmpScale) ); + out_Color = a > 1.0 ? float4(1.0, 0.0, 0.0, 1.0) : float4(a); + break; + } + } + } + +#endif +//----------------------------------------------------------------------------- diff --git a/AE/samples/res_editor/_data/scripts/tests/ScreenProjection-1.as b/AE/samples/res_editor/_data/scripts/tests/ScreenProjection.as similarity index 100% rename from AE/samples/res_editor/_data/scripts/tests/ScreenProjection-1.as rename to AE/samples/res_editor/_data/scripts/tests/ScreenProjection.as diff --git a/AE/samples/res_editor/_data/scripts/tests/SmoothNormal.as b/AE/samples/res_editor/_data/scripts/tests/SmoothNormal.as index 759dde34..a13f8c4d 100644 --- a/AE/samples/res_editor/_data/scripts/tests/SmoothNormal.as +++ b/AE/samples/res_editor/_data/scripts/tests/SmoothNormal.as @@ -29,12 +29,13 @@ //----------------------------------------------------------------------------- #ifdef SH_FRAG #include "Normal.glsl" + #include "Geometry.glsl" #include "GlobalIndex.glsl" float3 Sphere (float2 uv, float2 duv) { uv = ToSNorm( uv ) * (duv.yx / duv.x); - return float3( uv, Saturate( 1.0 - LengthSq( uv )) ); + return UVtoSphereNormal( uv ).xyz; } #define GetPosition( _pos_, _offset_ )\ diff --git a/AE/samples/res_editor/_data/scripts/tests/SmoothTBN.as b/AE/samples/res_editor/_data/scripts/tests/SmoothTBN.as index b14216cb..690f5b1c 100644 --- a/AE/samples/res_editor/_data/scripts/tests/SmoothTBN.as +++ b/AE/samples/res_editor/_data/scripts/tests/SmoothTBN.as @@ -30,12 +30,13 @@ //----------------------------------------------------------------------------- #ifdef SH_FRAG #include "Normal.glsl" + #include "Geometry.glsl" #include "GlobalIndex.glsl" float3 Sphere (float2 uv, float2 duv) { uv = ToSNorm( uv ) * (duv.yx / duv.x); - return float3( uv, 
Saturate( 1.0 - LengthSq( uv )) ); + return UVtoSphereNormal( uv ).xyz; } #define GetPosition( _pos_, _offset_ ) Sphere( ((_pos_) + (_offset_) * duv), duv ) diff --git a/AE/samples/res_editor/_data/scripts/tests/TriangleBarycentrics.as b/AE/samples/res_editor/_data/scripts/tests/TriangleBarycentrics.as new file mode 100644 index 00000000..825e4fd9 --- /dev/null +++ b/AE/samples/res_editor/_data/scripts/tests/TriangleBarycentrics.as @@ -0,0 +1,63 @@ +// Copyright (c) Zhirnov Andrey. For more information see 'LICENSE' +#ifdef __INTELLISENSE__ +# include +# include +#endif +//----------------------------------------------------------------------------- +#ifdef SCRIPT + + void ASmain () + { + // initialize + RC rt = Image( EPixelFormat::RGBA8_UNorm, SurfaceSize() ); rt.Name( "RT" ); + RC scene = Scene(); + uint shape_count = 0; + uint proj_count = 0; + + { + RC geometry = UnifiedGeometry(); + RC vbuf = Buffer(); + const array vertices = { + float3(0.f, -1.f, 0.5f), float3(-1.f, 1.f, 0.5f), float3(1.f, 1.f, 0.5f), + float3(0.f, -1.f, 0.1f), float3(-1.f, 1.f, 0.7f), float3(1.f, 1.f, 0.3f) + }; + const array proj = { + float4x4(), + float4x4().Ortho( RectF(-1.f, -1.f, 1.f, 1.f), float2(-100.f, 100.f) ), + float4x4().InfinitePerspective( ToRad(45.f), 1.f, 0.1f ), + float4x4().InfinitePerspective( ToRad(90.f), 1.5f, 0.1f ) + }; + + shape_count = vertices.size()/3; + proj_count = proj.size(); + + vbuf.FloatArray( "vertices", vertices ); + vbuf.FloatArray( "projection", proj ); + vbuf.LayoutName( "VBuffer" ); + + UnifiedGeometry_Draw cmd; + cmd.vertexCount = 3; + geometry.Draw( cmd ); + + geometry.ArgIn( "un_VBuffer", vbuf ); + + scene.Add( geometry ); + } + + // render loop + { + RC pass = scene.AddGraphicsPass( "draw" ); + pass.AddPipeline( "tests/TriangleBarycentrics.as" ); // [src](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/pipelines/tests/TriangleBarycentrics.as) + pass.Output( "out_Color", rt, RGBA32f(0.0) ); + pass.Slider( "iMode", 0, 3 ); + 
pass.Slider( "iCmp", 0, 2 ); + pass.Slider( "iScale", 0, 9, 2 ); + pass.Slider( "iShape", 0, shape_count-1 ); + pass.Slider( "iProj", 0, proj_count-1 ); + pass.Slider( "iCameraPos", float3(-10.f), float3(10.f, 10.f, 100.f), float3(0.f, 0.f, 0.f) ); + } + Present( rt ); + } + +#endif +//----------------------------------------------------------------------------- diff --git a/AE/samples/res_editor/_data/scripts/tools/2d/Easing.as b/AE/samples/res_editor/_data/scripts/tools/2d/Easing.as index 8b3c74dc..f47b76be 100644 --- a/AE/samples/res_editor/_data/scripts/tools/2d/Easing.as +++ b/AE/samples/res_editor/_data/scripts/tools/2d/Easing.as @@ -14,7 +14,7 @@ RC mode = DynamicUInt(); RC submode = DynamicUInt(); const array mode_str = { - "Hermite", "Quadratic", "Cubic", "Quartic", "Quintic" + "Sine", "Hermite", "Quadratic", "Cubic", "Quartic", "Quintic", "Exponential", "Circular", "Elastic" }; Slider( mode, "Mode", 0, mode_str.size()-1, 0 ); diff --git a/AE/samples/res_editor/_data/scripts/tools/2d/Graph1.as b/AE/samples/res_editor/_data/scripts/tools/2d/Graph1.as index bb2dfe22..21b5f624 100644 --- a/AE/samples/res_editor/_data/scripts/tools/2d/Graph1.as +++ b/AE/samples/res_editor/_data/scripts/tools/2d/Graph1.as @@ -14,8 +14,9 @@ RC mode = DynamicUInt(); RC flip = DynamicUInt(); const array mode_str = { - "CBRT", "SQRT", "LN", "LOG2", "SMOOTHSTEP", "ONE_DIV_X", "SIN", "EXP", "QUADRATIC", - "CUBIC", "ONE_DIV_SQUARE_X", "QUARTIC" + "LOG2", "LN", "SMOOTHSTEP", "ONE_DIV_X", "SIN", "EXP", + "SRGB", "QUADRATIC", "CUBIC", "ONE_DIV_SQUARE_X", + "SQRT", "CBRT", "QUARTIC", "EXP2", "INVSQRT", "ASIN" }; Slider( mode, "Mode", 0, mode_str.size()-1, 0 ); @@ -39,6 +40,7 @@ #include "Easing.glsl" #include "Geometry.glsl" #include "GlobalIndex.glsl" + #include "ColorSpaceUtility.glsl" #define SMOOTHSTEP 0 #define ONE_DIV_X 1 @@ -52,6 +54,10 @@ #define LOG2 9 #define EXP 10 #define SIN 11 + #define EXP2 12 + #define SRGB 13 + #define INVSQRT 14 + #define ASIN 15 float Graph2 (float x) 
@@ -66,10 +72,10 @@ return ReciprocalSquaredEaseIn( x ); #elif MODE == SQRT - return Sqrt( x ); + return SquareRootEaseIn( x ); #elif MODE == CBRT - return Cbrt( x ); + return CubicRootEaseIn( x ); #elif MODE == QUADRATIC return QuadraticEaseIn( x ); @@ -87,11 +93,23 @@ return Logarithmic2EaseIn( x ); #elif MODE == EXP + return ExponentialE_EaseIn( x ); + + #elif MODE == EXP2 return ExponentialEaseIn( x ); #elif MODE == SIN return SineEaseIn( x ); + #elif MODE == SRGB + return RemoveSRGBCurve( x ); + + #elif MODE == INVSQRT + return (InvSqrt( 1.008 - x ) - 1.0) * 0.1; + + #elif MODE == ASIN + return ASin( x ) / (float_Pi * 0.5); + #else # error unsupported MODE #endif @@ -126,9 +144,12 @@ float2 uv = GetUV( 0 ); float2 uv2 = GetUV( 1 ); - float2 p0 = float2( uv.x, Graph( uv.x )); - float2 p1 = float2( uv2.x, Graph( uv2.x )); - float d = SDF2_Line( uv, p0, p1 ); + float d0; + { + float2 p0 = float2( uv.x, Graph( uv.x )); + float2 p1 = float2( uv2.x, Graph( uv2.x )); + d0 = SDF2_Line( uv, p0, p1 ); + } out_Color = float4(0.25); @@ -140,9 +161,18 @@ out_Color.rgb *= 0.8; float w = 8.f / MaxOf(GetGlobalSize().xy); - if ( d < w ) out_Color = Lerp( out_Color, float4( 1.0, 0.0, 0.0, 1.0 ), SmoothStep( 1.0 - d/w, 0.5, 1.0 )); - out_Color.rgb *= AA_QuadGrid( uv * 100.0, float2(0.04), 0.25 ); + // diagonal + float d2 = Abs( uv.x - uv.y ) * 2.0; + if ( d2 < w ) + out_Color.rgb *= SmoothStep( d2 / w, 0.0, 1.0 ); + + out_Color.rgb *= AA_QuadGrid( uv * 100.0, float2(0.04), 0.4 ); + out_Color.rgb *= AA_QuadGrid( uv * 400.0, float2(0.04), 0.5 ); + + // graph + if ( d0 < w ) + out_Color = Lerp( out_Color, float4( 1.0, 0.0, 0.0, 1.0 ), SmoothStep( 1.0 - d0/w, 0.5, 1.0 )); } } diff --git a/AE/samples/res_editor/_data/scripts/tools/2d/ImageCompression2D.as b/AE/samples/res_editor/_data/scripts/tools/2d/ImageCompression2D.as new file mode 100644 index 00000000..269e48cb --- /dev/null +++ b/AE/samples/res_editor/_data/scripts/tools/2d/ImageCompression2D.as @@ -0,0 +1,88 @@ +// Copyright 
(c) Zhirnov Andrey. For more information see 'LICENSE' +#ifdef __INTELLISENSE__ +# define SH_COMPUTE +# include +# include +# define COMPARE +#endif +//----------------------------------------------------------------------------- +#ifdef SCRIPT + + void ASmain () + { + // initialize + const EPixelFormat src_fmt = EPixelFormat::RGBA8_UNorm; + + const EPixelFormat comp1_req = EPixelFormat::BC7_RGBA8_UNorm; + const EPixelFormat comp2_req = EPixelFormat::ASTC_RGBA8_8x8; + + const EPixelFormat comp1_fmt = Supports_Format( comp1_req ) ? comp1_req : src_fmt; + const EPixelFormat comp2_fmt = Supports_Format( comp2_req ) ? comp2_req : src_fmt; + + RC rt = Image( EPixelFormat::RGBA8_UNorm, SurfaceSize() ); + RC non_comp = Image( EImageType::FImage2D, "res/tex/exported-image-2d-1k.aeimg" ); + RC comp1 = Image( comp1_fmt, non_comp.Dimension() ); + RC comp2 = Image( comp2_fmt, non_comp.Dimension() ); + + // render loop + { + CompressImage( non_comp, comp1, comp1_req ); + CompressImage( non_comp, comp2, comp2_req ); + }{ + RC pass = Postprocess( "", "COMPARE" ); + pass.Output( "out_Color", rt ); + pass.ArgIn( "un_NonComp", non_comp, Sampler_NearestClamp ); + pass.ArgIn( "un_Comp1", comp1, Sampler_NearestClamp ); + pass.ArgIn( "un_Comp2", comp2, Sampler_NearestClamp ); + pass.Slider( "iCmp", 0, 5, 4 ); + pass.Slider( "iDiff", 1.f, 100.f, 10.f ); + pass.Slider( "iChannel", 0, 4, 0 ); + pass.Slider( "iLevel", 0, 16 ); + } + Present( rt ); + } + +#endif +//----------------------------------------------------------------------------- +#ifdef COMPARE + #include "GlobalIndex.glsl" + + float4 Swizzle (float4 c) // returns 'c' unchanged or one channel of 'c' replicated to all components, selected by 'iChannel' + { + switch ( iChannel ) + { + case 0 : default : return c; // 'default' guarantees a return value for any 'iChannel' + case 1 : return float4(c.r); + case 2 : return float4(c.g); + case 3 : return float4(c.b); + case 4 : return float4(c.a); + } + } + + void Main () + { + float2 uv = MapPixCoordToUNormCorrected( gl.FragCoord.xy, un_PerPass.resolution.xy, float2(gl.texture.GetSize( un_NonComp, 0 )) ); + float4 col1 = Swizzle( 
gl.texture.SampleLod( un_NonComp, uv, iLevel )); + float4 col2 = Swizzle( gl.texture.SampleLod( un_Comp1, uv, iLevel )); + float4 col3 = Swizzle( gl.texture.SampleLod( un_Comp2, uv, iLevel )); + + if ( ! IsUNorm( uv )) + { + out_Color = float4(0.0); + return; + } + + switch ( iCmp ) + { + case 0 : out_Color = col1; break; + case 1 : out_Color = col2; break; + case 2 : out_Color = col3; break; + + case 3 : out_Color = Abs( col1 - col2 ) * iDiff; break; + case 4 : out_Color = Abs( col1 - col3 ) * iDiff; break; + case 5 : out_Color = Abs( col2 - col3 ) * iDiff; break; + } + } + +#endif +//----------------------------------------------------------------------------- diff --git a/AE/samples/res_editor/_data/scripts/tools/2d/ImageCompressionCube.as b/AE/samples/res_editor/_data/scripts/tools/2d/ImageCompressionCube.as new file mode 100644 index 00000000..bf79282c --- /dev/null +++ b/AE/samples/res_editor/_data/scripts/tools/2d/ImageCompressionCube.as @@ -0,0 +1,90 @@ +// Copyright (c) Zhirnov Andrey. For more information see 'LICENSE' +#ifdef __INTELLISENSE__ +# define SH_COMPUTE +# include +# include +# define COMPARE +#endif +//----------------------------------------------------------------------------- +#ifdef SCRIPT + + void ASmain () + { + // initialize + const EPixelFormat src_fmt = EPixelFormat::RGBA8_UNorm; + + const EPixelFormat comp1_req = EPixelFormat::BC7_RGBA8_UNorm; + const EPixelFormat comp2_req = EPixelFormat::ASTC_RGBA8_4x4; + + const EPixelFormat comp1_fmt = Supports_Format( comp1_req ) ? comp1_req : src_fmt; + const EPixelFormat comp2_fmt = Supports_Format( comp2_req ) ? 
comp2_req : src_fmt; + + RC rt = Image( EPixelFormat::RGBA8_UNorm, SurfaceSize() ); + RC non_comp = Image( EImageType::FImage2DArray, "res/tex/exported-image-cm-1k.aeimg" ); + RC comp1 = Image( comp1_fmt, non_comp.Dimension(), ImageLayer(6) ); + RC comp2 = Image( comp2_fmt, non_comp.Dimension(), ImageLayer(6) ); + + // render loop + { + CompressImage( non_comp, comp1, comp1_req ); + CompressImage( non_comp, comp2, comp2_req ); + }{ + RC pass = Postprocess( "", "COMPARE" ); + pass.Output( "out_Color", rt ); + pass.ArgIn( "un_NonComp", non_comp, Sampler_NearestClamp ); + pass.ArgIn( "un_Comp1", comp1, Sampler_NearestClamp ); + pass.ArgIn( "un_Comp2", comp2, Sampler_NearestClamp ); + pass.Slider( "iCmp", 0, 5, 4 ); + pass.Slider( "iDiff", 1.f, 100.f, 10.f ); + pass.Slider( "iChannel", 0, 4, 0 ); + pass.Slider( "iCubeFace", 0, 5 ); + pass.Slider( "iLevel", 0, 16 ); + } + Present( rt ); + } + +#endif +//----------------------------------------------------------------------------- +#ifdef COMPARE + #include "GlobalIndex.glsl" + #include "CubeMap.glsl" + + float4 Swizzle (float4 c) // returns 'c' unchanged or one channel of 'c' replicated to all components, selected by 'iChannel' + { + switch ( iChannel ) + { + case 0 : default : return c; // 'default' guarantees a return value for any 'iChannel' + case 1 : return float4(c.r); + case 2 : return float4(c.g); + case 3 : return float4(c.b); + case 4 : return float4(c.a); + } + } + + void Main () + { + float2 uv = MapPixCoordToUNormCorrected( gl.FragCoord.xy, un_PerPass.resolution.xy, float2(gl.texture.GetSize( un_NonComp, 0 ).xy) ); + float4 col1 = Swizzle( gl.texture.SampleLod( un_NonComp, float3(uv, iCubeFace), iLevel )); + float4 col2 = Swizzle( gl.texture.SampleLod( un_Comp1, float3(uv, iCubeFace), iLevel )); + float4 col3 = Swizzle( gl.texture.SampleLod( un_Comp2, float3(uv, iCubeFace), iLevel )); + + if ( ! 
IsUNorm( uv )) + { + out_Color = float4(0.0); + return; + } + + switch ( iCmp ) + { + case 0 : out_Color = col1; break; + case 1 : out_Color = col2; break; + case 2 : out_Color = col3; break; + + case 3 : out_Color = Abs( col1 - col2 ) * iDiff; break; + case 4 : out_Color = Abs( col1 - col3 ) * iDiff; break; + case 5 : out_Color = Abs( col2 - col3 ) * iDiff; break; + } + } + +#endif +//----------------------------------------------------------------------------- diff --git a/AE/samples/res_editor/_data/scripts/tools/2d/Wave.as b/AE/samples/res_editor/_data/scripts/tools/2d/Wave.as new file mode 100644 index 00000000..37649f6f --- /dev/null +++ b/AE/samples/res_editor/_data/scripts/tools/2d/Wave.as @@ -0,0 +1,133 @@ +// Copyright (c) Zhirnov Andrey. For more information see 'LICENSE' +#ifdef __INTELLISENSE__ +# include +# include +# define MODE 0 +#endif +//----------------------------------------------------------------------------- +#ifdef SCRIPT + + void ASmain () + { + // initialize + RC rt = Image( EPixelFormat::RGBA8_UNorm, SurfaceSize() ); + RC mode = DynamicUInt(); + const array mode_str = { + "s_SIN", "u_SIN", + "s_TRIANGLE", "u_TRIANGLE", + "s_SMOOTHSTEP", "u_SMOOTHSTEP" + }; + + Slider( mode, "Mode", 0, mode_str.size()-1, 0 ); + + // render loop + for (uint i = 0; i < mode_str.size(); ++i) + { + RC pass = Postprocess( "", "MODE="+mode_str[i] ); + pass.Output( "out_Color", rt ); + pass.EnableIfEqual( mode, i ); + } + Present( rt ); + } + +#endif +//----------------------------------------------------------------------------- +#ifdef SH_FRAG + #include "SDF.glsl" + #include "GlobalIndex.glsl" + + // signed + #define s_SMOOTHSTEP 1 + #define s_TRIANGLE 2 + #define s_SIN 3 + + // unsigned + #define u_SMOOTHSTEP 10 + #define u_TRIANGLE 11 + #define u_SIN 12 + + + float SmoothStepWave (float x) + { + x= TriangleWave( x ); + return SmoothStep( x, 0.0, 1.0 ); + } + + float SignedTriangleWave (float x) + { + return TriangleWave( x ) * ToSNorm( LessFp( x, 1.0 )); + 
} + + float SignedSmoothStepWave (float x) + { + float y = TriangleWave( x ); + return SmoothStep( y, 0.0, 1.0 ) * ToSNorm( LessFp( x, 1.0 )); + } + + + float Graph (float x) + { + x = Saturate( x ) * 2.0; + + #if MODE == s_SMOOTHSTEP + return ToUNorm( SignedSmoothStepWave( x )); + + #elif MODE == s_TRIANGLE + return ToUNorm( SignedTriangleWave( x )); + + #elif MODE == s_SIN + return ToUNorm( Sin( x * float_Pi )); + + #elif MODE == u_SMOOTHSTEP + return SmoothStepWave( x ); + + #elif MODE == u_TRIANGLE + return TriangleWave( x ); + + #elif MODE == u_SIN + return Abs( Sin( x * float_Pi )); + + #else + # error unsupported MODE + #endif + } + + + float2 GetUV (int dx) + { + float2 uv = MapPixCoordToSNormCorrected2( float2(GetGlobalCoord().xy + int2(dx, 0)), float2(GetGlobalSize().xy) ); + uv.y = -uv.y; + uv *= 0.5; + uv += 0.5; + return uv; + } + + void Main () + { + float2 uv = GetUV( 0 ); + float2 uv2 = GetUV( 1 ); + + float2 p0 = float2( uv.x, Graph( uv.x )); + float2 p1 = float2( uv2.x, Graph( uv2.x )); + float d = SDF2_Line( uv, p0, p1 ); + + out_Color = float4(0.25); + + if ( ! 
IsUNorm( uv )) + out_Color.rgb *= 0.1; + else + { + if ( AnyGreater( uv, float2(1.0) )) + out_Color.rgb *= 0.8; + + float w = 8.f / MaxOf(GetGlobalSize().xy); + out_Color.rgb *= AA_QuadGrid( uv * 100.0, float2(0.04), 0.4 ); + out_Color.rgb *= AA_QuadGrid( uv * 400.0, float2(0.04), 0.5 ); + + if ( d < w ) + out_Color = Lerp( out_Color, float4( 1.0, 0.0, 0.0, 1.0 ), SmoothStep( 1.0 - d/w, 0.5, 1.0 )); + } + } + +#endif +//----------------------------------------------------------------------------- diff --git a/AE/samples/res_editor/_data/scripts/tools/noise/HashFp32.as b/AE/samples/res_editor/_data/scripts/tools/noise/HashFp32.as index 7bf919a2..d7fa3b07 100644 --- a/AE/samples/res_editor/_data/scripts/tools/noise/HashFp32.as +++ b/AE/samples/res_editor/_data/scripts/tools/noise/HashFp32.as @@ -27,7 +27,7 @@ RC pass = Postprocess(); pass.Output( "out_Color", rt ); - pass.Slider( "iHash", 0, 8, int(params[0]) ); + pass.Slider( "iHash", 0, 9, int(params[0]) ); pass.Slider( "iInSize", 1, 4, int(params[1]) ); pass.Slider( "iOutSize", 1, 4, int(params[2]) ); pass.Slider( "iComp", -1, 3, int(params[3]) ); @@ -98,11 +98,8 @@ } - ND_ float4 HEHash (const float4 inFloat) + ND_ float4 HEHash2 (const uint4 uval) { - // const uint4 uval = uint4(inFloat); - const uint4 uval = floatBitsToUint(inFloat); - switch ( iInSize ) { case 1 : return float4(HEHash11( uval.x )); @@ -137,6 +134,14 @@ return float4(0.0); } + ND_ float4 HEHashI (const float4 inFloat) { + return HEHash2( floatBitsToUint(inFloat) ); + } + + ND_ float4 HEHashF (const float4 inFloat) { + return HEHash2( uint4(Abs(inFloat)) ); + } + ND_ float4 HashV3 (const float4 inFloat) { @@ -206,20 +211,21 @@ ND_ float4 Hash (const float4 inFloat) { - #if iHash_max != 8 - # error Hash type count must be 8 + #if iHash_max != 9 + # error Hash type count must be 9 #endif switch ( iHash ) // License | scale | valid range | errors { //-----------------|---------|-----------------|----------------- case 0 : return DHash( inFloat ); 
// MIT | >30 | 0 .. 10^8 | when changed sign case 1 : return float4(WeylHash12( inFloat.xy )); // unlicense | >35 | 100 .. 4500 | near at 0 case 2 : return float4(ModHash12( inFloat.xy )); // CC BY-NC-SA 3.0 | any | 0 .. 2500 | if scale is multiple of 2 - case 3 : return HEHash( inFloat ); // MIT | any | 0 .. 3*10^38 | - - case 4 : return HashV3( inFloat ); // CC BY-NC-SA 3.0 | >1 | 1 .. 100 | bad quality for >100, invalid for >40'000 - case 5 : return MHash( inFloat ); // CC BY-NC-SA 3.0 | any | 0 .. 3*10^38 | - case 6 : return float4(UEFastHash12( inFloat.xy )); // ??? | >100 | 100 .. 10^4 | bad quality for >10^4, invalid for >10^7 - case 7 : return float4(InterleavedGradientNoise12( inFloat.xy )); // ??? | >100 | 100 .. 10^7 | visible pattern, bad quality for >10^7, invalid for >10^9 - case 8 : return float4(PseudoHash12( inFloat.xy )); // ??? | any | 10^4 .. 10^6 | when changed sign, visible pattern for <10^4, bad quality for >10^7, invalid for >10^10 + case 3 : return HEHashI( inFloat ); // MIT | any | 0 .. 3*10^38 | - + case 4 : return HEHashF( inFloat ); // MIT | any | 10^3 .. 10^9 | - + case 5 : return HashV3( inFloat ); // CC BY-NC-SA 3.0 | >1 | 1 .. 100 | bad quality for >100, invalid for >40'000 + case 6 : return MHash( inFloat ); // CC BY-NC-SA 3.0 | any | 0 .. 3*10^38 | + case 7 : return float4(UEFastHash12( inFloat.xy )); // ??? | >100 | 100 .. 10^4 | bad quality for >10^4, invalid for >10^7 + case 8 : return float4(InterleavedGradientNoise12( inFloat.xy )); // ??? | >100 | 100 .. 10^7 | visible pattern, bad quality for >10^7, invalid for >10^9 + case 9 : return float4(PseudoHash12( inFloat.xy )); // ??? | any | 10^4 .. 
10^6 | when changed sign, visible pattern for <10^4, bad quality for >10^7, invalid for >10^10 } } diff --git a/AE/samples/res_editor/_data/shaders/FragHelper.glsl b/AE/samples/res_editor/_data/shaders/FragHelper.glsl deleted file mode 100644 index 04d46658..00000000 --- a/AE/samples/res_editor/_data/shaders/FragHelper.glsl +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright (c) Zhirnov Andrey. For more information see 'LICENSE' - - -/* -================================================= - HelperInvocationCount -================================================= -*/ -ND_ uint HelperInvocationCountPerQuad () -{ - uint helper = 0; - #ifdef AE_demote_to_helper_invocation - helper = gl.IsHelperInvocation() ? 1 : 0; - #else - helper = gl.HelperInvocation ? 1 : 0; - #endif - return gl.quadGroup.Broadcast( helper, 0 ) + - gl.quadGroup.Broadcast( helper, 1 ) + - gl.quadGroup.Broadcast( helper, 2 ) + - gl.quadGroup.Broadcast( helper, 3 ); -} - - -ND_ uint HelperInvocationCountPerWarp () -{ - uint helper = 0; - #ifdef AE_demote_to_helper_invocation - helper = gl.IsHelperInvocation() ? 1 : 0; - #else - helper = gl.HelperInvocation ? 
1 : 0; - #endif - return gl.subgroup.InclusiveAdd( helper ); -} diff --git a/AE/samples/res_editor/_data/shaders/ModelMaterial.glsl b/AE/samples/res_editor/_data/shaders/ModelMaterial.glsl index 8df4ceb4..bb122ac2 100644 --- a/AE/samples/res_editor/_data/shaders/ModelMaterial.glsl +++ b/AE/samples/res_editor/_data/shaders/ModelMaterial.glsl @@ -18,15 +18,15 @@ ND_ float4 UnpackRGBM (uint rgbm) { float4 c = unpackUnorm4x8( rgbm ); retu // helpers #define SampleAlbedo( _mtr_, _uv_ )\ - gl.texture.Sample( gl::CombinedTex2D( un_AlbedoMaps[ UnpackMapAndSampler( _mtr_.albedoMap ).x ], un_AlbedoMapSampler ), _uv_ )\ + gl.texture.Sample( gl::CombinedTex2D( un_AlbedoMaps[ gl::Nonuniform( UnpackMapAndSampler( _mtr_.albedoMap ).x )], un_AlbedoMapSampler ), _uv_ )\ * UnpackRGBM( _mtr_.albedoRGBM ) #define SampleLodAlbedo( _mtr_, _uv_, _lod_ )\ - gl.texture.SampleLod( gl::CombinedTex2D( un_AlbedoMaps[ UnpackMapAndSampler( _mtr_.albedoMap ).x ], un_AlbedoMapSampler ), _uv_, _lod_ )\ + gl.texture.SampleLod( gl::CombinedTex2D( un_AlbedoMaps[ gl::Nonuniform( UnpackMapAndSampler( _mtr_.albedoMap ).x )], un_AlbedoMapSampler ), _uv_, _lod_ )\ * UnpackRGBM( _mtr_.albedoRGBM ) #define SampleGradAlbedo( _mtr_, _uv_, _uvdx_, _uvdy_ )\ - gl.texture.SampleGrad( gl::CombinedTex2D( un_AlbedoMaps[ UnpackMapAndSampler( _mtr_.albedoMap ).x ], un_AlbedoMapSampler ), _uv_, _uvdx_, _uvdy_ )\ + gl.texture.SampleGrad( gl::CombinedTex2D( un_AlbedoMaps[ gl::Nonuniform( UnpackMapAndSampler( _mtr_.albedoMap ).x )], un_AlbedoMapSampler ), _uv_, _uvdx_, _uvdy_ )\ * UnpackRGBM( _mtr_.albedoRGBM ) @@ -77,15 +77,16 @@ ND_ float4 CalcLighting (const float3 worldPos, const float3 worldNormal) UnpackWorldPos ================================================= */ +ND_ float3 UnpackWorldPos (float2 fragCoordSNorm, float depth) +{ + float4 pos = un_PerPass.camera.invViewProj * float4( fragCoordSNorm, depth, 1.0 ); + return pos.xyz / pos.w; +} + #ifdef SH_FRAG ND_ float3 UnpackWorldPos (gl::CombinedTex2D depthMap) { - 
float4 pos; - pos.xy = ToSNorm( gl.FragCoord.xy / un_PerPass.resolution.xy ); - pos.z = gl.texture.Fetch( depthMap, int2(gl.FragCoord.xy), 0 ).r; - pos.w = 1.0; - - pos = un_PerPass.camera.invViewProj * pos; - return pos.xyz / pos.w; + return UnpackWorldPos( ToSNorm( gl.FragCoord.xy / un_PerPass.resolution.xy ), + gl.texture.Fetch( depthMap, int2(gl.FragCoord.xy), 0 ).r ); } #endif diff --git a/AE/samples/res_editor/_data/shaders/glsl.h b/AE/samples/res_editor/_data/shaders/glsl.h new file mode 100644 index 00000000..3f27aada --- /dev/null +++ b/AE/samples/res_editor/_data/shaders/glsl.h @@ -0,0 +1,86 @@ +// Copyright (c) Zhirnov Andrey. For more information see 'LICENSE' +/* + Can be used for intellisense or to compile GLSL as C++ source (linking will always fail). +*/ + +#pragma once + +#include +#include + +using half2x2_storage = half2x2; +using half2x3_storage = half2x3; +using half2x4_storage = half2x4; +using half3x2_storage = half3x2; +using half3x3_storage = half3x3; +using half3x4_storage = half3x4; +using half4x2_storage = half4x2; +using half4x3_storage = half4x3; +using half4x4_storage = half4x4; + +using packed_half2x2_storage = half2x2; +using packed_half2x3_storage = half2x3; +using packed_half2x4_storage = half2x4; +using packed_half3x2_storage = half3x2; +using packed_half3x3_storage = half3x3; +using packed_half3x4_storage = half3x4; +using packed_half4x2_storage = half4x2; +using packed_half4x3_storage = half4x3; +using packed_half4x4_storage = half4x4; + +using float2x2_storage = float2x2; +using float2x3_storage = float2x3; +using float2x4_storage = float2x4; +using float3x2_storage = float3x2; +using float3x3_storage = float3x3; +using float3x4_storage = float3x4; +using float4x2_storage = float4x2; +using float4x3_storage = float4x3; +using float4x4_storage = float4x4; + +using packed_float2x2_storage = float2x2; +using packed_float2x3_storage = float2x3; +using packed_float2x4_storage = float2x4; +using packed_float3x2_storage = float3x2; 
+using packed_float3x3_storage = float3x3; +using packed_float3x4_storage = float3x4; +using packed_float4x2_storage = float4x2; +using packed_float4x3_storage = float4x3; +using packed_float4x4_storage = float4x4; + +using double2x2_storage = double2x2; +using double2x3_storage = double2x3; +using double2x4_storage = double2x4; +using double3x2_storage = double3x2; +using double3x3_storage = double3x3; +using double3x4_storage = double3x4; +using double4x2_storage = double4x2; +using double4x3_storage = double4x3; +using double4x4_storage = double4x4; + +using packed_double2x2_storage = double2x2; +using packed_double2x3_storage = double2x3; +using packed_double2x4_storage = double2x4; +using packed_double3x2_storage = double3x2; +using packed_double3x3_storage = double3x3; +using packed_double3x4_storage = double3x4; +using packed_double4x2_storage = double4x2; +using packed_double4x3_storage = double4x3; +using packed_double4x4_storage = double4x4; + +template <typename T, size_t N> +using StaticArray = std::array<T, N>; // fixed-size array alias, used as StaticArray< Type, Count > in vk_types.h + +#define StaticAssert(...) 
+ +struct HashVal32 +{ + constexpr explicit HashVal32 (uint) {} +}; + +struct ShaderStructName +{ + constexpr explicit ShaderStructName (HashVal32) {} +}; + +#include <../cpp/vk_types.h> diff --git a/AE/samples/res_editor/_ui_data/controls/glfw.as b/AE/samples/res_editor/_ui_data/controls/glfw.as index 4f1bf39d..05e758d8 100644 --- a/AE/samples/res_editor/_ui_data/controls/glfw.as +++ b/AE/samples/res_editor/_ui_data/controls/glfw.as @@ -21,15 +21,6 @@ void ASmain (GLFW_ActionBindings& bindings) ActionInfo( "UI.ResExport", EGestureType::Down )); bind.Add( GLFW_Input::F11, ActionInfo( "FullscreenOnOff", EGestureType::Down )); - } - - // switch input mode - { - RC bind = bindings.CreateMode( "SwitchInputMode" ); - bind.Inherit( "Shared" ); - - bind.Add( GLFW_Input::Escape, - ActionInfo( "SwitchInputMode", EGestureType::Down )); bind.Add( GLFW_Input::P, ActionInfo( "Freeze", EGestureType::Down )); @@ -56,6 +47,15 @@ void ASmain (GLFW_ActionBindings& bindings) ActionInfo( "CustomKey1", EValueType::Float, EGestureType::Down, float4(10.f) )); } + // switch input mode + { + RC bind = bindings.CreateMode( "SwitchInputMode" ); + bind.Inherit( "Shared" ); + + bind.Add( GLFW_Input::Escape, + ActionInfo( "SwitchInputMode", EGestureType::Down )); + } + // UI bindings { RC bind = bindings.CreateMode( "Main.UI" ); diff --git a/AE/samples/res_editor/_ui_data/controls/winapi.as b/AE/samples/res_editor/_ui_data/controls/winapi.as index 5d2520a1..f6ab9133 100644 --- a/AE/samples/res_editor/_ui_data/controls/winapi.as +++ b/AE/samples/res_editor/_ui_data/controls/winapi.as @@ -21,15 +21,6 @@ void ASmain (WinAPI_ActionBindings& bindings) ActionInfo( "UI.ResExport", EGestureType::Down )); bind.Add( WinAPI_Input::F11, ActionInfo( "FullscreenOnOff", EGestureType::Down )); - } - - // switch input mode - { - RC bind = bindings.CreateMode( "SwitchInputMode" ); - bind.Inherit( "Shared" ); - - bind.Add( WinAPI_Input::Escape, - ActionInfo( "SwitchInputMode", EGestureType::Down )); bind.Add( 
WinAPI_Input::P, ActionInfo( "Freeze", EGestureType::Down )); @@ -56,6 +47,15 @@ void ASmain (WinAPI_ActionBindings& bindings) ActionInfo( "CustomKey1", EValueType::Float, EGestureType::Down, float4(10.f) )); } + // switch input mode + { + RC bind = bindings.CreateMode( "SwitchInputMode" ); + bind.Inherit( "Shared" ); + + bind.Add( WinAPI_Input::Escape, + ActionInfo( "SwitchInputMode", EGestureType::Down )); + } + // UI bindings { RC bind = bindings.CreateMode( "Main.UI" ); diff --git a/AE/samples/res_editor/_ui_data/cpp/ia_names.h b/AE/samples/res_editor/_ui_data/cpp/ia_names.h index 04218eab..3f88f802 100644 --- a/AE/samples/res_editor/_ui_data/cpp/ia_names.h +++ b/AE/samples/res_editor/_ui_data/cpp/ia_names.h @@ -197,8 +197,10 @@ namespace InputActions { constexpr operator InputModeName_t () const { return InputModeName_t{Hash_t{0xcedcb9fbu}}; } // 'Main.UI' - static constexpr uint actionCount = 16; + static constexpr uint actionCount = 18; enum Bindings : uint { + CustomKey1 = 0x23e12a05u, // InputActionName{"CustomKey1"} + Freeze = 0x413df12cu, // InputActionName{"Freeze"} FullscreenOnOff = 0xf5db7d10u, // InputActionName{"FullscreenOnOff"} PauseRendering = 0xba3bd32fu, // InputActionName{"PauseRendering"} ShowHelp = 0x3d738410u, // InputActionName{"ShowHelp"} @@ -222,8 +224,10 @@ namespace InputActions { constexpr operator InputModeName_t () const { return InputModeName_t{Hash_t{0x1420f18du}}; } // 'Shared' - static constexpr uint actionCount = 7; + static constexpr uint actionCount = 9; enum Bindings : uint { + CustomKey1 = 0x23e12a05u, // InputActionName{"CustomKey1"} + Freeze = 0x413df12cu, // InputActionName{"Freeze"} FullscreenOnOff = 0xf5db7d10u, // InputActionName{"FullscreenOnOff"} PauseRendering = 0xba3bd32fu, // InputActionName{"PauseRendering"} ShowHelp = 0x3d738410u, // InputActionName{"ShowHelp"} diff --git a/AE/samples/res_editor/_ui_data/cpp/vk_types.h b/AE/samples/res_editor/_ui_data/cpp/vk_types.h index 4c73b445..4588b758 100644 --- 
a/AE/samples/res_editor/_ui_data/cpp/vk_types.h +++ b/AE/samples/res_editor/_ui_data/cpp/vk_types.h @@ -3,7 +3,7 @@ // size: 24, align: 4 struct ModelMaterial { - static constexpr auto TypeName = ShaderStructName{HashVal32{0x226dd4bau}}; // 'ModelMaterial' + static constexpr auto TypeName = ShaderStructName{HashVal32{0x226dd4bau}}; uint flags; uint albedoMap; @@ -26,7 +26,7 @@ // size: 40, align: 2 struct CubeVertex { - static constexpr auto TypeName = ShaderStructName{HashVal32{0x38ec4b6bu}}; // 'CubeVertex' + static constexpr auto TypeName = ShaderStructName{HashVal32{0x38ec4b6bu}}; packed_short4 Position; packed_short4 Texcoord; @@ -47,7 +47,7 @@ // size: 32, align: 2 struct SphericalCubeVertex { - static constexpr auto TypeName = ShaderStructName{HashVal32{0x35a6eeecu}}; // 'SphericalCubeVertex' + static constexpr auto TypeName = ShaderStructName{HashVal32{0x35a6eeecu}}; packed_short4 Position; packed_short4 Texcoord; @@ -66,7 +66,7 @@ // size: 36 (48), align: 16 struct alignas(16) SceneOmniLight { - static constexpr auto TypeName = ShaderStructName{HashVal32{0x6e3bdc7bu}}; // 'SceneOmniLight' + static constexpr auto TypeName = ShaderStructName{HashVal32{0x6e3bdc7bu}}; float3 position; float3 attenuation; @@ -83,7 +83,7 @@ // size: 120 (128), align: 16 struct alignas(16) ModelNode { - static constexpr auto TypeName = ShaderStructName{HashVal32{0xbf14b6ddu}}; // 'ModelNode' + static constexpr auto TypeName = ShaderStructName{HashVal32{0xbf14b6ddu}}; float4x4_storage transform; float3x3_storage normalMat; @@ -97,46 +97,65 @@ StaticAssert( offsetof(ModelNode, materialIdx) == 116 ); StaticAssert( sizeof(ModelNode) == 128 ); +#ifndef ModelNode_Array_DEFINED +# define ModelNode_Array_DEFINED + // size: 4 (16), align: 16 + struct alignas(16) ModelNode_Array + { + static constexpr auto TypeName = ShaderStructName{HashVal32{0xcaaba68fu}}; + static constexpr size_t SizeOf (size_t count) { return 16 + (128 * count); } + + uint instanceCount; + // ModelNode elements []; + 
}; +#endif + StaticAssert( offsetof(ModelNode_Array, instanceCount) == 0 ); + StaticAssert( sizeof(ModelNode_Array) == 16 ); + #ifndef ModelRTMesh_DEFINED # define ModelRTMesh_DEFINED - // size: 24, align: 8 + // size: 32, align: 8 struct ModelRTMesh { - static constexpr auto TypeName = ShaderStructName{HashVal32{0x6fe9689cu}}; // 'ModelRTMesh' + static constexpr auto TypeName = ShaderStructName{HashVal32{0x6fe9689cu}}; + TDeviceAddress< packed_float3 *> positions; TDeviceAddress< packed_float3 *> normals; TDeviceAddress< float2 *> texcoords; TDeviceAddress< uint *> indices; }; #endif - StaticAssert( offsetof(ModelRTMesh, normals) == 0 ); - StaticAssert( offsetof(ModelRTMesh, texcoords) == 8 ); - StaticAssert( offsetof(ModelRTMesh, indices) == 16 ); - StaticAssert( sizeof(ModelRTMesh) == 24 ); + StaticAssert( offsetof(ModelRTMesh, positions) == 0 ); + StaticAssert( offsetof(ModelRTMesh, normals) == 8 ); + StaticAssert( offsetof(ModelRTMesh, texcoords) == 16 ); + StaticAssert( offsetof(ModelRTMesh, indices) == 24 ); + StaticAssert( sizeof(ModelRTMesh) == 32 ); #ifndef ModelRTInstances_DEFINED # define ModelRTInstances_DEFINED - // size: 96, align: 8 + // size: 128, align: 8 struct ModelRTInstances { - static constexpr auto TypeName = ShaderStructName{HashVal32{0xf83ee5cdu}}; // 'ModelRTInstances' + static constexpr auto TypeName = ShaderStructName{HashVal32{0xf83ee5cdu}}; StaticArray< TDeviceAddress< ModelRTMesh >, 4 > meshesPerInstance; StaticArray< TDeviceAddress< uint *>, 4 > materialsPerInstance; StaticArray< TDeviceAddress< float3x3_storage *>, 4 > normalMatPerInstance; + StaticArray< TDeviceAddress< float4x4_storage *>, 4 > modelMatPerInstance; }; #endif StaticAssert( offsetof(ModelRTInstances, meshesPerInstance) == 0 ); StaticAssert( offsetof(ModelRTInstances, materialsPerInstance) == 32 ); StaticAssert( offsetof(ModelRTInstances, normalMatPerInstance) == 64 ); - StaticAssert( sizeof(ModelRTInstances) == 96 ); + StaticAssert( offsetof(ModelRTInstances, 
modelMatPerInstance) == 96 ); + StaticAssert( sizeof(ModelRTInstances) == 128 ); #ifndef SceneDirectionalLight_DEFINED # define SceneDirectionalLight_DEFINED // size: 36 (48), align: 16 struct alignas(16) SceneDirectionalLight { - static constexpr auto TypeName = ShaderStructName{HashVal32{0xbbb7657au}}; // 'SceneDirectionalLight' + static constexpr auto TypeName = ShaderStructName{HashVal32{0xbbb7657au}}; float3 direction; float3 attenuation; @@ -153,7 +172,7 @@ // size: 60 (64), align: 16 struct alignas(16) SceneConeLight { - static constexpr auto TypeName = ShaderStructName{HashVal32{0xbde8e869u}}; // 'SceneConeLight' + static constexpr auto TypeName = ShaderStructName{HashVal32{0xbde8e869u}}; float3 position; float3 direction; @@ -174,7 +193,7 @@ // size: 1808, align: 16 struct SceneLights { - static constexpr auto TypeName = ShaderStructName{HashVal32{0x34c2b6e7u}}; // 'SceneLights' + static constexpr auto TypeName = ShaderStructName{HashVal32{0x34c2b6e7u}}; uint directionalCount; uint coneCount; @@ -197,7 +216,7 @@ // size: 2096, align: 16 struct Histogram_ssb { - static constexpr auto TypeName = ShaderStructName{HashVal32{0x271de9a7u}}; // 'Histogram_ssb' + static constexpr auto TypeName = ShaderStructName{HashVal32{0x271de9a7u}}; uint maxRGB; uint maxLuma; @@ -214,7 +233,7 @@ // size: 16, align: 8 (16) struct imgui_ub { - static constexpr auto TypeName = ShaderStructName{HashVal32{0xb41e4542u}}; // 'imgui_ub' + static constexpr auto TypeName = ShaderStructName{HashVal32{0xb41e4542u}}; float2 scale; float2 translate; @@ -229,7 +248,7 @@ // size: 4, align: 4 (16) struct imgui_pc { - static constexpr auto TypeName = ShaderStructName{HashVal32{0xbe6e8191u}}; // 'imgui_pc' + static constexpr auto TypeName = ShaderStructName{HashVal32{0xbe6e8191u}}; uint textureIdx; }; @@ -242,7 +261,7 @@ // size: 20, align: 4 struct imgui_vertex { - static constexpr auto TypeName = ShaderStructName{HashVal32{0x9e6b2802u}}; // 'imgui_vertex' + static constexpr auto TypeName = 
ShaderStructName{HashVal32{0x9e6b2802u}}; packed_float2 Position; packed_float2 UV; @@ -259,7 +278,7 @@ // size: 8, align: 8 (16) struct LinearDepth_draw_pc { - static constexpr auto TypeName = ShaderStructName{HashVal32{0xb92984e8u}}; // 'LinearDepth_draw_pc' + static constexpr auto TypeName = ShaderStructName{HashVal32{0xb92984e8u}}; float2 clipPlanes; }; diff --git a/AE/samples/res_editor/docs/Samples.md b/AE/samples/res_editor/docs/Samples.md index 23669e94..d39c2521 100644 --- a/AE/samples/res_editor/docs/Samples.md +++ b/AE/samples/res_editor/docs/Samples.md @@ -24,8 +24,6 @@ Content: ![](img/GravityLens.jpg) **Extensions:** -* [Subgroups](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/scripts/samples-2d/Subgroups.as)
-![](img/Subgroup.png) * [Draw to HDR display](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/scripts/samples-2d/HDR.as) * [Binary tree with buffer reference](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/scripts/samples-2d/BufferReference.as)
![](img/BufferReferenceBinaryTree.png) @@ -47,7 +45,7 @@ Supported precalculated TBN and screen-space TBN calculation. Debug drawing for ![](img/SimpleSphereTracing.jpg) * [SDF used for collisions with camera](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/scripts/samples-3d/CameraCollision.as)
![](img/CameraCollision.jpg) -* [Simple volumetric](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/scripts/samples-3d/Volumetric-1.as) +* [Simple volumetric](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/scripts/samples-3d/Volumetric.as) **Shadows:** * [Shadow mapping](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/scripts/samples-3d/ShadowMap.as) @@ -65,7 +63,7 @@ Supported precalculated TBN and screen-space TBN calculation. Debug drawing for **Other:** * [Reverse depth buffer](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/scripts/samples-3d/ReverseZ.as) -* [glTF scene rendering](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/scripts/samples-3d/Model-1.as) +* [glTF scene rendering](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/scripts/samples-3d/Model.as) * [glTF scene rendering with deferred texturing](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/scripts/samples-3d/DeferredTexturing.as) * [SDF and MSDF font on 3D plane](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/scripts/samples-3d/SdfFont.as) * [Frustum culling](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/scripts/samples-3d/FrustumCulling.as) @@ -82,14 +80,14 @@ Supported precalculated TBN and screen-space TBN calculation. Debug drawing for **Samples:** * [Simple geometry with dynamic shadows](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/scripts/samples-rt/RT-Shadow.as)
![](img/RT-Shadow.jpg) -* [Cube with reflections and refractions](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/scripts/samples-rt/RT-MultiBounce-1.as)
+* [Cube with reflections and refractions](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/scripts/samples-rt/RT-MultiBounce.as)
![](img/RT-MultiBounce-1.jpg) * [Dispersion in prism](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/scripts/samples-rt/Dispersion2D.as) * [Dispersion in multiple prisms](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/scripts/samples-rt/Dispersion2DLayered.as)
![](img/Dispersion2DLayered.jpg) -* [glTF scene tracing](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/scripts/samples-rt/RT-Model-1.as) +* [glTF scene tracing](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/scripts/samples-rt/RT-Model.as) ## Order-Independent Transparency @@ -153,7 +151,7 @@ ResEditor allows to create game prototypes. * [Cooperative matrix](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/scripts/tests/CoopMatrix.as) * [Multiple passes](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/scripts/tests/MultiPassTest.as) * [NaN in shader](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/scripts/tests/NaN.as) -* [Projections](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/scripts/tests/ScreenProjection-1.as) +* [Projections](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/scripts/tests/ScreenProjection.as) * [Shadertoy default shader](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/scripts/tests/ShadertoyDefault.as) * [White color spectrum](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/scripts/tests/Spectrum.as) * [Derivatives in FS and CS](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/scripts/tests/Derivatives.as) @@ -167,6 +165,7 @@ ResEditor allows to create game prototypes. 
* [Spherical Cube: texture projection](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/scripts/sphere/SphericalCube-2.as) * [Spherical Cube: circle distortion](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/scripts/sphere/SphericalCube-3.as) * [Spherical Cube: 3D to texture](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/scripts/sphere/SphericalCube-4.as) +* [Procedural sphere](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/scripts/sphere/UVSphere-1.as) * [Triangulation-1](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/scripts/tests/Triangulation-1.as) * [Triangulation-2](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/scripts/tests/Triangulation-2.as) * [Procedural grid](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/scripts/tests/ProceduralGrid.as) @@ -190,3 +189,9 @@ ResEditor allows to create game prototypes. * [gamma-correct upscale](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/scripts/tests/sRGB-Upscale.as)
![](img/sRGB-Upscale.png) +**Performance:** +* [Render target compression](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/scripts/perf/RTCompression.as) +* [FP16 mul/add performance](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/scripts/perf/Inst-fp16.as) +* [FP32 mul/add performance](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/scripts/perf/Inst-fp32.as) +* [Subgroups in fullscreen](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/scripts/perf/Subgroups-1.as), [Subgroups with multiple triangles](https://github.com/azhirnov/as-en/blob/dev/AE/samples/res_editor/_data/scripts/perf/Subgroups-2.as)
+![](img/Subgroup.png)