From d5fb7369cb858161fe159cd2d7a73b5a48740415 Mon Sep 17 00:00:00 2001
From: Leonard Hecker <lhecker@microsoft.com>
Date: Tue, 15 Mar 2022 21:24:29 +0100
Subject: [PATCH] DxEngine: Fix shader compilation on pre-D3D11 hardware
 (#12677)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Drop engine support for DirectX 9.1

  Practically no one has such old hardware anymore, and AtlasEngine
  additionally drops support for 10.0. The fallback also didn't work
  properly, because the `FeatureLevels` array failed to include 9.2 and
  9.3. We'll simply fall back to WARP on all such devices (see the
  sketch at the end of this description).

* Optimize shaders during compilation

  The two new flags increase shader performance, sometimes
  significantly.

* Fix shader feature level flags

  D3D feature level 10.0 only supports 4.0 shaders, and 10.1 only 4.1
  shaders.

## PR Checklist
* [x] Closes #12655
* [x] I work here
* [x] Tests added/passed

## Validation Steps Performed
* Add `WindowsTerminal.exe` in `dxcpl.exe`
* Add a basic `experimental.pixelShaderPath`
* All forced feature levels between `9_1` and `11_1` render as expected ✅
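For reference, here is a minimal sketch of the "hardware first, then WARP" device creation this change relies on. It is illustrative only: `CreateDeviceWithFallback` is a hypothetical name, not the engine's code; see the `@@ -542` hunk below for the real context.

```cpp
// Illustrative sketch, not the engine's actual code: try a hardware
// device first, then fall back to the WARP software rasterizer.
#include <array>
#include <d3d11.h>
#include <wrl/client.h>

using Microsoft::WRL::ComPtr;

static HRESULT CreateDeviceWithFallback(ComPtr<ID3D11Device>& device,
                                        ComPtr<ID3D11DeviceContext>& context)
{
    // With the 9.x levels gone from this list, feature-level-9 hardware
    // can no longer create a hardware device and falls through to WARP.
    static constexpr std::array FeatureLevels{
        D3D_FEATURE_LEVEL_11_1,
        D3D_FEATURE_LEVEL_11_0,
        D3D_FEATURE_LEVEL_10_1,
        D3D_FEATURE_LEVEL_10_0,
    };

    // Hardware first for maximum performance...
    auto hr = D3D11CreateDevice(nullptr, D3D_DRIVER_TYPE_HARDWARE, nullptr, 0,
                                FeatureLevels.data(), static_cast<UINT>(FeatureLevels.size()),
                                D3D11_SDK_VERSION, device.GetAddressOf(), nullptr, context.GetAddressOf());
    if (FAILED(hr))
    {
        // ...then WARP, which supports these feature levels in software
        // regardless of the underlying GPU.
        hr = D3D11CreateDevice(nullptr, D3D_DRIVER_TYPE_WARP, nullptr, 0,
                               FeatureLevels.data(), static_cast<UINT>(FeatureLevels.size()),
                               D3D11_SDK_VERSION, device.GetAddressOf(), nullptr, context.GetAddressOf());
    }
    return hr;
}
```

(Production code would additionally handle the `E_INVALIDARG` that pre-11.1 runtimes return when `D3D_FEATURE_LEVEL_11_1` is requested, typically by retrying without it.)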
---
 src/renderer/dx/DxRenderer.cpp | 51 ++++++++++++++++++++++------------
 1 file changed, 33 insertions(+), 18 deletions(-)

diff --git a/src/renderer/dx/DxRenderer.cpp b/src/renderer/dx/DxRenderer.cpp
index 18b607fb70c..265160cc983 100644
--- a/src/renderer/dx/DxRenderer.cpp
+++ b/src/renderer/dx/DxRenderer.cpp
@@ -187,11 +187,7 @@ DxEngine::~DxEngine()
 // - entry - Entry function of shader
 // Return Value:
 // - Compiled binary. Errors are thrown and logged.
-inline Microsoft::WRL::ComPtr<ID3DBlob>
-_CompileShader(
-    std::string source,
-    std::string target,
-    std::string entry = "main")
+static Microsoft::WRL::ComPtr<ID3DBlob> _CompileShader(const std::string_view& source, const char* target)
 {
 #if !TIL_FEATURE_DXENGINESHADERSUPPORT_ENABLED
     THROW_HR(E_UNEXPECTED);
@@ -201,24 +197,24 @@ _CompileShader(
     Microsoft::WRL::ComPtr<ID3DBlob> error{};
 
     const HRESULT hr = D3DCompile(
-        source.c_str(),
+        source.data(),
         source.size(),
         nullptr,
         nullptr,
         nullptr,
-        entry.c_str(),
-        target.c_str(),
-        0,
+        "main",
+        target,
+        D3DCOMPILE_PACK_MATRIX_COLUMN_MAJOR | D3DCOMPILE_OPTIMIZATION_LEVEL3,
         0,
         &code,
         &error);
 
     if (FAILED(hr))
     {
-        LOG_HR_MSG(hr, "D3DCompile failed with %x.", static_cast<unsigned int>(hr));
+        LOG_HR_MSG(hr, "D3DCompile failed with %08x", hr);
 
         if (error)
         {
-            LOG_HR_MSG(hr, "D3DCompile error\n%*S", static_cast<int>(error->GetBufferSize()), static_cast<char*>(error->GetBufferPointer()));
+            LOG_HR_MSG(hr, "D3DCompile error\n%S", static_cast<char*>(error->GetBufferPointer()));
         }
 
         THROW_HR(hr);
@@ -351,15 +347,33 @@ HRESULT DxEngine::_SetupTerminalEffects()
     vp.TopLeftY = 0;
     _d3dDeviceContext->RSSetViewports(1, &vp);
 
+    const char* shaderTargetVS = nullptr;
+    const char* shaderTargetPS = nullptr;
+    switch (_d3dDevice->GetFeatureLevel())
+    {
+    case D3D_FEATURE_LEVEL_10_0:
+        shaderTargetVS = "vs_4_0";
+        shaderTargetPS = "ps_4_0";
+        break;
+    case D3D_FEATURE_LEVEL_10_1:
+        shaderTargetVS = "vs_4_1";
+        shaderTargetPS = "ps_4_1";
+        break;
+    default:
+        shaderTargetVS = "vs_5_0";
+        shaderTargetPS = "ps_5_0";
+        break;
+    }
+
     // Prepare shaders.
-    auto vertexBlob = _CompileShader(screenVertexShaderString, "vs_5_0");
+    auto vertexBlob = _CompileShader(&screenVertexShaderString[0], shaderTargetVS);
     Microsoft::WRL::ComPtr<ID3DBlob> pixelBlob;
     // As the pixel shader source is user provided it's possible there's a problem with it
     // so load it inside a try catch, on any error log and fallback on the error pixel shader
     // If even the error pixel shader fails to load rely on standard exception handling
     try
     {
-        pixelBlob = _CompileShader(pixelShaderSource, "ps_5_0");
+        pixelBlob = _CompileShader(pixelShaderSource, shaderTargetPS);
     }
     catch (...)
     {
@@ -542,11 +556,12 @@ try
 //                            D3D11_CREATE_DEVICE_DEBUG |
                               D3D11_CREATE_DEVICE_SINGLETHREADED;
 
-    const std::array FeatureLevels{ D3D_FEATURE_LEVEL_11_1,
-                                    D3D_FEATURE_LEVEL_11_0,
-                                    D3D_FEATURE_LEVEL_10_1,
-                                    D3D_FEATURE_LEVEL_10_0,
-                                    D3D_FEATURE_LEVEL_9_1 };
+    static constexpr std::array FeatureLevels{
+        D3D_FEATURE_LEVEL_11_1,
+        D3D_FEATURE_LEVEL_11_0,
+        D3D_FEATURE_LEVEL_10_1,
+        D3D_FEATURE_LEVEL_10_0,
+    };
 
     // Trying hardware first for maximum performance, then trying WARP (software) renderer second
     // in case we're running inside a downlevel VM where hardware passthrough isn't enabled like