From d783fecdf07cc31e60d90e1eaeb4ca506e84f47a Mon Sep 17 00:00:00 2001 From: SaiyansKing <38609240+SaiyansKing@users.noreply.github.com> Date: Sun, 8 Sep 2024 16:09:10 +0200 Subject: [PATCH 1/7] Fix zCModel::GetHomeVob function --- D3D11Engine/zCModel.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/D3D11Engine/zCModel.h b/D3D11Engine/zCModel.h index 1151585a..29d930e0 100644 --- a/D3D11Engine/zCModel.h +++ b/D3D11Engine/zCModel.h @@ -334,7 +334,7 @@ class zCModel : public zCVisual { } zCVob* GetHomeVob() { - return reinterpret_cast<zCVob*>(THISPTR_OFFSET( GothicMemoryLocations::zCModel::Offset_HomeVob )); + return *reinterpret_cast<zCVob**>(THISPTR_OFFSET( GothicMemoryLocations::zCModel::Offset_HomeVob )); /* load the zCVob pointer stored at Offset_HomeVob instead of returning the slot's own address */ } private: From 0a8b6d5f409f7ee01d4b2cb8b1ed2b42cc7b51fb Mon Sep 17 00:00:00 2001 From: SaiyansKing <38609240+SaiyansKing@users.noreply.github.com> Date: Sun, 8 Sep 2024 16:12:02 +0200 Subject: [PATCH 2/7] Disable for now marking texture materials as portals Some modifications use the same texture for solid mesh and portals, due to how materials are handled in GD3D11 it will break those meshes from rendering correctly --- D3D11Engine/WorldConverter.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/D3D11Engine/WorldConverter.cpp b/D3D11Engine/WorldConverter.cpp index dae9c47f..6e949047 100644 --- a/D3D11Engine/WorldConverter.cpp +++ b/D3D11Engine/WorldConverter.cpp @@ -353,6 +353,10 @@ HRESULT WorldConverter::ConvertWorldMesh( zCPolygon** polys, unsigned int numPol } // Flag portals so that we can apply a different PS shader later + if ( poly->GetPolyFlags()->PortalPoly ) { + continue; + } + /* if ( poly->GetPolyFlags()->PortalPoly ) { zCMaterial* polymat = poly->GetMaterial(); if ( zCTexture* tex = polymat->GetTextureSingle() ) { @@ -367,6 +371,7 @@ HRESULT WorldConverter::ConvertWorldMesh( zCPolygon** polys, unsigned int numPol continue; } } + */ // Calculate midpoint of this triange to get the section XMFLOAT3 avgPos; From 
786a9575e7e9b10ed3bf32d23123aedf7484bd46 Mon Sep 17 00:00:00 2001 From: SaiyansKing <38609240+SaiyansKing@users.noreply.github.com> Date: Sun, 8 Sep 2024 16:20:54 +0200 Subject: [PATCH 3/7] Rework particle effect shader --- D3D11Engine/D2DSettingsDialog.cpp | 6 +- D3D11Engine/D3D11Effect.cpp | 2 +- D3D11Engine/D3D11GraphicsEngine.cpp | 6 +- D3D11Engine/D3D11VShader.cpp | 2 +- D3D11Engine/GothicAPI.cpp | 31 +++---- D3D11Engine/GothicGraphicsState.h | 7 +- D3D11Engine/HookExceptionFilter.h | 2 +- D3D11Engine/Shaders/GS_Billboard.hlsl | 86 ++++++++----------- D3D11Engine/Shaders/VS_AdvanceRain.hlsl | 4 +- D3D11Engine/Shaders/VS_ParticlePoint.hlsl | 4 +- .../Shaders/VS_ParticlePointShaded.hlsl | 4 +- D3D11Engine/WorldObjects.h | 2 +- D3D11Engine/zCParticleFX.h | 6 -- 13 files changed, 61 insertions(+), 101 deletions(-) diff --git a/D3D11Engine/D2DSettingsDialog.cpp b/D3D11Engine/D2DSettingsDialog.cpp index a85c130c..14e28e57 100644 --- a/D3D11Engine/D2DSettingsDialog.cpp +++ b/D3D11Engine/D2DSettingsDialog.cpp @@ -370,13 +370,11 @@ XRESULT D2DSettingsDialog::InitControls() { SV_Slider* visualFXDDSlider = new SV_Slider( MainView, MainPanel ); visualFXDDSlider->SetPositionAndSize( D2D1::Point2F( 10, 22 ), D2D1::SizeF( 150, 15 ) ); - - visualFXDDSlider->AlignUnder( visualFXDDLabel, 5 ); - + visualFXDDSlider->AlignUnder( visualFXDDLabel, 5 ); visualFXDDSlider->SetDataToUpdate( &Engine::GAPI->GetRendererState().RendererSettings.VisualFXDrawRadius ); visualFXDDSlider->SetIsIntegralSlider( true ); visualFXDDSlider->SetDisplayMultiplier( 0.001f ); - visualFXDDSlider->SetMinMax( 0.0f, 30000.0f ); + visualFXDDSlider->SetMinMax( 0.0f, 10000.0f ); visualFXDDSlider->SetValue( Engine::GAPI->GetRendererState().RendererSettings.VisualFXDrawRadius ); SV_Label* worldDDLabel = new SV_Label( MainView, MainPanel ); diff --git a/D3D11Engine/D3D11Effect.cpp b/D3D11Engine/D3D11Effect.cpp index d5f3f456..db93aad0 100644 --- a/D3D11Engine/D3D11Effect.cpp +++ b/D3D11Engine/D3D11Effect.cpp @@ 
-82,7 +82,7 @@ void D3D11Effect::FillRandomRaindropData( std::vector& dat raindrop.color = float4( SeedX, SeedY, SeedZ, randomIncrease ); float height = 30.0f; - raindrop.scale = float2( height / 10.0f, height / 2.0f ); + raindrop.scale = float3( height / 10.0f, height / 2.0f, 0.f ); data[i] = raindrop; } diff --git a/D3D11Engine/D3D11GraphicsEngine.cpp b/D3D11Engine/D3D11GraphicsEngine.cpp index cfc5fab6..a1b24382 100644 --- a/D3D11Engine/D3D11GraphicsEngine.cpp +++ b/D3D11Engine/D3D11GraphicsEngine.cpp @@ -867,7 +867,7 @@ XRESULT D3D11GraphicsEngine::OnResize( INT2 newSize ) { GetDevice().Get(), Resolution.x, Resolution.y, DXGI_FORMAT_R16G16_FLOAT ); GBuffer1_Normals = std::make_unique( - GetDevice().Get(), Resolution.x, Resolution.y, DXGI_FORMAT_R16G16B16A16_FLOAT ); + GetDevice().Get(), Resolution.x, Resolution.y, DXGI_FORMAT_R8G8B8A8_SNORM ); GBuffer0_Diffuse = std::make_unique( GetDevice().Get(), Resolution.x, Resolution.y, DXGI_FORMAT_B8G8R8A8_UNORM ); @@ -5692,10 +5692,6 @@ void D3D11GraphicsEngine::DrawDecalList( const std::vector& decals, XMMATRIX mat = view * world * offset * scale; - ParticleInstanceInfo ii; - ii.scale = float2( 50, 50 ); - ii.color = 0xFFFFFFFF; - Engine::GAPI->SetWorldTransformXM( mat ); SetupVS_ExPerInstanceConstantBuffer(); diff --git a/D3D11Engine/D3D11VShader.cpp b/D3D11Engine/D3D11VShader.cpp index 10e19681..bcd85a38 100644 --- a/D3D11Engine/D3D11VShader.cpp +++ b/D3D11Engine/D3D11VShader.cpp @@ -136,7 +136,7 @@ XRESULT D3D11VShader::LoadShader( const char* vertexShader, int layout, const st { { "POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, { "DIFFUSE", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, - { "SIZE", 0, DXGI_FORMAT_R32G32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, + { "SIZE", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 
}, { "TYPE", 0, DXGI_FORMAT_R32_UINT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, { "VELOCITY", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, }; diff --git a/D3D11Engine/GothicAPI.cpp b/D3D11Engine/GothicAPI.cpp index 0e9c40a7..29701382 100644 --- a/D3D11Engine/GothicAPI.cpp +++ b/D3D11Engine/GothicAPI.cpp @@ -2268,21 +2268,14 @@ void GothicAPI::DrawParticleFX( zCVob* source, zCParticleFX* fx, ParticleFrameDa // Generate instance info part.emplace_back(); ParticleInstanceInfo& ii = part.back(); - ii.scale = XMFLOAT2( p->Size.x, p->Size.y ); - ii.drawMode = 0; + ii.scale = float3( p->Size.x, p->Size.y, 0.f ); // Construct world matrix - int alignment = fx->GetEmitter()->GetVisAlignment(); - if ( alignment == zPARTICLE_ALIGNMENT_XY ) { - ii.drawMode = 2; - } else if ( alignment == zPARTICLE_ALIGNMENT_VELOCITY || alignment == zPARTICLE_ALIGNMENT_VELOCITY_3D ) { - ii.drawMode = 3; - } // TODO: Y-Locked! - - if ( !fx->GetEmitter()->GetVisIsQuadPoly() ) { - ii.scale.x *= 0.5f; - ii.scale.y *= 0.5f; + ii.drawMode = fx->GetEmitter()->GetVisAlignment(); + if ( fx->GetEmitter()->GetVisIsQuadPoly() ) { + ii.drawMode += 10; } + float4 color; color.x = p->Color.x / 255.0f; color.y = p->Color.y / 255.0f; @@ -2300,17 +2293,19 @@ void GothicAPI::DrawParticleFX( zCVob* source, zCParticleFX* fx, ParticleFrameDa ii.color = color; ii.velocity = p->Vel; + if ( fx->GetEmitter()->GetVisAlignment() == 2 ) { + if ( zCVob* connectedVob = fx->GetConnectedVob() ) { + XMFLOAT4X4* worldMatrix = connectedVob->GetWorldMatrixPtr(); + ii.scale = float3( worldMatrix->m[0][0] * p->Size.x, worldMatrix->m[1][0] * p->Size.x, worldMatrix->m[2][0] * p->Size.x ); /* GS_Billboard reads this as rightVector when visOrientation == 2; third component takes row 2, not row 1 again */ + ii.velocity = float3( worldMatrix->m[0][2] * p->Size.y, worldMatrix->m[1][2] * p->Size.y, worldMatrix->m[2][2] * p->Size.y ); /* GS_Billboard reads this as upVector when visOrientation == 2 */ + } + } + fx->UpdateParticle( p ); i++; } } - /* - Liker@WoG: -11.12.2020 14:58 
https://forum.worldofplayers.de/forum/threads/1546222-Yet-Another-D3D11-Renderer?p=26626374&viewfull=1#post26626374 -11.12.2020 16:19 https://forum.worldofplayers.de/forum/threads/1546222-Yet-Another-D3D11-Renderer?p=26626530&viewfull=1#post26626530 -14.12.2020 20:25 https://forum.worldofplayers.de/forum/threads/1546222-Yet-Another-D3D11-Renderer?p=26628056&viewfull=1#post26628056 - */ // Create new particles? fx->CreateParticlesUpdateDependencies(); diff --git a/D3D11Engine/GothicGraphicsState.h b/D3D11Engine/GothicGraphicsState.h index f1380bff..86999ab9 100644 --- a/D3D11Engine/GothicGraphicsState.h +++ b/D3D11Engine/GothicGraphicsState.h @@ -545,12 +545,7 @@ struct GothicRendererSettings { IndoorVobDrawRadius = 5000.0f; OutdoorVobDrawRadius = 30000.0f; SkeletalMeshDrawRadius = 6000.0f; - VisualFXDrawRadius = 10000.0f; - -#if BUILD_SPACER_NET - VisualFXDrawRadius = 16000.0f; -#endif - + VisualFXDrawRadius = 8000.0f; OutdoorSmallVobDrawRadius = 10000.0f; SmallVobSize = 1500.0f; diff --git a/D3D11Engine/HookExceptionFilter.h b/D3D11Engine/HookExceptionFilter.h index e4e8dc55..f530fc4e 100644 --- a/D3D11Engine/HookExceptionFilter.h +++ b/D3D11Engine/HookExceptionFilter.h @@ -60,4 +60,4 @@ static void __AddDbgFuncCall( const std::string& fn, int threadID, bool out ) { LogInfo() << "Exception caught!"; \ \ } - */ \ No newline at end of file + */ diff --git a/D3D11Engine/Shaders/GS_Billboard.hlsl b/D3D11Engine/Shaders/GS_Billboard.hlsl index 2c13752b..e663ce79 100644 --- a/D3D11Engine/Shaders/GS_Billboard.hlsl +++ b/D3D11Engine/Shaders/GS_Billboard.hlsl @@ -20,79 +20,61 @@ struct PS_INPUT void GSMain(point VS_OUTPUT input[1], inout TriangleStream OutputStream) { float3 planeNormal = input[0].vPosition - CameraPosition; - //planeNormal.y = 0.0f; // For tree bilboard planeNormal = normalize(-planeNormal); + float3 position = input[0].vPosition; float3 upVector; float3 rightVector; - - - - //input[0].vSize *= 0.5f; - - //rightVector = rightVector * 100.0f; - //upVector 
*= 100.0f; - // Construct vertices - // We get the points by using the billboards right vector and the billboards height + int visIsQuadPoly = int(step(10.0, float(input[0].type))); + int visOrientation = input[0].type - (10 * visIsQuadPoly); + float3 vert[4]; - - if(input[0].type == 3) + if (visOrientation == 2) + { + rightVector = input[0].vSize; + upVector = input[0].vVelocity; + } + else if (visOrientation == 3) { - // Make up/right vectors along the velocity-vector - float3 velYPos = normalize(input[0].vVelocity); - float3 velXPos = normalize(cross(planeNormal, velYPos)); - - //velYPos = normalize(cross(planeNormal, velXPos)); + float3 velYPos = normalize(input[0].vVelocity); + float3 velXPos = normalize(cross(planeNormal, velYPos)); - rightVector = velXPos; - upVector = velYPos; - }else if(input[0].type == 2) - { - // xz-plane - upVector = float3(0.0f, 0.0f, 1.0f); - rightVector = float3(1.0f,0.0f,0.0f); // FIXME: Maybe rotate this with the vob? - }else + rightVector = velXPos * input[0].vSize.x; + upVector = velYPos * input[0].vSize.y; + } + else if (visOrientation == 1) { - // Construct up and right vectors - upVector = float3(0.0f, 1.0f, 0.0f); - rightVector = normalize(cross(planeNormal, upVector)); - - // Construct better up-vector - upVector = normalize(cross(planeNormal, rightVector)); + float3 velYPos = normalize(input[0].vVelocity); + float3 velXPos = normalize(cross(planeNormal, velYPos)); + velYPos = normalize(cross(planeNormal, velXPos)); + + rightVector = velXPos * input[0].vSize.x; + upVector = velYPos * input[0].vSize.y; } - - if(input[0].type == 5) - { - //upVector = float3(0.0f, 1.0f, 0.0f); - //rightVector = float3(1.0f,0.0f,0.0f); // FIXME: Maybe rotate this with the vob? 
- - // Scale vectors - rightVector *= input[0].vSize.x; - upVector *= input[0].vSize.y; - }else - { - // Scale vectors - rightVector *= input[0].vSize.x; - upVector *= input[0].vSize.y; + else + { + upVector = float3(0.0f, 1.0f, 0.0f) * input[0].vSize.y; + rightVector = float3(1.0f, 0.0f, 0.0f) * input[0].vSize.x; + + position += float3(input[0].vSize.x * 0.5, -input[0].vSize.y * 0.5, 0.0) * float(1 - visIsQuadPoly); } - vert[0] = input[0].vPosition - rightVector - upVector; // Get bottom left vertex - vert[1] = input[0].vPosition + rightVector - upVector; // Get bottom right vertex - vert[2] = input[0].vPosition - rightVector + upVector; // Get top left vertex - vert[3] = input[0].vPosition + rightVector + upVector; // Get top right vertex + vert[0] = position - rightVector + upVector; // Get top left vertex + vert[1] = position + rightVector + upVector; // Get top right vertex + vert[2] = position - rightVector - upVector; // Get bottom left vertex + vert[3] = position + rightVector - upVector; // Get bottom right vertex - // Get billboards texture coordinates float2 texCoord[4]; texCoord[0] = float2(0, 1); texCoord[1] = float2(1, 1); texCoord[2] = float2(0, 0); texCoord[3] = float2(1, 0); - // Append the two triangles to the stream + // Append triangles to the stream PS_INPUT outputVert = (PS_INPUT)0; - for(int i = 0; i < 4; i++) + for (int i = 0; i < 4; i++) { outputVert.vPosition = mul(float4(vert[i], 1.0f), M_ViewProj); outputVert.vTexcoord = texCoord[i]; diff --git a/D3D11Engine/Shaders/VS_AdvanceRain.hlsl b/D3D11Engine/Shaders/VS_AdvanceRain.hlsl index a22dd4bf..f4a1ee69 100644 --- a/D3D11Engine/Shaders/VS_AdvanceRain.hlsl +++ b/D3D11Engine/Shaders/VS_AdvanceRain.hlsl @@ -31,7 +31,7 @@ struct VS_INPUT { float3 vPosition : POSITION; float4 vDiffuse : DIFFUSE; - float2 vSize : SIZE; + float3 vSize : SIZE; unsigned int type : TYPE; float3 vVelocity : VELOCITY; }; @@ -78,7 +78,7 @@ VS_OUTPUT VSMain( VS_INPUT Input ) Output.vPosition = Input.vPosition; 
Output.vDiffuse = Input.vDiffuse; - Output.vSize = Input.vSize; + Output.vSize = Input.vSize.xy; Output.vVelocity = Input.vVelocity; Output.type = Input.type; return Output; diff --git a/D3D11Engine/Shaders/VS_ParticlePoint.hlsl b/D3D11Engine/Shaders/VS_ParticlePoint.hlsl index c506e3cf..c2aa4805 100644 --- a/D3D11Engine/Shaders/VS_ParticlePoint.hlsl +++ b/D3D11Engine/Shaders/VS_ParticlePoint.hlsl @@ -16,7 +16,7 @@ struct VS_INPUT { float3 vPosition : POSITION; float4 vDiffuse : DIFFUSE; - float2 vSize : SIZE; + float3 vSize : SIZE; unsigned int type : TYPE; float3 vVelocity : VELOCITY; }; @@ -25,7 +25,7 @@ struct VS_OUTPUT { float3 vPosition : POSITION; float4 vDiffuse : DIFFUSE; - float2 vSize : SIZE; + float3 vSize : SIZE; int type : TYPE; float3 vVelocity : VELOCITY; }; diff --git a/D3D11Engine/Shaders/VS_ParticlePointShaded.hlsl b/D3D11Engine/Shaders/VS_ParticlePointShaded.hlsl index 44f071ee..d2970e34 100644 --- a/D3D11Engine/Shaders/VS_ParticlePointShaded.hlsl +++ b/D3D11Engine/Shaders/VS_ParticlePointShaded.hlsl @@ -23,7 +23,7 @@ struct VS_INPUT { float3 vPosition : POSITION; float4 vDiffuse : DIFFUSE; - float2 vSize : SIZE; + float3 vSize : SIZE; unsigned int type : TYPE; float3 vVelocity : VELOCITY; }; @@ -69,7 +69,7 @@ VS_OUTPUT VSMain( VS_INPUT Input ) Output.vPosition = Input.vPosition; Output.vDiffuse = Input.vDiffuse; //float4(Input.vDiffuse.gba, pow(Input.vDiffuse.r, 2.2f)); - Output.vSize = Input.vSize; + Output.vSize = Input.vSize.xy; Output.vVelocity = Input.vVelocity; Output.type = Input.type; return Output; diff --git a/D3D11Engine/WorldObjects.h b/D3D11Engine/WorldObjects.h index 0472b3c2..7978b5aa 100644 --- a/D3D11Engine/WorldObjects.h +++ b/D3D11Engine/WorldObjects.h @@ -31,7 +31,7 @@ struct ParticleRenderInfo { struct ParticleInstanceInfo { float3 position; float4 color; - float2 scale; + float3 scale; int drawMode; // 0 = billboard, 1 = y-locked billboard, 2 = y-plane, 3 = velo aligned float3 velocity; }; diff --git 
a/D3D11Engine/zCParticleFX.h b/D3D11Engine/zCParticleFX.h index a8379fa9..c5ce9d03 100644 --- a/D3D11Engine/zCParticleFX.h +++ b/D3D11Engine/zCParticleFX.h @@ -7,12 +7,6 @@ #include "zCTimer.h" #include "zCPolyStrip.h" -enum EZParticleAlignment { - zPARTICLE_ALIGNMENT_VELOCITY = 1, - zPARTICLE_ALIGNMENT_XY = 2, - zPARTICLE_ALIGNMENT_VELOCITY_3D = 3, -}; - class zSTRING; class zCPolyStrip; class zCMesh; From 2ee175cbf7840136328ca247281f019ccc9a135e Mon Sep 17 00:00:00 2001 From: SaiyansKing <38609240+SaiyansKing@users.noreply.github.com> Date: Sun, 8 Sep 2024 16:21:53 +0200 Subject: [PATCH 4/7] Add software skinning for particle effects that are based on zCModel --- D3D11Engine/GothicAPI.cpp | 119 ++++++++++++++++- D3D11Engine/GothicAPI.h | 4 + D3D11Engine/GothicMemoryLocations2_6_fix.h | 2 + D3D11Engine/HookedFunctions.h | 6 +- D3D11Engine/pch.h | 146 +++++++++++++++++++++ D3D11Engine/zCModel.h | 18 ++- 6 files changed, 291 insertions(+), 4 deletions(-) diff --git a/D3D11Engine/GothicAPI.cpp b/D3D11Engine/GothicAPI.cpp index 29701382..b2fd7681 100644 --- a/D3D11Engine/GothicAPI.cpp +++ b/D3D11Engine/GothicAPI.cpp @@ -1853,6 +1853,121 @@ SkeletalMeshVisualInfo* GothicAPI::LoadzCModelData( oCNPC* npc ) { return mi; } +int GothicAPI::GetLowestLODNumPolys_SkeletalMesh( zCModel* model ) { + int numPolys = 0; + + SkeletalMeshVisualInfo* skeletalMesh = nullptr; + zCVob* homeVob = model->GetHomeVob(); + if ( homeVob && homeVob->GetVobType() == zVOB_TYPE_NSC ) { + oCNPC* npc = static_cast(homeVob); + auto it = SkeletalMeshNpcs.find( npc ); + if ( it != SkeletalMeshNpcs.end() ) { + skeletalMesh = it->second; + } + } else { + std::string str = model->GetVisualName(); + if ( str.empty() ) { // Happens when the model has no skeletal-mesh + zSTRING mds = model->GetModelName(); + str = mds.ToChar(); + mds.Delete(); + } + + auto it = SkeletalMeshVisuals.find( str ); + if ( it != SkeletalMeshVisuals.end() ) { + skeletalMesh = it->second; + } + } + + if ( skeletalMesh ) { + for ( 
auto const& itm : skeletalMesh->SkeletalMeshes ) {
+            for ( auto& mesh : itm.second ) {
+                numPolys += static_cast<int>(mesh->Indices.size() / 3);
+            }
+        }
+    }
+    return numPolys;
+}
+
+float3* GothicAPI::GetLowestLODPoly_SkeletalMesh( zCModel* model, const int polyId, float3*& polyNormal ) {
+    static float3 returnPositions[3]; // static: the returned pointer has to stay valid after this call returns
+    static float3 fallbackNormal( 0.f, 1.f, 0.f ); // static for the same reason; previously the address of a temporary was handed out
+    size_t polyIndex = static_cast<size_t>(polyId) * 3; // three indices per polygon
+    polyNormal = &fallbackNormal;
+    SkeletalMeshVisualInfo* skeletalMesh = nullptr;
+    zCVob* homeVob = model->GetHomeVob();
+    if ( homeVob && homeVob->GetVobType() == zVOB_TYPE_NSC ) {
+        oCNPC* npc = static_cast<oCNPC*>(homeVob);
+        auto it = SkeletalMeshNpcs.find( npc );
+        if ( it != SkeletalMeshNpcs.end() ) {
+            skeletalMesh = it->second;
+        }
+    } else {
+        std::string str = model->GetVisualName();
+        if ( str.empty() ) { // Happens when the model has no skeletal-mesh
+            zSTRING mds = model->GetModelName();
+            str = mds.ToChar();
+            mds.Delete();
+        }
+
+        auto it = SkeletalMeshVisuals.find( str );
+        if ( it != SkeletalMeshVisuals.end() ) {
+            skeletalMesh = it->second;
+        }
+    }
+
+    if ( skeletalMesh ) {
+        for ( auto const& itm : skeletalMesh->SkeletalMeshes ) {
+            for ( auto& mesh : itm.second ) {
+                if ( polyIndex >= mesh->Indices.size() ) {
+                    polyIndex -= mesh->Indices.size(); // polygon belongs to a later sub-mesh; make the index relative to it
+                } else {
+                    float fatness = model->GetModelFatness();
+                    std::vector<XMFLOAT4X4> transforms;
+                    model->GetBoneTransforms( &transforms );
+
+                    for ( int i = 0; i < 3; ++i ) {
+                        VERTEX_INDEX _polyId = mesh->Indices[polyIndex + i];
+                        ExSkelVertexStruct& _polyVert = mesh->Vertices[_polyId];
+
+                        XMVECTOR position = XMVectorZero(); // accumulates the four weighted, bone-transformed positions below
+                        position += XMVectorReplicate( unquantizeHalfFloat( _polyVert.weights[0] ) ) * XMVector3Transform(
+                            XMVectorSet( unquantizeHalfFloat( _polyVert.Position[0][0] ),
+                                unquantizeHalfFloat( _polyVert.Position[0][1] ),
+                                unquantizeHalfFloat( _polyVert.Position[0][2] ), 1.f ), XMMatrixTranspose( XMLoadFloat4x4( &transforms[_polyVert.boneIndices[0]] ) ) );
+
+                        position += XMVectorReplicate( unquantizeHalfFloat( _polyVert.weights[1] ) ) * XMVector3Transform(
+                            XMVectorSet( unquantizeHalfFloat( _polyVert.Position[1][0] ),
+                                unquantizeHalfFloat( _polyVert.Position[1][1] ),
+                                unquantizeHalfFloat( _polyVert.Position[1][2] ), 1.f ), XMMatrixTranspose( XMLoadFloat4x4( &transforms[_polyVert.boneIndices[1]] ) ) );
+
+                        position += XMVectorReplicate( unquantizeHalfFloat( _polyVert.weights[2] ) ) * XMVector3Transform(
+                            XMVectorSet( unquantizeHalfFloat( _polyVert.Position[2][0] ),
+                                unquantizeHalfFloat( _polyVert.Position[2][1] ),
+                                unquantizeHalfFloat( _polyVert.Position[2][2] ), 1.f ), XMMatrixTranspose( XMLoadFloat4x4( &transforms[_polyVert.boneIndices[2]] ) ) );
+
+                        position += XMVectorReplicate( unquantizeHalfFloat( _polyVert.weights[3] ) ) * XMVector3Transform(
+                            XMVectorSet( unquantizeHalfFloat( _polyVert.Position[3][0] ),
+                                unquantizeHalfFloat( _polyVert.Position[3][1] ),
+                                unquantizeHalfFloat( _polyVert.Position[3][2] ), 1.f ), XMMatrixTranspose( XMLoadFloat4x4( &transforms[_polyVert.boneIndices[3]] ) ) );
+
+                        position += XMVectorReplicate( fatness ) * XMLoadFloat3( reinterpret_cast<XMFLOAT3*>(&_polyVert.BindPoseNormal) ); // offset along the bind-pose normal by the model fatness
+
+                        // The world matrix is applied later, when the particle calculates its world position
+                        XMMATRIX scale = XMMatrixScalingFromVector( model->GetModelScaleXM() );
+                        XMStoreFloat3( reinterpret_cast<XMFLOAT3*>(&returnPositions[i]), XMVector3Transform( position, XMMatrixTranspose( scale ) ) );
+                    }
+                    return returnPositions;
+                }
+            }
+        }
+    }
+
+    returnPositions[0] = float3( 0.f, 0.f, 0.f ); // no mesh data or polyId out of range: return a degenerate triangle
+    returnPositions[1] = float3( 0.f, 0.f, 0.f );
+    returnPositions[2] = float3( 0.f, 0.f, 0.f );
+    return returnPositions;
+}
+
 // TODO: REMOVE THIS!
#include "D3D11GraphicsEngine.h" @@ -1916,7 +2031,7 @@ void GothicAPI::DrawSkeletalMeshVob( SkeletalVobInfo* vi, float distance, bool u // Get the bone transforms std::vector transforms; - model->GetBoneTransforms( &transforms, vi->Vob ); + model->GetBoneTransforms( &transforms ); if ( updateState ) { // Update attachments @@ -2148,7 +2263,7 @@ void GothicAPI::DrawSkeletalVN() { // Get the bone transforms std::vector transforms; - model->GetBoneTransforms( &transforms, vi->Vob ); + model->GetBoneTransforms( &transforms ); if ( !static_cast(vi->VisualInfo)->SkeletalMeshes.empty() ) { g->DrawSkeletalVertexNormals( vi, transforms, 0xFFFFFF, fatness ); diff --git a/D3D11Engine/GothicAPI.h b/D3D11Engine/GothicAPI.h index 94954bb2..5ac5b0ef 100644 --- a/D3D11Engine/GothicAPI.h +++ b/D3D11Engine/GothicAPI.h @@ -661,6 +661,10 @@ class GothicAPI { SkeletalMeshVisualInfo* LoadzCModelData( zCModel* model ); SkeletalMeshVisualInfo* LoadzCModelData( oCNPC* npc ); + /** Returns lowest lod of zCModel polys */ + int GetLowestLODNumPolys_SkeletalMesh( zCModel* model ); + float3* GetLowestLODPoly_SkeletalMesh( zCModel* model, const int polyId, float3*& polyNormal ); + /** Prints a message to the screen for the given amount of time */ void PrintMessageTimed( const INT2& position, const std::string& strMessage, float time = 3000.0f, DWORD color = 0xFFFFFFFF ); diff --git a/D3D11Engine/GothicMemoryLocations2_6_fix.h b/D3D11Engine/GothicMemoryLocations2_6_fix.h index bb37b124..5522ea7b 100644 --- a/D3D11Engine/GothicMemoryLocations2_6_fix.h +++ b/D3D11Engine/GothicMemoryLocations2_6_fix.h @@ -381,6 +381,8 @@ struct GothicMemoryLocations { static const unsigned int Offset_NumActiveAnis = 0x34; static const unsigned int Offset_AniChannels = 0x38; static const unsigned int GetVisualName = 0x0057DF60; + static const unsigned int GetLowestLODNumPolys = 0x00579490; + static const unsigned int GetLowestLODPoly = 0x005794B0; }; struct zCModelAni { diff --git a/D3D11Engine/HookedFunctions.h 
b/D3D11Engine/HookedFunctions.h index 907cd6bf..8e30273e 100644 --- a/D3D11Engine/HookedFunctions.h +++ b/D3D11Engine/HookedFunctions.h @@ -59,7 +59,6 @@ typedef int( __thiscall* oCSpawnManagerCheckRemoveNpc )(void*, oCNPC*); typedef void( __thiscall* oCSpawnManagerCheckInsertNpc )(void*); typedef void( __thiscall* zCVobSetVisual )(void*, zCVisual*); - typedef int( __thiscall* zCTex_D3DXTEX_BuildSurfaces )(void*, int); typedef int( __thiscall* zCTextureLoadResourceData )(void*); typedef int( __thiscall* zCThreadSuspendThread )(void*); @@ -72,6 +71,9 @@ typedef void( __fastcall* oCWorldRemoveFromLists )(void*, zCVob*); typedef int( __thiscall* zCModelPrototypeLoadModelASC )(void*, class zSTRING const&); typedef int( __thiscall* zCModelPrototypeReadMeshAndTreeMSB )(void*, int&, class zCFileBIN&); +typedef int( __thiscall* zCModelGetLowestLODNumPolys )(void*); +typedef float3*( __thiscall* zCModelGetLowestLODPoly )(void*, const int, float3*&); + typedef DWORD( __cdecl* GetInformationManagerProc )(); #ifdef BUILD_GOTHIC_1_08k @@ -148,6 +150,8 @@ struct HookedFunctionInfo { #endif #ifdef BUILD_GOTHIC_2_6_fix GenericThiscall original_zCActiveSndAutoCalcObstruction = reinterpret_cast(GothicMemoryLocations::zCActiveSnd::AutoCalcObstruction); // Not usable - only for hooking + zCModelGetLowestLODNumPolys original_zCModelGetLowestLODNumPolys = reinterpret_cast(GothicMemoryLocations::zCModel::GetLowestLODNumPolys); + zCModelGetLowestLODPoly original_zCModelGetLowestLODPoly = reinterpret_cast(GothicMemoryLocations::zCModel::GetLowestLODPoly); #endif //zCModelPrototypeLoadModelASC original_zCModelPrototypeLoadModelASC = reinterpret_cast(GothicMemoryLocations::zCModelPrototype::LoadModelASC); //zCModelPrototypeReadMeshAndTreeMSB original_zCModelPrototypeReadMeshAndTreeMSB = reinterpret_cast(GothicMemoryLocations::zCModelPrototype::ReadMeshAndTreeMSB); diff --git a/D3D11Engine/pch.h b/D3D11Engine/pch.h index b174c701..84f82cc3 100644 --- a/D3D11Engine/pch.h +++ 
b/D3D11Engine/pch.h @@ -75,3 +75,149 @@ inline unsigned short quantizeHalfFloat( float v ) h = ( em > ( 255 << 23 ) ) ? 0x7e00 : h; return static_cast(s | h); } + +inline float unquantizeHalfFloat( unsigned short v ) +{ + static const unsigned int mantissa_table[2048] = { + 0x00000000, 0x33800000, 0x34000000, 0x34400000, 0x34800000, 0x34A00000, 0x34C00000, 0x34E00000, 0x35000000, 0x35100000, 0x35200000, 0x35300000, 0x35400000, 0x35500000, 0x35600000, 0x35700000, + 0x35800000, 0x35880000, 0x35900000, 0x35980000, 0x35A00000, 0x35A80000, 0x35B00000, 0x35B80000, 0x35C00000, 0x35C80000, 0x35D00000, 0x35D80000, 0x35E00000, 0x35E80000, 0x35F00000, 0x35F80000, + 0x36000000, 0x36040000, 0x36080000, 0x360C0000, 0x36100000, 0x36140000, 0x36180000, 0x361C0000, 0x36200000, 0x36240000, 0x36280000, 0x362C0000, 0x36300000, 0x36340000, 0x36380000, 0x363C0000, + 0x36400000, 0x36440000, 0x36480000, 0x364C0000, 0x36500000, 0x36540000, 0x36580000, 0x365C0000, 0x36600000, 0x36640000, 0x36680000, 0x366C0000, 0x36700000, 0x36740000, 0x36780000, 0x367C0000, + 0x36800000, 0x36820000, 0x36840000, 0x36860000, 0x36880000, 0x368A0000, 0x368C0000, 0x368E0000, 0x36900000, 0x36920000, 0x36940000, 0x36960000, 0x36980000, 0x369A0000, 0x369C0000, 0x369E0000, + 0x36A00000, 0x36A20000, 0x36A40000, 0x36A60000, 0x36A80000, 0x36AA0000, 0x36AC0000, 0x36AE0000, 0x36B00000, 0x36B20000, 0x36B40000, 0x36B60000, 0x36B80000, 0x36BA0000, 0x36BC0000, 0x36BE0000, + 0x36C00000, 0x36C20000, 0x36C40000, 0x36C60000, 0x36C80000, 0x36CA0000, 0x36CC0000, 0x36CE0000, 0x36D00000, 0x36D20000, 0x36D40000, 0x36D60000, 0x36D80000, 0x36DA0000, 0x36DC0000, 0x36DE0000, + 0x36E00000, 0x36E20000, 0x36E40000, 0x36E60000, 0x36E80000, 0x36EA0000, 0x36EC0000, 0x36EE0000, 0x36F00000, 0x36F20000, 0x36F40000, 0x36F60000, 0x36F80000, 0x36FA0000, 0x36FC0000, 0x36FE0000, + 0x37000000, 0x37010000, 0x37020000, 0x37030000, 0x37040000, 0x37050000, 0x37060000, 0x37070000, 0x37080000, 0x37090000, 0x370A0000, 0x370B0000, 0x370C0000, 0x370D0000, 
0x370E0000, 0x370F0000, + 0x37100000, 0x37110000, 0x37120000, 0x37130000, 0x37140000, 0x37150000, 0x37160000, 0x37170000, 0x37180000, 0x37190000, 0x371A0000, 0x371B0000, 0x371C0000, 0x371D0000, 0x371E0000, 0x371F0000, + 0x37200000, 0x37210000, 0x37220000, 0x37230000, 0x37240000, 0x37250000, 0x37260000, 0x37270000, 0x37280000, 0x37290000, 0x372A0000, 0x372B0000, 0x372C0000, 0x372D0000, 0x372E0000, 0x372F0000, + 0x37300000, 0x37310000, 0x37320000, 0x37330000, 0x37340000, 0x37350000, 0x37360000, 0x37370000, 0x37380000, 0x37390000, 0x373A0000, 0x373B0000, 0x373C0000, 0x373D0000, 0x373E0000, 0x373F0000, + 0x37400000, 0x37410000, 0x37420000, 0x37430000, 0x37440000, 0x37450000, 0x37460000, 0x37470000, 0x37480000, 0x37490000, 0x374A0000, 0x374B0000, 0x374C0000, 0x374D0000, 0x374E0000, 0x374F0000, + 0x37500000, 0x37510000, 0x37520000, 0x37530000, 0x37540000, 0x37550000, 0x37560000, 0x37570000, 0x37580000, 0x37590000, 0x375A0000, 0x375B0000, 0x375C0000, 0x375D0000, 0x375E0000, 0x375F0000, + 0x37600000, 0x37610000, 0x37620000, 0x37630000, 0x37640000, 0x37650000, 0x37660000, 0x37670000, 0x37680000, 0x37690000, 0x376A0000, 0x376B0000, 0x376C0000, 0x376D0000, 0x376E0000, 0x376F0000, + 0x37700000, 0x37710000, 0x37720000, 0x37730000, 0x37740000, 0x37750000, 0x37760000, 0x37770000, 0x37780000, 0x37790000, 0x377A0000, 0x377B0000, 0x377C0000, 0x377D0000, 0x377E0000, 0x377F0000, + 0x37800000, 0x37808000, 0x37810000, 0x37818000, 0x37820000, 0x37828000, 0x37830000, 0x37838000, 0x37840000, 0x37848000, 0x37850000, 0x37858000, 0x37860000, 0x37868000, 0x37870000, 0x37878000, + 0x37880000, 0x37888000, 0x37890000, 0x37898000, 0x378A0000, 0x378A8000, 0x378B0000, 0x378B8000, 0x378C0000, 0x378C8000, 0x378D0000, 0x378D8000, 0x378E0000, 0x378E8000, 0x378F0000, 0x378F8000, + 0x37900000, 0x37908000, 0x37910000, 0x37918000, 0x37920000, 0x37928000, 0x37930000, 0x37938000, 0x37940000, 0x37948000, 0x37950000, 0x37958000, 0x37960000, 0x37968000, 0x37970000, 0x37978000, + 0x37980000, 0x37988000, 
0x37990000, 0x37998000, 0x379A0000, 0x379A8000, 0x379B0000, 0x379B8000, 0x379C0000, 0x379C8000, 0x379D0000, 0x379D8000, 0x379E0000, 0x379E8000, 0x379F0000, 0x379F8000, + 0x37A00000, 0x37A08000, 0x37A10000, 0x37A18000, 0x37A20000, 0x37A28000, 0x37A30000, 0x37A38000, 0x37A40000, 0x37A48000, 0x37A50000, 0x37A58000, 0x37A60000, 0x37A68000, 0x37A70000, 0x37A78000, + 0x37A80000, 0x37A88000, 0x37A90000, 0x37A98000, 0x37AA0000, 0x37AA8000, 0x37AB0000, 0x37AB8000, 0x37AC0000, 0x37AC8000, 0x37AD0000, 0x37AD8000, 0x37AE0000, 0x37AE8000, 0x37AF0000, 0x37AF8000, + 0x37B00000, 0x37B08000, 0x37B10000, 0x37B18000, 0x37B20000, 0x37B28000, 0x37B30000, 0x37B38000, 0x37B40000, 0x37B48000, 0x37B50000, 0x37B58000, 0x37B60000, 0x37B68000, 0x37B70000, 0x37B78000, + 0x37B80000, 0x37B88000, 0x37B90000, 0x37B98000, 0x37BA0000, 0x37BA8000, 0x37BB0000, 0x37BB8000, 0x37BC0000, 0x37BC8000, 0x37BD0000, 0x37BD8000, 0x37BE0000, 0x37BE8000, 0x37BF0000, 0x37BF8000, + 0x37C00000, 0x37C08000, 0x37C10000, 0x37C18000, 0x37C20000, 0x37C28000, 0x37C30000, 0x37C38000, 0x37C40000, 0x37C48000, 0x37C50000, 0x37C58000, 0x37C60000, 0x37C68000, 0x37C70000, 0x37C78000, + 0x37C80000, 0x37C88000, 0x37C90000, 0x37C98000, 0x37CA0000, 0x37CA8000, 0x37CB0000, 0x37CB8000, 0x37CC0000, 0x37CC8000, 0x37CD0000, 0x37CD8000, 0x37CE0000, 0x37CE8000, 0x37CF0000, 0x37CF8000, + 0x37D00000, 0x37D08000, 0x37D10000, 0x37D18000, 0x37D20000, 0x37D28000, 0x37D30000, 0x37D38000, 0x37D40000, 0x37D48000, 0x37D50000, 0x37D58000, 0x37D60000, 0x37D68000, 0x37D70000, 0x37D78000, + 0x37D80000, 0x37D88000, 0x37D90000, 0x37D98000, 0x37DA0000, 0x37DA8000, 0x37DB0000, 0x37DB8000, 0x37DC0000, 0x37DC8000, 0x37DD0000, 0x37DD8000, 0x37DE0000, 0x37DE8000, 0x37DF0000, 0x37DF8000, + 0x37E00000, 0x37E08000, 0x37E10000, 0x37E18000, 0x37E20000, 0x37E28000, 0x37E30000, 0x37E38000, 0x37E40000, 0x37E48000, 0x37E50000, 0x37E58000, 0x37E60000, 0x37E68000, 0x37E70000, 0x37E78000, + 0x37E80000, 0x37E88000, 0x37E90000, 0x37E98000, 0x37EA0000, 0x37EA8000, 0x37EB0000, 
0x37EB8000, 0x37EC0000, 0x37EC8000, 0x37ED0000, 0x37ED8000, 0x37EE0000, 0x37EE8000, 0x37EF0000, 0x37EF8000, + 0x37F00000, 0x37F08000, 0x37F10000, 0x37F18000, 0x37F20000, 0x37F28000, 0x37F30000, 0x37F38000, 0x37F40000, 0x37F48000, 0x37F50000, 0x37F58000, 0x37F60000, 0x37F68000, 0x37F70000, 0x37F78000, + 0x37F80000, 0x37F88000, 0x37F90000, 0x37F98000, 0x37FA0000, 0x37FA8000, 0x37FB0000, 0x37FB8000, 0x37FC0000, 0x37FC8000, 0x37FD0000, 0x37FD8000, 0x37FE0000, 0x37FE8000, 0x37FF0000, 0x37FF8000, + 0x38000000, 0x38004000, 0x38008000, 0x3800C000, 0x38010000, 0x38014000, 0x38018000, 0x3801C000, 0x38020000, 0x38024000, 0x38028000, 0x3802C000, 0x38030000, 0x38034000, 0x38038000, 0x3803C000, + 0x38040000, 0x38044000, 0x38048000, 0x3804C000, 0x38050000, 0x38054000, 0x38058000, 0x3805C000, 0x38060000, 0x38064000, 0x38068000, 0x3806C000, 0x38070000, 0x38074000, 0x38078000, 0x3807C000, + 0x38080000, 0x38084000, 0x38088000, 0x3808C000, 0x38090000, 0x38094000, 0x38098000, 0x3809C000, 0x380A0000, 0x380A4000, 0x380A8000, 0x380AC000, 0x380B0000, 0x380B4000, 0x380B8000, 0x380BC000, + 0x380C0000, 0x380C4000, 0x380C8000, 0x380CC000, 0x380D0000, 0x380D4000, 0x380D8000, 0x380DC000, 0x380E0000, 0x380E4000, 0x380E8000, 0x380EC000, 0x380F0000, 0x380F4000, 0x380F8000, 0x380FC000, + 0x38100000, 0x38104000, 0x38108000, 0x3810C000, 0x38110000, 0x38114000, 0x38118000, 0x3811C000, 0x38120000, 0x38124000, 0x38128000, 0x3812C000, 0x38130000, 0x38134000, 0x38138000, 0x3813C000, + 0x38140000, 0x38144000, 0x38148000, 0x3814C000, 0x38150000, 0x38154000, 0x38158000, 0x3815C000, 0x38160000, 0x38164000, 0x38168000, 0x3816C000, 0x38170000, 0x38174000, 0x38178000, 0x3817C000, + 0x38180000, 0x38184000, 0x38188000, 0x3818C000, 0x38190000, 0x38194000, 0x38198000, 0x3819C000, 0x381A0000, 0x381A4000, 0x381A8000, 0x381AC000, 0x381B0000, 0x381B4000, 0x381B8000, 0x381BC000, + 0x381C0000, 0x381C4000, 0x381C8000, 0x381CC000, 0x381D0000, 0x381D4000, 0x381D8000, 0x381DC000, 0x381E0000, 0x381E4000, 0x381E8000, 0x381EC000, 
0x381F0000, 0x381F4000, 0x381F8000, 0x381FC000, + 0x38200000, 0x38204000, 0x38208000, 0x3820C000, 0x38210000, 0x38214000, 0x38218000, 0x3821C000, 0x38220000, 0x38224000, 0x38228000, 0x3822C000, 0x38230000, 0x38234000, 0x38238000, 0x3823C000, + 0x38240000, 0x38244000, 0x38248000, 0x3824C000, 0x38250000, 0x38254000, 0x38258000, 0x3825C000, 0x38260000, 0x38264000, 0x38268000, 0x3826C000, 0x38270000, 0x38274000, 0x38278000, 0x3827C000, + 0x38280000, 0x38284000, 0x38288000, 0x3828C000, 0x38290000, 0x38294000, 0x38298000, 0x3829C000, 0x382A0000, 0x382A4000, 0x382A8000, 0x382AC000, 0x382B0000, 0x382B4000, 0x382B8000, 0x382BC000, + 0x382C0000, 0x382C4000, 0x382C8000, 0x382CC000, 0x382D0000, 0x382D4000, 0x382D8000, 0x382DC000, 0x382E0000, 0x382E4000, 0x382E8000, 0x382EC000, 0x382F0000, 0x382F4000, 0x382F8000, 0x382FC000, + 0x38300000, 0x38304000, 0x38308000, 0x3830C000, 0x38310000, 0x38314000, 0x38318000, 0x3831C000, 0x38320000, 0x38324000, 0x38328000, 0x3832C000, 0x38330000, 0x38334000, 0x38338000, 0x3833C000, + 0x38340000, 0x38344000, 0x38348000, 0x3834C000, 0x38350000, 0x38354000, 0x38358000, 0x3835C000, 0x38360000, 0x38364000, 0x38368000, 0x3836C000, 0x38370000, 0x38374000, 0x38378000, 0x3837C000, + 0x38380000, 0x38384000, 0x38388000, 0x3838C000, 0x38390000, 0x38394000, 0x38398000, 0x3839C000, 0x383A0000, 0x383A4000, 0x383A8000, 0x383AC000, 0x383B0000, 0x383B4000, 0x383B8000, 0x383BC000, + 0x383C0000, 0x383C4000, 0x383C8000, 0x383CC000, 0x383D0000, 0x383D4000, 0x383D8000, 0x383DC000, 0x383E0000, 0x383E4000, 0x383E8000, 0x383EC000, 0x383F0000, 0x383F4000, 0x383F8000, 0x383FC000, + 0x38400000, 0x38404000, 0x38408000, 0x3840C000, 0x38410000, 0x38414000, 0x38418000, 0x3841C000, 0x38420000, 0x38424000, 0x38428000, 0x3842C000, 0x38430000, 0x38434000, 0x38438000, 0x3843C000, + 0x38440000, 0x38444000, 0x38448000, 0x3844C000, 0x38450000, 0x38454000, 0x38458000, 0x3845C000, 0x38460000, 0x38464000, 0x38468000, 0x3846C000, 0x38470000, 0x38474000, 0x38478000, 0x3847C000, + 
0x38480000, 0x38484000, 0x38488000, 0x3848C000, 0x38490000, 0x38494000, 0x38498000, 0x3849C000, 0x384A0000, 0x384A4000, 0x384A8000, 0x384AC000, 0x384B0000, 0x384B4000, 0x384B8000, 0x384BC000, + 0x384C0000, 0x384C4000, 0x384C8000, 0x384CC000, 0x384D0000, 0x384D4000, 0x384D8000, 0x384DC000, 0x384E0000, 0x384E4000, 0x384E8000, 0x384EC000, 0x384F0000, 0x384F4000, 0x384F8000, 0x384FC000, + 0x38500000, 0x38504000, 0x38508000, 0x3850C000, 0x38510000, 0x38514000, 0x38518000, 0x3851C000, 0x38520000, 0x38524000, 0x38528000, 0x3852C000, 0x38530000, 0x38534000, 0x38538000, 0x3853C000, + 0x38540000, 0x38544000, 0x38548000, 0x3854C000, 0x38550000, 0x38554000, 0x38558000, 0x3855C000, 0x38560000, 0x38564000, 0x38568000, 0x3856C000, 0x38570000, 0x38574000, 0x38578000, 0x3857C000, + 0x38580000, 0x38584000, 0x38588000, 0x3858C000, 0x38590000, 0x38594000, 0x38598000, 0x3859C000, 0x385A0000, 0x385A4000, 0x385A8000, 0x385AC000, 0x385B0000, 0x385B4000, 0x385B8000, 0x385BC000, + 0x385C0000, 0x385C4000, 0x385C8000, 0x385CC000, 0x385D0000, 0x385D4000, 0x385D8000, 0x385DC000, 0x385E0000, 0x385E4000, 0x385E8000, 0x385EC000, 0x385F0000, 0x385F4000, 0x385F8000, 0x385FC000, + 0x38600000, 0x38604000, 0x38608000, 0x3860C000, 0x38610000, 0x38614000, 0x38618000, 0x3861C000, 0x38620000, 0x38624000, 0x38628000, 0x3862C000, 0x38630000, 0x38634000, 0x38638000, 0x3863C000, + 0x38640000, 0x38644000, 0x38648000, 0x3864C000, 0x38650000, 0x38654000, 0x38658000, 0x3865C000, 0x38660000, 0x38664000, 0x38668000, 0x3866C000, 0x38670000, 0x38674000, 0x38678000, 0x3867C000, + 0x38680000, 0x38684000, 0x38688000, 0x3868C000, 0x38690000, 0x38694000, 0x38698000, 0x3869C000, 0x386A0000, 0x386A4000, 0x386A8000, 0x386AC000, 0x386B0000, 0x386B4000, 0x386B8000, 0x386BC000, + 0x386C0000, 0x386C4000, 0x386C8000, 0x386CC000, 0x386D0000, 0x386D4000, 0x386D8000, 0x386DC000, 0x386E0000, 0x386E4000, 0x386E8000, 0x386EC000, 0x386F0000, 0x386F4000, 0x386F8000, 0x386FC000, + 0x38700000, 0x38704000, 0x38708000, 0x3870C000, 0x38710000, 
0x38714000, 0x38718000, 0x3871C000, 0x38720000, 0x38724000, 0x38728000, 0x3872C000, 0x38730000, 0x38734000, 0x38738000, 0x3873C000, + 0x38740000, 0x38744000, 0x38748000, 0x3874C000, 0x38750000, 0x38754000, 0x38758000, 0x3875C000, 0x38760000, 0x38764000, 0x38768000, 0x3876C000, 0x38770000, 0x38774000, 0x38778000, 0x3877C000, + 0x38780000, 0x38784000, 0x38788000, 0x3878C000, 0x38790000, 0x38794000, 0x38798000, 0x3879C000, 0x387A0000, 0x387A4000, 0x387A8000, 0x387AC000, 0x387B0000, 0x387B4000, 0x387B8000, 0x387BC000, + 0x387C0000, 0x387C4000, 0x387C8000, 0x387CC000, 0x387D0000, 0x387D4000, 0x387D8000, 0x387DC000, 0x387E0000, 0x387E4000, 0x387E8000, 0x387EC000, 0x387F0000, 0x387F4000, 0x387F8000, 0x387FC000, + 0x38000000, 0x38002000, 0x38004000, 0x38006000, 0x38008000, 0x3800A000, 0x3800C000, 0x3800E000, 0x38010000, 0x38012000, 0x38014000, 0x38016000, 0x38018000, 0x3801A000, 0x3801C000, 0x3801E000, + 0x38020000, 0x38022000, 0x38024000, 0x38026000, 0x38028000, 0x3802A000, 0x3802C000, 0x3802E000, 0x38030000, 0x38032000, 0x38034000, 0x38036000, 0x38038000, 0x3803A000, 0x3803C000, 0x3803E000, + 0x38040000, 0x38042000, 0x38044000, 0x38046000, 0x38048000, 0x3804A000, 0x3804C000, 0x3804E000, 0x38050000, 0x38052000, 0x38054000, 0x38056000, 0x38058000, 0x3805A000, 0x3805C000, 0x3805E000, + 0x38060000, 0x38062000, 0x38064000, 0x38066000, 0x38068000, 0x3806A000, 0x3806C000, 0x3806E000, 0x38070000, 0x38072000, 0x38074000, 0x38076000, 0x38078000, 0x3807A000, 0x3807C000, 0x3807E000, + 0x38080000, 0x38082000, 0x38084000, 0x38086000, 0x38088000, 0x3808A000, 0x3808C000, 0x3808E000, 0x38090000, 0x38092000, 0x38094000, 0x38096000, 0x38098000, 0x3809A000, 0x3809C000, 0x3809E000, + 0x380A0000, 0x380A2000, 0x380A4000, 0x380A6000, 0x380A8000, 0x380AA000, 0x380AC000, 0x380AE000, 0x380B0000, 0x380B2000, 0x380B4000, 0x380B6000, 0x380B8000, 0x380BA000, 0x380BC000, 0x380BE000, + 0x380C0000, 0x380C2000, 0x380C4000, 0x380C6000, 0x380C8000, 0x380CA000, 0x380CC000, 0x380CE000, 0x380D0000, 0x380D2000, 
0x380D4000, 0x380D6000, 0x380D8000, 0x380DA000, 0x380DC000, 0x380DE000, + 0x380E0000, 0x380E2000, 0x380E4000, 0x380E6000, 0x380E8000, 0x380EA000, 0x380EC000, 0x380EE000, 0x380F0000, 0x380F2000, 0x380F4000, 0x380F6000, 0x380F8000, 0x380FA000, 0x380FC000, 0x380FE000, + 0x38100000, 0x38102000, 0x38104000, 0x38106000, 0x38108000, 0x3810A000, 0x3810C000, 0x3810E000, 0x38110000, 0x38112000, 0x38114000, 0x38116000, 0x38118000, 0x3811A000, 0x3811C000, 0x3811E000, + 0x38120000, 0x38122000, 0x38124000, 0x38126000, 0x38128000, 0x3812A000, 0x3812C000, 0x3812E000, 0x38130000, 0x38132000, 0x38134000, 0x38136000, 0x38138000, 0x3813A000, 0x3813C000, 0x3813E000, + 0x38140000, 0x38142000, 0x38144000, 0x38146000, 0x38148000, 0x3814A000, 0x3814C000, 0x3814E000, 0x38150000, 0x38152000, 0x38154000, 0x38156000, 0x38158000, 0x3815A000, 0x3815C000, 0x3815E000, + 0x38160000, 0x38162000, 0x38164000, 0x38166000, 0x38168000, 0x3816A000, 0x3816C000, 0x3816E000, 0x38170000, 0x38172000, 0x38174000, 0x38176000, 0x38178000, 0x3817A000, 0x3817C000, 0x3817E000, + 0x38180000, 0x38182000, 0x38184000, 0x38186000, 0x38188000, 0x3818A000, 0x3818C000, 0x3818E000, 0x38190000, 0x38192000, 0x38194000, 0x38196000, 0x38198000, 0x3819A000, 0x3819C000, 0x3819E000, + 0x381A0000, 0x381A2000, 0x381A4000, 0x381A6000, 0x381A8000, 0x381AA000, 0x381AC000, 0x381AE000, 0x381B0000, 0x381B2000, 0x381B4000, 0x381B6000, 0x381B8000, 0x381BA000, 0x381BC000, 0x381BE000, + 0x381C0000, 0x381C2000, 0x381C4000, 0x381C6000, 0x381C8000, 0x381CA000, 0x381CC000, 0x381CE000, 0x381D0000, 0x381D2000, 0x381D4000, 0x381D6000, 0x381D8000, 0x381DA000, 0x381DC000, 0x381DE000, + 0x381E0000, 0x381E2000, 0x381E4000, 0x381E6000, 0x381E8000, 0x381EA000, 0x381EC000, 0x381EE000, 0x381F0000, 0x381F2000, 0x381F4000, 0x381F6000, 0x381F8000, 0x381FA000, 0x381FC000, 0x381FE000, + 0x38200000, 0x38202000, 0x38204000, 0x38206000, 0x38208000, 0x3820A000, 0x3820C000, 0x3820E000, 0x38210000, 0x38212000, 0x38214000, 0x38216000, 0x38218000, 0x3821A000, 0x3821C000, 
0x3821E000, + 0x38220000, 0x38222000, 0x38224000, 0x38226000, 0x38228000, 0x3822A000, 0x3822C000, 0x3822E000, 0x38230000, 0x38232000, 0x38234000, 0x38236000, 0x38238000, 0x3823A000, 0x3823C000, 0x3823E000, + 0x38240000, 0x38242000, 0x38244000, 0x38246000, 0x38248000, 0x3824A000, 0x3824C000, 0x3824E000, 0x38250000, 0x38252000, 0x38254000, 0x38256000, 0x38258000, 0x3825A000, 0x3825C000, 0x3825E000, + 0x38260000, 0x38262000, 0x38264000, 0x38266000, 0x38268000, 0x3826A000, 0x3826C000, 0x3826E000, 0x38270000, 0x38272000, 0x38274000, 0x38276000, 0x38278000, 0x3827A000, 0x3827C000, 0x3827E000, + 0x38280000, 0x38282000, 0x38284000, 0x38286000, 0x38288000, 0x3828A000, 0x3828C000, 0x3828E000, 0x38290000, 0x38292000, 0x38294000, 0x38296000, 0x38298000, 0x3829A000, 0x3829C000, 0x3829E000, + 0x382A0000, 0x382A2000, 0x382A4000, 0x382A6000, 0x382A8000, 0x382AA000, 0x382AC000, 0x382AE000, 0x382B0000, 0x382B2000, 0x382B4000, 0x382B6000, 0x382B8000, 0x382BA000, 0x382BC000, 0x382BE000, + 0x382C0000, 0x382C2000, 0x382C4000, 0x382C6000, 0x382C8000, 0x382CA000, 0x382CC000, 0x382CE000, 0x382D0000, 0x382D2000, 0x382D4000, 0x382D6000, 0x382D8000, 0x382DA000, 0x382DC000, 0x382DE000, + 0x382E0000, 0x382E2000, 0x382E4000, 0x382E6000, 0x382E8000, 0x382EA000, 0x382EC000, 0x382EE000, 0x382F0000, 0x382F2000, 0x382F4000, 0x382F6000, 0x382F8000, 0x382FA000, 0x382FC000, 0x382FE000, + 0x38300000, 0x38302000, 0x38304000, 0x38306000, 0x38308000, 0x3830A000, 0x3830C000, 0x3830E000, 0x38310000, 0x38312000, 0x38314000, 0x38316000, 0x38318000, 0x3831A000, 0x3831C000, 0x3831E000, + 0x38320000, 0x38322000, 0x38324000, 0x38326000, 0x38328000, 0x3832A000, 0x3832C000, 0x3832E000, 0x38330000, 0x38332000, 0x38334000, 0x38336000, 0x38338000, 0x3833A000, 0x3833C000, 0x3833E000, + 0x38340000, 0x38342000, 0x38344000, 0x38346000, 0x38348000, 0x3834A000, 0x3834C000, 0x3834E000, 0x38350000, 0x38352000, 0x38354000, 0x38356000, 0x38358000, 0x3835A000, 0x3835C000, 0x3835E000, + 0x38360000, 0x38362000, 0x38364000, 
0x38366000, 0x38368000, 0x3836A000, 0x3836C000, 0x3836E000, 0x38370000, 0x38372000, 0x38374000, 0x38376000, 0x38378000, 0x3837A000, 0x3837C000, 0x3837E000, + 0x38380000, 0x38382000, 0x38384000, 0x38386000, 0x38388000, 0x3838A000, 0x3838C000, 0x3838E000, 0x38390000, 0x38392000, 0x38394000, 0x38396000, 0x38398000, 0x3839A000, 0x3839C000, 0x3839E000, + 0x383A0000, 0x383A2000, 0x383A4000, 0x383A6000, 0x383A8000, 0x383AA000, 0x383AC000, 0x383AE000, 0x383B0000, 0x383B2000, 0x383B4000, 0x383B6000, 0x383B8000, 0x383BA000, 0x383BC000, 0x383BE000, + 0x383C0000, 0x383C2000, 0x383C4000, 0x383C6000, 0x383C8000, 0x383CA000, 0x383CC000, 0x383CE000, 0x383D0000, 0x383D2000, 0x383D4000, 0x383D6000, 0x383D8000, 0x383DA000, 0x383DC000, 0x383DE000, + 0x383E0000, 0x383E2000, 0x383E4000, 0x383E6000, 0x383E8000, 0x383EA000, 0x383EC000, 0x383EE000, 0x383F0000, 0x383F2000, 0x383F4000, 0x383F6000, 0x383F8000, 0x383FA000, 0x383FC000, 0x383FE000, + 0x38400000, 0x38402000, 0x38404000, 0x38406000, 0x38408000, 0x3840A000, 0x3840C000, 0x3840E000, 0x38410000, 0x38412000, 0x38414000, 0x38416000, 0x38418000, 0x3841A000, 0x3841C000, 0x3841E000, + 0x38420000, 0x38422000, 0x38424000, 0x38426000, 0x38428000, 0x3842A000, 0x3842C000, 0x3842E000, 0x38430000, 0x38432000, 0x38434000, 0x38436000, 0x38438000, 0x3843A000, 0x3843C000, 0x3843E000, + 0x38440000, 0x38442000, 0x38444000, 0x38446000, 0x38448000, 0x3844A000, 0x3844C000, 0x3844E000, 0x38450000, 0x38452000, 0x38454000, 0x38456000, 0x38458000, 0x3845A000, 0x3845C000, 0x3845E000, + 0x38460000, 0x38462000, 0x38464000, 0x38466000, 0x38468000, 0x3846A000, 0x3846C000, 0x3846E000, 0x38470000, 0x38472000, 0x38474000, 0x38476000, 0x38478000, 0x3847A000, 0x3847C000, 0x3847E000, + 0x38480000, 0x38482000, 0x38484000, 0x38486000, 0x38488000, 0x3848A000, 0x3848C000, 0x3848E000, 0x38490000, 0x38492000, 0x38494000, 0x38496000, 0x38498000, 0x3849A000, 0x3849C000, 0x3849E000, + 0x384A0000, 0x384A2000, 0x384A4000, 0x384A6000, 0x384A8000, 0x384AA000, 0x384AC000, 0x384AE000, 
0x384B0000, 0x384B2000, 0x384B4000, 0x384B6000, 0x384B8000, 0x384BA000, 0x384BC000, 0x384BE000, + 0x384C0000, 0x384C2000, 0x384C4000, 0x384C6000, 0x384C8000, 0x384CA000, 0x384CC000, 0x384CE000, 0x384D0000, 0x384D2000, 0x384D4000, 0x384D6000, 0x384D8000, 0x384DA000, 0x384DC000, 0x384DE000, + 0x384E0000, 0x384E2000, 0x384E4000, 0x384E6000, 0x384E8000, 0x384EA000, 0x384EC000, 0x384EE000, 0x384F0000, 0x384F2000, 0x384F4000, 0x384F6000, 0x384F8000, 0x384FA000, 0x384FC000, 0x384FE000, + 0x38500000, 0x38502000, 0x38504000, 0x38506000, 0x38508000, 0x3850A000, 0x3850C000, 0x3850E000, 0x38510000, 0x38512000, 0x38514000, 0x38516000, 0x38518000, 0x3851A000, 0x3851C000, 0x3851E000, + 0x38520000, 0x38522000, 0x38524000, 0x38526000, 0x38528000, 0x3852A000, 0x3852C000, 0x3852E000, 0x38530000, 0x38532000, 0x38534000, 0x38536000, 0x38538000, 0x3853A000, 0x3853C000, 0x3853E000, + 0x38540000, 0x38542000, 0x38544000, 0x38546000, 0x38548000, 0x3854A000, 0x3854C000, 0x3854E000, 0x38550000, 0x38552000, 0x38554000, 0x38556000, 0x38558000, 0x3855A000, 0x3855C000, 0x3855E000, + 0x38560000, 0x38562000, 0x38564000, 0x38566000, 0x38568000, 0x3856A000, 0x3856C000, 0x3856E000, 0x38570000, 0x38572000, 0x38574000, 0x38576000, 0x38578000, 0x3857A000, 0x3857C000, 0x3857E000, + 0x38580000, 0x38582000, 0x38584000, 0x38586000, 0x38588000, 0x3858A000, 0x3858C000, 0x3858E000, 0x38590000, 0x38592000, 0x38594000, 0x38596000, 0x38598000, 0x3859A000, 0x3859C000, 0x3859E000, + 0x385A0000, 0x385A2000, 0x385A4000, 0x385A6000, 0x385A8000, 0x385AA000, 0x385AC000, 0x385AE000, 0x385B0000, 0x385B2000, 0x385B4000, 0x385B6000, 0x385B8000, 0x385BA000, 0x385BC000, 0x385BE000, + 0x385C0000, 0x385C2000, 0x385C4000, 0x385C6000, 0x385C8000, 0x385CA000, 0x385CC000, 0x385CE000, 0x385D0000, 0x385D2000, 0x385D4000, 0x385D6000, 0x385D8000, 0x385DA000, 0x385DC000, 0x385DE000, + 0x385E0000, 0x385E2000, 0x385E4000, 0x385E6000, 0x385E8000, 0x385EA000, 0x385EC000, 0x385EE000, 0x385F0000, 0x385F2000, 0x385F4000, 0x385F6000, 0x385F8000, 
0x385FA000, 0x385FC000, 0x385FE000, + 0x38600000, 0x38602000, 0x38604000, 0x38606000, 0x38608000, 0x3860A000, 0x3860C000, 0x3860E000, 0x38610000, 0x38612000, 0x38614000, 0x38616000, 0x38618000, 0x3861A000, 0x3861C000, 0x3861E000, + 0x38620000, 0x38622000, 0x38624000, 0x38626000, 0x38628000, 0x3862A000, 0x3862C000, 0x3862E000, 0x38630000, 0x38632000, 0x38634000, 0x38636000, 0x38638000, 0x3863A000, 0x3863C000, 0x3863E000, + 0x38640000, 0x38642000, 0x38644000, 0x38646000, 0x38648000, 0x3864A000, 0x3864C000, 0x3864E000, 0x38650000, 0x38652000, 0x38654000, 0x38656000, 0x38658000, 0x3865A000, 0x3865C000, 0x3865E000, + 0x38660000, 0x38662000, 0x38664000, 0x38666000, 0x38668000, 0x3866A000, 0x3866C000, 0x3866E000, 0x38670000, 0x38672000, 0x38674000, 0x38676000, 0x38678000, 0x3867A000, 0x3867C000, 0x3867E000, + 0x38680000, 0x38682000, 0x38684000, 0x38686000, 0x38688000, 0x3868A000, 0x3868C000, 0x3868E000, 0x38690000, 0x38692000, 0x38694000, 0x38696000, 0x38698000, 0x3869A000, 0x3869C000, 0x3869E000, + 0x386A0000, 0x386A2000, 0x386A4000, 0x386A6000, 0x386A8000, 0x386AA000, 0x386AC000, 0x386AE000, 0x386B0000, 0x386B2000, 0x386B4000, 0x386B6000, 0x386B8000, 0x386BA000, 0x386BC000, 0x386BE000, + 0x386C0000, 0x386C2000, 0x386C4000, 0x386C6000, 0x386C8000, 0x386CA000, 0x386CC000, 0x386CE000, 0x386D0000, 0x386D2000, 0x386D4000, 0x386D6000, 0x386D8000, 0x386DA000, 0x386DC000, 0x386DE000, + 0x386E0000, 0x386E2000, 0x386E4000, 0x386E6000, 0x386E8000, 0x386EA000, 0x386EC000, 0x386EE000, 0x386F0000, 0x386F2000, 0x386F4000, 0x386F6000, 0x386F8000, 0x386FA000, 0x386FC000, 0x386FE000, + 0x38700000, 0x38702000, 0x38704000, 0x38706000, 0x38708000, 0x3870A000, 0x3870C000, 0x3870E000, 0x38710000, 0x38712000, 0x38714000, 0x38716000, 0x38718000, 0x3871A000, 0x3871C000, 0x3871E000, + 0x38720000, 0x38722000, 0x38724000, 0x38726000, 0x38728000, 0x3872A000, 0x3872C000, 0x3872E000, 0x38730000, 0x38732000, 0x38734000, 0x38736000, 0x38738000, 0x3873A000, 0x3873C000, 0x3873E000, + 0x38740000, 
0x38742000, 0x38744000, 0x38746000, 0x38748000, 0x3874A000, 0x3874C000, 0x3874E000, 0x38750000, 0x38752000, 0x38754000, 0x38756000, 0x38758000, 0x3875A000, 0x3875C000, 0x3875E000, + 0x38760000, 0x38762000, 0x38764000, 0x38766000, 0x38768000, 0x3876A000, 0x3876C000, 0x3876E000, 0x38770000, 0x38772000, 0x38774000, 0x38776000, 0x38778000, 0x3877A000, 0x3877C000, 0x3877E000, + 0x38780000, 0x38782000, 0x38784000, 0x38786000, 0x38788000, 0x3878A000, 0x3878C000, 0x3878E000, 0x38790000, 0x38792000, 0x38794000, 0x38796000, 0x38798000, 0x3879A000, 0x3879C000, 0x3879E000, + 0x387A0000, 0x387A2000, 0x387A4000, 0x387A6000, 0x387A8000, 0x387AA000, 0x387AC000, 0x387AE000, 0x387B0000, 0x387B2000, 0x387B4000, 0x387B6000, 0x387B8000, 0x387BA000, 0x387BC000, 0x387BE000, + 0x387C0000, 0x387C2000, 0x387C4000, 0x387C6000, 0x387C8000, 0x387CA000, 0x387CC000, 0x387CE000, 0x387D0000, 0x387D2000, 0x387D4000, 0x387D6000, 0x387D8000, 0x387DA000, 0x387DC000, 0x387DE000, + 0x387E0000, 0x387E2000, 0x387E4000, 0x387E6000, 0x387E8000, 0x387EA000, 0x387EC000, 0x387EE000, 0x387F0000, 0x387F2000, 0x387F4000, 0x387F6000, 0x387F8000, 0x387FA000, 0x387FC000, 0x387FE000 }; + static const unsigned int exponent_table[64] = { + 0x00000000, 0x00800000, 0x01000000, 0x01800000, 0x02000000, 0x02800000, 0x03000000, 0x03800000, 0x04000000, 0x04800000, 0x05000000, 0x05800000, 0x06000000, 0x06800000, 0x07000000, 0x07800000, + 0x08000000, 0x08800000, 0x09000000, 0x09800000, 0x0A000000, 0x0A800000, 0x0B000000, 0x0B800000, 0x0C000000, 0x0C800000, 0x0D000000, 0x0D800000, 0x0E000000, 0x0E800000, 0x0F000000, 0x47800000, + 0x80000000, 0x80800000, 0x81000000, 0x81800000, 0x82000000, 0x82800000, 0x83000000, 0x83800000, 0x84000000, 0x84800000, 0x85000000, 0x85800000, 0x86000000, 0x86800000, 0x87000000, 0x87800000, + 0x88000000, 0x88800000, 0x89000000, 0x89800000, 0x8A000000, 0x8A800000, 0x8B000000, 0x8B800000, 0x8C000000, 0x8C800000, 0x8D000000, 0x8D800000, 0x8E000000, 0x8E800000, 0x8F000000, 0xC7800000 }; + static const 
unsigned short offset_table[64] = { + 0, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, + 0, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024 }; + unsigned int fbits = mantissa_table[offset_table[v >> 10] + (v & 0x3FF)] + exponent_table[v >> 10]; + + float out; + memcpy( &out, &fbits, sizeof( float ) ); + return out; +} diff --git a/D3D11Engine/zCModel.h b/D3D11Engine/zCModel.h index 29d930e0..1f8b13d3 100644 --- a/D3D11Engine/zCModel.h +++ b/D3D11Engine/zCModel.h @@ -185,8 +185,24 @@ class zCModel : public zCVisual { byte unsmoothAnisFix[] = {0x75, 0x00, 0xC7, 0x44, 0x24, 0x78, 0x01, 0x00, 0x00, 0x00}; // Replaces a jnz in AdvanceAnis - Thanks to killer-m! memcpy((void *)GothicMemoryLocations::zCModel::RPL_AniQuality, unsmoothAnisFix, sizeof(unsmoothAnisFix)); #endif*/ + +#ifdef BUILD_GOTHIC_2_6_fix + DetourAttach( &reinterpret_cast(HookedFunctions::OriginalFunctions.original_zCModelGetLowestLODNumPolys), Hooked_zCModelGetLowestLODNumPolys ); + DetourAttach( &reinterpret_cast(HookedFunctions::OriginalFunctions.original_zCModelGetLowestLODPoly), Hooked_zCModelGetLowestLODPoly ); +#endif + } + + /** Fix particle emitter setup */ +#ifdef BUILD_GOTHIC_2_6_fix + static int __fastcall Hooked_zCModelGetLowestLODNumPolys( void* thisptr ) { + return Engine::GAPI->GetLowestLODNumPolys_SkeletalMesh( static_cast(thisptr) ); } + static float3* __fastcall Hooked_zCModelGetLowestLODPoly( void* thisptr, void*, const int polyId, float3*& polyNormal ) { + return Engine::GAPI->GetLowestLODPoly_SkeletalMesh( static_cast(thisptr), polyId, polyNormal ); + } +#endif + /** Creates an array of matrices for the bone transforms */ void __fastcall RenderNodeList( zTRenderContext& renderContext, zCArray& boneTransforms, 
zCRenderLightContainer& lightContainer, int lightingMode = 0 ) { reinterpret_cast&, zCRenderLightContainer&, int )> @@ -298,7 +314,7 @@ class zCModel : public zCVisual { } /** Fills a vector of (viewspace) bone-transformation matrices for this frame */ - void GetBoneTransforms( std::vector* transforms, zCVob* vob = nullptr ) { + void GetBoneTransforms( std::vector* transforms ) { zCArray* nodeList = GetNodeList(); if ( !nodeList ) return; From 52ce66efde015a08c85bbe2ec03faa036440dbb5 Mon Sep 17 00:00:00 2001 From: SaiyansKing <38609240+SaiyansKing@users.noreply.github.com> Date: Sun, 8 Sep 2024 16:21:59 +0200 Subject: [PATCH 5/7] Update version --- D3D11Engine/pch.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/D3D11Engine/pch.h b/D3D11Engine/pch.h index 84f82cc3..d70cf670 100644 --- a/D3D11Engine/pch.h +++ b/D3D11Engine/pch.h @@ -33,7 +33,7 @@ using namespace DirectX; #define ENABLE_TESSELATION 0 #ifndef VERSION_NUMBER -#define VERSION_NUMBER "17.8-dev15" +#define VERSION_NUMBER "17.8-dev16" #endif __declspec(selectany) const char* VERSION_NUMBER_STR = VERSION_NUMBER; From 29af87c88d447ce46358617dc2b24c0f1e278d25 Mon Sep 17 00:00:00 2001 From: SaiyansKing <38609240+SaiyansKing@users.noreply.github.com> Date: Sun, 8 Sep 2024 21:51:03 +0200 Subject: [PATCH 6/7] Fix rain effect --- D3D11Engine/D3D11Effect.cpp | 19 ++++++++++--------- D3D11Engine/D3D11Effect.h | 2 +- D3D11Engine/D3D11ShaderManager.cpp | 6 +++--- D3D11Engine/D3D11VShader.cpp | 14 ++++++++++++++ D3D11Engine/Shaders/GS_Raindrops.hlsl | 2 +- D3D11Engine/Shaders/VS_AdvanceRain.hlsl | 4 ++-- .../Shaders/VS_ParticlePointShaded.hlsl | 4 ++-- D3D11Engine/WorldObjects.h | 8 ++++++++ 8 files changed, 41 insertions(+), 18 deletions(-) diff --git a/D3D11Engine/D3D11Effect.cpp b/D3D11Engine/D3D11Effect.cpp index db93aad0..ac107e36 100644 --- a/D3D11Engine/D3D11Effect.cpp +++ b/D3D11Engine/D3D11Effect.cpp @@ -34,14 +34,14 @@ D3D11Effect::~D3D11Effect() { HRESULT LoadTextureArray( 
Microsoft::WRL::ComPtr pd3dDevice, Microsoft::WRL::ComPtr context, char* sTexturePrefix, int iNumTextures, ID3D11Texture2D** ppTex2D, ID3D11ShaderResourceView** ppSRV ); /** Fills a vector of random raindrop data */ -void D3D11Effect::FillRandomRaindropData( std::vector& data ) { +void D3D11Effect::FillRandomRaindropData( std::vector& data ) { /** Base taken from Nvidias Rain-Sample **/ float radius = Engine::GAPI->GetRendererState().RendererSettings.RainRadiusRange; float height = Engine::GAPI->GetRendererState().RendererSettings.RainHeightRange; for ( size_t i = 0; i < data.size(); i++ ) { - ParticleInstanceInfo raindrop; + RainParticleInstanceInfo raindrop; //use rejection sampling to generate random points inside a circle of radius 1 centered at 0, 0 float SeedX; float SeedZ; @@ -82,7 +82,7 @@ void D3D11Effect::FillRandomRaindropData( std::vector& dat raindrop.color = float4( SeedX, SeedY, SeedZ, randomIncrease ); float height = 30.0f; - raindrop.scale = float3( height / 10.0f, height / 2.0f, 0.f ); + raindrop.scale = float2( height / 10.0f, height / 2.0f ); data[i] = raindrop; } @@ -118,15 +118,15 @@ XRESULT D3D11Effect::DrawRain() { e->CreateVertexBuffer( &RainBufferInitial ); UINT numParticles = Engine::GAPI->GetRendererState().RendererSettings.RainNumParticles; - std::vector particles( numParticles ); + std::vector particles( numParticles ); // Fill the vector with random raindrop data FillRandomRaindropData( particles ); // Create vertexbuffers - RainBufferInitial->Init( &particles[0], particles.size() * sizeof( ParticleInstanceInfo ), (D3D11VertexBuffer::EBindFlags)(D3D11VertexBuffer::B_VERTEXBUFFER), D3D11VertexBuffer::U_DEFAULT, D3D11VertexBuffer::CA_NONE, "D3D11Effect::DrawRain::RainBufferInitial" ); - RainBufferDrawFrom->Init( &particles[0], particles.size() * sizeof( ParticleInstanceInfo ), (D3D11VertexBuffer::EBindFlags)(D3D11VertexBuffer::B_VERTEXBUFFER | D3D11VertexBuffer::B_STREAM_OUT), D3D11VertexBuffer::U_DEFAULT, D3D11VertexBuffer::CA_NONE, 
"D3D11Effect::DrawRain::RainBufferDrawFrom" ); - RainBufferStreamTo->Init( &particles[0], particles.size() * sizeof( ParticleInstanceInfo ), (D3D11VertexBuffer::EBindFlags)(D3D11VertexBuffer::B_VERTEXBUFFER | D3D11VertexBuffer::B_STREAM_OUT), D3D11VertexBuffer::U_DEFAULT, D3D11VertexBuffer::CA_NONE, "D3D11Effect::DrawRain::RainBufferStreamTo" ); + RainBufferInitial->Init( &particles[0], particles.size() * sizeof( RainParticleInstanceInfo ), (D3D11VertexBuffer::EBindFlags)(D3D11VertexBuffer::B_VERTEXBUFFER), D3D11VertexBuffer::U_DEFAULT, D3D11VertexBuffer::CA_NONE, "D3D11Effect::DrawRain::RainBufferInitial" ); + RainBufferDrawFrom->Init( &particles[0], particles.size() * sizeof( RainParticleInstanceInfo ), (D3D11VertexBuffer::EBindFlags)(D3D11VertexBuffer::B_VERTEXBUFFER | D3D11VertexBuffer::B_STREAM_OUT), D3D11VertexBuffer::U_DEFAULT, D3D11VertexBuffer::CA_NONE, "D3D11Effect::DrawRain::RainBufferDrawFrom" ); + RainBufferStreamTo->Init( &particles[0], particles.size() * sizeof( RainParticleInstanceInfo ), (D3D11VertexBuffer::EBindFlags)(D3D11VertexBuffer::B_VERTEXBUFFER | D3D11VertexBuffer::B_STREAM_OUT), D3D11VertexBuffer::U_DEFAULT, D3D11VertexBuffer::CA_NONE, "D3D11Effect::DrawRain::RainBufferStreamTo" ); firstFrame = true; @@ -146,7 +146,7 @@ XRESULT D3D11Effect::DrawRain() { firstFrame = false; - UINT stride = sizeof( ParticleInstanceInfo ); + UINT stride = sizeof( RainParticleInstanceInfo ); UINT offset = 0; // Bind buffer to draw from last frame @@ -156,6 +156,7 @@ XRESULT D3D11Effect::DrawRain() { e->GetContext()->SOSetTargets( 1, RainBufferStreamTo->GetVertexBuffer().GetAddressOf(), &offset ); // Apply shaders + e->GetContext()->PSSetShader( nullptr, nullptr, 0 ); particleAdvanceVS->Apply(); streamOutGS->Apply(); @@ -236,7 +237,7 @@ XRESULT D3D11Effect::DrawRain() { e->GetContext()->PSSetShaderResources( 0, 1, RainTextureArraySRV.GetAddressOf() ); // Draw the vertexbuffer - e->DrawVertexBuffer( RainBufferDrawFrom, numParticles, sizeof( ParticleInstanceInfo 
) ); + e->DrawVertexBuffer( RainBufferDrawFrom, numParticles, sizeof( RainParticleInstanceInfo ) ); // Reset this e->GetContext()->IASetPrimitiveTopology( D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST ); diff --git a/D3D11Engine/D3D11Effect.h b/D3D11Engine/D3D11Effect.h index 4a874c32..a374ca0f 100644 --- a/D3D11Engine/D3D11Effect.h +++ b/D3D11Engine/D3D11Effect.h @@ -28,7 +28,7 @@ class D3D11Effect { protected: /** Fills a vector of random raindrop data */ - void FillRandomRaindropData( std::vector& data ); + void FillRandomRaindropData( std::vector& data ); /** Rain */ D3D11VertexBuffer* RainBufferInitial; diff --git a/D3D11Engine/D3D11ShaderManager.cpp b/D3D11Engine/D3D11ShaderManager.cpp index 79d14c90..9ed1c7eb 100644 --- a/D3D11Engine/D3D11ShaderManager.cpp +++ b/D3D11Engine/D3D11ShaderManager.cpp @@ -116,12 +116,12 @@ XRESULT D3D11ShaderManager::Init() { Shaders.push_back( ShaderInfo( "VS_ParticlePoint", "VS_ParticlePoint.hlsl", "v", 11 ) ); Shaders.back().cBufferSizes.push_back( sizeof( VS_ExConstantBuffer_PerFrame ) ); - Shaders.push_back( ShaderInfo( "VS_ParticlePointShaded", "VS_ParticlePointShaded.hlsl", "v", 11 ) ); + Shaders.push_back( ShaderInfo( "VS_ParticlePointShaded", "VS_ParticlePointShaded.hlsl", "v", 13 ) ); Shaders.back().cBufferSizes.push_back( sizeof( VS_ExConstantBuffer_PerFrame ) ); Shaders.back().cBufferSizes.push_back( sizeof( ParticlePointShadingConstantBuffer ) ); - Shaders.push_back( ShaderInfo( "VS_AdvanceRain", "VS_AdvanceRain.hlsl", "v", 11 ) ); + Shaders.push_back( ShaderInfo( "VS_AdvanceRain", "VS_AdvanceRain.hlsl", "v", 13 ) ); Shaders.back().cBufferSizes.push_back( sizeof( AdvanceRainConstantBuffer ) ); Shaders.push_back( ShaderInfo( "VS_Ocean", "VS_Ocean.hlsl", "v", 1 ) ); @@ -348,7 +348,7 @@ XRESULT D3D11ShaderManager::Init() { Shaders.push_back( ShaderInfo( "GS_Cubemap", "GS_Cubemap.hlsl", "g" ) ); Shaders.back().cBufferSizes.push_back( sizeof( CubemapGSConstantBuffer ) ); - Shaders.push_back( ShaderInfo( "GS_ParticleStreamOut", 
"VS_AdvanceRain.hlsl", "g", 11 ) ); + Shaders.push_back( ShaderInfo( "GS_ParticleStreamOut", "VS_AdvanceRain.hlsl", "g", 13 ) ); Shaders.back().cBufferSizes.push_back( sizeof( ParticleGSInfoConstantBuffer ) ); m.Name = "NORMALMAPPING"; diff --git a/D3D11Engine/D3D11VShader.cpp b/D3D11Engine/D3D11VShader.cpp index bcd85a38..12524735 100644 --- a/D3D11Engine/D3D11VShader.cpp +++ b/D3D11Engine/D3D11VShader.cpp @@ -151,6 +151,15 @@ XRESULT D3D11VShader::LoadShader( const char* vertexShader, int layout, const st { "INSTANCE_REMAP_INDEX", 0, DXGI_FORMAT_R32_UINT, 1, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_INSTANCE_DATA, 1}, }; + const D3D11_INPUT_ELEMENT_DESC layout13[] = + { + { "POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, + { "DIFFUSE", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, + { "SIZE", 0, DXGI_FORMAT_R32G32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, + { "TYPE", 0, DXGI_FORMAT_R32_UINT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, + { "VELOCITY", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, + }; + switch ( layout ) { case 1: LE( engine->GetDevice()->CreateInputLayout( layout1, ARRAYSIZE( layout1 ), vsBlob->GetBufferPointer(), @@ -211,6 +220,11 @@ XRESULT D3D11VShader::LoadShader( const char* vertexShader, int layout, const st LE( engine->GetDevice()->CreateInputLayout( layout12, ARRAYSIZE( layout12 ), vsBlob->GetBufferPointer(), vsBlob->GetBufferSize(), InputLayout.ReleaseAndGetAddressOf() ) ); break; + + case 13: + LE( engine->GetDevice()->CreateInputLayout( layout13, ARRAYSIZE( layout13 ), vsBlob->GetBufferPointer(), + vsBlob->GetBufferSize(), InputLayout.ReleaseAndGetAddressOf() ) ); + break; } return XR_SUCCESS; diff --git a/D3D11Engine/Shaders/GS_Raindrops.hlsl b/D3D11Engine/Shaders/GS_Raindrops.hlsl index 2f94c8df..5d4c405b 
100644 --- a/D3D11Engine/Shaders/GS_Raindrops.hlsl +++ b/D3D11Engine/Shaders/GS_Raindrops.hlsl @@ -1,4 +1,4 @@ -#include +#include cbuffer ParticleGSInfo : register( b2 ) { diff --git a/D3D11Engine/Shaders/VS_AdvanceRain.hlsl b/D3D11Engine/Shaders/VS_AdvanceRain.hlsl index f4a1ee69..a22dd4bf 100644 --- a/D3D11Engine/Shaders/VS_AdvanceRain.hlsl +++ b/D3D11Engine/Shaders/VS_AdvanceRain.hlsl @@ -31,7 +31,7 @@ struct VS_INPUT { float3 vPosition : POSITION; float4 vDiffuse : DIFFUSE; - float3 vSize : SIZE; + float2 vSize : SIZE; unsigned int type : TYPE; float3 vVelocity : VELOCITY; }; @@ -78,7 +78,7 @@ VS_OUTPUT VSMain( VS_INPUT Input ) Output.vPosition = Input.vPosition; Output.vDiffuse = Input.vDiffuse; - Output.vSize = Input.vSize.xy; + Output.vSize = Input.vSize; Output.vVelocity = Input.vVelocity; Output.type = Input.type; return Output; diff --git a/D3D11Engine/Shaders/VS_ParticlePointShaded.hlsl b/D3D11Engine/Shaders/VS_ParticlePointShaded.hlsl index d2970e34..44f071ee 100644 --- a/D3D11Engine/Shaders/VS_ParticlePointShaded.hlsl +++ b/D3D11Engine/Shaders/VS_ParticlePointShaded.hlsl @@ -23,7 +23,7 @@ struct VS_INPUT { float3 vPosition : POSITION; float4 vDiffuse : DIFFUSE; - float3 vSize : SIZE; + float2 vSize : SIZE; unsigned int type : TYPE; float3 vVelocity : VELOCITY; }; @@ -69,7 +69,7 @@ VS_OUTPUT VSMain( VS_INPUT Input ) Output.vPosition = Input.vPosition; Output.vDiffuse = Input.vDiffuse; //float4(Input.vDiffuse.gba, pow(Input.vDiffuse.r, 2.2f)); - Output.vSize = Input.vSize.xy; + Output.vSize = Input.vSize; Output.vVelocity = Input.vVelocity; Output.type = Input.type; return Output; diff --git a/D3D11Engine/WorldObjects.h b/D3D11Engine/WorldObjects.h index 7978b5aa..70094721 100644 --- a/D3D11Engine/WorldObjects.h +++ b/D3D11Engine/WorldObjects.h @@ -36,6 +36,14 @@ struct ParticleInstanceInfo { float3 velocity; }; +struct RainParticleInstanceInfo { + float3 position; + float4 color; + float2 scale; + int drawMode; // 0 = billboard, 1 = y-locked billboard, 
2 = y-plane, 3 = velo aligned + float3 velocity; +}; + struct MeshKey { zCTexture* Texture; zCMaterial* Material; From 4046b4e3291fafd838877ef271f5765eb9399db9 Mon Sep 17 00:00:00 2001 From: SaiyansKing <38609240+SaiyansKing@users.noreply.github.com> Date: Mon, 9 Sep 2024 01:49:25 +0200 Subject: [PATCH 7/7] Simd optimize quantization --- D3D11Engine/DLLMain.cpp | 193 +++++++++++++++++++++++++++++++++ D3D11Engine/GothicAPI.cpp | 45 ++++---- D3D11Engine/WorldConverter.cpp | 13 ++- D3D11Engine/pch.h | 170 ++--------------------------- 4 files changed, 237 insertions(+), 184 deletions(-) diff --git a/D3D11Engine/DLLMain.cpp b/D3D11Engine/DLLMain.cpp index a63ea182..add84e13 100644 --- a/D3D11Engine/DLLMain.cpp +++ b/D3D11Engine/DLLMain.cpp @@ -25,6 +25,12 @@ extern "C" { _declspec(dllexport) DWORD AmdPowerXpressRequestHighPerformance = 0x00000001; } +ZQuantizeHalfFloat QuantizeHalfFloat; +ZQuantizeHalfFloat_X4 QuantizeHalfFloat_X4; +ZUnquantizeHalfFloat UnquantizeHalfFloat; +ZUnquantizeHalfFloat_X4 UnquantizeHalfFloat_X4; +ZUnquantizeHalfFloat_X4 UnquantizeHalfFloat_X8; + static HINSTANCE hLThis = 0; typedef void (WINAPI* DirectDrawSimple)(); @@ -38,6 +44,170 @@ WinMainFunc originalWinMain = reinterpret_cast(GothicMemoryLocation bool FeatureLevel10Compatibility = false; bool GMPModeActive = false; +unsigned short QuantizeHalfFloat_Scalar( float input ) +{ + union { float f; unsigned int ui; } u = { input }; + unsigned int ui = u.ui; + + int s = ( ui >> 16 ) & 0x8000; + int em = ui & 0x7fffffff; + + int h = ( em - ( 112 << 23 ) + ( 1 << 12 ) ) >> 13; + h = ( em < ( 113 << 23 ) ) ? 0 : h; + h = ( em >= ( 143 << 23 ) ) ? 0x7c00 : h; + h = ( em > ( 255 << 23 ) ) ? 
0x7e00 : h; + return static_cast(s | h); +} + +void QuantizeHalfFloats_X4_SSE2( float* input, unsigned short* output ) +{ + __m128i v = _mm_castps_si128( _mm_load_ps( input ) ); + __m128i s = _mm_and_si128( _mm_srli_epi32( v, 16 ), _mm_set1_epi32( 0x8000 ) ); + __m128i em = _mm_and_si128( v, _mm_set1_epi32( 0x7FFFFFFF ) ); + __m128i h = _mm_srli_epi32( _mm_sub_epi32( em, _mm_set1_epi32( 0x37FFF000 ) ), 13 ); + + __m128i mask = _mm_cmplt_epi32( em, _mm_set1_epi32( 0x38800000 ) ); + h = _mm_or_si128( _mm_and_si128( mask, _mm_setzero_si128() ), _mm_andnot_si128( mask, h ) ); + + mask = _mm_cmpgt_epi32( em, _mm_set1_epi32( 0x47800000 - 1 ) ); + h = _mm_or_si128( _mm_and_si128( mask, _mm_set1_epi32( 0x7C00 ) ), _mm_andnot_si128( mask, h ) ); + + mask = _mm_cmpgt_epi32( em, _mm_set1_epi32( 0x7F800000 ) ); + h = _mm_or_si128( _mm_and_si128( mask, _mm_set1_epi32( 0x7E00 ) ), _mm_andnot_si128( mask, h ) ); + + // We need to stay in int16_t range due to signed saturation + __m128i halfs = _mm_sub_epi32( _mm_or_si128( s, h ), _mm_set1_epi32( 32768 ) ); + _mm_store_sd( reinterpret_cast(output), _mm_castsi128_pd( _mm_add_epi16( _mm_packs_epi32( halfs, halfs ), _mm_set1_epi16( 32768 ) ) ) ); +} + +void QuantizeHalfFloats_X4_SSE41( float* input, unsigned short* output ) +{ + __m128i v = _mm_castps_si128( _mm_load_ps( input ) ); + __m128i s = _mm_and_si128( _mm_srli_epi32( v, 16 ), _mm_set1_epi32( 0x8000 ) ); + __m128i em = _mm_and_si128( v, _mm_set1_epi32( 0x7FFFFFFF ) ); + __m128i h = _mm_srli_epi32( _mm_sub_epi32( em, _mm_set1_epi32( 0x37FFF000 ) ), 13 ); + + __m128i mask = _mm_cmplt_epi32( em, _mm_set1_epi32( 0x38800000 ) ); + h = _mm_blendv_epi8( h, _mm_setzero_si128(), mask ); + + mask = _mm_cmpgt_epi32( em, _mm_set1_epi32( 0x47800000 - 1 ) ); + h = _mm_blendv_epi8( h, _mm_set1_epi32( 0x7C00 ), mask ); + + mask = _mm_cmpgt_epi32( em, _mm_set1_epi32( 0x7F800000 ) ); + h = _mm_blendv_epi8( h, _mm_set1_epi32( 0x7E00 ), mask ); + + __m128i halfs = _mm_or_si128( s, h ); + 
_mm_store_sd( reinterpret_cast(output), _mm_castsi128_pd( _mm_packus_epi32( halfs, halfs ) ) ); +} + +#ifdef _XM_AVX_INTRINSICS_ +unsigned short QuantizeHalfFloat_F16C( float input ) +{ + return static_cast(_mm_cvtsi128_si32( _mm_cvtps_ph( _mm_set_ss( input ), _MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC ) )); +} + +void QuantizeHalfFloats_X4_F16C( float* input, unsigned short* output ) +{ + _mm_store_sd( reinterpret_cast(output), _mm_castsi128_pd( _mm_cvtps_ph( _mm_load_ps( input ), _MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC ) ) ); +} +#endif + +float UnquantizeHalfFloat_Scalar( unsigned short input ) +{ + unsigned int s = input & 0x8000; + unsigned int m = input & 0x03FF; + unsigned int e = input & 0x7C00; + e += 0x0001C000; + + float out; + unsigned int r = (s << 16) | (m << 13) | (e << 13); + memcpy( &out, &r, sizeof( float ) ); + return out; +} + +void UnquantizeHalfFloat_X4_SSE2( unsigned short* input, float* output ) +{ + const __m128i mask_zero = _mm_setzero_si128(); + const __m128i mask_s = _mm_set1_epi16( 0x8000 ); + const __m128i mask_m = _mm_set1_epi16( 0x03FF ); + const __m128i mask_e = _mm_set1_epi16( 0x7C00 ); + const __m128i bias_e = _mm_set1_epi32( 0x0001C000 ); + + __m128i halfs = _mm_loadl_epi64( reinterpret_cast(input) ); + + __m128i s = _mm_and_si128( halfs, mask_s ); + __m128i m = _mm_and_si128( halfs, mask_m ); + __m128i e = _mm_and_si128( halfs, mask_e ); + + __m128i s4 = _mm_unpacklo_epi16( s, mask_zero ); + s4 = _mm_slli_epi32( s4, 16 ); + + __m128i m4 = _mm_unpacklo_epi16( m, mask_zero ); + m4 = _mm_slli_epi32( m4, 13 ); + + __m128i e4 = _mm_unpacklo_epi16( e, mask_zero ); + e4 = _mm_add_epi32( e4, bias_e ); + e4 = _mm_slli_epi32( e4, 13 ); + + _mm_store_si128( reinterpret_cast<__m128i*>(output), _mm_or_si128( s4, _mm_or_si128( e4, m4 ) ) ); +} + +void UnquantizeHalfFloat_X8_SSE2( unsigned short* input, float* output ) +{ + const __m128i mask_zero = _mm_setzero_si128(); + const __m128i mask_s = _mm_set1_epi16( 0x8000 ); + const __m128i 
mask_m = _mm_set1_epi16( 0x03FF ); + const __m128i mask_e = _mm_set1_epi16( 0x7C00 ); + const __m128i bias_e = _mm_set1_epi32( 0x0001C000 ); + + __m128i halfs = _mm_load_si128( reinterpret_cast(input) ); + + __m128i s = _mm_and_si128( halfs, mask_s ); + __m128i m = _mm_and_si128( halfs, mask_m ); + __m128i e = _mm_and_si128( halfs, mask_e ); + + __m128i s4 = _mm_unpacklo_epi16( s, mask_zero ); + s4 = _mm_slli_epi32( s4, 16 ); + + __m128i m4 = _mm_unpacklo_epi16( m, mask_zero ); + m4 = _mm_slli_epi32( m4, 13 ); + + __m128i e4 = _mm_unpacklo_epi16( e, mask_zero ); + e4 = _mm_add_epi32( e4, bias_e ); + e4 = _mm_slli_epi32( e4, 13 ); + + _mm_store_si128( reinterpret_cast<__m128i*>(output + 0), _mm_or_si128( s4, _mm_or_si128( e4, m4 ) ) ); + + s4 = _mm_unpackhi_epi16( s, mask_zero ); + s4 = _mm_slli_epi32( s4, 16 ); + + m4 = _mm_unpackhi_epi16( m, mask_zero ); + m4 = _mm_slli_epi32( m4, 13 ); + + e4 = _mm_unpackhi_epi16( e, mask_zero ); + e4 = _mm_add_epi32( e4, bias_e ); + e4 = _mm_slli_epi32( e4, 13 ); + + _mm_store_si128( reinterpret_cast<__m128i*>(output + 4), _mm_or_si128( s4, _mm_or_si128( e4, m4 ) ) ); +} + +#ifdef _XM_AVX_INTRINSICS_ +float UnquantizeHalfFloat_F16C( unsigned short input ) +{ + return _mm_cvtss_f32( _mm_cvtph_ps( _mm_cvtsi32_si128( input ) ) ); +} + +void UnquantizeHalfFloat_X4_F16C( unsigned short* input, float* output ) +{ + _mm_store_ps( output, _mm_cvtph_ps( _mm_loadl_epi64( reinterpret_cast(input) ) ) ); +} + +void UnquantizeHalfFloat_X8_F16C( unsigned short* input, float* output ) +{ + _mm256_store_ps( output, _mm256_cvtph_ps( _mm_load_si128( reinterpret_cast(input) ) ) ); +} +#endif + void SignalHandler( int signal ) { LogInfo() << "Signal:" << signal; throw "!Access Violation!"; @@ -206,6 +376,29 @@ void CheckPlatformSupport() { #elif __SSE__ support_message( "SSE", InstructionSet::SSE() ); #endif + +#ifdef _XM_AVX_INTRINSICS_ + if ( InstructionSet::F16C() ) { + QuantizeHalfFloat = QuantizeHalfFloat_F16C; + QuantizeHalfFloat_X4 = 
QuantizeHalfFloats_X4_F16C; + UnquantizeHalfFloat = UnquantizeHalfFloat_F16C; + UnquantizeHalfFloat_X4 = UnquantizeHalfFloat_X4_F16C; + UnquantizeHalfFloat_X8 = UnquantizeHalfFloat_X8_F16C; + } else +#endif + if ( InstructionSet::SSE41() ) { + QuantizeHalfFloat = QuantizeHalfFloat_Scalar; + QuantizeHalfFloat_X4 = QuantizeHalfFloats_X4_SSE41; + UnquantizeHalfFloat = UnquantizeHalfFloat_Scalar; + UnquantizeHalfFloat_X4 = UnquantizeHalfFloat_X4_SSE2; + UnquantizeHalfFloat_X8 = UnquantizeHalfFloat_X8_SSE2; + } else { + QuantizeHalfFloat = QuantizeHalfFloat_Scalar; + QuantizeHalfFloat_X4 = QuantizeHalfFloats_X4_SSE2; + UnquantizeHalfFloat = UnquantizeHalfFloat_Scalar; + UnquantizeHalfFloat_X4 = UnquantizeHalfFloat_X4_SSE2; + UnquantizeHalfFloat_X8 = UnquantizeHalfFloat_X8_SSE2; + } } #if defined(BUILD_GOTHIC_2_6_fix) diff --git a/D3D11Engine/GothicAPI.cpp b/D3D11Engine/GothicAPI.cpp index b2fd7681..89e4fa09 100644 --- a/D3D11Engine/GothicAPI.cpp +++ b/D3D11Engine/GothicAPI.cpp @@ -1929,27 +1929,32 @@ float3* GothicAPI::GetLowestLODPoly_SkeletalMesh( zCModel* model, const int poly VERTEX_INDEX _polyId = mesh->Indices[polyIndex + i]; ExSkelVertexStruct& _polyVert = mesh->Vertices[_polyId]; + alignas(32) float floats_0[8]; + alignas(32) float floats_1[8]; + alignas(16) unsigned short half2float_0[8] = { _polyVert.Position[0][0], _polyVert.Position[0][1], _polyVert.Position[0][2], _polyVert.weights[0], + _polyVert.Position[1][0], _polyVert.Position[1][1], _polyVert.Position[1][2], _polyVert.weights[1] }; + alignas(16) unsigned short half2float_1[8] = { _polyVert.Position[2][0], _polyVert.Position[2][1], _polyVert.Position[2][2], _polyVert.weights[2], + _polyVert.Position[3][0], _polyVert.Position[3][1], _polyVert.Position[3][2], _polyVert.weights[3] }; + UnquantizeHalfFloat_X8( half2float_0, floats_0 ); + UnquantizeHalfFloat_X8( half2float_1, floats_1 ); + XMVECTOR position = XMVectorZero(); - position += XMVectorReplicate( unquantizeHalfFloat( _polyVert.weights[0] ) ) * 
XMVector3Transform( - XMVectorSet( unquantizeHalfFloat( _polyVert.Position[0][0] ), - unquantizeHalfFloat( _polyVert.Position[0][1] ), - unquantizeHalfFloat( _polyVert.Position[0][2] ), 1.f ), XMMatrixTranspose( XMLoadFloat4x4( &transforms[_polyVert.boneIndices[0]] ) ) ); - - position += XMVectorReplicate( unquantizeHalfFloat( _polyVert.weights[1] ) ) * XMVector3Transform( - XMVectorSet( unquantizeHalfFloat( _polyVert.Position[1][0] ), - unquantizeHalfFloat( _polyVert.Position[1][1] ), - unquantizeHalfFloat( _polyVert.Position[1][2] ), 1.f ), XMMatrixTranspose( XMLoadFloat4x4( &transforms[_polyVert.boneIndices[1]] ) ) ); - - position += XMVectorReplicate( unquantizeHalfFloat( _polyVert.weights[2] ) ) * XMVector3Transform( - XMVectorSet( unquantizeHalfFloat( _polyVert.Position[2][0] ), - unquantizeHalfFloat( _polyVert.Position[2][1] ), - unquantizeHalfFloat( _polyVert.Position[2][2] ), 1.f ), XMMatrixTranspose( XMLoadFloat4x4( &transforms[_polyVert.boneIndices[2]] ) ) ); - - position += XMVectorReplicate( unquantizeHalfFloat( _polyVert.weights[3] ) ) * XMVector3Transform( - XMVectorSet( unquantizeHalfFloat( _polyVert.Position[3][0] ), - unquantizeHalfFloat( _polyVert.Position[3][1] ), - unquantizeHalfFloat( _polyVert.Position[3][2] ), 1.f ), XMMatrixTranspose( XMLoadFloat4x4( &transforms[_polyVert.boneIndices[3]] ) ) ); - + position += XMVectorReplicate( floats_0[3] ) * XMVector3Transform( + XMVectorSet( floats_0[0], floats_0[1], floats_0[2], 1.f ), + XMMatrixTranspose( XMLoadFloat4x4( &transforms[_polyVert.boneIndices[0]] ) ) ); + + position += XMVectorReplicate( floats_0[7] ) * XMVector3Transform( + XMVectorSet( floats_0[4], floats_0[5], floats_0[6], 1.f ), + XMMatrixTranspose( XMLoadFloat4x4( &transforms[_polyVert.boneIndices[1]] ) ) ); + + position += XMVectorReplicate( floats_1[3] ) * XMVector3Transform( + XMVectorSet( floats_1[0], floats_1[1], floats_1[2], 1.f ), + XMMatrixTranspose( XMLoadFloat4x4( &transforms[_polyVert.boneIndices[2]] ) ) ); + + position += 
XMVectorReplicate( floats_1[7] ) * XMVector3Transform( + XMVectorSet( floats_1[4], floats_1[5], floats_1[6], 1.f ), + XMMatrixTranspose( XMLoadFloat4x4( &transforms[_polyVert.boneIndices[3]] ) ) ); + position += XMVectorReplicate( fatness ) * XMLoadFloat3( reinterpret_cast(&_polyVert.BindPoseNormal) ) ; // world matrix is applied later when particle calculate world position diff --git a/D3D11Engine/WorldConverter.cpp b/D3D11Engine/WorldConverter.cpp index 6e949047..b7230a94 100644 --- a/D3D11Engine/WorldConverter.cpp +++ b/D3D11Engine/WorldConverter.cpp @@ -788,11 +788,16 @@ void WorldConverter::ExtractSkeletalMeshFromVob( zCModel* model, SkeletalMeshVis // Get index and weight if ( n < 4 ) { - vx.weights[n] = quantizeHalfFloat( weightEntry.Weight ); + alignas(16) float floats[4] = { weightEntry.VertexPosition.x, weightEntry.VertexPosition.y, + weightEntry.VertexPosition.z, weightEntry.Weight }; + alignas(16) unsigned short halfs[4]; + QuantizeHalfFloat_X4( floats, halfs ); + + vx.weights[n] = halfs[3]; vx.boneIndices[n] = weightEntry.NodeIndex; - vx.Position[n][0] = quantizeHalfFloat( weightEntry.VertexPosition.x ); - vx.Position[n][1] = quantizeHalfFloat( weightEntry.VertexPosition.y ); - vx.Position[n][2] = quantizeHalfFloat( weightEntry.VertexPosition.z ); + vx.Position[n][0] = halfs[0]; + vx.Position[n][1] = halfs[1]; + vx.Position[n][2] = halfs[2]; } } diff --git a/D3D11Engine/pch.h b/D3D11Engine/pch.h index d70cf670..a55e1c1d 100644 --- a/D3D11Engine/pch.h +++ b/D3D11Engine/pch.h @@ -61,163 +61,13 @@ void DebugWrite_i( LPCSTR lpDebugMessage, void* thisptr ); /** Computes the size in bytes of the given FVF */ int ComputeFVFSize( DWORD fvf ); -inline unsigned short quantizeHalfFloat( float v ) -{ - union { float f; unsigned int ui; } u = { v }; - unsigned int ui = u.ui; - - int s = ( ui >> 16 ) & 0x8000; - int em = ui & 0x7fffffff; - - int h = ( em - ( 112 << 23 ) + ( 1 << 12 ) ) >> 13; - h = ( em < ( 113 << 23 ) ) ? 0 : h; - h = ( em >= ( 143 << 23 ) ) ? 
0x7c00 : h; - h = ( em > ( 255 << 23 ) ) ? 0x7e00 : h; - return static_cast(s | h); -} - -inline float unquantizeHalfFloat( unsigned short v ) -{ - static const unsigned int mantissa_table[2048] = { - 0x00000000, 0x33800000, 0x34000000, 0x34400000, 0x34800000, 0x34A00000, 0x34C00000, 0x34E00000, 0x35000000, 0x35100000, 0x35200000, 0x35300000, 0x35400000, 0x35500000, 0x35600000, 0x35700000, - 0x35800000, 0x35880000, 0x35900000, 0x35980000, 0x35A00000, 0x35A80000, 0x35B00000, 0x35B80000, 0x35C00000, 0x35C80000, 0x35D00000, 0x35D80000, 0x35E00000, 0x35E80000, 0x35F00000, 0x35F80000, - 0x36000000, 0x36040000, 0x36080000, 0x360C0000, 0x36100000, 0x36140000, 0x36180000, 0x361C0000, 0x36200000, 0x36240000, 0x36280000, 0x362C0000, 0x36300000, 0x36340000, 0x36380000, 0x363C0000, - 0x36400000, 0x36440000, 0x36480000, 0x364C0000, 0x36500000, 0x36540000, 0x36580000, 0x365C0000, 0x36600000, 0x36640000, 0x36680000, 0x366C0000, 0x36700000, 0x36740000, 0x36780000, 0x367C0000, - 0x36800000, 0x36820000, 0x36840000, 0x36860000, 0x36880000, 0x368A0000, 0x368C0000, 0x368E0000, 0x36900000, 0x36920000, 0x36940000, 0x36960000, 0x36980000, 0x369A0000, 0x369C0000, 0x369E0000, - 0x36A00000, 0x36A20000, 0x36A40000, 0x36A60000, 0x36A80000, 0x36AA0000, 0x36AC0000, 0x36AE0000, 0x36B00000, 0x36B20000, 0x36B40000, 0x36B60000, 0x36B80000, 0x36BA0000, 0x36BC0000, 0x36BE0000, - 0x36C00000, 0x36C20000, 0x36C40000, 0x36C60000, 0x36C80000, 0x36CA0000, 0x36CC0000, 0x36CE0000, 0x36D00000, 0x36D20000, 0x36D40000, 0x36D60000, 0x36D80000, 0x36DA0000, 0x36DC0000, 0x36DE0000, - 0x36E00000, 0x36E20000, 0x36E40000, 0x36E60000, 0x36E80000, 0x36EA0000, 0x36EC0000, 0x36EE0000, 0x36F00000, 0x36F20000, 0x36F40000, 0x36F60000, 0x36F80000, 0x36FA0000, 0x36FC0000, 0x36FE0000, - 0x37000000, 0x37010000, 0x37020000, 0x37030000, 0x37040000, 0x37050000, 0x37060000, 0x37070000, 0x37080000, 0x37090000, 0x370A0000, 0x370B0000, 0x370C0000, 0x370D0000, 0x370E0000, 0x370F0000, - 0x37100000, 0x37110000, 0x37120000, 0x37130000, 
0x37140000, 0x37150000, 0x37160000, 0x37170000, 0x37180000, 0x37190000, 0x371A0000, 0x371B0000, 0x371C0000, 0x371D0000, 0x371E0000, 0x371F0000, - 0x37200000, 0x37210000, 0x37220000, 0x37230000, 0x37240000, 0x37250000, 0x37260000, 0x37270000, 0x37280000, 0x37290000, 0x372A0000, 0x372B0000, 0x372C0000, 0x372D0000, 0x372E0000, 0x372F0000, - 0x37300000, 0x37310000, 0x37320000, 0x37330000, 0x37340000, 0x37350000, 0x37360000, 0x37370000, 0x37380000, 0x37390000, 0x373A0000, 0x373B0000, 0x373C0000, 0x373D0000, 0x373E0000, 0x373F0000, - 0x37400000, 0x37410000, 0x37420000, 0x37430000, 0x37440000, 0x37450000, 0x37460000, 0x37470000, 0x37480000, 0x37490000, 0x374A0000, 0x374B0000, 0x374C0000, 0x374D0000, 0x374E0000, 0x374F0000, - 0x37500000, 0x37510000, 0x37520000, 0x37530000, 0x37540000, 0x37550000, 0x37560000, 0x37570000, 0x37580000, 0x37590000, 0x375A0000, 0x375B0000, 0x375C0000, 0x375D0000, 0x375E0000, 0x375F0000, - 0x37600000, 0x37610000, 0x37620000, 0x37630000, 0x37640000, 0x37650000, 0x37660000, 0x37670000, 0x37680000, 0x37690000, 0x376A0000, 0x376B0000, 0x376C0000, 0x376D0000, 0x376E0000, 0x376F0000, - 0x37700000, 0x37710000, 0x37720000, 0x37730000, 0x37740000, 0x37750000, 0x37760000, 0x37770000, 0x37780000, 0x37790000, 0x377A0000, 0x377B0000, 0x377C0000, 0x377D0000, 0x377E0000, 0x377F0000, - 0x37800000, 0x37808000, 0x37810000, 0x37818000, 0x37820000, 0x37828000, 0x37830000, 0x37838000, 0x37840000, 0x37848000, 0x37850000, 0x37858000, 0x37860000, 0x37868000, 0x37870000, 0x37878000, - 0x37880000, 0x37888000, 0x37890000, 0x37898000, 0x378A0000, 0x378A8000, 0x378B0000, 0x378B8000, 0x378C0000, 0x378C8000, 0x378D0000, 0x378D8000, 0x378E0000, 0x378E8000, 0x378F0000, 0x378F8000, - 0x37900000, 0x37908000, 0x37910000, 0x37918000, 0x37920000, 0x37928000, 0x37930000, 0x37938000, 0x37940000, 0x37948000, 0x37950000, 0x37958000, 0x37960000, 0x37968000, 0x37970000, 0x37978000, - 0x37980000, 0x37988000, 0x37990000, 0x37998000, 0x379A0000, 0x379A8000, 0x379B0000, 0x379B8000, 0x379C0000, 
0x379C8000, 0x379D0000, 0x379D8000, 0x379E0000, 0x379E8000, 0x379F0000, 0x379F8000, - 0x37A00000, 0x37A08000, 0x37A10000, 0x37A18000, 0x37A20000, 0x37A28000, 0x37A30000, 0x37A38000, 0x37A40000, 0x37A48000, 0x37A50000, 0x37A58000, 0x37A60000, 0x37A68000, 0x37A70000, 0x37A78000, - 0x37A80000, 0x37A88000, 0x37A90000, 0x37A98000, 0x37AA0000, 0x37AA8000, 0x37AB0000, 0x37AB8000, 0x37AC0000, 0x37AC8000, 0x37AD0000, 0x37AD8000, 0x37AE0000, 0x37AE8000, 0x37AF0000, 0x37AF8000, - 0x37B00000, 0x37B08000, 0x37B10000, 0x37B18000, 0x37B20000, 0x37B28000, 0x37B30000, 0x37B38000, 0x37B40000, 0x37B48000, 0x37B50000, 0x37B58000, 0x37B60000, 0x37B68000, 0x37B70000, 0x37B78000, - 0x37B80000, 0x37B88000, 0x37B90000, 0x37B98000, 0x37BA0000, 0x37BA8000, 0x37BB0000, 0x37BB8000, 0x37BC0000, 0x37BC8000, 0x37BD0000, 0x37BD8000, 0x37BE0000, 0x37BE8000, 0x37BF0000, 0x37BF8000, - 0x37C00000, 0x37C08000, 0x37C10000, 0x37C18000, 0x37C20000, 0x37C28000, 0x37C30000, 0x37C38000, 0x37C40000, 0x37C48000, 0x37C50000, 0x37C58000, 0x37C60000, 0x37C68000, 0x37C70000, 0x37C78000, - 0x37C80000, 0x37C88000, 0x37C90000, 0x37C98000, 0x37CA0000, 0x37CA8000, 0x37CB0000, 0x37CB8000, 0x37CC0000, 0x37CC8000, 0x37CD0000, 0x37CD8000, 0x37CE0000, 0x37CE8000, 0x37CF0000, 0x37CF8000, - 0x37D00000, 0x37D08000, 0x37D10000, 0x37D18000, 0x37D20000, 0x37D28000, 0x37D30000, 0x37D38000, 0x37D40000, 0x37D48000, 0x37D50000, 0x37D58000, 0x37D60000, 0x37D68000, 0x37D70000, 0x37D78000, - 0x37D80000, 0x37D88000, 0x37D90000, 0x37D98000, 0x37DA0000, 0x37DA8000, 0x37DB0000, 0x37DB8000, 0x37DC0000, 0x37DC8000, 0x37DD0000, 0x37DD8000, 0x37DE0000, 0x37DE8000, 0x37DF0000, 0x37DF8000, - 0x37E00000, 0x37E08000, 0x37E10000, 0x37E18000, 0x37E20000, 0x37E28000, 0x37E30000, 0x37E38000, 0x37E40000, 0x37E48000, 0x37E50000, 0x37E58000, 0x37E60000, 0x37E68000, 0x37E70000, 0x37E78000, - 0x37E80000, 0x37E88000, 0x37E90000, 0x37E98000, 0x37EA0000, 0x37EA8000, 0x37EB0000, 0x37EB8000, 0x37EC0000, 0x37EC8000, 0x37ED0000, 0x37ED8000, 0x37EE0000, 0x37EE8000, 
0x37EF0000, 0x37EF8000, - 0x37F00000, 0x37F08000, 0x37F10000, 0x37F18000, 0x37F20000, 0x37F28000, 0x37F30000, 0x37F38000, 0x37F40000, 0x37F48000, 0x37F50000, 0x37F58000, 0x37F60000, 0x37F68000, 0x37F70000, 0x37F78000, - 0x37F80000, 0x37F88000, 0x37F90000, 0x37F98000, 0x37FA0000, 0x37FA8000, 0x37FB0000, 0x37FB8000, 0x37FC0000, 0x37FC8000, 0x37FD0000, 0x37FD8000, 0x37FE0000, 0x37FE8000, 0x37FF0000, 0x37FF8000, - 0x38000000, 0x38004000, 0x38008000, 0x3800C000, 0x38010000, 0x38014000, 0x38018000, 0x3801C000, 0x38020000, 0x38024000, 0x38028000, 0x3802C000, 0x38030000, 0x38034000, 0x38038000, 0x3803C000, - 0x38040000, 0x38044000, 0x38048000, 0x3804C000, 0x38050000, 0x38054000, 0x38058000, 0x3805C000, 0x38060000, 0x38064000, 0x38068000, 0x3806C000, 0x38070000, 0x38074000, 0x38078000, 0x3807C000, - 0x38080000, 0x38084000, 0x38088000, 0x3808C000, 0x38090000, 0x38094000, 0x38098000, 0x3809C000, 0x380A0000, 0x380A4000, 0x380A8000, 0x380AC000, 0x380B0000, 0x380B4000, 0x380B8000, 0x380BC000, - 0x380C0000, 0x380C4000, 0x380C8000, 0x380CC000, 0x380D0000, 0x380D4000, 0x380D8000, 0x380DC000, 0x380E0000, 0x380E4000, 0x380E8000, 0x380EC000, 0x380F0000, 0x380F4000, 0x380F8000, 0x380FC000, - 0x38100000, 0x38104000, 0x38108000, 0x3810C000, 0x38110000, 0x38114000, 0x38118000, 0x3811C000, 0x38120000, 0x38124000, 0x38128000, 0x3812C000, 0x38130000, 0x38134000, 0x38138000, 0x3813C000, - 0x38140000, 0x38144000, 0x38148000, 0x3814C000, 0x38150000, 0x38154000, 0x38158000, 0x3815C000, 0x38160000, 0x38164000, 0x38168000, 0x3816C000, 0x38170000, 0x38174000, 0x38178000, 0x3817C000, - 0x38180000, 0x38184000, 0x38188000, 0x3818C000, 0x38190000, 0x38194000, 0x38198000, 0x3819C000, 0x381A0000, 0x381A4000, 0x381A8000, 0x381AC000, 0x381B0000, 0x381B4000, 0x381B8000, 0x381BC000, - 0x381C0000, 0x381C4000, 0x381C8000, 0x381CC000, 0x381D0000, 0x381D4000, 0x381D8000, 0x381DC000, 0x381E0000, 0x381E4000, 0x381E8000, 0x381EC000, 0x381F0000, 0x381F4000, 0x381F8000, 0x381FC000, - 0x38200000, 0x38204000, 
0x38208000, 0x3820C000, 0x38210000, 0x38214000, 0x38218000, 0x3821C000, 0x38220000, 0x38224000, 0x38228000, 0x3822C000, 0x38230000, 0x38234000, 0x38238000, 0x3823C000, - 0x38240000, 0x38244000, 0x38248000, 0x3824C000, 0x38250000, 0x38254000, 0x38258000, 0x3825C000, 0x38260000, 0x38264000, 0x38268000, 0x3826C000, 0x38270000, 0x38274000, 0x38278000, 0x3827C000, - 0x38280000, 0x38284000, 0x38288000, 0x3828C000, 0x38290000, 0x38294000, 0x38298000, 0x3829C000, 0x382A0000, 0x382A4000, 0x382A8000, 0x382AC000, 0x382B0000, 0x382B4000, 0x382B8000, 0x382BC000, - 0x382C0000, 0x382C4000, 0x382C8000, 0x382CC000, 0x382D0000, 0x382D4000, 0x382D8000, 0x382DC000, 0x382E0000, 0x382E4000, 0x382E8000, 0x382EC000, 0x382F0000, 0x382F4000, 0x382F8000, 0x382FC000, - 0x38300000, 0x38304000, 0x38308000, 0x3830C000, 0x38310000, 0x38314000, 0x38318000, 0x3831C000, 0x38320000, 0x38324000, 0x38328000, 0x3832C000, 0x38330000, 0x38334000, 0x38338000, 0x3833C000, - 0x38340000, 0x38344000, 0x38348000, 0x3834C000, 0x38350000, 0x38354000, 0x38358000, 0x3835C000, 0x38360000, 0x38364000, 0x38368000, 0x3836C000, 0x38370000, 0x38374000, 0x38378000, 0x3837C000, - 0x38380000, 0x38384000, 0x38388000, 0x3838C000, 0x38390000, 0x38394000, 0x38398000, 0x3839C000, 0x383A0000, 0x383A4000, 0x383A8000, 0x383AC000, 0x383B0000, 0x383B4000, 0x383B8000, 0x383BC000, - 0x383C0000, 0x383C4000, 0x383C8000, 0x383CC000, 0x383D0000, 0x383D4000, 0x383D8000, 0x383DC000, 0x383E0000, 0x383E4000, 0x383E8000, 0x383EC000, 0x383F0000, 0x383F4000, 0x383F8000, 0x383FC000, - 0x38400000, 0x38404000, 0x38408000, 0x3840C000, 0x38410000, 0x38414000, 0x38418000, 0x3841C000, 0x38420000, 0x38424000, 0x38428000, 0x3842C000, 0x38430000, 0x38434000, 0x38438000, 0x3843C000, - 0x38440000, 0x38444000, 0x38448000, 0x3844C000, 0x38450000, 0x38454000, 0x38458000, 0x3845C000, 0x38460000, 0x38464000, 0x38468000, 0x3846C000, 0x38470000, 0x38474000, 0x38478000, 0x3847C000, - 0x38480000, 0x38484000, 0x38488000, 0x3848C000, 0x38490000, 0x38494000, 0x38498000, 
0x3849C000, 0x384A0000, 0x384A4000, 0x384A8000, 0x384AC000, 0x384B0000, 0x384B4000, 0x384B8000, 0x384BC000, - 0x384C0000, 0x384C4000, 0x384C8000, 0x384CC000, 0x384D0000, 0x384D4000, 0x384D8000, 0x384DC000, 0x384E0000, 0x384E4000, 0x384E8000, 0x384EC000, 0x384F0000, 0x384F4000, 0x384F8000, 0x384FC000, - 0x38500000, 0x38504000, 0x38508000, 0x3850C000, 0x38510000, 0x38514000, 0x38518000, 0x3851C000, 0x38520000, 0x38524000, 0x38528000, 0x3852C000, 0x38530000, 0x38534000, 0x38538000, 0x3853C000, - 0x38540000, 0x38544000, 0x38548000, 0x3854C000, 0x38550000, 0x38554000, 0x38558000, 0x3855C000, 0x38560000, 0x38564000, 0x38568000, 0x3856C000, 0x38570000, 0x38574000, 0x38578000, 0x3857C000, - 0x38580000, 0x38584000, 0x38588000, 0x3858C000, 0x38590000, 0x38594000, 0x38598000, 0x3859C000, 0x385A0000, 0x385A4000, 0x385A8000, 0x385AC000, 0x385B0000, 0x385B4000, 0x385B8000, 0x385BC000, - 0x385C0000, 0x385C4000, 0x385C8000, 0x385CC000, 0x385D0000, 0x385D4000, 0x385D8000, 0x385DC000, 0x385E0000, 0x385E4000, 0x385E8000, 0x385EC000, 0x385F0000, 0x385F4000, 0x385F8000, 0x385FC000, - 0x38600000, 0x38604000, 0x38608000, 0x3860C000, 0x38610000, 0x38614000, 0x38618000, 0x3861C000, 0x38620000, 0x38624000, 0x38628000, 0x3862C000, 0x38630000, 0x38634000, 0x38638000, 0x3863C000, - 0x38640000, 0x38644000, 0x38648000, 0x3864C000, 0x38650000, 0x38654000, 0x38658000, 0x3865C000, 0x38660000, 0x38664000, 0x38668000, 0x3866C000, 0x38670000, 0x38674000, 0x38678000, 0x3867C000, - 0x38680000, 0x38684000, 0x38688000, 0x3868C000, 0x38690000, 0x38694000, 0x38698000, 0x3869C000, 0x386A0000, 0x386A4000, 0x386A8000, 0x386AC000, 0x386B0000, 0x386B4000, 0x386B8000, 0x386BC000, - 0x386C0000, 0x386C4000, 0x386C8000, 0x386CC000, 0x386D0000, 0x386D4000, 0x386D8000, 0x386DC000, 0x386E0000, 0x386E4000, 0x386E8000, 0x386EC000, 0x386F0000, 0x386F4000, 0x386F8000, 0x386FC000, - 0x38700000, 0x38704000, 0x38708000, 0x3870C000, 0x38710000, 0x38714000, 0x38718000, 0x3871C000, 0x38720000, 0x38724000, 0x38728000, 0x3872C000, 
0x38730000, 0x38734000, 0x38738000, 0x3873C000, - 0x38740000, 0x38744000, 0x38748000, 0x3874C000, 0x38750000, 0x38754000, 0x38758000, 0x3875C000, 0x38760000, 0x38764000, 0x38768000, 0x3876C000, 0x38770000, 0x38774000, 0x38778000, 0x3877C000, - 0x38780000, 0x38784000, 0x38788000, 0x3878C000, 0x38790000, 0x38794000, 0x38798000, 0x3879C000, 0x387A0000, 0x387A4000, 0x387A8000, 0x387AC000, 0x387B0000, 0x387B4000, 0x387B8000, 0x387BC000, - 0x387C0000, 0x387C4000, 0x387C8000, 0x387CC000, 0x387D0000, 0x387D4000, 0x387D8000, 0x387DC000, 0x387E0000, 0x387E4000, 0x387E8000, 0x387EC000, 0x387F0000, 0x387F4000, 0x387F8000, 0x387FC000, - 0x38000000, 0x38002000, 0x38004000, 0x38006000, 0x38008000, 0x3800A000, 0x3800C000, 0x3800E000, 0x38010000, 0x38012000, 0x38014000, 0x38016000, 0x38018000, 0x3801A000, 0x3801C000, 0x3801E000, - 0x38020000, 0x38022000, 0x38024000, 0x38026000, 0x38028000, 0x3802A000, 0x3802C000, 0x3802E000, 0x38030000, 0x38032000, 0x38034000, 0x38036000, 0x38038000, 0x3803A000, 0x3803C000, 0x3803E000, - 0x38040000, 0x38042000, 0x38044000, 0x38046000, 0x38048000, 0x3804A000, 0x3804C000, 0x3804E000, 0x38050000, 0x38052000, 0x38054000, 0x38056000, 0x38058000, 0x3805A000, 0x3805C000, 0x3805E000, - 0x38060000, 0x38062000, 0x38064000, 0x38066000, 0x38068000, 0x3806A000, 0x3806C000, 0x3806E000, 0x38070000, 0x38072000, 0x38074000, 0x38076000, 0x38078000, 0x3807A000, 0x3807C000, 0x3807E000, - 0x38080000, 0x38082000, 0x38084000, 0x38086000, 0x38088000, 0x3808A000, 0x3808C000, 0x3808E000, 0x38090000, 0x38092000, 0x38094000, 0x38096000, 0x38098000, 0x3809A000, 0x3809C000, 0x3809E000, - 0x380A0000, 0x380A2000, 0x380A4000, 0x380A6000, 0x380A8000, 0x380AA000, 0x380AC000, 0x380AE000, 0x380B0000, 0x380B2000, 0x380B4000, 0x380B6000, 0x380B8000, 0x380BA000, 0x380BC000, 0x380BE000, - 0x380C0000, 0x380C2000, 0x380C4000, 0x380C6000, 0x380C8000, 0x380CA000, 0x380CC000, 0x380CE000, 0x380D0000, 0x380D2000, 0x380D4000, 0x380D6000, 0x380D8000, 0x380DA000, 0x380DC000, 0x380DE000, - 
0x380E0000, 0x380E2000, 0x380E4000, 0x380E6000, 0x380E8000, 0x380EA000, 0x380EC000, 0x380EE000, 0x380F0000, 0x380F2000, 0x380F4000, 0x380F6000, 0x380F8000, 0x380FA000, 0x380FC000, 0x380FE000, - 0x38100000, 0x38102000, 0x38104000, 0x38106000, 0x38108000, 0x3810A000, 0x3810C000, 0x3810E000, 0x38110000, 0x38112000, 0x38114000, 0x38116000, 0x38118000, 0x3811A000, 0x3811C000, 0x3811E000, - 0x38120000, 0x38122000, 0x38124000, 0x38126000, 0x38128000, 0x3812A000, 0x3812C000, 0x3812E000, 0x38130000, 0x38132000, 0x38134000, 0x38136000, 0x38138000, 0x3813A000, 0x3813C000, 0x3813E000, - 0x38140000, 0x38142000, 0x38144000, 0x38146000, 0x38148000, 0x3814A000, 0x3814C000, 0x3814E000, 0x38150000, 0x38152000, 0x38154000, 0x38156000, 0x38158000, 0x3815A000, 0x3815C000, 0x3815E000, - 0x38160000, 0x38162000, 0x38164000, 0x38166000, 0x38168000, 0x3816A000, 0x3816C000, 0x3816E000, 0x38170000, 0x38172000, 0x38174000, 0x38176000, 0x38178000, 0x3817A000, 0x3817C000, 0x3817E000, - 0x38180000, 0x38182000, 0x38184000, 0x38186000, 0x38188000, 0x3818A000, 0x3818C000, 0x3818E000, 0x38190000, 0x38192000, 0x38194000, 0x38196000, 0x38198000, 0x3819A000, 0x3819C000, 0x3819E000, - 0x381A0000, 0x381A2000, 0x381A4000, 0x381A6000, 0x381A8000, 0x381AA000, 0x381AC000, 0x381AE000, 0x381B0000, 0x381B2000, 0x381B4000, 0x381B6000, 0x381B8000, 0x381BA000, 0x381BC000, 0x381BE000, - 0x381C0000, 0x381C2000, 0x381C4000, 0x381C6000, 0x381C8000, 0x381CA000, 0x381CC000, 0x381CE000, 0x381D0000, 0x381D2000, 0x381D4000, 0x381D6000, 0x381D8000, 0x381DA000, 0x381DC000, 0x381DE000, - 0x381E0000, 0x381E2000, 0x381E4000, 0x381E6000, 0x381E8000, 0x381EA000, 0x381EC000, 0x381EE000, 0x381F0000, 0x381F2000, 0x381F4000, 0x381F6000, 0x381F8000, 0x381FA000, 0x381FC000, 0x381FE000, - 0x38200000, 0x38202000, 0x38204000, 0x38206000, 0x38208000, 0x3820A000, 0x3820C000, 0x3820E000, 0x38210000, 0x38212000, 0x38214000, 0x38216000, 0x38218000, 0x3821A000, 0x3821C000, 0x3821E000, - 0x38220000, 0x38222000, 0x38224000, 0x38226000, 0x38228000, 
0x3822A000, 0x3822C000, 0x3822E000, 0x38230000, 0x38232000, 0x38234000, 0x38236000, 0x38238000, 0x3823A000, 0x3823C000, 0x3823E000, - 0x38240000, 0x38242000, 0x38244000, 0x38246000, 0x38248000, 0x3824A000, 0x3824C000, 0x3824E000, 0x38250000, 0x38252000, 0x38254000, 0x38256000, 0x38258000, 0x3825A000, 0x3825C000, 0x3825E000, - 0x38260000, 0x38262000, 0x38264000, 0x38266000, 0x38268000, 0x3826A000, 0x3826C000, 0x3826E000, 0x38270000, 0x38272000, 0x38274000, 0x38276000, 0x38278000, 0x3827A000, 0x3827C000, 0x3827E000, - 0x38280000, 0x38282000, 0x38284000, 0x38286000, 0x38288000, 0x3828A000, 0x3828C000, 0x3828E000, 0x38290000, 0x38292000, 0x38294000, 0x38296000, 0x38298000, 0x3829A000, 0x3829C000, 0x3829E000, - 0x382A0000, 0x382A2000, 0x382A4000, 0x382A6000, 0x382A8000, 0x382AA000, 0x382AC000, 0x382AE000, 0x382B0000, 0x382B2000, 0x382B4000, 0x382B6000, 0x382B8000, 0x382BA000, 0x382BC000, 0x382BE000, - 0x382C0000, 0x382C2000, 0x382C4000, 0x382C6000, 0x382C8000, 0x382CA000, 0x382CC000, 0x382CE000, 0x382D0000, 0x382D2000, 0x382D4000, 0x382D6000, 0x382D8000, 0x382DA000, 0x382DC000, 0x382DE000, - 0x382E0000, 0x382E2000, 0x382E4000, 0x382E6000, 0x382E8000, 0x382EA000, 0x382EC000, 0x382EE000, 0x382F0000, 0x382F2000, 0x382F4000, 0x382F6000, 0x382F8000, 0x382FA000, 0x382FC000, 0x382FE000, - 0x38300000, 0x38302000, 0x38304000, 0x38306000, 0x38308000, 0x3830A000, 0x3830C000, 0x3830E000, 0x38310000, 0x38312000, 0x38314000, 0x38316000, 0x38318000, 0x3831A000, 0x3831C000, 0x3831E000, - 0x38320000, 0x38322000, 0x38324000, 0x38326000, 0x38328000, 0x3832A000, 0x3832C000, 0x3832E000, 0x38330000, 0x38332000, 0x38334000, 0x38336000, 0x38338000, 0x3833A000, 0x3833C000, 0x3833E000, - 0x38340000, 0x38342000, 0x38344000, 0x38346000, 0x38348000, 0x3834A000, 0x3834C000, 0x3834E000, 0x38350000, 0x38352000, 0x38354000, 0x38356000, 0x38358000, 0x3835A000, 0x3835C000, 0x3835E000, - 0x38360000, 0x38362000, 0x38364000, 0x38366000, 0x38368000, 0x3836A000, 0x3836C000, 0x3836E000, 0x38370000, 0x38372000, 
0x38374000, 0x38376000, 0x38378000, 0x3837A000, 0x3837C000, 0x3837E000, - 0x38380000, 0x38382000, 0x38384000, 0x38386000, 0x38388000, 0x3838A000, 0x3838C000, 0x3838E000, 0x38390000, 0x38392000, 0x38394000, 0x38396000, 0x38398000, 0x3839A000, 0x3839C000, 0x3839E000, - 0x383A0000, 0x383A2000, 0x383A4000, 0x383A6000, 0x383A8000, 0x383AA000, 0x383AC000, 0x383AE000, 0x383B0000, 0x383B2000, 0x383B4000, 0x383B6000, 0x383B8000, 0x383BA000, 0x383BC000, 0x383BE000, - 0x383C0000, 0x383C2000, 0x383C4000, 0x383C6000, 0x383C8000, 0x383CA000, 0x383CC000, 0x383CE000, 0x383D0000, 0x383D2000, 0x383D4000, 0x383D6000, 0x383D8000, 0x383DA000, 0x383DC000, 0x383DE000, - 0x383E0000, 0x383E2000, 0x383E4000, 0x383E6000, 0x383E8000, 0x383EA000, 0x383EC000, 0x383EE000, 0x383F0000, 0x383F2000, 0x383F4000, 0x383F6000, 0x383F8000, 0x383FA000, 0x383FC000, 0x383FE000, - 0x38400000, 0x38402000, 0x38404000, 0x38406000, 0x38408000, 0x3840A000, 0x3840C000, 0x3840E000, 0x38410000, 0x38412000, 0x38414000, 0x38416000, 0x38418000, 0x3841A000, 0x3841C000, 0x3841E000, - 0x38420000, 0x38422000, 0x38424000, 0x38426000, 0x38428000, 0x3842A000, 0x3842C000, 0x3842E000, 0x38430000, 0x38432000, 0x38434000, 0x38436000, 0x38438000, 0x3843A000, 0x3843C000, 0x3843E000, - 0x38440000, 0x38442000, 0x38444000, 0x38446000, 0x38448000, 0x3844A000, 0x3844C000, 0x3844E000, 0x38450000, 0x38452000, 0x38454000, 0x38456000, 0x38458000, 0x3845A000, 0x3845C000, 0x3845E000, - 0x38460000, 0x38462000, 0x38464000, 0x38466000, 0x38468000, 0x3846A000, 0x3846C000, 0x3846E000, 0x38470000, 0x38472000, 0x38474000, 0x38476000, 0x38478000, 0x3847A000, 0x3847C000, 0x3847E000, - 0x38480000, 0x38482000, 0x38484000, 0x38486000, 0x38488000, 0x3848A000, 0x3848C000, 0x3848E000, 0x38490000, 0x38492000, 0x38494000, 0x38496000, 0x38498000, 0x3849A000, 0x3849C000, 0x3849E000, - 0x384A0000, 0x384A2000, 0x384A4000, 0x384A6000, 0x384A8000, 0x384AA000, 0x384AC000, 0x384AE000, 0x384B0000, 0x384B2000, 0x384B4000, 0x384B6000, 0x384B8000, 0x384BA000, 0x384BC000, 
0x384BE000, - 0x384C0000, 0x384C2000, 0x384C4000, 0x384C6000, 0x384C8000, 0x384CA000, 0x384CC000, 0x384CE000, 0x384D0000, 0x384D2000, 0x384D4000, 0x384D6000, 0x384D8000, 0x384DA000, 0x384DC000, 0x384DE000, - 0x384E0000, 0x384E2000, 0x384E4000, 0x384E6000, 0x384E8000, 0x384EA000, 0x384EC000, 0x384EE000, 0x384F0000, 0x384F2000, 0x384F4000, 0x384F6000, 0x384F8000, 0x384FA000, 0x384FC000, 0x384FE000, - 0x38500000, 0x38502000, 0x38504000, 0x38506000, 0x38508000, 0x3850A000, 0x3850C000, 0x3850E000, 0x38510000, 0x38512000, 0x38514000, 0x38516000, 0x38518000, 0x3851A000, 0x3851C000, 0x3851E000, - 0x38520000, 0x38522000, 0x38524000, 0x38526000, 0x38528000, 0x3852A000, 0x3852C000, 0x3852E000, 0x38530000, 0x38532000, 0x38534000, 0x38536000, 0x38538000, 0x3853A000, 0x3853C000, 0x3853E000, - 0x38540000, 0x38542000, 0x38544000, 0x38546000, 0x38548000, 0x3854A000, 0x3854C000, 0x3854E000, 0x38550000, 0x38552000, 0x38554000, 0x38556000, 0x38558000, 0x3855A000, 0x3855C000, 0x3855E000, - 0x38560000, 0x38562000, 0x38564000, 0x38566000, 0x38568000, 0x3856A000, 0x3856C000, 0x3856E000, 0x38570000, 0x38572000, 0x38574000, 0x38576000, 0x38578000, 0x3857A000, 0x3857C000, 0x3857E000, - 0x38580000, 0x38582000, 0x38584000, 0x38586000, 0x38588000, 0x3858A000, 0x3858C000, 0x3858E000, 0x38590000, 0x38592000, 0x38594000, 0x38596000, 0x38598000, 0x3859A000, 0x3859C000, 0x3859E000, - 0x385A0000, 0x385A2000, 0x385A4000, 0x385A6000, 0x385A8000, 0x385AA000, 0x385AC000, 0x385AE000, 0x385B0000, 0x385B2000, 0x385B4000, 0x385B6000, 0x385B8000, 0x385BA000, 0x385BC000, 0x385BE000, - 0x385C0000, 0x385C2000, 0x385C4000, 0x385C6000, 0x385C8000, 0x385CA000, 0x385CC000, 0x385CE000, 0x385D0000, 0x385D2000, 0x385D4000, 0x385D6000, 0x385D8000, 0x385DA000, 0x385DC000, 0x385DE000, - 0x385E0000, 0x385E2000, 0x385E4000, 0x385E6000, 0x385E8000, 0x385EA000, 0x385EC000, 0x385EE000, 0x385F0000, 0x385F2000, 0x385F4000, 0x385F6000, 0x385F8000, 0x385FA000, 0x385FC000, 0x385FE000, - 0x38600000, 0x38602000, 0x38604000, 
0x38606000, 0x38608000, 0x3860A000, 0x3860C000, 0x3860E000, 0x38610000, 0x38612000, 0x38614000, 0x38616000, 0x38618000, 0x3861A000, 0x3861C000, 0x3861E000, - 0x38620000, 0x38622000, 0x38624000, 0x38626000, 0x38628000, 0x3862A000, 0x3862C000, 0x3862E000, 0x38630000, 0x38632000, 0x38634000, 0x38636000, 0x38638000, 0x3863A000, 0x3863C000, 0x3863E000, - 0x38640000, 0x38642000, 0x38644000, 0x38646000, 0x38648000, 0x3864A000, 0x3864C000, 0x3864E000, 0x38650000, 0x38652000, 0x38654000, 0x38656000, 0x38658000, 0x3865A000, 0x3865C000, 0x3865E000, - 0x38660000, 0x38662000, 0x38664000, 0x38666000, 0x38668000, 0x3866A000, 0x3866C000, 0x3866E000, 0x38670000, 0x38672000, 0x38674000, 0x38676000, 0x38678000, 0x3867A000, 0x3867C000, 0x3867E000, - 0x38680000, 0x38682000, 0x38684000, 0x38686000, 0x38688000, 0x3868A000, 0x3868C000, 0x3868E000, 0x38690000, 0x38692000, 0x38694000, 0x38696000, 0x38698000, 0x3869A000, 0x3869C000, 0x3869E000, - 0x386A0000, 0x386A2000, 0x386A4000, 0x386A6000, 0x386A8000, 0x386AA000, 0x386AC000, 0x386AE000, 0x386B0000, 0x386B2000, 0x386B4000, 0x386B6000, 0x386B8000, 0x386BA000, 0x386BC000, 0x386BE000, - 0x386C0000, 0x386C2000, 0x386C4000, 0x386C6000, 0x386C8000, 0x386CA000, 0x386CC000, 0x386CE000, 0x386D0000, 0x386D2000, 0x386D4000, 0x386D6000, 0x386D8000, 0x386DA000, 0x386DC000, 0x386DE000, - 0x386E0000, 0x386E2000, 0x386E4000, 0x386E6000, 0x386E8000, 0x386EA000, 0x386EC000, 0x386EE000, 0x386F0000, 0x386F2000, 0x386F4000, 0x386F6000, 0x386F8000, 0x386FA000, 0x386FC000, 0x386FE000, - 0x38700000, 0x38702000, 0x38704000, 0x38706000, 0x38708000, 0x3870A000, 0x3870C000, 0x3870E000, 0x38710000, 0x38712000, 0x38714000, 0x38716000, 0x38718000, 0x3871A000, 0x3871C000, 0x3871E000, - 0x38720000, 0x38722000, 0x38724000, 0x38726000, 0x38728000, 0x3872A000, 0x3872C000, 0x3872E000, 0x38730000, 0x38732000, 0x38734000, 0x38736000, 0x38738000, 0x3873A000, 0x3873C000, 0x3873E000, - 0x38740000, 0x38742000, 0x38744000, 0x38746000, 0x38748000, 0x3874A000, 0x3874C000, 0x3874E000, 
0x38750000, 0x38752000, 0x38754000, 0x38756000, 0x38758000, 0x3875A000, 0x3875C000, 0x3875E000, - 0x38760000, 0x38762000, 0x38764000, 0x38766000, 0x38768000, 0x3876A000, 0x3876C000, 0x3876E000, 0x38770000, 0x38772000, 0x38774000, 0x38776000, 0x38778000, 0x3877A000, 0x3877C000, 0x3877E000, - 0x38780000, 0x38782000, 0x38784000, 0x38786000, 0x38788000, 0x3878A000, 0x3878C000, 0x3878E000, 0x38790000, 0x38792000, 0x38794000, 0x38796000, 0x38798000, 0x3879A000, 0x3879C000, 0x3879E000, - 0x387A0000, 0x387A2000, 0x387A4000, 0x387A6000, 0x387A8000, 0x387AA000, 0x387AC000, 0x387AE000, 0x387B0000, 0x387B2000, 0x387B4000, 0x387B6000, 0x387B8000, 0x387BA000, 0x387BC000, 0x387BE000, - 0x387C0000, 0x387C2000, 0x387C4000, 0x387C6000, 0x387C8000, 0x387CA000, 0x387CC000, 0x387CE000, 0x387D0000, 0x387D2000, 0x387D4000, 0x387D6000, 0x387D8000, 0x387DA000, 0x387DC000, 0x387DE000, - 0x387E0000, 0x387E2000, 0x387E4000, 0x387E6000, 0x387E8000, 0x387EA000, 0x387EC000, 0x387EE000, 0x387F0000, 0x387F2000, 0x387F4000, 0x387F6000, 0x387F8000, 0x387FA000, 0x387FC000, 0x387FE000 }; - static const unsigned int exponent_table[64] = { - 0x00000000, 0x00800000, 0x01000000, 0x01800000, 0x02000000, 0x02800000, 0x03000000, 0x03800000, 0x04000000, 0x04800000, 0x05000000, 0x05800000, 0x06000000, 0x06800000, 0x07000000, 0x07800000, - 0x08000000, 0x08800000, 0x09000000, 0x09800000, 0x0A000000, 0x0A800000, 0x0B000000, 0x0B800000, 0x0C000000, 0x0C800000, 0x0D000000, 0x0D800000, 0x0E000000, 0x0E800000, 0x0F000000, 0x47800000, - 0x80000000, 0x80800000, 0x81000000, 0x81800000, 0x82000000, 0x82800000, 0x83000000, 0x83800000, 0x84000000, 0x84800000, 0x85000000, 0x85800000, 0x86000000, 0x86800000, 0x87000000, 0x87800000, - 0x88000000, 0x88800000, 0x89000000, 0x89800000, 0x8A000000, 0x8A800000, 0x8B000000, 0x8B800000, 0x8C000000, 0x8C800000, 0x8D000000, 0x8D800000, 0x8E000000, 0x8E800000, 0x8F000000, 0xC7800000 }; - static const unsigned short offset_table[64] = { - 0, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 
1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, - 0, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024 }; - unsigned int fbits = mantissa_table[offset_table[v >> 10] + (v & 0x3FF)] + exponent_table[v >> 10]; - - float out; - memcpy( &out, &fbits, sizeof( float ) ); - return out; -} +typedef unsigned short (*ZQuantizeHalfFloat)(float input); +typedef void (*ZQuantizeHalfFloat_X4)(float* input, unsigned short* output); +typedef float (*ZUnquantizeHalfFloat)(unsigned short input); +typedef void (*ZUnquantizeHalfFloat_X4)(unsigned short* input, float* output); + +extern ZQuantizeHalfFloat QuantizeHalfFloat; +extern ZQuantizeHalfFloat_X4 QuantizeHalfFloat_X4; +extern ZUnquantizeHalfFloat UnquantizeHalfFloat; +extern ZUnquantizeHalfFloat_X4 UnquantizeHalfFloat_X4; +extern ZUnquantizeHalfFloat_X4 UnquantizeHalfFloat_X8;