From a8eabf23a78471ba92f0635e778ce27a3b590eec Mon Sep 17 00:00:00 2001 From: Daniel Dresser Date: Mon, 9 Sep 2024 15:54:15 -0700 Subject: [PATCH] FIX : Perf --- .../PrimitiveAlgoTest.py | 51 +++++++++++++++ .../IECoreScenePreview/PrimitiveAlgo.cpp | 64 +++++++++++++------ 2 files changed, 97 insertions(+), 18 deletions(-) diff --git a/python/GafferSceneTest/IECoreScenePreviewTest/PrimitiveAlgoTest.py b/python/GafferSceneTest/IECoreScenePreviewTest/PrimitiveAlgoTest.py index cf3b991f73..539cff0c85 100644 --- a/python/GafferSceneTest/IECoreScenePreviewTest/PrimitiveAlgoTest.py +++ b/python/GafferSceneTest/IECoreScenePreviewTest/PrimitiveAlgoTest.py @@ -177,6 +177,23 @@ def slowTransform(): thread.join() self.assertLess( time.time() - t, 0.01 + acceptableCancellationDelay ) + @GafferTest.TestRunner.PerformanceTestMethod() + def testTransformPrimitivePerf( self ) : + + mesh = IECoreScene.MeshPrimitive.createPlane( + imath.Box2f( imath.V2f( -2 ), imath.V2f( 2 ) ), + divisions = imath.V2i( 4000, 4000 ) + ) + + m = imath.M44f() + m.setTranslation( imath.V3f( 0, 1, 0 ) ) + + with GafferTest.TestRunner.PerformanceScope() : + # It's too expensive to allocate a mesh big enough to really make this slow, + # so run it 10 times to give a less noisy timing result + for i in range( 10 ): + PrimitiveAlgo.transformPrimitive( mesh, m ) + def testMergePrimitivesSimpleMeshes( self ) : @@ -621,6 +638,40 @@ def testMergeMeshes( self ) : {'labelSource', 'uv', 'indexedVertex', 'stringConstant', 'altUv', 'indexedUniform', 'N', 'unindexedUniform', 'unindexedFaceVarying'} ) + @GafferTest.TestRunner.PerformanceTestMethod() + def testMergeManyPerf( self ) : + + mesh = IECoreScene.MeshPrimitive.createPlane( + imath.Box2f( imath.V2f( -2 ), imath.V2f( 2 ) ), + divisions = imath.V2i( 100, 100 ) + ) + + meshes = [] + for i in range( 1000 ): + m = imath.M44f() + m.setTranslation( imath.V3f( 0, i, 0 ) ) + + meshes.append( ( mesh, m ) ) + + with GafferTest.TestRunner.PerformanceScope() : + PrimitiveAlgo.mergePrimitives( meshes ) + + @GafferTest.TestRunner.PerformanceTestMethod() + def testMergeFewPerf( self ) : + + mesh = IECoreScene.MeshPrimitive.createPlane( + imath.Box2f( imath.V2f( -2 ), imath.V2f( 2 ) ), + divisions = imath.V2i( 2000, 2000 ) + ) + + m = imath.M44f() + m.setTranslation( imath.V3f( 0, 1, 0 ) ) + + meshes = [ ( mesh, imath.M44f() ), ( mesh, m ) ] + + with GafferTest.TestRunner.PerformanceScope() : + PrimitiveAlgo.mergePrimitives( meshes ) + if __name__ == "__main__": unittest.main() diff --git a/src/GafferScene/IECoreScenePreview/PrimitiveAlgo.cpp b/src/GafferScene/IECoreScenePreview/PrimitiveAlgo.cpp index fea473d381..405c8406f0 100644 --- a/src/GafferScene/IECoreScenePreview/PrimitiveAlgo.cpp +++ b/src/GafferScene/IECoreScenePreview/PrimitiveAlgo.cpp @@ -162,25 +162,37 @@ void dataResize( Data *data, size_t size ) } inline void transformPrimVarValue( - const Imath::V3f &source, Imath::V3f &dest, + const Imath::V3f *source, Imath::V3f *dest, int numElements, const Imath::M44f &matrix, const Imath::M44f &normalMatrix, GeometricData::Interpretation interpretation ) { if( interpretation == GeometricData::Point ) { - dest = source * matrix; + for( int i = 0; i < numElements; i++ ) + { + *(dest++) = *(source++) * matrix; + } } else if( interpretation == GeometricData::Vector ) { - matrix.multDirMatrix( source, dest ); + for( int i = 0; i < numElements; i++ ) + { + matrix.multDirMatrix( *(source++), *(dest++) ); + } } else if( interpretation == GeometricData::Normal ) { - normalMatrix.multDirMatrix( source, dest ); + for( int i = 0; i < numElements; i++ ) + { + normalMatrix.multDirMatrix( *(source++), *(dest++) ); + } } else { - dest = source; + for( int i = 0; i < numElements; i++ ) + { + *(dest++) = *(source++); + } } } @@ -211,7 +223,8 @@ inline void copyElements( const Data *sourceData, size_t sourceIndex, Data *dest // Fairly weird corner case, but technically Constant primvars could need transforming too GeometricData::Interpretation interp = singleElementTypedSourceData->getInterpretation(); transformPrimVarValue( - singleElementTypedSourceData->readable(), typedDest[ destIndex ], matrix, normalMatrix, interp + &singleElementTypedSourceData->readable(), &typedDest[ destIndex ], 1, + matrix, normalMatrix, interp ); } else @@ -236,12 +249,9 @@ inline void copyElements( const Data *sourceData, size_t sourceIndex, Data *dest if constexpr( std::is_same_v< DataType, V3fVectorData > ) { GeometricData::Interpretation interp = typedSourceData->getInterpretation(); - for( size_t i = 0; i < num; i++ ) - { - transformPrimVarValue( - typedSource[ sourceIndex + i ], typedDest[ destIndex + i ], matrix, normalMatrix, interp - ); - } + transformPrimVarValue( + &typedSource[ sourceIndex ], &typedDest[ destIndex ], num, matrix, normalMatrix, interp + ); } else { @@ -1129,6 +1139,14 @@ IECoreScene::PrimitivePtr mergePrimitivesInternal( // data to the destination. // + // In theory, we would a finer-grained threading of this, so we could optimize the case where a small + // number of extremely large primitives are combined. However, with the current level of multithreading, + // it's already hard to produce cases where this loop is a significant contributor to runtime cost. The + // more significant issue is that when allocating *VectorData, we currently don't have a way to skip + // the zero-initialize, so much of the cost is currently in that ( which is hard to parallelize, and + // also is completely unnecessary ). There's probably not much point threading this further until we fix + // that. + tbb::task_group_context taskGroupContext( tbb::task_group_context::isolated ); tbb::parallel_for( @@ -1239,6 +1257,8 @@ void PrimitiveAlgo::transformPrimitive( Imath::M44f normalMatrix = normalTransform( matrix ); + tbb::task_group_context taskGroupContext( tbb::task_group_context::isolated ); + for( const auto &[name, var] : primitive.variables ) { Canceller::check( canceller ); @@ -1261,16 +1281,24 @@ void PrimitiveAlgo::transformPrimitive( if( vecVar ) { - for( Imath::V3f &i : vecVar->writable() ) - { - Canceller::check( canceller ); - transformPrimVarValue( i, i, matrix, normalMatrix, interp ); - } + std::vector< Imath::V3f >& writable = vecVar->writable(); + + tbb::parallel_for( + tbb::blocked_range( 0, writable.size(), 10000 ), + [&]( tbb::blocked_range &range ) + { + Canceller::check( canceller ); + transformPrimVarValue( + &writable[range.begin()], &writable[range.begin()], range.end() - range.begin(), + matrix, normalMatrix, interp + ); + } + ); } else { // Fairly weird corner case, but technically Constant primvars could need transforming too - transformPrimVarValue( vecConstVar->writable(), vecConstVar->writable(), matrix, normalMatrix, interp ); + transformPrimVarValue( &vecConstVar->writable(), &vecConstVar->writable(), 1, matrix, normalMatrix, interp ); } } }