From ca37126da504afebfc0e47a722d692e5b27b7b93 Mon Sep 17 00:00:00 2001 From: John Haddon Date: Wed, 6 Dec 2023 14:25:56 +0000 Subject: [PATCH] BackgroundTask binding : Release GIL when destroying BackgroundTask The BackgroundTask constructor calls `cancelAndWait()`, and a Python function can't be cancelled unless it is able to aquire the GIL. Thus if we don't release the GIL before calling `wait()`, we can deadlock. We were already releasing the GIL appropriately for tasks returned by `ParallelAlgo::callOnBackgroundThread()` but not for those constructed with `BackgroundTask()` directly. Fixing this revealed another problem that was dealt with correctly by `callOnBackgroundThread()` but not by the `BackgroundTask()` constructor. We need to hold the GIL when destroying the Python function owned by the task, and we were doing this by manually deleting it after it ran, at a point when we knew we had the GIL. But this wasn't sufficient if the BackgroundTask was cancelled before the function even started. In this case, the function would be destroyed from C++ and we would be deleting Python objects without holding the GIL. The solution is again to adopt the GIL management used by `callOnBackgroundThread()`. --- Changes.md | 5 ++ python/GafferTest/BackgroundTaskTest.py | 21 +++++++++ src/GafferModule/ParallelAlgoBinding.cpp | 60 ++++++++++++++---------- 3 files changed, 60 insertions(+), 26 deletions(-) diff --git a/Changes.md b/Changes.md index d97e4b3761f..00f6e3dcd4a 100644 --- a/Changes.md +++ b/Changes.md @@ -9,6 +9,11 @@ Improvements - TaskList, FrameMask : Reimplemented in C++ for improved performance. - Cache : Increased default computation cache size to 8Gb. Call `Gaffer.ValuePlug.setCacheMemoryLimit()` from a startup file to override this. +Fixes +----- + +- BackgroundTask : Fixed potential deadlock caused by destroying a BackgroundTask from Python while it was still running. + API --- diff --git a/python/GafferTest/BackgroundTaskTest.py b/python/GafferTest/BackgroundTaskTest.py index 90ec43ca657..4c8ccfbf6a2 100644 --- a/python/GafferTest/BackgroundTaskTest.py +++ b/python/GafferTest/BackgroundTaskTest.py @@ -35,6 +35,7 @@ ########################################################################## import functools +import threading import time import IECore @@ -234,5 +235,25 @@ def f( canceller ) : t.cancelAndWait() + def testDestroyWhileRunning( self ) : + + started = threading.Event() + + def f( canceller ) : + + started.set() + while True : + IECore.Canceller.check( canceller ) + + # Start task and wait for it to be running + task = Gaffer.BackgroundTask( None, f ) + started.wait() + + # Destroy task. This will cancel it via `canceller`, + # and wait for it to return. If we don't release the + # GIL while doing that then it'll never be able to + # check for cancellation, and we'll deadlock. + del task + if __name__ == "__main__": unittest.main() diff --git a/src/GafferModule/ParallelAlgoBinding.cpp b/src/GafferModule/ParallelAlgoBinding.cpp index f5305686294..afeb3ebfa99 100644 --- a/src/GafferModule/ParallelAlgoBinding.cpp +++ b/src/GafferModule/ParallelAlgoBinding.cpp @@ -56,28 +56,52 @@ using namespace boost::python; namespace { -BackgroundTask *backgroundTaskConstructor( const Plug *subject, object f ) +std::shared_ptr withGILReleaseDeleter( std::unique_ptr &backgroundTask ) { - auto fPtr = std::make_shared( f ); - return new BackgroundTask( + return std::shared_ptr( + backgroundTask.release(), + // Custom deleter. We need to release the GIL when deleting, because the + // destructor waits on the background task, and the background task + // might need the GIL in order to complete. + []( BackgroundTask *t ) { + IECorePython::ScopedGILRelease gilRelease; + delete t; + } + ); +} + +std::shared_ptr withGILAcquireDeleter( const boost::python::object &o ) +{ + return std::shared_ptr( + new boost::python::object( o ), + []( boost::python::object *o ) { + // Custom deleter. We must hold the GIL when deleting Python + // objects. + IECorePython::ScopedGILLock gilLock; + delete o; + } + ); +} + +std::shared_ptr backgroundTaskConstructor( const Plug *subject, object f ) +{ + auto fPtr = withGILAcquireDeleter( f ); + auto backgroundTask = std::make_unique( subject, [fPtr]( const IECore::Canceller &canceller ) mutable { IECorePython::ScopedGILLock gilLock; try { (*fPtr)( boost::ref( canceller ) ); - // We are likely to be the last owner of the python - // function object. Make sure we release it while we - // still hold the GIL. - fPtr.reset(); } catch( boost::python::error_already_set & ) { - fPtr.reset(); IECorePython::ExceptionAlgo::translatePythonException(); } } ); + + return withGILReleaseDeleter( backgroundTask ); } void backgroundTaskCancel( BackgroundTask &b ) @@ -203,13 +227,7 @@ std::shared_ptr callOnBackgroundThread( const Plug *subject, boo // The BackgroundTask we return will own the python function we // pass to it. Wrap the function so that the GIL is acquired // before the python object is destroyed. - auto fPtr = std::shared_ptr( - new boost::python::object( f ), - []( boost::python::object *o ) { - IECorePython::ScopedGILLock gilLock; - delete o; - } - ); + auto fPtr = withGILAcquireDeleter( f ); auto backgroundTask = ParallelAlgo::callOnBackgroundThread( subject, @@ -226,17 +244,7 @@ std::shared_ptr callOnBackgroundThread( const Plug *subject, boo } ); - return std::shared_ptr( - backgroundTask.release(), - // Custom deleter. We need to release - // the GIL when deleting, because the destructor - // waits on the background task, and the background - // task might need the GIL in order to complete. - []( BackgroundTask *t ) { - IECorePython::ScopedGILRelease gilRelease; - delete t; - } - ); + return withGILReleaseDeleter( backgroundTask ); } } // namespace