-
Notifications
You must be signed in to change notification settings - Fork 8
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[WIP] Interpreter Pool Executor #4
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,4 +4,5 @@ | |
*swp | ||
.eggs | ||
build/ | ||
|
||
*.py[co] | ||
[.]venv/ |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,119 @@ | ||
import concurrent.futures | ||
from concurrent.futures.thread import _WorkItem, _worker | ||
import itertools | ||
import os | ||
import queue | ||
import threading | ||
import weakref | ||
from extrainterpreters import Interpreter | ||
|
||
_threads_queues = weakref.WeakKeyDictionary() | ||
_shutdown = False | ||
# Lock that ensures that new workers are not created while the interpreter is | ||
# shutting down. Must be held while mutating _threads_queues and _shutdown. | ||
_global_shutdown_lock = threading.Lock() | ||
|
||
|
||
class SubinterpreterPoolExecutor(concurrent.futures.Executor):
    """Executor that runs each submitted call in a subinterpreter worker.

    Structured after concurrent.futures.ThreadPoolExecutor, but each
    worker is an ``extrainterpreters.Interpreter`` running the stdlib
    ``_worker`` loop instead of a plain ``threading.Thread``.
    """

    # Source of unique numbers for default name prefixes when
    # thread_name_prefix is not supplied.
    _counter = itertools.count().__next__

    def __init__(self, max_workers=None, thread_name_prefix='',
                 initializer=None, initargs=()):
        """Initializes a new SubinterpreterPoolExecutor instance.

        Args:
            max_workers: The maximum number of threads that can be used to
                execute the given calls.
            thread_name_prefix: An optional name prefix to give our threads.
            initializer: A callable used to initialize worker threads.
            initargs: A tuple of arguments to pass to the initializer.

        Raises:
            ValueError: If max_workers is zero or negative.
            TypeError: If initializer is given but not callable.
        """
        if max_workers is None:
            # Same heuristic as ThreadPoolExecutor: works for both
            # GIL-releasing CPU-bound tasks and I/O-bound tasks, capped
            # at 32 so many-core machines don't spawn a surprising
            # number of workers.
            max_workers = min(32, (os.cpu_count() or 1) + 4)
        if max_workers <= 0:
            raise ValueError("max_workers must be greater than 0")

        if initializer is not None and not callable(initializer):
            raise TypeError("initializer must be a callable")

        self._max_workers = max_workers
        self._work_queue = queue.SimpleQueue()
        # Counts workers currently idle; submit() consumes a permit
        # instead of spawning a new worker when one is available.
        self._idle_semaphore = threading.Semaphore(0)
        self._threads = set()
        self._broken = False
        self._shutdown = False
        self._shutdown_lock = threading.Lock()
        self._thread_name_prefix = (
            thread_name_prefix
            or ("SubinterpreterPoolExecutor-%d" % self._counter()))
        self._initializer = initializer
        self._initargs = initargs

    def submit(self, fn, /, *args, **kwargs):
        """Schedule fn(*args, **kwargs) and return a Future for its result.

        Raises RuntimeError if this executor — or the interpreter as a
        whole — has already been shut down.
        """
        with self._shutdown_lock, _global_shutdown_lock:
            if self._shutdown:
                raise RuntimeError('cannot schedule new futures after shutdown')
            if _shutdown:
                raise RuntimeError('cannot schedule new futures after '
                                   'interpreter shutdown')

            future = concurrent.futures.Future()
            self._work_queue.put(_WorkItem(future, fn, args, kwargs))
            self._adjust_thread_count()
            return future

    def _adjust_thread_count(self):
        """Spawn a new worker interpreter unless an idle one can serve."""
        # An idle worker will pick up the freshly queued item; nothing
        # to spin up.
        if self._idle_semaphore.acquire(timeout=0):
            return

        # When the executor gets lost, the weakref callback will wake up
        # the worker threads.  (Currently unused below — see the TODO on
        # the args tuple about finding a pickleable alternative.)
        def weakref_cb(_, q=self._work_queue):
            q.put(None)

        current = len(self._threads)
        if current >= self._max_workers:
            return

        interp_name = '%s_%d' % (self._thread_name_prefix or self, current)
        worker = Interpreter(
            name=interp_name,
            target=_worker,
            args=(None,  # weakref.ref(self, weakref_cb), # TODO : Pickleable alternative
                  self._work_queue,   # TODO : Pickleable
                  self._initializer,  # TODO: Pickleable
                  self._initargs))
        worker.start()
        self._threads.add(worker)
        _threads_queues[worker] = self._work_queue

    def shutdown(self, wait=True, *, cancel_futures=False):
        """Stop accepting work; optionally cancel pending items and wait.

        Args:
            wait: If True, block until every worker has joined.
            cancel_futures: If True, drain the queue and cancel the
                futures of work items that never started.
        """
        with self._shutdown_lock:
            self._shutdown = True

            if cancel_futures:
                # Drain all work items from the queue, and then cancel
                # their associated futures.
                while True:
                    try:
                        item = self._work_queue.get_nowait()
                    except queue.Empty:
                        break
                    if item is not None:
                        item.future.cancel()

            # Send a wake-up to prevent threads calling
            # _work_queue.get(block=True) from permanently blocking.
            # (The stdlib _worker presumably re-queues the sentinel as it
            # exits, cascading it to the remaining workers — confirm.)
            self._work_queue.put(None)
        if wait:
            for worker in self._threads:
                worker.join()
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -41,8 +41,8 @@ class FuncData(StructBase): | |
def _dispatcher(pipe, buffer): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is in a very early stage - but the concept looks fine: a looping listener function in each interpreter tied to a queue which will send different tasks - which could run a function, spin a thread, and so on. I am thinking of doing away with the "PipedInterpreter" altogether, and having an optional post-start call which would start such a listener function. The InterpreterPoolExecutor would do it automatically. Just let me know if you have a different idea for approaching this. |
||
"""the core running function in a PipedInterpreter | ||
|
||
This is responsible for watching comunications with the parent | ||
interpreter, dispathing execution, and place the return values | ||
This is responsible for watching communications with the parent | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. as you can see, my code editor does not spell check English - :-) I always have a hard time with doubled consonants. |
||
interpreter, dispatching execution, and place the return values | ||
""" | ||
|
||
while True: | ||
|
@@ -113,6 +113,3 @@ def _close_channel(self): | |
self.pipe.read(timeout=None) | ||
self.pipe.close() | ||
super()._close_channel() | ||
|
||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
could you also add name to
__repr__
in the same batch?