From d1c9951d09e9f232e9a6ab3dabee9a292bc51fef Mon Sep 17 00:00:00 2001
From: Pavel Raiskup
Date: Wed, 6 Sep 2023 13:12:20 +0200
Subject: [PATCH] PoC: agent spawner

Relates to #123
---
 agentspawner/daemon.py               | 134 +++++++++++++++++++++++++++
 agentspawner/hook-converge-to        |   3 +
 agentspawner/hook-release            |   8 ++
 agentspawner/hook-take               |   4 +
 agentspawner/resalloc-testing-server |   6 ++
 agentspawner/test-it-in-tmux         |   4 +
 etc/pools.yaml                       |   9 ++
 etc/server.yaml                      |   5 +
 8 files changed, 173 insertions(+)
 create mode 100755 agentspawner/daemon.py
 create mode 100755 agentspawner/hook-converge-to
 create mode 100755 agentspawner/hook-release
 create mode 100755 agentspawner/hook-take
 create mode 100755 agentspawner/resalloc-testing-server
 create mode 100755 agentspawner/test-it-in-tmux
 create mode 100644 etc/pools.yaml
 create mode 100644 etc/server.yaml

diff --git a/agentspawner/daemon.py b/agentspawner/daemon.py
new file mode 100755
index 0000000..e99f194
--- /dev/null
+++ b/agentspawner/daemon.py
@@ -0,0 +1,134 @@
+#! /bin/python3
+
+"""
+Manage ideal number of "agent" like resources in Resalloc.
+"""
+
+import subprocess
+import time
+import logging
+
+from resalloc.client import (
+    Connection as ResallocConnection,
+    Ticket,
+)
+
+RESALLOC_SERVER = "http://localhost:49100"
+
+
+class SpawnerPool:
+    """
+    Manage ideal number of "agent" like resources in Resalloc.
+    """
+    sleep = 30
+
+    def __init__(self, resalloc_connection, logger):
+        self.tags = ["A"]
+        # TODO: use a persistent storage so we can restart the process
+        self.tickets = []
+        self.conn = resalloc_connection
+        self.log = logger
+
+    def call_converge_to(self):
+        """
+        Execute the configured hook script and return the number it prints.
+        Retry (after a pause) until the hook succeeds and prints an integer.
+        """
+        while True:
+            result = subprocess.run(["./hook-converge-to"], capture_output=True,
+                                    check=False)
+            if result.returncode == 0:
+                try:
+                    return int(result.stdout.decode("utf-8").strip())
+                except ValueError:
+                    pass
+
+            self.log.debug("Failing to run converge-to hook")
+            # Don't busy-loop when the hook keeps failing.
+            time.sleep(self.sleep)
+
+    def call_take(self, data):
+        """
+        Call hook that prepares the resource.  Return True if the hook exited
+        with zero status, False otherwise.
+        """
+        result = subprocess.run(["./hook-take", f"{data}"], check=False)
+        return not result.returncode
+
+    def call_release(self, data):
+        """
+        Call hook that releases the resource.  Return True if the hook exited
+        with zero status, False otherwise.
+        """
+        result = subprocess.run(["./hook-release", f"{data}"], check=False)
+        return not result.returncode
+
+    def start(self, count):
+        """ Start N agent-like resources """
+        self.log.info("Starting %s resources", count)
+        for _ in range(count):
+            ticket = self.conn.newTicket(self.tags)
+            self.log.debug("Taking ticket id %s", ticket.id)
+            self.tickets.append(ticket.id)
+            data = ticket.wait()
+            if not self.call_take(data):
+                self.log.error("Take hook failed for ticket %s", ticket.id)
+
+    def try_to_stop(self, to_stop):
+        """
+        Attempt to stop TO_STOP resources by closing Resalloc tickets. Not all
+        the resources may be closed at this time.
+        """
+        self.log.info("Trying to stop %s resources", to_stop)
+        stopped = 0
+        # Iterate over a copy; self.tickets is modified inside the loop.
+        for ticket_id in list(self.tickets):
+            if stopped >= to_stop:
+                break
+
+            ticket = Ticket(ticket_id, connection=self.conn)
+            # NOTE(review): collect() may return the readiness flag rather than
+            # the resource data (ticket.output) - confirm against the client.
+            data = ticket.collect()
+            if not self.call_release(data):
+                self.log.debug("Can't release %s", ticket.id)
+                continue
+
+            self.log.debug("Closing ticket %s", ticket.id)
+            ticket.close()
+            self.tickets.remove(ticket_id)
+            stopped += 1
+
+    def loop(self):
+        """
+        Periodically query the ideal number of builders, and attempt to converge
+        to the ideal state.
+        """
+        while True:
+            start = time.time()
+            todo = self.call_converge_to() - len(self.tickets)
+            if todo > 0:
+                self.start(todo)
+            elif todo < 0:
+                self.try_to_stop(-todo)
+            took = time.time() - start
+            sleep = self.sleep - took
+            if sleep > 0:
+                self.log.debug("Sleeping %ss", sleep)
+                time.sleep(sleep)
+
+
+def _main():
+    logging.basicConfig(
+        level=logging.DEBUG,
+        format='[%(asctime)s] {%(pathname)s:%(lineno)d} %(levelname)s - %(message)s',
+        datefmt='%H:%M:%S'
+    )
+    log = logging.getLogger()
+    conn = ResallocConnection(RESALLOC_SERVER, request_survives_server_restart=True)
+    spawner = SpawnerPool(conn, log)
+    spawner.loop()
+
+
+if __name__ == "__main__":
+    _main()
diff --git a/agentspawner/hook-converge-to b/agentspawner/hook-converge-to
new file mode 100755
index 0000000..80e91bc
--- /dev/null
+++ b/agentspawner/hook-converge-to
@@ -0,0 +1,3 @@
+#! /bin/sh
+
+echo 3
diff --git a/agentspawner/hook-release b/agentspawner/hook-release
new file mode 100755
index 0000000..7327aeb
--- /dev/null
+++ b/agentspawner/hook-release
@@ -0,0 +1,8 @@
+#! /bin/bash
+
+eval 'set -- $1' # strip
+echo "Releasing with resalloc ticket data: $1"
+
+# ~33% chance of closing this one
+test $(( RANDOM % 3 )) -eq 0 && exit 0
+exit 1
diff --git a/agentspawner/hook-take b/agentspawner/hook-take
new file mode 100755
index 0000000..aa03295
--- /dev/null
+++ b/agentspawner/hook-take
@@ -0,0 +1,4 @@
+#! /bin/sh
+
+eval 'set -- $1' # strip
+echo "Taking with resalloc ticket data: $1"
diff --git a/agentspawner/resalloc-testing-server b/agentspawner/resalloc-testing-server
new file mode 100755
index 0000000..7e04cc8
--- /dev/null
+++ b/agentspawner/resalloc-testing-server
@@ -0,0 +1,6 @@
+#! /bin/sh
+
+cd ..
+rm -f /tmp/server-sql
+mkdir -p /tmp/logdir
+./test-tooling/resalloc-server
diff --git a/agentspawner/test-it-in-tmux b/agentspawner/test-it-in-tmux
new file mode 100755
index 0000000..cc79f5e
--- /dev/null
+++ b/agentspawner/test-it-in-tmux
@@ -0,0 +1,4 @@
+#! /bin/sh -x
+
+tmux new-session -n "resalloc-server" ./resalloc-testing-server ';' \
+    new-window -n "agent spawner" python3 ./daemon.py
diff --git a/etc/pools.yaml b/etc/pools.yaml
new file mode 100644
index 0000000..9cf4307
--- /dev/null
+++ b/etc/pools.yaml
@@ -0,0 +1,9 @@
+---
+basic:
+    max: 15
+    max_prealloc: 5
+    cmd_new: "echo >&2 before; env | grep RESALLOC_; echo >&2 after"
+    cmd_delete: "echo >&2 stderr; echo stdout"
+    tags:
+    - A
+    - B
diff --git a/etc/server.yaml b/etc/server.yaml
new file mode 100644
index 0000000..8689c9b
--- /dev/null
+++ b/etc/server.yaml
@@ -0,0 +1,5 @@
+---
+db_url: "sqlite:////tmp/server-sql"
+logdir: /tmp/logdir
+port: 49100
+loglevel: debug