Skip to content

Commit

Permalink
Add resalloc agent spawner in the hub container
Browse files Browse the repository at this point in the history
Signed-off-by: Siteshwar Vashisht <[email protected]>
  • Loading branch information
siteshwar committed Mar 14, 2024
1 parent d9197d9 commit 7bc120b
Show file tree
Hide file tree
Showing 3 changed files with 204 additions and 3 deletions.
59 changes: 59 additions & 0 deletions containers/configs/resalloc-agent-spawner-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Configuration for resalloc-agent-spawner.service. YAML format.
# Specify groups of agents within the `agent_groups` section that agent spawner
# should take care of.

agent_groups:
workers:
# These commands are executed asynchronously in the background, as they may
# take quite some time to process. If `cmd_prepare` fails (exits non-zero),
# the agent is immediately marked for removal. The `cmd_terminate` exit
# status is simply ignored (we need to remove the agent no matter what).

# Prepare the agent. Variable $AGENT_SPAWNER_RESOURCE_DATA (base64 encoded)
# is provided in the script environment. Other variables like
# RESOURCE_NAME, RESOURCE_POOL_ID, etc. are provided as well.
cmd_prepare: /usr/bin/osh-worker-manager --create-worker $(echo "$AGENT_SPAWNER_RESOURCE_DATA" | base64 -d | head -1) &> /tmp/$$-preparation.log
# cmd_prepare: echo "$RESALLOC_RESOURCE_DATA" | base64 -d | head -1 &> /tmp/$$-preparation.log
# cmd_prepare: env &> /tmp/$$-preparation.log
# Prepare the agent for termination. Upon finishing this command, the
# resalloc resource ticket is closed and the resource deallocated.
cmd_terminate: /usr/bin/osh-worker-manager --delete-worker $(echo "$AGENT_SPAWNER_RESOURCE_DATA" | base64 -d | head -1) &> /tmp/$$-deletion.log
# cmd_terminate: /sbin/osh-resalloc --delete-worker "$(echo "$RESALLOC_RESOURCE_DATA" | base64 -d | head -1)"

# The following commands are executed synchronously by the agent spawner
# daemon (polling). Please keep them super fast to avoid an overall system
# halt!

# The `cmd_converge_to` command needs to print an integer (the currently
# ideal number of agents to converge to) to stdout.
cmd_converge_to: /usr/bin/osh-worker-manager --workers-needed

# Agents may decide to stop themselves. This hook is used to detect
# such a case -> if exit status 0 is returned, agent is going to be
# terminated (cmd_terminate is called against it).
cmd_check_finished: /usr/bin/osh-worker-manager --check-throwaway $(echo "$AGENT_SPAWNER_RESOURCE_DATA" | base64 -d | head -1) &> /tmp/$$-check-finished.log

# Some agents might be expected to run long-term (or indefinitely). This
# hook helps us to politely ask the agent whether it is OK to terminate.
# Returning exit status 1 means the agent cannot be terminated.
# Returning 0 means that the agent was prepared for termination, and
# it has to be removed now. This is useful for gently downsizing
# the agent count while converging to `cmd_converge_to`.
cmd_try_release: /bin/false

# List of resalloc tags to use while requesting tickets
tags:
- arch_x86_64

# Note that we use the 'request_survives_server_restart' resalloc client option,
# so the resalloc server must be running to avoid the overall system hang!
resalloc_server: "http://resalloc-server:49100"

# Where to log events.
logfile: /var/log/resalloc-agent-spawner/agent-spawner.log

# How to connect to redis-db. By default connects to 127.0.0.1:6379.
#redis_db: null
redis_host: redis
#redis_port: null
#redis_password: null
124 changes: 124 additions & 0 deletions containers/helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
"""
resalloc agent spawner helpers
"""

import os
import subprocess

from resalloc.helpers import load_config_file


def rk_to_tid(key):
    """
    Convert a Redis agent key into the Resalloc ticket ID it carries.

    The key is split on ":" and the second field is returned as an integer.
    """
    fields = key.split(":")
    ticket_field = fields[1]
    return int(ticket_field)


def tid_to_rk(ticket):
    """
    Convert a Resalloc ticket ID into the matching Redis agent key.

    The key is the ticket ID prefixed with "agent:".
    """
    return "agent:{}".format(ticket)


def get_config():
    """
    Load the agent-spawner YAML configuration and fill in defaults.

    The file ``config.yaml`` is read from the directory named by the
    ``CONFIG_DIR`` environment variable, falling back to
    ``/etc/resalloc-agent-spawner`` when the variable is unset.  Top-level
    options and per-group hooks that are missing from the file get default
    values.

    Returns a ``dict`` subclass whose top-level keys can also be read as
    attributes (``config.logfile`` as well as ``config["logfile"]``).
    """
    conf_dir = os.environ.get("CONFIG_DIR", "/etc/resalloc-agent-spawner")
    config = load_config_file(os.path.join(conf_dir, "config.yaml"))

    config.setdefault("agent_groups", {})
    config.setdefault("resalloc_server", "http://localhost:49100")
    config.setdefault("logfile", "/tmp/agent-spawner.log")

    # A key that is present but has no value (e.g. `agent_groups:` followed
    # only by comments) presumably loads as None, which setdefault() above
    # does not replace.  Treat it the same as "no groups configured".
    if config["agent_groups"] is None:
        config["agent_groups"] = {}

    groups = config["agent_groups"]
    for group_id, group in groups.items():
        if group is None:
            # Same situation one level down: a group declared without any
            # options.  Re-binding an existing key is safe while iterating.
            group = groups[group_id] = {}
        group.setdefault("cmd_converge_to", "/usr/bin/echo 1")
        group.setdefault("cmd_check_finished", "/bin/false")
        group.setdefault("cmd_prepare", "/bin/true")
        group.setdefault("cmd_terminate", "/bin/true")
        group.setdefault("cmd_try_release", "/bin/false")
        # A fresh list per group, so groups never share one mutable default.
        group.setdefault("tags", ["please-specify-some-tags"])

    # This dictionary is passed to redis APIs and other parts of code which
    # expect configuration options to be attributes rather than key/value
    # pairs.  This class works around that requirement.
    # Thanks to https://stackoverflow.com/a/14620633
    class AttrDict(dict):
        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            # Make the instance its own attribute namespace.
            self.__dict__ = self

    return AttrDict(config)


class CmdCallerMixin:
    """
    Mixin that runs the per-group command hooks from the configuration.

    The host class is expected to provide ``self.opts`` (the configuration
    mapping, with an ``"agent_groups"`` key) and ``self.log`` (a logger).
    Every hook is executed through the shell.
    """

    def _cmd(self, group_id, cmd_id):
        groups = self.opts["agent_groups"]
        return groups[group_id][cmd_id]

    def _run_data_hook(self, group_id, cmd_id, data):
        # Shared runner for the hooks that receive the resource data through
        # the environment; hands back the finished process.
        command = self._cmd(group_id, cmd_id)
        kwargs = self.subproces_kwargs(data)
        return subprocess.run(command, check=False, **kwargs)

    def cmd_converge_to(self, group_id):
        """
        Ask the hook for the ideal number of agents in the given group.

        Returns the integer printed on the hook's stdout, or None when the
        hook exits non-zero or prints something that is not an integer.
        """
        hook = self._cmd(group_id, "cmd_converge_to")
        proc = subprocess.run(hook, shell=True, check=False,
                              stdout=subprocess.PIPE)
        if proc.returncode != 0:
            self.log.debug("Failing to run converge-to hook")
            return None

        try:
            return int(proc.stdout.decode("utf-8").strip())
        except ValueError:
            self.log.error("Converge-to hook failure, expected int, "
                           "got: %s", proc.stdout)
            return None

    def cmd_try_release(self, group_id, data):
        """
        Run the hook that tries to release the resource.

        Returns True when the hook exits with status 0.
        """
        proc = self._run_data_hook(group_id, "cmd_try_release", data)
        return proc.returncode == 0

    def cmd_is_finished(self, group_id, data):
        """
        Run the hook that checks whether the agent has finished.

        Returns True when the hook exits with status 0.
        """
        proc = self._run_data_hook(group_id, "cmd_check_finished", data)
        return proc.returncode == 0

    def cmd_take(self, group_id, data):
        """
        Run the hook that initializes the agent.

        Returns True when the hook exits with status 0.
        """
        proc = self._run_data_hook(group_id, "cmd_prepare", data)
        return proc.returncode == 0

    def cmd_terminate(self, group_id, data):
        """
        Run the hook that prepares the agent for removal.

        The exit status of the hook is ignored.
        """
        self._run_data_hook(group_id, "cmd_terminate", data)

    def subproces_kwargs(self, data):
        """
        Build the "generic" subprocess keyword arguments for a hook.

        The hook runs through the shell with an environment that contains
        only AGENT_SPAWNER_RESOURCE_DATA, set to the string form of `data`.
        """
        hook_env = {"AGENT_SPAWNER_RESOURCE_DATA": str(data)}
        return {"env": hook_env, "shell": True}
24 changes: 21 additions & 3 deletions containers/hub.Containerfile
Original file line number Diff line number Diff line change
@@ -1,20 +1,31 @@
# Builds should be available on https://quay.io/organization/openscanhub-fedora-infra/ocp
FROM registry.access.redhat.com/ubi9/httpd-24
# TODO: Check if we can use ubi.
# FROM registry.access.redhat.com/ubi9/httpd-24
# resalloc dependencies require a subscription.
FROM quay.io/sclorg/httpd-24-c9s

USER 0

RUN dnf install -y dnf-plugins-core https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm

RUN dnf config-manager --set-enabled crb extras-common

# enable installation of gettext message objects
RUN rm /etc/rpm/macros.image-language-conf

# TODO: How to enable installation of a specific commit?
RUN dnf copr enable -y @openscanhub/devel
# RUN dnf copr enable -y @openscanhub/devel
RUN dnf copr enable -y packit/openscanhub-openscanhub-234
RUN dnf copr enable -y praiskup/resalloc

# TODO: This would install osh-hub configurations from the `hub-conf-devel` package. How to install
# non-devel configurations for fedora infrastructure?
# TODO: There may be a race condition here, as it installs the latest `osh-hub` package, which may
# have been built after a specific commit.
RUN dnf install -y osh-hub osh-hub-conf-devel openssl krb5-workstation
# tzdata is a dependency for django
RUN dnf install -y osh-hub osh-hub-conf-devel openssl krb5-workstation tzdata

RUN dnf install -y resalloc-agent-spawner python3-resalloc

# TODO: Shall `/var/log/osh/` be a persistent path? Shall this log be redirected to another logging
# service like splunk?
Expand Down Expand Up @@ -43,8 +54,15 @@ COPY configs/redhat.css /usr/lib/python3.9/site-packages/osh/hub/static/css/redh
# COPY configs/settings_local.ocp.py /usr/lib/python3.9/site-packages/osh/hub/settings_local.py
# COPY configs/osh-hub-httpd.conf /etc/httpd/conf.d/osh-hub-httpd.conf

COPY configs/resalloc-agent-spawner-config.yaml /etc/resalloc-agent-spawner/config.yaml
RUN mkdir /var/log/resalloc-agent-spawner
RUN chmod g+rwx /var/log/resalloc-agent-spawner

COPY scripts/run_hub.sh /run_hub.sh
# TODO: Remove this before merging.
COPY helpers.py /usr/lib/python3.6/site-packages/resalloc_agent_spawner/helpers.py
RUN chmod a+x /run_hub.sh
# This is for backward compatibility. Remove this?
USER 1001
ENV PATH=/sbin:/bin:/usr/sbin:/usr/bin
CMD /run_hub.sh

0 comments on commit 7bc120b

Please sign in to comment.