From 2a3f4415656f6347142806ad2c3bbf2fb3bfbe2a Mon Sep 17 00:00:00 2001
From: Yanks Yoon <37652070+yanksyoon@users.noreply.github.com>
Date: Thu, 3 Oct 2024 20:35:42 +0800
Subject: [PATCH 1/7] fix: add apt-update to userdata (#385)

* fix: add apt-update to userdata
* test: update dep conflict
* chore: update commit hash
---
 requirements.txt                   | 2 +-
 tests/integration/requirements.txt | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index c0420e69d..da8088eed 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -12,4 +12,4 @@ cosl ==0.0.15
 # juju 3.1.2.0 depends on pyyaml<=6.0 and >=5.1.2
 PyYAML ==6.0.*
 pyOpenSSL==24.2.1
-github_runner_manager @ git+https://github.com/canonical/github-runner-manager.git@30fcc502eba82c4c4216f66f61380cfcbed517b1
+github_runner_manager @ git+https://github.com/canonical/github-runner-manager.git@96178d52d4508775b7c6487c9168c529776426f1
diff --git a/tests/integration/requirements.txt b/tests/integration/requirements.txt
index 29e88219a..7363d00b0 100644
--- a/tests/integration/requirements.txt
+++ b/tests/integration/requirements.txt
@@ -1,4 +1,4 @@
 GitPython>3,<4
 pygithub
 kombu==5.4.*
-pymongo==4.9.*
+pymongo==4.10.*

From 40c9b5d43db1ae16e43521ddf9e037ca98ef8107 Mon Sep 17 00:00:00 2001
From: Christopher Bartz
Date: Mon, 7 Oct 2024 13:44:28 +0200
Subject: [PATCH 2/7] Fix regressions for LXD runners (#387)

* pin branch
* don't check hostname for lxd in e2e
* remove test_reactive for lxd
* fix regression
* remove openstack tests and run lxd tests - REVERT ME
* Revert "fix regression"

This reverts commit 0ff8a9556e605a22e3ad0220a7591999783f5c7b.

* include env variables
* only assume flush idle for openstack
* increase timeout
* remove unnecessary and invalid wait_for
* use GITHUB_HEAD_REF for testing using pull_request
* fix test appname to start with a letter after hyphen
* lint
* use GITHUB_HEAD_REF for other events
* fix app_name
* add workflow_dispatch
* add workflow_dispatch
* update pull request template
* re-add openstack tests
* revert pinning
* update changelog
---
 .github/pull_request_template.md              |  1 +
 .github/workflows/e2e_test_run.yaml           |  1 +
 .github/workflows/scheduled_e2e_test.yaml     |  6 ++-
 .../workflows/scheduled_integration_test.yaml |  4 +-
 README.md                                     |  2 +-
 docs/changelog.md                             |  4 ++
 templates/env.j2                              | 14 +++++++
 tests/integration/conftest.py                 |  7 +++-
 tests/integration/test_charm_base_image.py    |  4 +-
 tests/integration/test_charm_runner.py        | 41 ++++++++++---------
 10 files changed, 56 insertions(+), 28 deletions(-)

diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
index 869c6b82c..6eb30c88f 100644
--- a/.github/pull_request_template.md
+++ b/.github/pull_request_template.md
@@ -29,5 +29,6 @@ Applicable spec:
 - [ ] The documentation for charmhub is updated.
 - [ ] The PR is tagged with appropriate label (`urgent`, `trivial`, `complex`).
 - [ ] The changelog is updated with changes that affects the users of the charm.
+- [ ] The changes do not introduce any regression in code or tests related to LXD runner mode.
\ No newline at end of file diff --git a/.github/workflows/e2e_test_run.yaml b/.github/workflows/e2e_test_run.yaml index 4eeeccbe1..7abfa10f5 100644 --- a/.github/workflows/e2e_test_run.yaml +++ b/.github/workflows/e2e_test_run.yaml @@ -37,6 +37,7 @@ jobs: runs-on: [self-hosted, linux, "${{ inputs.runner-tag }}"] steps: - name: Hostname is set to "github-runner" + if: "${{ github.event.inputs.runner-virt-type == 'openstack' }}" run: sudo hostnamectl hostname | grep github-runner # Snapd can have some issues in privileged LXD containers without setting # security.nesting=True and this. diff --git a/.github/workflows/scheduled_e2e_test.yaml b/.github/workflows/scheduled_e2e_test.yaml index 508ead224..7e3099109 100644 --- a/.github/workflows/scheduled_e2e_test.yaml +++ b/.github/workflows/scheduled_e2e_test.yaml @@ -7,6 +7,8 @@ on: schedule: - cron: "15 4 * * 2" + workflow_dispatch: + jobs: build-charm: name: Build Charm @@ -246,12 +248,12 @@ jobs: # https://docs.github.com/en/actions/using-workflows/triggering-a-workflow#triggering-a-workflow-from-a-workflow GH_TOKEN: ${{ matrix.event.name == 'workflow_dispatch' && secrets.GITHUB_TOKEN || secrets.E2E_TESTING_TOKEN }} run: | - # Base any future branches on the current branch + # Base any future branches on the current branch. REF_SHA=$(gh api \ -H "Accept: application/vnd.github+json" \ -H "X-GitHub-Api-Version: 2022-11-28" \ /repos/${{ secrets.E2E_TESTING_REPO }}/git/ref/heads/$GITHUB_REF_NAME \ - --jq .object.sha) + --jq .object.sha) || (echo "Failed to get REF_SHA using $GITHUB_REF_NAME" && false) # Create a temporary reference/branch # For push, this should trigger the "Push Event Tests" workflow automatically diff --git a/.github/workflows/scheduled_integration_test.yaml b/.github/workflows/scheduled_integration_test.yaml index 6fe0604c7..e5d017d39 100644 --- a/.github/workflows/scheduled_integration_test.yaml +++ b/.github/workflows/scheduled_integration_test.yaml @@ -7,6 +7,8 @@ on: schedule: - cron: "15 5 * * 2" + workflow_dispatch: + jobs: # test option values defined at test/conftest.py are passed on via repository secret # INTEGRATION_TEST_ARGS to operator-workflows automatically. @@ -29,4 +31,4 @@ jobs: pre-run-script: scripts/pre-integration-test.sh provider: lxd test-tox-env: integration-juju3.1 - modules: '["test_charm_base_image", "test_charm_fork_repo", "test_charm_no_runner", "test_charm_scheduled_events", "test_charm_lxd_runner", "test_charm_runner", "test_charm_metrics_success", "test_charm_metrics_failure", "test_self_hosted_runner", "test_charm_with_proxy", "test_charm_with_juju_storage", "test_debug_ssh", "test_charm_upgrade", "test_reactive"]' + modules: '["test_charm_base_image", "test_charm_fork_repo", "test_charm_no_runner", "test_charm_scheduled_events", "test_charm_lxd_runner", "test_charm_runner", "test_charm_metrics_success", "test_charm_metrics_failure", "test_self_hosted_runner", "test_charm_with_proxy", "test_charm_with_juju_storage", "test_debug_ssh", "test_charm_upgrade"]' diff --git a/README.md b/README.md index 2fdd88003..92a1952e3 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ ## Description -This machine charm creates [self-hosted runners for running GitHub Actions](https://docs.github.com/en/actions/hosting-your-own-runners/managing-self-hosted-runners/about-self-hosted-runners). 
Each unit of this charm will start a configurable number of LXD based containers and virtual +This machine charm creates [self-hosted runners for running GitHub Actions](https://docs.github.com/en/actions/hosting-your-own-runners/managing-self-hosted-runners/about-self-hosted-runners). Each unit of this charm will start a configurable number of OpenStack or LXD based containers and virtual machines to host them. Every runner performs only one job, after which it unregisters from GitHub to ensure that each job runs in a clean environment. diff --git a/docs/changelog.md b/docs/changelog.md index 07369049b..3c84ea120 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -1,5 +1,9 @@ # Changelog +### 2024-10-07 + +- Fix removal of proxy vars in `.env` file for LXD runners. + ### 2024-09-18 - Changed code to be able to spawn a runner in reactive mode. diff --git a/templates/env.j2 b/templates/env.j2 index f7da33219..c0de54aad 100644 --- a/templates/env.j2 +++ b/templates/env.j2 @@ -1,4 +1,18 @@ PATH=/home/ubuntu/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/snap/bin +{% if proxies.http %} +HTTP_PROXY={{proxies.http}} +http_proxy={{proxies.http}} +{% endif %} +{% if proxies.https %} +HTTPS_PROXY={{proxies.https}} +https_proxy={{proxies.https}} +{% endif %} +{% if proxies.ftp_proxy %} +{% endif %} +{% if proxies.no_proxy %} +NO_PROXY={{proxies.no_proxy}} +no_proxy={{proxies.no_proxy}} +{% endif %} {% if dockerhub_mirror %} DOCKERHUB_MIRROR={{dockerhub_mirror}} CONTAINER_REGISTRY_URL={{dockerhub_mirror}} diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index e88b399e8..decd988f5 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -85,7 +85,12 @@ def existing_app(pytestconfig: pytest.Config) -> Optional[str]: def app_name(existing_app: Optional[str]) -> str: """Randomized application name.""" # Randomized app name to avoid collision when runner is connecting to GitHub. - return existing_app or f"test-{secrets.token_hex(4)}" + # The char after the hyphen has to be a letter. 
+ return ( + existing_app + or f"test-{random.choice(string.ascii_lowercase)}" + f"{''.join(random.choices(string.ascii_lowercase + string.digits, k=7))}" + ) @pytest.fixture(scope="module", name="openstack_clouds_yaml") diff --git a/tests/integration/test_charm_base_image.py b/tests/integration/test_charm_base_image.py index 8167bcbb8..ceee4a6b7 100644 --- a/tests/integration/test_charm_base_image.py +++ b/tests/integration/test_charm_base_image.py @@ -12,7 +12,6 @@ from tests.integration.helpers.common import ( DISPATCH_E2E_TEST_RUN_WORKFLOW_FILENAME, dispatch_workflow, - wait_for, ) from tests.integration.helpers.lxd import ( ensure_charm_has_runner, @@ -48,7 +47,7 @@ async def test_runner_base_image( assert "noble" in str(stdout) # Workflow completes successfully - workflow_run = await dispatch_workflow( + await dispatch_workflow( app=app_no_wait, branch=test_github_branch, github_repository=github_repository, @@ -56,4 +55,3 @@ async def test_runner_base_image( workflow_id_or_name=DISPATCH_E2E_TEST_RUN_WORKFLOW_FILENAME, dispatch_input={"runner-tag": app_no_wait.name, "runner-virt-type": "lxd"}, ) - await wait_for(lambda: workflow_run.update() and workflow_run.status == "completed") diff --git a/tests/integration/test_charm_runner.py b/tests/integration/test_charm_runner.py index cc3fb169a..96837e28d 100644 --- a/tests/integration/test_charm_runner.py +++ b/tests/integration/test_charm_runner.py @@ -86,7 +86,7 @@ async def test_flush_runner_and_resource_config( 4. a. The runner name should be different to the runner prior running the action. b. LXD profile matching virtual machine resources of step 2 exists. - 5. The runner is not flushed since by default it flushes idle. + 5. The runner is not flushed since by default it flushes idle. (Only valid for OpenStack) Test are combined to reduce number of runner spawned. """ @@ -138,28 +138,29 @@ async def test_flush_runner_and_resource_config( assert new_runner_names[0] != runner_names[0] # 5. 
- workflow = await dispatch_workflow( - app=app, - branch=test_github_branch, - github_repository=github_repository, - conclusion="success", - workflow_id_or_name=DISPATCH_WAIT_TEST_WORKFLOW_FILENAME, - dispatch_input={"runner": app.name, "minutes": "5"}, - wait=False, - ) - await wait_for(lambda: workflow.update() or workflow.status == "in_progress") - action = await app.units[0].run_action("flush-runners") - await action.wait() + if instance_type == InstanceType.OPENSTACK: + workflow = await dispatch_workflow( + app=app, + branch=test_github_branch, + github_repository=github_repository, + conclusion="success", + workflow_id_or_name=DISPATCH_WAIT_TEST_WORKFLOW_FILENAME, + dispatch_input={"runner": app.name, "minutes": "5"}, + wait=False, + ) + await wait_for(lambda: workflow.update() or workflow.status == "in_progress") + action = await app.units[0].run_action("flush-runners") + await action.wait() - assert action.status == "completed" - assert action.results["delta"]["virtual-machines"] == "0" + assert action.status == "completed" + assert action.results["delta"]["virtual-machines"] == "0" - await wait_for(lambda: workflow.update() or workflow.status == "completed") - action = await app.units[0].run_action("flush-runners") - await action.wait() + await wait_for(lambda: workflow.update() or workflow.status == "completed") + action = await app.units[0].run_action("flush-runners") + await action.wait() - assert action.status == "completed" - assert action.results["delta"]["virtual-machines"] == "1" + assert action.status == "completed" + assert action.results["delta"]["virtual-machines"] == "1" @pytest.mark.openstack From 3778f19b33b8117a66796e6a3bf9d50b65fe84db Mon Sep 17 00:00:00 2001 From: Christopher Bartz Date: Mon, 7 Oct 2024 17:10:44 +0200 Subject: [PATCH 3/7] Fix: Regression on removal of residual dirs (#388) * ensure remove_residual_venv_dirs runs first * run test_charm_upgrade on every PR * update changelog * remove TODO (should be an item in the backlog) --- .github/workflows/integration_test.yaml | 6 ++-- docs/changelog.md | 3 +- src-docs/charm.md | 10 +++---- src/charm.py | 39 +++++++++++++------------ tests/integration/test_charm_upgrade.py | 2 +- 5 files changed, 31 insertions(+), 29 deletions(-) diff --git a/.github/workflows/integration_test.yaml b/.github/workflows/integration_test.yaml index 8e0bc700a..eb141a091 100644 --- a/.github/workflows/integration_test.yaml +++ b/.github/workflows/integration_test.yaml @@ -19,11 +19,11 @@ jobs: pre-run-script: scripts/pre-integration-test.sh provider: lxd test-tox-env: integration-juju3.1 - # These important local LXD test has no OpenStack integration versions. + # These important local LXD test have no OpenStack integration versions. # test_charm_scheduled_events ensures reconcile events are fired on a schedule. # test_debug_ssh ensures tmate SSH actions works. - # TODO: Add OpenStack integration versions of these tests. - modules: '["test_charm_scheduled_events", "test_debug_ssh"]' + # The test test_charm_upgrade needs to run to ensure the charm can be upgraded. 
+ modules: '["test_charm_scheduled_events", "test_debug_ssh", "test_charm_upgrade"]' openstack-interface-tests-private-endpoint: name: openstack interface test using private-endpoint uses: canonical/operator-workflows/.github/workflows/integration_test.yaml@main diff --git a/docs/changelog.md b/docs/changelog.md index 3c84ea120..6ce6db61b 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -2,7 +2,8 @@ ### 2024-10-07 -- Fix removal of proxy vars in `.env` file for LXD runners. +- Fixed a regression in the removal of leftover directories. +- Fixed the removal of proxy vars in `.env` file for LXD runners. ### 2024-09-18 diff --git a/src-docs/charm.md b/src-docs/charm.md index 7bed9337b..8479c1eb5 100644 --- a/src-docs/charm.md +++ b/src-docs/charm.md @@ -22,7 +22,7 @@ Charm for creating and managing GitHub self-hosted runner instances. --- - + ## function `catch_charm_errors` @@ -48,7 +48,7 @@ Catch common errors in charm. --- - + ## function `catch_action_errors` @@ -74,7 +74,7 @@ Catch common errors in actions. --- - + ## class `ReconcileRunnersEvent` Event representing a periodic check to ensure runners are ok. @@ -85,7 +85,7 @@ Event representing a periodic check to ensure runners are ok. --- - + ## class `GithubRunnerCharm` Charm for managing GitHub self-hosted runners. @@ -102,7 +102,7 @@ Charm for managing GitHub self-hosted runners. - `ram_pool_path`: The path to memdisk storage. - `kernel_module_path`: The path to kernel modules. - + ### method `__init__` diff --git a/src/charm.py b/src/charm.py index ed98094ee..a53da1d41 100755 --- a/src/charm.py +++ b/src/charm.py @@ -7,29 +7,11 @@ # pylint: disable=too-many-lines """Charm for creating and managing GitHub self-hosted runner instances.""" -from github_runner_manager.manager.cloud_runner_manager import ( - GitHubRunnerConfig, - SupportServiceConfig, -) -from github_runner_manager.manager.runner_manager import ( - FlushMode, - RunnerManager, - RunnerManagerConfig, -) -from github_runner_manager.manager.runner_scaler import RunnerScaler -from github_runner_manager.openstack_cloud.openstack_runner_manager import ( - OpenStackCloudConfig, - OpenStackRunnerManager, - OpenStackRunnerManagerConfig, - OpenStackServerConfig, -) -from github_runner_manager.reactive.types_ import QueueConfig as ReactiveQueueConfig -from github_runner_manager.reactive.types_ import RunnerConfig as ReactiveRunnerConfig -from github_runner_manager.types_.github import GitHubPath, GitHubRunnerStatus, parse_github_path from utilities import bytes_with_unit_to_kib, execute_command, remove_residual_venv_dirs, retry # This is a workaround for https://bugs.launchpad.net/juju/+bug/2058335 +# It is important that this is run before importation of any other modules. 
 # pylint: disable=wrong-import-position,wrong-import-order
 # TODO: 2024-07-17 remove this once the issue has been fixed
 remove_residual_venv_dirs()
@@ -48,6 +30,25 @@
 import ops
 from charms.data_platform_libs.v0.data_interfaces import DatabaseRequires
 from charms.grafana_agent.v0.cos_agent import COSAgentProvider
+from github_runner_manager.manager.cloud_runner_manager import (
+    GitHubRunnerConfig,
+    SupportServiceConfig,
+)
+from github_runner_manager.manager.runner_manager import (
+    FlushMode,
+    RunnerManager,
+    RunnerManagerConfig,
+)
+from github_runner_manager.manager.runner_scaler import RunnerScaler
+from github_runner_manager.openstack_cloud.openstack_runner_manager import (
+    OpenStackCloudConfig,
+    OpenStackRunnerManager,
+    OpenStackRunnerManagerConfig,
+    OpenStackServerConfig,
+)
+from github_runner_manager.reactive.types_ import QueueConfig as ReactiveQueueConfig
+from github_runner_manager.reactive.types_ import RunnerConfig as ReactiveRunnerConfig
+from github_runner_manager.types_.github import GitHubPath, GitHubRunnerStatus, parse_github_path
 from ops.charm import (
     ActionEvent,
     CharmBase,
diff --git a/tests/integration/test_charm_upgrade.py b/tests/integration/test_charm_upgrade.py
index 73011d0ee..eb12dd591 100644
--- a/tests/integration/test_charm_upgrade.py
+++ b/tests/integration/test_charm_upgrade.py
@@ -40,7 +40,7 @@ async def test_charm_upgrade(
     assert: the charm is upgraded successfully.
     """
     latest_stable_path = tmp_path / "github-runner.charm"
-    latest_stable_revision = 161  # update this value every release to stable.
+    latest_stable_revision = 256  # update this value every release to stable.
     # download the charm and inject lxd profile for testing
     retcode, stdout, stderr = await ops_test.juju(
         "download",

From 3c7063703185a6a1c8398f96db860a7432672512 Mon Sep 17 00:00:00 2001
From: Christopher Bartz
Date: Wed, 9 Oct 2024 06:40:15 +0200
Subject: [PATCH 4/7] feat: Reactive reconcile (#384)

* pin branch
* add integration test for downscaling
* update changelog
* only run reactive integration test on push - REVERT ME
* enable tmate debug
* remove test_reactive for lxd
* fix test
* Revert "only run reactive integration test on push - REVERT ME"

This reverts commit 5332b569460fc751e48e7e8a0a877f645773b179.
* remove outdated src-docs * some changes * explain TimeoutError * pin correct commit * clear queue before reconciling in setup * regenerate src-docs --- docs/changelog.md | 12 +- generate-src-docs.sh | 2 +- requirements.txt | 2 +- src-docs/{charm.md => charm.py.md} | 26 +- .../{charm_state.md => charm_state.py.md} | 556 ++++++++---------- src-docs/{errors.md => errors.py.md} | 150 ++--- .../{event_timer.md => event_timer.py.md} | 94 ++- src-docs/{firewall.md => firewall.py.md} | 108 ++-- .../{github_client.md => github_client.py.md} | 6 +- src-docs/github_type.md | 189 ------ src-docs/{logrotate.md => logrotate.py.md} | 34 +- src-docs/{lxd.md => lxd.py.md} | 546 +++++++++-------- src-docs/{lxd_type.md => lxd_type.py.md} | 117 ++-- src-docs/managed_requests.md | 32 - src-docs/metrics.events.md | 241 -------- src-docs/metrics.github.md | 46 -- src-docs/metrics.md | 29 - src-docs/metrics.runner.md | 176 ------ src-docs/metrics.runner_logs.md | 48 -- src-docs/metrics.storage.md | 194 ------ src-docs/metrics.type.md | 27 - src-docs/openstack_cloud.md | 60 -- src-docs/openstack_cloud.openstack_cloud.md | 246 -------- src-docs/openstack_cloud.openstack_manager.md | 212 ------- ...penstack_cloud.openstack_runner_manager.md | 282 --------- src-docs/reactive.consumer.md | 86 --- src-docs/reactive.job.md | 225 ------- src-docs/reactive.md | 14 - src-docs/reactive.runner.md | 28 - src-docs/reactive.runner_manager.md | 57 -- src-docs/repo_policy_compliance_client.md | 67 --- src-docs/{runner.md => runner.py.md} | 114 ++-- ...runner_manager.md => runner_manager.py.md} | 24 +- ...ager_type.md => runner_manager_type.py.md} | 82 +-- src-docs/runner_type.md | 192 ------ src-docs/runner_type.py.md | 102 ++++ src-docs/{shared_fs.md => shared_fs.py.md} | 2 +- src-docs/{utilities.md => utilities.py.md} | 2 +- src/logrotate.py | 2 +- tests/integration/conftest.py | 46 +- tests/integration/helpers/common.py | 29 +- tests/integration/test_reactive.py | 205 +++++-- 42 files changed, 1113 insertions(+), 3599 deletions(-) rename src-docs/{charm.md => charm.py.md} (90%) rename src-docs/{charm_state.md => charm_state.py.md} (74%) rename src-docs/{errors.md => errors.py.md} (51%) rename src-docs/{event_timer.md => event_timer.py.md} (75%) rename src-docs/{firewall.md => firewall.py.md} (76%) rename src-docs/{github_client.md => github_client.py.md} (83%) delete mode 100644 src-docs/github_type.md rename src-docs/{logrotate.md => logrotate.py.md} (81%) rename src-docs/{lxd.md => lxd.py.md} (85%) rename src-docs/{lxd_type.md => lxd_type.py.md} (65%) delete mode 100644 src-docs/managed_requests.md delete mode 100644 src-docs/metrics.events.md delete mode 100644 src-docs/metrics.github.md delete mode 100644 src-docs/metrics.md delete mode 100644 src-docs/metrics.runner.md delete mode 100644 src-docs/metrics.runner_logs.md delete mode 100644 src-docs/metrics.storage.md delete mode 100644 src-docs/metrics.type.md delete mode 100644 src-docs/openstack_cloud.md delete mode 100644 src-docs/openstack_cloud.openstack_cloud.md delete mode 100644 src-docs/openstack_cloud.openstack_manager.md delete mode 100644 src-docs/openstack_cloud.openstack_runner_manager.md delete mode 100644 src-docs/reactive.consumer.md delete mode 100644 src-docs/reactive.job.md delete mode 100644 src-docs/reactive.md delete mode 100644 src-docs/reactive.runner.md delete mode 100644 src-docs/reactive.runner_manager.md delete mode 100644 src-docs/repo_policy_compliance_client.md rename src-docs/{runner.md => runner.py.md} (74%) rename 
src-docs/{runner_manager.md => runner_manager.py.md} (89%) rename src-docs/{runner_manager_type.md => runner_manager_type.py.md} (60%) delete mode 100644 src-docs/runner_type.md create mode 100644 src-docs/runner_type.py.md rename src-docs/{shared_fs.md => shared_fs.py.md} (98%) rename src-docs/{utilities.md => utilities.py.md} (98%) diff --git a/docs/changelog.md b/docs/changelog.md index 6ce6db61b..de64a9810 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -2,8 +2,18 @@ ### 2024-10-07 -- Fixed a regression in the removal of leftover directories. - Fixed the removal of proxy vars in `.env` file for LXD runners. +- Fixed a regression in the removal of leftover directories. +- Improved reconciliation for reactive runners. + +### 2024-09-27 + +- Added job label validation when consuming a job from the message queue. + +### 2024-09-24 + +- Added support for spawning a runner reactively. +- Fixed a bug where busy runners are killed instead of only idle runners. ### 2024-09-18 diff --git a/generate-src-docs.sh b/generate-src-docs.sh index 7f9bbe506..20ce5990b 100644 --- a/generate-src-docs.sh +++ b/generate-src-docs.sh @@ -3,4 +3,4 @@ # Copyright 2024 Canonical Ltd. # See LICENSE file for licensing details. -lazydocs --no-watermark --output-path src-docs src +lazydocs --no-watermark --output-path src-docs src/*.py diff --git a/requirements.txt b/requirements.txt index da8088eed..3bed85a6d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,4 +12,4 @@ cosl ==0.0.15 # juju 3.1.2.0 depends on pyyaml<=6.0 and >=5.1.2 PyYAML ==6.0.* pyOpenSSL==24.2.1 -github_runner_manager @ git+https://github.com/canonical/github-runner-manager.git@96178d52d4508775b7c6487c9168c529776426f1 +github_runner_manager @ git+https://github.com/canonical/github-runner-manager.git@33bbaff42d7cc0f250006fdd08d24659cef364c9 diff --git a/src-docs/charm.md b/src-docs/charm.py.md similarity index 90% rename from src-docs/charm.md rename to src-docs/charm.py.md index 8479c1eb5..4908713db 100644 --- a/src-docs/charm.md +++ b/src-docs/charm.py.md @@ -2,7 +2,7 @@ -# module `charm` +# module `charm.py` Charm for creating and managing GitHub self-hosted runner instances. **Global Variables** @@ -74,19 +74,6 @@ Catch common errors in actions. --- - - -## class `ReconcileRunnersEvent` -Event representing a periodic check to ensure runners are ok. - - - - - ---- - - - ## class `GithubRunnerCharm` Charm for managing GitHub self-hosted runners. @@ -104,7 +91,7 @@ Charm for managing GitHub self-hosted runners. -### method `__init__` +### function `__init__` ```python __init__(*args: Any, **kwargs: Any) → None @@ -165,3 +152,12 @@ Unit that this execution is responsible for. +--- + +## class `ReconcileRunnersEvent` +Event representing a periodic check to ensure runners are ok. + + + + + diff --git a/src-docs/charm_state.md b/src-docs/charm_state.py.md similarity index 74% rename from src-docs/charm_state.md rename to src-docs/charm_state.py.md index 9b4889d5e..354e0416a 100644 --- a/src-docs/charm_state.md +++ b/src-docs/charm_state.py.md @@ -2,11 +2,12 @@ -# module `charm_state` +# module `charm_state.py` State of the Charm. **Global Variables** --------------- +- **REACTIVE_MODE_NOT_SUPPORTED_WITH_LXD_ERR_MSG** - **ARCHITECTURES_ARM64** - **ARCHITECTURES_X86** - **BASE_IMAGE_CONFIG_NAME** @@ -39,8 +40,6 @@ State of the Charm. --- - - ## class `AnyHttpsUrl` Represents an HTTPS URL. @@ -56,28 +55,15 @@ Represents an HTTPS URL. --- - - -## class `GithubConfig` -Charm configuration related to GitHub. 
+## class `Arch` +Supported system architectures. **Attributes:** - - `token`: The Github API access token (PAT). - - `path`: The Github org/repo path. - - - -### method `__init__` - -```python -__init__(token: str, path: GitHubOrg | GitHubRepo) → None -``` - - - + - `ARM64`: Represents an ARM64 system architecture. + - `X64`: Represents an X64/AMD64 system architecture. @@ -85,111 +71,104 @@ __init__(token: str, path: GitHubOrg | GitHubRepo) → None --- - - -### classmethod `from_charm` - -```python -from_charm(charm: CharmBase) → GithubConfig -``` - -Get github related charm configuration values from charm. - - - -**Args:** - - - `charm`: The charm instance. +## class `BaseImage` +The ubuntu OS base image to build and deploy runners on. -**Raises:** +**Attributes:** - - `CharmConfigInvalidError`: If an invalid configuration value was set. + - `JAMMY`: The jammy ubuntu LTS image. + - `NOBLE`: The noble ubuntu LTS image. -**Returns:** - The parsed GitHub configuration values. --- - +## class `CharmConfig` +General charm configuration. -## class `VirtualMachineResources` -Virtual machine resource configuration. +Some charm configurations are grouped into other configuration models. **Attributes:** - - `cpu`: Number of vCPU for the virtual machine. - - `memory`: Amount of memory for the virtual machine. - - `disk`: Amount of disk for the virtual machine. - + - `denylist`: List of IPv4 to block the runners from accessing. + - `dockerhub_mirror`: Private docker registry as dockerhub mirror for the runners to use. + - `labels`: Additional runner labels to append to default (i.e. os, flavor, architecture). + - `openstack_clouds_yaml`: The openstack clouds.yaml configuration. + - `path`: GitHub repository path in the format '/', or the GitHub organization name. + - `reconcile_interval`: Time between each reconciliation of runners in minutes. + - `repo_policy_compliance`: Configuration for the repo policy compliance service. + - `token`: GitHub personal access token for GitHub API. --- - - -## class `Arch` -Supported system architectures. + +### classmethod `check_reconcile_interval` +```python +check_reconcile_interval(reconcile_interval: int) → int +``` -**Attributes:** - - - `ARM64`: Represents an ARM64 system architecture. - - `X64`: Represents an X64/AMD64 system architecture. +Validate the general charm configuration. +**Args:** + + - `reconcile_interval`: The value of reconcile_interval passed to class instantiation. ---- - +**Raises:** + + - `ValueError`: if an invalid reconcile_interval value of less than 2 has been passed. -## class `RunnerStorage` -Supported storage as runner disk. +**Returns:** + The validated reconcile_interval value. -**Attributes:** - - - `JUJU_STORAGE`: Represents runner storage from Juju storage. - - `MEMORY`: Represents tempfs storage (ramdisk). +--- + +### classmethod `from_charm` +```python +from_charm(charm: CharmBase) → CharmConfig +``` +Initialize the config from charm. ---- - -## class `InstanceType` -Type of instance for runner. +**Args:** + + - `charm`: The charm instance. -**Attributes:** +**Raises:** - - `LOCAL_LXD`: LXD instance on the local juju machine. - - `OPENSTACK`: OpenStack instance on a cloud. + - `CharmConfigInvalidError`: If any invalid configuration has been set on the charm. +**Returns:** + Current config of the charm. --- - - ## class `CharmConfigInvalidError` Raised when charm config is invalid. @@ -199,9 +178,9 @@ Raised when charm config is invalid. - `msg`: Explanation of the error. 
- + -### method `__init__` +### function `__init__` ```python __init__(msg: str) @@ -221,38 +200,43 @@ Initialize a new instance of the CharmConfigInvalidError exception. --- - - -## class `RepoPolicyComplianceConfig` -Configuration for the repo policy compliance service. +## class `CharmState` +The charm state. **Attributes:** - - `token`: Token for the repo policy compliance service. - - `url`: URL of the repo policy compliance service. + - `arch`: The underlying compute architecture, i.e. x86_64, amd64, arm64/aarch64. + - `charm_config`: Configuration of the juju charm. + - `is_metrics_logging_available`: Whether the charm is able to issue metrics. + - `proxy_config`: Proxy-related configuration. + - `instance_type`: The type of instances, e.g., local lxd, openstack. + - `reactive_config`: The charm configuration related to reactive spawning mode. + - `runner_config`: The charm configuration related to runner VM configuration. + - `ssh_debug_connections`: SSH debug connections configuration information. --- - + ### classmethod `from_charm` ```python -from_charm(charm: CharmBase) → RepoPolicyComplianceConfig +from_charm(charm: CharmBase, database: DatabaseRequires) → CharmState ``` -Initialize the config from charm. +Initialize the state from charm. **Args:** - `charm`: The charm instance. + - `database`: The database instance. @@ -263,123 +247,90 @@ Initialize the config from charm. **Returns:** - Current repo-policy-compliance config. - - ---- - - - -## class `OpenStackCloudsYAML` -The OpenStack clouds YAML dict mapping. - - - -**Attributes:** - - - `clouds`: The map of cloud name to cloud connection info. - - - + Current state of the charm. --- - - -## class `CharmConfig` -General charm configuration. - -Some charm configurations are grouped into other configuration models. +## class `GithubConfig` +Charm configuration related to GitHub. **Attributes:** - - `denylist`: List of IPv4 to block the runners from accessing. - - `dockerhub_mirror`: Private docker registry as dockerhub mirror for the runners to use. - - `labels`: Additional runner labels to append to default (i.e. os, flavor, architecture). - - `openstack_clouds_yaml`: The openstack clouds.yaml configuration. - - `path`: GitHub repository path in the format '/', or the GitHub organization name. - - `reconcile_interval`: Time between each reconciliation of runners in minutes. - - `repo_policy_compliance`: Configuration for the repo policy compliance service. - - `token`: GitHub personal access token for GitHub API. + - `token`: The Github API access token (PAT). + - `path`: The Github org/repo path. --- - + -### classmethod `check_reconcile_interval` +### classmethod `from_charm` ```python -check_reconcile_interval(reconcile_interval: int) → int +from_charm(charm: CharmBase) → GithubConfig ``` -Validate the general charm configuration. +Get github related charm configuration values from charm. **Args:** - - `reconcile_interval`: The value of reconcile_interval passed to class instantiation. + - `charm`: The charm instance. **Raises:** - - `ValueError`: if an invalid reconcile_interval value of less than 2 has been passed. + - `CharmConfigInvalidError`: If an invalid configuration value was set. **Returns:** - The validated reconcile_interval value. + The parsed GitHub configuration values. + --- - +## class `ImmutableConfigChangedError` +Represents an error when changing immutable charm state. 
-### classmethod `from_charm` + + +### function `__init__` ```python -from_charm(charm: CharmBase) → CharmConfig +__init__(msg: str) ``` -Initialize the config from charm. +Initialize a new instance of the ImmutableConfigChangedError exception. **Args:** - - `charm`: The charm instance. - - - -**Raises:** - - - `CharmConfigInvalidError`: If any invalid configuration has been set on the charm. + - `msg`: Explanation of the error. -**Returns:** - Current config of the charm. --- - - -## class `BaseImage` -The ubuntu OS base image to build and deploy runners on. +## class `InstanceType` +Type of instance for runner. **Attributes:** - - `JAMMY`: The jammy ubuntu LTS image. - - `NOBLE`: The noble ubuntu LTS image. + - `LOCAL_LXD`: LXD instance on the local juju machine. + - `OPENSTACK`: OpenStack instance on a cloud. @@ -387,74 +338,89 @@ The ubuntu OS base image to build and deploy runners on. --- - - -## class `OpenstackImage` -OpenstackImage from image builder relation data. +## class `LocalLxdRunnerConfig` +Runner configurations for local LXD instances. **Attributes:** - - `id`: The OpenStack image ID. - - `tags`: Image tags, e.g. jammy + - `base_image`: The ubuntu base image to run the runner virtual machines on. + - `virtual_machines`: Number of virtual machine-based runner to spawn. + - `virtual_machine_resources`: Hardware resource used by one virtual machine for a runner. + - `runner_storage`: Storage to be used as disk for the runner. --- - + -### classmethod `from_charm` +### classmethod `check_virtual_machine_resources` ```python -from_charm(charm: CharmBase) → OpenstackImage | None +check_virtual_machine_resources( + vm_resources: VirtualMachineResources +) → VirtualMachineResources ``` -Initialize the OpenstackImage info from relation data. - -None represents relation not established. None values for id/tags represent image not yet ready but the relation exists. +Validate the virtual_machine_resources field values. **Args:** - - `charm`: The charm instance. + - `vm_resources`: the virtual_machine_resources value to validate. -**Returns:** - OpenstackImage metadata from charm relation data. +**Raises:** + + - `ValueError`: if an invalid number of cpu was given or invalid memory/disk size was given. + +**Returns:** + The validated virtual_machine_resources value. + --- - + -## class `OpenstackRunnerConfig` -Runner configuration for OpenStack Instances. +### classmethod `check_virtual_machines` + +```python +check_virtual_machines(virtual_machines: int) → int +``` +Validate the virtual machines configuration value. -**Attributes:** + +**Args:** - - `virtual_machines`: Number of virtual machine-based runner to spawn. - - `openstack_flavor`: flavor on openstack to use for virtual machines. - - `openstack_network`: Network on openstack to use for virtual machines. - - `openstack_image`: Openstack image to use for virtual machines. + - `virtual_machines`: The virtual machines value to validate. + + + +**Raises:** + + - `ValueError`: if a negative integer was passed. +**Returns:** + Validated virtual_machines value. --- - + ### classmethod `from_charm` ```python -from_charm(charm: CharmBase) → OpenstackRunnerConfig +from_charm(charm: CharmBase) → LocalLxdRunnerConfig ``` Initialize the config from charm. @@ -469,101 +435,95 @@ Initialize the config from charm. **Raises:** - - `CharmConfigInvalidError`: Error with charm configuration virtual-machines not of int type. + - `CharmConfigInvalidError`: if an invalid runner charm config has been set on the charm. 
**Returns:** - Openstack runner config of the charm. + Local LXD runner config of the charm. --- - - -## class `LocalLxdRunnerConfig` -Runner configurations for local LXD instances. +## class `OpenStackCloudsYAML` +The OpenStack clouds YAML dict mapping. **Attributes:** - - `base_image`: The ubuntu base image to run the runner virtual machines on. - - `virtual_machines`: Number of virtual machine-based runner to spawn. - - `virtual_machine_resources`: Hardware resource used by one virtual machine for a runner. - - `runner_storage`: Storage to be used as disk for the runner. - - - - ---- - - + - `clouds`: The map of cloud name to cloud connection info. -### classmethod `check_virtual_machine_resources` -```python -check_virtual_machine_resources( - vm_resources: VirtualMachineResources -) → VirtualMachineResources -``` -Validate the virtual_machine_resources field values. +--- -**Args:** - - - `vm_resources`: the virtual_machine_resources value to validate. +## class `OpenstackImage` +OpenstackImage from image builder relation data. -**Raises:** +**Attributes:** - - `ValueError`: if an invalid number of cpu was given or invalid memory/disk size was given. + - `id`: The OpenStack image ID. + - `tags`: Image tags, e.g. jammy -**Returns:** - The validated virtual_machine_resources value. --- - + -### classmethod `check_virtual_machines` +### classmethod `from_charm` ```python -check_virtual_machines(virtual_machines: int) → int +from_charm(charm: CharmBase) → OpenstackImage | None ``` -Validate the virtual machines configuration value. +Initialize the OpenstackImage info from relation data. + +None represents relation not established. None values for id/tags represent image not yet ready but the relation exists. **Args:** - - `virtual_machines`: The virtual machines value to validate. + - `charm`: The charm instance. -**Raises:** +**Returns:** + OpenstackImage metadata from charm relation data. + + +--- + +## class `OpenstackRunnerConfig` +Runner configuration for OpenStack Instances. + + + +**Attributes:** - - `ValueError`: if a negative integer was passed. + - `virtual_machines`: Number of virtual machine-based runner to spawn. + - `openstack_flavor`: flavor on openstack to use for virtual machines. + - `openstack_network`: Network on openstack to use for virtual machines. + - `openstack_image`: Openstack image to use for virtual machines. -**Returns:** - Validated virtual_machines value. --- - + ### classmethod `from_charm` ```python -from_charm(charm: CharmBase) → LocalLxdRunnerConfig +from_charm(charm: CharmBase) → OpenstackRunnerConfig ``` Initialize the config from charm. @@ -578,18 +538,16 @@ Initialize the config from charm. **Raises:** - - `CharmConfigInvalidError`: if an invalid runner charm config has been set on the charm. + - `CharmConfigInvalidError`: Error with charm configuration virtual-machines not of int type. **Returns:** - Local LXD runner config of the charm. + Openstack runner config of the charm. --- - - ## class `ProxyConfig` Proxy configuration. @@ -614,7 +572,7 @@ Return the aproxy address. --- - + ### classmethod `check_use_aproxy` @@ -644,7 +602,7 @@ Validate the proxy configuration. --- - + ### classmethod `from_charm` @@ -668,67 +626,74 @@ Initialize the proxy config from charm. --- - - -## class `UnsupportedArchitectureError` -Raised when given machine charm architecture is unsupported. +## class `ReactiveConfig` +Represents the configuration for reactive scheduling. **Attributes:** - - `arch`: The current machine architecture. 
+ - `mq_uri`: The URI of the MQ to use to spawn runners reactively. + + - -### method `__init__` +--- + + + +### classmethod `from_database` ```python -__init__(arch: str) → None +from_database(database: DatabaseRequires) → ReactiveConfig | None ``` -Initialize a new instance of the CharmConfigInvalidError exception. +Initialize the ReactiveConfig from charm config and integration data. **Args:** - - `arch`: The current machine architecture. + - `database`: The database to fetch integration data from. +**Returns:** + The connection information for the reactive MQ or None if not available. ---- - +**Raises:** + + - `MissingMongoDBError`: If the information on howto access MongoDB is missing in the integration data. -## class `SSHDebugConnection` -SSH connection information for debug workflow. + +--- + +## class `RepoPolicyComplianceConfig` +Configuration for the repo policy compliance service. **Attributes:** - - `host`: The SSH relay server host IP address inside the VPN. - - `port`: The SSH relay server port. - - `rsa_fingerprint`: The host SSH server public RSA key fingerprint. - - `ed25519_fingerprint`: The host SSH server public ed25519 key fingerprint. + - `token`: Token for the repo policy compliance service. + - `url`: URL of the repo policy compliance service. --- - + ### classmethod `from_charm` ```python -from_charm(charm: CharmBase) → list['SSHDebugConnection'] +from_charm(charm: CharmBase) → RepoPolicyComplianceConfig ``` -Initialize the SSHDebugInfo from charm relation data. +Initialize the config from charm. @@ -738,155 +703,118 @@ Initialize the SSHDebugInfo from charm relation data. +**Raises:** + + - `CharmConfigInvalidError`: If an invalid configuration was set. + + + **Returns:** - List of connection information for ssh debug access. + Current repo-policy-compliance config. --- - - -## class `ReactiveConfig` -Represents the configuration for reactive scheduling. +## class `RunnerStorage` +Supported storage as runner disk. **Attributes:** - - `mq_uri`: The URI of the MQ to use to spawn runners reactively. - - + - `JUJU_STORAGE`: Represents runner storage from Juju storage. + - `MEMORY`: Represents tempfs storage (ramdisk). ---- - -### classmethod `from_database` -```python -from_database(database: DatabaseRequires) → ReactiveConfig | None -``` +--- -Initialize the ReactiveConfig from charm config and integration data. +## class `SSHDebugConnection` +SSH connection information for debug workflow. -**Args:** +**Attributes:** - - `database`: The database to fetch integration data from. - - - -**Returns:** - The connection information for the reactive MQ or None if not available. + - `host`: The SSH relay server host IP address inside the VPN. + - `port`: The SSH relay server port. + - `rsa_fingerprint`: The host SSH server public RSA key fingerprint. + - `ed25519_fingerprint`: The host SSH server public ed25519 key fingerprint. -**Raises:** - - - `MissingMongoDBError`: If the information on howto access MongoDB is missing in the integration data. - --- - - -## class `ImmutableConfigChangedError` -Represents an error when changing immutable charm state. - - + -### method `__init__` +### classmethod `from_charm` ```python -__init__(msg: str) +from_charm(charm: CharmBase) → list['SSHDebugConnection'] ``` -Initialize a new instance of the ImmutableConfigChangedError exception. +Initialize the SSHDebugInfo from charm relation data. **Args:** - - `msg`: Explanation of the error. + - `charm`: The charm instance. +**Returns:** + List of connection information for ssh debug access. 
--- - - -## class `CharmState` -The charm state. +## class `UnsupportedArchitectureError` +Raised when given machine charm architecture is unsupported. **Attributes:** - - `arch`: The underlying compute architecture, i.e. x86_64, amd64, arm64/aarch64. - - `charm_config`: Configuration of the juju charm. - - `is_metrics_logging_available`: Whether the charm is able to issue metrics. - - `proxy_config`: Proxy-related configuration. - - `instance_type`: The type of instances, e.g., local lxd, openstack. - - `reactive_config`: The charm configuration related to reactive spawning mode. - - `runner_config`: The charm configuration related to runner VM configuration. - - `ssh_debug_connections`: SSH debug connections configuration information. + - `arch`: The current machine architecture. - + -### method `__init__` +### function `__init__` ```python -__init__( - arch: Arch, - is_metrics_logging_available: bool, - proxy_config: ProxyConfig, - instance_type: InstanceType, - charm_config: CharmConfig, - runner_config: OpenstackRunnerConfig | LocalLxdRunnerConfig, - reactive_config: ReactiveConfig | None, - ssh_debug_connections: list[SSHDebugConnection] -) → None +__init__(arch: str) → None ``` +Initialize a new instance of the CharmConfigInvalidError exception. +**Args:** + + - `arch`: The current machine architecture. ---- - - - -### classmethod `from_charm` - -```python -from_charm(charm: CharmBase, database: DatabaseRequires) → CharmState -``` - -Initialize the state from charm. - +--- -**Args:** - - - `charm`: The charm instance. - - `database`: The database instance. +## class `VirtualMachineResources` +Virtual machine resource configuration. -**Raises:** +**Attributes:** - - `CharmConfigInvalidError`: If an invalid configuration was set. + - `cpu`: Number of vCPU for the virtual machine. + - `memory`: Amount of memory for the virtual machine. + - `disk`: Amount of disk for the virtual machine. -**Returns:** - Current state of the charm. diff --git a/src-docs/errors.md b/src-docs/errors.py.md similarity index 51% rename from src-docs/errors.md rename to src-docs/errors.py.md index c61dd8410..f976fcf0a 100644 --- a/src-docs/errors.md +++ b/src-docs/errors.py.md @@ -2,28 +2,15 @@ -# module `errors` +# module `errors.py` Errors used by the charm. --- - - -## class `RunnerCreateError` -Error for runner creation failure. - - - - - ---- - - - -## class `RunnerFileLoadError` -Error for loading file on runner. +## class `ConfigurationError` +Error for juju configuration. @@ -31,10 +18,8 @@ Error for loading file on runner. --- - - -## class `RunnerRemoveError` -Error for runner removal failure. +## class `IssueMetricEventError` +Represents an error when issuing a metric event. @@ -42,10 +27,8 @@ Error for runner removal failure. --- - - -## class `RunnerBinaryError` -Error of getting runner binary. +## class `LogrotateSetupError` +Represents an error raised when logrotate cannot be setup. @@ -53,10 +36,8 @@ Error of getting runner binary. --- - - -## class `RunnerAproxyError` -Error for setting up aproxy. +## class `LxdError` +Error for executing LXD actions. @@ -64,10 +45,8 @@ Error for setting up aproxy. --- - - -## class `MissingServerConfigError` -Error for unable to create runner due to missing server configurations. +## class `MissingMongoDBError` +Error for missing integration data. @@ -75,8 +54,6 @@ Error for unable to create runner due to missing server configurations. --- - - ## class `MissingRunnerBinaryError` Error for missing runner binary. 
@@ -86,10 +63,8 @@ Error for missing runner binary. --- - - -## class `ConfigurationError` -Error for juju configuration. +## class `MissingServerConfigError` +Error for unable to create runner due to missing server configurations. @@ -97,10 +72,8 @@ Error for juju configuration. --- - - -## class `MissingMongoDBError` -Error for missing integration data. +## class `RunnerAproxyError` +Error for setting up aproxy. @@ -108,10 +81,8 @@ Error for missing integration data. --- - - -## class `LxdError` -Error for executing LXD actions. +## class `RunnerBinaryError` +Error of getting runner binary. @@ -119,43 +90,26 @@ Error for executing LXD actions. --- - +## class `RunnerCreateError` +Error for runner creation failure. -## class `SubprocessError` -Error for Subprocess calls. -**Attributes:** - - - `cmd`: Command in list form. - - `return_code`: Return code of the subprocess. - - `stdout`: Content of stdout of the subprocess. - - `stderr`: Content of stderr of the subprocess. - +--- + +## class `RunnerFileLoadError` +Error for loading file on runner. -### method `__init__` -```python -__init__( - cmd: 'list[str]', - return_code: 'int', - stdout: 'Union[bytes, str]', - stderr: 'Union[bytes, str]' -) -``` -Construct the subprocess error. +--- -**Args:** - - - `cmd`: Command in list form. - - `return_code`: Return code of the subprocess. - - `stdout`: Content of stdout of the subprocess. - - `stderr`: Content of stderr of the subprocess. +## class `RunnerLogsError` +Base class for all runner logs errors. @@ -163,10 +117,8 @@ Construct the subprocess error. --- - - -## class `IssueMetricEventError` -Represents an error when issuing a metric event. +## class `RunnerRemoveError` +Error for runner removal failure. @@ -174,10 +126,8 @@ Represents an error when issuing a metric event. --- - - -## class `LogrotateSetupError` -Represents an error raised when logrotate cannot be setup. +## class `SharedFilesystemError` +Base class for all shared filesystem errors. @@ -185,10 +135,8 @@ Represents an error raised when logrotate cannot be setup. --- - - -## class `SharedFilesystemError` -Base class for all shared filesystem errors. +## class `SharedFilesystemMountError` +Represents an error related to the mounting of the shared filesystem. @@ -196,21 +144,41 @@ Base class for all shared filesystem errors. --- - +## class `SubprocessError` +Error for Subprocess calls. -## class `SharedFilesystemMountError` -Represents an error related to the mounting of the shared filesystem. +**Attributes:** + + - `cmd`: Command in list form. + - `return_code`: Return code of the subprocess. + - `stdout`: Content of stdout of the subprocess. + - `stderr`: Content of stderr of the subprocess. + +### function `__init__` ---- +```python +__init__( + cmd: 'list[str]', + return_code: 'int', + stdout: 'Union[bytes, str]', + stderr: 'Union[bytes, str]' +) +``` - +Construct the subprocess error. -## class `RunnerLogsError` -Base class for all runner logs errors. + + +**Args:** + + - `cmd`: Command in list form. + - `return_code`: Return code of the subprocess. + - `stdout`: Content of stdout of the subprocess. + - `stderr`: Content of stderr of the subprocess. 
diff --git a/src-docs/event_timer.md b/src-docs/event_timer.py.md similarity index 75% rename from src-docs/event_timer.md rename to src-docs/event_timer.py.md index 7d10d807c..d0f034426 100644 --- a/src-docs/event_timer.md +++ b/src-docs/event_timer.py.md @@ -2,7 +2,7 @@ -# module `event_timer` +# module `event_timer.py` EventTimer for scheduling dispatch of juju event on regular intervals. **Global Variables** @@ -12,52 +12,6 @@ EventTimer for scheduling dispatch of juju event on regular intervals. --- - - -## class `TimerError` -Generic timer error as base exception. - - - - - ---- - - - -## class `TimerEnableError` -Raised when unable to enable a event timer. - - - - - ---- - - - -## class `TimerDisableError` -Raised when unable to disable a event timer. - - - - - ---- - - - -## class `TimerStatusError` -Raised when unable to check status of a event timer. - - - - - ---- - - - ## class `EventConfig` Configuration used by service and timer templates. @@ -77,8 +31,6 @@ Configuration used by service and timer templates. --- - - ## class `EventTimer` Manages the timer to emit juju events at regular intervals. @@ -90,7 +42,7 @@ Manages the timer to emit juju events at regular intervals. -### method `__init__` +### function `__init__` ```python __init__(unit_name: str) @@ -111,7 +63,7 @@ Construct the timer manager. -### method `disable_event_timer` +### function `disable_event_timer` ```python disable_event_timer(event_name: str) → None @@ -135,7 +87,7 @@ Disable the systemd timer for the given event. -### method `ensure_event_timer` +### function `ensure_event_timer` ```python ensure_event_timer(event_name: str, interval: int, timeout: int) → None @@ -165,7 +117,7 @@ The timeout is the number of seconds before an event is timed out. If not set or -### method `is_active` +### function `is_active` ```python is_active(event_name: str) → bool @@ -191,3 +143,39 @@ Check if the systemd timer is active for the given event. - `TimerStatusError`: Timer status cannot be determined. +--- + +## class `TimerDisableError` +Raised when unable to disable a event timer. + + + + + +--- + +## class `TimerEnableError` +Raised when unable to enable a event timer. + + + + + +--- + +## class `TimerError` +Generic timer error as base exception. + + + + + +--- + +## class `TimerStatusError` +Raised when unable to check status of a event timer. + + + + + diff --git a/src-docs/firewall.md b/src-docs/firewall.py.md similarity index 76% rename from src-docs/firewall.md rename to src-docs/firewall.py.md index 3b59a41d5..486c03290 100644 --- a/src-docs/firewall.md +++ b/src-docs/firewall.py.md @@ -2,133 +2,117 @@ -# module `firewall` +# module `firewall.py` The runner firewall manager. --- - - -## class `FirewallEntry` -Represent an entry in the firewall. - - - -**Attributes:** - - - `ip_range`: The IP address range using CIDR notation. +## class `Firewall` +Represent a firewall and provides methods to refresh its configuration. - + -### method `__init__` +### function `__init__` ```python -__init__(ip_range: str) → None +__init__(network: str) ``` +Initialize a new Firewall instance. +**Args:** + + - `network`: The LXD network name. --- - + -### classmethod `decode` +### function `get_host_ip` ```python -decode(entry: str) → FirewallEntry +get_host_ip() → str ``` -Decode a firewall entry from a string. - - - -**Args:** - - - `entry`: The firewall entry string, e.g. '192.168.0.1:80' or '192.168.0.0/24:80-90:udp'. +Get the host IP address for the corresponding LXD network. 
**Returns:** - - - `FirewallEntry`: A FirewallEntry instance representing the decoded entry. - - - -**Raises:** - - - `ValueError`: If the entry string is not in the expected format. - + The host IP address. --- - - -## class `Firewall` -Represent a firewall and provides methods to refresh its configuration. - - + -### method `__init__` +### function `refresh_firewall` ```python -__init__(network: str) +refresh_firewall( + denylist: Iterable[FirewallEntry], + allowlist: Optional[Iterable[FirewallEntry]] = None +) → None ``` -Initialize a new Firewall instance. +Refresh the firewall configuration. **Args:** - - `network`: The LXD network name. - - + - `denylist`: The list of FirewallEntry rules to allow. + - `allowlist`: The list of FirewallEntry rules to allow. --- - +## class `FirewallEntry` +Represent an entry in the firewall. -### method `get_host_ip` -```python -get_host_ip() → str -``` -Get the host IP address for the corresponding LXD network. +**Attributes:** + + - `ip_range`: The IP address range using CIDR notation. -**Returns:** - The host IP address. --- - + -### method `refresh_firewall` +### classmethod `decode` ```python -refresh_firewall( - denylist: Iterable[FirewallEntry], - allowlist: Optional[Iterable[FirewallEntry]] = None -) → None +decode(entry: str) → FirewallEntry ``` -Refresh the firewall configuration. +Decode a firewall entry from a string. **Args:** - - `denylist`: The list of FirewallEntry rules to allow. - - `allowlist`: The list of FirewallEntry rules to allow. + - `entry`: The firewall entry string, e.g. '192.168.0.1:80' or '192.168.0.0/24:80-90:udp'. + + + +**Returns:** + + - `FirewallEntry`: A FirewallEntry instance representing the decoded entry. + + + +**Raises:** + + - `ValueError`: If the entry string is not in the expected format. diff --git a/src-docs/github_client.md b/src-docs/github_client.py.md similarity index 83% rename from src-docs/github_client.md rename to src-docs/github_client.py.md index 679c9f907..795b92d10 100644 --- a/src-docs/github_client.md +++ b/src-docs/github_client.py.md @@ -2,7 +2,7 @@ -# module `github_client` +# module `github_client.py` GitHub API client. Migrate to PyGithub in the future. PyGithub is still lacking some API such as remove token for runner. @@ -11,8 +11,6 @@ Migrate to PyGithub in the future. PyGithub is still lacking some API such as re --- - - ## class `GithubClient` GitHub API client. @@ -23,7 +21,7 @@ GitHub API client. -### method `get_runner_application` +### function `get_runner_application` ```python get_runner_application( diff --git a/src-docs/github_type.md b/src-docs/github_type.md deleted file mode 100644 index 4c196b632..000000000 --- a/src-docs/github_type.md +++ /dev/null @@ -1,189 +0,0 @@ - - - - -# module `github_type` -Return type for the GitHub web API. - - - ---- - - - -## class `GitHubRunnerStatus` -Status of runner on GitHub. - - - -**Attributes:** - - - `ONLINE`: Represents an online runner status. - - `OFFLINE`: Represents an offline runner status. - - - - - ---- - - - -## class `RunnerApplication` -Information on the runner application. - - - -**Attributes:** - - - `os`: Operating system to run the runner application on. - - `architecture`: Computer Architecture to run the runner application on. - - `download_url`: URL to download the runner application. - - `filename`: Filename of the runner application. - - `temp_download_token`: A short lived bearer token used to download the runner, if needed. - - `sha256_checksum`: SHA256 Checksum of the runner application. 
- - - - - ---- - - - -## class `SelfHostedRunnerLabel` -A single label of self-hosted runners. - - - -**Attributes:** - - - `id`: Unique identifier of the label. - - `name`: Name of the label. - - `type`: Type of label. Read-only labels are applied automatically when the runner is configured. - - - - - ---- - - - -## class `SelfHostedRunner` -Information on a single self-hosted runner. - - - -**Attributes:** - - - `busy`: Whether the runner is executing a job. - - `id`: Unique identifier of the runner. - - `labels`: Labels of the runner. - - `os`: Operation system of the runner. - - `name`: Name of the runner. - - `status`: The Github runner status. - - - - - ---- - - - -## class `SelfHostedRunnerList` -Information on a collection of self-hosted runners. - - - -**Attributes:** - - - `total_count`: Total number of runners. - - `runners`: List of runners. - - - - - ---- - - - -## class `RegistrationToken` -Token used for registering GitHub runners. - - - -**Attributes:** - - - `token`: Token for registering GitHub runners. - - `expires_at`: Time the token expires at. - - - - - ---- - - - -## class `RemoveToken` -Token used for removing GitHub runners. - - - -**Attributes:** - - - `token`: Token for removing GitHub runners. - - `expires_at`: Time the token expires at. - - - - - ---- - - - -## class `JobConclusion` -Conclusion of a job on GitHub. - -See :https://docs.github.com/en/rest/actions/workflow-runs?apiVersion=2022-11-28#list-workflow-runs-for-a-repository - - - -**Attributes:** - - - `ACTION_REQUIRED`: Represents additional action required on the job. - - `CANCELLED`: Represents a cancelled job status. - - `FAILURE`: Represents a failed job status. - - `NEUTRAL`: Represents a job status that can optionally succeed or fail. - - `SKIPPED`: Represents a skipped job status. - - `SUCCESS`: Represents a successful job status. - - `TIMED_OUT`: Represents a job that has timed out. - - - - - ---- - - - -## class `JobStats` -Stats for a job on GitHub. - - - -**Attributes:** - - - `job_id`: The ID of the job. - - `created_at`: The time the job was created. - - `started_at`: The time the job was started. - - `conclusion`: The end result of a job. - - - - - diff --git a/src-docs/logrotate.md b/src-docs/logrotate.py.md similarity index 81% rename from src-docs/logrotate.md rename to src-docs/logrotate.py.md index 275ab0a5c..fecc13634 100644 --- a/src-docs/logrotate.md +++ b/src-docs/logrotate.py.md @@ -2,7 +2,7 @@ -# module `logrotate` +# module `logrotate.py` Logrotate setup and configuration. **Global Variables** @@ -32,19 +32,19 @@ Enable and configure logrotate. --- - - -## class `LogrotateFrequency` -The frequency of log rotation. +## class `LogrotateConfig` +Configuration for logrotate. **Attributes:** - - `DAILY`: Rotate the log daily. - - `WEEKLY`: Rotate the log weekly. - - `MONTHLY`: Rotate the log monthly. - - `YEARLY`: Rotate the log yearly. + - `name`: The name of the logrotate configuration. + - `log_path_glob_pattern`: The glob pattern for the log path. + - `rotate`: The number of log files to keep. + - `create`: Whether to create the log file if it does not exist. + - `notifempty`: Whether to not rotate the log file if it is empty. + - `frequency`: The frequency of log rotation. @@ -52,21 +52,17 @@ The frequency of log rotation. --- - - -## class `LogrotateConfig` -Configuration for logrotate. +## class `LogrotateFrequency` +The frequency of log rotation. **Attributes:** - - `name`: The name of the logrotate configuration. 
- - `log_path_glob_pattern`: The glob pattern for the log path. - - `rotate`: The number of log files to keep. - - `create`: Whether to create the log file if it does not exist. - - `notifempty`: Whether to not rotate the log file if it is empty. - - `frequency`: The frequency of log rotation. + - `DAILY`: Rotate the log daily. + - `WEEKLY`: Rotate the log weekly. + - `MONTHLY`: Rotate the log monthly. + - `YEARLY`: Rotate the log yearly. diff --git a/src-docs/lxd.md b/src-docs/lxd.py.md similarity index 85% rename from src-docs/lxd.md rename to src-docs/lxd.py.md index 231905c85..3e9bd771d 100644 --- a/src-docs/lxd.md +++ b/src-docs/lxd.py.md @@ -2,7 +2,7 @@ -# module `lxd` +# module `lxd.py` Low-level LXD client interface. The LxdClient class offers a low-level interface to isolate the underlying implementation of LXD. @@ -14,174 +14,98 @@ The LxdClient class offers a low-level interface to isolate the underlying imple --- - - -## class `LxdInstanceFileManager` -File manager of an LXD instance. - - - -**Attributes:** - - - `instance` (LxdInstance): LXD instance where the files are located in. +## class `LxdClient` +LXD client. - + -### method `__init__` +### function `__init__` ```python -__init__(instance: 'LxdInstance') +__init__() → None ``` -Instantiate the file manager. - - +Instantiate the LXD client. -**Args:** - - - `instance`: LXD instance where the files are located in. --- - - -### method `mk_dir` - -```python -mk_dir(dir_name: 'str') → None -``` - -Create a directory in the LXD instance. - - - -**Args:** - - - `dir_name`: Name of the directory to create. - ---- +## class `LxdImageManager` +LXD image manager. - + -### method `pull_file` +### function `__init__` ```python -pull_file(source: 'str', destination: 'str', is_dir: 'bool' = False) → None +__init__(pylxd_client: 'Client') ``` -Pull a file from the LXD instance to the local machine. +Instantiate the LXD image manager. **Args:** - - `source`: Path of the file to pull in the LXD instance. - - `destination`: Path in local machine. - - `is_dir`: Whether the source is a directory. + - `pylxd_client`: Instance of pylxd.Client. -**Raises:** - - - `LxdError`: Unable to load the file from the LXD instance. --- - + -### method `push_file` +### function `create` ```python -push_file( - source: 'str', - destination: 'str', - mode: 'Optional[str]' = None -) → None +create(name: 'str', path: 'Path') → None ``` -Push a file to the LXD instance. +Import a LXD image. **Args:** - - `source`: Path of the file to push to the LXD instance. - - `destination`: Path in the LXD instance to load the file. - - `mode`: File permissions. + - `name`: Alias for the image. + - `path`: Path of the LXD image file. **Raises:** - - `LxdError`: Unable to load the file into the LXD instance. + - `LxdError`: Unable to import the file as LXD image. --- - + -### method `read_file` +### function `exists` ```python -read_file(filepath: 'str') → str +exists(alias: 'str') → bool ``` -Read the content of a file in the LXD instance. +Check if an image with the given name exists. **Args:** - - `filepath`: Path of the file in the LXD instance. - - - -**Raises:** - - - `LxdError`: Unable to load the file from the LXD instance. + - `alias`: Alias name of the image to check. **Returns:** - The content of the file. - ---- - - - -### method `write_file` - -```python -write_file( - filepath: 'str', - content: 'Union[str, bytes]', - mode: 'Optional[str]' = None -) → None -``` - -Write a file with the given content into the LXD instance. 
- - - -**Args:** - - - `filepath`: Path in the LXD instance to load the file. - - `content`: Content of the file. - - `mode`: File permission setting. - - - -**Raises:** - - - `LxdError`: Unable to load the file to the LXD instance. + Whether the image exists. --- - - ## class `LxdInstance` An LXD instance. @@ -195,7 +119,7 @@ An LXD instance. -### method `__init__` +### function `__init__` ```python __init__(pylxd_instance: 'Instance') @@ -227,7 +151,7 @@ Status of the LXD instance. -### method `delete` +### function `delete` ```python delete(wait: 'bool' = False) → None @@ -251,7 +175,7 @@ Delete the LXD instance. -### method `execute` +### function `execute` ```python execute( @@ -288,7 +212,7 @@ The command is executed with `subprocess.run`, additional arguments can be passe -### method `start` +### function `start` ```python start(timeout: 'int' = 30, force: 'bool' = True, wait: 'bool' = False) → None @@ -314,7 +238,7 @@ Start the LXD instance. -### method `stop` +### function `stop` ```python stop(timeout: 'int' = 30, force: 'bool' = True, wait: 'bool' = False) → None @@ -339,260 +263,254 @@ Stop the LXD instance. --- - +## class `LxdInstanceFileManager` +File manager of an LXD instance. -## class `LxdInstanceManager` -LXD instance manager. - -### method `__init__` +**Attributes:** + + - `instance` (LxdInstance): LXD instance where the files are located in. + + + +### function `__init__` ```python -__init__(pylxd_client: 'Client') +__init__(instance: 'LxdInstance') ``` -Instantiate the LXD instance manager. +Instantiate the file manager. **Args:** - - `pylxd_client`: Instance of pylxd.Client. + - `instance`: LXD instance where the files are located in. --- - + -### method `all` +### function `mk_dir` ```python -all() → list[LxdInstance] +mk_dir(dir_name: 'str') → None ``` -Get list of LXD instances. +Create a directory in the LXD instance. -**Raises:** +**Args:** - - `LxdError`: Unable to get all LXD instances. - - - -**Returns:** - List of LXD instances. + - `dir_name`: Name of the directory to create. --- - + -### method `create` +### function `pull_file` ```python -create(config: 'LxdInstanceConfig', wait: 'bool') → LxdInstance +pull_file(source: 'str', destination: 'str', is_dir: 'bool' = False) → None ``` -Create an LXD instance. +Pull a file from the LXD instance to the local machine. **Args:** - - `config`: Configuration for the LXD instance. - - `wait`: Whether to wait until the LXD instance is created before returning. + - `source`: Path of the file to pull in the LXD instance. + - `destination`: Path in local machine. + - `is_dir`: Whether the source is a directory. **Raises:** - - `LxdError`: Unable to get all LXD instances. - - - -**Returns:** - The created LXD instance. - + - `LxdError`: Unable to load the file from the LXD instance. --- - - -## class `LxdProfileManager` -LXD profile manager. - - + -### method `__init__` +### function `push_file` ```python -__init__(pylxd_client: 'Client') +push_file( + source: 'str', + destination: 'str', + mode: 'Optional[str]' = None +) → None ``` -Instantiate the LXD profile manager. +Push a file to the LXD instance. **Args:** - - `pylxd_client`: Instance of pylxd.Client. + - `source`: Path of the file to push to the LXD instance. + - `destination`: Path in the LXD instance to load the file. + - `mode`: File permissions. +**Raises:** + + - `LxdError`: Unable to load the file into the LXD instance. 
--- - + -### method `create` +### function `read_file` ```python -create( - name: 'str', - config: 'LxdResourceProfileConfig', - devices: 'LxdResourceProfileDevices' -) → None +read_file(filepath: 'str') → str ``` -Create an LXD profile. +Read the content of a file in the LXD instance. **Args:** - - `name`: Name of the LXD profile to create. - - `config`: Configuration of the LXD profile. - - `devices`: Devices configuration of the LXD profile. + - `filepath`: Path of the file in the LXD instance. **Raises:** - - `LxdError`: Unable to create the LXD profile. + - `LxdError`: Unable to load the file from the LXD instance. + + + +**Returns:** + The content of the file. --- - + -### method `exists` +### function `write_file` ```python -exists(name: 'str') → bool +write_file( + filepath: 'str', + content: 'Union[str, bytes]', + mode: 'Optional[str]' = None +) → None ``` -Check whether an LXD profile of a given name exists. +Write a file with the given content into the LXD instance. **Args:** - - `name`: Name for LXD profile to check. + - `filepath`: Path in the LXD instance to load the file. + - `content`: Content of the file. + - `mode`: File permission setting. **Raises:** - - `LxdError`: Unable to check the LXD profile existence. - - + - `LxdError`: Unable to load the file to the LXD instance. -**Returns:** - Whether the LXD profile of the given name exists. --- - +## class `LxdInstanceManager` +LXD instance manager. -### method `get` + + +### function `__init__` ```python -get(name: 'str') → LxdProfile +__init__(pylxd_client: 'Client') ``` -Get an LXD profile. +Instantiate the LXD instance manager. **Args:** - - `name`: Name of the LXD profile. - + - `pylxd_client`: Instance of pylxd.Client. -**Raises:** - - - `LxdError`: Unable to get the LXD profile with the name. - - - -**Returns:** - LXDProfile with given name. --- - - -## class `LxdProfile` -LXD profile. - - + -### method `__init__` +### function `all` ```python -__init__(pylxd_profile: 'Profile') +all() → list[LxdInstance] ``` -Instantiate the LXD profile. +Get list of LXD instances. -**Args:** +**Raises:** - - `pylxd_profile`: Instance of the pylxd.models.Profile. + - `LxdError`: Unable to get all LXD instances. +**Returns:** + List of LXD instances. --- - + -### method `delete` +### function `create` ```python -delete() → None +create(config: 'LxdInstanceConfig', wait: 'bool') → LxdInstance ``` -Delete the profile. +Create an LXD instance. ---- - -### method `save` +**Args:** + + - `config`: Configuration for the LXD instance. + - `wait`: Whether to wait until the LXD instance is created before returning. -```python -save() → None -``` -Save the current configuration of profile. + +**Raises:** + + - `LxdError`: Unable to get all LXD instances. ---- - +**Returns:** + The created LXD instance. + + +--- ## class `LxdNetworkManager` LXD network manager. -### method `__init__` +### function `__init__` ```python __init__(pylxd_client: 'Client') @@ -613,7 +531,7 @@ Instantiate the LXD profile manager. -### method `get` +### function `get` ```python get(name: 'str') → LxdNetwork @@ -635,127 +553,168 @@ Get the LXD network information. --- - - -## class `LxdStoragePoolManager` -LXD storage pool manager. +## class `LxdProfile` +LXD profile. - + -### method `__init__` +### function `__init__` ```python -__init__(pylxd_client: 'Client') +__init__(pylxd_profile: 'Profile') ``` -Instantiate the LXD storage pool manager. +Instantiate the LXD profile. **Args:** - - `pylxd_client`: Instance of pylxd.Client. 
+ - `pylxd_profile`: Instance of the pylxd.models.Profile. --- - + -### method `all` +### function `delete` ```python -all() → list[LxdStoragePool] +delete() → None ``` -Get all LXD storage pool. +Delete the profile. + +--- + +### function `save` + +```python +save() → None +``` + +Save the current configuration of profile. -**Returns:** - List of LXD storage pools. --- - +## class `LxdProfileManager` +LXD profile manager. + + -### method `create` +### function `__init__` ```python -create(config: 'LxdStoragePoolConfiguration') → LxdStoragePool +__init__(pylxd_client: 'Client') ``` -Create an LXD storage pool. +Instantiate the LXD profile manager. **Args:** - - `config`: Configuration for the storage pool. + - `pylxd_client`: Instance of pylxd.Client. -**Returns:** - The LXD storage pool. --- - + + +### function `create` + +```python +create( + name: 'str', + config: 'LxdResourceProfileConfig', + devices: 'LxdResourceProfileDevices' +) → None +``` + +Create an LXD profile. + -### method `exists` + +**Args:** + + - `name`: Name of the LXD profile to create. + - `config`: Configuration of the LXD profile. + - `devices`: Devices configuration of the LXD profile. + + + +**Raises:** + + - `LxdError`: Unable to create the LXD profile. + +--- + + + +### function `exists` ```python exists(name: 'str') → bool ``` -Check if an LXD storage pool exists. +Check whether an LXD profile of a given name exists. **Args:** - - `name`: Name to check for. + - `name`: Name for LXD profile to check. + + + +**Raises:** + + - `LxdError`: Unable to check the LXD profile existence. **Returns:** - Whether the storage pool exists. + Whether the LXD profile of the given name exists. --- - + -### method `get` +### function `get` ```python -get(name: 'str') → LxdStoragePool +get(name: 'str') → LxdProfile ``` -Get an LXD storage pool. +Get an LXD profile. **Args:** - - `name`: Name of the storage pool. + - `name`: Name of the LXD profile. **Raises:** - - `LxdError`: If the storage pool with given name was not found. + - `LxdError`: Unable to get the LXD profile with the name. **Returns:** - The LXD storage pool. + LXDProfile with given name. --- - - ## class `LxdStoragePool` An LXD storage pool. @@ -771,7 +730,7 @@ An LXD storage pool. -### method `__init__` +### function `__init__` ```python __init__(pylxd_storage_pool: 'StoragePool') @@ -792,7 +751,7 @@ Instantiate the LXD storage pool. -### method `delete` +### function `delete` ```python delete() → None @@ -804,7 +763,7 @@ Delete the storage pool. -### method `save` +### function `save` ```python save() → None @@ -815,20 +774,18 @@ Save the current configuration of storage pool. --- - - -## class `LxdImageManager` -LXD image manager. +## class `LxdStoragePoolManager` +LXD storage pool manager. - + -### method `__init__` +### function `__init__` ```python __init__(pylxd_client: 'Client') ``` -Instantiate the LXD image manager. +Instantiate the LXD storage pool manager. @@ -841,71 +798,94 @@ Instantiate the LXD image manager. --- - + -### method `create` +### function `all` ```python -create(name: 'str', path: 'Path') → None +all() → list[LxdStoragePool] ``` -Import a LXD image. +Get all LXD storage pool. + + + +**Returns:** + List of LXD storage pools. + +--- + + + +### function `create` + +```python +create(config: 'LxdStoragePoolConfiguration') → LxdStoragePool +``` + +Create an LXD storage pool. **Args:** - - `name`: Alias for the image. - - `path`: Path of the LXD image file. + - `config`: Configuration for the storage pool. 
-**Raises:** - - - `LxdError`: Unable to import the file as LXD image. +**Returns:** + The LXD storage pool. --- - + -### method `exists` +### function `exists` ```python -exists(alias: 'str') → bool +exists(name: 'str') → bool ``` -Check if an image with the given name exists. +Check if an LXD storage pool exists. **Args:** - - `alias`: Alias name of the image to check. + - `name`: Name to check for. **Returns:** - Whether the image exists. - + Whether the storage pool exists. --- - - -## class `LxdClient` -LXD client. - - + -### method `__init__` +### function `get` ```python -__init__() → None +get(name: 'str') → LxdStoragePool ``` -Instantiate the LXD client. +Get an LXD storage pool. +**Args:** + + - `name`: Name of the storage pool. + + + +**Raises:** + + - `LxdError`: If the storage pool with given name was not found. + + + +**Returns:** + The LXD storage pool. diff --git a/src-docs/lxd_type.md b/src-docs/lxd_type.py.md similarity index 65% rename from src-docs/lxd_type.md rename to src-docs/lxd_type.py.md index b34f99406..0b4ec54bb 100644 --- a/src-docs/lxd_type.md +++ b/src-docs/lxd_type.py.md @@ -2,7 +2,7 @@ -# module `lxd_type` +# module `lxd_type.py` Types used by Lxd class. The details of the configuration of different types of devices can be found here: https://linuxcontainers.org/lxd/docs/latest/reference/devices/ @@ -15,32 +15,20 @@ The unit of storage and network limits can be found here: https://linuxcontainer --- - - -## class `LxdNetworkConfig` -Represent LXD network configuration. - - - - - ---- - - - -## class `LxdResourceProfileConfig` -Configuration LXD profile. - - - +## class `LxdInstanceConfig` +Configuration for the LXD instance. +See https://documentation.ubuntu.com/lxd/en/latest/howto/instances_create/ ---- - -## class `LxdResourceProfileDevicesDisk` -LXD device profile of disk. +**Attributes:** + + - `name`: Name of the instance. + - `type`: Instance type, i.e. "container" or "virtual-machine". + - `source`: Instance creation source configuration. + - `ephemeral`: Whether the container should be deleted after a single run. + - `profiles`: List of LXD profiles applied to the instance. @@ -48,8 +36,6 @@ LXD device profile of disk. --- - - ## class `LxdInstanceConfigSource` Configuration for source image in the LXD instance. @@ -68,22 +54,19 @@ Configuration for source image in the LXD instance. --- - - -## class `LxdInstanceConfig` -Configuration for the LXD instance. - -See https://documentation.ubuntu.com/lxd/en/latest/howto/instances_create/ +## class `LxdNetwork` +LXD network information. **Attributes:** - - `name`: Name of the instance. - - `type`: Instance type, i.e. "container" or "virtual-machine". - - `source`: Instance creation source configuration. - - `ephemeral`: Whether the container should be deleted after a single run. - - `profiles`: List of LXD profiles applied to the instance. + - `name`: The name of LXD network. + - `description`: LXD network descriptor. + - `type`: Network type, i.e. "bridge", "physical" + - `config`: The LXD network configuration values. + - `managed`: Whether the network is being managed by lxd. + - `used_by`: Number of instances using the network. @@ -91,36 +74,26 @@ See https://documentation.ubuntu.com/lxd/en/latest/howto/instances_create/ --- - - -## class `LxdStoragePoolConfig` -Configuration of the storage pool. - +## class `LxdNetworkConfig` +Represent LXD network configuration. -**Attributes:** - - - `source`: The storage pool configuration source image. - - `size`: The size of the storage pool, e.g. 
30GiB +--- +## class `LxdResourceProfileConfig` +Configuration LXD profile. ---- - -## class `LxdStoragePoolConfiguration` -Configuration for LXD storage pool. +--- -**Attributes:** - - - `name`: The storage pool name. - - `driver`: The storage driver being used, i.e. "dir", "btrfs", ... . See https://documentation.ubuntu.com/lxd/en/stable-5.0/reference/storage_drivers/ for more information. - - `config`: The storage pool configuration. +## class `LxdResourceProfileDevicesDisk` +LXD device profile of disk. @@ -128,40 +101,32 @@ Configuration for LXD storage pool. --- - - -## class `LxdNetwork` -LXD network information. +## class `LxdStoragePoolConfig` +Configuration of the storage pool. **Attributes:** - - `name`: The name of LXD network. - - `description`: LXD network descriptor. - - `type`: Network type, i.e. "bridge", "physical" - - `config`: The LXD network configuration values. - - `managed`: Whether the network is being managed by lxd. - - `used_by`: Number of instances using the network. + - `source`: The storage pool configuration source image. + - `size`: The size of the storage pool, e.g. 30GiB + + - -### method `__init__` -```python -__init__( - name: str, - description: str, - type: str, - config: LxdNetworkConfig, - managed: bool, - used_by: tuple[str] -) → None -``` +--- +## class `LxdStoragePoolConfiguration` +Configuration for LXD storage pool. +**Attributes:** + + - `name`: The storage pool name. + - `driver`: The storage driver being used, i.e. "dir", "btrfs", ... . See https://documentation.ubuntu.com/lxd/en/stable-5.0/reference/storage_drivers/ for more information. + - `config`: The storage pool configuration. diff --git a/src-docs/managed_requests.md b/src-docs/managed_requests.md deleted file mode 100644 index 23939bcc2..000000000 --- a/src-docs/managed_requests.md +++ /dev/null @@ -1,32 +0,0 @@ - - - - -# module `managed_requests` -Get configured requests session instance - - ---- - - - -## function `get_requests_session` - -```python -get_requests_session(proxy: ProxyConfig) → Session -``` - -Get managed requests session instance. - - - -**Args:** - - - `proxy`: HTTP proxy configurations. - - - -**Returns:** - Requests session with proxy and retry setup. - - diff --git a/src-docs/metrics.events.md b/src-docs/metrics.events.md deleted file mode 100644 index d816fdc89..000000000 --- a/src-docs/metrics.events.md +++ /dev/null @@ -1,241 +0,0 @@ - - - - -# module `metrics.events` -Models and functions for the metric events. - - ---- - - - -## function `issue_event` - -```python -issue_event(event: Event) → None -``` - -Issue a metric event. - -The metric event is logged to the metrics log. - - - -**Args:** - - - `event`: The metric event to log. - - - -**Raises:** - - - `IssueMetricEventError`: If the event cannot be logged. - - ---- - - - -## class `Event` -Base class for metric events. - - - -**Attributes:** - - - `timestamp`: The UNIX time stamp of the time at which the event was originally issued. - - `event`: The name of the event. Will be set to the class name in snake case if not provided. - - - -### method `__init__` - -```python -__init__(*args: Any, **kwargs: Any) -``` - -Initialize the event. - - - -**Args:** - - - `args`: The positional arguments to pass to the base class. - - `kwargs`: The keyword arguments to pass to the base class. These are used to set the specific fields. E.g. timestamp=12345 will set the timestamp field to 12345. - - - - - ---- - - - -## class `RunnerInstalled` -Metric event for when a runner is installed. 
- - - -**Attributes:** - - - `flavor`: Describes the characteristics of the runner. The flavor could be for example "small". - - `duration`: The duration of the installation in seconds. - - - -### method `__init__` - -```python -__init__(*args: Any, **kwargs: Any) -``` - -Initialize the event. - - - -**Args:** - - - `args`: The positional arguments to pass to the base class. - - `kwargs`: The keyword arguments to pass to the base class. These are used to set the specific fields. E.g. timestamp=12345 will set the timestamp field to 12345. - - - - - ---- - - - -## class `RunnerStart` -Metric event for when a runner is started. - - - -**Attributes:** - - - `flavor`: Describes the characteristics of the runner. The flavor could be for example "small". - - `workflow`: The workflow name. - - `repo`: The repository name. - - `github_event`: The github event. - - `idle`: The idle time in seconds. - - `queue_duration`: The time in seconds it took before the runner picked up the job. This is optional as we rely on the Github API and there may be problems retrieving the data. - - - -### method `__init__` - -```python -__init__(*args: Any, **kwargs: Any) -``` - -Initialize the event. - - - -**Args:** - - - `args`: The positional arguments to pass to the base class. - - `kwargs`: The keyword arguments to pass to the base class. These are used to set the specific fields. E.g. timestamp=12345 will set the timestamp field to 12345. - - - - - ---- - - - -## class `CodeInformation` -Information about a status code. - -This could e.g. be an exit code or a http status code. - - - -**Attributes:** - - - `code`: The status code. - - - - - ---- - - - -## class `RunnerStop` -Metric event for when a runner is stopped. - - - -**Attributes:** - - - `flavor`: Describes the characteristics of the runner. The flavor could be for example "small". - - `workflow`: The workflow name. - - `repo`: The repository name. - - `github_event`: The github event. - - `status`: A string describing the reason for stopping the runner. - - `status_info`: More information about the status. - - `job_duration`: The duration of the job in seconds. - - `job_conclusion`: The job conclusion, e.g. "success", "failure", ... - - - -### method `__init__` - -```python -__init__(*args: Any, **kwargs: Any) -``` - -Initialize the event. - - - -**Args:** - - - `args`: The positional arguments to pass to the base class. - - `kwargs`: The keyword arguments to pass to the base class. These are used to set the specific fields. E.g. timestamp=12345 will set the timestamp field to 12345. - - - - - ---- - - - -## class `Reconciliation` -Metric event for when the charm has finished reconciliation. - - - -**Attributes:** - - - `flavor`: Describes the characteristics of the runner. The flavor could be for example "small". - - `crashed_runners`: The number of crashed runners. - - `idle_runners`: The number of idle runners. - - `duration`: The duration of the reconciliation in seconds. - - - -### method `__init__` - -```python -__init__(*args: Any, **kwargs: Any) -``` - -Initialize the event. - - - -**Args:** - - - `args`: The positional arguments to pass to the base class. - - `kwargs`: The keyword arguments to pass to the base class. These are used to set the specific fields. E.g. timestamp=12345 will set the timestamp field to 12345. 
- - - - - diff --git a/src-docs/metrics.github.md b/src-docs/metrics.github.md deleted file mode 100644 index 30e6981eb..000000000 --- a/src-docs/metrics.github.md +++ /dev/null @@ -1,46 +0,0 @@ - - - - -# module `metrics.github` -Functions to calculate metrics from data retrieved from GitHub. - - ---- - - - -## function `job` - -```python -job( - github_client: GithubClient, - pre_job_metrics: PreJobMetrics, - runner_name: str -) → GithubJobMetrics -``` - -Calculate the job metrics for a runner. - -The Github API is accessed to retrieve the job data for the runner. - - - -**Args:** - - - `github_client`: The GitHub API client. - - `pre_job_metrics`: The pre-job metrics. - - `runner_name`: The name of the runner. - - - -**Raises:** - - - `GithubMetricsError`: If the job for given workflow run is not found. - - - -**Returns:** - The job metrics. - - diff --git a/src-docs/metrics.md b/src-docs/metrics.md deleted file mode 100644 index e4275c340..000000000 --- a/src-docs/metrics.md +++ /dev/null @@ -1,29 +0,0 @@ - - - - -# module `metrics` -Package for common metrics-related code. - -**Global Variables** ---------------- -- **events**: # Copyright 2024 Canonical Ltd. -# See LICENSE file for licensing details. - -- **storage**: # Copyright 2024 Canonical Ltd. -# See LICENSE file for licensing details. - -- **type**: # Copyright 2024 Canonical Ltd. -# See LICENSE file for licensing details. - -- **runner**: # Copyright 2024 Canonical Ltd. -# See LICENSE file for licensing details. - -- **github**: # Copyright 2024 Canonical Ltd. -# See LICENSE file for licensing details. - -- **runner_logs**: # Copyright 2024 Canonical Ltd. -# See LICENSE file for licensing details. - - - diff --git a/src-docs/metrics.runner.md b/src-docs/metrics.runner.md deleted file mode 100644 index 269d2581a..000000000 --- a/src-docs/metrics.runner.md +++ /dev/null @@ -1,176 +0,0 @@ - - - - -# module `metrics.runner` -Classes and function to extract the metrics from storage and issue runner metrics events. - -**Global Variables** ---------------- -- **FILE_SIZE_BYTES_LIMIT** -- **PRE_JOB_METRICS_FILE_NAME** -- **POST_JOB_METRICS_FILE_NAME** -- **RUNNER_INSTALLED_TS_FILE_NAME** - ---- - - - -## function `extract` - -```python -extract( - metrics_storage_manager: StorageManager, - runners: set[str], - include: bool = False -) → Iterator[RunnerMetrics] -``` - -Extract metrics from runners. - -The metrics are extracted from the metrics storage of the runners. Orphan storages are cleaned up. - -If corrupt data is found, the metrics are not processed further and the storage is moved to a special quarantine directory, as this may indicate that a malicious runner is trying to manipulate the files on the storage. - -In order to avoid DoS attacks, the file size is also checked. - - - -**Args:** - - - `metrics_storage_manager`: The metrics storage manager. - - `runners`: The runners to include or exclude. - - `include`: If true the provided runners are included for metric extraction, else the provided runners are excluded. - - - -**Yields:** - Extracted runner metrics of a particular runner. - - ---- - - - -## function `issue_events` - -```python -issue_events( - runner_metrics: RunnerMetrics, - flavor: str, - job_metrics: Optional[GithubJobMetrics] -) → set[Type[Event]] -``` - -Issue the metrics events for a runner. - - - -**Args:** - - - `runner_metrics`: The metrics for the runner. - - `flavor`: The flavor of the runner. - - `job_metrics`: The metrics about the job run by the runner. - - - -**Returns:** - A set of issued events. 
- - ---- - - - -## class `PreJobMetrics` -Metrics for the pre-job phase of a runner. - - - -**Attributes:** - - - `timestamp`: The UNIX time stamp of the time at which the event was originally issued. - - `workflow`: The workflow name. - - `workflow_run_id`: The workflow run id. - - `repository`: The repository path in the format '/'. - - `event`: The github event. - - - - - ---- - - - -## class `PostJobStatus` -The status of the post-job phase of a runner. - - - -**Attributes:** - - - `NORMAL`: Represents a normal post-job. - - `ABNORMAL`: Represents an error with post-job. - - `REPO_POLICY_CHECK_FAILURE`: Represents an error with repo-policy-compliance check. - - - - - ---- - - - -## class `CodeInformation` -Information about a status code. - - - -**Attributes:** - - - `code`: The status code. - - - - - ---- - - - -## class `PostJobMetrics` -Metrics for the post-job phase of a runner. - - - -**Attributes:** - - - `timestamp`: The UNIX time stamp of the time at which the event was originally issued. - - `status`: The status of the job. - - `status_info`: More information about the status. - - - - - ---- - - - -## class `RunnerMetrics` -Metrics for a runner. - - - -**Attributes:** - - - `installed_timestamp`: The UNIX time stamp of the time at which the runner was installed. - - `pre_job`: The metrics for the pre-job phase. - - `post_job`: The metrics for the post-job phase. - - `runner_name`: The name of the runner. - - - - - diff --git a/src-docs/metrics.runner_logs.md b/src-docs/metrics.runner_logs.md deleted file mode 100644 index 5821f81f6..000000000 --- a/src-docs/metrics.runner_logs.md +++ /dev/null @@ -1,48 +0,0 @@ - - - - -# module `metrics.runner_logs` -Functions to pull and remove the logs of the crashed runners. - -**Global Variables** ---------------- -- **OUTDATED_LOGS_IN_SECONDS** - ---- - - - -## function `create_logs_dir` - -```python -create_logs_dir(runner_name: str) → Path -``` - -Create the directory to store the logs of the crashed runners. - - - -**Args:** - - - `runner_name`: The name of the runner. - - - -**Returns:** - The path to the directory where the logs of the crashed runners will be stored. - - ---- - - - -## function `remove_outdated` - -```python -remove_outdated() → None -``` - -Remove the logs that are too old. - - diff --git a/src-docs/metrics.storage.md b/src-docs/metrics.storage.md deleted file mode 100644 index 20250cd09..000000000 --- a/src-docs/metrics.storage.md +++ /dev/null @@ -1,194 +0,0 @@ - - - - -# module `metrics.storage` -Classes and functions defining the metrics storage. - -It contains a protocol and reference implementation. - -**Global Variables** ---------------- -- **FILESYSTEM_OWNER** - ---- - - - -## function `create` - -```python -create(runner_name: str) → MetricsStorage -``` - -Create metrics storage for the runner. - -The method is not idempotent and will raise an exception if the storage already exists. - - - -**Args:** - - - `runner_name`: The name of the runner. - - - -**Returns:** - The metrics storage object. - - - -**Raises:** - - - `CreateMetricsStorageError`: If the creation of the shared filesystem fails. - - ---- - - - -## function `list_all` - -```python -list_all() → Iterator[MetricsStorage] -``` - -List all the metric storages. - - - -**Yields:** - A metrics storage object. - - ---- - - - -## function `get` - -```python -get(runner_name: str) → MetricsStorage -``` - -Get the metrics storage for the runner. - - - -**Args:** - - - `runner_name`: The name of the runner. 
- - - -**Returns:** - The metrics storage object. - - - -**Raises:** - - - `GetMetricsStorageError`: If the storage does not exist. - - ---- - - - -## function `delete` - -```python -delete(runner_name: str) → None -``` - -Delete the metrics storage for the runner. - - - -**Args:** - - - `runner_name`: The name of the runner. - - - -**Raises:** - - - `DeleteMetricsStorageError`: If the storage could not be deleted. - - ---- - - - -## function `move_to_quarantine` - -```python -move_to_quarantine(storage_manager: StorageManager, runner_name: str) → None -``` - -Archive the metrics storage for the runner and delete it. - - - -**Args:** - - - `storage_manager`: The storage manager. - - `runner_name`: The name of the runner. - - - -**Raises:** - - - `QuarantineMetricsStorageError`: If the metrics storage could not be quarantined. - - ---- - - - -## class `MetricsStorage` -Storage for the metrics. - - - -**Attributes:** - - - `path`: The path to the directory holding the metrics inside the charm. - - `runner_name`: The name of the associated runner. - - - -### method `__init__` - -```python -__init__(path: Path, runner_name: str) → None -``` - - - - - - - - - ---- - - - -## class `StorageManager` -A protocol defining the methods for managing the metrics storage. - - - -**Attributes:** - - - `create`: Method to create a new storage. Returns the created storage. Raises an exception CreateMetricsStorageError if the storage already exists. - - `list_all`: Method to list all storages. - - `get`: Method to get a storage by name. - - `delete`: Method to delete a storage by name. - - - - - diff --git a/src-docs/metrics.type.md b/src-docs/metrics.type.md deleted file mode 100644 index 1e5e61e67..000000000 --- a/src-docs/metrics.type.md +++ /dev/null @@ -1,27 +0,0 @@ - - - - -# module `metrics.type` -Data types used by modules handling metrics. - - - ---- - - - -## class `GithubJobMetrics` -Metrics about a job. - - - -**Attributes:** - - - `queue_duration`: The time in seconds the job took before the runner picked it up. - - `conclusion`: The conclusion of the job. - - - - - diff --git a/src-docs/openstack_cloud.md b/src-docs/openstack_cloud.md deleted file mode 100644 index 34aa3f26f..000000000 --- a/src-docs/openstack_cloud.md +++ /dev/null @@ -1,60 +0,0 @@ - - - - -# module `openstack_cloud` -Module for managing Openstack cloud. - -**Global Variables** ---------------- -- **openstack_cloud**: # Copyright 2024 Canonical Ltd. -# See LICENSE file for licensing details. - -- **openstack_runner_manager**: # Copyright 2024 Canonical Ltd. -# See LICENSE file for licensing details. - - ---- - - - -## function `initialize` - -```python -initialize(cloud_config: dict) → None -``` - -Initialize Openstack integration. - -Validates config and writes it to disk. - - - -**Raises:** - - - `OpenStackInvalidConfigError`: If there was an given cloud config. - - - -**Args:** - - - `cloud_config`: The configuration in clouds.yaml format to apply. - - ---- - - - -## class `CloudConfig` -The parsed clouds.yaml configuration dictionary. - - - -**Attributes:** - - - `clouds`: A mapping of key "clouds" to cloud name mapped to cloud configuration. - - - - - diff --git a/src-docs/openstack_cloud.openstack_cloud.md b/src-docs/openstack_cloud.openstack_cloud.md deleted file mode 100644 index d49b62008..000000000 --- a/src-docs/openstack_cloud.openstack_cloud.md +++ /dev/null @@ -1,246 +0,0 @@ - - - - -# module `openstack_cloud.openstack_cloud` -Class for accessing OpenStack API for managing servers. 
- - - ---- - - - -## class `OpenstackInstance` -Represents an OpenStack instance. - - - -**Attributes:** - - - `server_id`: ID of server assigned by OpenStack. - - `server_name`: Name of the server on OpenStack. - - `instance_id`: ID used by OpenstackCloud class to manage the instances. See docs on the OpenstackCloud. - - `addresses`: IP addresses assigned to the server. - - `status`: Status of the server. - - - -### method `__init__` - -```python -__init__(server: Server, prefix: str) -``` - -Construct the object. - - - -**Args:** - - - `server`: The OpenStack server. - - `prefix`: The name prefix for the servers. - - - -**Raises:** - - - `ValueError`: Provided server should not be managed under this prefix. - - - - - ---- - - - -## class `OpenstackCloud` -Client to interact with OpenStack cloud. - -The OpenStack server name is managed by this cloud. Caller refers to the instances via instance_id. If the caller needs the server name, e.g., for logging, it can be queried with get_server_name. - - - -### method `__init__` - -```python -__init__(clouds_config: dict[str, dict], cloud: str, prefix: str) -``` - -Create the object. - - - -**Args:** - - - `clouds_config`: The openstack clouds.yaml in dict format. - - `cloud`: The name of cloud to use in the clouds.yaml. - - `prefix`: Prefix attached to names of resource managed by this instance. Used for identifying which resource belongs to this instance. - - - - ---- - - - -### method `cleanup` - -```python -cleanup() → None -``` - -Cleanup unused key files and openstack keypairs. - ---- - - - -### method `delete_instance` - -```python -delete_instance(instance_id: str) → None -``` - -Delete a openstack instance. - - - -**Args:** - - - `instance_id`: The instance ID of the instance to delete. - ---- - - - -### method `get_instance` - -```python -get_instance(instance_id: str) → OpenstackInstance | None -``` - -Get OpenStack instance by instance ID. - - - -**Args:** - - - `instance_id`: The instance ID. - - - -**Returns:** - The OpenStack instance if found. - ---- - - - -### method `get_instances` - -```python -get_instances() → tuple[OpenstackInstance, ] -``` - -Get all OpenStack instances. - - - -**Returns:** - The OpenStack instances. - ---- - - - -### method `get_server_name` - -```python -get_server_name(instance_id: str) → str -``` - -Get server name on OpenStack. - - - -**Args:** - - - `instance_id`: ID used to identify a instance. - - - -**Returns:** - The OpenStack server name. - ---- - - - -### method `get_ssh_connection` - -```python -get_ssh_connection(instance: OpenstackInstance) → Connection -``` - -Get SSH connection to an OpenStack instance. - - - -**Args:** - - - `instance`: The OpenStack instance to connect to. - - - -**Raises:** - - - `SSHError`: Unable to get a working SSH connection to the instance. - - `KeyfileError`: Unable to find the keyfile to connect to the instance. - - - -**Returns:** - SSH connection object. - ---- - - - -### method `launch_instance` - -```python -launch_instance( - instance_id: str, - image: str, - flavor: str, - network: str, - cloud_init: str -) → OpenstackInstance -``` - -Create an OpenStack instance. - - - -**Args:** - - - `instance_id`: The instance ID to form the instance name. - - `image`: The image used to create the instance. - - `flavor`: The flavor used to create the instance. - - `network`: The network used to create the instance. - - `cloud_init`: The cloud init userdata to startup the instance. - - - -**Raises:** - - - `OpenStackError`: Unable to create OpenStack server. 
- - - -**Returns:** - The OpenStack instance created. - - diff --git a/src-docs/openstack_cloud.openstack_manager.md b/src-docs/openstack_cloud.openstack_manager.md deleted file mode 100644 index 115eec05b..000000000 --- a/src-docs/openstack_cloud.openstack_manager.md +++ /dev/null @@ -1,212 +0,0 @@ - - - - -# module `openstack_cloud.openstack_manager` -Module for handling interactions with OpenStack. - -**Global Variables** ---------------- -- **RUNNER_INSTALLED_TS_FILE_NAME** -- **SECURITY_GROUP_NAME** -- **BUILD_OPENSTACK_IMAGE_SCRIPT_FILENAME** -- **MAX_METRICS_FILE_SIZE** -- **RUNNER_STARTUP_PROCESS** -- **RUNNER_LISTENER_PROCESS** -- **RUNNER_WORKER_PROCESS** -- **CREATE_SERVER_TIMEOUT** - ---- - - - -## function `create_instance_config` - -```python -create_instance_config( - app_name: str, - unit_num: int, - image_id: str, - path: GitHubOrg | GitHubRepo, - labels: Iterable[str], - registration_token: str -) → InstanceConfig -``` - -Create an instance config from charm data. - - - -**Args:** - - - `app_name`: The juju application name. - - `unit_num`: The juju unit number. - - `image_id`: The openstack image id to create the instance with. - - `path`: Github organisation or repository path. - - `labels`: Addition labels for the runner. - - `registration_token`: The Github runner registration token. See https://docs.github.com/en/rest/actions/self-hosted-runners?apiVersion=2022-11-28#create-a-registration-token-for-a-repository - - - -**Returns:** - Instance configuration created. - - ---- - - - -## class `InstanceConfig` -The configuration values for creating a single runner instance. - - - -**Attributes:** - - - `github_path`: The GitHub repo/org path to register the runner. - - `image_id`: The Openstack image id to use to boot the instance with. - - `labels`: The runner instance labels. - - `name`: Name of the image to launch the GitHub runner instance with. - - `registration_token`: Token for registering the runner on GitHub. - - - -### method `__init__` - -```python -__init__( - github_path: GitHubOrg | GitHubRepo, - image_id: str, - labels: Iterable[str], - name: str, - registration_token: str -) → None -``` - - - - - - - - - ---- - - - -## class `GithubRunnerRemoveError` -Represents an error removing registered runner from Github. - - - - - ---- - - - -## class `OpenstackRunnerManager` -Runner manager for OpenStack-based instances. - - - -**Attributes:** - - - `app_name`: The juju application name. - - `unit_num`: The juju unit number. - - `instance_name`: Prefix of the name for the set of runners. - - - -### method `__init__` - -```python -__init__( - app_name: str, - unit_num: int, - openstack_runner_manager_config: OpenstackRunnerManagerConfig, - cloud_config: dict[str, dict] -) -``` - -Construct OpenstackRunnerManager object. - - - -**Args:** - - - `app_name`: The juju application name. - - `unit_num`: The juju unit number. - - `openstack_runner_manager_config`: Configurations related to runner manager. - - `cloud_config`: The openstack clouds.yaml in dict format. - - - - ---- - - - -### method `flush` - -```python -flush(mode: FlushMode = ) → int -``` - -Flush Openstack servers. - -1. Kill the processes depending on flush mode. 2. Get unhealthy runners after process purging. 3. Delete unhealthy runners. - - - -**Args:** - - - `mode`: The mode to determine which runner to flush. - - - -**Returns:** - The number of runners flushed. 
- ---- - - - -### method `get_github_runner_info` - -```python -get_github_runner_info() → tuple[RunnerGithubInfo, ] -``` - -Get information on GitHub for the runners. - - - -**Returns:** - Collection of runner GitHub information. - ---- - - - -### method `reconcile` - -```python -reconcile(quantity: int) → int -``` - -Reconcile the quantity of runners. - - - -**Args:** - - - `quantity`: The number of intended runners. - - - -**Returns:** - The change in number of runners. - - diff --git a/src-docs/openstack_cloud.openstack_runner_manager.md b/src-docs/openstack_cloud.openstack_runner_manager.md deleted file mode 100644 index 64e7ce91d..000000000 --- a/src-docs/openstack_cloud.openstack_runner_manager.md +++ /dev/null @@ -1,282 +0,0 @@ - - - - -# module `openstack_cloud.openstack_runner_manager` -Manager for self-hosted runner on OpenStack. - -**Global Variables** ---------------- -- **BUILD_OPENSTACK_IMAGE_SCRIPT_FILENAME** -- **MAX_METRICS_FILE_SIZE** -- **RUNNER_STARTUP_PROCESS** -- **RUNNER_LISTENER_PROCESS** -- **RUNNER_WORKER_PROCESS** -- **CREATE_SERVER_TIMEOUT** - - ---- - - - -## class `OpenStackCloudConfig` -Configuration for OpenStack cloud authorisation information. - - - -**Attributes:** - - - `clouds_config`: The clouds.yaml. - - `cloud`: The cloud name to connect to. - - - -### method `__init__` - -```python -__init__(clouds_config: dict[str, dict], cloud: str) → None -``` - - - - - - - - - ---- - - - -## class `OpenStackServerConfig` -Configuration for OpenStack server. - - - -**Attributes:** - - - `image`: The image name for runners to use. - - `flavor`: The flavor name for runners to use. - - `network`: The network name for runners to use. - - - -### method `__init__` - -```python -__init__(image: str, flavor: str, network: str) → None -``` - - - - - - - - - ---- - - - -## class `OpenStackRunnerManager` -Manage self-hosted runner on OpenStack cloud. - - - -**Attributes:** - - - `name_prefix`: The name prefix of the runners created. - - - -### method `__init__` - -```python -__init__( - manager_name: str, - prefix: str, - cloud_config: OpenStackCloudConfig, - server_config: OpenStackServerConfig | None, - runner_config: GitHubRunnerConfig, - service_config: SupportServiceConfig -) → None -``` - -Construct the object. - - - -**Args:** - - - `manager_name`: A name to identify this manager. - - `prefix`: The prefix to runner name. - - `cloud_config`: The configuration for OpenStack authorisation. - - `server_config`: The configuration for creating OpenStack server. Unable to create runner if None. - - `runner_config`: The configuration for the runner. - - `service_config`: The configuration of supporting services of the runners. - - ---- - -#### property name_prefix - -The prefix of runner names. - - - -**Returns:** - The prefix of the runner names managed by this class. - - - ---- - - - -### method `cleanup` - -```python -cleanup(remove_token: str) → Iterator[RunnerMetrics] -``` - -Cleanup runner and resource on the cloud. - - - -**Args:** - - - `remove_token`: The GitHub remove token. - - - -**Returns:** - Any metrics retrieved from cleanup runners. - ---- - - - -### method `create_runner` - -```python -create_runner(registration_token: str) → str -``` - -Create a self-hosted runner. - - - -**Args:** - - - `registration_token`: The GitHub registration token for registering runners. - - - -**Raises:** - - - `MissingServerConfigError`: Unable to create runner due to missing configuration. - - `RunnerCreateError`: Unable to create runner due to OpenStack issues. 
- - - -**Returns:** - Instance ID of the runner. - ---- - - - -### method `delete_runner` - -```python -delete_runner(instance_id: str, remove_token: str) → RunnerMetrics | None -``` - -Delete self-hosted runners. - - - -**Args:** - - - `instance_id`: The instance id of the runner to delete. - - `remove_token`: The GitHub remove token. - - - -**Returns:** - Any metrics collected during the deletion of the runner. - ---- - - - -### method `flush_runners` - -```python -flush_runners(remove_token: str, busy: bool = False) → Iterator[RunnerMetrics] -``` - -Remove idle and/or busy runners. - - - -**Args:** - remove_token: - - `busy`: If false, only idle runners are removed. If true, both idle and busy runners are removed. - - - -**Returns:** - Any metrics retrieved from flushed runners. - ---- - - - -### method `get_runner` - -```python -get_runner(instance_id: str) → CloudRunnerInstance | None -``` - -Get a self-hosted runner by instance id. - - - -**Args:** - - - `instance_id`: The instance id. - - - -**Returns:** - Information on the runner instance. - ---- - - - -### method `get_runners` - -```python -get_runners( - states: Optional[Sequence[CloudRunnerState]] = None -) → tuple[CloudRunnerInstance, ] -``` - -Get self-hosted runners by state. - - - -**Args:** - - - `states`: Filter for the runners with these github states. If None all states will be included. - - - -**Returns:** - Information on the runner instances. - - diff --git a/src-docs/reactive.consumer.md b/src-docs/reactive.consumer.md deleted file mode 100644 index 08e36bd8e..000000000 --- a/src-docs/reactive.consumer.md +++ /dev/null @@ -1,86 +0,0 @@ - - - - -# module `reactive.consumer` -Module responsible for consuming jobs from the message queue. - - ---- - - - -## function `consume` - -```python -consume(mongodb_uri: str, queue_name: str) → None -``` - -Consume a job from the message queue. - -Log the job details and acknowledge the message. If the job details are invalid, reject the message and raise an error. - - - -**Args:** - - - `mongodb_uri`: The URI of the MongoDB database. - - `queue_name`: The name of the queue. - - - -**Raises:** - - - `JobError`: If the job details are invalid. - - ---- - - - -## function `signal_handler` - -```python -signal_handler(signal_code: Signals) → Generator[NoneType, NoneType, NoneType] -``` - -Set a signal handler and after the context, restore the default handler. - -The signal handler exits the process. - - - -**Args:** - - - `signal_code`: The signal code to handle. - - ---- - - - -## class `JobDetails` -A class to translate the payload. - - - -**Attributes:** - - - `labels`: The labels of the job. - - `run_url`: The URL of the job. - - - - - ---- - - - -## class `JobError` -Raised when a job error occurs. - - - - - diff --git a/src-docs/reactive.job.md b/src-docs/reactive.job.md deleted file mode 100644 index 89f06521e..000000000 --- a/src-docs/reactive.job.md +++ /dev/null @@ -1,225 +0,0 @@ - - - - -# module `reactive.job` -Module responsible for job retrieval and handling. - - - ---- - - - -## class `JobDetails` -A class to translate the payload. - - - -**Attributes:** - - - `labels`: The labels of the job. - - `run_url`: The URL of the job. - - - - - ---- - - - -## class `MessageQueueConnectionInfo` -The connection information for the MQ. - - - -**Attributes:** - - - `uri`: The URI of the MQ. - - `queue_name`: The name of the queue. - - - - - ---- - - - -## class `JobSourceError` -Raised when a job source error occurs. 
- - - - - ---- - - - -## class `JobSource` -A protocol for a job source. - - - - ---- - - - -### method `ack` - -```python -ack() → None -``` - -Acknowledge the message. - ---- - - - -### method `get_job` - -```python -get_job() → JobDetails -``` - -Get the job details from the source. - ---- - - - -### method `reject` - -```python -reject() → None -``` - -Reject the message. - - ---- - - - -## class `JobError` -Raised when a job error occurs. - - - - - ---- - - - -## class `Job` -A class to represent a job to be picked up by a runner. - - - -### method `__init__` - -```python -__init__(job_source: JobSource) -``` - -Initialize the message. - - - -**Args:** - - - `job_source`: The source of the job. - - - - ---- - - - -### method `from_message_queue` - -```python -from_message_queue(mq_connection_info: MessageQueueConnectionInfo) → Job -``` - -Get a job from a message queue. - -This method will block until a job is available. - - - -**Args:** - - - `mq_connection_info`: The connection information for the MQ. - - - -**Returns:** - The retrieved Job. - ---- - - - -### method `get_details` - -```python -get_details() → JobDetails -``` - -Get the job details. - - - -**Raises:** - - - `JobError`: If the job details could not be retrieved. - - - -**Returns:** - The job details. - ---- - - - -### method `picked_up` - -```python -picked_up() → None -``` - -Indicate that the job has been picked up by a runner. - - - -**Raises:** - - - `JobError`: If the job could not be acknowledged. - ---- - - - -### method `reject` - -```python -reject() → None -``` - -Mark the job as rejected. - - - -**Raises:** - - - `JobError`: If the job could not be rejected. - - diff --git a/src-docs/reactive.md b/src-docs/reactive.md deleted file mode 100644 index d3453c979..000000000 --- a/src-docs/reactive.md +++ /dev/null @@ -1,14 +0,0 @@ - - - - -# module `reactive` -Package for code implementing reactive scheduling. - -**Global Variables** ---------------- -- **runner_manager**: # Copyright 2024 Canonical Ltd. -# See LICENSE file for licensing details. - - - diff --git a/src-docs/reactive.runner.md b/src-docs/reactive.runner.md deleted file mode 100644 index eb8badbbd..000000000 --- a/src-docs/reactive.runner.md +++ /dev/null @@ -1,28 +0,0 @@ - - - - -# module `reactive.runner` -Module which contains code to spawn a runner reactively. - - ---- - - - -## function `spawn_reactive_runner` - -```python -spawn_reactive_runner(mq_uri: str, queue_name: str) → None -``` - -Spawn a runner reactively. - - - -**Args:** - - - `mq_uri`: The URI of the message queue. - - `queue_name`: The name of the queue. - - diff --git a/src-docs/reactive.runner_manager.md b/src-docs/reactive.runner_manager.md deleted file mode 100644 index 07b2e750b..000000000 --- a/src-docs/reactive.runner_manager.md +++ /dev/null @@ -1,57 +0,0 @@ - - - - -# module `reactive.runner_manager` -Module for managing reactive runners. - -**Global Variables** ---------------- -- **MQ_URI_ENV_VAR** -- **QUEUE_NAME_ENV_VAR** -- **REACTIVE_RUNNER_SCRIPT_FILE** -- **PYTHON_BIN** -- **REACTIVE_RUNNER_CMD_LINE_PREFIX** -- **PID_CMD_COLUMN_WIDTH** -- **PIDS_COMMAND_LINE** -- **UBUNTU_USER** - ---- - - - -## function `reconcile` - -```python -reconcile(quantity: int, mq_uri: str, queue_name: str) → int -``` - -Spawn a runner reactively. - - - -**Args:** - - - `quantity`: The number of runners to spawn. - - `mq_uri`: The message queue URI. - - `queue_name`: The name of the queue. - -Raises a ReactiveRunnerError if the runner fails to spawn. 
- - - -**Returns:** - The number of reactive runner processes spawned. - - ---- - - - -## class `ReactiveRunnerError` -Raised when a reactive runner error occurs. - - - - - diff --git a/src-docs/repo_policy_compliance_client.md b/src-docs/repo_policy_compliance_client.md deleted file mode 100644 index 01823750c..000000000 --- a/src-docs/repo_policy_compliance_client.md +++ /dev/null @@ -1,67 +0,0 @@ - - - - -# module `repo_policy_compliance_client` -Client for requesting repo policy compliance service. - - - ---- - - - -## class `RepoPolicyComplianceClient` -Client for repo policy compliance service. - - - -**Attributes:** - - - `base_url`: Base url to the repo policy compliance service. - - `token`: Charm token configured for the repo policy compliance service. - - - -### method `__init__` - -```python -__init__(url: str, charm_token: str) → None -``` - -Construct the RepoPolicyComplianceClient. - - - -**Args:** - - - `url`: Base URL to the repo policy compliance service. - - `charm_token`: Charm token configured for the repo policy compliance service. - - - - ---- - - - -### method `get_one_time_token` - -```python -get_one_time_token() → str -``` - -Get a single-use token for repo policy compliance check. - - - -**Raises:** - - - `HTTPError`: If there was an error getting one-time token from repo-policy-compliance service. - - - -**Returns:** - The one-time token to be used in a single request of repo policy compliance check. - - diff --git a/src-docs/runner.md b/src-docs/runner.py.md similarity index 74% rename from src-docs/runner.md rename to src-docs/runner.py.md index d7bfb93c1..96e5102d2 100644 --- a/src-docs/runner.md +++ b/src-docs/runner.py.md @@ -2,7 +2,7 @@ -# module `runner` +# module `runner.py` Manage the lifecycle of runners. The `Runner` class stores the information on the runners and manages the lifecycle of the runners on LXD and GitHub. @@ -17,57 +17,6 @@ The `RunnerManager` class from `runner_manager.py` creates and manages a collect --- - - -## class `Snap` -This class represents a snap installation. - - - -**Attributes:** - - - `name`: The snap application name. - - `channel`: The channel to install the snap from. - - `revision`: The revision number of the snap installation. - - - - - ---- - - - -## class `WgetExecutable` -The executable to be installed through wget. - - - -**Attributes:** - - - `url`: The URL of the executable binary. - - `cmd`: Executable command name. E.g. yq_linux_amd64 -> yq - - - -### method `__init__` - -```python -__init__(url: str, cmd: str) → None -``` - - - - - - - - - ---- - - - ## class `CreateRunnerConfig` The configuration values for creating a single runner instance. @@ -81,32 +30,12 @@ The configuration values for creating a single runner instance. - `registration_token`: Token for registering the runner on GitHub. - `arch`: Current machine architecture. - - -### method `__init__` - -```python -__init__( - image: str, - resources: VirtualMachineResources, - binary_path: Path, - registration_token: str, - arch: Arch = -) → None -``` - - - - - --- - - ## class `Runner` Single instance of GitHub self-hosted runner. @@ -122,7 +51,7 @@ Single instance of GitHub self-hosted runner. -### method `__init__` +### function `__init__` ```python __init__( @@ -151,7 +80,7 @@ Construct the runner instance. -### method `create` +### function `create` ```python create(config: CreateRunnerConfig) → None @@ -175,7 +104,7 @@ Create the runner instance on LXD and register it on GitHub. 
-### method `pull_logs` +### function `pull_logs` ```python pull_logs() → None @@ -195,7 +124,7 @@ Expects the runner to have an instance. -### method `remove` +### function `remove` ```python remove(remove_token: Optional[str]) → None @@ -216,3 +145,36 @@ Remove this runner instance from LXD and GitHub. - `RunnerRemoveError`: Failure in removing runner. +--- + +## class `Snap` +This class represents a snap installation. + + + +**Attributes:** + + - `name`: The snap application name. + - `channel`: The channel to install the snap from. + - `revision`: The revision number of the snap installation. + + + + + +--- + +## class `WgetExecutable` +The executable to be installed through wget. + + + +**Attributes:** + + - `url`: The URL of the executable binary. + - `cmd`: Executable command name. E.g. yq_linux_amd64 -> yq + + + + + diff --git a/src-docs/runner_manager.md b/src-docs/runner_manager.py.md similarity index 89% rename from src-docs/runner_manager.md rename to src-docs/runner_manager.py.md index ebf6806b8..1f0844e44 100644 --- a/src-docs/runner_manager.md +++ b/src-docs/runner_manager.py.md @@ -2,7 +2,7 @@ -# module `runner_manager` +# module `runner_manager.py` Runner Manager manages the runners on LXD and GitHub. **Global Variables** @@ -13,8 +13,6 @@ Runner Manager manages the runners on LXD and GitHub. --- - - ## class `LXDRunnerManager` Manage a group of runners according to configuration. @@ -27,7 +25,7 @@ Manage a group of runners according to configuration. -### method `__init__` +### function `__init__` ```python __init__( @@ -54,7 +52,7 @@ Construct RunnerManager object for creating and managing runners. -### method `build_runner_image` +### function `build_runner_image` ```python build_runner_image() → None @@ -74,7 +72,7 @@ Build container image in test mode, else virtual machine image. -### method `check_runner_bin` +### function `check_runner_bin` ```python check_runner_bin() → bool @@ -91,7 +89,7 @@ Check if runner binary exists. -### method `flush` +### function `flush` ```python flush(mode: LXDFlushMode = ) → int @@ -120,7 +118,7 @@ Remove existing runners. -### method `get_github_info` +### function `get_github_info` ```python get_github_info() → Iterator[RunnerInfo] @@ -137,7 +135,7 @@ Get information on the runners from GitHub. -### method `get_latest_runner_bin_url` +### function `get_latest_runner_bin_url` ```python get_latest_runner_bin_url(os_name: str = 'linux') → RunnerApplication @@ -168,7 +166,7 @@ The runner binary URL changes when a new version is available. -### method `has_runner_image` +### function `has_runner_image` ```python has_runner_image() → bool @@ -185,7 +183,7 @@ Check if the runner image exists. -### method `reconcile` +### function `reconcile` ```python reconcile(quantity: int, resources: VirtualMachineResources) → int @@ -209,7 +207,7 @@ Bring runners in line with target. -### method `schedule_build_runner_image` +### function `schedule_build_runner_image` ```python schedule_build_runner_image() → None @@ -221,7 +219,7 @@ Install cron job for building runner image. 
-### method `update_runner_bin` +### function `update_runner_bin` ```python update_runner_bin(binary: RunnerApplication) → None diff --git a/src-docs/runner_manager_type.md b/src-docs/runner_manager_type.py.md similarity index 60% rename from src-docs/runner_manager_type.md rename to src-docs/runner_manager_type.py.md index 7cf5c5fe9..7cb3b71c6 100644 --- a/src-docs/runner_manager_type.md +++ b/src-docs/runner_manager_type.py.md @@ -2,15 +2,13 @@ -# module `runner_manager_type` +# module `runner_manager_type.py` Types used by RunnerManager class. --- - - ## class `LXDFlushMode` Strategy for flushing runners. @@ -32,45 +30,6 @@ During pre-job (repo-check), the runners are marked as idle and if the pre-job f --- - - -## class `RunnerManagerClients` -Clients for accessing various services. - - - -**Attributes:** - - - `github`: Used to query GitHub API. - - `jinja`: Used for templating. - - `lxd`: Used to interact with LXD API. - - `repo`: Used to interact with repo-policy-compliance API. - - - -### method `__init__` - -```python -__init__( - github: GithubClient, - jinja: Environment, - lxd: LxdClient, - repo: RepoPolicyComplianceClient -) → None -``` - - - - - - - - - ---- - - - ## class `LXDRunnerManagerConfig` Configuration of runner manager. @@ -88,27 +47,6 @@ Configuration of runner manager. - `dockerhub_mirror`: URL of dockerhub mirror to use. - `reactive_config`: The configuration to spawn runners reactively. - - -### method `__init__` - -```python -__init__( - charm_state: CharmState, - image: str, - lxd_storage_path: Path, - path: GitHubOrg | GitHubRepo, - service_token: str, - token: str, - dockerhub_mirror: str | None = None, - reactive_config: ReactiveConfig | None = None -) → None -``` - - - - - --- @@ -121,8 +59,6 @@ Whether metrics for the runners should be collected. --- - - ## class `RunnerInfo` Information from GitHub of a runner. @@ -136,17 +72,23 @@ Used as a returned type to method querying runner information. - `status`: Status of the runner. - `busy`: Whether the runner has taken a job. - -### method `__init__` -```python -__init__(name: str, status: GitHubRunnerStatus, busy: bool) → None -``` +--- + +## class `RunnerManagerClients` +Clients for accessing various services. + +**Attributes:** + + - `github`: Used to query GitHub API. + - `jinja`: Used for templating. + - `lxd`: Used to interact with LXD API. + - `repo`: Used to interact with repo-policy-compliance API. diff --git a/src-docs/runner_type.md b/src-docs/runner_type.md deleted file mode 100644 index cde5b2a7e..000000000 --- a/src-docs/runner_type.md +++ /dev/null @@ -1,192 +0,0 @@ - - - - -# module `runner_type` -Types used by Runner class. - - - ---- - - - -## class `RunnerNameByHealth` -Set of runners instance by health state. - - - -**Attributes:** - - - `healthy`: Runners that are correctly running runner script. - - `unhealthy`: Runners that are not running runner script. - - - -### method `__init__` - -```python -__init__(healthy: tuple[str, ], unhealthy: tuple[str, ]) → None -``` - - - - - - - - - ---- - - - -## class `ProxySetting` -Represent HTTP-related proxy settings. - - - -**Attributes:** - - - `no_proxy`: The comma separated URLs to not go through proxy. - - `http`: HTTP proxy URL. - - `https`: HTTPS proxy URL. - - `aproxy_address`: Aproxy URL. - - - -### method `__init__` - -```python -__init__( - no_proxy: Optional[str], - http: Optional[str], - https: Optional[str], - aproxy_address: Optional[str] -) → None -``` - - - - - - - - - ---- - - - -## class `RunnerConfig` -Configuration for runner. 
- - - -**Attributes:** - - - `app_name`: Application name of the charm. - - `issue_metrics`: Whether to issue metrics. - - `labels`: Custom runner labels. - - `lxd_storage_path`: Path to be used as LXD storage. - - `name`: Name of the runner. - - `path`: GitHub repository path in the format '/', or the GitHub organization name. - - `proxies`: HTTP(S) proxy settings. - - `dockerhub_mirror`: URL of dockerhub mirror to use. - - `ssh_debug_connections`: The SSH debug server connections metadata. - - - -### method `__init__` - -```python -__init__( - app_name: str, - issue_metrics: bool, - labels: tuple[str], - lxd_storage_path: Path, - name: str, - path: GitHubOrg | GitHubRepo, - proxies: ProxySetting, - dockerhub_mirror: str | None = None, - ssh_debug_connections: list[SSHDebugConnection] | None = None -) → None -``` - - - - - - - - - ---- - - - -## class `RunnerStatus` -Status of runner. - - - -**Attributes:** - - - `runner_id`: ID of the runner. - - `exist`: Whether the runner instance exists on LXD. - - `online`: Whether GitHub marks this runner as online. - - `busy`: Whether GitHub marks this runner as busy. - - - -### method `__init__` - -```python -__init__( - runner_id: Optional[int] = None, - exist: bool = False, - online: bool = False, - busy: bool = False -) → None -``` - - - - - - - - - ---- - - - -## class `RunnerGithubInfo` -GitHub info of a runner. - - - -**Attributes:** - - - `runner_name`: Name of the runner. - - `runner_id`: ID of the runner assigned by GitHub. - - `online`: Whether GitHub marks this runner as online. - - `busy`: Whether GitHub marks this runner as busy. - - - -### method `__init__` - -```python -__init__(runner_name: str, runner_id: int, online: bool, busy: bool) → None -``` - - - - - - - - - diff --git a/src-docs/runner_type.py.md b/src-docs/runner_type.py.md new file mode 100644 index 000000000..e3a7025aa --- /dev/null +++ b/src-docs/runner_type.py.md @@ -0,0 +1,102 @@ + + + + +# module `runner_type.py` +Types used by Runner class. + + + +--- + +## class `ProxySetting` +Represent HTTP-related proxy settings. + + + +**Attributes:** + + - `no_proxy`: The comma separated URLs to not go through proxy. + - `http`: HTTP proxy URL. + - `https`: HTTPS proxy URL. + - `aproxy_address`: Aproxy URL. + + + + + +--- + +## class `RunnerConfig` +Configuration for runner. + + + +**Attributes:** + + - `app_name`: Application name of the charm. + - `issue_metrics`: Whether to issue metrics. + - `labels`: Custom runner labels. + - `lxd_storage_path`: Path to be used as LXD storage. + - `name`: Name of the runner. + - `path`: GitHub repository path in the format '/', or the GitHub organization name. + - `proxies`: HTTP(S) proxy settings. + - `dockerhub_mirror`: URL of dockerhub mirror to use. + - `ssh_debug_connections`: The SSH debug server connections metadata. + + + + + +--- + +## class `RunnerGithubInfo` +GitHub info of a runner. + + + +**Attributes:** + + - `runner_name`: Name of the runner. + - `runner_id`: ID of the runner assigned by GitHub. + - `online`: Whether GitHub marks this runner as online. + - `busy`: Whether GitHub marks this runner as busy. + + + + + +--- + +## class `RunnerNameByHealth` +Set of runners instance by health state. + + + +**Attributes:** + + - `healthy`: Runners that are correctly running runner script. + - `unhealthy`: Runners that are not running runner script. + + + + + +--- + +## class `RunnerStatus` +Status of runner. + + + +**Attributes:** + + - `runner_id`: ID of the runner. + - `exist`: Whether the runner instance exists on LXD. 
+ - `online`: Whether GitHub marks this runner as online. + - `busy`: Whether GitHub marks this runner as busy. + + + + + diff --git a/src-docs/shared_fs.md b/src-docs/shared_fs.py.md similarity index 98% rename from src-docs/shared_fs.md rename to src-docs/shared_fs.py.md index 5ae59a8ca..205f2343c 100644 --- a/src-docs/shared_fs.md +++ b/src-docs/shared_fs.py.md @@ -2,7 +2,7 @@ -# module `shared_fs` +# module `shared_fs.py` Classes and functions to operate on the shared filesystem between the charm and the runners. **Global Variables** diff --git a/src-docs/utilities.md b/src-docs/utilities.py.md similarity index 98% rename from src-docs/utilities.md rename to src-docs/utilities.py.md index b2c4cbf21..75a3c0386 100644 --- a/src-docs/utilities.md +++ b/src-docs/utilities.py.md @@ -2,7 +2,7 @@ -# module `utilities` +# module `utilities.py` Utilities used by the charm. diff --git a/src/logrotate.py b/src/logrotate.py index 294c651dd..733a70177 100644 --- a/src/logrotate.py +++ b/src/logrotate.py @@ -7,7 +7,7 @@ from charms.operator_libs_linux.v1 import systemd from github_runner_manager.metrics.events import METRICS_LOG_PATH -from github_runner_manager.reactive.runner_manager import REACTIVE_RUNNER_LOG_DIR +from github_runner_manager.reactive.process_manager import REACTIVE_RUNNER_LOG_DIR from pydantic import BaseModel from errors import LogrotateSetupError diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index decd988f5..1d52fd1ea 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -371,30 +371,35 @@ async def app_no_runner( @pytest_asyncio.fixture(scope="module", name="image_builder") async def image_builder_fixture( - model: Model, private_endpoint_config: PrivateEndpointConfigs | None + model: Model, + private_endpoint_config: PrivateEndpointConfigs | None, + existing_app: Optional[str], ): """The image builder application for OpenStack runners.""" if not private_endpoint_config: raise ValueError("Private endpoints are required for testing OpenStack runners.") - app = await model.deploy( - "github-runner-image-builder", - channel="latest/edge", - revision=2, - constraints="cores=2 mem=16G root-disk=20G virt-type=virtual-machine", - config={ - "app-channel": "edge", - "build-interval": "12", - "revision-history-limit": "5", - "openstack-auth-url": private_endpoint_config["auth_url"], - # Bandit thinks this is a hardcoded password - "openstack-password": private_endpoint_config["password"], # nosec: B105 - "openstack-project-domain-name": private_endpoint_config["project_domain_name"], - "openstack-project-name": private_endpoint_config["project_name"], - "openstack-user-domain-name": private_endpoint_config["user_domain_name"], - "openstack-user-name": private_endpoint_config["username"], - }, - ) - await model.wait_for_idle(apps=[app.name], wait_for_active=True, timeout=15 * 60) + if not existing_app: + app = await model.deploy( + "github-runner-image-builder", + channel="latest/edge", + revision=2, + constraints="cores=2 mem=16G root-disk=20G virt-type=virtual-machine", + config={ + "app-channel": "edge", + "build-interval": "12", + "revision-history-limit": "5", + "openstack-auth-url": private_endpoint_config["auth_url"], + # Bandit thinks this is a hardcoded password + "openstack-password": private_endpoint_config["password"], # nosec: B105 + "openstack-project-domain-name": private_endpoint_config["project_domain_name"], + "openstack-project-name": private_endpoint_config["project_name"], + "openstack-user-domain-name": 
private_endpoint_config["user_domain_name"], + "openstack-user-name": private_endpoint_config["username"], + }, + ) + await model.wait_for_idle(apps=[app.name], wait_for_active=True, timeout=15 * 60) + else: + app = model.applications["github-runner-image-builder"] return app @@ -774,7 +779,6 @@ async def app_for_reactive_fixture( if not existing_app: await model.relate(f"{app_openstack_runner.name}:mongodb", f"{mongodb.name}:database") - await app_openstack_runner.set_config({VIRTUAL_MACHINES_CONFIG_NAME: "1"}) await model.wait_for_idle(apps=[app_openstack_runner.name, mongodb.name], status=ACTIVE) return app_openstack_runner diff --git a/tests/integration/helpers/common.py b/tests/integration/helpers/common.py index 95c8b051b..53128b340 100644 --- a/tests/integration/helpers/common.py +++ b/tests/integration/helpers/common.py @@ -369,8 +369,21 @@ def _is_workflow_run_complete(run: WorkflowRun) -> bool: Whether the run status is "completed". """ + return _has_workflow_run_status(run=run, status="completed") + + +def _has_workflow_run_status(run: WorkflowRun, status: str) -> bool: + """Check if the workflow run has a specific status. + + Args: + run: The workflow run to check status for. + status: The status to check for. + + Returns: + Whether the run status is the expected status. + """ if run.update(): - return run.status == "completed" + return run.status == status return False @@ -426,6 +439,20 @@ async def dispatch_workflow( return run +async def wait_for_status(run: WorkflowRun, status: str) -> None: + """Wait for the workflow run to start. + + Args: + run: The workflow run to wait for. + status: The expected status of the run. + """ + await wait_for( + partial(_has_workflow_run_status, run=run, status=status), + timeout=60 * 5, + check_interval=10, + ) + + async def wait_for_completion(run: WorkflowRun, conclusion: str) -> None: """Wait for the workflow run to complete. diff --git a/tests/integration/test_reactive.py b/tests/integration/test_reactive.py index ac4966d13..167bded81 100644 --- a/tests/integration/test_reactive.py +++ b/tests/integration/test_reactive.py @@ -4,9 +4,12 @@ """Testing reactive mode. 
This is only supported for the OpenStack cloud.""" import json import re +from typing import AsyncIterator import pytest +import pytest_asyncio from github import Branch, Repository +from github.WorkflowRun import WorkflowRun from github_runner_manager.reactive.consumer import JobDetails from juju.application import Application from juju.model import Model @@ -14,127 +17,206 @@ from kombu import Connection from pytest_operator.plugin import OpsTest +from charm_state import VIRTUAL_MACHINES_CONFIG_NAME from tests.integration.helpers.common import ( + DISPATCH_CRASH_TEST_WORKFLOW_FILENAME, DISPATCH_TEST_WORKFLOW_FILENAME, dispatch_workflow, reconcile, + wait_for, wait_for_completion, + wait_for_status, ) pytestmark = pytest.mark.openstack -@pytest.fixture(name="setup_queue", autouse=True) -async def setup_queue_fixture( - ops_test: OpsTest, - app_for_reactive: Application, -): +@pytest_asyncio.fixture(name="app") +async def app_fixture( + ops_test: OpsTest, app_for_reactive: Application +) -> AsyncIterator[Application]: + """Setup the reactive charm with 1 virtual machine and tear down afterwards.""" mongodb_uri = await _get_mongodb_uri(ops_test, app_for_reactive) - _clear_queue(mongodb_uri, app_for_reactive.name) _assert_queue_is_empty(mongodb_uri, app_for_reactive.name) + await app_for_reactive.set_config({VIRTUAL_MACHINES_CONFIG_NAME: "1"}) + await reconcile(app_for_reactive, app_for_reactive.model) + + yield app_for_reactive + + # Call reconcile to enable cleanup of any runner spawned + await app_for_reactive.set_config({VIRTUAL_MACHINES_CONFIG_NAME: "0"}) + await reconcile(app_for_reactive, app_for_reactive.model) + async def test_reactive_mode_spawns_runner( ops_test: OpsTest, - app_for_reactive: Application, + app: Application, github_repository: Repository, test_github_branch: Branch, ): """ - arrange: A charm integrated with mongodb and a message is added to the queue. + arrange: Place a message in the queue and dispatch a workflow. act: Call reconcile. - assert: The message is consumed and a runner is spawned. + assert: A runner is spawned to process the job and the message is removed from the queue. """ - mongodb_uri = await _get_mongodb_uri(ops_test, app_for_reactive) + mongodb_uri = await _get_mongodb_uri(ops_test, app) run = await dispatch_workflow( - app=app_for_reactive, + app=app, branch=test_github_branch, github_repository=github_repository, conclusion="success", workflow_id_or_name=DISPATCH_TEST_WORKFLOW_FILENAME, wait=False, ) - jobs = list(run.jobs()) - assert len(jobs) == 1, "Expected 1 job to be created" - job = jobs[0] - job_url = job.url - job = JobDetails( - labels={app_for_reactive.name, "x64"}, # The architecture label should be ignored in the - # label validation in the reactive consumer. - url=job_url, - ) + labels = {app.name, "x64"} # The architecture label should be ignored in the + # label validation in the reactive consumer. + job = _create_job_details(run=run, labels=labels) _add_to_queue( json.dumps(json.loads(job.json()) | {"ignored_noise": "foobar"}), mongodb_uri, - app_for_reactive.name, + app.name, ) - await reconcile(app_for_reactive, app_for_reactive.model) + # This reconcile call is to check that we are not killing machines that are under + # construction in a subsequent reconciliation. 
+ await reconcile(app, app.model) - await wait_for_completion(run, conclusion="success") - _assert_queue_is_empty(mongodb_uri, app_for_reactive.name) + try: + await wait_for_completion(run, conclusion="success") + except TimeoutError: + assert False, ( + "Job did not complete successfully, check the reactive log using tmate," + " it might be due to infrastructure issues" + ) - # Call reconcile to enable cleanup of the runner - await reconcile(app_for_reactive, app_for_reactive.model) + _assert_queue_is_empty(mongodb_uri, app.name) async def test_reactive_mode_does_not_consume_jobs_with_unsupported_labels( ops_test: OpsTest, - app_for_reactive: Application, + app: Application, github_repository: Repository, test_github_branch: Branch, ): """ - arrange: A charm integrated with mongodb and an unsupported label is added to the queue. + arrange: Place a message with an unsupported label in the queue and dispatch a workflow. act: Call reconcile. - assert: No runner is spawned and the message is requeued. + assert: No runner is spawned and the message is not requeued. """ - mongodb_uri = await _get_mongodb_uri(ops_test, app_for_reactive) + mongodb_uri = await _get_mongodb_uri(ops_test, app) run = await dispatch_workflow( - app=app_for_reactive, + app=app, branch=test_github_branch, github_repository=github_repository, conclusion="success", # this is ignored currently if wait=False kwarg is used workflow_id_or_name=DISPATCH_TEST_WORKFLOW_FILENAME, wait=False, ) - jobs = list(run.jobs()) - assert len(jobs) == 1, "Expected 1 job to be created" - job = jobs[0] - job_url = job.url - job = JobDetails(labels={"not supported label"}, url=job_url) + job = _create_job_details(run=run, labels={"not supported label"}) _add_to_queue( job.json(), mongodb_uri, - app_for_reactive.name, + app.name, ) - await reconcile(app_for_reactive, app_for_reactive.model) + # wait for queue being empty, there could be a race condition where it takes some + # time for the job message to be consumed and the queue to be empty + try: + await wait_for(lambda: _get_queue_size(mongodb_uri, app.name) == 0) + run.update() + assert run.status == "queued" + finally: + run.cancel() # cancel the run to avoid a queued run in GitHub actions page + + +async def test_reactive_mode_scale_down( + ops_test: OpsTest, + app: Application, + github_repository: Repository, + test_github_branch: Branch, +): + """ + arrange: Scale down the number of virtual machines to 2 and spawn a job. + act: + 1. Scale down the number of virtual machines to 0 and call reconcile. + 2. Spawn a job. + assert: + 1. The job fails. + 2. The job is queued and there is a message in the queue. + """ + mongodb_uri = await _get_mongodb_uri(ops_test, app) + + await app.set_config({VIRTUAL_MACHINES_CONFIG_NAME: "2"}) + await reconcile(app, app.model) + + run = await dispatch_workflow( + app=app, + branch=test_github_branch, + github_repository=github_repository, + conclusion="success", # this is ignored currently if wait=False kwarg is used + workflow_id_or_name=DISPATCH_CRASH_TEST_WORKFLOW_FILENAME, + wait=False, + ) + job = _create_job_details(run=run, labels={app.name}) + _add_to_queue( + job.json(), + mongodb_uri, + app.name, + ) + + await wait_for_status(run, "in_progress") + + # 1. Scale down the number of virtual machines to 0 and call reconcile. 
+ await app.set_config({VIRTUAL_MACHINES_CONFIG_NAME: "0"}) + await reconcile(app, app.model) + + # we assume that the runner got deleted while running the job, so we expect a failed job + await wait_for_completion(run, conclusion="failure") + _assert_queue_is_empty(mongodb_uri, app.name) + + # 2. Spawn a job. + run = await dispatch_workflow( + app=app, + branch=test_github_branch, + github_repository=github_repository, + conclusion="success", # this is ignored currently if wait=False kwarg is used + workflow_id_or_name=DISPATCH_CRASH_TEST_WORKFLOW_FILENAME, + wait=False, + ) + job = _create_job_details(run=run, labels={app.name}) + _add_to_queue( + job.json(), + mongodb_uri, + app.name, + ) + + await reconcile(app, app.model) run.update() assert run.status == "queued" run.cancel() - _assert_queue_has_size(mongodb_uri, app_for_reactive.name, 1) + _assert_queue_has_size(mongodb_uri, app.name, 1) -async def _get_mongodb_uri(ops_test: OpsTest, app_for_reactive: Application) -> str: +async def _get_mongodb_uri(ops_test: OpsTest, app: Application) -> str: """Get the mongodb uri. Args: ops_test: The ops_test plugin. - app_for_reactive: The juju application containing the unit. + app: The juju application containing the unit. Returns: The mongodb uri. """ - mongodb_uri = await _get_mongodb_uri_from_integration_data(ops_test, app_for_reactive.units[0]) + mongodb_uri = await _get_mongodb_uri_from_integration_data(ops_test, app.units[0]) if not mongodb_uri: - mongodb_uri = await _get_mongodb_uri_from_secrets(ops_test, app_for_reactive.model) + mongodb_uri = await _get_mongodb_uri_from_secrets(ops_test, app.model) assert mongodb_uri, "mongodb uri not found in integration data or secret" return mongodb_uri @@ -192,6 +274,28 @@ async def _get_mongodb_uri_from_secrets(ops_test, model: Model) -> str | None: return mongodb_uri +def _create_job_details(run: WorkflowRun, labels: set[str]) -> JobDetails: + """Create a JobDetails object. + + Args: + run: The workflow run containing the job. Used to retrieve the job url. We assyne + the run only contains one job. + labels: The labels for the job. + + Returns: + The job details. + """ + jobs = list(run.jobs()) + assert len(jobs) == 1, "Expected 1 job to be created" + job = jobs[0] + job_url = job.url + job = JobDetails( + labels=labels, + url=job_url, + ) + return job + + def _add_to_queue(msg: str, mongodb_uri: str, queue_name: str) -> None: """Add a message to a queue. @@ -235,6 +339,19 @@ def _assert_queue_has_size(mongodb_uri: str, queue_name: str, size: int): queue_name: The name of the queue to check. size: The expected size of the queue. """ + assert _get_queue_size(mongodb_uri, queue_name) == size + + +def _get_queue_size(mongodb_uri: str, queue_name: str) -> int: + """Get the size of the queue. + + Args: + mongodb_uri: The mongodb uri. + queue_name: The name of the queue to check. + + Returns: + The size of the queue. 
+ """ with Connection(mongodb_uri) as conn: with conn.SimpleQueue(queue_name) as simple_queue: - assert simple_queue.qsize() == size + return simple_queue.qsize() From d960bd5638d9dc44c62f9be4f18a3bef054b8062 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ali=20U=C4=9EUR?= <39213991+alithethird@users.noreply.github.com> Date: Fri, 11 Oct 2024 14:24:06 +0300 Subject: [PATCH 5/7] Update docs (#389) * Adopt template page (ISD-2375) Removed jargon Rearranged overview text * Update how-to titles in contents (ISD-2376) Remove "How to" in the titles to conform with the naming convention * Update order of contents (ISD-2379) Updated the order so the Charmhub left-hand column follows the order Tutorial, How to, Reference, Explanation * Update contents Add missing how-to docs Reorder * Update actions.md (ISD-2377) Add link to Juju docs * Update configurations.md (ISD-2377) Configure --> Configurations Add link to Juju docs * Update integrations.md (ISD-2378) Add trailing whitespace so "Interface" and "Supported charms" appear on separate lines Example command into a command block * Create CONTRIBUTING.md * Copy contributing text from how-to into CONTRIBUTING.md * Additional contributing sections from the template * Update contribute.md Remove text that has been added to CONTRIBUTING.md Point to CONTRIBUTING.md (not in the main branch yet...) * Update Tutorial * chore(docs): Update set up tutorial model. * chore(docs): Update Charm channel * chore(docs): Add Integration test warning * Update CONTRIBUTING.md Co-authored-by: Christopher Bartz --------- Co-authored-by: Erin Conley Co-authored-by: Christopher Bartz --- CONTRIBUTING.md | 60 ++++++++++++++++++++++++++++++ docs/how-to/contribute.md | 31 +--------------- docs/index.md | 63 ++++++++++++++++---------------- docs/reference/actions.md | 4 +- docs/reference/configurations.md | 4 +- docs/reference/integrations.md | 7 +++- docs/tutorial/quick-start.md | 44 ++++++++++++++++++---- 7 files changed, 141 insertions(+), 72 deletions(-) create mode 100644 CONTRIBUTING.md diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 000000000..e622eec58 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,60 @@ +# Contribute + +## Overview + +This document explains the processes and practices recommended for contributing enhancements to the GitHub Runner operator. + +* Generally, before developing enhancements to this charm, you should consider [opening an issue](https://github.com/canonical/github-runner-operator/issues) explaining your use case. +* If you would like to chat with us about your use-cases or proposed implementation, you can reach us at [Canonical Charm Development Matrix public channel](https://matrix.to/#/#charmhub-charmdev:ubuntu.com) or [Discourse](https://discourse.charmhub.io/). +* Familiarizing yourself with the [Charmed Operator Framework](https://juju.is/docs/sdk) library will help you a lot when working on new features or bug fixes. +* All enhancements require review before being merged. Code review typically examines + * code quality + * test coverage + * user experience for Juju administrators of this charm. +For more details, check our [contributing guide](https://github.com/canonical/is-charms-contributing-guide/blob/main/CONTRIBUTING.md). + +## Developing + +For any problems with this charm, please [report bugs here](https://github.com/canonical/github-runner-operator/issues). 
+ +The code for this charm can be downloaded as follows: + +```shell +git clone https://github.com/canonical/github-runner-operator.git +``` + +Prior to working on the charm ensure juju is connected to an LXD cloud, see the [upstream documentation](https://juju.is/docs/lxd-cloud) for details. + +### Testing + +This project uses `tox` for managing test environments. There are some pre-configured environments +that can be used for linting and formatting code when you're preparing contributions to the charm: + +* `tox`: Runs all of the basic checks (`lint`, `unit`, `static`, and `coverage-report`). +* `tox -e fmt`: Runs formatting using `black` and `isort`. +* `tox -e lint`: Runs a range of static code analysis to check the code. +* `tox -e static`: Runs other checks such as `bandit` for security issues. +* `tox -e unit`: Runs the unit tests. +* `tox -e integration`: Runs the integration tests. + +> NOTE: Integration tests are currently intended to run on the CI and may not work locally without further argument. +### Generating src docs for every commit + +Run the following command: + +```bash +echo -e "tox -e src-docs\ngit add src-docs\n" >> .git/hooks/pre-commit +chmod +x .git/hooks/pre-commit +``` + +### Building the charm + +Build the charm in this git repository using: + +```shell +charmcraft pack +``` + +## Canonical Contributor Agreement + +Canonical welcomes contributions to the GitHub Runner Operator. Please check out our [contributor agreement](https://ubuntu.com/legal/contributors) if you’re interested in contributing to the solution. diff --git a/docs/how-to/contribute.md b/docs/how-to/contribute.md index 6d7c2e0a4..0b1f83fd0 100644 --- a/docs/how-to/contribute.md +++ b/docs/how-to/contribute.md @@ -1,32 +1,3 @@ # How to contribute -## Overview - -This document explains the processes and practices recommended for contributing enhancements to the GitHub Runner operator. - -* Generally, before developing enhancements to this charm, you should consider [opening an issue](https://github.com/canonical/github-runner-operator/issues) explaining your use case. -* If you would like to chat with us about your use-cases or proposed implementation, you can reach us at [Canonical Mattermost public channel](https://chat.charmhub.io/charmhub/channels/charm-dev) or [Discourse](https://discourse.charmhub.io/). -* Familiarizing yourself with the [Charmed Operator Framework](https://juju.is/docs/sdk) library will help you a lot when working on new features or bug fixes. -* All enhancements require review before being merged. Code review typically examines - * code quality - * test coverage - * user experience for Juju administrators of this charm. -For more details, check our [contributing guide](https://github.com/canonical/is-charms-contributing-guide/blob/main/CONTRIBUTING.md). - -## Developing - -For any problems with this charm, please [report bugs here](https://github.com/canonical/github-runner-operator/issues). - -The code for this charm can be downloaded as follows: - -```shell -git clone https://github.com/canonical/github-runner-operator.git -``` - -Prior to working on the charm ensure juju is connected to an LXD cloud, see the [upstream documentation](https://juju.is/docs/lxd-cloud) for details. - -To test the charm, unit test can be ran with `tox -e unit` and the integration test on juju 3.1 can be ran with `tox -e integration-juju3.1`. - -## Canonical Contributor Agreement - -Canonical welcomes contributions to the GitHub Runner Operator. 
Please check out our [contributor agreement](https://ubuntu.com/legal/contributors) if you’re interested in contributing to the solution. \ No newline at end of file +See the [contributing guide](https://github.com/canonical/github-runner-operator/blob/main/CONTRIBUTING.md) on GitHub. diff --git a/docs/index.md b/docs/index.md index e39a4fe1d..205865aa4 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,23 +1,20 @@ -A [Juju](https://juju.is/) [charm](https://juju.is/docs/olm/charmed-operators) for deploying and managing [GitHub self-hosted runners](https://docs.github.com/en/actions/hosting-your-own-runners/managing-self-hosted-runners/about-self-hosted-runners) on virtual machines. +# GitHub Runner Operator -This charm simplifies the initial deployment and "day N" operations of GitHub self-hosted runners. The charm makes it easy to manage self-hosted runners with security and hardware resource usage in mind. +A [Juju](https://juju.is/) [charm](https://juju.is/docs/olm/charmed-operators) for deploying and managing [GitHub self-hosted runners](https://docs.github.com/en/actions/hosting-your-own-runners/managing-self-hosted-runners/about-self-hosted-runners) on virtual machines. The charm maintains a set of ephemeral self-hosted runners, each isolated in a single-use virtual machine instance. -Operating a self-hosted runner comes with [certain security concerns according to GitHub](https://docs.github.com/en/actions/hosting-your-own-runners/managing-self-hosted-runners/about-self-hosted-runners#self-hosted-runner-security). -Just like GitHub's, the self-hosted runners managed by the charm are isolated in a single-use virtual machine. - -Some of the charm dependencies upgrades on a schedule to migrate security risks. The landscape-client charm can be deployed with this charm to ensure other dependencies are up to date. - -The charm maintains a set of ephemeral self-hosted runners, each isolated in a single-use virtual machine instance. In addition, resource limits for the self-hosted runners can be configured. +Like any Juju charm, this charm supports one-line deployment, configuration, integration, scaling, and more. +For the github-runner-operator charm, this includes: +* Stateless operation. +* Configurable resource limits. +* Ability to redeploy without losing any data (no need to back up). +* Supported observability through the `cos-agent` integration. +* Scheduled dependences upgrades to mitigate security risks. Furthermore, the landscape-client charm can be deployed with this charm to ensure other dependencies are kept up to date. -See [charm architecture](https://charmhub.io/github-runner/docs/explanation-charm-architecture) for more information. - -The charm operates in a stateless manner. It can be redeployed without losing any data and there is no need to backup the charm's state. +Operating a self-hosted runner comes with [certain security concerns according to GitHub](https://docs.github.com/en/actions/hosting-your-own-runners/managing-self-hosted-runners/about-self-hosted-runners#self-hosted-runner-security). +Just like GitHub's runners, the self-hosted runners managed by the charm are isolated in a single-use virtual machine. -The charm also supports observability through the optional `cos-agent` integration. Metrics and logs about the runners and the charm itself are collected and sent to the [Canonical Observability Stack](https://charmhub.io/topics/canonical-observability-stack) for analysis and visualisation. 
-This charm will make operating GitHub self-hosted runners simple and straightforward for DevOps or SRE teams through Juju's clean interface. - The charm enforces a set of GitHub repository settings as best practice. This is planned to be opt-in in the future. See [How to comply with repository policies](https://charmhub.io/github-runner/docs/how-to-repo-policy). ## In this documentation @@ -46,20 +43,24 @@ Thinking about using the GitHub runner charm for your next project? [Get in touc # Contents -1. [Explanation](explanation) - 1. [ARM64](explanation/arm64.md) - 1. [Charm architecture](explanation/charm-architecture.md) -1. [How To](how-to) - 1. [How to add custom labels](how-to/add-custom-labels.md) - 1. [How to change repository or organization](how-to/change-path.md) - 1. [How to change GitHub personal access token](how-to/change-token.md) - 1. [How to comply with security requirements](how-to/comply-security.md) - 1. [How to restrict self-hosted runner network access](how-to/configure-denylist.md) - 1. [How to contribute](how-to/contribute.md) - 1. [How to deploy on ARM64](how-to/deploy-on-arm64.md) - 1. [How to integrate with COS](how-to/integrate-with-cos.md) - 1. [How to comply with repository policies](how-to/repo-policy.md) - 1. [How to run on LXD cloud](how-to/run-on-lxd.md) +1. [Tutorial](tutorial) + 1. [Managing resource usage](tutorial/managing-resource-usage.md) + 1. [Quick start](tutorial/quick-start.md) +1. [How to](how-to) + 1. [Add custom labels](how-to/add-custom-labels.md) + 1. [Change repository or organization](how-to/change-path.md) + 1. [Change GitHub personal access token](how-to/change-token.md) + 1. [Comply with security requirements](how-to/comply-security.md) + 1. [Restrict self-hosted runner network access](how-to/configure-denylist.md) + 1. [Configure runner storage](how-to/configure-runner-storage.md) + 1. [Contribute](how-to/contribute.md) + 1. [Debug with SSH](how-to/debug-with-ssh.md) + 1. [Deploy on ARM64](how-to/deploy-on-arm64.md) + 1. [Integrate with COS](how-to/integrate-with-cos.md) + 1. [Spawn OpenStack runner](how-to/openstack-runner.md) + 1. [Comply with repository policies](how-to/repo-policy.md) + 1. [Run on LXD cloud](how-to/run-on-lxd.md) + 1. [Set base image](how-to/set-base-image.md) 1. [Reference](reference) 1. [Actions](reference/actions.md) 1. [ARM64](reference/arm64.md) @@ -68,6 +69,6 @@ Thinking about using the GitHub runner charm for your next project? [Get in touc 1. [External Access](reference/external-access.md) 1. [Integrations](reference/integrations.md) 1. [Token scopes](reference/token-scopes.md) -1. [Tutorial](tutorial) - 1. [Managing resource usage](tutorial/managing-resource-usage.md) - 1. [Quick start](tutorial/quick-start.md) +1. [Explanation](explanation) + 1. [ARM64](explanation/arm64.md) + 1. [Charm architecture](explanation/charm-architecture.md) diff --git a/docs/reference/actions.md b/docs/reference/actions.md index e2e78e54a..3ff48543b 100644 --- a/docs/reference/actions.md +++ b/docs/reference/actions.md @@ -1,3 +1,5 @@ # Actions -See [Actions](https://charmhub.io/github-runner/actions). \ No newline at end of file +See [Actions](https://charmhub.io/github-runner/actions). 
+ +> Read more about actions in the Juju docs: [Action](https://juju.is/docs/juju/action) diff --git a/docs/reference/configurations.md b/docs/reference/configurations.md index 68043cf7c..130062bef 100644 --- a/docs/reference/configurations.md +++ b/docs/reference/configurations.md @@ -1,3 +1,5 @@ # Configurations -See [Configure](https://charmhub.io/github-runner/configure). \ No newline at end of file +See [Configurations](https://charmhub.io/github-runner/configure). + +> Read more about configurations in the Juju docs: [Configuration](https://juju.is/docs/juju/configuration) diff --git a/docs/reference/integrations.md b/docs/reference/integrations.md index 123e0b3e0..afcc79aad 100644 --- a/docs/reference/integrations.md +++ b/docs/reference/integrations.md @@ -2,10 +2,13 @@ ### debug-ssh -_Interface_: debug-ssh +_Interface_: debug-ssh _Supported charms_: [tmate-ssh-server](https://charmhub.io/tmate-ssh-server) Debug-ssh integration provides necessary information for runners to provide ssh reverse-proxy applications to setup inside the runner. -Example debug-ssh integrate command: `juju integrate github-runner tmate-ssh-server` +Example debug-ssh integrate command: +``` +juju integrate github-runner tmate-ssh-server +``` diff --git a/docs/tutorial/quick-start.md b/docs/tutorial/quick-start.md index 488a3b224..892b86a81 100644 --- a/docs/tutorial/quick-start.md +++ b/docs/tutorial/quick-start.md @@ -2,7 +2,7 @@ ## What you'll do -- Setup a GitHub repository +- Set up a GitHub repository - Activate the GitHub APIs related to self-hosted runner - Deploy the [GitHub runner charm](https://charmhub.io/github-runner) - Ensure GitHub repository setting are secure @@ -11,13 +11,32 @@ ## Requirements - GitHub Account. -- Juju 3 installed. -- Juju controller on OpenStack or LXD (see [How to run on LXD cloud](https://charmhub.io/github-runner/docs/how-to-run-on-lxd)). - -For more information about how to install and use Juju, see [Get started with Juju](https://juju.is/docs/olm/get-started-with-juju). +- A working station, e.g., a laptop, with amd64 architecture and at least 16 GB of RAM. +- Juju 3 installed and bootstrapped to an LXD controller. You can accomplish this process by following this guide: [Set up / Tear down your test environment](https://juju.is/docs/juju/set-up--tear-down-your-test-environment) +For more information about how to install Juju, see [Get started with Juju](https://juju.is/docs/olm/get-started-with-juju). ## Steps +### Add more RAM to the Multipass VM +> NOTE: If you're working locally, you don't need to do this step. +The blueprint used for deploying Multipass VM is configured with 8 GB of RAM. To add more RAM to the VM, follow these steps: +Stop the VM: +``` +multipass stop my-juju-vm +``` +Set the RAM to 16 GB with the following command: +``` +multipass set local.my-juju-vm.memory=16G +``` + +### Shell into the Multipass VM +> NOTE: If you're working locally, you don't need to do this step. + +To be able to work inside the Multipass VM first you need to log in with the following command: +``` +multipass shell my-juju-vm +``` + ### Create GitHub repository The GitHub self-hosted runner spawned by the charm needs to connect to a GitHub repository or organization. GitHub repositories are used as it is simpler to manage. 
@@ -34,6 +53,11 @@ The registration token can be requested by calling the [GitHub API](https://docs ### Set up the tutorial model +Switch to the LXD controller(`lxd` is the default LXD controller name if you are using the Multipass VM, if you bootstrapped LXD yourself please use the name set for it): +``` +juju switch lxd +``` + To easily clean up the resources and to separate your workload from the contents of this tutorial, set up a new Juju model with the following command. ``` @@ -48,7 +72,7 @@ For information on token scopes, see [How to change GitHub personal access token Once the personal access token is created, the charm can be deployed with: -```shell +``` juju deploy github-runner --constraints="cores=4 mem=16G root-disk=20G virt-type=virtual-machine" --config token= --config path= --config runner-storage=memory --config vm-memory=2GiB --config vm-disk=10GiB ``` @@ -68,7 +92,7 @@ The charm will spawn new runners on a schedule. During this time, the charm will ### Run a simple workflow on the self-hosted runner -Once the self-hosted runner is available on GitHub, it can be used to run GitHub Actions jobs similar to runners provided by GitHub. The only difference being the label specified in the `runs-on` of a job. +Once the self-hosted runner is available on GitHub, the runner can be used to run GitHub Actions jobs similar to runners provided by GitHub. The only difference being the label specified in the `runs-on` of a job. In addition to the labels added by the GitHub runner application by default, the charm will include labels from the [`labels` charm configuration](https://charmhub.io/github-runner/configurations#labels). @@ -102,3 +126,9 @@ The Juju model, charm and the self-hosted runners can be removed with the follow ```shell juju destroy-model --destroy-storage github-runner-tutorial ``` + +If you used Multipass, to remove the Multipass instance you created for this tutorial, run the following command outside of the VM. 
+ +``` +multipass delete --purge my-juju-vm +``` From 5a31f10911d10b19f3b99018b26c2b50113d11db Mon Sep 17 00:00:00 2001 From: Christopher Bartz Date: Wed, 16 Oct 2024 14:44:14 +0200 Subject: [PATCH 6/7] feat: Adjustments for COS integration for reactive mode (#390) * pin requirements.txt * issue active_runners in lxd runner_manager * create and use dedicated user * only run test_reactive - REVERT ME * some adaptions * check metrics * create home dir * create home dir * WIP checkin * pin dev branch * use tmate * fix test * add move_to_quarantine to shared_fs * outcomment other tests * pass in RUNNER_MANAGER_USER * fix setup RUNNER_MANAGER_USER * add a reconcile before metric extraction * re-add the other reactive integration tests * fix shared_fs * run all tests * fix test runner_manager_openstack * fix test runner_manager_openstack * reconcile for runner_installed_metric * fix keypath * outcomment integration tests * call ops.main * fix keypath * outcomment patching * add sleep * remove charm metrics tests as they do pass * remove test_charm_fork_repo * make test_charm_runner less brittle * add sleep to test_reactive to see if it fixes things * catch FileNotFoundError in test_runner_manager_openstack.py * remove test_charm_runner * remove wrong use of usefixtures * replace sleep by wait_for * test if sleep fixes issues * outcomment test reactive * replace sleep by wait_for * check for None * add assert * set log-level to DEBUG * change wait_for condition * remove check for metrics * remove outcommented code * restore log level * refactor test_reactive * run all tests * fix changed health check * increase timeout for reactive * only run test_reactive * update dashboard with active runners * Revert "only run test_reactive" This reverts commit 5a511848b6322aeb9a5c4d2373bd479521e71419. * update changelog * make test_runner_manager_openstack.py less brittle * pin branch * remove tmate-debug * re-add e2e test * lint * remove TODO * fix case where pre_job metric stats are empty * move StorageManager instantiation into functions * move staticmethod to module level * pin correct commit --- docs/changelog.md | 5 + docs/reference/cos.md | 2 +- requirements.txt | 2 +- src-docs/charm.py.md | 9 +- src-docs/runner_manager.py.md | 10 +- src-docs/shared_fs.py.md | 27 +++++- src/charm.py | 45 ++++++++- src/grafana_dashboards/metrics.json | 27 ++++-- src/runner_manager.py | 22 +++-- src/shared_fs.py | 37 ++++++- tests/integration/helpers/charm_metrics.py | 1 + tests/integration/helpers/openstack.py | 6 +- .../integration/test_charm_metrics_success.py | 4 + tests/integration/test_charm_runner.py | 13 ++- tests/integration/test_reactive.py | 50 +++++++++- .../test_runner_manager_openstack.py | 97 ++++++++++++++----- tests/unit/test_lxd_runner_manager.py | 1 + tests/unit/test_shared_fs.py | 16 ++- 18 files changed, 304 insertions(+), 70 deletions(-) diff --git a/docs/changelog.md b/docs/changelog.md index de64a9810..acf0fc20b 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -1,4 +1,9 @@ # Changelog +### 2024-10-11 + +- Added support for COS integration with reactive runners. +- The charm now creates a dedicated user which is used for running the reactive process and + storing metrics and ssh keys (also for non-reactive mode). 
### 2024-10-07 diff --git a/docs/reference/cos.md b/docs/reference/cos.md index 799bc8a95..d1fcdd736 100644 --- a/docs/reference/cos.md +++ b/docs/reference/cos.md @@ -13,7 +13,7 @@ The "GitHub Self-Hosted Runner Metrics" metrics dashboard presents the following - General: Displays general metrics about the charm and runners, such as: - Lifecycle counters: Tracks the frequency of Runner initialisation, start, stop, and crash events. - Available runners: A horizontal bar graph showing the number of runners available during the last reconciliation event. Note: This data is updated after each reconciliation event and is not real-time. - - Idle runners after reconciliation: A time series graph showing the number of runners marked as idle during the last reconciliation event over time. Note: This data is updated after each reconciliation event and is not real-time. + - Runners after reconciliation: A time series graph showing the number of runners marked as active/idle during the last reconciliation event over time. Note: This data is updated after each reconciliation event and is not real-time. - Duration observations: Each data point aggregates the last hour and shows the 50th, 90th, 95th percentile and maximum durations for: - Runner installation - Runner idle duration diff --git a/requirements.txt b/requirements.txt index 3bed85a6d..da972473e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,4 +12,4 @@ cosl ==0.0.15 # juju 3.1.2.0 depends on pyyaml<=6.0 and >=5.1.2 PyYAML ==6.0.* pyOpenSSL==24.2.1 -github_runner_manager @ git+https://github.com/canonical/github-runner-manager.git@33bbaff42d7cc0f250006fdd08d24659cef364c9 +github_runner_manager @ git+https://github.com/canonical/github-runner-manager.git@601dbb70a983b2902566503e462a8567f80758e7 diff --git a/src-docs/charm.py.md b/src-docs/charm.py.md index 4908713db..9355d1b3b 100644 --- a/src-docs/charm.py.md +++ b/src-docs/charm.py.md @@ -19,10 +19,13 @@ Charm for creating and managing GitHub self-hosted runner instances. - **RECONCILE_RUNNERS_EVENT** - **REACTIVE_MQ_DB_NAME** - **GITHUB_SELF_HOSTED_ARCH_LABELS** +- **ROOT_USER** +- **RUNNER_MANAGER_USER** +- **RUNNER_MANAGER_GROUP** --- - + ## function `catch_charm_errors` @@ -48,7 +51,7 @@ Catch common errors in charm. --- - + ## function `catch_action_errors` @@ -89,7 +92,7 @@ Charm for managing GitHub self-hosted runners. - `ram_pool_path`: The path to memdisk storage. - `kernel_module_path`: The path to kernel modules. - + ### function `__init__` diff --git a/src-docs/runner_manager.py.md b/src-docs/runner_manager.py.md index 1f0844e44..94f344c1b 100644 --- a/src-docs/runner_manager.py.md +++ b/src-docs/runner_manager.py.md @@ -50,7 +50,7 @@ Construct RunnerManager object for creating and managing runners. --- - + ### function `build_runner_image` @@ -87,7 +87,7 @@ Check if runner binary exists. --- - + ### function `flush` @@ -164,7 +164,7 @@ The runner binary URL changes when a new version is available. --- - + ### function `has_runner_image` @@ -181,7 +181,7 @@ Check if the runner image exists. --- - + ### function `reconcile` @@ -205,7 +205,7 @@ Bring runners in line with target. 
--- - + ### function `schedule_build_runner_image` diff --git a/src-docs/shared_fs.py.md b/src-docs/shared_fs.py.md index 205f2343c..2c5c8220e 100644 --- a/src-docs/shared_fs.py.md +++ b/src-docs/shared_fs.py.md @@ -13,7 +13,7 @@ Classes and functions to operate on the shared filesystem between the charm and --- - + ## function `create` @@ -45,7 +45,7 @@ The method is not idempotent and will raise an exception if the shared filesyste --- - + ## function `list_all` @@ -63,7 +63,7 @@ List all the metric storages. --- - + ## function `get` @@ -95,7 +95,7 @@ Mounts the filesystem if it is not currently mounted. --- - + ## function `delete` @@ -118,3 +118,22 @@ Delete the shared filesystem for the runner. - `DeleteMetricsStorageError`: If the shared filesystem could not be deleted. +--- + + + +## function `move_to_quarantine` + +```python +move_to_quarantine(runner_name: str) → None +``` + +Archive the mshared filesystem for the runner and delete it. + + + +**Args:** + + - `runner_name`: The name of the runner. + + diff --git a/src/charm.py b/src/charm.py index a53da1d41..33c359621 100755 --- a/src/charm.py +++ b/src/charm.py @@ -48,6 +48,7 @@ ) from github_runner_manager.reactive.types_ import QueueConfig as ReactiveQueueConfig from github_runner_manager.reactive.types_ import RunnerConfig as ReactiveRunnerConfig +from github_runner_manager.types_ import SystemUserConfig from github_runner_manager.types_.github import GitHubPath, GitHubRunnerStatus, parse_github_path from ops.charm import ( ActionEvent, @@ -61,7 +62,6 @@ UpgradeCharmEvent, ) from ops.framework import StoredState -from ops.main import main from ops.model import ActiveStatus, BlockedStatus, MaintenanceStatus, WaitingStatus import logrotate @@ -110,6 +110,10 @@ GITHUB_SELF_HOSTED_ARCH_LABELS = {"x64", "arm64"} +ROOT_USER = "root" +RUNNER_MANAGER_USER = "runner-manager" +RUNNER_MANAGER_GROUP = "runner-manager" + logger = logging.getLogger(__name__) @@ -454,6 +458,12 @@ def _common_install_code(self, state: CharmState) -> bool: # noqa: C901 logger.error("Failed to install charm dependencies") raise + try: + _setup_runner_manager_user() + except SubprocessError: + logger.error("Failed to setup runner manager user") + raise + try: logrotate.setup() except LogrotateSetupError: @@ -1295,6 +1305,7 @@ def _get_runner_scaler( cloud_runner_manager=openstack_runner_manager_config, github_token=token, supported_labels=supported_labels, + system_user=SystemUserConfig(user=RUNNER_MANAGER_USER, group=RUNNER_MANAGER_GROUP), ) return RunnerScaler( runner_manager=runner_manager, reactive_runner_config=reactive_runner_config @@ -1363,9 +1374,39 @@ def _create_openstack_runner_manager_config( server_config=server_config, runner_config=runner_config, service_config=service_config, + system_user_config=SystemUserConfig( + user=RUNNER_MANAGER_USER, group=RUNNER_MANAGER_GROUP + ), ) return openstack_runner_manager_config +def _setup_runner_manager_user() -> None: + """Create the user and required directories for the runner manager.""" + # check if runner_manager user is already existing + _, retcode = execute_command(["/usr/bin/id", RUNNER_MANAGER_USER], check_exit=False) + if retcode != 0: + logger.info("Creating user %s", RUNNER_MANAGER_USER) + execute_command( + [ + "/usr/sbin/useradd", + "--system", + "--create-home", + "--user-group", + RUNNER_MANAGER_USER, + ], + ) + execute_command(["/usr/bin/mkdir", "-p", f"/home/{RUNNER_MANAGER_USER}/.ssh"]) + execute_command( + [ + "/usr/bin/chown", + "-R", + f"{RUNNER_MANAGER_USER}:{RUNNER_MANAGER_USER}", + 
f"/home/{RUNNER_MANAGER_USER}/.ssh", + ] + ) + execute_command(["/usr/bin/chmod", "700", f"/home/{RUNNER_MANAGER_USER}/.ssh"]) + + if __name__ == "__main__": - main(GithubRunnerCharm) + ops.main(GithubRunnerCharm) diff --git a/src/grafana_dashboards/metrics.json b/src/grafana_dashboards/metrics.json index 0e8728765..96aec6dc1 100644 --- a/src/grafana_dashboards/metrics.json +++ b/src/grafana_dashboards/metrics.json @@ -24,7 +24,7 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, - "id": 567, + "id": 313, "links": [], "liveNow": false, "panels": [ @@ -635,7 +635,7 @@ "type": "loki", "uid": "${lokids}" }, - "description": "This panel totals the number of idle runners reported by the charm units after a reconciliation event. Note that there may be active runners in the time between reconciliations which will not be shown in this panel.", + "description": "This panel totals the number of active/idle runners reported by charm units after reconciliation. Note that this is not a real-time metric and should only be used to identify trends or to investigate problems. A given point in time shows the aggregation of the last reported values from the reconciliation. This means, for example, that if the graph shows 10 idle runners at a particular time, it doesn't mean that there are 10 idle runners at that time, just that this is the most recently reported value. Also note that runners who are reported as idle at the time of reconciliation may become active immediately afterwards. And active runners may become offline/inactive.", "fieldConfig": { "defaults": { "color": { @@ -702,7 +702,7 @@ "calcs": [], "displayMode": "list", "placement": "bottom", - "showLegend": false + "showLegend": true }, "tooltip": { "mode": "single", @@ -721,9 +721,21 @@ "legendFormat": "Idle", "queryType": "range", "refId": "D" + }, + { + "datasource": { + "type": "loki", + "uid": "${lokids}" + }, + "editorMode": "code", + "expr": "sum by(filename)(last_over_time({filename=\"/var/log/github-runner-metrics.log\", juju_application=~\"$juju_application\", juju_model=~\"$juju_model\", juju_model_uuid=~\"$juju_model_uuid\", juju_unit=~\"$juju_unit\"} | json event=\"event\",active_runners=\"active_runners\",flavor=\"flavor\" | event=\"reconciliation\" | flavor=~\"$flavor\" | unwrap active_runners[60m]))", + "hide": false, + "legendFormat": "Active", + "queryType": "range", + "refId": "A" } ], - "title": "Idle Runners after Reconciliation", + "title": "Runners after Reconciliation", "transformations": [], "type": "timeseries" }, @@ -958,7 +970,7 @@ "uid": "${lokids}" }, "editorMode": "code", - "expr": "1 / sum by (flavor) (rate({filename=\"/var/log/github-runner-metrics.log\", juju_application=~\"$juju_application\", juju_model=~\"$juju_model\", juju_model_uuid=~\"$juju_model_uuid\", juju_unit=~\"$juju_unit\"} | json event=\"event\", flavor=\"flavor\" | event=\"reconciliation\" [$__range]))", + "expr": "(1 / sum by(flavor)(rate({filename=\"/var/log/github-runner-metrics.log\", juju_application=~\"$juju_application\", juju_model=~\"$juju_model\", juju_model_uuid=~\"$juju_model_uuid\", juju_unit=~\"$juju_unit\"} | json event=\"event\",flavor=\"flavor\" | event=\"reconciliation\"[$__range])))", "key": "Q-9302bc4d-cce0-4674-bad5-353257fdd2f4-0", "legendFormat": "", "queryType": "instant", @@ -1028,8 +1040,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -2045,6 +2056,6 @@ "timepicker": {}, "timezone": "", "title": "GitHub Self-Hosted Runner Metrics", - 
"version": 15, + "version": 16, "weekStart": "" } diff --git a/src/runner_manager.py b/src/runner_manager.py index ab4b232c5..6e806a6f9 100644 --- a/src/runner_manager.py +++ b/src/runner_manager.py @@ -331,14 +331,21 @@ def _issue_runner_metrics(self) -> IssuedMetricEventsStats: for extracted_metrics in runner_metrics.extract( metrics_storage_manager=shared_fs, runners=set(runner_states.healthy) ): - try: - job_metrics = github_metrics.job( - github_client=self._clients.github, - pre_job_metrics=extracted_metrics.pre_job, - runner_name=extracted_metrics.runner_name, + if extracted_metrics.pre_job: + try: + job_metrics = github_metrics.job( + github_client=self._clients.github, + pre_job_metrics=extracted_metrics.pre_job, + runner_name=extracted_metrics.runner_name, + ) + except GithubMetricsError: + logger.exception("Failed to calculate job metrics") + job_metrics = None + else: + logger.debug( + "No pre-job metrics found for %s, will not calculate job metrics.", + extracted_metrics.runner_name, ) - except GithubMetricsError: - logger.exception("Failed to calculate job metrics") job_metrics = None issued_events = runner_metrics.issue_events( @@ -390,6 +397,7 @@ def _issue_reconciliation_metric( crashed_runners=metric_stats.get(metric_events.RunnerStart, 0) - metric_stats.get(metric_events.RunnerStop, 0), idle_runners=idle_online_count + idle_offline_count, + active_runners=active_count, duration=reconciliation_end_ts - reconciliation_start_ts, ) ) diff --git a/src/shared_fs.py b/src/shared_fs.py index 48c392113..e8af848c1 100644 --- a/src/shared_fs.py +++ b/src/shared_fs.py @@ -8,6 +8,7 @@ from typing import Iterator import github_runner_manager.metrics.storage as metrics_storage +from github_runner_manager.types_ import SystemUserConfig from errors import ( CreateMetricsStorageError, @@ -19,11 +20,16 @@ from utilities import execute_command DIR_NO_MOUNTPOINT_EXIT_CODE = 32 +METRICS_STORAGE_USER_CONFIG = SystemUserConfig( + user="ubuntu", + group="ubuntu", +) logger = logging.getLogger(__name__) FILESYSTEM_OWNER = "ubuntu:ubuntu" FILESYSTEM_IMAGES_PATH = Path("/home/ubuntu/runner-fs-images") +FILESYSTEM_QUARANTINE_PATH = Path("/home/ubuntu/runner-fs-quarantine") FILESYSTEM_SIZE = "1M" @@ -46,7 +52,9 @@ def create(runner_name: str) -> metrics_storage.MetricsStorage: Raises: CreateMetricsStorageError: If the creation of the shared filesystem fails. """ - ms = metrics_storage.create(runner_name) + ms = metrics_storage.StorageManager(system_user_config=METRICS_STORAGE_USER_CONFIG).create( + runner_name + ) try: FILESYSTEM_IMAGES_PATH.mkdir(exist_ok=True) except OSError as exc: @@ -75,7 +83,9 @@ def list_all() -> Iterator[metrics_storage.MetricsStorage]: Yields: A metrics storage object. """ - for ms in metrics_storage.list_all(): + for ms in metrics_storage.StorageManager( + system_user_config=METRICS_STORAGE_USER_CONFIG + ).list_all(): try: # we try to check if it is mounted by using this module's get function get(ms.runner_name) @@ -99,7 +109,9 @@ def get(runner_name: str) -> metrics_storage.MetricsStorage: Raises: GetMetricsStorageError: If the shared filesystem could not be retrieved/mounted. """ - ms = metrics_storage.get(runner_name) + ms = metrics_storage.StorageManager(system_user_config=METRICS_STORAGE_USER_CONFIG).get( + runner_name + ) try: is_mounted = _is_mountpoint(ms.path) @@ -135,7 +147,11 @@ def delete(runner_name: str) -> None: DeleteMetricsStorageError: If the shared filesystem could not be deleted. 
""" try: - runner_fs_path = metrics_storage.get(runner_name).path + runner_fs_path = ( + metrics_storage.StorageManager(system_user_config=METRICS_STORAGE_USER_CONFIG) + .get(runner_name) + .path + ) except GetMetricsStorageError as exc: raise DeleteMetricsStorageError( f"Failed to get shared filesystem for runner {runner_name}" @@ -165,6 +181,19 @@ def delete(runner_name: str) -> None: ) from exc +def move_to_quarantine( + runner_name: str, +) -> None: + """Archive the mshared filesystem for the runner and delete it. + + Args: + runner_name: The name of the runner. + """ + metrics_storage.StorageManager( + system_user_config=METRICS_STORAGE_USER_CONFIG + ).move_to_quarantine(runner_name) + + def _unmount_runner_fs_path(runner_fs_path: Path) -> Path: """Unmount shared filesystem for given runner. diff --git a/tests/integration/helpers/charm_metrics.py b/tests/integration/helpers/charm_metrics.py index 15cd7e3db..0ca7a5e84 100644 --- a/tests/integration/helpers/charm_metrics.py +++ b/tests/integration/helpers/charm_metrics.py @@ -220,6 +220,7 @@ async def assert_events_after_reconciliation( assert metric_log.get("duration") >= 0 assert metric_log.get("crashed_runners") == 0 assert metric_log.get("idle_runners") >= 0 + assert metric_log.get("active_runners") >= 0 async def wait_for_runner_to_be_marked_offline( diff --git a/tests/integration/helpers/openstack.py b/tests/integration/helpers/openstack.py index a77c14604..ddb89bfd8 100644 --- a/tests/integration/helpers/openstack.py +++ b/tests/integration/helpers/openstack.py @@ -6,11 +6,11 @@ from typing import Optional, TypedDict, cast import openstack.connection -from github_runner_manager.openstack_cloud.openstack_cloud import OpenstackCloud from juju.application import Application from juju.unit import Unit from openstack.compute.v2.server import Server +from charm import RUNNER_MANAGER_USER from charm_state import VIRTUAL_MACHINES_CONFIG_NAME from tests.integration.helpers.common import InstanceHelper, reconcile, run_in_unit, wait_for @@ -58,7 +58,7 @@ async def expose_to_instance( break assert ip, f"Failed to get IP address for OpenStack server {runner.name}" - key_path = OpenstackCloud._get_key_path(runner.name) + key_path = f"/home/{RUNNER_MANAGER_USER}/.ssh/{runner.name}.key" exit_code, _, _ = await run_in_unit(unit, f"ls {key_path}") assert exit_code == 0, f"Unable to find key file {key_path}" ssh_cmd = f'ssh -fNT -R {port}:localhost:{port} -i {key_path} -o "StrictHostKeyChecking no" -o "ControlPersist yes" ubuntu@{ip} &' @@ -113,7 +113,7 @@ async def run_in_instance( break assert ip, f"Failed to get IP address for OpenStack server {runner.name}" - key_path = OpenstackCloud._get_key_path(runner.name) + key_path = f"/home/{RUNNER_MANAGER_USER}/.ssh/{runner.name}.key" exit_code, _, _ = await run_in_unit(unit, f"ls {key_path}") assert exit_code == 0, f"Unable to find key file {key_path}" ssh_cmd = f'ssh -i {key_path} -o "StrictHostKeyChecking no" ubuntu@{ip} {command}' diff --git a/tests/integration/test_charm_metrics_success.py b/tests/integration/test_charm_metrics_success.py index 5e8254e5d..283d7eba1 100644 --- a/tests/integration/test_charm_metrics_success.py +++ b/tests/integration/test_charm_metrics_success.py @@ -59,6 +59,10 @@ async def test_charm_issues_runner_installed_metric( """ await instance_helper.ensure_charm_has_runner(app) + # Set the number of virtual machines to 0 to speedup reconciliation + await app.set_config({VIRTUAL_MACHINES_CONFIG_NAME: "0"}) + await reconcile(app=app, model=model) + metrics_log = await 
get_metrics_log(app.units[0]) log_lines = list(map(lambda line: json.loads(line), metrics_log.splitlines())) events = set(map(lambda line: line.get("event"), log_lines)) diff --git a/tests/integration/test_charm_runner.py b/tests/integration/test_charm_runner.py index 96837e28d..3fd587200 100644 --- a/tests/integration/test_charm_runner.py +++ b/tests/integration/test_charm_runner.py @@ -13,6 +13,7 @@ from juju.model import Model from charm_state import ( + VIRTUAL_MACHINES_CONFIG_NAME, VM_CPU_CONFIG_NAME, VM_DISK_CONFIG_NAME, VM_MEMORY_CONFIG_NAME, @@ -24,6 +25,7 @@ DISPATCH_WAIT_TEST_WORKFLOW_FILENAME, InstanceHelper, dispatch_workflow, + reconcile, wait_for, ) from tests.integration.helpers.openstack import OpenStackInstanceHelper, setup_repo_policy @@ -40,8 +42,12 @@ async def app_fixture( Ensure the charm has one runner before starting a test. """ await instance_helper.ensure_charm_has_runner(basic_app) + yield basic_app + await basic_app.set_config({VIRTUAL_MACHINES_CONFIG_NAME: "0"}) + await reconcile(basic_app, basic_app.model) + @pytest.mark.openstack @pytest.mark.asyncio @@ -155,13 +161,6 @@ async def test_flush_runner_and_resource_config( assert action.status == "completed" assert action.results["delta"]["virtual-machines"] == "0" - await wait_for(lambda: workflow.update() or workflow.status == "completed") - action = await app.units[0].run_action("flush-runners") - await action.wait() - - assert action.status == "completed" - assert action.results["delta"]["virtual-machines"] == "1" - @pytest.mark.openstack @pytest.mark.asyncio diff --git a/tests/integration/test_reactive.py b/tests/integration/test_reactive.py index 167bded81..869214ec3 100644 --- a/tests/integration/test_reactive.py +++ b/tests/integration/test_reactive.py @@ -10,6 +10,7 @@ import pytest_asyncio from github import Branch, Repository from github.WorkflowRun import WorkflowRun +from github_runner_manager.metrics.runner import PostJobStatus from github_runner_manager.reactive.consumer import JobDetails from juju.application import Application from juju.model import Model @@ -18,6 +19,10 @@ from pytest_operator.plugin import OpsTest from charm_state import VIRTUAL_MACHINES_CONFIG_NAME +from tests.integration.helpers.charm_metrics import ( + assert_events_after_reconciliation, + get_metrics_log, +) from tests.integration.helpers.common import ( DISPATCH_CRASH_TEST_WORKFLOW_FILENAME, DISPATCH_TEST_WORKFLOW_FILENAME, @@ -59,7 +64,8 @@ async def test_reactive_mode_spawns_runner( """ arrange: Place a message in the queue and dispatch a workflow. act: Call reconcile. - assert: A runner is spawned to process the job and the message is removed from the queue. + assert: A runner is spawned to process the job and the message is removed from the queue. + The metrics are logged. """ mongodb_uri = await _get_mongodb_uri(ops_test, app) @@ -94,6 +100,26 @@ async def test_reactive_mode_spawns_runner( _assert_queue_is_empty(mongodb_uri, app.name) + async def _runner_installed_in_metrics_log() -> bool: + """Check if the runner_installed event is logged in the metrics log. + + Returns: + True if the runner_installed event is logged, False otherwise. 
+ """ + # trigger reconcile which extracts metrics + await reconcile(app, app.model) + metrics_log = await get_metrics_log(app.units[0]) + log_lines = list(map(lambda line: json.loads(line), metrics_log.splitlines())) + events = set(map(lambda line: line.get("event"), log_lines)) + return "runner_installed" in events + + try: + await wait_for(_runner_installed_in_metrics_log, check_interval=30, timeout=600) + except TimeoutError: + assert False, "runner_installed event has not been logged" + + await _assert_metrics_are_logged(app, github_repository) + async def test_reactive_mode_does_not_consume_jobs_with_unsupported_labels( ops_test: OpsTest, @@ -107,7 +133,6 @@ async def test_reactive_mode_does_not_consume_jobs_with_unsupported_labels( assert: No runner is spawned and the message is not requeued. """ mongodb_uri = await _get_mongodb_uri(ops_test, app) - run = await dispatch_workflow( app=app, branch=test_github_branch, @@ -355,3 +380,24 @@ def _get_queue_size(mongodb_uri: str, queue_name: str) -> int: with Connection(mongodb_uri) as conn: with conn.SimpleQueue(queue_name) as simple_queue: return simple_queue.qsize() + + +async def _assert_metrics_are_logged(app: Application, github_repository: Repository): + """Assert that all runner metrics are logged. + + Args: + app: The juju application, used to extract the metrics log and flavor name. + github_repository: The GitHub repository to be included in the metrics. + """ + metrics_log = await get_metrics_log(app.units[0]) + log_lines = list(map(lambda line: json.loads(line), metrics_log.splitlines())) + for metric_log in log_lines: + if metric_log.get("event") == "runner_installed": + assert metric_log.get("flavor") == app.name + assert metric_log.get("event") == "runner_installed" + assert metric_log.get("duration") >= 0 + await assert_events_after_reconciliation( + app=app, + github_repository=github_repository, + post_job_status=PostJobStatus.NORMAL, + ) diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py index 582ebca00..dd7e85876 100644 --- a/tests/integration/test_runner_manager_openstack.py +++ b/tests/integration/test_runner_manager_openstack.py @@ -1,10 +1,13 @@ # Copyright 2024 Canonical Ltd. # See LICENSE file for licensing details. -"""Testing the RunnerManager class with OpenStackRunnerManager as CloudManager.""" +"""Testing the RunnerManager class with OpenStackRunnerManager as CloudManager. +It is assumed that the test runs in the CI under the ubuntu user. 
+""" import json +import logging from pathlib import Path from secrets import token_hex from typing import AsyncGenerator, Iterator @@ -26,7 +29,8 @@ RunnerManager, RunnerManagerConfig, ) -from github_runner_manager.metrics import events, storage +from github_runner_manager.metrics import events +from github_runner_manager.openstack_cloud import health_checks from github_runner_manager.openstack_cloud.openstack_cloud import _CLOUDS_YAML_PATH from github_runner_manager.openstack_cloud.openstack_runner_manager import ( OpenStackCloudConfig, @@ -34,6 +38,7 @@ OpenStackRunnerManagerConfig, OpenStackServerConfig, ) +from github_runner_manager.types_ import SystemUserConfig from github_runner_manager.types_.github import GitHubPath, parse_github_path from openstack.connection import Connection as OpenstackConnection @@ -44,6 +49,8 @@ wait_for, ) +logger = logging.getLogger(__name__) + @pytest.fixture(scope="module", name="runner_label") def runner_label(): @@ -58,17 +65,11 @@ def log_dir_base_path_fixture( with pytest.MonkeyPatch.context() as monkeypatch: temp_log_dir = tmp_path_factory.mktemp("log") - filesystem_base_path = temp_log_dir / "runner-fs" - filesystem_quarantine_path = temp_log_dir / "runner-fs-quarantine" metric_log_path = temp_log_dir / "metric_log" - monkeypatch.setattr(storage, "FILESYSTEM_BASE_PATH", filesystem_base_path) - monkeypatch.setattr(storage, "FILESYSTEM_QUARANTINE_PATH", filesystem_quarantine_path) monkeypatch.setattr(events, "METRICS_LOG_PATH", metric_log_path) yield { - "filesystem_base_path": filesystem_base_path, - "filesystem_quarantine_path": filesystem_quarantine_path, "metric_log": metric_log_path, } @@ -142,6 +143,8 @@ async def openstack_runner_manager_fixture( server_config=server_config, runner_config=runner_config, service_config=service_config, + # we assume the test runs as ubuntu user + system_user_config=SystemUserConfig(user="ubuntu", group="ubuntu"), ) yield OpenStackRunnerManager( @@ -153,8 +156,8 @@ async def openstack_runner_manager_fixture( async def runner_manager_fixture( openstack_runner_manager: OpenStackRunnerManager, token: str, - github_path: GitHubPath, log_dir_base_path: dict[str, Path], + github_path: GitHubPath, ) -> AsyncGenerator[RunnerManager, None]: """Get RunnerManager instance. @@ -202,7 +205,9 @@ def workflow_is_status(workflow: Workflow, status: str) -> bool: return workflow.status == status -async def wait_runner_amount(runner_manager: RunnerManager, num: int): +async def wait_runner_amount( + runner_manager: RunnerManager, num: int, timeout: int = 600, check_interval: int = 60 +) -> None: """Wait until the runner manager has the number of runners. A TimeoutError will be thrown if runners amount is not correct after timeout. @@ -210,6 +215,8 @@ async def wait_runner_amount(runner_manager: RunnerManager, num: int): Args: runner_manager: The RunnerManager to check. num: Number of runner to check for. + timeout: The timeout in seconds. + check_interval: The interval to check in seconds. """ runner_list = runner_manager.get_runners() assert isinstance(runner_list, tuple) @@ -217,7 +224,11 @@ async def wait_runner_amount(runner_manager: RunnerManager, num: int): return # The openstack server can take sometime to fully clean up or create. - await wait_for(lambda: len(runner_manager.get_runners()) == num) + await wait_for( + lambda: len(runner_manager.get_runners()) == num, + timeout=timeout, + check_interval=check_interval, + ) @pytest.mark.openstack @@ -279,10 +290,12 @@ async def test_runner_normal_idle_lifecycle( # 2. 
openstack_instances = openstack_runner_manager._openstack_cloud.get_instances() + assert len(openstack_instances) == 1, "Test arrange failed: Needs one runner." runner = openstack_instances[0] - assert openstack_runner_manager._health_check(runner) + ssh_conn = openstack_runner_manager._openstack_cloud.get_ssh_connection(runner) + assert health_checks.check_active_runner(ssh_conn=ssh_conn, instance=runner) # 3. runner_manager.cleanup() @@ -378,7 +391,10 @@ async def test_runner_normal_lifecycle( 2. The runner should be deleted. The metrics should be recorded. """ metric_log_path = log_dir_base_path["metric_log"] - metric_log_existing_content = metric_log_path.read_text(encoding="utf-8") + try: + metric_log_existing_content = metric_log_path.read_text(encoding="utf-8") + except FileNotFoundError: + metric_log_existing_content = "" workflow = await dispatch_workflow( app=None, @@ -391,9 +407,42 @@ async def test_runner_normal_lifecycle( ) await wait_for(lambda: workflow_is_status(workflow, "completed")) - issue_metrics_events = runner_manager_with_one_runner.cleanup() - assert issue_metrics_events[events.RunnerStart] == 1 - assert issue_metrics_events[events.RunnerStop] == 1 + # We encountered a race condition where runner_manager.cleanup was called while + # there was no runner process, but the post-metrics still had not yet been issued. + # Make the test more robust by waiting for the runner to go offline + # to reduce the race condition. + def is_runner_offline() -> bool: + """Check if the runner is offline. + + Returns: + True if the runner is offline, False otherwise. + """ + runners = runner_manager_with_one_runner.get_runners() + assert len(runners) == 1 + return runners[0].github_state in (GitHubRunnerState.OFFLINE, None) + + await wait_for(is_runner_offline, check_interval=60, timeout=600) + + def have_metrics_been_issued() -> bool: + """Check if the expected metrics have been issued. + + Returns: + True if the expected metrics have been issued, False otherwise. 
+ """ + issued_metrics_events = runner_manager_with_one_runner.cleanup() + logger.info("issued_metrics_events: %s", issued_metrics_events) + return ( + {events.RunnerInstalled, events.RunnerStart, events.RunnerStop} + == set(issued_metrics_events) + and issued_metrics_events[events.RunnerInstalled] == 1 + and issued_metrics_events[events.RunnerStart] == 1 + and issued_metrics_events[events.RunnerStop] == 1 + ) + + try: + await wait_for(have_metrics_been_issued, check_interval=60, timeout=600) + except TimeoutError: + assert False, "The expected metrics were not issued" metric_log_full_content = metric_log_path.read_text(encoding="utf-8") assert metric_log_full_content.startswith( @@ -401,13 +450,17 @@ async def test_runner_normal_lifecycle( ), "The metric log was modified in ways other than appending" metric_log_new_content = metric_log_full_content[len(metric_log_existing_content) :] metric_logs = [json.loads(metric) for metric in metric_log_new_content.splitlines()] - assert ( - len(metric_logs) == 2 - ), "Assuming two events should be runner_start and runner_stop, modify this if new events are added" - assert metric_logs[0]["event"] == "runner_start" - assert metric_logs[0]["workflow"] == "Workflow Dispatch Wait Tests" - assert metric_logs[1]["event"] == "runner_stop" + assert len(metric_logs) == 3, ( + "Assuming three events " + "should be runner_installed, runner_start and runner_stop, " + "modify this if new events are added" + ) + assert metric_logs[0]["event"] == "runner_installed" + assert metric_logs[0]["flavor"] == runner_manager_with_one_runner.manager_name + assert metric_logs[1]["event"] == "runner_start" assert metric_logs[1]["workflow"] == "Workflow Dispatch Wait Tests" + assert metric_logs[2]["event"] == "runner_stop" + assert metric_logs[2]["workflow"] == "Workflow Dispatch Wait Tests" await wait_runner_amount(runner_manager_with_one_runner, 0) diff --git a/tests/unit/test_lxd_runner_manager.py b/tests/unit/test_lxd_runner_manager.py index 2c5aca46c..0b50a7943 100644 --- a/tests/unit/test_lxd_runner_manager.py +++ b/tests/unit/test_lxd_runner_manager.py @@ -448,6 +448,7 @@ def mock_get_runners(): flavor=runner_manager.app_name, crashed_runners=1, idle_runners=2, + active_runners=1, duration=0, ) ) diff --git a/tests/unit/test_shared_fs.py b/tests/unit/test_shared_fs.py index 2a21bf3cc..12dc11506 100644 --- a/tests/unit/test_shared_fs.py +++ b/tests/unit/test_shared_fs.py @@ -35,7 +35,8 @@ def metrics_storage_fixture( ) -> MagicMock: """Mock the metrics storage.""" metrics_storage_mock = MagicMock() - monkeypatch.setattr(shared_fs, "metrics_storage", metrics_storage_mock) + storage_manager_cls_mock = MagicMock(return_value=metrics_storage_mock) + monkeypatch.setattr(shared_fs.metrics_storage, "StorageManager", storage_manager_cls_mock) fs_base_path = filesystem_paths["base"] fs_base_path.mkdir() @@ -318,3 +319,16 @@ def test_get_mounts_if_unmounted(filesystem_paths: dict[str, Path], exc_cmd_mock ], check_exit=True, ) + + +def test_move_to_quarantine(metrics_storage_mock: MagicMock): + """ + arrange: Given a runner name. + act: Call move_to_quarantine. + assert: The method is called on the metrics storage manager. 
+ """ + runner_name = secrets.token_hex(16) + + shared_fs.move_to_quarantine(runner_name) + + metrics_storage_mock.move_to_quarantine.assert_called_once_with(runner_name) From 3168532125185cf84ba17c15e1e5b09ae81b4afb Mon Sep 17 00:00:00 2001 From: yhaliaw <43424755+yhaliaw@users.noreply.github.com> Date: Thu, 17 Oct 2024 20:29:10 +0800 Subject: [PATCH 7/7] Fix/openstack connection creation (#378) * Update to use openstack credential directly * Remove test for removed code * Fix the unit tests * Fix dict values type error * Fix dependency issues * Try new deps * Try new deps * Fix merge issue * Remove a todo comment * Add debugging * Fix merge issue * Remove debug * Fix clouds.yaml access * Fix clouds.yaml dict access * Add comments on the auth fields of clouds.yaml * Fix fmt * Add reactive debug test * Debug reactive * Try if region_name * Print deubgging info * add region_name to _OpenStackAuth * add region_name to _OpenStackCloud * run other integration tests * fix test_runner_manager_openstack * fix unit tests * revert integration_test.yaml * fix unit test * lint * update changelog * increase image-builder deploy timeout * pin correct commit * final new line * remove todo * lint --------- Co-authored-by: Christopher Bartz --- docs/changelog.md | 8 +++++ requirements.txt | 2 +- src-docs/charm_state.py.md | 34 +++++++++---------- src/charm.py | 16 ++++++--- src/charm_state.py | 12 ++----- tests/integration/conftest.py | 6 +++- .../test_runner_manager_openstack.py | 29 +++++++++++----- tests/unit/conftest.py | 12 ------- tests/unit/test_charm.py | 3 +- tests/unit/test_charm_state.py | 28 +++------------ 10 files changed, 71 insertions(+), 79 deletions(-) diff --git a/docs/changelog.md b/docs/changelog.md index acf0fc20b..aef5d9642 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -1,4 +1,12 @@ # Changelog + +### 2024-10-17 + +- Use in-memory authentication instead of clouds.yaml on disk for OpenStack. This prevents +the multi-processing fighting over the file handle for the clouds.yaml file in the github-runner-manager. + +- Fixed a bug where metrics storage for unmatched runners could not get cleaned up. + ### 2024-10-11 - Added support for COS integration with reactive runners. diff --git a/requirements.txt b/requirements.txt index da972473e..083c189a5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,4 +12,4 @@ cosl ==0.0.15 # juju 3.1.2.0 depends on pyyaml<=6.0 and >=5.1.2 PyYAML ==6.0.* pyOpenSSL==24.2.1 -github_runner_manager @ git+https://github.com/canonical/github-runner-manager.git@601dbb70a983b2902566503e462a8567f80758e7 +github_runner_manager @ git+https://github.com/canonical/github-runner-manager.git@6b136f4e915c7dfec22e801981bcc0a6af581df1 diff --git a/src-docs/charm_state.py.md b/src-docs/charm_state.py.md index 354e0416a..64ff18954 100644 --- a/src-docs/charm_state.py.md +++ b/src-docs/charm_state.py.md @@ -110,7 +110,7 @@ Some charm configurations are grouped into other configuration models. --- - + ### classmethod `check_reconcile_interval` @@ -139,7 +139,7 @@ Validate the general charm configuration. --- - + ### classmethod `from_charm` @@ -178,7 +178,7 @@ Raised when charm config is invalid. - `msg`: Explanation of the error. - + ### function `__init__` @@ -221,7 +221,7 @@ The charm state. --- - + ### classmethod `from_charm` @@ -267,7 +267,7 @@ Charm configuration related to GitHub. --- - + ### classmethod `from_charm` @@ -300,7 +300,7 @@ Get github related charm configuration values from charm. 
## class `ImmutableConfigChangedError` Represents an error when changing immutable charm state. - + ### function `__init__` @@ -355,7 +355,7 @@ Runner configurations for local LXD instances. --- - + ### classmethod `check_virtual_machine_resources` @@ -386,7 +386,7 @@ Validate the virtual_machine_resources field values. --- - + ### classmethod `check_virtual_machines` @@ -415,7 +415,7 @@ Validate the virtual machines configuration value. --- - + ### classmethod `from_charm` @@ -475,7 +475,7 @@ OpenstackImage from image builder relation data. --- - + ### classmethod `from_charm` @@ -518,7 +518,7 @@ Runner configuration for OpenStack Instances. --- - + ### classmethod `from_charm` @@ -572,7 +572,7 @@ Return the aproxy address. --- - + ### classmethod `check_use_aproxy` @@ -602,7 +602,7 @@ Validate the proxy configuration. --- - + ### classmethod `from_charm` @@ -640,7 +640,7 @@ Represents the configuration for reactive scheduling. --- - + ### classmethod `from_database` @@ -685,7 +685,7 @@ Configuration for the repo policy compliance service. --- - + ### classmethod `from_charm` @@ -748,7 +748,7 @@ SSH connection information for debug workflow. --- - + ### classmethod `from_charm` @@ -781,7 +781,7 @@ Raised when given machine charm architecture is unsupported. - `arch`: The current machine architecture. - + ### function `__init__` diff --git a/src/charm.py b/src/charm.py index 33c359621..f3c6858a3 100755 --- a/src/charm.py +++ b/src/charm.py @@ -41,7 +41,7 @@ ) from github_runner_manager.manager.runner_scaler import RunnerScaler from github_runner_manager.openstack_cloud.openstack_runner_manager import ( - OpenStackCloudConfig, + OpenStackCredentials, OpenStackRunnerManager, OpenStackRunnerManagerConfig, OpenStackServerConfig, @@ -1346,9 +1346,15 @@ def _create_openstack_runner_manager_config( logger.warning( "Multiple clouds defined in clouds.yaml. Using the first one to connect." ) - cloud_config = OpenStackCloudConfig( - clouds_config=state.charm_config.openstack_clouds_yaml, - cloud=clouds[0], + first_cloud_config = state.charm_config.openstack_clouds_yaml["clouds"][clouds[0]] + credentials = OpenStackCredentials( + auth_url=first_cloud_config["auth"]["auth_url"], + project_name=first_cloud_config["auth"]["project_name"], + username=first_cloud_config["auth"]["username"], + password=first_cloud_config["auth"]["password"], + user_domain_name=first_cloud_config["auth"]["user_domain_name"], + project_domain_name=first_cloud_config["auth"]["project_domain_name"], + region_name=first_cloud_config["region_name"], ) server_config = None image = state.runner_config.openstack_image @@ -1370,7 +1376,7 @@ def _create_openstack_runner_manager_config( name=self.app.name, # The prefix is set to f"{application_name}-{unit number}" prefix=self.unit.name.replace("/", "-"), - cloud_config=cloud_config, + credentials=credentials, server_config=server_config, runner_config=runner_config, service_config=service_config, diff --git a/src/charm_state.py b/src/charm_state.py index afe174388..ba4e8731b 100644 --- a/src/charm_state.py +++ b/src/charm_state.py @@ -19,8 +19,6 @@ import yaml from charms.data_platform_libs.v0.data_interfaces import DatabaseRequires -from github_runner_manager import openstack_cloud -from github_runner_manager.errors import OpenStackInvalidConfigError from github_runner_manager.types_.github import GitHubPath, parse_github_path from ops import CharmBase from pydantic import ( @@ -315,9 +313,11 @@ class _OpenStackCloud(TypedDict): Attributes: auth: The connection authentication info. 
+ region_name: The OpenStack region to authenticate to. """ auth: _OpenStackAuth + region_name: str class OpenStackCloudsYAML(TypedDict): @@ -435,14 +435,6 @@ def _parse_openstack_clouds_config(cls, charm: CharmBase) -> OpenStackCloudsYAML f"Invalid {OPENSTACK_CLOUDS_YAML_CONFIG_NAME} config. Invalid yaml." ) from exc - try: - openstack_cloud.initialize(openstack_clouds_yaml) - except OpenStackInvalidConfigError as exc: - logger.error("Invalid openstack config, %s.", exc) - raise CharmConfigInvalidError( - "Invalid openstack config. Not able to initialize openstack integration." - ) from exc - return openstack_clouds_yaml @validator("reconcile_interval") diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 1d52fd1ea..da3359250 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -48,6 +48,8 @@ from tests.integration.helpers.openstack import OpenStackInstanceHelper, PrivateEndpointConfigs from tests.status_name import ACTIVE +IMAGE_BUILDER_DEPLOY_TIMEOUT_IN_SECONDS = 30 * 60 + # The following line is required because we are using request.getfixturevalue in conjunction # with pytest-asyncio. See https://github.com/pytest-dev/pytest-asyncio/issues/112 nest_asyncio.apply() @@ -397,7 +399,9 @@ async def image_builder_fixture( "openstack-user-name": private_endpoint_config["username"], }, ) - await model.wait_for_idle(apps=[app.name], wait_for_active=True, timeout=15 * 60) + await model.wait_for_idle( + apps=[app.name], wait_for_active=True, timeout=IMAGE_BUILDER_DEPLOY_TIMEOUT_IN_SECONDS + ) else: app = model.applications["github-runner-image-builder"] return app diff --git a/tests/integration/test_runner_manager_openstack.py b/tests/integration/test_runner_manager_openstack.py index dd7e85876..5bdecc9e8 100644 --- a/tests/integration/test_runner_manager_openstack.py +++ b/tests/integration/test_runner_manager_openstack.py @@ -31,9 +31,8 @@ ) from github_runner_manager.metrics import events from github_runner_manager.openstack_cloud import health_checks -from github_runner_manager.openstack_cloud.openstack_cloud import _CLOUDS_YAML_PATH from github_runner_manager.openstack_cloud.openstack_runner_manager import ( - OpenStackCloudConfig, + OpenStackCredentials, OpenStackRunnerManager, OpenStackRunnerManagerConfig, OpenStackServerConfig, @@ -113,13 +112,27 @@ async def openstack_runner_manager_fixture( The prefix args of OpenstackRunnerManager set to app_name to let openstack_connection_fixture perform the cleanup of openstack resources. 
""" - _CLOUDS_YAML_PATH.unlink(missing_ok=True) clouds_config = yaml.safe_load(private_endpoint_clouds_yaml) - cloud_config = OpenStackCloudConfig( - clouds_config=clouds_config, - cloud="testcloud", - ) + try: + # Pick the first cloud in the clouds.yaml + cloud = tuple(clouds_config["clouds"].values())[0] + print("============================================") + print(cloud) + print("============================================") + + credentials = OpenStackCredentials( + auth_url=cloud["auth"]["auth_url"], + project_name=cloud["auth"]["project_name"], + username=cloud["auth"]["username"], + password=cloud["auth"]["password"], + user_domain_name=cloud["auth"]["user_domain_name"], + project_domain_name=cloud["auth"]["project_domain_name"], + region_name=cloud["region_name"], + ) + except KeyError as err: + raise AssertionError("Issue with the format of the clouds.yaml used in test") from err + server_config = OpenStackServerConfig( image=openstack_test_image, flavor=flavor_name, @@ -139,7 +152,7 @@ async def openstack_runner_manager_fixture( openstack_runner_manager_config = OpenStackRunnerManagerConfig( name=app_name, prefix=f"{app_name}-0", - cloud_config=cloud_config, + credentials=credentials, server_config=server_config, runner_config=runner_config, service_config=service_config, diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index c0b760144..5a9182080 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -102,18 +102,6 @@ def cloud_name_fixture() -> str: return "microstack" -@pytest.fixture(autouse=True, name="clouds_yaml_path") -def clouds_yaml_path(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> Path: - """Mocked clouds.yaml path. - - Returns: - Path: Mocked clouds.yaml path. - """ - clouds_yaml_path = tmp_path / "clouds.yaml" - monkeypatch.setattr("github_runner_manager.openstack_cloud.CLOUDS_YAML_PATH", clouds_yaml_path) - return clouds_yaml_path - - @pytest.fixture(name="clouds_yaml") def clouds_yaml_fixture(cloud_name: str) -> dict: """Testing clouds.yaml.""" diff --git a/tests/unit/test_charm.py b/tests/unit/test_charm.py index a28fc9743..f2451686f 100644 --- a/tests/unit/test_charm.py +++ b/tests/unit/test_charm.py @@ -681,7 +681,8 @@ def test_on_config_changed_openstack_clouds_yaml(self, run, wt, mkdir, orm, rm): "username": secrets.token_hex(16), "user_domain_name": secrets.token_hex(16), "password": secrets.token_hex(16), - } + }, + "region_name": secrets.token_hex(16), } } } diff --git a/tests/unit/test_charm_state.py b/tests/unit/test_charm_state.py index 52b19781c..f3d3564f7 100644 --- a/tests/unit/test_charm_state.py +++ b/tests/unit/test_charm_state.py @@ -8,7 +8,6 @@ from pathlib import Path from unittest.mock import MagicMock -import github_runner_manager.openstack_cloud import pytest import yaml from charms.data_platform_libs.v0.data_interfaces import DatabaseRequires @@ -297,7 +296,7 @@ def valid_yaml_config(): auth_url: 'http://keystone.openstack.svc.cluster.local:5000/v3' user_domain_name: 'Default' project_domain_name: 'Default' - region_name: 'RegionOne' + region_name: 'RegionOne' """ @@ -314,7 +313,7 @@ def invalid_yaml_config(): auth_url: 'http://keystone.openstack.svc.cluster.local:5000/v3' user_domain_name: 'Default' project_domain_name: 'Default' - region_name: 'RegionOne' + region_name: 'RegionOne' """ @@ -358,26 +357,6 @@ def test_parse_openstack_clouds_config_invalid_yaml_list(): CharmConfig._parse_openstack_clouds_config(mock_charm) -def test_parse_openstack_clouds_initialize_fail( - valid_yaml_config: str, monkeypatch: 
pytest.MonkeyPatch -): - """ - arrange: Given monkeypatched openstack_cloud.initialize that raises an error. - act: Call _parse_openstack_clouds_config method with the mock CharmBase instance. - assert: Verify that the method raises CharmConfigInvalidError. - """ - mock_charm = MockGithubRunnerCharmFactory() - mock_charm.config[OPENSTACK_CLOUDS_YAML_CONFIG_NAME] = valid_yaml_config - monkeypatch.setattr( - github_runner_manager.openstack_cloud, - "initialize", - MagicMock(side_effect=github_runner_manager.openstack_cloud.OpenStackInvalidConfigError), - ) - - with pytest.raises(CharmConfigInvalidError): - CharmConfig._parse_openstack_clouds_config(mock_charm) - - def test_parse_openstack_clouds_config_valid(valid_yaml_config: str): """ arrange: Create a mock CharmBase instance with a valid OpenStack clouds YAML config. @@ -488,7 +467,8 @@ def test_charm_config_from_charm_valid(): "project_name": "test-project-name", "user_domain_name": "Default", "username": "test-user-name", - } + }, + "region_name": secrets.token_hex(16), } } }
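Taken together, `_create_openstack_runner_manager_config` and the `openstack_runner_manager_fixture` above now repeat the same credential-extraction steps. The sketch below consolidates that pattern for reference only; the helper name `credentials_from_clouds_yaml` is not part of the charm, and it assumes the clouds.yaml shape used in the tests (an `auth` mapping plus a top-level `region_name` per cloud). Keeping the credentials in memory, rather than initialising a clouds.yaml on disk, is what the changelog entry refers to: it avoids multiple processes contending for the same file handle.

```python
# Sketch only: consolidates the clouds.yaml -> OpenStackCredentials extraction
# repeated in src/charm.py and the integration test fixture. The helper name is
# illustrative, and missing-key handling is reduced to a plain KeyError.
import yaml

from github_runner_manager.openstack_cloud.openstack_runner_manager import OpenStackCredentials


def credentials_from_clouds_yaml(clouds_yaml: str) -> OpenStackCredentials:
    """Build in-memory OpenStack credentials from the first cloud in a clouds.yaml document."""
    clouds_config = yaml.safe_load(clouds_yaml)
    # Both the charm and the test fixture pick the first defined cloud.
    cloud = next(iter(clouds_config["clouds"].values()))
    auth = cloud["auth"]
    return OpenStackCredentials(
        auth_url=auth["auth_url"],
        project_name=auth["project_name"],
        username=auth["username"],
        password=auth["password"],
        user_domain_name=auth["user_domain_name"],
        project_domain_name=auth["project_domain_name"],
        region_name=cloud["region_name"],
    )
```

In the charm this object feeds `OpenStackRunnerManagerConfig(credentials=...)`; the integration test fixture passes the same kind of object directly, so a malformed clouds.yaml now surfaces as a failed assertion in test setup rather than as an on-disk initialisation error.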