diff --git a/.github/workflows/integration_test.yaml b/.github/workflows/integration_test.yaml index 003d046d7..341728001 100644 --- a/.github/workflows/integration_test.yaml +++ b/.github/workflows/integration_test.yaml @@ -13,7 +13,7 @@ jobs: pre-run-script: scripts/pre-integration-test.sh provider: lxd test-tox-env: integration-juju2.9 - modules: '["test_charm_fork_repo", "test_charm_no_runner", "test_charm_scheduled_events", "test_charm_one_runner", "test_charm_metrics", "test_self_hosted_runner", "test_charm_with_proxy", "test_charm_with_juju_storage"]' + modules: '["test_charm_fork_repo", "test_charm_no_runner", "test_charm_scheduled_events", "test_charm_one_runner", "test_charm_metrics_success", "test_charm_metrics_failure", "test_self_hosted_runner", "test_charm_with_proxy", "test_charm_with_juju_storage"]' integration-tests-juju3: name: Integration test with juju 3.1 uses: canonical/operator-workflows/.github/workflows/integration_test.yaml@main @@ -23,4 +23,4 @@ jobs: pre-run-script: scripts/pre-integration-test.sh provider: lxd test-tox-env: integration-juju3.1 - modules: '["test_charm_fork_repo", "test_charm_no_runner", "test_charm_scheduled_events", "test_charm_one_runner", "test_charm_metrics", "test_self_hosted_runner", "test_charm_with_proxy", "test_charm_with_juju_storage"]' + modules: '["test_charm_fork_repo", "test_charm_no_runner", "test_charm_scheduled_events", "test_charm_one_runner", "test_charm_metrics_success", "test_charm_metrics_failure", "test_self_hosted_runner", "test_charm_with_proxy", "test_charm_with_juju_storage"]' diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py new file mode 100644 index 000000000..188515554 --- /dev/null +++ b/tests/integration/__init__.py @@ -0,0 +1,2 @@ +# Copyright 2024 Canonical Ltd. +# See LICENSE file for licensing details. diff --git a/tests/integration/test_charm_metrics.py b/tests/integration/charm_metrics_helpers.py similarity index 50% rename from tests/integration/test_charm_metrics.py rename to tests/integration/charm_metrics_helpers.py index dd25d325d..5b61ca58c 100644 --- a/tests/integration/test_charm_metrics.py +++ b/tests/integration/charm_metrics_helpers.py @@ -1,39 +1,31 @@ -# Copyright 2024 Canonical Ltd. +# Copyright 2024 Canonical Ltd. # See LICENSE file for licensing details. 
-"""Integration tests for metrics/logs.""" +"""Utilities for charm metrics integration tests.""" + + import json import logging from datetime import datetime, timezone from time import sleep -from typing import AsyncIterator -import pytest -import pytest_asyncio import requests from github.Branch import Branch from github.Repository import Repository from github.Workflow import Workflow from juju.application import Application -from juju.model import Model from juju.unit import Unit -import runner_logs from github_type import JobConclusion from metrics import METRICS_LOG_PATH from runner_metrics import PostJobStatus from tests.integration.helpers import ( - DISPATCH_CRASH_TEST_WORKFLOW_FILENAME, DISPATCH_FAILURE_TEST_WORKFLOW_FILENAME, DISPATCH_TEST_WORKFLOW_FILENAME, - ensure_charm_has_runner, get_runner_name, get_runner_names, - reconcile, - run_in_lxd_instance, run_in_unit, ) -from tests.status_name import ACTIVE TEST_WORKFLOW_NAMES = [ "Workflow Dispatch Tests", @@ -43,97 +35,6 @@ JOB_LOG_START_MSG_TEMPLATE = "Job is about to start running on the runner: {runner_name}" -@pytest_asyncio.fixture(scope="module", name="app_integrated") -async def app_integrated_fixture( - model: Model, app_no_runner: Application -) -> AsyncIterator[Application]: - """Setup the charm to be integrated with grafana-agent using the cos-agent integration.""" - await _integrate_apps(app_no_runner, model) - - yield app_no_runner - - -async def _clear_metrics_log(unit: Unit) -> None: - """Clear the metrics log on the unit. - - Args: - unit: The unit to clear the metrics log on. - """ - retcode, _ = await run_in_unit( - unit=unit, - command=f"if [ -f {METRICS_LOG_PATH} ]; then rm {METRICS_LOG_PATH}; fi", - ) - assert retcode == 0, "Failed to clear metrics log" - - -async def _print_loop_device_info(unit: Unit, loop_device: str) -> None: - """Print loop device info on the unit. - - Args: - unit: The unit to print the loop device info on. - loop_device: The loop device to print the info for. - """ - retcode, stdout = await run_in_unit( - unit=unit, - command="sudo losetup -lJ", - ) - assert retcode == 0, f"Failed to get loop devices: {stdout}" - assert stdout is not None, "Failed to get loop devices, no stdout message" - loop_devices_info = json.loads(stdout) - for loop_device_info in loop_devices_info["loopdevices"]: - if loop_device_info["name"] == loop_device: - logging.info("Loop device %s info: %s", loop_device, loop_device_info) - break - else: - logging.info("Loop device %s not found", loop_device) - - -@pytest_asyncio.fixture(scope="function", name="app") -async def app_fixture( - model: Model, app_integrated: Application, loop_device: str -) -> AsyncIterator[Application]: - """Setup and teardown the charm after each test. - - Clear the metrics log before each test. - """ - unit = app_integrated.units[0] - await _clear_metrics_log(unit) - await _print_loop_device_info(unit, loop_device) - yield app_integrated - - -async def _get_metrics_log(unit: Unit) -> str: - """Retrieve the metrics log from the unit. - - Args: - unit: The unit to retrieve the metrics log from. - - Returns: - The metrics log. 
- """ - retcode, stdout = await run_in_unit( - unit=unit, - command=f"if [ -f {METRICS_LOG_PATH} ]; then cat {METRICS_LOG_PATH}; else echo ''; fi", - ) - assert retcode == 0, f"Failed to get metrics log: {stdout}" - assert stdout is not None, "Failed to get metrics log, no stdout message" - logging.info("Metrics log: %s", stdout) - return stdout.strip() - - -async def _integrate_apps(app: Application, model: Model): - """Integrate the charm with grafana-agent using the cos-agent integration. - - Args: - app: The charm to integrate. - model: The model to deploy the grafana-agent to. - """ - grafana_agent = await model.deploy("grafana-agent", channel="latest/edge") - await model.relate(f"{app.name}:cos-agent", f"{grafana_agent.name}:cos-agent") - await model.wait_for_idle(apps=[app.name], status=ACTIVE) - await model.wait_for_idle(apps=[grafana_agent.name]) - - async def _wait_until_runner_is_used_up(runner_name: str, unit: Unit): """Wait until the runner is used up. @@ -215,6 +116,60 @@ async def _wait_for_workflow_to_start(unit: Unit, workflow: Workflow): assert False, "Timeout while waiting for the workflow to start" +async def clear_metrics_log(unit: Unit) -> None: + """Clear the metrics log on the unit. + + Args: + unit: The unit to clear the metrics log on. + """ + retcode, _ = await run_in_unit( + unit=unit, + command=f"if [ -f {METRICS_LOG_PATH} ]; then rm {METRICS_LOG_PATH}; fi", + ) + assert retcode == 0, "Failed to clear metrics log" + + +async def print_loop_device_info(unit: Unit, loop_device: str) -> None: + """Print loop device info on the unit. + + Args: + unit: The unit to print the loop device info on. + loop_device: The loop device to print the info for. + """ + retcode, stdout = await run_in_unit( + unit=unit, + command="sudo losetup -lJ", + ) + assert retcode == 0, f"Failed to get loop devices: {stdout}" + assert stdout is not None, "Failed to get loop devices, no stdout message" + loop_devices_info = json.loads(stdout) + for loop_device_info in loop_devices_info["loopdevices"]: + if loop_device_info["name"] == loop_device: + logging.info("Loop device %s info: %s", loop_device, loop_device_info) + break + else: + logging.info("Loop device %s not found", loop_device) + + +async def get_metrics_log(unit: Unit) -> str: + """Retrieve the metrics log from the unit. + + Args: + unit: The unit to retrieve the metrics log from. + + Returns: + The metrics log. + """ + retcode, stdout = await run_in_unit( + unit=unit, + command=f"if [ -f {METRICS_LOG_PATH} ]; then cat {METRICS_LOG_PATH}; else echo ''; fi", + ) + assert retcode == 0, f"Failed to get metrics log: {stdout}" + assert stdout is not None, "Failed to get metrics log, no stdout message" + logging.info("Metrics log: %s", stdout) + return stdout.strip() + + async def _cancel_workflow_run(unit: Unit, workflow: Workflow): """Cancel the workflow run. @@ -234,7 +189,7 @@ async def _cancel_workflow_run(unit: Unit, workflow: Workflow): run.cancel() -async def _dispatch_workflow( +async def dispatch_workflow( app: Application, branch: Branch, github_repository: Repository, conclusion: str ): """Dispatch a workflow on a branch for the runner to run. @@ -262,7 +217,7 @@ async def _dispatch_workflow( ) -async def _assert_events_after_reconciliation( +async def assert_events_after_reconciliation( app: Application, github_repository: Repository, post_job_status: PostJobStatus ): """Assert that the RunnerStart, RunnerStop and Reconciliation metric is logged. 
@@ -274,7 +229,7 @@ async def _assert_events_after_reconciliation( """ unit = app.units[0] - metrics_log = await _get_metrics_log(unit=unit) + metrics_log = await get_metrics_log(unit=unit) log_lines = list(map(lambda line: json.loads(line), metrics_log.splitlines())) events = set(map(lambda line: line.get("event"), log_lines)) assert { @@ -317,7 +272,7 @@ async def _assert_events_after_reconciliation( assert metric_log.get("idle_runners") >= 0 -async def _wait_for_runner_to_be_marked_offline( +async def wait_for_runner_to_be_marked_offline( forked_github_repository: Repository, runner_name: str ): """Wait for the runner to be marked offline or to be non-existent. @@ -341,223 +296,3 @@ async def _wait_for_runner_to_be_marked_offline( break else: assert False, "Timeout while waiting for runner to be marked offline" - - -@pytest.mark.asyncio -@pytest.mark.abort_on_fail -async def test_charm_issues_runner_installed_metric(app: Application, model: Model): - """ - arrange: A charm without runners integrated with grafana-agent using the cos-agent integration. - act: Config the charm to contain one runner. - assert: The RunnerInstalled metric is logged. - """ - - await ensure_charm_has_runner(app=app, model=model) - - metrics_log = await _get_metrics_log(app.units[0]) - log_lines = list(map(lambda line: json.loads(line), metrics_log.splitlines())) - events = set(map(lambda line: line.get("event"), log_lines)) - assert "runner_installed" in events, "runner_installed event has not been logged" - - for metric_log in log_lines: - if metric_log.get("event") == "runner_installed": - assert metric_log.get("flavor") == app.name - assert metric_log.get("event") == "runner_installed" - assert metric_log.get("duration") >= 0 - - -@pytest.mark.asyncio -@pytest.mark.abort_on_fail -async def test_charm_issues_metrics_after_reconciliation( - model: Model, - app: Application, - forked_github_repository: Repository, - forked_github_branch: Branch, -): - """ - arrange: A properly integrated charm with a runner registered on the fork repo. - act: Dispatch a workflow on a branch for the runner to run. After completion, reconcile. - assert: The RunnerStart, RunnerStop and Reconciliation metric is logged. - The Reconciliation metric has the post job status set to normal. - """ - await app.set_config({"path": forked_github_repository.full_name}) - await ensure_charm_has_runner(app=app, model=model) - - # Clear metrics log to make reconciliation event more predictable - unit = app.units[0] - await _clear_metrics_log(unit) - await _dispatch_workflow( - app=app, - branch=forked_github_branch, - github_repository=forked_github_repository, - conclusion="success", - ) - - # Set the number of virtual machines to 0 to speedup reconciliation - await app.set_config({"virtual-machines": "0"}) - await reconcile(app=app, model=model) - - await _assert_events_after_reconciliation( - app=app, github_repository=forked_github_repository, post_job_status=PostJobStatus.NORMAL - ) - - -@pytest.mark.asyncio -@pytest.mark.abort_on_fail -async def test_charm_issues_metrics_for_failed_repo_policy( - model: Model, - app: Application, - forked_github_repository: Repository, - forked_github_branch: Branch, -): - """ - arrange: A properly integrated charm with a runner registered on the fork repo. - act: Dispatch a test workflow that fails the repo-policy check. After completion, reconcile. - assert: The RunnerStart, RunnerStop and Reconciliation metric is logged. - The Reconciliation metric has the post job status set to failure. 
- """ - await app.set_config({"path": forked_github_repository.full_name}) - await ensure_charm_has_runner(app=app, model=model) - - # Clear metrics log to make reconciliation event more predictable - unit = app.units[0] - await _clear_metrics_log(unit) - await _dispatch_workflow( - app=app, - branch=forked_github_branch, - github_repository=forked_github_repository, - conclusion="failure", - ) - - # Set the number of virtual machines to 0 to speedup reconciliation - await app.set_config({"virtual-machines": "0"}) - await reconcile(app=app, model=model) - - await _assert_events_after_reconciliation( - app=app, - github_repository=forked_github_repository, - post_job_status=PostJobStatus.REPO_POLICY_CHECK_FAILURE, - ) - - -@pytest.mark.asyncio -@pytest.mark.abort_on_fail -async def test_charm_issues_metrics_for_abnormal_termination( - model: Model, - app: Application, - forked_github_repository: Repository, - forked_github_branch: Branch, -): - """ - arrange: A properly integrated charm with a runner registered on the fork repo. - act: Dispatch a test workflow and afterwards kill run.sh. After that, reconcile. - assert: The RunnerStart, RunnerStop and Reconciliation metric is logged. - The Reconciliation metric has the post job status set to Abnormal. - """ - await app.set_config({"path": forked_github_repository.full_name}) - await ensure_charm_has_runner(app=app, model=model) - - unit = app.units[0] - - workflow = forked_github_repository.get_workflow( - id_or_file_name=DISPATCH_CRASH_TEST_WORKFLOW_FILENAME - ) - assert workflow.create_dispatch(forked_github_branch, {"runner": app.name}) - - await _wait_for_workflow_to_start(unit, workflow) - - # Make the runner terminate abnormally by killing run.sh - runner_name = await get_runner_name(unit) - kill_run_sh_cmd = "pkill -9 run.sh" - ret_code, _ = await run_in_lxd_instance(unit, runner_name, kill_run_sh_cmd) - assert ret_code == 0, "Failed to kill run.sh" - - # Cancel workflow and wait that the runner is marked offline - # to avoid errors during reconciliation. - await _cancel_workflow_run(unit, workflow) - await _wait_for_runner_to_be_marked_offline(forked_github_repository, runner_name) - - # Set the number of virtual machines to 0 to speedup reconciliation - await app.set_config({"virtual-machines": "0"}) - await reconcile(app=app, model=model) - - await _assert_events_after_reconciliation( - app=app, - github_repository=forked_github_repository, - post_job_status=PostJobStatus.ABNORMAL, - ) - - -async def test_charm_remounts_shared_fs( - model: Model, - app: Application, - forked_github_repository: Repository, - forked_github_branch: Branch, -): - """ - arrange: A properly integrated charm with a runner registered on the fork repo. - act: Dispatch a test workflow and afterwards unmount the shared fs. After that, reconcile. - assert: The RunnerStart, RunnerStop and Reconciliation metric is logged. 
- """ - await app.set_config({"path": forked_github_repository.full_name}) - await ensure_charm_has_runner(app=app, model=model) - - # Clear metrics log to make reconciliation event more predictable - unit = app.units[0] - runner_name = await get_runner_name(unit) - await _clear_metrics_log(unit) - await _dispatch_workflow( - app=app, - branch=forked_github_branch, - github_repository=forked_github_repository, - conclusion="success", - ) - - # unmount shared fs - await run_in_unit(unit, f"sudo umount /home/ubuntu/runner-fs/{runner_name}") - - # Set the number of virtual machines to 0 to speedup reconciliation - await app.set_config({"virtual-machines": "0"}) - await reconcile(app=app, model=model) - - await _assert_events_after_reconciliation( - app=app, github_repository=forked_github_repository, post_job_status=PostJobStatus.NORMAL - ) - - -@pytest.mark.asyncio -@pytest.mark.abort_on_fail -async def test_charm_retrieves_logs_from_unhealthy_runners( - model: Model, - app: Application, -): - """ - arrange: A properly integrated charm with one runner. - act: Kill the start.sh script, which marks the runner as unhealthy. After that, reconcile. - assert: The logs are pulled from the crashed runner. - """ - await ensure_charm_has_runner(app=app, model=model) - - unit = app.units[0] - runner_name = await get_runner_name(unit) - - kill_start_sh_cmd = "pkill -9 start.sh" - ret_code, _ = await run_in_lxd_instance(unit, runner_name, kill_start_sh_cmd) - assert ret_code == 0, "Failed to kill start.sh" - - # Set the number of virtual machines to 0 to avoid to speedup reconciliation. - await app.set_config({"virtual-machines": "0"}) - await reconcile(app=app, model=model) - - ret_code, stdout = await run_in_unit(unit, f"ls {runner_logs.CRASHED_RUNNER_LOGS_DIR_PATH}") - assert ret_code == 0, "Failed to list crashed runner logs" - assert stdout - assert runner_name in stdout, "Failed to find crashed runner log" - - ret_code, stdout = await run_in_unit( - unit, f"ls {runner_logs.CRASHED_RUNNER_LOGS_DIR_PATH}/{runner_name}" - ) - assert ret_code == 0, "Failed to list crashed runner log" - assert stdout - assert "_diag" in stdout, "Failed to find crashed runner diag log" - assert "syslog" in stdout, "Failed to find crashed runner syslog log" diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index c23f88c18..f41e8e606 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -24,6 +24,7 @@ ensure_charm_has_runner, reconcile, ) +from tests.status_name import ACTIVE @pytest.fixture(scope="module") @@ -354,3 +355,16 @@ async def app_juju_storage( reconcile_interval=60, ) return application + + +@pytest_asyncio.fixture(scope="module", name="app_with_grafana_agent") +async def app_with_grafana_agent_integrated_fixture( + model: Model, app_no_runner: Application +) -> AsyncIterator[Application]: + """Setup the charm to be integrated with grafana-agent using the cos-agent integration.""" + grafana_agent = await model.deploy("grafana-agent", channel="latest/edge") + await model.relate(f"{app_no_runner.name}:cos-agent", f"{grafana_agent.name}:cos-agent") + await model.wait_for_idle(apps=[app_no_runner.name], status=ACTIVE) + await model.wait_for_idle(apps=[grafana_agent.name]) + + yield app_no_runner diff --git a/tests/integration/test_charm_metrics_failure.py b/tests/integration/test_charm_metrics_failure.py new file mode 100644 index 000000000..35b4b2ab3 --- /dev/null +++ b/tests/integration/test_charm_metrics_failure.py @@ -0,0 +1,170 @@ +# Copyright 2024 Canonical 
Ltd. +# See LICENSE file for licensing details. + +"""Integration tests for metrics/logs assuming Github workflow failures or a runner crash.""" +from typing import AsyncIterator + +import pytest +import pytest_asyncio +from github.Branch import Branch +from github.Repository import Repository +from juju.application import Application +from juju.model import Model + +import runner_logs +from runner_metrics import PostJobStatus +from tests.integration.charm_metrics_helpers import ( + _cancel_workflow_run, + _wait_for_workflow_to_start, + assert_events_after_reconciliation, + clear_metrics_log, + dispatch_workflow, + print_loop_device_info, + wait_for_runner_to_be_marked_offline, +) +from tests.integration.helpers import ( + DISPATCH_CRASH_TEST_WORKFLOW_FILENAME, + ensure_charm_has_runner, + get_runner_name, + reconcile, + run_in_lxd_instance, + run_in_unit, +) + + +@pytest_asyncio.fixture(scope="function", name="app") +async def app_fixture( + model: Model, app_with_grafana_agent: Application, loop_device: str +) -> AsyncIterator[Application]: + """Setup and teardown the charm after each test. + + Clear the metrics log before each test. + """ + unit = app_with_grafana_agent.units[0] + await clear_metrics_log(unit) + await print_loop_device_info(unit, loop_device) + yield app_with_grafana_agent + + +@pytest.mark.asyncio +@pytest.mark.abort_on_fail +async def test_charm_issues_metrics_for_failed_repo_policy( + model: Model, + app: Application, + forked_github_repository: Repository, + forked_github_branch: Branch, +): + """ + arrange: A properly integrated charm with a runner registered on the fork repo. + act: Dispatch a test workflow that fails the repo-policy check. After completion, reconcile. + assert: The RunnerStart, RunnerStop and Reconciliation metric is logged. + The Reconciliation metric has the post job status set to failure. + """ + await app.set_config({"path": forked_github_repository.full_name}) + await ensure_charm_has_runner(app=app, model=model) + + # Clear metrics log to make reconciliation event more predictable + unit = app.units[0] + await clear_metrics_log(unit) + await dispatch_workflow( + app=app, + branch=forked_github_branch, + github_repository=forked_github_repository, + conclusion="failure", + ) + + # Set the number of virtual machines to 0 to speedup reconciliation + await app.set_config({"virtual-machines": "0"}) + await reconcile(app=app, model=model) + + await assert_events_after_reconciliation( + app=app, + github_repository=forked_github_repository, + post_job_status=PostJobStatus.REPO_POLICY_CHECK_FAILURE, + ) + + +@pytest.mark.asyncio +@pytest.mark.abort_on_fail +async def test_charm_issues_metrics_for_abnormal_termination( + model: Model, + app: Application, + forked_github_repository: Repository, + forked_github_branch: Branch, +): + """ + arrange: A properly integrated charm with a runner registered on the fork repo. + act: Dispatch a test workflow and afterwards kill run.sh. After that, reconcile. + assert: The RunnerStart, RunnerStop and Reconciliation metric is logged. + The Reconciliation metric has the post job status set to Abnormal. 
+ """ + await app.set_config({"path": forked_github_repository.full_name}) + await ensure_charm_has_runner(app=app, model=model) + + unit = app.units[0] + + workflow = forked_github_repository.get_workflow( + id_or_file_name=DISPATCH_CRASH_TEST_WORKFLOW_FILENAME + ) + assert workflow.create_dispatch(forked_github_branch, {"runner": app.name}) + + await _wait_for_workflow_to_start(unit, workflow) + + # Make the runner terminate abnormally by killing run.sh + runner_name = await get_runner_name(unit) + kill_run_sh_cmd = "pkill -9 run.sh" + ret_code, _ = await run_in_lxd_instance(unit, runner_name, kill_run_sh_cmd) + assert ret_code == 0, "Failed to kill run.sh" + + # Cancel workflow and wait that the runner is marked offline + # to avoid errors during reconciliation. + await _cancel_workflow_run(unit, workflow) + await wait_for_runner_to_be_marked_offline(forked_github_repository, runner_name) + + # Set the number of virtual machines to 0 to speedup reconciliation + await app.set_config({"virtual-machines": "0"}) + await reconcile(app=app, model=model) + + await assert_events_after_reconciliation( + app=app, + github_repository=forked_github_repository, + post_job_status=PostJobStatus.ABNORMAL, + ) + + +@pytest.mark.asyncio +@pytest.mark.abort_on_fail +async def test_charm_retrieves_logs_from_unhealthy_runners( + model: Model, + app: Application, +): + """ + arrange: A properly integrated charm with one runner. + act: Kill the start.sh script, which marks the runner as unhealthy. After that, reconcile. + assert: The logs are pulled from the crashed runner. + """ + await ensure_charm_has_runner(app=app, model=model) + + unit = app.units[0] + runner_name = await get_runner_name(unit) + + kill_start_sh_cmd = "pkill -9 start.sh" + ret_code, _ = await run_in_lxd_instance(unit, runner_name, kill_start_sh_cmd) + assert ret_code == 0, "Failed to kill start.sh" + + # Set the number of virtual machines to 0 to avoid to speedup reconciliation. + await app.set_config({"virtual-machines": "0"}) + await reconcile(app=app, model=model) + + ret_code, stdout = await run_in_unit(unit, f"ls {runner_logs.CRASHED_RUNNER_LOGS_DIR_PATH}") + assert ret_code == 0, "Failed to list crashed runner logs" + assert stdout + assert runner_name in stdout, "Failed to find crashed runner log" + + ret_code, stdout = await run_in_unit( + unit, f"ls {runner_logs.CRASHED_RUNNER_LOGS_DIR_PATH}/{runner_name}" + ) + assert ret_code == 0, "Failed to list crashed runner log" + assert stdout + assert "_diag" in stdout, "Failed to find crashed runner diag log" + assert "syslog" in stdout, "Failed to find crashed runner syslog log" diff --git a/tests/integration/test_charm_metrics_success.py b/tests/integration/test_charm_metrics_success.py new file mode 100644 index 000000000..cf433084d --- /dev/null +++ b/tests/integration/test_charm_metrics_success.py @@ -0,0 +1,141 @@ +# Copyright 2024 Canonical Ltd. +# See LICENSE file for licensing details. 
+ +"""Integration tests for metrics/logs assuming no Github workflow failures.""" + +import json +from typing import AsyncIterator + +import pytest +import pytest_asyncio +from github.Branch import Branch +from github.Repository import Repository +from juju.application import Application +from juju.model import Model + +from runner_metrics import PostJobStatus +from tests.integration.charm_metrics_helpers import ( + assert_events_after_reconciliation, + clear_metrics_log, + dispatch_workflow, + get_metrics_log, + print_loop_device_info, +) +from tests.integration.helpers import ( + ensure_charm_has_runner, + get_runner_name, + reconcile, + run_in_unit, +) + + +@pytest_asyncio.fixture(scope="function", name="app") +async def app_fixture( + model: Model, app_with_grafana_agent: Application, loop_device: str +) -> AsyncIterator[Application]: + """Setup and teardown the charm after each test. + + Clear the metrics log before each test. + """ + unit = app_with_grafana_agent.units[0] + await clear_metrics_log(unit) + await print_loop_device_info(unit, loop_device) + yield app_with_grafana_agent + + +@pytest.mark.asyncio +@pytest.mark.abort_on_fail +async def test_charm_issues_runner_installed_metric(app: Application, model: Model): + """ + arrange: A charm without runners integrated with grafana-agent using the cos-agent integration. + act: Config the charm to contain one runner. + assert: The RunnerInstalled metric is logged. + """ + + await ensure_charm_has_runner(app=app, model=model) + + metrics_log = await get_metrics_log(app.units[0]) + log_lines = list(map(lambda line: json.loads(line), metrics_log.splitlines())) + events = set(map(lambda line: line.get("event"), log_lines)) + assert "runner_installed" in events, "runner_installed event has not been logged" + + for metric_log in log_lines: + if metric_log.get("event") == "runner_installed": + assert metric_log.get("flavor") == app.name + assert metric_log.get("event") == "runner_installed" + assert metric_log.get("duration") >= 0 + + +@pytest.mark.asyncio +@pytest.mark.abort_on_fail +async def test_charm_issues_metrics_after_reconciliation( + model: Model, + app: Application, + forked_github_repository: Repository, + forked_github_branch: Branch, +): + """ + arrange: A properly integrated charm with a runner registered on the fork repo. + act: Dispatch a workflow on a branch for the runner to run. After completion, reconcile. + assert: The RunnerStart, RunnerStop and Reconciliation metric is logged. + The Reconciliation metric has the post job status set to normal. 
+ """ + await app.set_config({"path": forked_github_repository.full_name}) + await ensure_charm_has_runner(app=app, model=model) + + # Clear metrics log to make reconciliation event more predictable + unit = app.units[0] + await clear_metrics_log(unit) + await dispatch_workflow( + app=app, + branch=forked_github_branch, + github_repository=forked_github_repository, + conclusion="success", + ) + + # Set the number of virtual machines to 0 to speedup reconciliation + await app.set_config({"virtual-machines": "0"}) + await reconcile(app=app, model=model) + + await assert_events_after_reconciliation( + app=app, github_repository=forked_github_repository, post_job_status=PostJobStatus.NORMAL + ) + + +@pytest.mark.asyncio +@pytest.mark.abort_on_fail +async def test_charm_remounts_shared_fs( + model: Model, + app: Application, + forked_github_repository: Repository, + forked_github_branch: Branch, +): + """ + arrange: A properly integrated charm with a runner registered on the fork repo. + act: Dispatch a test workflow and afterwards unmount the shared fs. After that, reconcile. + assert: The RunnerStart, RunnerStop and Reconciliation metric is logged. + """ + await app.set_config({"path": forked_github_repository.full_name}) + await ensure_charm_has_runner(app=app, model=model) + + # Clear metrics log to make reconciliation event more predictable + unit = app.units[0] + runner_name = await get_runner_name(unit) + await clear_metrics_log(unit) + await dispatch_workflow( + app=app, + branch=forked_github_branch, + github_repository=forked_github_repository, + conclusion="success", + ) + + # unmount shared fs + await run_in_unit(unit, f"sudo umount /home/ubuntu/runner-fs/{runner_name}") + + # Set the number of virtual machines to 0 to speedup reconciliation + await app.set_config({"virtual-machines": "0"}) + await reconcile(app=app, model=model) + + await assert_events_after_reconciliation( + app=app, github_repository=forked_github_repository, post_job_status=PostJobStatus.NORMAL + )