From 9480cc7b65e3ecda2764115fde9e1ce879c250aa Mon Sep 17 00:00:00 2001
From: Christopher Bartz <christopher.bartz@canonical.com>
Date: Fri, 12 Jul 2024 14:44:21 +0200
Subject: [PATCH] Remove sensitive information from logs (#316)

* replace sensitive information in previous state log

* hide ps aux command

* hide ps aux stdout for openstack

* add unit test for coverage

* use gh hosted runners

* fix test name

---------

Co-authored-by: Yanks Yoon <37652070+yanksyoon@users.noreply.github.com>
---
 .github/workflows/integration_test.yaml  |  2 --
 src-docs/charm_state.py.md               | 39 ++++++++++++------------
 src-docs/openstack_manager.md            |  2 +-
 src-docs/runner_manager.py.md            | 10 +++---
 src/charm_state.py                       | 23 +++++++++++++-
 src/openstack_cloud/openstack_manager.py |  1 -
 src/runner_manager.py                    |  4 ++-
 tests/unit/test_charm_state.py           | 19 +++++++++++-
 8 files changed, 69 insertions(+), 31 deletions(-)
diff --git a/.github/workflows/integration_test.yaml b/.github/workflows/integration_test.yaml
index b83d8da81..bf851138e 100644
--- a/.github/workflows/integration_test.yaml
+++ b/.github/workflows/integration_test.yaml
@@ -20,8 +20,6 @@ jobs:
       provider: lxd
       test-tox-env: integration-juju2.9
       modules: '["test_charm_base_image", "test_charm_fork_repo", "test_charm_no_runner", "test_charm_scheduled_events", "test_charm_lxd_runner", "test_charm_runner", "test_charm_metrics_success", "test_charm_metrics_failure", "test_self_hosted_runner", "test_charm_with_proxy", "test_charm_with_juju_storage", "test_debug_ssh", "test_charm_upgrade"]'
-      self-hosted-runner: true
-      self-hosted-runner-label: xlarge,X64
   integration-tests:
     name: Integration test with juju 3.1
     uses: canonical/operator-workflows/.github/workflows/integration_test.yaml@main
diff --git a/src-docs/charm_state.py.md b/src-docs/charm_state.py.md
index f6bb37338..5783f6821 100644
--- a/src-docs/charm_state.py.md
+++ b/src-docs/charm_state.py.md
@@ -22,6 +22,7 @@ State of the Charm.
 - **REPO_POLICY_COMPLIANCE_TOKEN_CONFIG_NAME**
 - **REPO_POLICY_COMPLIANCE_URL_CONFIG_NAME**
 - **RUNNER_STORAGE_CONFIG_NAME**
+- **SENSITIVE_PLACEHOLDER**
 - **TEST_MODE_CONFIG_NAME**
 - **TOKEN_CONFIG_NAME**
 - **USE_APROXY_CONFIG_NAME**
@@ -36,7 +37,7 @@ State of the Charm.
 
 ---
 
-<a href="../src/charm_state.py#L124"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
+<a href="../src/charm_state.py#L125"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
 
 ## <kbd>function</kbd> `parse_github_path`
 
@@ -137,7 +138,7 @@ Some charm configurations are grouped into other configuration models.
 
 ---
 
-<a href="../src/charm_state.py#L453"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
+<a href="../src/charm_state.py#L454"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
 
 ### <kbd>classmethod</kbd> `check_reconcile_interval`
 
@@ -166,7 +167,7 @@ Validate the general charm configuration.
 
 ---
 
-<a href="../src/charm_state.py#L480"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
+<a href="../src/charm_state.py#L481"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
 
 ### <kbd>classmethod</kbd> `from_charm`
 
@@ -205,7 +206,7 @@ Raised when charm config is invalid.
  
  - <b>`msg`</b>:  Explanation of the error. 
 
-<a href="../src/charm_state.py#L244"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
+<a href="../src/charm_state.py#L245"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
 
 ### <kbd>function</kbd> `__init__`
 
@@ -247,7 +248,7 @@ The charm state.
 
 ---
 
-<a href="../src/charm_state.py#L1076"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
+<a href="../src/charm_state.py#L1097"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
 
 ### <kbd>classmethod</kbd> `from_charm`
 
@@ -292,7 +293,7 @@ Charm configuration related to GitHub.
 
 ---
 
-<a href="../src/charm_state.py#L160"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
+<a href="../src/charm_state.py#L161"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
 
 ### <kbd>classmethod</kbd> `from_charm`
 
@@ -337,7 +338,7 @@ Represent GitHub organization.
 
 ---
 
-<a href="../src/charm_state.py#L112"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
+<a href="../src/charm_state.py#L113"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
 
 ### <kbd>function</kbd> `path`
 
@@ -370,7 +371,7 @@ Represent GitHub repository.
 
 ---
 
-<a href="../src/charm_state.py#L91"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
+<a href="../src/charm_state.py#L92"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
 
 ### <kbd>function</kbd> `path`
 
@@ -391,7 +392,7 @@ Return a string representing the path.
 ## <kbd>class</kbd> `ImmutableConfigChangedError`
 Represents an error when changing immutable charm state. 
 
-<a href="../src/charm_state.py#L976"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
+<a href="../src/charm_state.py#L977"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
 
 ### <kbd>function</kbd> `__init__`
 
@@ -446,7 +447,7 @@ Runner configurations for local LXD instances.
 
 ---
 
-<a href="../src/charm_state.py#L750"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
+<a href="../src/charm_state.py#L751"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
 
 ### <kbd>classmethod</kbd> `check_virtual_machine_resources`
 
@@ -477,7 +478,7 @@ Validate the virtual_machine_resources field values.
 
 ---
 
-<a href="../src/charm_state.py#L728"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
+<a href="../src/charm_state.py#L729"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
 
 ### <kbd>classmethod</kbd> `check_virtual_machines`
 
@@ -506,7 +507,7 @@ Validate the virtual machines configuration value.
 
 ---
 
-<a href="../src/charm_state.py#L676"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
+<a href="../src/charm_state.py#L677"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
 
 ### <kbd>classmethod</kbd> `from_charm`
 
@@ -551,7 +552,7 @@ OpenstackImage from image builder relation data.
 
 ---
 
-<a href="../src/charm_state.py#L586"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
+<a href="../src/charm_state.py#L587"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
 
 ### <kbd>classmethod</kbd> `from_charm`
 
@@ -594,7 +595,7 @@ Runner configuration for OpenStack Instances.
 
 ---
 
-<a href="../src/charm_state.py#L628"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
+<a href="../src/charm_state.py#L629"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
 
 ### <kbd>classmethod</kbd> `from_charm`
 
@@ -648,7 +649,7 @@ Return the aproxy address.
 
 ---
 
-<a href="../src/charm_state.py#L820"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
+<a href="../src/charm_state.py#L821"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
 
 ### <kbd>classmethod</kbd> `check_use_aproxy`
 
@@ -678,7 +679,7 @@ Validate the proxy configuration.
 
 ---
 
-<a href="../src/charm_state.py#L848"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
+<a href="../src/charm_state.py#L849"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
 
 ### <kbd>classmethod</kbd> `from_charm`
 
@@ -717,7 +718,7 @@ Configuration for the repo policy compliance service.
 
 ---
 
-<a href="../src/charm_state.py#L311"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
+<a href="../src/charm_state.py#L312"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
 
 ### <kbd>classmethod</kbd> `from_charm`
 
@@ -780,7 +781,7 @@ SSH connection information for debug workflow.
 
 ---
 
-<a href="../src/charm_state.py#L934"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
+<a href="../src/charm_state.py#L935"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
 
 ### <kbd>classmethod</kbd> `from_charm`
 
@@ -813,7 +814,7 @@ Raised when given machine charm architecture is unsupported.
  
  - <b>`arch`</b>:  The current machine architecture. 
 
-<a href="../src/charm_state.py#L891"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
+<a href="../src/charm_state.py#L892"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
 
 ### <kbd>function</kbd> `__init__`
 
diff --git a/src-docs/openstack_manager.md b/src-docs/openstack_manager.md
index 3b1be2577..ccd677142 100644
--- a/src-docs/openstack_manager.md
+++ b/src-docs/openstack_manager.md
@@ -146,7 +146,7 @@ Construct OpenstackRunnerManager object.
 
 ---
 
-<a href="../src/openstack_cloud/openstack_manager.py#L1479"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
+<a href="../src/openstack_cloud/openstack_manager.py#L1478"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
 
 ### <kbd>method</kbd> `flush`
 
diff --git a/src-docs/runner_manager.py.md b/src-docs/runner_manager.py.md
index c3999763b..b7268cbcb 100644
--- a/src-docs/runner_manager.py.md
+++ b/src-docs/runner_manager.py.md
@@ -50,7 +50,7 @@ Construct RunnerManager object for creating and managing runners.
 
 ---
 
-<a href="../src/utilities.py#L790"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
+<a href="../src/utilities.py#L792"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
 
 ### <kbd>function</kbd> `build_runner_image`
 
@@ -87,7 +87,7 @@ Check if runner binary exists.
 
 ---
 
-<a href="../src/runner_manager.py#L601"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
+<a href="../src/runner_manager.py#L603"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
 
 ### <kbd>function</kbd> `flush`
 
@@ -164,7 +164,7 @@ The runner binary URL changes when a new version is available.
 
 ---
 
-<a href="../src/runner_manager.py#L782"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
+<a href="../src/runner_manager.py#L784"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
 
 ### <kbd>function</kbd> `has_runner_image`
 
@@ -181,7 +181,7 @@ Check if the runner image exists.
 
 ---
 
-<a href="../src/runner_manager.py#L519"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
+<a href="../src/runner_manager.py#L521"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
 
 ### <kbd>function</kbd> `reconcile`
 
@@ -205,7 +205,7 @@ Bring runners in line with target.
 
 ---
 
-<a href="../src/runner_manager.py#L805"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
+<a href="../src/runner_manager.py#L807"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
 
 ### <kbd>function</kbd> `schedule_build_runner_image`
 
diff --git a/src/charm_state.py b/src/charm_state.py
index 4b9168ab5..f4245a867 100644
--- a/src/charm_state.py
+++ b/src/charm_state.py
@@ -48,6 +48,7 @@
 REPO_POLICY_COMPLIANCE_TOKEN_CONFIG_NAME = "repo-policy-compliance-token"  # nosec
 REPO_POLICY_COMPLIANCE_URL_CONFIG_NAME = "repo-policy-compliance-url"
 RUNNER_STORAGE_CONFIG_NAME = "runner-storage"
+SENSITIVE_PLACEHOLDER = "*****"
 TEST_MODE_CONFIG_NAME = "test-mode"
 # bandit thinks this is a hardcoded password.
 TOKEN_CONFIG_NAME = "token"  # nosec
@@ -1040,7 +1041,8 @@ def _check_immutable_config_change(
 
         json_data = CHARM_STATE_PATH.read_text(encoding="utf-8")
         prev_state = json.loads(json_data)
-        logger.info("Previous charm state: %s", prev_state)
+
+        cls._log_prev_state(prev_state)
 
         try:
             if prev_state["runner_config"]["runner_storage"] != runner_storage:
@@ -1071,6 +1073,25 @@ def _check_immutable_config_change(
         except KeyError as exc:
             logger.info("Key %s not found, this will be updated to current config.", exc.args[0])
 
+    @classmethod
+    def _log_prev_state(cls, prev_state_dict: dict) -> None:
+        """Log the previous state of the charm.
+
+        Replace sensitive information before logging.
+
+        Args:
+            prev_state_dict: The previous state of the charm as a dict.
+        """
+        if logger.isEnabledFor(logging.DEBUG):
+            prev_state_for_logging = prev_state_dict.copy()
+            charm_config = prev_state_for_logging.get("charm_config")
+            if charm_config and "token" in charm_config:
+                charm_config = charm_config.copy()
+                charm_config["token"] = SENSITIVE_PLACEHOLDER  # nosec
+            prev_state_for_logging["charm_config"] = charm_config
+
+            logger.debug("Previous charm state: %s", prev_state_for_logging)
+
     # Ignore the flake8 function too complex (C901). The function does not have much logic, the
     # lint is likely triggered with the multiple try-excepts, which are needed.
     @classmethod
diff --git a/src/openstack_cloud/openstack_manager.py b/src/openstack_cloud/openstack_manager.py
index a5b0ed489..762d78f69 100644
--- a/src/openstack_cloud/openstack_manager.py
+++ b/src/openstack_cloud/openstack_manager.py
@@ -497,7 +497,6 @@ def _ssh_health_check(conn: OpenstackConnection, server_name: str, startup: bool
 
         result: invoke.runners.Result = ssh_conn.run("ps aux", warn=True)
         logger.debug("Output of `ps aux` on %s stderr: %s", server_name, result.stderr)
-        logger.debug("Output of `ps aux` on %s stdout: %s", server_name, result.stdout)
         if not result.ok or RUNNER_STARTUP_PROCESS not in result.stdout:
             logger.warning("List all process command failed on %s ", server_name)
             return False
diff --git a/src/runner_manager.py b/src/runner_manager.py
index 0ee2cee33..161d1e4c5 100644
--- a/src/runner_manager.py
+++ b/src/runner_manager.py
@@ -238,7 +238,9 @@ def _get_runner_health_states(self) -> RunnerByHealth:
         unhealthy = []
 
         for runner in local_runners:
-            _, stdout, _ = runner.execute(["ps", "aux"])
+            # we need to hide the command to prevent sensitive information on the workload
+            # from being exposed.
+            _, stdout, _ = runner.execute(["ps", "aux"], hide_cmd=True)
             if f"/bin/bash {Runner.runner_script}" in stdout.read().decode("utf-8"):
                 healthy.append(runner.name)
             else:
diff --git a/tests/unit/test_charm_state.py b/tests/unit/test_charm_state.py
index bc2292852..117697dfa 100644
--- a/tests/unit/test_charm_state.py
+++ b/tests/unit/test_charm_state.py
@@ -1,7 +1,9 @@
 # Copyright 2024 Canonical Ltd.
 # See LICENSE file for licensing details.
 import json
+import logging
 import platform
+import secrets
 import typing
 from pathlib import Path
 from unittest.mock import MagicMock
@@ -982,7 +984,7 @@ def mock_charm_state_data():
         "arch": "x86_64",
         "is_metrics_logging_available": True,
         "proxy_config": {"http": "http://example.com", "https": "https://example.com"},
-        "charm_config": {"denylist": ["192.168.1.1"], "token": "abc123"},
+        "charm_config": {"denylist": ["192.168.1.1"], "token": secrets.token_hex(16)},
         "runner_config": {
             "base_image": "jammy",
             "virtual_machines": 2,
@@ -1173,3 +1175,18 @@ def test_charm_state_from_charm(monkeypatch: pytest.MonkeyPatch):
     monkeypatch.setattr(charm_state, "CHARM_STATE_PATH", MagicMock())
 
     assert CharmState.from_charm(mock_charm)
+
+
+def test_charm_state__log_prev_state_redacts_sensitive_information(
+    mock_charm_state_data: dict, caplog: pytest.LogCaptureFixture
+):
+    """
+    arrange: Arrange charm state data with a token and set log level to DEBUG.
+    act: Call the __log_prev_state method on the class.
+    assert: Verify that the method redacts the sensitive information in the log message.
+    """
+    caplog.set_level(logging.DEBUG)
+    CharmState._log_prev_state(mock_charm_state_data)
+
+    assert mock_charm_state_data["charm_config"]["token"] not in caplog.text
+    assert charm_state.SENSITIVE_PLACEHOLDER in caplog.text