From c789301d040fccaff0e6b583c35168a7e57599c5 Mon Sep 17 00:00:00 2001 From: "Endi S. Dewata" Date: Thu, 5 Sep 2024 09:57:50 -0500 Subject: [PATCH] Drop ClonesConnectivyAndDataCheck from pki-healthcheck pki-healthcheck is a tool to check the status of an instance or a node in a cluster so that if it reports a problem the admin can fix it or the monitoring service can replace it with a new node. The ClonesConnectivyAndDataCheck on the other hand is a plugin that checks the connectivity from the instance to other clones. This plugin will report a problem if another clone is down even though the instance itself is fine, which would be misleading. Since it doesn't really fit the purpose of pki-healthcheck this plugin has been dropped. --- .../ca-renewal-system-certs-hsm-test.yml | 1 - .../ca-renewal-system-certs-test.yml | 1 - .../pki/server/healthcheck/clones/__init__.py | 0 .../clones/connectivity_and_data.py | 214 ------------------ .../pki/server/healthcheck/clones/plugin.py | 163 ------------- base/server/healthcheck/setup.py | 7 - docs/admin/PKI_Health_Check_Tool.md | 2 +- 7 files changed, 1 insertion(+), 387 deletions(-) delete mode 100644 base/server/healthcheck/pki/server/healthcheck/clones/__init__.py delete mode 100644 base/server/healthcheck/pki/server/healthcheck/clones/connectivity_and_data.py delete mode 100644 base/server/healthcheck/pki/server/healthcheck/clones/plugin.py diff --git a/.github/workflows/ca-renewal-system-certs-hsm-test.yml b/.github/workflows/ca-renewal-system-certs-hsm-test.yml index d48e14bcd66..275fc8cceee 100644 --- a/.github/workflows/ca-renewal-system-certs-hsm-test.yml +++ b/.github/workflows/ca-renewal-system-certs-hsm-test.yml @@ -205,7 +205,6 @@ jobs: echo "Expired Cert: sslserver" >> expected echo "Expired Cert: subsystem" >> expected echo "Expired Cert: audit_signing" >> expected - echo "Internal server error 404 Client Error: for url: http://pki.example.com:8080/ca/rest/securityDomain/domainInfo" >> expected echo "Internal 
server error 404 Client Error: for url: https://pki.example.com:8443/ca/admin/ca/getStatus" >> expected diff expected stderr diff --git a/.github/workflows/ca-renewal-system-certs-test.yml b/.github/workflows/ca-renewal-system-certs-test.yml index e032188c7f1..e21726daa47 100644 --- a/.github/workflows/ca-renewal-system-certs-test.yml +++ b/.github/workflows/ca-renewal-system-certs-test.yml @@ -168,7 +168,6 @@ jobs: echo "Expired Cert: sslserver" >> expected echo "Expired Cert: subsystem" >> expected echo "Expired Cert: audit_signing" >> expected - echo "Internal server error 404 Client Error: for url: http://pki.example.com:8080/ca/rest/securityDomain/domainInfo" >> expected echo "Internal server error 404 Client Error: for url: https://pki.example.com:8443/ca/admin/ca/getStatus" >> expected diff expected stderr diff --git a/base/server/healthcheck/pki/server/healthcheck/clones/__init__.py b/base/server/healthcheck/pki/server/healthcheck/clones/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/base/server/healthcheck/pki/server/healthcheck/clones/connectivity_and_data.py b/base/server/healthcheck/pki/server/healthcheck/clones/connectivity_and_data.py deleted file mode 100644 index 800046c074f..00000000000 --- a/base/server/healthcheck/pki/server/healthcheck/clones/connectivity_and_data.py +++ /dev/null @@ -1,214 +0,0 @@ -# Authors: -# Jack Magne # -# Copyright Red Hat, Inc. 
-# -# SPDX-License-Identifier: GPL-2.0-or-later -# -import logging - -from pki.server.healthcheck.clones.plugin import ClonesPlugin, registry -from pki.client import PKIConnection -from pki.cert import CertClient -from ipahealthcheck.core.plugin import Result, duration -from ipahealthcheck.core import constants - -logger = logging.getLogger(__name__) - - -@registry -class ClonesConnectivyAndDataCheck(ClonesPlugin): - """ - Assure master and clones within a pki instance are reachable - """ - def check_ca_clones(self): - host_error = [] - for host in self.clone_cas: - cur_clone_msg = ' Host: ' + host.Hostname + ' Port: ' + host.SecurePort - # Reach out and get some certs, to serve as a data and connectivity check - try: - connection = PKIConnection(protocol='https', - hostname=host.Hostname, - port=host.SecurePort, - verify=False) - - cert_client = CertClient(connection) - # get the first 3 in case we cant to make a sanity check of replicated data - certs = cert_client.list_certs(size=3) - - if certs is not None and len(certs.cert_data_info_list) == 3: - logger.info('Cert data successfully obtained from clone.') - else: - raise BaseException('CA clone problem reading data.' + cur_clone_msg) - except BaseException as e: - logger.error("Internal server error %s", e) - host_error.append( - BaseException('Internal error testing CA clone.' 
+ cur_clone_msg)) - - return host_error - - def check_kra_clones(self): - host_error = [] - for host in self.clone_kras: - - url = 'https://' + host.Hostname + ':' + host.SecurePort - - try: - status = self.get_status( - host.Hostname, - host.SecurePort, - '/kra/admin/kra/getStatus') - - logger.info('KRA at %s is %s', url, status) - - if status != 'running': - raise BaseException('KRA at %s is %s' % (url, status)) - - except BaseException as e: - logger.error('Unable to reach KRA at %s: %s', url, e) - host_error.append(BaseException('Unable to reach KRA at %s: %s' % (url, e))) - return host_error - - def check_ocsp_clones(self): - host_error = [] - for host in self.clone_ocsps: - - url = 'https://' + host.Hostname + ':' + host.SecurePort - - try: - status = self.get_status( - host.Hostname, - host.SecurePort, - '/ocsp/admin/ocsp/getStatus') - - logger.info('OCSP at %s is %s', url, status) - - if status != 'running': - raise BaseException('OCSP at %s is %s' % (url, status)) - - except BaseException as e: - logger.error('Unable to reach OCSP at %s: %s', url, e) - host_error.append(BaseException('Unable to reach OCSP at %s: %s' % (url, e))) - return host_error - - def check_tks_clones(self): - host_error = [] - for host in self.clone_tkss: - - url = 'https://' + host.Hostname + ':' + host.SecurePort - - try: - status = self.get_status( - host.Hostname, - host.SecurePort, - '/tks/admin/tks/getStatus') - - logger.info('TKS at %s is %s', url, status) - - if status != 'running': - raise BaseException('TKS at %s is %s' % (url, status)) - - except BaseException as e: - logger.error('Unable to reach TKS at %s: %s', url, e) - host_error.append(BaseException('Unable to reach TKS at %s: %s' % (url, e))) - return host_error - - def check_tps_clones(self): - host_error = [] - for host in self.clone_tpss: - - url = 'https://' + host.Hostname + ':' + host.SecurePort - - try: - status = self.get_status( - host.Hostname, - host.SecurePort, - '/tps/admin/tps/getStatus') - - 
logger.info('TPS at %s is %s', url, status) - - if status != 'running': - raise BaseException('TPS at %s is %s' % (url, status)) - - except BaseException as e: - logger.error('Unable to reach TPS at %s: %s', url, e) - host_error.append(BaseException('Unable to reach TPS at %s: %s' % (url, e))) - return host_error - - @duration - def check(self): - logger.info("Entering ClonesConnectivityCheck : %s", self.instance.name) - if not self.instance.exists(): - logger.debug('Invalid instance: %s', self.instance.name) - yield Result(self, constants.CRITICAL, - status='Invalid PKI instance: %s' % self.instance.name) - return - self.instance.load() - - security_domain_ca, sechost, secport = self.get_security_domain_ca() - logger.info('security_domain_ca: %s ', security_domain_ca) - - logger.info('sechost %s secport %s ', sechost, secport) - if security_domain_ca is None: - yield Result(self, constants.SUCCESS, - status='Instance not a security domain. %s' % self.instance.name) - security_domain_data = self.get_security_domain_data(sechost, secport) - - if security_domain_data is not None: - logger.info('About to check the subsystem clones') - - hard_msg = ' Clones tested successfully, or not present.' 
- host_error = self.check_ca_clones() - if not host_error: - yield Result(self, constants.SUCCESS, - instance_name=self.instance.name, - status='CA' + hard_msg) - else: - for err in host_error: - yield Result(self, constants.ERROR, - status='ERROR: %s' % self.instance.name + ' : ' + str(err)) - - host_error = self.check_kra_clones() - if not host_error: - yield Result(self, constants.SUCCESS, - instance_name=self.instance.name, - status='KRA' + hard_msg) - else: - for err in host_error: - yield Result(self, constants.ERROR, - status='ERROR: %s' % self.instance.name + ' : ' + str(err)) - - host_error = self.check_ocsp_clones() - if not host_error: - yield Result(self, constants.SUCCESS, - instance_name=self.instance.name, - status='OCSP' + hard_msg) - else: - for err in host_error: - yield Result(self, constants.ERROR, - status='ERROR: %s' % self.instance.name + ' : ' + str(err)) - - host_error = self.check_tks_clones() - if not host_error: - yield Result(self, constants.SUCCESS, - instance_name=self.instance.name, - status='TKS' + hard_msg) - else: - for err in host_error: - yield Result(self, constants.ERROR, - status='ERROR: %s' % self.instance.name + ' : ' + str(err)) - - host_error = self.check_tps_clones() - if not host_error: - yield Result(self, constants.SUCCESS, - instance_name=self.instance.name, - status="TPS Clones tested successfully, or not present.") - else: - for err in host_error: - yield Result(self, constants.ERROR, - status='ERROR: %s' % self.instance.name + ' : ' + str(err)) - else: - yield Result(self, constants.SUCCESS, - instance_name=self.instance.name, - status='Instance has no security domain.') - - return diff --git a/base/server/healthcheck/pki/server/healthcheck/clones/plugin.py b/base/server/healthcheck/pki/server/healthcheck/clones/plugin.py deleted file mode 100644 index bfa9edc01e7..00000000000 --- a/base/server/healthcheck/pki/server/healthcheck/clones/plugin.py +++ /dev/null @@ -1,163 +0,0 @@ -# Authors: -# Jack Magne -# -# 
Copyright Red Hat, Inc. -# -# SPDX-License-Identifier: GPL-2.0-or-later -# - -import json -import logging -import xml.etree.ElementTree as ET - -from ipahealthcheck.core.plugin import Plugin, Registry -from pki.server.instance import PKIInstance -from pki.client import PKIConnection -from pki.system import SecurityDomainClient - -from pki.server.healthcheck.core.main import merge_dogtag_config - -logger = logging.getLogger(__name__) - -# Temporary workaround to skip VERBOSE data. Fix already pushed to upstream -# freeipa-healthcheck: https://github.com/freeipa/freeipa-healthcheck/pull/126 -logging.getLogger().setLevel(logging.WARNING) - - -class ClonesPlugin(Plugin): - def __init__(self, registry): - # pylint: disable=redefined-outer-name - super(ClonesPlugin, self).__init__(registry) - - self.security_domain = None - self.db_dir = None - self.subsystem_token = None - self.passwd = None - - self.master_cas = [] - self.clone_cas = [] - self.master_kras = [] - self.clone_kras = [] - self.master_ocsps = [] - self.clone_ocsps = [] - self.master_tpss = [] - self.clone_tpss = [] - self.master_tkss = [] - self.clone_tkss = [] - - self.instance = PKIInstance(self.config.instance_name) - - def get_status(self, host, port, path): - - self.instance.export_ca_cert() - - connection = PKIConnection( - protocol='https', - hostname=host, - port=port, - cert_paths=self.instance.ca_cert) - - response = connection.get(path) - - content_type = response.headers['Content-Type'] - content = response.text - logger.info('Content:\n%s', content) - - # https://github.com/dogtagpki/pki/wiki/GetStatus-Service - if content_type == 'application/json': - json_response = json.loads(content) - status = json_response['Response']['Status'] - - elif content_type == 'application/xml': - root = ET.fromstring(content) - status = root.findtext('Status') - - else: - raise Exception('Unsupported content-type: %s' % content_type) - - logger.info('Status: %s', status) - return status - - def 
get_security_domain_data(self, host, port): - domain_data = None - - try: - connection = PKIConnection(protocol='http', - hostname=host, - port=port, - verify=False) - - securityDomainClient = SecurityDomainClient(connection) - domain_data = securityDomainClient.get_domain_info() - - except BaseException as e: - logger.error("Internal server error %s", e) - return domain_data - - systems = domain_data.subsystems - for s in systems.values(): - for h in s.hosts.values(): - if s.id == 'CA': - if h.Clone == 'TRUE': - self.clone_cas.append(h) - else: - self.master_cas.append(h) - elif s.id == 'KRA': - if h.Clone == 'TRUE': - self.clone_kras.append(h) - else: - self.master_kras.append(h) - elif s.id == 'OCSP': - if h.Clone == 'TRUE': - self.clone_ocsps.append(h) - else: - self.master_ocsps.append(h) - elif s.id == 'TPS': - if h.Clone == 'TRUE': - self.clone_tpss.append(h) - else: - self.master_tpss.append(h) - elif s.id == 'TKS': - if h.Clone == 'TRUE': - self.clone_tkss.append(h) - else: - self.master_tkss.append(h) - - return domain_data - - def get_security_domain_ca(self): - sec_domain = None - sechost = None - secport = None - ca_subsystem = self.instance.get_subsystem('ca') - if ca_subsystem: - # make sure this CA is the security domain - service_host = ca_subsystem.config.get('machineName') - server_config = self.instance.get_server_config() - service_port = server_config.get_unsecure_port() - sechost = ca_subsystem.config.get('securitydomain.host') - secport = ca_subsystem.config.get('securitydomain.httpport') - - if sechost == service_host and secport == service_port: - sec_domain = ca_subsystem - - if sec_domain: - self.security_domain = sec_domain - # Set some vars we will be using later - self.db_dir = self.security_domain.config.get('jss.configDir') - self.subsystem_token = self.security_domain.config.get('ca.subsystem.tokenname') - self.passwd = self.instance.get_token_password(self.subsystem_token) - - return sec_domain, sechost, secport - - -class 
ClonesRegistry(Registry): - def initialize(self, framework, config, options=None): - # Read dogtag specific config values and merge with already existing config - # before adding it to registry - merge_dogtag_config(config) - - super(ClonesRegistry, self).initialize(framework, config) - - -registry = ClonesRegistry() diff --git a/base/server/healthcheck/setup.py b/base/server/healthcheck/setup.py index fc0299d9635..c3abd4d4bba 100644 --- a/base/server/healthcheck/setup.py +++ b/base/server/healthcheck/setup.py @@ -10,7 +10,6 @@ 'pki.server.healthcheck.core', 'pki.server.healthcheck.meta', 'pki.server.healthcheck.certs', - 'pki.server.healthcheck.clones', ], entry_points={ # creates bin/pki-healthcheck @@ -21,13 +20,11 @@ 'ipahealthcheck.registry': [ 'pkihealthcheck.meta = pki.server.healthcheck.meta.plugin:registry', 'pkihealthcheck.certs = pki.server.healthcheck.certs.plugin:registry', - 'pkihealthcheck.clones = pki.server.healthcheck.clones.plugin:registry', ], # register the plugin with pki-healthcheck 'pkihealthcheck.registry': [ 'pkihealthcheck.meta = pki.server.healthcheck.meta.plugin:registry', 'pkihealthcheck.certs = pki.server.healthcheck.certs.plugin:registry', - 'pkihealthcheck.clones = pki.server.healthcheck.clones.plugin:registry', ], # plugin modules for pkihealthcheck.meta registry 'pkihealthcheck.meta': [ @@ -38,10 +35,6 @@ 'trust_flags = pki.server.healthcheck.certs.trustflags', 'expiration = pki.server.healthcheck.certs.expiration', ], - # plugin modules for pkihealthcheck.clones registry - 'pkihealthcheck.clones': [ - 'connectivity = pki.server.healthcheck.clones.connectivity_and_data', - ], }, classifiers=[ 'Programming Language :: Python :: 3.6', diff --git a/docs/admin/PKI_Health_Check_Tool.md b/docs/admin/PKI_Health_Check_Tool.md index fca9f7a4bfd..cf4cf539d15 100644 --- a/docs/admin/PKI_Health_Check_Tool.md +++ b/docs/admin/PKI_Health_Check_Tool.md @@ -62,7 +62,7 @@ It is difficult to simulate some issues and so, unit tests will use 
[unittest.mo ## How to use -Healthcheck executes a series of plugins to collect its information. Each plugin, referred to later as a source, is organized around a specific theme (system certificates, file system permissions and ownership, clones, etc.). A source is a collection of tests, refered to as checks, that should test one small piece of PKI. +Healthcheck executes a series of plugins to collect its information. Each plugin, referred to later as a source, is organized around a specific theme (system certificates, file system permissions and ownership, etc.). A source is a collection of tests, referred to as checks, that should test one small piece of PKI. The report will consist of a message describing what was run and the status. If the status is not successful, the message may include additional information, which can be used by the admin to correct the issue (e.g. a file has the wrong permissions, expected X and got Y).