From 21d52fbd610d15c0b0a3a6bb87851b8d7a7f9a2b Mon Sep 17 00:00:00 2001
From: Teoman ONAY
Date: Wed, 11 Dec 2024 15:32:47 +0100
Subject: [PATCH 1/2] Dashboard: upgrade fails

The command ceph dashboard grafana dashboards update fails with Error
EINVAL: Traceback

https://access.redhat.com/solutions/6965628

fixes: https://bugzilla.redhat.com/show_bug.cgi?id=2295395

Signed-off-by: Teoman ONAY
---
 roles/ceph-dashboard/tasks/configure_grafana_layouts.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/roles/ceph-dashboard/tasks/configure_grafana_layouts.yml b/roles/ceph-dashboard/tasks/configure_grafana_layouts.yml
index 556c5870c0..df50f44605 100644
--- a/roles/ceph-dashboard/tasks/configure_grafana_layouts.yml
+++ b/roles/ceph-dashboard/tasks/configure_grafana_layouts.yml
@@ -10,4 +10,5 @@
   delegate_to: "{{ groups[mon_group_name][0] }}"
   run_once: true
   changed_when: false
+  ignore_errors: true
   when: containerized_deployment | bool

From f6d2153448a58a58a58224afe7feb7cef76c5cfb Mon Sep 17 00:00:00 2001
From: Teoman ONAY
Date: Thu, 12 Dec 2024 10:59:46 +0100
Subject: [PATCH 2/2] rolling_update: disable FSMap sanity checks

Disable FSMap sanity checks before upgrade to prevent mon crash(1)

(1) https://access.redhat.com/solutions/7020523

fixes: https://bugzilla.redhat.com/show_bug.cgi?id=2277756

Signed-off-by: Teoman ONAY
---
 infrastructure-playbooks/rolling_update.yml |  22 +++
 library/ceph_config.py                      | 235 ++++++++++++++++++++
 2 files changed, 257 insertions(+)
 create mode 100644 library/ceph_config.py

diff --git a/infrastructure-playbooks/rolling_update.yml b/infrastructure-playbooks/rolling_update.yml
index 5914880b47..e90d048cd0 100644
--- a/infrastructure-playbooks/rolling_update.yml
+++ b/infrastructure-playbooks/rolling_update.yml
@@ -148,6 +148,15 @@
   become: True
   gather_facts: false
   tasks:
+    # workaround to prevent mon crash https://access.redhat.com/solutions/7020523
+    - name: Disable FSMap sanity checks before starting the upgrade
+      run_once: true
+      ceph_config:
+        action: set
+        who: mon
+        option: mon_mds_skip_sanity
+        value: true
+
     - name: upgrade ceph mon cluster
       block:
         - name: remove ceph aliases
@@ -1194,6 +1203,19 @@
         path: /etc/ceph/{{ cluster }}-crushmap
         state: absent
 
+- name: Remove FSMap sanity checks after the upgrade
+  hosts: "{{ mon_group_name|default('mons') }}"
+  become: True
+  gather_facts: false
+  tasks:
+    # workaround to prevent mon crash https://access.redhat.com/solutions/7020523
+    - name: Remove FSMap sanity checks
+      run_once: true
+      ceph_config:
+        action: rm
+        who: mon
+        option: mon_mds_skip_sanity
+
 - name: show ceph status
   hosts: "{{ mon_group_name|default('mons') }}"
   tags: always
diff --git a/library/ceph_config.py b/library/ceph_config.py
new file mode 100644
index 0000000000..c82c8cf57c
--- /dev/null
+++ b/library/ceph_config.py
@@ -0,0 +1,235 @@
+# Copyright Red Hat
+# SPDX-License-Identifier: Apache-2.0
+# Author: Guillaume Abrioux
+
+from __future__ import absolute_import, division, print_function
+__metaclass__ = type
+
+from ansible.module_utils.basic import AnsibleModule  # type: ignore
+try:
+    from ansible.module_utils.ca_common import exit_module, generate_cmd, fatal, is_containerized  # type: ignore
+except ImportError:
+    from module_utils.ca_common import exit_module, generate_cmd, fatal, is_containerized  # type: ignore
+
+import datetime
+import json
+
+ANSIBLE_METADATA = {
+    'metadata_version': '1.1',
+    'status': ['preview'],
+    'supported_by': 'community'
+}
+
+DOCUMENTATION = '''
+---
+module: ceph_config
+short_description: get, set or remove ceph config options
+version_added: "2.10"
+description:
+    - Get, set or remove Ceph config options.
+options:
+    cluster:
+        description:
+            - The cluster name passed to the generated ceph command.
+        required: false
+        default: ceph
+    fsid:
+        description:
+            - the fsid of the Ceph cluster to interact with.
+        required: false
+    image:
+        description:
+            - The Ceph container image to use.
+        required: false
+    action:
+        description:
+            - whether to get, set or remove (rm) the parameter specified in 'option'
+        required: false
+        choices: ['get', 'set', 'rm']
+        default: 'set'
+    who:
+        description:
+            - which daemon the configuration should be set to
+        required: true
+    option:
+        description:
+            - name of the parameter to get, set or remove
+        required: true
+    value:
+        description:
+            - value of the parameter; required when action is 'set'
+        required: false
+
+author:
+    - Guillaume Abrioux
+'''
+
+EXAMPLES = '''
+- name: set osd_memory_target for osd.0
+  ceph_config:
+    action: set
+    who: osd.0
+    option: osd_memory_target
+    value: 5368709120
+
+- name: set osd_memory_target for host ceph-osd-02
+  ceph_config:
+    action: set
+    who: osd/host:ceph-osd-02
+    option: osd_memory_target
+    value: 5368709120
+
+- name: get osd_pool_default_size value
+  ceph_config:
+    action: get
+    who: global
+    option: osd_pool_default_size
+
+- name: remove the mon_mds_skip_sanity override
+  ceph_config:
+    action: rm
+    who: mon
+    option: mon_mds_skip_sanity
+'''
+
+RETURN = '''# '''
+
+
+def set_option(module,
+               who,
+               option,
+               value,
+               container_image=None):
+    """Run the `config set <who> <option> <value>` ceph sub-command.
+
+    Returns a (rc, cmd, stdout, stderr) tuple; stdout is stripped.
+    """
+    cmd = generate_cmd(sub_cmd=['config', 'set'],
+                       args=[who, option, value],
+                       cluster=module.params.get('cluster'),
+                       container_image=container_image)
+
+    rc, out, err = module.run_command(cmd)
+
+    return rc, cmd, out.strip(), err
+
+
+def rm_option(module,
+              who,
+              option,
+              container_image=None):
+    """Run the `config rm <who> <option>` ceph sub-command.
+
+    Returns a (rc, cmd, stdout, stderr) tuple; stdout is stripped.
+    """
+    cmd = generate_cmd(sub_cmd=['config', 'rm'],
+                       args=[who, option],
+                       cluster=module.params.get('cluster'),
+                       container_image=container_image)
+
+    rc, out, err = module.run_command(cmd)
+
+    return rc, cmd, out.strip(), err
+
+
+def get_config_dump(module, container_image=None):
+    """Run the `config dump --format json` ceph sub-command.
+
+    Calls fatal() when the command exits with a non-zero code. Returns a
+    (rc, cmd, stdout, stderr) tuple; stdout is stripped.
+    """
+    cmd = generate_cmd(sub_cmd=['config', 'dump', '--format', 'json'],
+                       args=[],
+                       cluster=module.params.get('cluster'),
+                       container_image=container_image)
+    rc, out, err = module.run_command(cmd)
+    if rc:
+        fatal(message=f"Can't get current configuration via `ceph config dump`.Error:\n{err}", module=module)
+    out = out.strip()
+    return rc, cmd, out, err
+
+
+def get_current_value(who, option, config_dump):
+    """Return the value of `option` for section `who`, or None if absent.
+
+    `config_dump` is the decoded `ceph config dump` output: an iterable of
+    dicts carrying the 'section', 'name' and 'value' keys.
+    """
+    for config in config_dump:
+        if config['section'] == who and config['name'] == option:
+            return config['value']
+    return None
+
+
+def main() -> None:
+    """Module entry point: get, set or remove one Ceph config option."""
+    module = AnsibleModule(
+        argument_spec=dict(
+            who=dict(type='str', required=True),
+            action=dict(type='str', required=False, choices=['get', 'set', 'rm'], default='set'),
+            option=dict(type='str', required=True),
+            value=dict(type='str', required=False),
+            fsid=dict(type='str', required=False),
+            image=dict(type='str', required=False),
+            cluster=dict(type='str', required=False, default='ceph')
+        ),
+        supports_check_mode=True,
+        required_if=[['action', 'set', ['value']]]
+    )
+
+    # Gather module parameters in variables
+    who = module.params.get('who')
+    option = module.params.get('option')
+    value = module.params.get('value')
+    action = module.params.get('action')
+
+    container_image = is_containerized()
+
+    if module.check_mode:
+        module.exit_json(
+            changed=False,
+            stdout='',
+            cmd=[],
+            stderr='',
+            rc=0,
+            start='',
+            end='',
+            delta='',
+        )
+
+    startd = datetime.datetime.now()
+    changed = False
+
+    rc, cmd, out, err = get_config_dump(module, container_image=container_image)
+    config_dump = json.loads(out)
+    current_value = get_current_value(who, option, config_dump)
+
+    # An option explicitly set to an empty value is still "set": compare
+    # against None everywhere instead of relying on truthiness.
+    if action == 'set':
+        if current_value is not None and value.lower() == current_value.lower():
+            out = 'who={} option={} value={} already set. Skipping.'.format(who, option, value)
+        else:
+            rc, cmd, out, err = set_option(module, who, option, value, container_image=container_image)
+            changed = True
+    elif action == 'get':
+        if current_value is None:
+            out = ''
+            err = 'No value found for who={} option={}'.format(who, option)
+        else:
+            out = current_value
+    elif action == 'rm':
+        if current_value is None:
+            # Nothing to remove: do not leak the raw config dump as stdout.
+            out = 'who={} option={} not set. Skipping.'.format(who, option)
+        else:
+            rc, cmd, out, err = rm_option(module, who, option, container_image=container_image)
+            changed = True
+
+    exit_module(module=module, out=out, rc=rc,
+                cmd=cmd, err=err, startd=startd,
+                changed=changed)
+
+
+if __name__ == '__main__':
+    main()