From 212c8740831a7650b5be86c27d14f8c0b6eacbef Mon Sep 17 00:00:00 2001 From: Guillaume Abrioux Date: Wed, 31 Jul 2024 14:36:48 +0000 Subject: [PATCH] orch: disk replacement enhancement This introduces a new `ceph orch device replace` command in order to improve the user experience when it comes to replacing the underlying device of an OSD. Fixes: https://tracker.ceph.com/issues/68456 Signed-off-by: Guillaume Abrioux --- doc/cephadm/operations.rst | 69 +++ src/ceph-volume/ceph_volume/__init__.py | 1 + src/ceph-volume/ceph_volume/api/lvm.py | 17 +- .../ceph_volume/devices/lvm/zap.py | 173 +++++-- src/ceph-volume/ceph_volume/tests/conftest.py | 2 +- .../ceph_volume/tests/devices/lvm/test_zap.py | 23 +- .../ceph_volume/tests/test_inventory.py | 1 + .../ceph_volume/util/arg_validators.py | 14 + src/ceph-volume/ceph_volume/util/device.py | 26 +- src/ceph-volume/ceph_volume/util/disk.py | 17 +- src/pybind/mgr/cephadm/ceph_volume.py | 430 ++++++++++++++++++ src/pybind/mgr/cephadm/module.py | 53 +++ src/pybind/mgr/cephadm/serve.py | 5 +- src/pybind/mgr/cephadm/services/osd.py | 45 +- .../mgr/cephadm/tests/ceph_volume_data.py | 1 + src/pybind/mgr/cephadm/tests/conftest.py | 7 +- src/pybind/mgr/cephadm/tests/fixtures.py | 4 +- .../mgr/cephadm/tests/test_ceph_volume.py | 231 ++++++++++ .../mgr/cephadm/tests/test_replace_device.py | 53 +++ src/pybind/mgr/orchestrator/_interface.py | 15 + src/pybind/mgr/orchestrator/module.py | 20 +- .../deployment/drive_selection/selector.py | 4 + .../ceph/deployment/inventory.py | 10 +- 23 files changed, 1151 insertions(+), 70 deletions(-) create mode 100644 src/pybind/mgr/cephadm/ceph_volume.py create mode 100644 src/pybind/mgr/cephadm/tests/ceph_volume_data.py create mode 100644 src/pybind/mgr/cephadm/tests/test_ceph_volume.py create mode 100644 src/pybind/mgr/cephadm/tests/test_replace_device.py diff --git a/doc/cephadm/operations.rst b/doc/cephadm/operations.rst index 3b117c1bd6a60..420ee655ac8ba 100644 --- a/doc/cephadm/operations.rst +++ b/doc/cephadm/operations.rst @@ -734,3 +734,72 @@ Purge ceph daemons from all hosts in the cluster # For each host: cephadm rm-cluster --force --zap-osds --fsid + + +Replacing a device +================== + +The ``ceph orch device replace`` command automates the process of replacing the underlying device of an OSD. +Previously, this process required manual intervention at various stages. +With this new command, all necessary operations are performed automatically, streamlining the replacement process +and improving the overall user experience. + +.. note:: This only supports LVM-based deployed OSD(s) + +.. prompt:: bash # + + ceph orch device replace + +In the case the device being replaced is shared by multiple OSDs (eg: DB/WAL device shared by multiple OSDs), the orchestrator will warn you. + +.. prompt:: bash # + + [ceph: root@ceph /]# ceph orch device replace osd-1 /dev/vdd + + Error EINVAL: /dev/vdd is a shared device. + Replacing /dev/vdd implies destroying OSD(s): ['0', '1']. + Please, *be very careful*, this can be a very dangerous operation. + If you know what you are doing, pass --yes-i-really-mean-it + +If you know what you are doing, you can go ahead and pass ``--yes-i-really-mean-it``. + +.. prompt:: bash # + + [ceph: root@ceph /]# ceph orch device replace osd-1 /dev/vdd --yes-i-really-mean-it + Scheduled to destroy osds: ['6', '7', '8'] and mark /dev/vdd as being replaced. 
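+
+The progress of the scheduled OSD removals can be followed with ``ceph orch osd rm status``
+(shown here without its output, which depends on the state of the cluster):
+
+.. prompt:: bash #
+
+   ceph orch osd rm status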
+ +``cephadm`` will make ``ceph-volume`` zap and destroy all related devices and mark the corresponding OSD as ``destroyed`` so the +different OSD(s) ID(s) will be preserved: + +.. prompt:: bash # + + [ceph: root@ceph-1 /]# ceph osd tree + ID CLASS WEIGHT TYPE NAME STATUS REWEIGHT PRI-AFF + -1 0.97659 root default + -3 0.97659 host devel-1 + 0 hdd 0.29300 osd.0 destroyed 1.00000 1.00000 + 1 hdd 0.29300 osd.1 destroyed 1.00000 1.00000 + 2 hdd 0.19530 osd.2 up 1.00000 1.00000 + 3 hdd 0.19530 osd.3 up 1.00000 1.00000 + +The device being replaced is finally seen as ``being replaced`` preventing ``cephadm`` from redeploying the OSDs too fast: + +.. prompt:: bash # + + [ceph: root@ceph-1 /]# ceph orch device ls + HOST PATH TYPE DEVICE ID SIZE AVAILABLE REFRESHED REJECT REASONS + osd-1 /dev/vdb hdd 200G Yes 13s ago + osd-1 /dev/vdc hdd 200G Yes 13s ago + osd-1 /dev/vdd hdd 200G Yes 13s ago Is being replaced + osd-1 /dev/vde hdd 200G No 13s ago Has a FileSystem, Insufficient space (<10 extents) on vgs, LVM detected + osd-1 /dev/vdf hdd 200G No 13s ago Has a FileSystem, Insufficient space (<10 extents) on vgs, LVM detected + +If for any reason you need to clear the 'device replace header' on a device, then you can use ``ceph orch device replace --clear``: + +.. prompt:: bash # + + [ceph: root@devel-1 /]# ceph orch device replace devel-1 /dev/vdk --clear + Replacement header cleared on /dev/vdk + [ceph: root@devel-1 /]# + +After that, ``cephadm`` will redeploy the OSD service spec within a few minutes (unless the service is set to ``unmanaged``). diff --git a/src/ceph-volume/ceph_volume/__init__.py b/src/ceph-volume/ceph_volume/__init__.py index b10100c02185a..814619cfdddb4 100644 --- a/src/ceph-volume/ceph_volume/__init__.py +++ b/src/ceph-volume/ceph_volume/__init__.py @@ -6,6 +6,7 @@ sys_info = namedtuple('sys_info', ['devices']) sys_info.devices = dict() logger = logging.getLogger(__name__) +BEING_REPLACED_HEADER: str = 'CEPH_DEVICE_BEING_REPLACED' class AllowLoopDevices: diff --git a/src/ceph-volume/ceph_volume/api/lvm.py b/src/ceph-volume/ceph_volume/api/lvm.py index 16cbc08b26254..fc376f891fd25 100644 --- a/src/ceph-volume/ceph_volume/api/lvm.py +++ b/src/ceph-volume/ceph_volume/api/lvm.py @@ -10,6 +10,8 @@ from math import floor from ceph_volume import process, util, conf from ceph_volume.exceptions import SizeAllocationError +from typing import Any, Dict + logger = logging.getLogger(__name__) @@ -807,13 +809,16 @@ def get_all_devices_vgs(name_prefix=''): '--units=b', '--nosuffix'] -class Volume(object): +class Volume: """ Represents a Logical Volume from LVM, with some top-level attributes like ``lv_name`` and parsed tags as a dictionary of key/value pairs. 
""" - def __init__(self, **kw): + def __init__(self, **kw: str) -> None: + self.lv_path: str = '' + self.lv_name: str = '' + self.lv_uuid: str = '' for k, v in kw.items(): setattr(self, k, v) self.lv_api = kw @@ -824,13 +829,13 @@ def __init__(self, **kw): self.encrypted = self.tags.get('ceph.encrypted', '0') == '1' self.used_by_ceph = 'ceph.osd_id' in self.tags - def __str__(self): + def __str__(self) -> str: return '<%s>' % self.lv_api['lv_path'] - def __repr__(self): + def __repr__(self) -> str: return self.__str__() - def as_dict(self): + def as_dict(self) -> Dict[str, Any]: obj = {} obj.update(self.lv_api) obj['tags'] = self.tags @@ -839,7 +844,7 @@ def as_dict(self): obj['path'] = self.lv_path return obj - def report(self): + def report(self) -> Dict[str, Any]: if not self.used_by_ceph: return { 'name': self.lv_name, diff --git a/src/ceph-volume/ceph_volume/devices/lvm/zap.py b/src/ceph-volume/ceph_volume/devices/lvm/zap.py index 2b6925f5b2739..388f6aeea2708 100644 --- a/src/ceph-volume/ceph_volume/devices/lvm/zap.py +++ b/src/ceph-volume/ceph_volume/devices/lvm/zap.py @@ -5,12 +5,12 @@ from textwrap import dedent -from ceph_volume import decorators, terminal, process +from ceph_volume import decorators, terminal, process, BEING_REPLACED_HEADER from ceph_volume.api import lvm as api from ceph_volume.util import system, encryption, disk, arg_validators, str_to_int, merge_dict from ceph_volume.util.device import Device from ceph_volume.systemd import systemctl -from typing import List +from typing import Any, Dict, List logger = logging.getLogger(__name__) mlogger = terminal.MultiLogger(__name__) @@ -95,29 +95,29 @@ def zap_data(path): 'conv=fsync' ]) - -def find_associated_devices(osd_id=None, osd_fsid=None): +def find_associated_devices(osd_id: str = '', osd_fsid: str = '') -> List[api.Volume]: """ From an ``osd_id`` and/or an ``osd_fsid``, filter out all the LVs in the system that match those tag values, further detect if any partitions are part of the OSD, and then return the set of LVs and partitions (if any). """ lv_tags = {} - if osd_id: - lv_tags['ceph.osd_id'] = osd_id - if osd_fsid: - lv_tags['ceph.osd_fsid'] = osd_fsid + lv_tags = {key: value for key, value in { + 'ceph.osd_id': osd_id, + 'ceph.osd_fsid': osd_fsid + }.items() if value} lvs = api.get_lvs(tags=lv_tags) + if not lvs: raise RuntimeError('Unable to find any LV for zapping OSD: ' - '%s' % osd_id or osd_fsid) - + f'{osd_id or osd_fsid}') devices_to_zap = ensure_associated_lvs(lvs, lv_tags) - return [Device(path) for path in set(devices_to_zap) if path] + return [Device(path) for path in set(devices_to_zap) if path] -def ensure_associated_lvs(lvs, lv_tags={}): +def ensure_associated_lvs(lvs: List[api.Volume], + lv_tags: Dict[str, Any] = {}) -> List[str]: """ Go through each LV and ensure if backing devices (journal, wal, block) are LVs or partitions, so that they can be accurately reported. @@ -166,14 +166,14 @@ def ensure_associated_lvs(lvs, lv_tags={}): return list(set(verified_devices)) -class Zap(object): - +class Zap: help = 'Removes all data and filesystems from a logical volume or partition.' 
- def __init__(self, argv): + def __init__(self, argv: List[str]) -> None: self.argv = argv + self.osd_ids_to_zap: List[str] = [] - def unmount_lv(self, lv): + def unmount_lv(self, lv: api.Volume) -> None: if lv.tags.get('ceph.cluster_name') and lv.tags.get('ceph.osd_id'): lv_path = "/var/lib/ceph/osd/{}-{}".format(lv.tags['ceph.cluster_name'], lv.tags['ceph.osd_id']) else: @@ -186,39 +186,95 @@ def unmount_lv(self, lv): if dmcrypt and dmcrypt_uuid: self.dmcrypt_close(dmcrypt_uuid) - def zap_lv(self, device): + def _write_replacement_header(self, device: str) -> None: + """Write a replacement header to a device. + + This method writes the string defined in `BEING_REPLACED_HEADER` + to the specified device. This header indicates that the device + is in the process of being replaced. + + Args: + device (str): The path to the device on which the replacement + header will be written. + """ + disk._dd_write(device, + BEING_REPLACED_HEADER) + + def clear_replace_header(self) -> bool: + """Safely erase the replacement header on a device if it is marked as being replaced. + + This method checks whether the given device is marked as being replaced + (`device.is_being_replaced`). If true, it proceeds to erase the replacement header + from the device using the `_erase_replacement_header` method. The method returns + a boolean indicating whether any action was taken. + + Args: + device (Device): The device object, which includes information about the device's + path and status (such as whether it is currently being replaced). + + Returns: + bool: True if the replacement header was successfully erased, False if the + device was not marked as being replaced or no action was necessary. + """ + result: bool = False + device: Device = self.args.clear_replace_header + if device.is_being_replaced: + self._erase_replacement_header(device.path) + result = True + return result + + def _erase_replacement_header(self, device: str) -> None: + """Erase the replacement header on a device. + + This method writes a sequence of null bytes (`0x00`) over the area of the device + where the replacement header is stored, effectively erasing it. + + Args: + device (str): The path to the device from which the replacement header will be erased. 
+ """ + disk._dd_write(device, + b'\x00' * len(BEING_REPLACED_HEADER)) + + def zap_lv(self, device: Device) -> None: """ Device examples: vg-name/lv-name, /dev/vg-name/lv-name Requirements: Must be a logical volume (LV) """ lv: api.Volume = device.lv_api self.unmount_lv(lv) - + self.parent_device: str = disk.get_parent_device_from_mapper(lv.lv_path) zap_device(device.path) if self.args.destroy: lvs = api.get_lvs(filters={'vg_name': device.vg_name}) - if lvs == []: - mlogger.info('No LVs left, exiting', device.vg_name) - return - elif len(lvs) <= 1: + if len(lvs) <= 1: mlogger.info('Only 1 LV left in VG, will proceed to destroy ' 'volume group %s', device.vg_name) pvs = api.get_pvs(filters={'lv_uuid': lv.lv_uuid}) api.remove_vg(device.vg_name) for pv in pvs: api.remove_pv(pv.pv_name) + replacement_args: Dict[str, bool] = { + 'block': self.args.replace_block, + 'db': self.args.replace_db, + 'wal': self.args.replace_wal + } + if replacement_args.get(lv.tags.get('ceph.type'), False): + mlogger.info(f'Marking {self.parent_device} as being replaced') + self._write_replacement_header(self.parent_device) else: mlogger.info('More than 1 LV left in VG, will proceed to ' 'destroy LV only') mlogger.info('Removing LV because --destroy was given: %s', device.path) + if self.args.replace_block: + mlogger.info(f'--replace-block passed but the device still has {str(len(lvs))} LV(s)') api.remove_lv(device.path) elif lv: # just remove all lvm metadata, leaving the LV around lv.clear_tags() - def zap_partition(self, device): + def zap_partition(self, device: Device) -> None: """ Device example: /dev/sda1 Requirements: Must be a partition @@ -246,7 +302,7 @@ def zap_partition(self, device): mlogger.info("Destroying partition since --destroy was used: %s" % device.path) disk.remove_partition(device) - def zap_lvm_member(self, device): + def zap_lvm_member(self, device: Device) -> None: """ An LVM member may have more than one LV and or VG, for example if it is a raw device with multiple partitions each belonging to a different LV @@ -266,7 +322,7 @@ def zap_lvm_member(self, device): - def zap_raw_device(self, device): + def zap_raw_device(self, device: Device) -> None: """ Any whole (raw) device passed in as input will be processed here, checking for LVM membership and partitions (if any). @@ -286,10 +342,19 @@ def zap_raw_device(self, device): self.zap_partition(Device('/dev/%s' % part_name)) zap_device(device.path) + # TODO(guits): I leave this commented out, this should be part of a separate patch in order to + # support device replacement with raw-based OSDs + # if self.args.replace_block: + # disk._dd_write(device.path, 'CEPH_DEVICE_BEING_REPLACED') @decorators.needs_root - def zap(self, devices=None): - devices = devices or self.args.devices + def zap(self) -> None: + """Zap a device. + + Raises: + SystemExit: When the device is a mapper and not a mpath device. 
+ """ + devices = self.args.devices for device in devices: mlogger.info("Zapping: %s", device.path) @@ -316,21 +381,21 @@ def zap(self, devices=None): ) @decorators.needs_root - def zap_osd(self): + def zap_osd(self) -> None: if self.args.osd_id and not self.args.no_systemd: osd_is_running = systemctl.osd_is_active(self.args.osd_id) if osd_is_running: mlogger.error("OSD ID %s is running, stop it with:" % self.args.osd_id) mlogger.error("systemctl stop ceph-osd@%s" % self.args.osd_id) raise SystemExit("Unable to zap devices associated with OSD ID: %s" % self.args.osd_id) - devices = find_associated_devices(self.args.osd_id, self.args.osd_fsid) - self.zap(devices) + self.args.devices = find_associated_devices(self.args.osd_id, self.args.osd_fsid) + self.zap() - def dmcrypt_close(self, dmcrypt_uuid): + def dmcrypt_close(self, dmcrypt_uuid: str) -> None: mlogger.info("Closing encrypted volume %s", dmcrypt_uuid) encryption.dmcrypt_close(mapping=dmcrypt_uuid, skip_path_check=True) - def main(self): + def main(self) -> None: sub_command_help = dedent(""" Zaps the given logical volume(s), raw device(s) or partition(s) for reuse by ceph-volume. If given a path to a logical volume it must be in the format of vg/lv. Any @@ -418,12 +483,56 @@ def main(self): help='Skip systemd unit checks', ) + parser.add_argument( + '--replace-block', + dest='replace_block', + action='store_true', + help='Mark the block device as unavailable.' + ) + + parser.add_argument( + '--replace-db', + dest='replace_db', + action='store_true', + help='Mark the db device as unavailable.' + ) + + parser.add_argument( + '--replace-wal', + dest='replace_wal', + action='store_true', + help='Mark the wal device as unavailable.' + ) + + parser.add_argument( + '--clear-replace-header', + dest='clear_replace_header', + type=arg_validators.ValidClearReplaceHeaderDevice(), + help='clear the replace header on devices.' 
+ ) + if len(self.argv) == 0: print(sub_command_help) return self.args = parser.parse_args(self.argv) + if self.args.clear_replace_header: + rc: bool = False + try: + rc = self.clear_replace_header() + except Exception as e: + raise SystemExit(e) + if rc: + mlogger.info(f'Replacement header cleared on {self.args.clear_replace_header}') + else: + mlogger.info(f'No replacement header detected on {self.args.clear_replace_header}, nothing to do.') + raise SystemExit(not rc) + + if self.args.replace_block or self.args.replace_db or self.args.replace_wal: + self.args.destroy = True + mlogger.info('--replace-block|db|wal passed, enforcing --destroy.') + if self.args.osd_id or self.args.osd_fsid: self.zap_osd() else: diff --git a/src/ceph-volume/ceph_volume/tests/conftest.py b/src/ceph-volume/ceph_volume/tests/conftest.py index ee58081d97da1..e6bf31737b69c 100644 --- a/src/ceph-volume/ceph_volume/tests/conftest.py +++ b/src/ceph-volume/ceph_volume/tests/conftest.py @@ -360,7 +360,7 @@ def apply(devices=None, lsblk=None, lv=None, blkid=None, udevadm=None, has_bluestore_label=False): if devices: for dev in devices.keys(): - devices[dev]['device_nodes'] = os.path.basename(dev) + devices[dev]['device_nodes'] = [os.path.basename(dev)] else: devices = {} lsblk = lsblk if lsblk else {} diff --git a/src/ceph-volume/ceph_volume/tests/devices/lvm/test_zap.py b/src/ceph-volume/ceph_volume/tests/devices/lvm/test_zap.py index d630a7a6bf887..efe52c053ffc3 100644 --- a/src/ceph-volume/ceph_volume/tests/devices/lvm/test_zap.py +++ b/src/ceph-volume/ceph_volume/tests/devices/lvm/test_zap.py @@ -7,11 +7,30 @@ from ceph_volume.devices.lvm import zap -class TestZap(object): - def test_invalid_osd_id_passed(self): +class TestZap: + def test_invalid_osd_id_passed(self) -> None: with pytest.raises(SystemExit): zap.Zap(argv=['--osd-id', 'foo']).main() + @patch('ceph_volume.util.disk._dd_write', Mock()) + @patch('ceph_volume.util.arg_validators.Device') + def test_clear_replace_header_is_being_replaced(self, m_device: Mock) -> None: + m_dev = m_device.return_value + m_dev.is_being_replaced = True + with pytest.raises(SystemExit) as e: + zap.Zap(argv=['--clear', '/dev/foo']).main() + assert e.value.code == 0 + + @patch('ceph_volume.util.disk._dd_write', Mock()) + @patch('ceph_volume.util.arg_validators.Device') + def test_clear_replace_header_is_not_being_replaced(self, m_device: Mock) -> None: + m_dev = m_device.return_value + m_dev.is_being_replaced = False + with pytest.raises(SystemExit) as e: + zap.Zap(argv=['--clear', '/dev/foo']).main() + assert e.value.code == 1 + + class TestFindAssociatedDevices(object): def test_no_lvs_found_that_match_id(self, monkeypatch, device_info): diff --git a/src/ceph-volume/ceph_volume/tests/test_inventory.py b/src/ceph-volume/ceph_volume/tests/test_inventory.py index 785d8b56e86b6..832c083664212 100644 --- a/src/ceph-volume/ceph_volume/tests/test_inventory.py +++ b/src/ceph-volume/ceph_volume/tests/test_inventory.py @@ -126,6 +126,7 @@ class TestInventory(object): 'lvs', 'device_id', 'lsm_data', + 'being_replaced' ] expected_sys_api_keys = [ diff --git a/src/ceph-volume/ceph_volume/util/arg_validators.py b/src/ceph-volume/ceph_volume/util/arg_validators.py index 99e7d039e742b..e75b34e550e3c 100644 --- a/src/ceph-volume/ceph_volume/util/arg_validators.py +++ b/src/ceph-volume/ceph_volume/util/arg_validators.py @@ -7,6 +7,9 @@ from ceph_volume.util.encryption import set_dmcrypt_no_workqueue +mlogger = terminal.MultiLogger(__name__) + + def valid_osd_id(val): return str(int(val)) @@ -70,6 
+73,17 @@ def _is_valid_device(self, raise_sys_exit=True): return self._device +class ValidClearReplaceHeaderDevice(ValidDevice): + def __call__(self, dev_path: str) -> str: + super().get_device(dev_path) + return self._format_device(self._is_valid_device()) + + def _is_valid_device(self) -> Device: + if not self._device.is_being_replaced: + mlogger.info(f'{self.dev_path} has no replacement header.') + return self._device + + class ValidDataDevice(ValidDevice): def __call__(self, dev_path): super().get_device(dev_path) diff --git a/src/ceph-volume/ceph_volume/util/device.py b/src/ceph-volume/ceph_volume/util/device.py index 9c2c11e7f316f..82ee3266e3f1f 100644 --- a/src/ceph-volume/ceph_volume/util/device.py +++ b/src/ceph-volume/ceph_volume/util/device.py @@ -1,13 +1,14 @@ # -*- coding: utf-8 -*- - +# type: ignore import logging import os from functools import total_ordering -from ceph_volume import sys_info, allow_loop_devices +from ceph_volume import sys_info, allow_loop_devices, BEING_REPLACED_HEADER from ceph_volume.api import lvm from ceph_volume.util import disk, system from ceph_volume.util.lsmdisk import LSMDisk from ceph_volume.util.constants import ceph_disk_guids +from typing import List, Tuple logger = logging.getLogger(__name__) @@ -92,6 +93,7 @@ class Device(object): 'sys_api', 'device_id', 'lsm_data', + 'being_replaced' ] pretty_report_sys_fields = [ 'actuators', @@ -136,6 +138,7 @@ def __init__(self, path, with_lsm=False, lvs=None, lsblk_all=None, all_devices_v self._exists = None self._is_lvm_member = None self.ceph_device = False + self.being_replaced: bool = self.is_being_replaced self._parse() if self.path in sys_info.devices.keys(): self.device_nodes = sys_info.devices[self.path]['device_nodes'] @@ -298,7 +301,7 @@ def report(self): rot=self.rotational, available=self.available, model=self.model, - device_nodes=self.device_nodes + device_nodes=','.join(self.device_nodes) ) def json_report(self): @@ -590,7 +593,7 @@ def vg_free(self): return [vg_free] @property - def has_partitions(self): + def has_partitions(self) -> bool: ''' Boolean to determine if a given device has partitions. ''' @@ -598,7 +601,14 @@ def has_partitions(self): return True return False - def _check_generic_reject_reasons(self): + @property + def is_being_replaced(self) -> bool: + ''' + Boolean to indicate if the device is being replaced. 
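+        The first 26 bytes of the device (i.e. len(BEING_REPLACED_HEADER)) are read
+        and compared to the replacement header.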
+ ''' + return disk._dd_read(self.path, 26) == BEING_REPLACED_HEADER + + def _check_generic_reject_reasons(self) -> List[str]: reasons = [ ('id_bus', 'usb', 'id_bus'), ('ro', '1', 'read-only'), @@ -639,9 +649,11 @@ def _check_generic_reject_reasons(self): rejected.append('Has partitions') if self.has_fs: rejected.append('Has a FileSystem') + if self.is_being_replaced: + rejected.append('Is being replaced') return rejected - def _check_lvm_reject_reasons(self): + def _check_lvm_reject_reasons(self) -> Tuple[bool, List[str]]: rejected = [] if self.vgs: available_vgs = [vg for vg in self.vgs if int(vg.vg_free_count) > 10] @@ -654,7 +666,7 @@ def _check_lvm_reject_reasons(self): return len(rejected) == 0, rejected - def _check_raw_reject_reasons(self): + def _check_raw_reject_reasons(self) -> Tuple[bool, List[str]]: rejected = self._check_generic_reject_reasons() if len(self.vgs) > 0: rejected.append('LVM detected') diff --git a/src/ceph-volume/ceph_volume/util/disk.py b/src/ceph-volume/ceph_volume/util/disk.py index 3ac51c11e3469..30ee56808c762 100644 --- a/src/ceph-volume/ceph_volume/util/disk.py +++ b/src/ceph-volume/ceph_volume/util/disk.py @@ -7,7 +7,7 @@ from ceph_volume import process, allow_loop_devices from ceph_volume.api import lvm from ceph_volume.util.system import get_file_contents -from typing import Dict, List, Any +from typing import Dict, List, Any, Union logger = logging.getLogger(__name__) @@ -857,13 +857,14 @@ def get_devices(_sys_block_path='/sys/block', device=''): device_slaves = os.listdir(os.path.join(sysdir, 'slaves')) metadata['partitions'] = get_partitions_facts(sysdir) + metadata['device_nodes'] = [] if device_slaves: - metadata['device_nodes'] = ','.join(device_slaves) + metadata['device_nodes'].extend(device_slaves) else: if block[2] == 'part': - metadata['device_nodes'] = block[3] + metadata['device_nodes'].append(block[3]) else: - metadata['device_nodes'] = devname + metadata['device_nodes'].append(devname) metadata['actuators'] = None if os.path.isdir(sysdir + "/queue/independent_access_ranges/"): @@ -979,7 +980,7 @@ def _dd_read(device: str, count: int, skip: int = 0) -> str: return result -def _dd_write(device: str, data: str, skip: int = 0) -> None: +def _dd_write(device: str, data: Union[str, bytes], skip: int = 0) -> None: """Write bytes to a device Args: @@ -991,10 +992,14 @@ def _dd_write(device: str, data: str, skip: int = 0) -> None: OSError: If there is an error opening or writing to the device. Exception: If any other error occurs during the write operation. """ + + if isinstance(data, str): + data = data.encode('utf-8') + try: with open(device, 'r+b') as b: b.seek(skip) - b.write(data.encode('utf-8')) + b.write(data) except OSError: logger.warning(f"Can't write to {device}") raise diff --git a/src/pybind/mgr/cephadm/ceph_volume.py b/src/pybind/mgr/cephadm/ceph_volume.py new file mode 100644 index 0000000000000..a270bb7028f46 --- /dev/null +++ b/src/pybind/mgr/cephadm/ceph_volume.py @@ -0,0 +1,430 @@ +from cephadm.serve import CephadmServe +from typing import List, TYPE_CHECKING, Any, Dict, Set, Tuple +if TYPE_CHECKING: + from cephadm import CephadmOrchestrator + + +class CephVolume: + def __init__(self, mgr: "CephadmOrchestrator", _inheritance: bool = False) -> None: + self.mgr: "CephadmOrchestrator" = mgr + if not _inheritance: + self.lvm_list: "CephVolumeLvmList" = CephVolumeLvmList(mgr) + + def run_json(self, hostname: str, command: List[str]) -> Dict[str, Any]: + """Execute a JSON command on the specified hostname and return the result. 
+ + This method wraps the asynchronous execution of a JSON command on the + specified hostname, waiting for the command to complete. It utilizes the + `_run_json` method to perform the actual execution. + + Args: + hostname (str): The hostname of the target node where the JSON command + will be executed. + command (List[str]): A list of command arguments to be passed to the + JSON command. + + Returns: + Dict[str, Any]: A dictionary containing the JSON response from the + executed command, which may include various data + based on the command executed. + """ + return self.mgr.wait_async(self._run_json(hostname, command)) + + def run(self, hostname: str, command: List[str], **kw: Any) -> Tuple[List[str], List[str], int]: + """Execute a command on the specified hostname and return the result. + + This method wraps the asynchronous execution of a command on the + specified hostname, waiting for the command to complete. It utilizes the + `_run` method to perform the actual execution. + + Args: + hostname (str): The hostname of the target node where the command + will be executed. + command (List[str]): A list of command arguments to be passed to the + command. + **kw (Any): Additional keyword arguments to customize the command + execution. + + Returns: + Tuple[List[str], List[str], int]: A tuple containing: + - A list of strings representing the standard output of the command. + - A list of strings representing the standard error output of the command. + - An integer representing the return code of the command execution. + """ + return self.mgr.wait_async(self._run(hostname, command, **kw)) + + async def _run(self, + hostname: str, + command: List[str], + **kw: Any) -> Tuple[List[str], List[str], int]: + """Execute a ceph-volume command on the specified hostname and return the result. + + This asynchronous method constructs a ceph-volume command and then executes + it on the specified host. + The result of the command is returned in JSON format. + + Args: + hostname (str): The hostname of the target node where the command will be executed. + command (List[str]): A list of command arguments to be passed to the Ceph command. + **kw (Any): Additional keyword arguments to customize the command execution. + + Returns: + Tuple[List[str], List[str], int]: A tuple containing: + - A list of strings representing the standard output of the command. + - A list of strings representing the standard error output of the command. + - An integer representing the return code of the command execution. + """ + cmd: List[str] = ['--'] + cmd.extend(command) + result = await CephadmServe(self.mgr)._run_cephadm( + hostname, 'osd', 'ceph-volume', + cmd, + **kw) + return result + + async def _run_json(self, + hostname: str, + command: List[str]) -> Dict[str, Any]: + """Execute a ceph-volume command on a specified hostname. + + This asynchronous method constructs a ceph-volume command and then executes + it on the specified host. + The result of the command is returned in JSON format. + + Args: + hostname (str): The hostname of the target node where the command will be executed. + command (List[str]): A list of command arguments to be passed to the Ceph command. + + Returns: + Dict[str, Any]: The result of the command execution as a dictionary parsed from + the JSON output. 
+ """ + cmd: List[str] = ['--'] + cmd.extend(command) + result = await CephadmServe(self.mgr)._run_cephadm_json( + hostname, 'osd', 'ceph-volume', + cmd) + return result + + def clear_replace_header(self, hostname: str, device: str) -> str: + """Clear the replacement header on a specified device for a given hostname. + + This method checks if a replacement header exists on the specified device + and clears it if found. After clearing, it invalidates the cached device + information for the specified hostname and kicks the serve loop. + + Args: + hostname (str): The hostname of the device on which the replacement header + will be cleared. This is used to identify the specific + device within the manager's context. + device (str): The path to the device (e.g., '/dev/sda') from which the + replacement header will be cleared. + + Returns: + str: A message indicating the result of the operation. It will either confirm + that the replacement header was cleared or state that no replacement header + was detected on the device. + """ + output: str = '' + result = self.run(hostname, ['lvm', + 'zap', + '--clear-replace-header', + device], + error_ok=True) + out, err, rc = result + if not rc: + output = f'Replacement header cleared on {device}' + self.mgr.cache.invalidate_host_devices(hostname) + self.mgr._kick_serve_loop() + else: + plain_out: str = '\n'.join(out) + plain_err: str = '\n'.join(err) + output = f'No replacement header could be cleared on {device}.\n{plain_out}\n{plain_err}' + return output + + +class CephVolumeLvmList(CephVolume): + def __init__(self, mgr: "CephadmOrchestrator") -> None: + super().__init__(mgr, True) + self.data: Dict[str, Any] = {} + + def get_data(self, hostname: str) -> None: + """Execute the `ceph-volume lvm list` command to list LVM-based OSDs. + + This asynchronous method interacts with the Ceph manager to retrieve + information about the Logical Volume Manager (LVM) devices associated + with the OSDs. It calls the `ceph-volume lvm list` command in JSON format + to gather relevant data. + + Returns: + None: This method does not return a value. The retrieved data is + stored in the `self.data` attribute for further processing. + """ + self.data = self.run_json(hostname, + ['lvm', 'list', '--format', 'json']) + + def devices_by_type(self, device_type: str) -> List[str]: + """Retrieve a list of devices of a specified type across all OSDs. + + This method iterates through all OSDs and collects devices that match + the specified type (e.g., 'block', 'db', 'wal'). The resulting list + contains unique device paths. + + Args: + device_type (str): The type of devices to retrieve. This should + be one of the recognized device types such as + 'block', 'db', or 'wal'. + + Returns: + List[str]: A list of unique device paths of the specified type + found across all OSDs. If no devices of the specified + type are found, an empty list is returned. + """ + result: Set[str] = set() + for osd in self.osd_ids(): + for lv in self.data.get(osd, []): + if lv.get('type') == device_type: + result.update(lv.get('devices', [])) + return list(result) + + def block_devices(self) -> List[str]: + """List all block devices used by OSDs. + + This method returns a list of devices that are used as 'block' devices + for storing the main OSD data. + + Returns: + List[str]: A list of device paths (strings) that are used as 'block' devices. + """ + return self.devices_by_type('block') + + def db_devices(self) -> List[str]: + """List all database (DB) devices used by OSDs. 
+ + This method returns a list of devices that are used as 'db' devices + for storing the database files associated with OSDs. + + Returns: + List[str]: A list of device paths (strings) that are used as 'db' devices. + """ + return self.devices_by_type('db') + + def wal_devices(self) -> List[str]: + """List all write-ahead log (WAL) devices used by OSDs. + + This method returns a list of devices that are used as 'wal' devices + for storing write-ahead log data associated with OSDs. + + Returns: + List[str]: A list of device paths (strings) that are used as 'wal' devices. + """ + return self.devices_by_type('wal') + + def all_devices(self) -> List[str]: + """List all devices used by OSDs for 'block', 'db', or 'wal' purposes. + + This method aggregates all devices that are currently used by the OSDs + in the system for the following device types: + - 'block' devices: Used to store the OSD's data. + - 'db' devices: Used for database purposes. + - 'wal' devices: Used for Write-Ahead Logging. + + The returned list combines devices from all these categories. + + Returns: + List[str]: A list of device paths (strings) that are used as 'block', 'db', or 'wal' devices. + """ + return self.block_devices() + self.db_devices() + self.wal_devices() + + def device_osd_mapping(self, device_type: str = '') -> Dict[str, Dict[str, List[str]]]: + """Create a mapping of devices to their corresponding OSD IDs based on device type. + + This method serves as a 'proxy' function, designed to be called by the *_device_osd_mapping() methods. + + This method iterates over the OSDs and their logical volumes to build a + dictionary that maps each device of the specified type to the list of + OSD IDs that use it. The resulting dictionary can be used to determine + which OSDs share a specific device. + + Args: + device_type (str): The type of the device to filter by (e.g., 'block', 'db', or 'wal'). + If an empty string is provided, devices of all types will be included. + + Returns: + Dict[str, Dict[str, List[str]]]: A dictionary where the keys are device + names and the values are dictionaries containing a list of OSD IDs + that use the corresponding device. + + eg: + ``` + { + '/dev/vda': {'osd_ids': ['0', '1']}, + '/dev/vdb': {'osd_ids': ['2']} + } + ``` + + """ + result: Dict[str, Dict[str, List[str]]] = {} + for osd in self.osd_ids(): + for lv in self.data.get(osd, []): + if lv.get('type') == device_type or not device_type: + for device in lv.get('devices', []): + if device not in result: + result[device] = {'osd_ids': []} + result[device]['osd_ids'].append(osd) + return result + + def block_device_osd_mapping(self) -> Dict[str, Dict[str, List[str]]]: + """Get a dictionnary with all block devices and their corresponding + osd(s) id(s). + + eg: + ``` + {'/dev/vdb': {'osd_ids': ['0']}, + '/dev/vdc': {'osd_ids': ['1']}, + '/dev/vdf': {'osd_ids': ['2']}, + '/dev/vde': {'osd_ids': ['3', '4']}} + ``` + + Returns: + Dict[str, Dict[str, List[str]]]: A dict including all block devices with their corresponding + osd id(s). + """ + return self.device_osd_mapping('block') + + def db_device_osd_mapping(self) -> Dict[str, Dict[str, List[str]]]: + """Get a dictionnary with all db devices and their corresponding + osd(s) id(s). + + eg: + ``` + {'/dev/vdv': {'osd_ids': ['0', '1', '2', '3']}, + '/dev/vdx': {'osd_ids': ['4']}} + ``` + + Returns: + Dict[str, Dict[str, List[str]]]: A dict including all db devices with their corresponding + osd id(s). 
+ """ + return self.device_osd_mapping('db') + + def wal_device_osd_mapping(self) -> Dict[str, Dict[str, List[str]]]: + """Get a dictionnary with all wal devices and their corresponding + osd(s) id(s). + + eg: + ``` + {'/dev/vdy': {'osd_ids': ['0', '1', '2', '3']}, + '/dev/vdz': {'osd_ids': ['4']}} + ``` + + Returns: + Dict[str, Dict[str, List[str]]]: A dict including all wal devices with their corresponding + osd id(s). + """ + return self.device_osd_mapping('wal') + + def is_shared_device(self, device: str) -> bool: + """Determines if a device is shared between multiple OSDs. + + This method checks if a given device is shared by multiple OSDs for a specified device type + (such as 'block', 'db', or 'wal'). If the device is associated with more than one OSD, + it is considered shared. + + Args: + device (str): The device path to check (e.g., '/dev/sda'). + device_type (str): The type of the device (e.g., 'block', 'db', 'wal'). + + Raises: + RuntimeError: If the device is not valid or not found in the shared devices mapping. + + Returns: + bool: True if the device is shared by more than one OSD, False otherwise. + """ + device_osd_mapping = self.device_osd_mapping() + if not device or device not in device_osd_mapping: + raise RuntimeError('Not a valid device path.') + return len(device_osd_mapping[device]['osd_ids']) > 1 + + def is_block_device(self, device: str) -> bool: + """Check if a specified device is a block device. + + This method checks if the specified device is included in the + list of block devices used by OSDs. + + Args: + device (str): The path of the device to check. + + Returns: + bool: True if the device is a block device, + False otherwise. + """ + return device in self.block_devices() + + def is_db_device(self, device: str) -> bool: + """Check if a specified device is a DB device. + + This method checks if the specified device is included in the + list of DB devices used by OSDs. + + Args: + device (str): The path of the device to check. + + Returns: + bool: True if the device is a DB device, + False otherwise. + """ + return device in self.db_devices() + + def is_wal_device(self, device: str) -> bool: + """Check if a specified device is a WAL device. + + This method checks if the specified device is included in the + list of WAL devices used by OSDs. + + Args: + device (str): The path of the device to check. + + Returns: + bool: True if the device is a WAL device, + False otherwise. + """ + return device in self.wal_devices() + + def get_block_devices_from_osd_id(self, osd_id: str) -> List[str]: + """Retrieve the list of block devices associated with a given OSD ID. + + This method looks up the specified OSD ID in the `data` attribute + and returns a list of devices that are of type 'block'. If there are + no devices of type 'block' for the specified OSD ID, an empty list is returned. + + Args: + osd_id (str): The OSD ID for which to retrieve block devices. + + Returns: + List[str]: A list of block device paths associated with the + specified OSD ID. If no block devices are found, + an empty list is returned. + """ + result: List[str] = [] + for lv in self.data.get(osd_id, []): + if lv.get('type') == 'block': + result = lv.get('devices', []) + return result + + def osd_ids(self) -> List[str]: + """Retrieve the list of OSD IDs. + + This method returns a list of OSD IDs by extracting the keys + from the `data` attribute, which is expected to contain + information about OSDs. If there is no data available, an + empty list is returned. 
+ + Returns: + List[str]: A list of OSD IDs. If no data is present, + an empty list is returned. + """ + result: List[str] = [] + if self.data: + result = list(self.data.keys()) + return result diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py index f8f0efc9d2831..dc43b48726379 100644 --- a/src/pybind/mgr/cephadm/module.py +++ b/src/pybind/mgr/cephadm/module.py @@ -101,6 +101,7 @@ from .configchecks import CephadmConfigChecks from .offline_watcher import OfflineHostWatcher from .tuned_profiles import TunedProfileUtils +from .ceph_volume import CephVolume try: import asyncssh @@ -792,6 +793,8 @@ def __init__(self, *args: Any, **kwargs: Any): # as part of the handling of stray daemons self.recently_altered_daemons: Dict[str, datetime.datetime] = {} + self.ceph_volume: CephVolume = CephVolume(self) + def shutdown(self) -> None: self.log.debug('shutdown') self._worker_pool.close() @@ -3828,9 +3831,56 @@ def upgrade_resume(self) -> str: def upgrade_stop(self) -> str: return self.upgrade.upgrade_stop() + @handle_orch_error + def replace_device(self, + hostname: str, + device: str, + clear: bool = False, + yes_i_really_mean_it: bool = False) -> Any: + output: str = '' + + self.ceph_volume.lvm_list.get_data(hostname=hostname) + + if clear: + output = self.ceph_volume.clear_replace_header(hostname, device) + else: + osds_to_zap: List[str] = [] + if hostname not in list(self.inventory.keys()): + raise OrchestratorError(f'{hostname} invalid host.') + + if device not in self.ceph_volume.lvm_list.all_devices(): + raise OrchestratorError(f"{device} doesn't appear to be used for an OSD, not a valid device in {hostname}.") + + device_osd_mapping = self.ceph_volume.lvm_list.device_osd_mapping() + osds_to_zap = device_osd_mapping[device]['osd_ids'] + + if self.ceph_volume.lvm_list.is_shared_device(device): + if not yes_i_really_mean_it: + raise OrchestratorError(f'{device} is a shared device.\n' + f'Replacing {device} implies destroying OSD(s): {osds_to_zap}.\n' + 'Please, *be very careful*, this can be a very dangerous operation.\n' + 'If you know what you are doing, pass --yes-i-really-mean-it') + if not self.to_remove_osds.rm_util.safe_to_destroy([int(osd_id) for osd_id in osds_to_zap]): + raise OrchestratorError(f"Destroying OSD(s) {osds_to_zap} would cause some PGs to be undersized/degraded.\n" + 'Refusing to proceed.') + replace_block: bool = self.ceph_volume.lvm_list.is_block_device(device) + replace_db: bool = self.ceph_volume.lvm_list.is_db_device(device) + replace_wal: bool = self.ceph_volume.lvm_list.is_wal_device(device) + + self.remove_osds(list(osds_to_zap), + replace_block=replace_block, + replace_db=replace_db, + replace_wal=replace_wal) + + output = f'Scheduled to destroy osds: {osds_to_zap} and mark {device} as being replaced.' 
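+            # The removal itself happens asynchronously: the cephadm serve loop drains
+            # the removal queue, zaps the devices and writes the replacement header via
+            # `ceph-volume lvm zap --replace-block|--replace-db|--replace-wal`.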
+ return output + @handle_orch_error def remove_osds(self, osd_ids: List[str], replace: bool = False, + replace_block: bool = False, + replace_db: bool = False, + replace_wal: bool = False, force: bool = False, zap: bool = False, no_destroy: bool = False) -> str: @@ -3853,6 +3903,9 @@ def remove_osds(self, osd_ids: List[str], try: self.to_remove_osds.enqueue(OSD(osd_id=int(daemon.daemon_id), replace=replace, + replace_block=replace_block, + replace_db=replace_db, + replace_wal=replace_wal, force=force, zap=zap, no_destroy=no_destroy, diff --git a/src/pybind/mgr/cephadm/serve.py b/src/pybind/mgr/cephadm/serve.py index 611c27c34538a..4a7959ae04502 100644 --- a/src/pybind/mgr/cephadm/serve.py +++ b/src/pybind/mgr/cephadm/serve.py @@ -96,7 +96,10 @@ def serve(self) -> None: if not self.mgr.paused: self._run_async_actions() - self.mgr.to_remove_osds.process_removal_queue() + removal_queue_result = self.mgr.to_remove_osds.process_removal_queue() + self.log.debug(f'process_removal_queue() returned = {removal_queue_result}') + if removal_queue_result: + continue self.mgr.migration.migrate() if self.mgr.migration.is_migration_ongoing(): diff --git a/src/pybind/mgr/cephadm/services/osd.py b/src/pybind/mgr/cephadm/services/osd.py index 9b09b8c9f4925..80bf92772c49b 100644 --- a/src/pybind/mgr/cephadm/services/osd.py +++ b/src/pybind/mgr/cephadm/services/osd.py @@ -551,6 +551,12 @@ def zap_osd(self, osd: "OSD") -> str: "Zaps all devices that are associated with an OSD" if osd.hostname is not None: cmd = ['--', 'lvm', 'zap', '--osd-id', str(osd.osd_id)] + if osd.replace_block: + cmd.append('--replace-block') + if osd.replace_db: + cmd.append('--replace-db') + if osd.replace_wal: + cmd.append('--replace-wal') if not osd.no_destroy: cmd.append('--destroy') with self.mgr.async_timeout_handler(osd.hostname, f'cephadm ceph-volume {" ".join(cmd)}'): @@ -618,6 +624,9 @@ def __init__(self, started: bool = False, stopped: bool = False, replace: bool = False, + replace_block: bool = False, + replace_db: bool = False, + replace_wal: bool = False, force: bool = False, hostname: Optional[str] = None, zap: bool = False, @@ -649,6 +658,12 @@ def __init__(self, # If this is a replace or remove operation self.replace = replace + # If this is a block device replacement + self.replace_block = replace_block + # If this is a db device replacement + self.replace_db = replace_db + # If this is a wal device replacement + self.replace_wal = replace_wal # If we wait for the osd to be drained self.force = force # The name of the node @@ -676,7 +691,7 @@ def start_draining(self) -> bool: if self.stopped: logger.debug(f"Won't start draining {self}. 
OSD draining is stopped.") return False - if self.replace: + if self.any_replace_params: self.rm_util.set_osd_flag([self], 'out') else: self.rm_util.reweight_osd(self, 0.0) @@ -686,7 +701,7 @@ def start_draining(self) -> bool: return True def stop_draining(self) -> bool: - if self.replace: + if self.any_replace_params: self.rm_util.set_osd_flag([self], 'in') else: if self.original_weight: @@ -764,6 +779,9 @@ def to_json(self) -> dict: out['draining'] = self.draining out['stopped'] = self.stopped out['replace'] = self.replace + out['replace_block'] = self.replace_block + out['replace_db'] = self.replace_db + out['replace_wal'] = self.replace_wal out['force'] = self.force out['zap'] = self.zap out['hostname'] = self.hostname # type: ignore @@ -789,6 +807,13 @@ def from_json(cls, inp: Optional[Dict[str, Any]], rm_util: RemoveUtil) -> Option inp['hostname'] = hostname return cls(**inp) + @property + def any_replace_params(self) -> bool: + return any([self.replace, + self.replace_block, + self.replace_db, + self.replace_wal]) + def __hash__(self) -> int: return hash(self.osd_id) @@ -812,7 +837,7 @@ def __init__(self, mgr: "CephadmOrchestrator") -> None: # network calls, like mon commands. self.lock = Lock() - def process_removal_queue(self) -> None: + def process_removal_queue(self) -> bool: """ Performs actions in the _serve() loop to remove an OSD when criteria is met. @@ -820,6 +845,8 @@ def process_removal_queue(self) -> None: we can't hold self.lock, as we're calling _remove_daemon in the loop """ + result: bool = False + # make sure that we don't run on OSDs that are not in the cluster anymore. self.cleanup() @@ -863,16 +890,23 @@ def process_removal_queue(self) -> None: if self.mgr.cache.has_daemon(f'osd.{osd.osd_id}'): CephadmServe(self.mgr)._remove_daemon(f'osd.{osd.osd_id}', osd.hostname) logger.info(f"Successfully removed {osd} on {osd.hostname}") + result = True else: logger.info(f"Daemon {osd} on {osd.hostname} was already removed") - if osd.replace: + any_replace_params: bool = any([osd.replace, + osd.replace_block, + osd.replace_db, + osd.replace_wal]) + if any_replace_params: # mark destroyed in osdmap if not osd.destroy(): raise orchestrator.OrchestratorError( f"Could not destroy {osd}") logger.info( f"Successfully destroyed old {osd} on {osd.hostname}; ready for replacement") + if any_replace_params: + osd.zap = True else: # purge from osdmap if not osd.purge(): @@ -884,7 +918,7 @@ def process_removal_queue(self) -> None: logger.info(f"Zapping devices for {osd} on {osd.hostname}") osd.do_zap() logger.info(f"Successfully zapped devices for {osd} on {osd.hostname}") - + self.mgr.cache.invalidate_host_devices(osd.hostname) logger.debug(f"Removing {osd} from the queue.") # self could change while this is processing (osds get added from the CLI) @@ -893,6 +927,7 @@ def process_removal_queue(self) -> None: with self.lock: self.osds.intersection_update(new_queue) self._save_to_store() + return result def cleanup(self) -> None: # OSDs can always be cleaned up manually. 
This ensures that we run on existing OSDs diff --git a/src/pybind/mgr/cephadm/tests/ceph_volume_data.py b/src/pybind/mgr/cephadm/tests/ceph_volume_data.py new file mode 100644 index 0000000000000..afd6d89d39e40 --- /dev/null +++ b/src/pybind/mgr/cephadm/tests/ceph_volume_data.py @@ -0,0 +1 @@ +data = '{"0":[{"devices":["/dev/vdb"],"lv_name":"osd-block-8cd7fa43-ef40-49e7-abb2-db5cfd91bc92","lv_path":"/dev/ceph-81c76363-7a89-47d2-83c1-fdcbab5d6668/osd-block-8cd7fa43-ef40-49e7-abb2-db5cfd91bc92","lv_size":"214744170496","lv_tags":"ceph.block_device=/dev/ceph-81c76363-7a89-47d2-83c1-fdcbab5d6668/osd-block-8cd7fa43-ef40-49e7-abb2-db5cfd91bc92,ceph.block_uuid=d518Lz-gTnC-FyX7-4MN2-icIp-LBCB-zdQw2p,ceph.cephx_lockbox_secret=,ceph.cluster_fsid=83231340-7cd4-11ef-ab48-525400e54507,ceph.cluster_name=ceph,ceph.crush_device_class=,ceph.db_device=/dev/ceph-e10d6a69-68ec-44ba-bd3b-9a20d15cacbf/osd-db-f0f5e20c-f1ee-42df-9a78-0e70b9c08e6c,ceph.db_uuid=EInXUQ-LDDO-7jCL-Y0Jb-tPZ2-KuKl-VNJ2hX,ceph.encrypted=0,ceph.osd_fsid=8cd7fa43-ef40-49e7-abb2-db5cfd91bc92,ceph.osd_id=0,ceph.osdspec_affinity=osd.shared_db,ceph.type=block,ceph.vdo=0,ceph.with_tpm=0","lv_uuid":"d518Lz-gTnC-FyX7-4MN2-icIp-LBCB-zdQw2p","name":"osd-block-8cd7fa43-ef40-49e7-abb2-db5cfd91bc92","path":"/dev/ceph-81c76363-7a89-47d2-83c1-fdcbab5d6668/osd-block-8cd7fa43-ef40-49e7-abb2-db5cfd91bc92","tags":{"ceph.block_device":"/dev/ceph-81c76363-7a89-47d2-83c1-fdcbab5d6668/osd-block-8cd7fa43-ef40-49e7-abb2-db5cfd91bc92","ceph.block_uuid":"d518Lz-gTnC-FyX7-4MN2-icIp-LBCB-zdQw2p","ceph.cephx_lockbox_secret":"","ceph.cluster_fsid":"83231340-7cd4-11ef-ab48-525400e54507","ceph.cluster_name":"ceph","ceph.crush_device_class":"","ceph.db_device":"/dev/ceph-e10d6a69-68ec-44ba-bd3b-9a20d15cacbf/osd-db-f0f5e20c-f1ee-42df-9a78-0e70b9c08e6c","ceph.db_uuid":"EInXUQ-LDDO-7jCL-Y0Jb-tPZ2-KuKl-VNJ2hX","ceph.encrypted":"0","ceph.osd_fsid":"8cd7fa43-ef40-49e7-abb2-db5cfd91bc92","ceph.osd_id":"0","ceph.osdspec_affinity":"osd.shared_db","ceph.type":"block","ceph.vdo":"0","ceph.with_tpm":"0"},"type":"block","vg_name":"ceph-81c76363-7a89-47d2-83c1-fdcbab5d6668"},{"devices":["/dev/vdk"],"lv_name":"osd-db-f0f5e20c-f1ee-42df-9a78-0e70b9c08e6c","lv_path":"/dev/ceph-e10d6a69-68ec-44ba-bd3b-9a20d15cacbf/osd-db-f0f5e20c-f1ee-42df-9a78-0e70b9c08e6c","lv_size":"107369988096","lv_tags":"ceph.block_device=/dev/ceph-81c76363-7a89-47d2-83c1-fdcbab5d6668/osd-block-8cd7fa43-ef40-49e7-abb2-db5cfd91bc92,ceph.block_uuid=d518Lz-gTnC-FyX7-4MN2-icIp-LBCB-zdQw2p,ceph.cephx_lockbox_secret=,ceph.cluster_fsid=83231340-7cd4-11ef-ab48-525400e54507,ceph.cluster_name=ceph,ceph.crush_device_class=,ceph.db_device=/dev/ceph-e10d6a69-68ec-44ba-bd3b-9a20d15cacbf/osd-db-f0f5e20c-f1ee-42df-9a78-0e70b9c08e6c,ceph.db_uuid=EInXUQ-LDDO-7jCL-Y0Jb-tPZ2-KuKl-VNJ2hX,ceph.encrypted=0,ceph.osd_fsid=8cd7fa43-ef40-49e7-abb2-db5cfd91bc92,ceph.osd_id=0,ceph.osdspec_affinity=osd.shared_db,ceph.type=db,ceph.vdo=0,ceph.with_tpm=0","lv_uuid":"EInXUQ-LDDO-7jCL-Y0Jb-tPZ2-KuKl-VNJ2hX","name":"osd-db-f0f5e20c-f1ee-42df-9a78-0e70b9c08e6c","path":"/dev/ceph-e10d6a69-68ec-44ba-bd3b-9a20d15cacbf/osd-db-f0f5e20c-f1ee-42df-9a78-0e70b9c08e6c","tags":{"ceph.block_device":"/dev/ceph-81c76363-7a89-47d2-83c1-fdcbab5d6668/osd-block-8cd7fa43-ef40-49e7-abb2-db5cfd91bc92","ceph.block_uuid":"d518Lz-gTnC-FyX7-4MN2-icIp-LBCB-zdQw2p","ceph.cephx_lockbox_secret":"","ceph.cluster_fsid":"83231340-7cd4-11ef-ab48-525400e54507","ceph.cluster_name":"ceph","ceph.crush_device_class":"","ceph.db_device":"/dev/ceph-e10d6a69-68ec-44ba-bd3b-9a20d15cacbf/osd-db-f
0f5e20c-f1ee-42df-9a78-0e70b9c08e6c","ceph.db_uuid":"EInXUQ-LDDO-7jCL-Y0Jb-tPZ2-KuKl-VNJ2hX","ceph.encrypted":"0","ceph.osd_fsid":"8cd7fa43-ef40-49e7-abb2-db5cfd91bc92","ceph.osd_id":"0","ceph.osdspec_affinity":"osd.shared_db","ceph.type":"db","ceph.vdo":"0","ceph.with_tpm":"0"},"type":"db","vg_name":"ceph-e10d6a69-68ec-44ba-bd3b-9a20d15cacbf"}],"1":[{"devices":["/dev/vdc"],"lv_name":"osd-block-aaa4c8cb-2b54-4df8-9846-17063c59b6ce","lv_path":"/dev/ceph-964cfc71-ad91-4189-97c1-cab4fd3066bb/osd-block-aaa4c8cb-2b54-4df8-9846-17063c59b6ce","lv_size":"214744170496","lv_tags":"ceph.block_device=/dev/ceph-964cfc71-ad91-4189-97c1-cab4fd3066bb/osd-block-aaa4c8cb-2b54-4df8-9846-17063c59b6ce,ceph.block_uuid=Ccvedr-7t3C-BgIg-lfSl-qW3J-Zw1V-FuH14l,ceph.cephx_lockbox_secret=,ceph.cluster_fsid=83231340-7cd4-11ef-ab48-525400e54507,ceph.cluster_name=ceph,ceph.crush_device_class=,ceph.db_device=/dev/ceph-e10d6a69-68ec-44ba-bd3b-9a20d15cacbf/osd-db-38f53373-7575-4c90-98ca-28f189685774,ceph.db_uuid=1mEAHd-mxQn-Qr9c-DkD8-XGOQ-xfIN-ZsPReC,ceph.encrypted=0,ceph.osd_fsid=aaa4c8cb-2b54-4df8-9846-17063c59b6ce,ceph.osd_id=1,ceph.osdspec_affinity=osd.shared_db,ceph.type=block,ceph.vdo=0,ceph.with_tpm=0","lv_uuid":"Ccvedr-7t3C-BgIg-lfSl-qW3J-Zw1V-FuH14l","name":"osd-block-aaa4c8cb-2b54-4df8-9846-17063c59b6ce","path":"/dev/ceph-964cfc71-ad91-4189-97c1-cab4fd3066bb/osd-block-aaa4c8cb-2b54-4df8-9846-17063c59b6ce","tags":{"ceph.block_device":"/dev/ceph-964cfc71-ad91-4189-97c1-cab4fd3066bb/osd-block-aaa4c8cb-2b54-4df8-9846-17063c59b6ce","ceph.block_uuid":"Ccvedr-7t3C-BgIg-lfSl-qW3J-Zw1V-FuH14l","ceph.cephx_lockbox_secret":"","ceph.cluster_fsid":"83231340-7cd4-11ef-ab48-525400e54507","ceph.cluster_name":"ceph","ceph.crush_device_class":"","ceph.db_device":"/dev/ceph-e10d6a69-68ec-44ba-bd3b-9a20d15cacbf/osd-db-38f53373-7575-4c90-98ca-28f189685774","ceph.db_uuid":"1mEAHd-mxQn-Qr9c-DkD8-XGOQ-xfIN-ZsPReC","ceph.encrypted":"0","ceph.osd_fsid":"aaa4c8cb-2b54-4df8-9846-17063c59b6ce","ceph.osd_id":"1","ceph.osdspec_affinity":"osd.shared_db","ceph.type":"block","ceph.vdo":"0","ceph.with_tpm":"0"},"type":"block","vg_name":"ceph-964cfc71-ad91-4189-97c1-cab4fd3066bb"},{"devices":["/dev/vdk"],"lv_name":"osd-db-38f53373-7575-4c90-98ca-28f189685774","lv_path":"/dev/ceph-e10d6a69-68ec-44ba-bd3b-9a20d15cacbf/osd-db-38f53373-7575-4c90-98ca-28f189685774","lv_size":"107369988096","lv_tags":"ceph.block_device=/dev/ceph-964cfc71-ad91-4189-97c1-cab4fd3066bb/osd-block-aaa4c8cb-2b54-4df8-9846-17063c59b6ce,ceph.block_uuid=Ccvedr-7t3C-BgIg-lfSl-qW3J-Zw1V-FuH14l,ceph.cephx_lockbox_secret=,ceph.cluster_fsid=83231340-7cd4-11ef-ab48-525400e54507,ceph.cluster_name=ceph,ceph.crush_device_class=,ceph.db_device=/dev/ceph-e10d6a69-68ec-44ba-bd3b-9a20d15cacbf/osd-db-38f53373-7575-4c90-98ca-28f189685774,ceph.db_uuid=1mEAHd-mxQn-Qr9c-DkD8-XGOQ-xfIN-ZsPReC,ceph.encrypted=0,ceph.osd_fsid=aaa4c8cb-2b54-4df8-9846-17063c59b6ce,ceph.osd_id=1,ceph.osdspec_affinity=osd.shared_db,ceph.type=db,ceph.vdo=0,ceph.with_tpm=0","lv_uuid":"1mEAHd-mxQn-Qr9c-DkD8-XGOQ-xfIN-ZsPReC","name":"osd-db-38f53373-7575-4c90-98ca-28f189685774","path":"/dev/ceph-e10d6a69-68ec-44ba-bd3b-9a20d15cacbf/osd-db-38f53373-7575-4c90-98ca-28f189685774","tags":{"ceph.block_device":"/dev/ceph-964cfc71-ad91-4189-97c1-cab4fd3066bb/osd-block-aaa4c8cb-2b54-4df8-9846-17063c59b6ce","ceph.block_uuid":"Ccvedr-7t3C-BgIg-lfSl-qW3J-Zw1V-FuH14l","ceph.cephx_lockbox_secret":"","ceph.cluster_fsid":"83231340-7cd4-11ef-ab48-525400e54507","ceph.cluster_name":"ceph","ceph.crush_device_class":"","ceph.db_device":"/dev/ceph-e
10d6a69-68ec-44ba-bd3b-9a20d15cacbf/osd-db-38f53373-7575-4c90-98ca-28f189685774","ceph.db_uuid":"1mEAHd-mxQn-Qr9c-DkD8-XGOQ-xfIN-ZsPReC","ceph.encrypted":"0","ceph.osd_fsid":"aaa4c8cb-2b54-4df8-9846-17063c59b6ce","ceph.osd_id":"1","ceph.osdspec_affinity":"osd.shared_db","ceph.type":"db","ceph.vdo":"0","ceph.with_tpm":"0"},"type":"db","vg_name":"ceph-e10d6a69-68ec-44ba-bd3b-9a20d15cacbf"}],"2":[{"devices":["/dev/vdf"],"lv_name":"osd-block-a0434b49-759a-46a4-91dc-d7cc65af3a33","lv_path":"/dev/ceph-3ba7a728-709b-408c-a043-9e48704b5ffb/osd-block-a0434b49-759a-46a4-91dc-d7cc65af3a33","lv_size":"214744170496","lv_tags":"ceph.block_device=/dev/ceph-3ba7a728-709b-408c-a043-9e48704b5ffb/osd-block-a0434b49-759a-46a4-91dc-d7cc65af3a33,ceph.block_uuid=adQsil-KScK-5QkX-bLbg-EpJa-sNJL-3oDtaO,ceph.cephx_lockbox_secret=,ceph.cluster_fsid=83231340-7cd4-11ef-ab48-525400e54507,ceph.cluster_name=ceph,ceph.crush_device_class=,ceph.encrypted=0,ceph.osd_fsid=a0434b49-759a-46a4-91dc-d7cc65af3a33,ceph.osd_id=2,ceph.osdspec_affinity=None,ceph.type=block,ceph.vdo=0,ceph.with_tpm=0","lv_uuid":"adQsil-KScK-5QkX-bLbg-EpJa-sNJL-3oDtaO","name":"osd-block-a0434b49-759a-46a4-91dc-d7cc65af3a33","path":"/dev/ceph-3ba7a728-709b-408c-a043-9e48704b5ffb/osd-block-a0434b49-759a-46a4-91dc-d7cc65af3a33","tags":{"ceph.block_device":"/dev/ceph-3ba7a728-709b-408c-a043-9e48704b5ffb/osd-block-a0434b49-759a-46a4-91dc-d7cc65af3a33","ceph.block_uuid":"adQsil-KScK-5QkX-bLbg-EpJa-sNJL-3oDtaO","ceph.cephx_lockbox_secret":"","ceph.cluster_fsid":"83231340-7cd4-11ef-ab48-525400e54507","ceph.cluster_name":"ceph","ceph.crush_device_class":"","ceph.encrypted":"0","ceph.osd_fsid":"a0434b49-759a-46a4-91dc-d7cc65af3a33","ceph.osd_id":"2","ceph.osdspec_affinity":"None","ceph.type":"block","ceph.vdo":"0","ceph.with_tpm":"0"},"type":"block","vg_name":"ceph-3ba7a728-709b-408c-a043-9e48704b5ffb"}],"3":[{"devices":["/dev/vde"],"lv_name":"osd-block-861ea81a-c24b-4c69-b4f6-e527151b132f","lv_path":"/dev/ceph-97ac74d9-d351-4a7e-bbd1-27b8dd3e7f7b/osd-block-861ea81a-c24b-4c69-b4f6-e527151b132f","lv_size":"214744170496","lv_tags":"ceph.block_device=/dev/ceph-97ac74d9-d351-4a7e-bbd1-27b8dd3e7f7b/osd-block-861ea81a-c24b-4c69-b4f6-e527151b132f,ceph.block_uuid=GBfm14-4hPu-oaWk-wSdA-O1Fw-eU5o-Q2KOh8,ceph.cephx_lockbox_secret=,ceph.cluster_fsid=83231340-7cd4-11ef-ab48-525400e54507,ceph.cluster_name=ceph,ceph.crush_device_class=,ceph.encrypted=0,ceph.osd_fsid=861ea81a-c24b-4c69-b4f6-e527151b132f,ceph.osd_id=3,ceph.osdspec_affinity=None,ceph.type=block,ceph.vdo=0,ceph.with_tpm=0","lv_uuid":"GBfm14-4hPu-oaWk-wSdA-O1Fw-eU5o-Q2KOh8","name":"osd-block-861ea81a-c24b-4c69-b4f6-e527151b132f","path":"/dev/ceph-97ac74d9-d351-4a7e-bbd1-27b8dd3e7f7b/osd-block-861ea81a-c24b-4c69-b4f6-e527151b132f","tags":{"ceph.block_device":"/dev/ceph-97ac74d9-d351-4a7e-bbd1-27b8dd3e7f7b/osd-block-861ea81a-c24b-4c69-b4f6-e527151b132f","ceph.block_uuid":"GBfm14-4hPu-oaWk-wSdA-O1Fw-eU5o-Q2KOh8","ceph.cephx_lockbox_secret":"","ceph.cluster_fsid":"83231340-7cd4-11ef-ab48-525400e54507","ceph.cluster_name":"ceph","ceph.crush_device_class":"","ceph.encrypted":"0","ceph.osd_fsid":"861ea81a-c24b-4c69-b4f6-e527151b132f","ceph.osd_id":"3","ceph.osdspec_affinity":"None","ceph.type":"block","ceph.vdo":"0","ceph.with_tpm":"0"},"type":"block","vg_name":"ceph-97ac74d9-d351-4a7e-bbd1-27b8dd3e7f7b"}],"4":[{"devices":["/dev/vdg"],"lv_name":"osd-block-242c4a21-b076-424c-94fb-3f556ed2ddbd","lv_path":"/dev/ceph-20acdce8-5548-4707-a38e-b8e925485bc5/osd-block-242c4a21-b076-424c-94fb-3f556ed2ddbd","lv_size":"214744170496","l
v_tags":"ceph.block_device=/dev/ceph-20acdce8-5548-4707-a38e-b8e925485bc5/osd-block-242c4a21-b076-424c-94fb-3f556ed2ddbd,ceph.block_uuid=diO6OQ-jjkD-tdVS-FJ5f-VcP7-8QEW-geP4Ds,ceph.cephx_lockbox_secret=,ceph.cluster_fsid=83231340-7cd4-11ef-ab48-525400e54507,ceph.cluster_name=ceph,ceph.crush_device_class=,ceph.db_device=/dev/ceph-8da158be-4d0d-41bd-86ef-d75dbfc71452/osd-db-19fc3a21-ce53-4881-9217-f1d58166af16,ceph.db_uuid=5mng9E-Q3ej-37eY-Ny9C-p6wf-h17w-gC3jtx,ceph.encrypted=0,ceph.osd_fsid=242c4a21-b076-424c-94fb-3f556ed2ddbd,ceph.osd_id=4,ceph.osdspec_affinity=osd.shared_db_wal,ceph.type=block,ceph.vdo=0,ceph.wal_device=/dev/ceph-776f980b-152a-4e8f-99b6-bae27ed0b528/osd-wal-2542dafe-2ff7-4e8b-bc70-a0297b421008,ceph.wal_uuid=ppb82k-9cEs-yb1K-QTNl-c4BM-33PQ-bNX0c2,ceph.with_tpm=0","lv_uuid":"diO6OQ-jjkD-tdVS-FJ5f-VcP7-8QEW-geP4Ds","name":"osd-block-242c4a21-b076-424c-94fb-3f556ed2ddbd","path":"/dev/ceph-20acdce8-5548-4707-a38e-b8e925485bc5/osd-block-242c4a21-b076-424c-94fb-3f556ed2ddbd","tags":{"ceph.block_device":"/dev/ceph-20acdce8-5548-4707-a38e-b8e925485bc5/osd-block-242c4a21-b076-424c-94fb-3f556ed2ddbd","ceph.block_uuid":"diO6OQ-jjkD-tdVS-FJ5f-VcP7-8QEW-geP4Ds","ceph.cephx_lockbox_secret":"","ceph.cluster_fsid":"83231340-7cd4-11ef-ab48-525400e54507","ceph.cluster_name":"ceph","ceph.crush_device_class":"","ceph.db_device":"/dev/ceph-8da158be-4d0d-41bd-86ef-d75dbfc71452/osd-db-19fc3a21-ce53-4881-9217-f1d58166af16","ceph.db_uuid":"5mng9E-Q3ej-37eY-Ny9C-p6wf-h17w-gC3jtx","ceph.encrypted":"0","ceph.osd_fsid":"242c4a21-b076-424c-94fb-3f556ed2ddbd","ceph.osd_id":"4","ceph.osdspec_affinity":"osd.shared_db_wal","ceph.type":"block","ceph.vdo":"0","ceph.wal_device":"/dev/ceph-776f980b-152a-4e8f-99b6-bae27ed0b528/osd-wal-2542dafe-2ff7-4e8b-bc70-a0297b421008","ceph.wal_uuid":"ppb82k-9cEs-yb1K-QTNl-c4BM-33PQ-bNX0c2","ceph.with_tpm":"0"},"type":"block","vg_name":"ceph-20acdce8-5548-4707-a38e-b8e925485bc5"},{"devices":["/dev/vdj"],"lv_name":"osd-wal-2542dafe-2ff7-4e8b-bc70-a0297b421008","lv_path":"/dev/ceph-776f980b-152a-4e8f-99b6-bae27ed0b528/osd-wal-2542dafe-2ff7-4e8b-bc70-a0297b421008","lv_size":"107369988096","lv_tags":"ceph.block_device=/dev/ceph-20acdce8-5548-4707-a38e-b8e925485bc5/osd-block-242c4a21-b076-424c-94fb-3f556ed2ddbd,ceph.block_uuid=diO6OQ-jjkD-tdVS-FJ5f-VcP7-8QEW-geP4Ds,ceph.cephx_lockbox_secret=,ceph.cluster_fsid=83231340-7cd4-11ef-ab48-525400e54507,ceph.cluster_name=ceph,ceph.crush_device_class=,ceph.encrypted=0,ceph.osd_fsid=242c4a21-b076-424c-94fb-3f556ed2ddbd,ceph.osd_id=4,ceph.osdspec_affinity=osd.shared_db_wal,ceph.type=wal,ceph.vdo=0,ceph.wal_device=/dev/ceph-776f980b-152a-4e8f-99b6-bae27ed0b528/osd-wal-2542dafe-2ff7-4e8b-bc70-a0297b421008,ceph.wal_uuid=ppb82k-9cEs-yb1K-QTNl-c4BM-33PQ-bNX0c2,ceph.with_tpm=0","lv_uuid":"ppb82k-9cEs-yb1K-QTNl-c4BM-33PQ-bNX0c2","name":"osd-wal-2542dafe-2ff7-4e8b-bc70-a0297b421008","path":"/dev/ceph-776f980b-152a-4e8f-99b6-bae27ed0b528/osd-wal-2542dafe-2ff7-4e8b-bc70-a0297b421008","tags":{"ceph.block_device":"/dev/ceph-20acdce8-5548-4707-a38e-b8e925485bc5/osd-block-242c4a21-b076-424c-94fb-3f556ed2ddbd","ceph.block_uuid":"diO6OQ-jjkD-tdVS-FJ5f-VcP7-8QEW-geP4Ds","ceph.cephx_lockbox_secret":"","ceph.cluster_fsid":"83231340-7cd4-11ef-ab48-525400e54507","ceph.cluster_name":"ceph","ceph.crush_device_class":"","ceph.encrypted":"0","ceph.osd_fsid":"242c4a21-b076-424c-94fb-3f556ed2ddbd","ceph.osd_id":"4","ceph.osdspec_affinity":"osd.shared_db_wal","ceph.type":"wal","ceph.vdo":"0","ceph.wal_device":"/dev/ceph-776f980b-152a-4e8f-99b6-bae27ed0b528/osd-wal-254
2dafe-2ff7-4e8b-bc70-a0297b421008","ceph.wal_uuid":"ppb82k-9cEs-yb1K-QTNl-c4BM-33PQ-bNX0c2","ceph.with_tpm":"0"},"type":"wal","vg_name":"ceph-776f980b-152a-4e8f-99b6-bae27ed0b528"},{"devices":["/dev/vdi"],"lv_name":"osd-db-19fc3a21-ce53-4881-9217-f1d58166af16","lv_path":"/dev/ceph-8da158be-4d0d-41bd-86ef-d75dbfc71452/osd-db-19fc3a21-ce53-4881-9217-f1d58166af16","lv_size":"107369988096","lv_tags":"ceph.block_device=/dev/ceph-20acdce8-5548-4707-a38e-b8e925485bc5/osd-block-242c4a21-b076-424c-94fb-3f556ed2ddbd,ceph.block_uuid=diO6OQ-jjkD-tdVS-FJ5f-VcP7-8QEW-geP4Ds,ceph.cephx_lockbox_secret=,ceph.cluster_fsid=83231340-7cd4-11ef-ab48-525400e54507,ceph.cluster_name=ceph,ceph.crush_device_class=,ceph.db_device=/dev/ceph-8da158be-4d0d-41bd-86ef-d75dbfc71452/osd-db-19fc3a21-ce53-4881-9217-f1d58166af16,ceph.db_uuid=5mng9E-Q3ej-37eY-Ny9C-p6wf-h17w-gC3jtx,ceph.encrypted=0,ceph.osd_fsid=242c4a21-b076-424c-94fb-3f556ed2ddbd,ceph.osd_id=4,ceph.osdspec_affinity=osd.shared_db_wal,ceph.type=db,ceph.vdo=0,ceph.wal_device=/dev/ceph-776f980b-152a-4e8f-99b6-bae27ed0b528/osd-wal-2542dafe-2ff7-4e8b-bc70-a0297b421008,ceph.wal_uuid=ppb82k-9cEs-yb1K-QTNl-c4BM-33PQ-bNX0c2,ceph.with_tpm=0","lv_uuid":"5mng9E-Q3ej-37eY-Ny9C-p6wf-h17w-gC3jtx","name":"osd-db-19fc3a21-ce53-4881-9217-f1d58166af16","path":"/dev/ceph-8da158be-4d0d-41bd-86ef-d75dbfc71452/osd-db-19fc3a21-ce53-4881-9217-f1d58166af16","tags":{"ceph.block_device":"/dev/ceph-20acdce8-5548-4707-a38e-b8e925485bc5/osd-block-242c4a21-b076-424c-94fb-3f556ed2ddbd","ceph.block_uuid":"diO6OQ-jjkD-tdVS-FJ5f-VcP7-8QEW-geP4Ds","ceph.cephx_lockbox_secret":"","ceph.cluster_fsid":"83231340-7cd4-11ef-ab48-525400e54507","ceph.cluster_name":"ceph","ceph.crush_device_class":"","ceph.db_device":"/dev/ceph-8da158be-4d0d-41bd-86ef-d75dbfc71452/osd-db-19fc3a21-ce53-4881-9217-f1d58166af16","ceph.db_uuid":"5mng9E-Q3ej-37eY-Ny9C-p6wf-h17w-gC3jtx","ceph.encrypted":"0","ceph.osd_fsid":"242c4a21-b076-424c-94fb-3f556ed2ddbd","ceph.osd_id":"4","ceph.osdspec_affinity":"osd.shared_db_wal","ceph.type":"db","ceph.vdo":"0","ceph.wal_device":"/dev/ceph-776f980b-152a-4e8f-99b6-bae27ed0b528/osd-wal-2542dafe-2ff7-4e8b-bc70-a0297b421008","ceph.wal_uuid":"ppb82k-9cEs-yb1K-QTNl-c4BM-33PQ-bNX0c2","ceph.with_tpm":"0"},"type":"db","vg_name":"ceph-8da158be-4d0d-41bd-86ef-d75dbfc71452"}],"5":[{"devices":["/dev/vdj"],"lv_name":"osd-wal-90739e2d-ec18-4761-8290-1ad508ecbeea","lv_path":"/dev/ceph-776f980b-152a-4e8f-99b6-bae27ed0b528/osd-wal-90739e2d-ec18-4761-8290-1ad508ecbeea","lv_size":"107369988096","lv_tags":"ceph.block_device=/dev/ceph-84a4ccfc-80f1-4784-9558-a9a08b15a351/osd-block-8cf28853-3453-49b0-a3f9-a693443ed75f,ceph.block_uuid=gmQkh2-T5i3-Kwfa-YMMO-j88X-RvDw-dx7N6E,ceph.cephx_lockbox_secret=,ceph.cluster_fsid=83231340-7cd4-11ef-ab48-525400e54507,ceph.cluster_name=ceph,ceph.crush_device_class=,ceph.encrypted=0,ceph.osd_fsid=8cf28853-3453-49b0-a3f9-a693443ed75f,ceph.osd_id=5,ceph.osdspec_affinity=osd.shared_db_wal,ceph.type=wal,ceph.vdo=0,ceph.wal_device=/dev/ceph-776f980b-152a-4e8f-99b6-bae27ed0b528/osd-wal-90739e2d-ec18-4761-8290-1ad508ecbeea,ceph.wal_uuid=DFQDJy-6bE0-iagr-hgmh-oUEH-HF2R-ILBzzz,ceph.with_tpm=0","lv_uuid":"DFQDJy-6bE0-iagr-hgmh-oUEH-HF2R-ILBzzz","name":"osd-wal-90739e2d-ec18-4761-8290-1ad508ecbeea","path":"/dev/ceph-776f980b-152a-4e8f-99b6-bae27ed0b528/osd-wal-90739e2d-ec18-4761-8290-1ad508ecbeea","tags":{"ceph.block_device":"/dev/ceph-84a4ccfc-80f1-4784-9558-a9a08b15a351/osd-block-8cf28853-3453-49b0-a3f9-a693443ed75f","ceph.block_uuid":"gmQkh2-T5i3-Kwfa-YMMO-j88X-RvDw-dx7N6E","ceph.cephx_
lockbox_secret":"","ceph.cluster_fsid":"83231340-7cd4-11ef-ab48-525400e54507","ceph.cluster_name":"ceph","ceph.crush_device_class":"","ceph.encrypted":"0","ceph.osd_fsid":"8cf28853-3453-49b0-a3f9-a693443ed75f","ceph.osd_id":"5","ceph.osdspec_affinity":"osd.shared_db_wal","ceph.type":"wal","ceph.vdo":"0","ceph.wal_device":"/dev/ceph-776f980b-152a-4e8f-99b6-bae27ed0b528/osd-wal-90739e2d-ec18-4761-8290-1ad508ecbeea","ceph.wal_uuid":"DFQDJy-6bE0-iagr-hgmh-oUEH-HF2R-ILBzzz","ceph.with_tpm":"0"},"type":"wal","vg_name":"ceph-776f980b-152a-4e8f-99b6-bae27ed0b528"},{"devices":["/dev/vdh"],"lv_name":"osd-block-8cf28853-3453-49b0-a3f9-a693443ed75f","lv_path":"/dev/ceph-84a4ccfc-80f1-4784-9558-a9a08b15a351/osd-block-8cf28853-3453-49b0-a3f9-a693443ed75f","lv_size":"214744170496","lv_tags":"ceph.block_device=/dev/ceph-84a4ccfc-80f1-4784-9558-a9a08b15a351/osd-block-8cf28853-3453-49b0-a3f9-a693443ed75f,ceph.block_uuid=gmQkh2-T5i3-Kwfa-YMMO-j88X-RvDw-dx7N6E,ceph.cephx_lockbox_secret=,ceph.cluster_fsid=83231340-7cd4-11ef-ab48-525400e54507,ceph.cluster_name=ceph,ceph.crush_device_class=,ceph.db_device=/dev/ceph-8da158be-4d0d-41bd-86ef-d75dbfc71452/osd-db-635f592b-1d4f-4117-aaa6-b68878f84dfb,ceph.db_uuid=wf407q-HwuD-OWhh-xm2A-d2sv-Fdsx-JqeUj2,ceph.encrypted=0,ceph.osd_fsid=8cf28853-3453-49b0-a3f9-a693443ed75f,ceph.osd_id=5,ceph.osdspec_affinity=osd.shared_db_wal,ceph.type=block,ceph.vdo=0,ceph.wal_device=/dev/ceph-776f980b-152a-4e8f-99b6-bae27ed0b528/osd-wal-90739e2d-ec18-4761-8290-1ad508ecbeea,ceph.wal_uuid=DFQDJy-6bE0-iagr-hgmh-oUEH-HF2R-ILBzzz,ceph.with_tpm=0","lv_uuid":"gmQkh2-T5i3-Kwfa-YMMO-j88X-RvDw-dx7N6E","name":"osd-block-8cf28853-3453-49b0-a3f9-a693443ed75f","path":"/dev/ceph-84a4ccfc-80f1-4784-9558-a9a08b15a351/osd-block-8cf28853-3453-49b0-a3f9-a693443ed75f","tags":{"ceph.block_device":"/dev/ceph-84a4ccfc-80f1-4784-9558-a9a08b15a351/osd-block-8cf28853-3453-49b0-a3f9-a693443ed75f","ceph.block_uuid":"gmQkh2-T5i3-Kwfa-YMMO-j88X-RvDw-dx7N6E","ceph.cephx_lockbox_secret":"","ceph.cluster_fsid":"83231340-7cd4-11ef-ab48-525400e54507","ceph.cluster_name":"ceph","ceph.crush_device_class":"","ceph.db_device":"/dev/ceph-8da158be-4d0d-41bd-86ef-d75dbfc71452/osd-db-635f592b-1d4f-4117-aaa6-b68878f84dfb","ceph.db_uuid":"wf407q-HwuD-OWhh-xm2A-d2sv-Fdsx-JqeUj2","ceph.encrypted":"0","ceph.osd_fsid":"8cf28853-3453-49b0-a3f9-a693443ed75f","ceph.osd_id":"5","ceph.osdspec_affinity":"osd.shared_db_wal","ceph.type":"block","ceph.vdo":"0","ceph.wal_device":"/dev/ceph-776f980b-152a-4e8f-99b6-bae27ed0b528/osd-wal-90739e2d-ec18-4761-8290-1ad508ecbeea","ceph.wal_uuid":"DFQDJy-6bE0-iagr-hgmh-oUEH-HF2R-ILBzzz","ceph.with_tpm":"0"},"type":"block","vg_name":"ceph-84a4ccfc-80f1-4784-9558-a9a08b15a351"},{"devices":["/dev/vdi"],"lv_name":"osd-db-635f592b-1d4f-4117-aaa6-b68878f84dfb","lv_path":"/dev/ceph-8da158be-4d0d-41bd-86ef-d75dbfc71452/osd-db-635f592b-1d4f-4117-aaa6-b68878f84dfb","lv_size":"107369988096","lv_tags":"ceph.block_device=/dev/ceph-84a4ccfc-80f1-4784-9558-a9a08b15a351/osd-block-8cf28853-3453-49b0-a3f9-a693443ed75f,ceph.block_uuid=gmQkh2-T5i3-Kwfa-YMMO-j88X-RvDw-dx7N6E,ceph.cephx_lockbox_secret=,ceph.cluster_fsid=83231340-7cd4-11ef-ab48-525400e54507,ceph.cluster_name=ceph,ceph.crush_device_class=,ceph.db_device=/dev/ceph-8da158be-4d0d-41bd-86ef-d75dbfc71452/osd-db-635f592b-1d4f-4117-aaa6-b68878f84dfb,ceph.db_uuid=wf407q-HwuD-OWhh-xm2A-d2sv-Fdsx-JqeUj2,ceph.encrypted=0,ceph.osd_fsid=8cf28853-3453-49b0-a3f9-a693443ed75f,ceph.osd_id=5,ceph.osdspec_affinity=osd.shared_db_wal,ceph.type=db,ceph.vdo=0,ceph.wal_device=/dev/ceph-7
76f980b-152a-4e8f-99b6-bae27ed0b528/osd-wal-90739e2d-ec18-4761-8290-1ad508ecbeea,ceph.wal_uuid=DFQDJy-6bE0-iagr-hgmh-oUEH-HF2R-ILBzzz,ceph.with_tpm=0","lv_uuid":"wf407q-HwuD-OWhh-xm2A-d2sv-Fdsx-JqeUj2","name":"osd-db-635f592b-1d4f-4117-aaa6-b68878f84dfb","path":"/dev/ceph-8da158be-4d0d-41bd-86ef-d75dbfc71452/osd-db-635f592b-1d4f-4117-aaa6-b68878f84dfb","tags":{"ceph.block_device":"/dev/ceph-84a4ccfc-80f1-4784-9558-a9a08b15a351/osd-block-8cf28853-3453-49b0-a3f9-a693443ed75f","ceph.block_uuid":"gmQkh2-T5i3-Kwfa-YMMO-j88X-RvDw-dx7N6E","ceph.cephx_lockbox_secret":"","ceph.cluster_fsid":"83231340-7cd4-11ef-ab48-525400e54507","ceph.cluster_name":"ceph","ceph.crush_device_class":"","ceph.db_device":"/dev/ceph-8da158be-4d0d-41bd-86ef-d75dbfc71452/osd-db-635f592b-1d4f-4117-aaa6-b68878f84dfb","ceph.db_uuid":"wf407q-HwuD-OWhh-xm2A-d2sv-Fdsx-JqeUj2","ceph.encrypted":"0","ceph.osd_fsid":"8cf28853-3453-49b0-a3f9-a693443ed75f","ceph.osd_id":"5","ceph.osdspec_affinity":"osd.shared_db_wal","ceph.type":"db","ceph.vdo":"0","ceph.wal_device":"/dev/ceph-776f980b-152a-4e8f-99b6-bae27ed0b528/osd-wal-90739e2d-ec18-4761-8290-1ad508ecbeea","ceph.wal_uuid":"DFQDJy-6bE0-iagr-hgmh-oUEH-HF2R-ILBzzz","ceph.with_tpm":"0"},"type":"db","vg_name":"ceph-8da158be-4d0d-41bd-86ef-d75dbfc71452"}]}' diff --git a/src/pybind/mgr/cephadm/tests/conftest.py b/src/pybind/mgr/cephadm/tests/conftest.py index e8add2c7b834a..5cc2fabaf49b6 100644 --- a/src/pybind/mgr/cephadm/tests/conftest.py +++ b/src/pybind/mgr/cephadm/tests/conftest.py @@ -1,13 +1,14 @@ import pytest from cephadm.services.osd import RemoveUtil, OSD -from tests import mock - +from mock import mock from .fixtures import with_cephadm_module +from cephadm import CephadmOrchestrator +from typing import Generator @pytest.fixture() -def cephadm_module(): +def cephadm_module() -> Generator[CephadmOrchestrator, None, None]: with with_cephadm_module({}) as m: yield m diff --git a/src/pybind/mgr/cephadm/tests/fixtures.py b/src/pybind/mgr/cephadm/tests/fixtures.py index dd858c6c7dabe..dda0c6720ac6c 100644 --- a/src/pybind/mgr/cephadm/tests/fixtures.py +++ b/src/pybind/mgr/cephadm/tests/fixtures.py @@ -35,11 +35,11 @@ def get_module_option_ex(_, module, key, default=None): return None -def _run_cephadm(ret): +def _run_cephadm(ret, rc: int = 0): async def foo(s, host, entity, cmd, e, **kwargs): if cmd == 'gather-facts': return '{}', '', 0 - return [ret], '', 0 + return [ret], '', rc return foo diff --git a/src/pybind/mgr/cephadm/tests/test_ceph_volume.py b/src/pybind/mgr/cephadm/tests/test_ceph_volume.py new file mode 100644 index 0000000000000..cc1378a75753c --- /dev/null +++ b/src/pybind/mgr/cephadm/tests/test_ceph_volume.py @@ -0,0 +1,231 @@ +import json +import pytest +from .ceph_volume_data import data +from cephadm.serve import CephadmServe +from cephadm import CephadmOrchestrator +from mock import patch +from .fixtures import _run_cephadm, with_host + + +class TestCephVolume: + def test_run(self, cephadm_module: CephadmOrchestrator) -> None: + with patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')): + with with_host(cephadm_module, 'test'): + CephadmServe(cephadm_module)._refresh_host_daemons('test') + with patch('cephadm.serve.CephadmServe._run_cephadm', _run_cephadm('fake-output', 0)): + c = cephadm_module.ceph_volume.run('test', ['/bin/foo']) + assert c == (['fake-output'], '', 0) + + def test_run_json(self, cephadm_module: CephadmOrchestrator) -> None: + with patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')): + with 
with_host(cephadm_module, 'test'): + CephadmServe(cephadm_module)._refresh_host_daemons('test') + with patch('cephadm.serve.CephadmServe._run_cephadm', _run_cephadm('{"this-is-a-fake-key": "this-is-a-fake-value"}', 0)): + c = cephadm_module.ceph_volume.run_json('test', ['/bin/foo']) + assert c == {"this-is-a-fake-key": "this-is-a-fake-value"} + + def test_clear_replace_header_ok(self, cephadm_module: CephadmOrchestrator) -> None: + with patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')): + with with_host(cephadm_module, 'test'): + CephadmServe(cephadm_module)._refresh_host_daemons('test') + with patch('cephadm.serve.CephadmServe._run_cephadm', _run_cephadm('fake-output', 0)): + c = cephadm_module.ceph_volume.clear_replace_header('test', '/dev/foo') + assert c == 'Replacement header cleared on /dev/foo' + + def test_clear_replace_header_nok(self, cephadm_module: CephadmOrchestrator) -> None: + with patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')): + with with_host(cephadm_module, 'test'): + CephadmServe(cephadm_module)._refresh_host_daemons('test') + with patch('cephadm.serve.CephadmServe._run_cephadm', _run_cephadm('', 1)): + c = cephadm_module.ceph_volume.clear_replace_header('fake-output', '/dev/foo') + assert c.strip() == 'No replacement header could be cleared on /dev/foo.' + + +class TestCephVolumeList: + def test_get_data(self, cephadm_module: CephadmOrchestrator) -> None: + with patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')): + with with_host(cephadm_module, 'test'): + CephadmServe(cephadm_module)._refresh_host_daemons('test') + with patch('cephadm.serve.CephadmServe._run_cephadm', _run_cephadm(data)): + cephadm_module.ceph_volume.lvm_list.get_data('test') + assert cephadm_module.ceph_volume.lvm_list.data == json.loads(data) + + def test_devices_by_type_block(self, cephadm_module: CephadmOrchestrator) -> None: + with patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')): + with with_host(cephadm_module, 'test'): + CephadmServe(cephadm_module)._refresh_host_daemons('test') + with patch('cephadm.serve.CephadmServe._run_cephadm', _run_cephadm(data)): + cephadm_module.ceph_volume.lvm_list.get_data('test') + assert set(cephadm_module.ceph_volume.lvm_list.devices_by_type('block')) == set(['/dev/vdb', + '/dev/vdc', + '/dev/vdg', + '/dev/vde', + '/dev/vdf', + '/dev/vdh']) + + def test_devices_by_type_db(self, cephadm_module: CephadmOrchestrator) -> None: + with patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')): + with with_host(cephadm_module, 'test'): + CephadmServe(cephadm_module)._refresh_host_daemons('test') + with patch('cephadm.serve.CephadmServe._run_cephadm', _run_cephadm(data)): + cephadm_module.ceph_volume.lvm_list.get_data('test') + assert set(cephadm_module.ceph_volume.lvm_list.devices_by_type('db')) == set(['/dev/vdi', + '/dev/vdk']) + + def test_devices_by_type_wal(self, cephadm_module: CephadmOrchestrator) -> None: + with patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')): + with with_host(cephadm_module, 'test'): + CephadmServe(cephadm_module)._refresh_host_daemons('test') + with patch('cephadm.serve.CephadmServe._run_cephadm', _run_cephadm(data)): + cephadm_module.ceph_volume.lvm_list.get_data('test') + assert cephadm_module.ceph_volume.lvm_list.devices_by_type('wal') == ['/dev/vdj'] + + def test_block_devices(self, cephadm_module: CephadmOrchestrator) -> None: + with patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')): + with 
with_host(cephadm_module, 'test'): + CephadmServe(cephadm_module)._refresh_host_daemons('test') + with patch('cephadm.serve.CephadmServe._run_cephadm', _run_cephadm(data)): + cephadm_module.ceph_volume.lvm_list.get_data('test') + assert set(cephadm_module.ceph_volume.lvm_list.block_devices()) == set(['/dev/vdb', + '/dev/vdc', + '/dev/vdg', + '/dev/vde', + '/dev/vdf', + '/dev/vdh']) + + def test_db_devices(self, cephadm_module: CephadmOrchestrator) -> None: + with patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')): + with with_host(cephadm_module, 'test'): + CephadmServe(cephadm_module)._refresh_host_daemons('test') + with patch('cephadm.serve.CephadmServe._run_cephadm', _run_cephadm(data)): + cephadm_module.ceph_volume.lvm_list.get_data('test') + assert set(cephadm_module.ceph_volume.lvm_list.db_devices()) == set(['/dev/vdk', + '/dev/vdi']) + + def test_wal_devices(self, cephadm_module: CephadmOrchestrator) -> None: + with patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')): + with with_host(cephadm_module, 'test'): + CephadmServe(cephadm_module)._refresh_host_daemons('test') + with patch('cephadm.serve.CephadmServe._run_cephadm', _run_cephadm(data)): + cephadm_module.ceph_volume.lvm_list.get_data('test') + assert set(cephadm_module.ceph_volume.lvm_list.wal_devices()) == set(['/dev/vdj']) + + def test_all_devices(self, cephadm_module: CephadmOrchestrator) -> None: + with patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')): + with with_host(cephadm_module, 'test'): + CephadmServe(cephadm_module)._refresh_host_daemons('test') + with patch('cephadm.serve.CephadmServe._run_cephadm', _run_cephadm(data)): + cephadm_module.ceph_volume.lvm_list.get_data('test') + assert set(cephadm_module.ceph_volume.lvm_list.all_devices()) == set(['/dev/vdg', + '/dev/vdj', + '/dev/vdh', + '/dev/vdi', + '/dev/vdc', + '/dev/vde', + '/dev/vdf', + '/dev/vdb', + '/dev/vdk']) + + def test_device_osd_mapping(self, cephadm_module: CephadmOrchestrator) -> None: + with patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')): + with with_host(cephadm_module, 'test'): + CephadmServe(cephadm_module)._refresh_host_daemons('test') + with patch('cephadm.serve.CephadmServe._run_cephadm', _run_cephadm(data)): + cephadm_module.ceph_volume.lvm_list.get_data('test') + assert cephadm_module.ceph_volume.lvm_list.device_osd_mapping() == {'/dev/vdb': {'osd_ids': ['0']}, + '/dev/vdk': {'osd_ids': ['0', '1']}, + '/dev/vdc': {'osd_ids': ['1']}, + '/dev/vdf': {'osd_ids': ['2']}, + '/dev/vde': {'osd_ids': ['3']}, + '/dev/vdg': {'osd_ids': ['4']}, + '/dev/vdj': {'osd_ids': ['4', '5']}, + '/dev/vdi': {'osd_ids': ['4', '5']}, + '/dev/vdh': {'osd_ids': ['5']}} + + def test_block_device_osd_mapping(self, cephadm_module: CephadmOrchestrator) -> None: + with patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')): + with with_host(cephadm_module, 'test'): + CephadmServe(cephadm_module)._refresh_host_daemons('test') + with patch('cephadm.serve.CephadmServe._run_cephadm', _run_cephadm(data)): + cephadm_module.ceph_volume.lvm_list.get_data('test') + assert cephadm_module.ceph_volume.lvm_list.block_device_osd_mapping() == {'/dev/vdb': {'osd_ids': ['0']}, + '/dev/vdc': {'osd_ids': ['1']}, + '/dev/vdf': {'osd_ids': ['2']}, + '/dev/vde': {'osd_ids': ['3']}, + '/dev/vdg': {'osd_ids': ['4']}, + '/dev/vdh': {'osd_ids': ['5']}} + + def test_db_device_osd_mapping(self, cephadm_module: CephadmOrchestrator) -> None: + with patch("cephadm.serve.CephadmServe._run_cephadm", 
_run_cephadm('[]')): + with with_host(cephadm_module, 'test'): + CephadmServe(cephadm_module)._refresh_host_daemons('test') + with patch('cephadm.serve.CephadmServe._run_cephadm', _run_cephadm(data)): + cephadm_module.ceph_volume.lvm_list.get_data('test') + assert cephadm_module.ceph_volume.lvm_list.db_device_osd_mapping() == {'/dev/vdk': {'osd_ids': ['0', '1']}, + '/dev/vdi': {'osd_ids': ['4', '5']}} + + def test_wal_device_osd_mapping(self, cephadm_module: CephadmOrchestrator) -> None: + with patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')): + with with_host(cephadm_module, 'test'): + CephadmServe(cephadm_module)._refresh_host_daemons('test') + with patch('cephadm.serve.CephadmServe._run_cephadm', _run_cephadm(data)): + cephadm_module.ceph_volume.lvm_list.get_data('test') + assert cephadm_module.ceph_volume.lvm_list.wal_device_osd_mapping() == {'/dev/vdj': {'osd_ids': ['4', '5']}} + + def test_is_shared_device(self, cephadm_module: CephadmOrchestrator) -> None: + with patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')): + with with_host(cephadm_module, 'test'): + CephadmServe(cephadm_module)._refresh_host_daemons('test') + with patch('cephadm.serve.CephadmServe._run_cephadm', _run_cephadm(data)): + cephadm_module.ceph_volume.lvm_list.get_data('test') + assert cephadm_module.ceph_volume.lvm_list.is_shared_device('/dev/vdj') + + def test_is_shared_device_with_invalid_device(self, cephadm_module: CephadmOrchestrator) -> None: + with patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')): + with with_host(cephadm_module, 'test'): + CephadmServe(cephadm_module)._refresh_host_daemons('test') + with patch('cephadm.serve.CephadmServe._run_cephadm', _run_cephadm(data)): + cephadm_module.ceph_volume.lvm_list.get_data('test') + with pytest.raises(RuntimeError) as e: + assert cephadm_module.ceph_volume.lvm_list.is_shared_device('/dev/invalid-device') + assert str(e.value) == 'Not a valid device path.' 
+ + def test_is_block_device(self, cephadm_module: CephadmOrchestrator) -> None: + with patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')): + with with_host(cephadm_module, 'test'): + CephadmServe(cephadm_module)._refresh_host_daemons('test') + with patch('cephadm.serve.CephadmServe._run_cephadm', _run_cephadm(data)): + cephadm_module.ceph_volume.lvm_list.get_data('test') + assert cephadm_module.ceph_volume.lvm_list.is_block_device('/dev/vdb') + + def test_is_db_device(self, cephadm_module: CephadmOrchestrator) -> None: + with patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')): + with with_host(cephadm_module, 'test'): + CephadmServe(cephadm_module)._refresh_host_daemons('test') + with patch('cephadm.serve.CephadmServe._run_cephadm', _run_cephadm(data)): + cephadm_module.ceph_volume.lvm_list.get_data('test') + assert cephadm_module.ceph_volume.lvm_list.is_db_device('/dev/vdk') + + def test_is_wal_device(self, cephadm_module: CephadmOrchestrator) -> None: + with patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')): + with with_host(cephadm_module, 'test'): + CephadmServe(cephadm_module)._refresh_host_daemons('test') + with patch('cephadm.serve.CephadmServe._run_cephadm', _run_cephadm(data)): + cephadm_module.ceph_volume.lvm_list.get_data('test') + assert cephadm_module.ceph_volume.lvm_list.is_wal_device('/dev/vdj') + + def test_get_block_devices_from_osd_id(self, cephadm_module: CephadmOrchestrator) -> None: + with patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')): + with with_host(cephadm_module, 'test'): + CephadmServe(cephadm_module)._refresh_host_daemons('test') + with patch('cephadm.serve.CephadmServe._run_cephadm', _run_cephadm(data)): + cephadm_module.ceph_volume.lvm_list.get_data('test') + assert cephadm_module.ceph_volume.lvm_list.get_block_devices_from_osd_id('0') == ['/dev/vdb'] + + def test_osd_ids(self, cephadm_module: CephadmOrchestrator) -> None: + with patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')): + with with_host(cephadm_module, 'test'): + CephadmServe(cephadm_module)._refresh_host_daemons('test') + with patch('cephadm.serve.CephadmServe._run_cephadm', _run_cephadm(data)): + cephadm_module.ceph_volume.lvm_list.get_data('test') + assert set(cephadm_module.ceph_volume.lvm_list.osd_ids()) == set(['0', '1', '2', '3', '4', '5']) diff --git a/src/pybind/mgr/cephadm/tests/test_replace_device.py b/src/pybind/mgr/cephadm/tests/test_replace_device.py new file mode 100644 index 0000000000000..b4a2c81ad9a76 --- /dev/null +++ b/src/pybind/mgr/cephadm/tests/test_replace_device.py @@ -0,0 +1,53 @@ +import pytest +from mock import patch +from .fixtures import _run_cephadm, with_host, wait +from .ceph_volume_data import data +from cephadm.serve import CephadmServe +from cephadm import CephadmOrchestrator +from orchestrator import OrchestratorError + + +class TestReplaceDevice: + def test_invalid_device(self, cephadm_module: CephadmOrchestrator) -> None: + with patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')): + with with_host(cephadm_module, 'test'): + CephadmServe(cephadm_module)._refresh_host_daemons('test') + with patch('cephadm.serve.CephadmServe._run_cephadm', _run_cephadm(data)): + with pytest.raises(OrchestratorError) as e: + cephadm_module.replace_device('test', '/dev/invalid-device') + assert "/dev/invalid-device doesn't appear to be used for an OSD, not a valid device in test." 
in str(e.value) + + def test_invalid_hostname(self, cephadm_module: CephadmOrchestrator) -> None: + with patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')): + with with_host(cephadm_module, 'test'): + CephadmServe(cephadm_module)._refresh_host_daemons('test') + with patch('cephadm.serve.CephadmServe._run_cephadm', _run_cephadm(data)): + with pytest.raises(OrchestratorError): + cephadm_module.replace_device('invalid-hostname', '/dev/vdb') + + def test_block_device(self, cephadm_module: CephadmOrchestrator) -> None: + with patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')): + with with_host(cephadm_module, 'test'): + CephadmServe(cephadm_module)._refresh_host_daemons('test') + with patch('cephadm.serve.CephadmServe._run_cephadm', _run_cephadm(data)): + c = cephadm_module.replace_device('test', '/dev/vdb') + result = wait(cephadm_module, c) + assert result == "Scheduled to destroy osds: ['0'] and mark /dev/vdb as being replaced." + + def test_shared_db_device_no_ireallymeanit_flag(self, cephadm_module: CephadmOrchestrator) -> None: + with patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')): + with with_host(cephadm_module, 'test'): + CephadmServe(cephadm_module)._refresh_host_daemons('test') + with patch('cephadm.serve.CephadmServe._run_cephadm', _run_cephadm(data)): + with pytest.raises(OrchestratorError) as e: + cephadm_module.replace_device('test', '/dev/vdk') + assert "/dev/vdk is a shared device.\nReplacing /dev/vdk implies destroying OSD(s): ['0', '1'].\nPlease, *be very careful*, this can be a very dangerous operation.\nIf you know what you are doing, pass --yes-i-really-mean-it" in str(e.value) + + def test_shared_db_device(self, cephadm_module: CephadmOrchestrator) -> None: + with patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')): + with with_host(cephadm_module, 'test'): + CephadmServe(cephadm_module)._refresh_host_daemons('test') + with patch('cephadm.serve.CephadmServe._run_cephadm', _run_cephadm(data)): + c = cephadm_module.replace_device('test', '/dev/vdk', yes_i_really_mean_it=True) + result = wait(cephadm_module, c) + assert result == "Scheduled to destroy osds: ['0', '1'] and mark /dev/vdk as being replaced." diff --git a/src/pybind/mgr/orchestrator/_interface.py b/src/pybind/mgr/orchestrator/_interface.py index 82a8c13a9c11e..c05332df59a28 100644 --- a/src/pybind/mgr/orchestrator/_interface.py +++ b/src/pybind/mgr/orchestrator/_interface.py @@ -520,6 +520,15 @@ def rescan_host(self, hostname: str) -> OrchResult: """ raise NotImplementedError() + def replace_device(self, + hostname: str, + device: str, + clear: bool = False, + yes_i_really_mean_it: bool = False) -> OrchResult: + """Perform all required operations in order to replace a device. + """ + raise NotImplementedError() + def get_inventory(self, host_filter: Optional['InventoryFilter'] = None, refresh: bool = False) -> OrchResult[List['InventoryHost']]: """ Returns something that was created by `ceph-volume inventory`. @@ -699,12 +708,18 @@ def preview_osdspecs(self, def remove_osds(self, osd_ids: List[str], replace: bool = False, + replace_block: bool = False, + replace_db: bool = False, + replace_wal: bool = False, force: bool = False, zap: bool = False, no_destroy: bool = False) -> OrchResult[str]: """ :param osd_ids: list of OSD IDs :param replace: marks the OSD as being destroyed. See :ref:`orchestrator-osd-replace` + :param replace_block: marks the corresponding block device as being replaced. 
+        :param replace_db: marks the corresponding db device as being replaced.
+        :param replace_wal: marks the corresponding wal device as being replaced.
         :param force: Forces the OSD removal process without waiting for the data to be drained first.
         :param zap: Zap/Erase all devices associated with the OSDs (DESTROYS DATA)
         :param no_destroy: Do not destroy associated VGs/LVs with the OSD.
diff --git a/src/pybind/mgr/orchestrator/module.py b/src/pybind/mgr/orchestrator/module.py
index be0096bb2d96e..7dd8c95af52c7 100644
--- a/src/pybind/mgr/orchestrator/module.py
+++ b/src/pybind/mgr/orchestrator/module.py
@@ -818,6 +818,21 @@ def _host_rescan(self, hostname: str, with_summary: bool = False) -> HandleComma
             return HandleCommandResult(stdout=completion.result_str())
         return HandleCommandResult(stdout=completion.result_str().split('.')[0])
 
+    @_cli_read_command('orch device replace')
+    def _replace_device(self,
+                        hostname: str,
+                        device: str,
+                        clear: bool = False,
+                        yes_i_really_mean_it: bool = False) -> HandleCommandResult:
+        """Perform all required operations in order to replace a device.
+        """
+        completion = self.replace_device(hostname=hostname,
+                                         device=device,
+                                         clear=clear,
+                                         yes_i_really_mean_it=yes_i_really_mean_it)
+        raise_if_exception(completion)
+        return HandleCommandResult(stdout=completion.result_str())
+
     @_cli_read_command('orch device ls')
     def _list_devices(self,
                       hostname: Optional[List[str]] = None,
@@ -1415,8 +1430,9 @@ def _osd_rm_start(self,
                       zap: bool = False,
                       no_destroy: bool = False) -> HandleCommandResult:
         """Remove OSD daemons"""
-        completion = self.remove_osds(osd_id, replace=replace, force=force,
-                                      zap=zap, no_destroy=no_destroy)
+        completion = self.remove_osds(osd_id,
+                                      replace=replace,
+                                      force=force, zap=zap, no_destroy=no_destroy)
         raise_if_exception(completion)
         return HandleCommandResult(stdout=completion.result_str())
 
diff --git a/src/python-common/ceph/deployment/drive_selection/selector.py b/src/python-common/ceph/deployment/drive_selection/selector.py
index 041f1ed30446f..59ebbb6347e43 100644
--- a/src/python-common/ceph/deployment/drive_selection/selector.py
+++ b/src/python-common/ceph/deployment/drive_selection/selector.py
@@ -131,6 +131,10 @@ def assign_devices(self, device_filter):
         for disk in self.disks:
             logger.debug("Processing disk {}".format(disk.path))
 
+            if disk.being_replaced:
+                logger.debug('Ignoring disk {} as it is being replaced.'.format(disk.path))
+                continue
+
             if not disk.available and not disk.ceph_device:
                 logger.debug(
                     ("Ignoring disk {}. "
diff --git a/src/python-common/ceph/deployment/inventory.py b/src/python-common/ceph/deployment/inventory.py
index a3023882108e3..e2c1a5605f9a6 100644
--- a/src/python-common/ceph/deployment/inventory.py
+++ b/src/python-common/ceph/deployment/inventory.py
@@ -54,7 +54,8 @@ class Device(object):
         'human_readable_type',
         'device_id',
         'lsm_data',
-        'crush_device_class'
+        'crush_device_class',
+        'being_replaced'
     ]
 
     def __init__(self,
@@ -67,7 +68,8 @@ def __init__(self,
                  lsm_data=None,  # type: Optional[Dict[str, Dict[str, str]]]
                  created=None,  # type: Optional[datetime.datetime]
                  ceph_device=None,  # type: Optional[bool]
-                 crush_device_class=None  # type: Optional[str]
+                 crush_device_class=None,  # type: Optional[str]
+                 being_replaced=None,  # type: Optional[bool]
                  ):
 
         self.path = path
@@ -80,6 +82,7 @@ def __init__(self,
         self.created = created if created is not None else datetime_now()
         self.ceph_device = ceph_device
         self.crush_device_class = crush_device_class
+        self.being_replaced = being_replaced
 
     def __eq__(self, other):
         # type: (Any) -> bool
@@ -129,7 +132,8 @@ def __repr__(self) -> str:
             'lvs': self.lvs if self.lvs else 'None',
             'available': str(self.available),
             'ceph_device': str(self.ceph_device),
-            'crush_device_class': str(self.crush_device_class)
+            'crush_device_class': str(self.crush_device_class),
+            'being_replaced': str(self.being_replaced)
         }
         if not self.available and self.rejected_reasons:
             device_desc['rejection reasons'] = self.rejected_reasons