From c69a73a21ec26ca7e52bd2dea315dc0f83adce27 Mon Sep 17 00:00:00 2001
From: Amrita Mahapatra <49347640+amr1ta@users.noreply.github.com>
Date: Thu, 25 Jul 2024 15:54:44 +0530
Subject: [PATCH] Added a flag, mark_masters_schedulable, to mark master nodes schedulable if required and verify the CSV version of the native client

Signed-off-by: Amrita Mahapatra <49347640+amr1ta@users.noreply.github.com>
---
 ocs_ci/deployment/baremetal.py | 60 ++++++++++++++++++-
 .../storage_client_deployment.py | 50 +++++++++++++---
 ocs_ci/ocs/resources/storage_client.py | 16 +++++
 3 files changed, 118 insertions(+), 8 deletions(-)

diff --git a/ocs_ci/deployment/baremetal.py b/ocs_ci/deployment/baremetal.py
index 4cf04328007..57afc4cb179 100644
--- a/ocs_ci/deployment/baremetal.py
+++ b/ocs_ci/deployment/baremetal.py
@@ -1251,12 +1251,64 @@ def destroy_cluster(self, log_level="DEBUG"):
 
 
 @retry(exceptions.CommandFailed, tries=10, delay=30, backoff=1)
-def clean_disk(worker, namespace=constants.DEFAULT_NAMESPACE):
+def disks_available_to_cleanup(worker, namespace=constants.DEFAULT_NAMESPACE):
+    """
+    Get the names of the disks available for cleanup on a node
+
+    Args:
+        worker (object): worker node object
+        namespace (str): namespace where the oc_debug command will be executed
+
+    Returns:
+        disks_names_available_for_cleanup (list): names of the disks available for cleanup on the node
+
+    """
+    ocp_obj = ocp.OCP()
+    cmd = """lsblk --all --noheadings --output "KNAME,PKNAME,TYPE,MOUNTPOINT" --json"""
+    out = ocp_obj.exec_oc_debug_cmd(
+        node=worker.name, cmd_list=[cmd], namespace=namespace
+    )
+    disk_to_ignore_cleanup_raw = json.loads(str(out))
+    disks_available = disk_to_ignore_cleanup_raw["blockdevices"]
+    logger.info(f"The disks available for cleanup json: {disks_available}")
+    boot_disks = set()
+    disks_available_for_cleanup = []
+    for disk in disks_available:
+        # First pass: identify boot disks and filter out ROM disks
+        if disk["type"] == "rom":
+            continue
+        if "nbd" in disk["kname"]:
+            continue
+        if disk["type"] == "part" and disk["mountpoint"] == "/boot":
+            boot_disks.add(disk["pkname"])
+        if disk["type"] == "disk":
+            disks_available_for_cleanup.append(disk)
+
+    # Second pass: filter out boot disks
+    disks_available_for_cleanup = [
+        disk for disk in disks_available_for_cleanup if disk["kname"] not in boot_disks
+    ]
+    disks_names_available_for_cleanup = [
+        disk["kname"] for disk in disks_available_for_cleanup
+    ]
+
+    return disks_names_available_for_cleanup
+
+
+@retry(exceptions.CommandFailed, tries=10, delay=30, backoff=1)
+def clean_disk(
+    worker, namespace=constants.DEFAULT_NAMESPACE, return_no_of_disks_cleanedup=False
+):
     """
     Perform disk cleanup
 
     Args:
         worker (object): worker node object
+        namespace (str): namespace where the oc_debug command will be executed
+        return_no_of_disks_cleanedup (bool): If True, return the number of disks cleaned
+
+    Returns:
+        disks_cleaned (int): number of disks cleaned on the node (only when return_no_of_disks_cleanedup is True)
 
     """
     ocp_obj = ocp.OCP()
@@ -1291,6 +1343,7 @@ def clean_disk(worker, namespace=constants.DEFAULT_NAMESPACE):
 
     lsblk_output = json.loads(str(out))
     lsblk_devices = lsblk_output["blockdevices"]
+    disks_cleaned = []
     for lsblk_device in lsblk_devices:
         if lsblk_device["name"] in selected_disks_to_ignore_cleanup:
             logger.info(f'the disk cleanup is ignored for, {lsblk_device["name"]}')
@@ -1309,6 +1362,11 @@ def clean_disk(worker, namespace=constants.DEFAULT_NAMESPACE):
                 namespace=namespace,
             )
             logger.info(out)
+            disks_cleaned.append(lsblk_device["name"])
+
+    if return_no_of_disks_cleanedup:
+        logger.info(f"Number of disks cleaned and available for LSO: {len(disks_cleaned)}")
+        return len(disks_cleaned)
 
 
 class BaremetalPSIUPI(Deployment):
diff --git a/ocs_ci/deployment/provider_client/storage_client_deployment.py b/ocs_ci/deployment/provider_client/storage_client_deployment.py
index 5c5f71f1f93..ba803fcbdef 100644
--- a/ocs_ci/deployment/provider_client/storage_client_deployment.py
+++ b/ocs_ci/deployment/provider_client/storage_client_deployment.py
@@ -13,7 +13,7 @@
     check_phase_of_rados_namespace,
 )
 from ocs_ci.deployment.helpers.lso_helpers import setup_local_storage
-from ocs_ci.ocs.node import label_nodes, get_all_nodes, get_node_objs
+from ocs_ci.ocs.node import label_nodes, get_all_nodes, get_node_objs, get_nodes
 from ocs_ci.ocs.utils import (
     setup_ceph_toolbox,
     enable_console_plugin,
@@ -24,7 +24,7 @@
 )
 from ocs_ci.utility import templating, kms as KMS, version
 from ocs_ci.deployment.deployment import Deployment, create_catalog_source
-from ocs_ci.deployment.baremetal import clean_disk
+from ocs_ci.deployment.baremetal import clean_disk, disks_available_to_cleanup
 from ocs_ci.ocs.resources.storage_cluster import verify_storage_cluster
 from ocs_ci.ocs.resources.storage_client import StorageClient
 from ocs_ci.ocs.bucket_utils import check_pv_backingstore_type
@@ -78,6 +78,7 @@ def initial_function(self):
             namespace=config.ENV_DATA["cluster_namespace"],
         )
 
+        self.platform = config.ENV_DATA.get("platform").lower()
         self.deployment = Deployment()
         self.storage_clients = StorageClient()
 
@@ -95,13 +96,10 @@ def provider_and_native_client_installation(
         6. Disable ROOK_CSI_ENABLE_CEPHFS and ROOK_CSI_ENABLE_RBD
         7. Create storage profile
         """
-
-        # Allow ODF to be deployed on all nodes
         nodes = get_all_nodes()
         node_objs = get_node_objs(nodes)
-
-        log.info("labeling storage nodes")
-        label_nodes(nodes=node_objs, label=constants.OPERATOR_NODE_LABEL)
+        worker_node_objs = get_nodes(node_type=constants.WORKER_MACHINE)
+        no_of_worker_nodes = len(worker_node_objs)
 
         # Allow hosting cluster domain to be usable by hosted clusters
         path = "/spec/routeAdmission"
@@ -124,6 +122,29 @@ def provider_and_native_client_installation(
         wait_for_machineconfigpool_status(node_type="all")
         log.info("All the nodes are upgraded")
 
+        # Mark master nodes schedulable if mark_masters_schedulable is set to True
+        if config.ENV_DATA.get("mark_masters_schedulable", False):
+            path = "/spec/mastersSchedulable"
+            params = f"""[{{"op": "replace", "path": "{path}", "value": true}}]"""
+            assert self.scheduler_obj.patch(params=params, format_type="json"), (
+                "Failed to run patch command to update control nodes as schedulable"
+            )
+            # Allow ODF to be deployed on all nodes
+
+            log.info("labeling all nodes as storage nodes")
+            label_nodes(nodes=node_objs, label=constants.OPERATOR_NODE_LABEL)
+        else:
+            log.info("labeling worker nodes as storage nodes")
+            label_nodes(nodes=worker_node_objs, label=constants.OPERATOR_NODE_LABEL)
+
+        disks_available_on_worker_nodes = disks_available_to_cleanup(
+            worker_node_objs[0]
+        )
+        log.info(
+            f"disks available for cleanup: {disks_available_on_worker_nodes}, "
+            f"number of disks available for cleanup: {len(disks_available_on_worker_nodes)}"
+        )
+
         # Install LSO, create LocalVolumeDiscovery and LocalVolumeSet
         is_local_storage_available = self.sc_obj.is_exist(
             resource_name=self.storageclass,
@@ -197,6 +218,14 @@ def provider_and_native_client_installation(
             storage_cluster_data = self.add_encryption_details_to_cluster_data(
                 storage_cluster_data
             )
+            if self.platform == constants.BAREMETAL_PLATFORM:
+                storage_cluster_data["spec"]["storageDeviceSets"][0][
"replica" + ] = no_of_worker_nodes + storage_cluster_data["spec"]["storageDeviceSets"][0][ + "count" + ] = no_of_disks_available_on_worker_nodes + templating.dump_data_to_temp_yaml( storage_cluster_data, constants.OCS_STORAGE_CLUSTER_YAML ) @@ -210,6 +239,13 @@ def provider_and_native_client_installation( storage_cluster_data = self.add_encryption_details_to_cluster_data( storage_cluster_data ) + if self.platform == constants.BAREMETAL_PLATFORM: + storage_cluster_data["spec"]["storageDeviceSets"][0][ + "replica" + ] = no_of_worker_nodes + storage_cluster_data["spec"]["storageDeviceSets"][0][ + "count" + ] = no_of_disks_available_on_worker_nodes templating.dump_data_to_temp_yaml( storage_cluster_data, constants.OCS_STORAGE_CLUSTER_UPDATED_YAML ) diff --git a/ocs_ci/ocs/resources/storage_client.py b/ocs_ci/ocs/resources/storage_client.py index 49d45c73973..19672c72a25 100644 --- a/ocs_ci/ocs/resources/storage_client.py +++ b/ocs_ci/ocs/resources/storage_client.py @@ -16,6 +16,8 @@ from ocs_ci.helpers.managed_services import ( get_all_storageclassclaims, ) +from ocs_ci.ocs.resources.ocs import get_ocs_csv +from ocs_ci.ocs.resources.storage_cluster import verify_storage_cluster from ocs_ci.utility.utils import TimeoutSampler log = logging.getLogger(__name__) @@ -503,11 +505,25 @@ def verify_native_storageclient(self): storageclaims, associated storageclasses and storagerequests are created successfully. """ + ocs_csv = get_ocs_csv() + client_csv_version = ocs_csv.data["spec"]["version"] + ocs_version = version.get_ocs_version_from_csv(only_major_minor=True) + log.info( + f"Check if OCS version: {ocs_version} matches with CSV: {client_csv_version}" + ) + assert ( + f"{ocs_version}" in client_csv_version + ), f"OCS version: {ocs_version} mismatch with CSV version {client_csv_version}" if self.ocs_version >= version.VERSION_4_16: namespace = config.ENV_DATA["cluster_namespace"] else: namespace = constants.OPENSHIFT_STORAGE_CLIENT_NAMESPACE + # Check ocs-storagecluster is in 'Ready' status + log.info("Verify storagecluster on Ready state") + verify_storage_cluster() + + # Fetch storage-client name storageclient_name = self.get_storageclient_name(namespace) # Verify storageclient is in Connected status