diff --git a/ocs_ci/deployment/baremetal.py b/ocs_ci/deployment/baremetal.py
index 4cf04328007..cd33a8aa580 100644
--- a/ocs_ci/deployment/baremetal.py
+++ b/ocs_ci/deployment/baremetal.py
@@ -1251,12 +1251,16 @@ def destroy_cluster(self, log_level="DEBUG"):


 @retry(exceptions.CommandFailed, tries=10, delay=30, backoff=1)
-def clean_disk(worker, namespace=constants.DEFAULT_NAMESPACE):
+def disks_available_to_cleanup(worker, namespace=constants.DEFAULT_NAMESPACE):
     """
-    Perform disk cleanup
+    Get the disks available for cleanup on a node

     Args:
         worker (object): worker node object
+        namespace (str): namespace where the oc_debug command will be executed
+
+    Returns:
+        disk_names_available_for_cleanup (list): The disk names available for cleanup on the node

     """
     ocp_obj = ocp.OCP()
@@ -1265,23 +1269,43 @@ def clean_disk(worker, namespace=constants.DEFAULT_NAMESPACE):
         node=worker.name, cmd_list=[cmd], namespace=namespace
     )
     disk_to_ignore_cleanup_raw = json.loads(str(out))
-    disk_to_ignore_cleanup_json = disk_to_ignore_cleanup_raw["blockdevices"]
-    selected_disks_to_ignore_cleanup = []
-    for disk_to_ignore_cleanup in disk_to_ignore_cleanup_json:
-        if disk_to_ignore_cleanup["mountpoint"] == "/boot":
-            logger.info(
-                f"Ignorning disk {disk_to_ignore_cleanup['pkname']} for cleanup because it's a root disk "
-            )
-            selected_disks_to_ignore_cleanup.append(
-                str(disk_to_ignore_cleanup["pkname"])
-            )
-        elif disk_to_ignore_cleanup["type"] == "rom":
-            logger.info(
-                f"Ignorning disk {disk_to_ignore_cleanup['kname']} for cleanup because it's a rom disk "
-            )
-            selected_disks_to_ignore_cleanup.append(
-                str(disk_to_ignore_cleanup["kname"])
-            )
+    disks_available = disk_to_ignore_cleanup_raw["blockdevices"]
+    boot_disks = set()
+    disks_available_for_cleanup = []
+    for disk in disks_available:
+        # First pass: identify boot disks and filter out ROM and nbd devices
+        if disk["type"] == "rom":
+            continue
+        if "nbd" in disk["kname"]:
+            continue
+        if disk["type"] == "part" and disk["mountpoint"] == "/boot":
+            boot_disks.add(disk["pkname"])
+        if disk["type"] == "disk":
+            disks_available_for_cleanup.append(disk)
+
+    # Second pass: filter out boot disks
+    disks_available_for_cleanup = [
+        disk for disk in disks_available_for_cleanup if disk["kname"] not in boot_disks
+    ]
+    disk_names_available_for_cleanup = [
+        disk["kname"] for disk in disks_available_for_cleanup
+    ]
+
+    return disk_names_available_for_cleanup
+
+
+@retry(exceptions.CommandFailed, tries=10, delay=30, backoff=1)
+def clean_disk(worker, namespace=constants.DEFAULT_NAMESPACE):
+    """
+    Perform disk cleanup
+
+    Args:
+        worker (object): worker node object
+        namespace (str): namespace where the oc_debug command will be executed
+
+    """
+    ocp_obj = ocp.OCP()
+    disks_available_on_worker_nodes_for_cleanup = disks_available_to_cleanup(worker)

     out = ocp_obj.exec_oc_debug_cmd(
         node=worker.name,
@@ -1292,7 +1316,7 @@ def clean_disk(worker, namespace=constants.DEFAULT_NAMESPACE):
     lsblk_devices = lsblk_output["blockdevices"]

     for lsblk_device in lsblk_devices:
-        if lsblk_device["name"] in selected_disks_to_ignore_cleanup:
+        if lsblk_device["name"] not in disks_available_on_worker_nodes_for_cleanup:
             logger.info(f'the disk cleanup is ignored for, {lsblk_device["name"]}')
             pass
         else:
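The new `disks_available_to_cleanup` helper reduces the lsblk JSON to whole disks that are not ROM devices, not nbd devices, and not the parent of the `/boot` partition, and `clean_disk` now wipes only those names. A minimal standalone sketch of that two-pass filter follows; the sample lsblk payload is made up for illustration, whereas the real data comes from `exec_oc_debug_cmd` on the node:

```python
import json

# Hypothetical output in the shape produced by:
# lsblk --all --noheadings --output "KNAME,PKNAME,TYPE,MOUNTPOINT" --json
sample = json.loads(
    """
    {"blockdevices": [
        {"kname": "sda",  "pkname": null,  "type": "disk", "mountpoint": null},
        {"kname": "sda1", "pkname": "sda", "type": "part", "mountpoint": "/boot"},
        {"kname": "sdb",  "pkname": null,  "type": "disk", "mountpoint": null},
        {"kname": "sr0",  "pkname": null,  "type": "rom",  "mountpoint": null},
        {"kname": "nbd0", "pkname": null,  "type": "disk", "mountpoint": null}
    ]}
    """
)

boot_disks = set()
candidates = []
for disk in sample["blockdevices"]:
    # Skip ROM and network block devices outright.
    if disk["type"] == "rom" or "nbd" in disk["kname"]:
        continue
    # A /boot partition marks its parent disk as a boot disk.
    if disk["type"] == "part" and disk["mountpoint"] == "/boot":
        boot_disks.add(disk["pkname"])
    if disk["type"] == "disk":
        candidates.append(disk)

# Second pass: drop the boot disks themselves.
cleanup_targets = [d["kname"] for d in candidates if d["kname"] not in boot_disks]
print(cleanup_targets)  # ['sdb'] for the sample above
```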
diff --git a/ocs_ci/deployment/provider_client/storage_client_deployment.py b/ocs_ci/deployment/provider_client/storage_client_deployment.py
index 5c5f71f1f93..cce7e719154 100644
--- a/ocs_ci/deployment/provider_client/storage_client_deployment.py
+++ b/ocs_ci/deployment/provider_client/storage_client_deployment.py
@@ -13,7 +13,7 @@
     check_phase_of_rados_namespace,
 )
 from ocs_ci.deployment.helpers.lso_helpers import setup_local_storage
-from ocs_ci.ocs.node import label_nodes, get_all_nodes, get_node_objs
+from ocs_ci.ocs.node import label_nodes, get_all_nodes, get_node_objs, get_nodes
 from ocs_ci.ocs.utils import (
     setup_ceph_toolbox,
     enable_console_plugin,
@@ -24,7 +24,7 @@
 )
 from ocs_ci.utility import templating, kms as KMS, version
 from ocs_ci.deployment.deployment import Deployment, create_catalog_source
-from ocs_ci.deployment.baremetal import clean_disk
+from ocs_ci.deployment.baremetal import clean_disk, disks_available_to_cleanup
 from ocs_ci.ocs.resources.storage_cluster import verify_storage_cluster
 from ocs_ci.ocs.resources.storage_client import StorageClient
 from ocs_ci.ocs.bucket_utils import check_pv_backingstore_type
@@ -78,6 +78,7 @@ def initial_function(self):
             namespace=config.ENV_DATA["cluster_namespace"],
         )

+        self.platform = config.ENV_DATA.get("platform").lower()
         self.deployment = Deployment()
         self.storage_clients = StorageClient()

@@ -95,13 +96,10 @@ def provider_and_native_client_installation(
         6. Disable ROOK_CSI_ENABLE_CEPHFS and ROOK_CSI_ENABLE_RBD
         7. Create storage profile
         """
-
-        # Allow ODF to be deployed on all nodes
         nodes = get_all_nodes()
         node_objs = get_node_objs(nodes)
-
-        log.info("labeling storage nodes")
-        label_nodes(nodes=node_objs, label=constants.OPERATOR_NODE_LABEL)
+        worker_node_objs = get_nodes(node_type=constants.WORKER_MACHINE)
+        no_of_worker_nodes = len(worker_node_objs)

         # Allow hosting cluster domain to be usable by hosted clusters
         path = "/spec/routeAdmission"
@@ -124,6 +122,31 @@
         wait_for_machineconfigpool_status(node_type="all")
         log.info("All the nodes are upgraded")

+        # Mark master nodes schedulable if mark_masters_schedulable is set to True
+        if config.ENV_DATA.get("mark_masters_schedulable", False):
+            path = "/spec/mastersSchedulable"
+            params = f"""[{{"op": "replace", "path": "{path}", "value": true}}]"""
+            assert self.scheduler_obj.patch(
+                params=params, format_type="json"
+            ), "Failed to run patch command to update control nodes as schedulable"
+            # Allow ODF to be deployed on all nodes
+            log.info("labeling all nodes as storage nodes")
+            label_nodes(nodes=node_objs, label=constants.OPERATOR_NODE_LABEL)
+            worker_node_objs = get_nodes(node_type=constants.WORKER_MACHINE)
+            no_of_worker_nodes = len(worker_node_objs)
+        else:
+            log.info("labeling worker nodes as storage nodes")
+            label_nodes(nodes=worker_node_objs, label=constants.OPERATOR_NODE_LABEL)
+
+        disks_available_on_worker_nodes_for_cleanup = disks_available_to_cleanup(
+            worker_node_objs[0]
+        )
+        number_of_disks_available = len(disks_available_on_worker_nodes_for_cleanup)
+        log.info(
+            f"disks available for cleanup: {disks_available_on_worker_nodes_for_cleanup}, "
+            f"number of disks available for cleanup: {number_of_disks_available}"
+        )
+
         # Install LSO, create LocalVolumeDiscovery and LocalVolumeSet
         is_local_storage_available = self.sc_obj.is_exist(
             resource_name=self.storageclass,
@@ -197,6 +220,15 @@
                storage_cluster_data = self.add_encryption_details_to_cluster_data(
                    storage_cluster_data
                )
+            storage_cluster_data["spec"]["storageDeviceSets"][0][
+                "replica"
+            ] = no_of_worker_nodes
+
+            if self.platform in constants.HCI_PROVIDER_CLIENT_PLATFORMS:
+                storage_cluster_data["spec"]["storageDeviceSets"][0][
+                    "count"
+                ] = number_of_disks_available
+
             templating.dump_data_to_temp_yaml(
                 storage_cluster_data, constants.OCS_STORAGE_CLUSTER_YAML
             )
@@ -210,6 +242,14 @@
                storage_cluster_data = self.add_encryption_details_to_cluster_data(
                    storage_cluster_data
                )
+            storage_cluster_data["spec"]["storageDeviceSets"][0][
+                "replica"
+            ] = no_of_worker_nodes
+
+            if self.platform in constants.HCI_PROVIDER_CLIENT_PLATFORMS:
+                storage_cluster_data["spec"]["storageDeviceSets"][0][
+                    "count"
+                ] = number_of_disks_available
             templating.dump_data_to_temp_yaml(
                 storage_cluster_data, constants.OCS_STORAGE_CLUSTER_UPDATED_YAML
             )
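The deployment flow now sizes the StorageCluster from the cluster itself: `replica` follows the number of storage-labeled nodes and, on HCI provider/client platforms, `count` follows the disks reported by `disks_available_to_cleanup`. A sketch of just that template patch, using a stub dict and literal values in place of `templating.load_yaml`, `get_nodes`, and the real node objects:

```python
# Sketch only: storage_cluster_data normally comes from
# templating.load_yaml(constants.OCS_STORAGE_CLUSTER_YAML); the literal below
# mirrors the relevant part of the template.
storage_cluster_data = {
    "spec": {"storageDeviceSets": [{"count": 4, "replica": 3}]}
}

no_of_worker_nodes = 6          # e.g. len(get_nodes(node_type=constants.WORKER_MACHINE))
number_of_disks_available = 2   # e.g. len(disks_available_to_cleanup(worker_node_objs[0]))
is_hci_provider_client = True   # self.platform in constants.HCI_PROVIDER_CLIENT_PLATFORMS

device_set = storage_cluster_data["spec"]["storageDeviceSets"][0]
device_set["replica"] = no_of_worker_nodes
if is_hci_provider_client:
    device_set["count"] = number_of_disks_available

print(device_set)  # {'count': 2, 'replica': 6}
```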
diff --git a/ocs_ci/ocs/resources/storage_client.py b/ocs_ci/ocs/resources/storage_client.py
index 49d45c73973..19672c72a25 100644
--- a/ocs_ci/ocs/resources/storage_client.py
+++ b/ocs_ci/ocs/resources/storage_client.py
@@ -16,6 +16,8 @@
 from ocs_ci.helpers.managed_services import (
     get_all_storageclassclaims,
 )
+from ocs_ci.ocs.resources.ocs import get_ocs_csv
+from ocs_ci.ocs.resources.storage_cluster import verify_storage_cluster
 from ocs_ci.utility.utils import TimeoutSampler

 log = logging.getLogger(__name__)
@@ -503,11 +505,25 @@ def verify_native_storageclient(self):
         storageclaims, associated storageclasses and storagerequests are
         created successfully.
         """
+        ocs_csv = get_ocs_csv()
+        client_csv_version = ocs_csv.data["spec"]["version"]
+        ocs_version = version.get_ocs_version_from_csv(only_major_minor=True)
+        log.info(
+            f"Check if OCS version {ocs_version} matches the CSV version {client_csv_version}"
+        )
+        assert (
+            f"{ocs_version}" in client_csv_version
+        ), f"OCS version {ocs_version} does not match CSV version {client_csv_version}"
         if self.ocs_version >= version.VERSION_4_16:
             namespace = config.ENV_DATA["cluster_namespace"]
         else:
             namespace = constants.OPENSHIFT_STORAGE_CLIENT_NAMESPACE

+        # Check that ocs-storagecluster is in 'Ready' status
+        log.info("Verify storagecluster is in Ready state")
+        verify_storage_cluster()
+
+        # Fetch the storage-client name
         storageclient_name = self.get_storageclient_name(namespace)

         # Verify storageclient is in Connected status
diff --git a/ocs_ci/templates/ocs-deployment/provider-mode/ocs_storagecluster.yaml b/ocs_ci/templates/ocs-deployment/provider-mode/ocs_storagecluster.yaml
index c4dda62dc74..3784f4e6ca2 100644
--- a/ocs_ci/templates/ocs-deployment/provider-mode/ocs_storagecluster.yaml
+++ b/ocs_ci/templates/ocs-deployment/provider-mode/ocs_storagecluster.yaml
@@ -35,7 +35,7 @@ spec:
   providerAPIServerServiceType: ClusterIP
   storageDeviceSets:
   - config: {}
-    count: 2
+    count: 4
     dataPVCTemplate:
       metadata: {}
       spec:
@@ -52,5 +52,5 @@ spec:
     name: local-storage-deviceset
     placement: {}
     preparePlacement: {}
-    replica: 6
+    replica: 3
     resources: {}
diff --git a/ocs_ci/templates/ocs-deployment/provider-mode/ocs_storagecluster_updated.yaml b/ocs_ci/templates/ocs-deployment/provider-mode/ocs_storagecluster_updated.yaml
index cb2449cdf9f..d66c61db25b 100644
--- a/ocs_ci/templates/ocs-deployment/provider-mode/ocs_storagecluster_updated.yaml
+++ b/ocs_ci/templates/ocs-deployment/provider-mode/ocs_storagecluster_updated.yaml
@@ -33,7 +33,7 @@ spec:
   providerAPIServerServiceType: NodePort
   storageDeviceSets:
   - config: {}
-    count: 2
+    count: 4
     dataPVCTemplate:
       metadata: {}
       spec:
@@ -50,5 +50,5 @@ spec:
     name: local-storage-deviceset
     placement: {}
    preparePlacement: {}
-    replica: 6
+    replica: 3
     resources: {}
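The template defaults move to `count: 4` and `replica: 3`, and the deployment code above overrides both at run time from the worker and disk inventory. Assuming the usual StorageCluster arithmetic (OSDs per device set = count x replica), a quick sanity-check sketch:

```python
def expected_osds(count: int, replica: int) -> int:
    # One OSD per device in the set: count devices across replica failure domains.
    return count * replica

# Template defaults from the YAML above.
print(expected_osds(count=4, replica=3))  # 12

# Values the deployment code might substitute, e.g. 3 workers with 2 spare disks each.
print(expected_osds(count=2, replica=3))  # 6
```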