Skip to content

Commit

Permalink
Provider mode deployment with Hosted clusters (#10120)
Browse files Browse the repository at this point in the history
Changes:
    fix leftover, to prevent issues such as https://ocs4-jenkins-csb-odf-qe.apps.ocp-c1.prod.psi.redhat.com/job/qe-deploy-ocs-cluster/39701/consoleFull (metallb installation and latest OCP fetch from release notes)
    changed ODF and Hosted cluster installation so that they are done by completing Jenkins stages: stage 1. Deploy OCP cluster; stage 2. Install ODF, dependencies and Hosted Cluster
    cleaned up ODF provider and native client validation. Fixed some failures that occurred while waiting for just-deployed resources
    the ODF version can now be given as "latest-4.16", set in

clusters: 
   hcp416-bm2-a:
     hosted_odf_version: "latest-4.16" 

"--olm-disable-default-sources" parametrization added
FIXES
prevent failures on converged deployment Provider and Hosted cluster #10160

Signed-off-by: Daniel Osypenko <[email protected]>
  • Loading branch information
DanielOsypenko authored Jul 24, 2024
1 parent 009a990 commit ea173fb
Show file tree
Hide file tree
Showing 12 changed files with 217 additions and 78 deletions.
51 changes: 44 additions & 7 deletions ocs_ci/deployment/deployment.py
Original file line number Diff line number Diff line change
Expand Up @@ -580,6 +580,19 @@ def do_deploy_odf_provider_mode(self):
):
storage_client_deployment_obj.provider_and_native_client_installation()

def do_deploy_cnv(self):
    """
    Deploy CNV

    CNV is installed only when `cnv_deployment` is enabled in config.DEPLOYMENT and
    `skip_ocs_deployment` is truthy in config.ENV_DATA.
    We run it in the OCP deployment stage, hence `skip_ocs_deployment` is set to True.
    When we run the OCS deployment stage, `skip_ocs_deployment` is set to False automatically
    and a second installation does not happen.
    """
    # CNV deployment was not requested for this run
    if not config.DEPLOYMENT.get("cnv_deployment"):
        return
    # not the OCP deployment stage, CNV is expected to be installed already
    if not config.ENV_DATA["skip_ocs_deployment"]:
        return
    CNVInstaller().deploy_cnv()

def do_deploy_hosted_clusters(self):
"""
Deploy Hosted cluster(s)
Expand Down Expand Up @@ -656,8 +669,7 @@ def deploy_cluster(self, log_cli_level="DEBUG"):
self.do_deploy_rdr()
self.do_deploy_fusion()
self.do_deploy_odf_provider_mode()
if config.DEPLOYMENT.get("cnv_deployment"):
CNVInstaller().deploy_cnv()
self.do_deploy_cnv()
self.do_deploy_hosted_clusters()

def get_rdr_conf(self):
Expand Down Expand Up @@ -921,7 +933,7 @@ def wait_for_subscription(self, subscription_name, namespace=None):
if not namespace:
namespace = self.namespace

if config.multicluster:
if self.muliclusterhub_running():
resource_kind = constants.SUBSCRIPTION_WITH_ACM
else:
resource_kind = constants.SUBSCRIPTION
Expand Down Expand Up @@ -1054,7 +1066,7 @@ def deploy_ocs_via_operator(self, image=None):
worker_nodes = get_worker_nodes()
node_obj = ocp.OCP(kind="node")
platform = config.ENV_DATA.get("platform").lower()
if platform != constants.BAREMETAL_PLATFORM:
if platform not in [constants.BAREMETAL_PLATFORM, constants.HCI_BAREMETAL]:
for node in worker_nodes:
for interface in interfaces:
ip_link_cmd = f"ip link set promisc on {interface}"
Expand Down Expand Up @@ -1297,7 +1309,10 @@ def deploy_ocs_via_operator(self, image=None):
cluster_data["spec"]["storageDeviceSets"][0]["replica"] = 1

# set size of request for storage
if self.platform.lower() == constants.BAREMETAL_PLATFORM:
if self.platform.lower() in [
constants.BAREMETAL_PLATFORM,
constants.HCI_BAREMETAL,
]:
pv_size_list = helpers.get_pv_size(
storageclass=self.DEFAULT_STORAGECLASS_LSO
)
Expand Down Expand Up @@ -2321,6 +2336,28 @@ def deploy_multicluster_hub(self):
logger.error(f"Failed to install MultiClusterHub. Exception is: {ex}")
return False

def muliclusterhub_running(self):
    """
    Check if MultiCluster Hub is running

    Waits briefly (timeout=6, sleep=3) for the MultiClusterHub resource in the ACM hub
    namespace to report the Running status.

    Returns:
        bool: result of the wait for the MultiCluster Hub Running status,
            False if the wait raised CommandFailed
    """
    hub_ocp = OCP(
        kind=constants.ACM_MULTICLUSTER_HUB, namespace=constants.ACM_HUB_NAMESPACE
    )
    wait_params = {
        "condition": constants.STATUS_RUNNING,
        "resource_name": constants.ACM_MULTICLUSTER_RESOURCE,
        "column": "STATUS",
        "timeout": 6,
        "sleep": 3,
    }
    try:
        return hub_ocp.wait_for_resource(**wait_params)
    except CommandFailed:
        # a failed command is reported as "not running" rather than propagated
        return False


def create_external_pgsql_secret():
"""
Expand Down Expand Up @@ -2359,8 +2396,8 @@ def validate_acm_hub_install():
condition=constants.STATUS_RUNNING,
resource_name=constants.ACM_MULTICLUSTER_RESOURCE,
column="STATUS",
timeout=720,
sleep=5,
timeout=1200,
sleep=30,
)
logger.info("MultiClusterHub Deployment Succeeded")

Expand Down
5 changes: 5 additions & 0 deletions ocs_ci/deployment/helpers/hypershift_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,7 @@ def create_kubevirt_ocp_cluster(
root_volume_size: str = 40,
ocp_version=None,
cp_availability_policy=None,
disable_default_sources=None,
):
"""
Create HyperShift hosted cluster. Default parameters have minimal requirements for the cluster.
Expand All @@ -251,6 +252,7 @@ def create_kubevirt_ocp_cluster(
root_volume_size (str): Root volume size of the cluster, default 40 (Gi is not required)
cp_availability_policy (str): Control plane availability policy, default HighlyAvailable, if no value
provided and argument is not used in the command the single replica mode cluster will be created
disable_default_sources (bool): Disable default sources on hosted cluster, such as 'redhat-operators'
Returns:
str: Name of the hosted cluster
"""
Expand Down Expand Up @@ -308,6 +310,9 @@ def create_kubevirt_ocp_cluster(
f" --control-plane-availability-policy {cp_availability_policy} "
)

if disable_default_sources:
create_hcp_cluster_cmd += " --olm-disable-default-sources"

logger.info("Creating HyperShift hosted cluster")
exec_cmd(create_hcp_cluster_cmd)

Expand Down
2 changes: 1 addition & 1 deletion ocs_ci/deployment/helpers/lso_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ def setup_local_storage(storageclass):
# extra_disks is used in vSphere attach_disk() method
storage_class_device_count = config.ENV_DATA.get("extra_disks", 1)
expected_pvs = len(worker_names) * storage_class_device_count
if platform == constants.BAREMETAL_PLATFORM:
if platform in [constants.BAREMETAL_PLATFORM, constants.HCI_BAREMETAL]:
verify_pvs_created(expected_pvs, storageclass, False)
else:
verify_pvs_created(expected_pvs, storageclass)
Expand Down
70 changes: 35 additions & 35 deletions ocs_ci/deployment/hosted_cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -371,13 +371,19 @@ def deploy_ocp(
cp_availability_policy = (
config.ENV_DATA["clusters"].get(self.name).get("cp_availability_policy")
)
disable_default_sources = (
config.ENV_DATA["clusters"]
.get(self.name)
.get("disable_default_sources", False)
)
return self.create_kubevirt_ocp_cluster(
name=self.name,
nodepool_replicas=nodepool_replicas,
cpu_cores=cpu_cores_per_hosted_cluster,
memory=memory_per_hosted_cluster,
ocp_version=ocp_version,
cp_availability_policy=cp_availability_policy,
disable_default_sources=disable_default_sources,
)

def deploy_dependencies(
Expand Down Expand Up @@ -523,7 +529,12 @@ def __init__(self, name: str):
self.timeout_check_resources_exist_sec = 6
self.timeout_wait_csvs_minutes = 20
self.timeout_wait_pod_minutes = 30
self.storage_client_name = None

# default storage client name picked from the storage client yaml
storage_client_data = templating.load_yaml(
constants.PROVIDER_MODE_STORAGE_CLIENT
)
self.storage_client_name = storage_client_data["metadata"]["name"]

@kubeconfig_exists_decorator
def exec_oc_cmd(self, cmd, timeout=300, ignore_error=False, **kwargs):
Expand Down Expand Up @@ -622,19 +633,9 @@ def do_deploy(self):
"""
Deploy ODF client on hosted OCP cluster
"""
logger.info(f"Deploying ODF client on hosted OCP cluster '{self.name}'")
hosted_odf_version = get_semantic_version(
config.ENV_DATA.get("clusters").get(self.name).get("hosted_odf_version"),
only_major_minor=True,
logger.info(
f"Deploying ODF client on hosted OCP cluster '{self.name}'. Creating ODF client namespace"
)

no_network_policy_version = version.VERSION_4_16

if hosted_odf_version < no_network_policy_version:
logger.info("Applying network policy")
self.apply_network_policy()

logger.info("Creating ODF client namespace")
self.create_ns()

if self.odf_csv_installed():
Expand Down Expand Up @@ -1086,12 +1087,13 @@ def create_subscription(self):
subscription_data = templating.load_yaml(constants.PROVIDER_MODE_SUBSCRIPTION)

# since we are allowed to install N+1 on hosted clusters we can not rely on PackageManifest default channel
odf_version = get_semantic_version(
config.ENV_DATA.get("clusters").get(self.name).get("hosted_odf_version"),
only_major_minor=True,
hosted_odf_version = (
config.ENV_DATA.get("clusters").get(self.name).get("hosted_odf_version")
)
if "latest" in hosted_odf_version:
hosted_odf_version = hosted_odf_version.split("-")[-1]

subscription_data["spec"]["channel"] = f"stable-{str(odf_version)}"
subscription_data["spec"]["channel"] = f"stable-{str(hosted_odf_version)}"

subscription_file = tempfile.NamedTemporaryFile(
mode="w+", prefix="subscription", delete=False
Expand Down Expand Up @@ -1140,15 +1142,14 @@ def storage_claim_exists_cephfs(self):
Returns:
bool: True if storage class claim for CephFS exists, False otherwise
"""
if (
get_semantic_version(
config.ENV_DATA.get("clusters")
.get(self.name)
.get("hosted_odf_version"),
only_major_minor=True,
)
< version.VERSION_4_16
):

hosted_odf_version = (
config.ENV_DATA.get("clusters").get(self.name).get("hosted_odf_version")
)
if "latest" in hosted_odf_version:
hosted_odf_version = hosted_odf_version.split("-")[-1]

if get_semantic_version(hosted_odf_version, True) < version.VERSION_4_16:
ocp = OCP(
kind=constants.STORAGECLASSCLAIM,
namespace=self.namespace_client,
Expand Down Expand Up @@ -1228,15 +1229,14 @@ def storage_claim_exists_rbd(self):
Returns:
bool: True if storage class claim for RBD exists, False otherwise
"""
if (
get_semantic_version(
config.ENV_DATA.get("clusters")
.get(self.name)
.get("hosted_odf_version"),
True,
)
< version.VERSION_4_16
):

hosted_odf_version = (
config.ENV_DATA.get("clusters").get(self.name).get("hosted_odf_version")
)
if "latest" in hosted_odf_version:
hosted_odf_version = hosted_odf_version.split("-")[-1]

if get_semantic_version(hosted_odf_version, True) < version.VERSION_4_16:
ocp = OCP(
kind=constants.STORAGECLASSCLAIM,
namespace=self.namespace_client,
Expand Down
21 changes: 19 additions & 2 deletions ocs_ci/deployment/metallb.py
Original file line number Diff line number Diff line change
Expand Up @@ -500,9 +500,9 @@ def deploy_lb(self):
logger.info(
f"Deploying MetalLB and dependant resources to namespace: '{self.namespace_lb}'"
)
# icsp mirrors necessary to download packages for the downstream version of metallb
self.apply_icsp()

if self.apply_icsp():
logger.info("ICSP brew-registry applied successfully")
if self.create_metallb_namespace():
logger.info(f"Namespace {self.namespace_lb} created successfully")
if self.create_catalog_source():
Expand Down Expand Up @@ -669,10 +669,26 @@ def wait_csv_installed(self):
break
return True

def icsp_brew_registry_exists(self):
    """
    Check if the ICSP Brew registry exists

    Looks up the ImageContentSourcePolicy named 'brew-registry', using
    `self.timeout_check_resources_existence` as the timeout of the existence check.

    Returns:
        bool: True if the ICSP Brew registry exists, False otherwise
    """
    brew_icsp = OCP(kind="ImageContentSourcePolicy", resource_name="brew-registry")
    icsp_found = brew_icsp.check_resource_existence(
        timeout=self.timeout_check_resources_existence, should_exist=True
    )
    return icsp_found

def apply_icsp(self):
"""
Apply the ICSP to the cluster
"""
if self.icsp_brew_registry_exists():
logger.info("ICSP Brew registry already exists")
return
icsp_data = templating.load_yaml(constants.SUBMARINER_DOWNSTREAM_BREW_ICSP)
icsp_data_yaml = tempfile.NamedTemporaryFile(
mode="w+", prefix="acm_icsp", delete=False
Expand All @@ -681,3 +697,4 @@ def apply_icsp(self):
exec_cmd(f"oc create -f {icsp_data_yaml.name}", timeout=300)
wait_for_machineconfigpool_status(node_type="all")
logger.info("ICSP applied successfully")
return self.icsp_brew_registry_exists()
5 changes: 3 additions & 2 deletions ocs_ci/helpers/managed_services.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Managed Services related functionalities
"""

import logging
import re

Expand Down Expand Up @@ -236,7 +237,7 @@ def verify_osd_distribution_on_provider():
# 4Ti is the size of OSD
assert (
osd_count == int(size) / 4
), f"Zone {zone} does not have {size/4} osd, but {osd_count}"
), f"Zone {zone} does not have {size / 4} osd, but {osd_count}"


def verify_storageclient(
Expand Down Expand Up @@ -326,7 +327,7 @@ def get_all_storageclassclaims(namespace=None):
sc_claim_obj = OCP(kind=constants.STORAGECLAIM, namespace=namespace)
else:
sc_claim_obj = OCP(kind=constants.STORAGECLASSCLAIM, namespace=namespace)
sc_claims_data = sc_claim_obj.get()["items"]
sc_claims_data = sc_claim_obj.get(retry=6, wait=30)["items"]
log.info(f"storage claims: {sc_claims_data}")
return [OCS(**claim_data) for claim_data in sc_claims_data]

Expand Down
1 change: 1 addition & 0 deletions ocs_ci/ocs/node.py
Original file line number Diff line number Diff line change
Expand Up @@ -797,6 +797,7 @@ def get_compute_node_names(no_replace=False):
constants.BAREMETAL_PLATFORM,
constants.BAREMETALPSI_PLATFORM,
constants.IBM_POWER_PLATFORM,
constants.HCI_BAREMETAL,
]:
if no_replace:
return [
Expand Down
Loading

0 comments on commit ea173fb

Please sign in to comment.