diff --git a/src/rookify/modules/ceph.py b/src/rookify/modules/ceph.py
index e4cb687..e8ef130 100644
--- a/src/rookify/modules/ceph.py
+++ b/src/rookify/modules/ceph.py
@@ -46,6 +46,11 @@ def get_osd_pool_configurations_from_osd_dump(
                     osd_pool["erasure_code_profile"], erasure_code_profiles["default"]
                 )
 
+                if osd_pool["erasure_code_configuration"].get("plugin") != "jerasure":
+                    raise ModuleException(
+                        "Unsupported Ceph erasure code profile plugin in use"
+                    )
+
         return osd_pools
 
     def mon_command(self, command: str, **kwargs: str) -> Dict[str, Any] | List[Any]:
diff --git a/src/rookify/modules/k8s_prerequisites_check/main.py b/src/rookify/modules/k8s_prerequisites_check/main.py
index a7d76cc..b73fdf2 100644
--- a/src/rookify/modules/k8s_prerequisites_check/main.py
+++ b/src/rookify/modules/k8s_prerequisites_check/main.py
@@ -6,6 +6,17 @@ class K8sPrerequisitesCheckHandler(ModuleHandler):
     def preflight(self) -> None:
+        self.logger.debug("K8sPrerequisitesCheck started validation")
+
+        # This is only a sanity check: we look the deployment up by name and label, with no guarantee that it is operational or actually the Rook operator.
+        deployments = self.k8s.apps_v1_api.list_deployment_for_all_namespaces(
+            field_selector="metadata.name=rook-ceph-operator",
+            label_selector="operator=rook",
+        )
+
+        if len(deployments.items) < 1:
+            raise ModuleException("Rook operator not found")
+
         namespace = self._config["rook"]["cluster"]["namespace"]
 
         namespaces = [
@@ -32,3 +43,4 @@ def preflight(self) -> None:
                     raise ModuleException(
                         "Label {0} is set on node {1}".format(label, node.metadata.name)
                     )
+        self.logger.info("K8sPrerequisitesCheck completed")
diff --git a/src/rookify/modules/migrate_mds_pools/main.py b/src/rookify/modules/migrate_mds_pools/main.py
index 2193fc0..099f48d 100644
--- a/src/rookify/modules/migrate_mds_pools/main.py
+++ b/src/rookify/modules/migrate_mds_pools/main.py
@@ -111,6 +111,14 @@ def _migrate_pool(self, pool: Dict[str, Any]) -> None:
                 "size": osd_configuration["size"],
             }
 
+            if osd_configuration.get("erasure_code_profile", "") != "":
+                profile_configuration = osd_configuration["erasure_code_configuration"]
+
+                definition_data_pool["erasure_code_configuration"] = {
+                    "coding": profile_configuration["m"],
+                    "data": profile_configuration["k"],
+                }
+
             filesystem_definition_values["data_pools"].append(definition_data_pool)
 
         # Render cluster config from template
diff --git a/src/rookify/modules/migrate_mds_pools/templates/filesystem.yaml.j2 b/src/rookify/modules/migrate_mds_pools/templates/filesystem.yaml.j2
index 2b55621..9af9a4c 100644
--- a/src/rookify/modules/migrate_mds_pools/templates/filesystem.yaml.j2
+++ b/src/rookify/modules/migrate_mds_pools/templates/filesystem.yaml.j2
@@ -30,11 +30,17 @@ spec:
 {% for pool in data_pools %}
     - name: {{ pool.name }}
       failureDomain: host
+      {% if pool.erasure_code_configuration %}
+      erasureCoded:
+        dataChunks: {{ pool.erasure_code_configuration.data }}
+        codingChunks: {{ pool.erasure_code_configuration.coding }}
+      {% else %}
       replicated:
         size: {{ pool.size }}
        # Disallow setting pool with replica 1, this could lead to data loss without recovery.
        # Make sure you're *ABSOLUTELY CERTAIN* that is what you want
         requireSafeReplicaSize: true
+      {% endif %}
       parameters:
        # Inline compression mode for the data pool
        # Further reference: https://docs.ceph.com/docs/master/rados/configuration/bluestore-config-ref/#inline-compression
diff --git a/src/rookify/modules/migrate_osd_pools/main.py b/src/rookify/modules/migrate_osd_pools/main.py
index 69bdd18..fecbbd4 100644
--- a/src/rookify/modules/migrate_osd_pools/main.py
+++ b/src/rookify/modules/migrate_osd_pools/main.py
@@ -59,6 +59,14 @@ def _migrate_pool(self, pool: Dict[str, Any]) -> None:
             "size": pool["size"],
         }
 
+        if pool.get("erasure_code_profile", "") != "":
+            profile_configuration = pool["erasure_code_configuration"]
+
+            pool_definition_values["erasure_code_configuration"] = {
+                "coding": profile_configuration["m"],
+                "data": profile_configuration["k"],
+            }
+
         # Render cluster config from template
         pool_definition = self.load_template("pool.yaml.j2", **pool_definition_values)
diff --git a/src/rookify/modules/migrate_osd_pools/templates/pool.yaml.j2 b/src/rookify/modules/migrate_osd_pools/templates/pool.yaml.j2
index d8e46f4..8116877 100644
--- a/src/rookify/modules/migrate_osd_pools/templates/pool.yaml.j2
+++ b/src/rookify/modules/migrate_osd_pools/templates/pool.yaml.j2
@@ -13,6 +13,11 @@ metadata:
 spec:
   # The failure domain will spread the replicas of the data across different failure zones
   failureDomain: host
+  {% if erasure_code_configuration %}
+  erasureCoded:
+    dataChunks: {{ erasure_code_configuration.data }}
+    codingChunks: {{ erasure_code_configuration.coding }}
+  {% else %}
   # For a pool based on raw copies, specify the number of copies. A size of 1 indicates no redundancy.
   replicated:
     size: {{ size }}
@@ -26,6 +31,7 @@ spec:
   # replicasPerFailureDomain: 2
   # The name of the failure domain to place further down replicas
   # subFailureDomain: host
+  {% endif %}
   # Ceph CRUSH root location of the rule
   # For reference: https://docs.ceph.com/docs/master/rados/operations/crush-map/#types-and-buckets
   #crushRoot: my-root
diff --git a/src/rookify/modules/migrate_rgw_pools/main.py b/src/rookify/modules/migrate_rgw_pools/main.py
index 9db8f0d..b758d46 100644
--- a/src/rookify/modules/migrate_rgw_pools/main.py
+++ b/src/rookify/modules/migrate_rgw_pools/main.py
@@ -40,8 +40,10 @@ def preflight(self) -> None:
                 if osd_pool_name.startswith("{0}.rgw.".format(zone_name)):
                     zone[osd_pool_name] = osd_pool_configuration
 
+            metadata_name = "{0}.rgw.meta".format(zone_name)
+
             if (
-                "{0}.rgw.meta".format(zone_name) not in zone
+                metadata_name not in zone
                 or "{0}.rgw.buckets.data".format(zone_name) not in zone
             ):
                 raise ModuleException(
@@ -50,16 +52,21 @@ def preflight(self) -> None:
                     )
                 )
 
+            if zone[metadata_name].get("erasure_code_profile", "") != "":
+                raise ModuleException(
+                    "Ceph RGW metadata OSD pools must use replication for Rook"
+                )
+
         self.machine.get_preflight_state("MigrateRgwPoolsHandler").zones = zones
 
     def execute(self) -> None:
         zones = self.machine.get_preflight_state("MigrateRgwPoolsHandler").zones
 
-        for zone_name, zone_osd_configurations in zones.items():
-            self._migrate_zone(zone_name, zone_osd_configurations)
+        for zone_name, zone_osd_pools_configuration in zones.items():
+            self._migrate_zone(zone_name, zone_osd_pools_configuration)
 
     def _migrate_zone(
-        self, zone_name: str, zone_osd_configurations: Dict[str, Any]
+        self, zone_name: str, zone_osd_pools_configuration: Dict[str, Any]
     ) -> None:
         migrated_zones = getattr(
             self.machine.get_execution_state("MigrateRgwPoolsHandler"),
@@ -78,22 +85,32 @@ def _migrate_zone(
         self.logger.debug("Migrating Ceph RGW zone '{0}'".format(zone_name))
 
-        pool_metadata_osd_pool_data = zone_osd_configurations[
+        pool_metadata_osd_configuration = zone_osd_pools_configuration[
             "{0}.rgw.meta".format(zone_name)
         ]
 
-        pool_buckets_data_osd_pool_data = zone_osd_configurations[
+        pool_data_osd_configuration = zone_osd_pools_configuration[
             "{0}.rgw.buckets.data".format(zone_name)
         ]
 
         pool_definition_values = {
             "cluster_namespace": self._config["rook"]["cluster"]["namespace"],
             "name": zone_name,
-            "metadata_size": pool_metadata_osd_pool_data["size"],
-            "data_pool_size": pool_buckets_data_osd_pool_data["size"],
+            "metadata_size": pool_metadata_osd_configuration["size"],
+            "data_pool_size": pool_data_osd_configuration["size"],
             "rgw_placement_label": self.k8s.rgw_placement_label,
         }
 
+        if pool_data_osd_configuration.get("erasure_code_profile", "") != "":
+            profile_configuration = pool_data_osd_configuration[
+                "erasure_code_configuration"
+            ]
+
+            pool_definition_values["data_erasure_code_configuration"] = {
+                "coding": profile_configuration["m"],
+                "data": profile_configuration["k"],
+            }
+
         # Render cluster config from template
         pool_definition = self.load_template("pool.yaml.j2", **pool_definition_values)
@@ -105,7 +122,7 @@ def _migrate_zone(
             "MigrateRgwPoolsHandler"
         ).migrated_zones = migrated_zones
 
-        for zone_osd_pool_name in zone_osd_configurations:
+        for zone_osd_pool_name in zone_osd_pools_configuration:
             if zone_osd_pool_name not in migrated_pools:
                 migrated_pools.append(zone_osd_pool_name)
diff --git a/src/rookify/modules/migrate_rgw_pools/templates/pool.yaml.j2 b/src/rookify/modules/migrate_rgw_pools/templates/pool.yaml.j2
index 6feecf7..3d67089 100644
--- a/src/rookify/modules/migrate_rgw_pools/templates/pool.yaml.j2
+++ b/src/rookify/modules/migrate_rgw_pools/templates/pool.yaml.j2
@@ -29,11 +29,17 @@ spec:
   # The pool spec used to create the data pool. Can use replication or erasure coding.
   dataPool:
     failureDomain: host
+    {% if data_erasure_code_configuration %}
+    erasureCoded:
+      dataChunks: {{ data_erasure_code_configuration.data }}
+      codingChunks: {{ data_erasure_code_configuration.coding }}
+    {% else %}
     replicated:
       size: {{ data_pool_size }}
      # Disallow setting pool with replica 1, this could lead to data loss without recovery.
      # Make sure you're *ABSOLUTELY CERTAIN* that is what you want
       requireSafeReplicaSize: true
+    {% endif %}
     parameters:
      # Inline compression mode for the data pool
      # Further reference: https://docs.ceph.com/docs/master/rados/configuration/bluestore-config-ref/#inline-compression
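
For reference (not part of the patch): with the new erasureCoded branch in the migrate_osd_pools template, a pool whose erasure code profile reports k=2 and m=1 would be rendered roughly as the spec fragment below. The chunk counts are hypothetical example values; the rest of the manifest (name, namespace, CRUSH-related options) is produced by the surrounding template as before.

spec:
  failureDomain: host
  erasureCoded:
    # "data" is mapped from the profile's "k", "coding" from its "m"
    dataChunks: 2
    codingChunks: 1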