Add support to parse and set Ceph erasure code profiles
Signed-off-by: Tobias Wolf <[email protected]>
NotTheEvilOne committed Jul 8, 2024
1 parent 4af6a72 commit e2cce47
Showing 8 changed files with 77 additions and 9 deletions.
5 changes: 5 additions & 0 deletions src/rookify/modules/ceph.py
@@ -46,6 +46,11 @@ def get_osd_pool_configurations_from_osd_dump(
osd_pool["erasure_code_profile"], erasure_code_profiles["default"]
)

if osd_pool["erasure_code_configuration"].get("plugin") != "jerasure":
raise ModuleException(
"Unsupported Ceph erasure code profile plugin in use"
)

return osd_pools

def mon_command(self, command: str, **kwargs: str) -> Dict[str, Any] | List[Any]:
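For reference, the profile data this check consumes comes from Ceph's erasure-code-profile listing, which reports every field as a string. A minimal sketch with illustrative values (not the module's own code):

```python
# Roughly what `ceph osd erasure-code-profile get default` reports; all
# values arrive as strings, and the numbers here are illustrative only.
profile = {
    "k": "2",                 # data chunks
    "m": "1",                 # coding (parity) chunks
    "plugin": "jerasure",     # the only plugin accepted above
    "technique": "reed_sol_van",
    "crush-failure-domain": "host",
}

# The new preflight check boils down to this guard:
if profile.get("plugin") != "jerasure":
    raise RuntimeError("Unsupported Ceph erasure code profile plugin in use")
```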
12 changes: 12 additions & 0 deletions src/rookify/modules/k8s_prerequisites_check/main.py
@@ -6,6 +6,17 @@

class K8sPrerequisitesCheckHandler(ModuleHandler):
def preflight(self) -> None:
self.logger.debug("K8sPrerequisitesCheck started validation")

# We only look up labels here. This is a sanity check and gives no guarantee that the deployment found is operational or even the Rook operator.
deployments = self.k8s.apps_v1_api.list_deployment_for_all_namespaces(
field_selector="metadata.name=rook-ceph-operator",
label_selector="operator=rook",
)

if len(deployments.items) < 1:
raise ModuleException("Rook operator not found")

namespace = self._config["rook"]["cluster"]["namespace"]

namespaces = [
@@ -32,3 +43,4 @@ def preflight(self) -> None:
raise ModuleException(
"Label {0} is set on node {1}".format(label, node.metadata.name)
)
self.logger.info("K8sPrerequisitesCheck completed")
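Taken out of the module, the operator lookup maps onto the official kubernetes Python client roughly as follows (a standalone sketch assuming kubeconfig access; RuntimeError stands in for the module's ModuleException):

```python
from kubernetes import client, config

config.load_kube_config()  # or config.load_incluster_config() inside a pod
apps_v1 = client.AppsV1Api()

# Match the operator deployment by name and by its "operator=rook" label.
deployments = apps_v1.list_deployment_for_all_namespaces(
    field_selector="metadata.name=rook-ceph-operator",
    label_selector="operator=rook",
)

if len(deployments.items) < 1:
    raise RuntimeError("Rook operator not found")
```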
8 changes: 8 additions & 0 deletions src/rookify/modules/migrate_mds_pools/main.py
@@ -111,6 +111,14 @@ def _migrate_pool(self, pool: Dict[str, Any]) -> None:
"size": osd_configuration["size"],
}

if osd_configuration.get("erasure_code_profile", "") != "":
profile_configuration = osd_configuration["erasure_code_configuration"]

definition_data_pool["erasure_code_configuration"] = {
"coding": profile_configuration["m"],
"data": profile_configuration["k"],
}

filesystem_definition_values["data_pools"].append(definition_data_pool)

# Render cluster config from template
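The same k/m translation reappears in the OSD and RGW pool modules below; written as a standalone, hypothetical helper it amounts to:

```python
from typing import Any, Dict

def erasure_code_values(profile_configuration: Dict[str, Any]) -> Dict[str, Any]:
    """Map a Ceph profile's k/m onto the template's data/coding keys,
    which render as Rook's dataChunks/codingChunks."""
    return {
        "data": profile_configuration["k"],    # -> spec.erasureCoded.dataChunks
        "coding": profile_configuration["m"],  # -> spec.erasureCoded.codingChunks
    }
```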
6 changes: 6 additions & 0 deletions src/rookify/modules/migrate_mds_pools/templates/filesystem.yaml.j2
@@ -30,11 +30,17 @@ spec:
{% for pool in data_pools %}
- name: {{ pool.name }}
failureDomain: host
{% if pool.erasure_code_configuration %}
erasureCoded:
dataChunks: {{ pool.erasure_code_configuration.data }}
codingChunks: {{ pool.erasure_code_configuration.coding }}
{% else %}
replicated:
size: {{ pool.size }}
# Disallow setting a pool with replica size 1, as this could lead to data loss without recovery.
# Make sure you're *ABSOLUTELY CERTAIN* that is what you want
requireSafeReplicaSize: true
{% endif %}
parameters:
# Inline compression mode for the data pool
# Further reference: https://docs.ceph.com/docs/master/rados/configuration/bluestore-config-ref/#inline-compression
8 changes: 8 additions & 0 deletions src/rookify/modules/migrate_osd_pools/main.py
@@ -59,6 +59,14 @@ def _migrate_pool(self, pool: Dict[str, Any]) -> None:
"size": pool["size"],
}

if pool.get("erasure_code_profile", "") != "":
profile_configuration = pool["erasure_code_configuration"]

pool_definition_values["erasure_code_configuration"] = {
"coding": profile_configuration["m"],
"data": profile_configuration["k"],
}

# Render cluster config from template
pool_definition = self.load_template("pool.yaml.j2", **pool_definition_values)

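The guard works because `ceph osd dump` reports an empty erasure_code_profile for replicated pools, so a non-empty value is enough to mark a pool as erasure coded. As a sketch:

```python
from typing import Any, Dict

def is_erasure_coded(pool: Dict[str, Any]) -> bool:
    # Replicated pools carry "" here; EC pools name their profile.
    return pool.get("erasure_code_profile", "") != ""
```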
6 changes: 6 additions & 0 deletions src/rookify/modules/migrate_osd_pools/templates/pool.yaml.j2
@@ -13,6 +13,11 @@ metadata:
spec:
# The failure domain will spread the replicas of the data across different failure zones
failureDomain: host
{% if erasure_code_configuration %}
erasureCoded:
dataChunks: {{ erasure_code_configuration.data }}
codingChunks: {{ erasure_code_configuration.coding }}
{% else %}
# For a pool based on raw copies, specify the number of copies. A size of 1 indicates no redundancy.
replicated:
size: {{ size }}
@@ -26,6 +31,7 @@ spec:
# replicasPerFailureDomain: 2
# The name of the failure domain to place further down replicas
# subFailureDomain: host
{% endif %}
# Ceph CRUSH root location of the rule
# For reference: https://docs.ceph.com/docs/master/rados/operations/crush-map/#types-and-buckets
#crushRoot: my-root
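To eyeball what the branch above renders, the template can be fed by hand with jinja2 (a sketch only; the module itself goes through self.load_template, and every variable besides erasure_code_configuration and size is an assumption here):

```python
from jinja2 import Template

with open("pool.yaml.j2") as fp:  # assumed local copy of the template
    template = Template(fp.read())

# With an EC configuration present, the {% if %} branch above emits
# erasureCoded with dataChunks/codingChunks instead of the replicated block.
print(
    template.render(
        name="ecpool",
        cluster_namespace="rook-ceph",
        erasure_code_configuration={"data": "2", "coding": "1"},
    )
)
```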
35 changes: 26 additions & 9 deletions src/rookify/modules/migrate_rgw_pools/main.py
@@ -40,8 +40,10 @@ def preflight(self) -> None:
if osd_pool_name.startswith("{0}.rgw.".format(zone_name)):
zone[osd_pool_name] = osd_pool_configuration

metadata_name = "{0}.rgw.meta".format(zone_name)

if (
"{0}.rgw.meta".format(zone_name) not in zone
metadata_name not in zone
or "{0}.rgw.buckets.data".format(zone_name) not in zone
):
raise ModuleException(
@@ -50,16 +52,21 @@ def preflight(self) -> None:
)
)

if zone[metadata_name].get("erasure_code_profile", "") != "":
raise ModuleException(
"Ceph RGW metadata OSD pools must use replication for Rook"
)

self.machine.get_preflight_state("MigrateRgwPoolsHandler").zones = zones

def execute(self) -> None:
zones = self.machine.get_preflight_state("MigrateRgwPoolsHandler").zones

- for zone_name, zone_osd_configurations in zones.items():
- self._migrate_zone(zone_name, zone_osd_configurations)
+ for zone_name, zone_osd_pools_configuration in zones.items():
+ self._migrate_zone(zone_name, zone_osd_pools_configuration)

def _migrate_zone(
- self, zone_name: str, zone_osd_configurations: Dict[str, Any]
+ self, zone_name: str, zone_osd_pools_configuration: Dict[str, Any]
) -> None:
migrated_zones = getattr(
self.machine.get_execution_state("MigrateRgwPoolsHandler"),
@@ -78,22 +85,32 @@

self.logger.debug("Migrating Ceph RGW zone '{0}'".format(zone_name))

- pool_metadata_osd_pool_data = zone_osd_configurations[
+ pool_metadata_osd_configuration = zone_osd_pools_configuration[
"{0}.rgw.meta".format(zone_name)
]

- pool_buckets_data_osd_pool_data = zone_osd_configurations[
+ pool_data_osd_configuration = zone_osd_pools_configuration[
"{0}.rgw.buckets.data".format(zone_name)
]

pool_definition_values = {
"cluster_namespace": self._config["rook"]["cluster"]["namespace"],
"name": zone_name,
"metadata_size": pool_metadata_osd_pool_data["size"],
"data_pool_size": pool_buckets_data_osd_pool_data["size"],
"metadata_size": pool_metadata_osd_configuration["size"],
"data_pool_size": pool_data_osd_configuration["size"],
"rgw_placement_label": self.k8s.rgw_placement_label,
}

if pool_data_osd_configuration.get("erasure_code_profile", "") != "":
profile_configuration = pool_data_osd_configuration[
"erasure_code_configuration"
]

pool_definition_values["data_erasure_code_configuration"] = {
"coding": profile_configuration["m"],
"data": profile_configuration["k"],
}

# Render cluster config from template
pool_definition = self.load_template("pool.yaml.j2", **pool_definition_values)

@@ -105,7 +122,7 @@
"MigrateRgwPoolsHandler"
).migrated_zones = migrated_zones

- for zone_osd_pool_name in zone_osd_configurations:
+ for zone_osd_pool_name in zone_osd_pools_configuration:
if zone_osd_pool_name not in migrated_pools:
migrated_pools.append(zone_osd_pool_name)

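Condensed, the new preflight rule reads as below; Rook requires an object store's metadata pool to be replicated, so only the bucket data pool may carry an erasure code profile (a sketch, with RuntimeError standing in for ModuleException):

```python
from typing import Any, Dict

def check_rgw_zone(zone_name: str, zone: Dict[str, Any]) -> None:
    metadata_name = "{0}.rgw.meta".format(zone_name)

    # EC is fine for <zone>.rgw.buckets.data, but the metadata pool
    # must stay replicated for Rook.
    if zone[metadata_name].get("erasure_code_profile", "") != "":
        raise RuntimeError(
            "Ceph RGW metadata OSD pools must use replication for Rook"
        )
```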
6 changes: 6 additions & 0 deletions src/rookify/modules/migrate_rgw_pools/templates/pool.yaml.j2
@@ -29,11 +29,17 @@ spec:
# The pool spec used to create the data pool. Can use replication or erasure coding.
dataPool:
failureDomain: host
{% if data_erasure_code_configuration %}
erasureCoded:
dataChunks: {{ data_erasure_code_configuration.data }}
codingChunks: {{ data_erasure_code_configuration.coding }}
{% else %}
replicated:
size: {{ data_pool_size }}
# Disallow setting a pool with replica size 1, as this could lead to data loss without recovery.
# Make sure you're *ABSOLUTELY CERTAIN* that is what you want
requireSafeReplicaSize: true
{% endif %}
parameters:
# Inline compression mode for the data pool
# Further reference: https://docs.ceph.com/docs/master/rados/configuration/bluestore-config-ref/#inline-compression
