From c9a223c7861e650d13dd0795f1120589f39b5a04 Mon Sep 17 00:00:00 2001 From: Tobias Wolf Date: Tue, 18 Jun 2024 16:58:59 +0200 Subject: [PATCH 1/7] Add support to call "librados.osd_command()" as well Signed-off-by: Tobias Wolf --- src/rookify/modules/ceph.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/rookify/modules/ceph.py b/src/rookify/modules/ceph.py index 8bbe4f7..c4ec4b6 100644 --- a/src/rookify/modules/ceph.py +++ b/src/rookify/modules/ceph.py @@ -19,12 +19,10 @@ def __init__(self, config: Dict[str, Any]): def __getattr__(self, name: str) -> Any: return getattr(self.__ceph, name) - def mon_command(self, command: str, **kwargs: str) -> Dict[str, Any] | List[Any]: - cmd = {"prefix": command, "format": "json"} - cmd.update(**kwargs) - - result = self.__ceph.mon_command(json.dumps(cmd), b"") - + def _json_command( + self, handler: Any, *args: List[Any] + ) -> Dict[str, Any] | List[Any]: + result = handler(*args) if result[0] != 0: raise ModuleException(f"Ceph did return an error: {result}") From b5ab57192061b68564d3c5d8b803c08e3d659ac5 Mon Sep 17 00:00:00 2001 From: Tobias Wolf Date: Mon, 1 Jul 2024 14:08:07 +0200 Subject: [PATCH 2/7] Add additional logic to the `Ceph` module for reusability Signed-off-by: Tobias Wolf --- src/rookify/modules/ceph.py | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/src/rookify/modules/ceph.py b/src/rookify/modules/ceph.py index c4ec4b6..e4cb687 100644 --- a/src/rookify/modules/ceph.py +++ b/src/rookify/modules/ceph.py @@ -19,9 +19,7 @@ def __init__(self, config: Dict[str, Any]): def __getattr__(self, name: str) -> Any: return getattr(self.__ceph, name) - def _json_command( - self, handler: Any, *args: List[Any] - ) -> Dict[str, Any] | List[Any]: + def _json_command(self, handler: Any, *args: Any) -> Dict[str, Any] | List[Any]: result = handler(*args) if result[0] != 0: raise ModuleException(f"Ceph did return an error: {result}") @@ -34,14 +32,35 @@ def _json_command( return data + def get_osd_pool_configurations_from_osd_dump( + self, dump_data: Dict[str, Any] + ) -> Dict[str, Any]: + osd_pools = {osd_pool["pool_name"]: osd_pool for osd_pool in dump_data["pools"]} + + erasure_code_profiles = dump_data["erasure_code_profiles"] + + for osd_pool_name in osd_pools: + osd_pool = osd_pools[osd_pool_name] + + osd_pool["erasure_code_configuration"] = erasure_code_profiles.get( + osd_pool["erasure_code_profile"], erasure_code_profiles["default"] + ) + + return osd_pools + def mon_command(self, command: str, **kwargs: str) -> Dict[str, Any] | List[Any]: cmd = {"prefix": command, "format": "json"} cmd.update(**kwargs) - return self._json_command(self.__ceph.mon_command, json.dumps(cmd), b"") # type: ignore + return self._json_command(self.__ceph.mon_command, json.dumps(cmd), b"") + + def mgr_command(self, command: str, **kwargs: str) -> Dict[str, Any] | List[Any]: + cmd = {"prefix": command, "format": "json"} + cmd.update(**kwargs) + return self._json_command(self.__ceph.mgr_command, json.dumps(cmd), b"") def osd_command( self, osd_id: int, command: str, **kwargs: str ) -> Dict[str, Any] | List[Any]: cmd = {"prefix": command, "format": "json"} cmd.update(**kwargs) - return self._json_command(self.__ceph.osd_command, osd_id, json.dumps(cmd), b"") # type: ignore + return self._json_command(self.__ceph.osd_command, osd_id, json.dumps(cmd), b"") From 9f6759ec746f671cdf2fc413408ab9d21bb1215a Mon Sep 17 00:00:00 2001 From: Tobias Wolf Date: Mon, 1 Jul 2024 14:08:45 +0200 Subject: [PATCH 
3/7] Fix possible issue with non-existent state tags for `Machine` Signed-off-by: Tobias Wolf --- src/rookify/modules/machine.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/rookify/modules/machine.py b/src/rookify/modules/machine.py index 7192e63..54d47ca 100644 --- a/src/rookify/modules/machine.py +++ b/src/rookify/modules/machine.py @@ -78,7 +78,8 @@ def _get_state_tags_data(self, name: str) -> Dict[str, Any]: if len(state.tags) > 0: for tag in state.tags: - data[tag] = getattr(state, tag) + if hasattr(state, tag): + data[tag] = getattr(state, tag) return data From c40c75655829a5014229e09735ec4d4048844e1d Mon Sep 17 00:00:00 2001 From: Tobias Wolf Date: Mon, 1 Jul 2024 14:16:12 +0200 Subject: [PATCH 4/7] Switch from `fs dump` to `fs ls` in `analyze_ceph` Signed-off-by: Tobias Wolf --- src/rookify/modules/analyze_ceph/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rookify/modules/analyze_ceph/main.py b/src/rookify/modules/analyze_ceph/main.py index 39c3a26..3111181 100644 --- a/src/rookify/modules/analyze_ceph/main.py +++ b/src/rookify/modules/analyze_ceph/main.py @@ -7,7 +7,7 @@ class AnalyzeCephHandler(ModuleHandler): def preflight(self) -> Any: - commands = ["mon dump", "osd dump", "device ls", "fs dump", "node ls"] + commands = ["mon dump", "osd dump", "device ls", "fs ls", "node ls"] state = self.machine.get_preflight_state("AnalyzeCephHandler") state.data: Dict[str, Any] = {} # type: ignore From 91b5534d9e511cd66b0e666933e8d3056e0a504f Mon Sep 17 00:00:00 2001 From: Tobias Wolf Date: Mon, 1 Jul 2024 16:03:09 +0200 Subject: [PATCH 5/7] Add support to migrate Ceph MDS pools Signed-off-by: Tobias Wolf --- .../modules/migrate_mds_pools/__init__.py | 3 + src/rookify/modules/migrate_mds_pools/main.py | 156 ++++++++++++++++++ .../templates/filesystem.yaml.j2 | 140 ++++++++++++++++ 3 files changed, 299 insertions(+) create mode 100644 src/rookify/modules/migrate_mds_pools/__init__.py create mode 100644 src/rookify/modules/migrate_mds_pools/main.py create mode 100644 src/rookify/modules/migrate_mds_pools/templates/filesystem.yaml.j2 diff --git a/src/rookify/modules/migrate_mds_pools/__init__.py b/src/rookify/modules/migrate_mds_pools/__init__.py new file mode 100644 index 0000000..32b8ee3 --- /dev/null +++ b/src/rookify/modules/migrate_mds_pools/__init__.py @@ -0,0 +1,3 @@ +# -*- coding: utf-8 -*- + +from .main import MigrateMdsPoolsHandler as ModuleHandler # noqa diff --git a/src/rookify/modules/migrate_mds_pools/main.py b/src/rookify/modules/migrate_mds_pools/main.py new file mode 100644 index 0000000..de4bede --- /dev/null +++ b/src/rookify/modules/migrate_mds_pools/main.py @@ -0,0 +1,156 @@ +# -*- coding: utf-8 -*- + +from typing import Any, Dict +from ..machine import Machine +from ..module import ModuleHandler + + +class MigrateMdsPoolsHandler(ModuleHandler): + REQUIRES = ["analyze_ceph", "migrate_mds"] + + def preflight(self) -> None: + state_data = self.machine.get_preflight_state("AnalyzeCephHandler").data + + pools = getattr( + self.machine.get_preflight_state("MigrateMdsPoolsHandler"), "pools", {} + ) + + osd_pools = self.ceph.get_osd_pool_configurations_from_osd_dump( + state_data["osd"]["dump"] + ) + + for mds_fs_data in state_data["fs"]["ls"]: + if not mds_fs_data["metadata_pool"].endswith("-metadata"): + self.logger.warn( + "MDS filesystem '{0}' uses an incompatible Ceph pool metadata name '{1}' and can not be migrated to Rook automatically".format( + mds_fs_data["name"], mds_fs_data["metadata_pool"] + ) + ) + + # 
Store pools for incompatible MDS filesystem as migrated ones + migrated_pools = getattr( + self.machine.get_execution_state("MigrateMdsPoolsHandler"), + "migrated_pools", + [], + ) + + if mds_fs_data["metadata_pool"] not in migrated_pools: + migrated_pools.append(mds_fs_data["metadata_pool"]) + + for pool_data_osd_name in mds_fs_data["data_pools"]: + if pool_data_osd_name not in migrated_pools: + migrated_pools.append(pool_data_osd_name) + + self.machine.get_execution_state( + "MigrateMdsPoolsHandler" + ).migrated_pools = migrated_pools + + continue + + pool = { + "name": mds_fs_data["name"], + "metadata": mds_fs_data["metadata_pool"], + "data": [pool for pool in mds_fs_data["data_pools"]], + "osd_pool_configurations": {}, + } + + pool["osd_pool_configurations"][mds_fs_data["metadata_pool"]] = osd_pools[ + mds_fs_data["metadata_pool"] + ] + + for mds_ods_pool_name in mds_fs_data["data_pools"]: + pool["osd_pool_configurations"][mds_ods_pool_name] = osd_pools[ + mds_ods_pool_name + ] + + pools[mds_fs_data["name"]] = pool + + self.machine.get_preflight_state("MigrateMdsPoolsHandler").pools = pools + + def execute(self) -> None: + pools = self.machine.get_preflight_state("MigrateMdsPoolsHandler").pools + + for pool in pools.values(): + self._migrate_pool(pool) + + def _migrate_pool(self, pool: Dict[str, Any]) -> None: + migrated_mds_pools = getattr( + self.machine.get_execution_state("MigrateMdsPoolsHandler"), + "migrated_mds_pools", + [], + ) + + if pool["name"] in migrated_mds_pools: + return + + migrated_pools = getattr( + self.machine.get_execution_state("MigrateMdsPoolsHandler"), + "migrated_pools", + [], + ) + + self.logger.debug("Migrating Ceph MDS pool '{0}'".format(pool["name"])) + osd_pool_configurations = pool["osd_pool_configurations"] + + pool_metadata_osd_configuration = osd_pool_configurations[pool["metadata"]] + + filesystem_definition_values = { + "cluster_namespace": self._config["rook"]["cluster"]["namespace"], + "name": pool["name"], + "mds_size": pool_metadata_osd_configuration["size"], + } + + filesystem_definition_values["data_pools"] = [] + + for pool_data_osd_name in pool["data"]: + osd_configuration = osd_pool_configurations[pool_data_osd_name] + + definition_data_pool = { + "name": osd_configuration["pool_name"], + "size": osd_configuration["size"], + } + + filesystem_definition_values["data_pools"].append(definition_data_pool) + + # Render cluster config from template + pool_definition = self.load_template( + "filesystem.yaml.j2", **filesystem_definition_values + ) + + self.k8s.crd_api_apply(pool_definition.yaml) + + if pool["name"] not in migrated_mds_pools: + migrated_mds_pools.append(pool["name"]) + + self.machine.get_execution_state( + "MigrateMdsPoolsHandler" + ).migrated_mds_pools = migrated_mds_pools + + if pool["metadata"] not in migrated_pools: + migrated_pools.append(pool["metadata"]) + + for pool_data_osd_name in pool["data"]: + if pool_data_osd_name not in migrated_pools: + migrated_pools.append(pool_data_osd_name) + + self.machine.get_execution_state( + "MigrateMdsPoolsHandler" + ).migrated_pools = migrated_pools + + self.logger.info("Migrated Ceph MDS pool '{0}'".format(pool["name"])) + + @staticmethod + def register_execution_state( + machine: Machine, state_name: str, handler: ModuleHandler, **kwargs: Any + ) -> None: + ModuleHandler.register_execution_state( + machine, state_name, handler, tags=["migrated_pools", "migrated_mds_pools"] + ) + + @staticmethod + def register_preflight_state( + machine: Machine, state_name: str, handler: ModuleHandler, 
**kwargs: Any + ) -> None: + ModuleHandler.register_preflight_state( + machine, state_name, handler, tags=["pools"] + ) diff --git a/src/rookify/modules/migrate_mds_pools/templates/filesystem.yaml.j2 b/src/rookify/modules/migrate_mds_pools/templates/filesystem.yaml.j2 new file mode 100644 index 0000000..f25658f --- /dev/null +++ b/src/rookify/modules/migrate_mds_pools/templates/filesystem.yaml.j2 @@ -0,0 +1,140 @@ +--- +################################################################################################################# +# Create a filesystem with settings with replication enabled for a production environment. +# A minimum of 3 OSDs on different nodes are required in this example. +# If one mds daemon per node is too restrictive, see the podAntiAffinity below. +# kubectl create -f filesystem.yaml +################################################################################################################# + +apiVersion: ceph.rook.io/v1 +kind: CephFilesystem +metadata: + name: {{ name }} + namespace: {{ cluster_namespace }} +spec: + # The metadata pool spec. Must use replication. + metadataPool: + replicated: + size: {{ mds_size }} + requireSafeReplicaSize: true + parameters: + # Inline compression mode for the data pool + # Further reference: https://docs.ceph.com/docs/master/rados/configuration/bluestore-config-ref/#inline-compression + compression_mode: + none + # gives a hint (%) to Ceph in terms of expected consumption of the total cluster capacity of a given pool + # for more info: https://docs.ceph.com/docs/master/rados/operations/placement-groups/#specifying-expected-pool-size + #target_size_ratio: ".5" + # The list of data pool specs. Can use replication or erasure coding. + dataPools: + {% for pool in data_pools %} + - name: {{ pool.name }} + failureDomain: host + replicated: + size: {{ pool.size }} + # Disallow setting pool with replica 1, this could lead to data loss without recovery. + # Make sure you're *ABSOLUTELY CERTAIN* that is what you want + requireSafeReplicaSize: true + parameters: + # Inline compression mode for the data pool + # Further reference: https://docs.ceph.com/docs/master/rados/configuration/bluestore-config-ref/#inline-compression + compression_mode: + none + # gives a hint (%) to Ceph in terms of expected consumption of the total cluster capacity of a given pool + # for more info: https://docs.ceph.com/docs/master/rados/operations/placement-groups/#specifying-expected-pool-size + #target_size_ratio: ".5" + {% endfor %} + # Whether to preserve filesystem after CephFilesystem CRD deletion + preserveFilesystemOnDelete: true + # The metadata service (mds) configuration + metadataServer: + # The number of active MDS instances + activeCount: 1 + # Whether each active MDS instance will have an active standby with a warm metadata cache for faster failover. + # If false, standbys will be available, but will not have a warm cache. 
+ activeStandby: true + # The affinity rules to apply to the mds deployment + placement: + # nodeAffinity: + # requiredDuringSchedulingIgnoredDuringExecution: + # nodeSelectorTerms: + # - matchExpressions: + # - key: role + # operator: In + # values: + # - mds-node + # topologySpreadConstraints: + # tolerations: + # - key: mds-node + # operator: Exists + # podAffinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: app + operator: In + values: + - rook-ceph-mds + ## Add this if you want to allow mds daemons for different filesystems to run on one + ## node. The value in "values" must match .metadata.name. + # - key: rook_file_system + # operator: In + # values: + # - myfs + # topologyKey: kubernetes.io/hostname will place MDS across different hosts + topologyKey: kubernetes.io/hostname + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: app + operator: In + values: + - rook-ceph-mds + # topologyKey: */zone can be used to spread MDS across different AZ + # Use in k8s cluster if your cluster is v1.16 or lower + # Use in k8s cluster is v1.17 or upper + topologyKey: topology.kubernetes.io/zone + # A key/value list of annotations + # annotations: + # key: value + # A key/value list of labels + # labels: + # key: value + # resources: + # The requests and limits set here, allow the filesystem MDS Pod(s) to use half of one CPU core and 1 gigabyte of memory + # limits: + # memory: "1024Mi" + # requests: + # cpu: "500m" + # memory: "1024Mi" + priorityClassName: system-cluster-critical + livenessProbe: + disabled: false + startupProbe: + disabled: false + # Filesystem mirroring settings + # mirroring: + # enabled: true + # # list of Kubernetes Secrets containing the peer token + # # for more details see: https://docs.ceph.com/en/latest/dev/cephfs-mirroring/#bootstrap-peers + # # Add the secret name if it already exists else specify the empty list here. + # peers: + # secretNames: + # - secondary-cluster-peer + # # specify the schedule(s) on which snapshots should be taken + # # see the official syntax here https://docs.ceph.com/en/latest/cephfs/snap-schedule/#add-and-remove-schedules + # snapshotSchedules: + # - path: / + # interval: 24h # daily snapshots + # # The startTime should be mentioned in the format YYYY-MM-DDTHH:MM:SS + # # If startTime is not specified, then by default the start time is considered as midnight UTC. 
+ # # see usage here https://docs.ceph.com/en/latest/cephfs/snap-schedule/#usage + # # startTime: 2022-07-15T11:55:00 + # # manage retention policies + # # see syntax duration here https://docs.ceph.com/en/latest/cephfs/snap-schedule/#add-and-remove-retention-policies + # snapshotRetention: + # - path: / + # duration: "h 24" From 7f9b00264f67c16f325c7ec2783d64f47013494d Mon Sep 17 00:00:00 2001 From: Tobias Wolf Date: Mon, 1 Jul 2024 16:07:21 +0200 Subject: [PATCH 6/7] Add support to migrate Ceph RGW zones Signed-off-by: Tobias Wolf --- .../modules/migrate_rgw_pools/__init__.py | 3 + src/rookify/modules/migrate_rgw_pools/main.py | 131 ++++++++++++++++++ .../migrate_rgw_pools/templates/pool.yaml.j2 | 115 +++++++++++++++ 3 files changed, 249 insertions(+) create mode 100644 src/rookify/modules/migrate_rgw_pools/__init__.py create mode 100644 src/rookify/modules/migrate_rgw_pools/main.py create mode 100644 src/rookify/modules/migrate_rgw_pools/templates/pool.yaml.j2 diff --git a/src/rookify/modules/migrate_rgw_pools/__init__.py b/src/rookify/modules/migrate_rgw_pools/__init__.py new file mode 100644 index 0000000..2ccafc9 --- /dev/null +++ b/src/rookify/modules/migrate_rgw_pools/__init__.py @@ -0,0 +1,3 @@ +# -*- coding: utf-8 -*- + +from .main import MigrateRgwPoolsHandler as ModuleHandler # noqa diff --git a/src/rookify/modules/migrate_rgw_pools/main.py b/src/rookify/modules/migrate_rgw_pools/main.py new file mode 100644 index 0000000..bfa40a2 --- /dev/null +++ b/src/rookify/modules/migrate_rgw_pools/main.py @@ -0,0 +1,131 @@ +# -*- coding: utf-8 -*- + +from typing import Any, Dict +from ..exception import ModuleException +from ..machine import Machine +from ..module import ModuleHandler + + +class MigrateRgwPoolsHandler(ModuleHandler): + REQUIRES = ["analyze_ceph", "migrate_rgw"] + + def preflight(self) -> None: + state_data = self.machine.get_preflight_state("AnalyzeCephHandler").data + + zones = getattr( + self.machine.get_preflight_state("MigrateRgwPoolsHandler"), "zones", {} + ) + + service_data = self.ceph.mon_command("service dump") + + rgw_daemons = service_data["services"].get("rgw", {}).get("daemons", {}) # type: ignore + + for rgw_daemon in rgw_daemons.values(): + if not isinstance(rgw_daemon, dict): + continue + + zone_name = rgw_daemon["metadata"]["zone_name"] + + if zone_name not in zones: + zones[zone_name] = {} + + osd_pools = self.ceph.get_osd_pool_configurations_from_osd_dump( + state_data["osd"]["dump"] + ) + + for zone_name in zones: + zone = zones[zone_name] + + for osd_pool_name, osd_pool_configuration in osd_pools.items(): + if osd_pool_name.startswith("{0}.rgw.".format(zone_name)): + zone[osd_pool_name] = osd_pool_configuration + + if ( + "{0}.rgw.meta".format(zone_name) not in zone + or "{0}.rgw.buckets.data".format(zone_name) not in zone + ): + raise ModuleException( + "Failed to identify required pools for RGW zone '{0}'".format( + zone_name + ) + ) + + self.machine.get_preflight_state("MigrateRgwPoolsHandler").zones = zones + + def execute(self) -> None: + zones = self.machine.get_preflight_state("MigrateRgwPoolsHandler").zones + + for zone_name, zone_osd_configurations in zones.items(): + self._migrate_zone(zone_name, zone_osd_configurations) + + def _migrate_zone( + self, zone_name: str, zone_osd_configurations: Dict[str, Any] + ) -> None: + migrated_zones = getattr( + self.machine.get_execution_state("MigrateRgwPoolsHandler"), + "migrated_zones", + [], + ) + + if zone_name in migrated_zones: + return + + migrated_pools = getattr( + 
self.machine.get_execution_state("MigrateRgwPoolsHandler"), + "migrated_pools", + [], + ) + + self.logger.debug("Migrating Ceph RGW zone '{0}'".format(zone_name)) + + pool_metadata_osd_pool_data = zone_osd_configurations[ + "{0}.rgw.meta".format(zone_name) + ] + + pool_buckets_data_osd_pool_data = zone_osd_configurations[ + "{0}.rgw.buckets.data".format(zone_name) + ] + + pool_definition_values = { + "cluster_namespace": self._config["rook"]["cluster"]["namespace"], + "name": zone_name, + "metadata_size": pool_metadata_osd_pool_data["size"], + "data_pool_size": pool_buckets_data_osd_pool_data["size"], + } + + # Render cluster config from template + pool_definition = self.load_template("pool.yaml.j2", **pool_definition_values) + + self.k8s.crd_api_apply(pool_definition.yaml) + + migrated_zones.append(zone_name) + + self.machine.get_execution_state( + "MigrateRgwPoolsHandler" + ).migrated_zones = migrated_zones + + for zone_osd_pool_name in zone_osd_configurations: + if zone_osd_pool_name not in migrated_pools: + migrated_pools.append(zone_osd_pool_name) + + self.machine.get_execution_state( + "MigrateRgwPoolsHandler" + ).migrated_pools = migrated_pools + + self.logger.info("Migrated Ceph RGW zone '{0}'".format(zone_name)) + + @staticmethod + def register_execution_state( + machine: Machine, state_name: str, handler: ModuleHandler, **kwargs: Any + ) -> None: + ModuleHandler.register_execution_state( + machine, state_name, handler, tags=["migrated_pools", "migrated_zones"] + ) + + @staticmethod + def register_preflight_state( + machine: Machine, state_name: str, handler: ModuleHandler, **kwargs: Any + ) -> None: + ModuleHandler.register_preflight_state( + machine, state_name, handler, tags=["zones"] + ) diff --git a/src/rookify/modules/migrate_rgw_pools/templates/pool.yaml.j2 b/src/rookify/modules/migrate_rgw_pools/templates/pool.yaml.j2 new file mode 100644 index 0000000..c90babf --- /dev/null +++ b/src/rookify/modules/migrate_rgw_pools/templates/pool.yaml.j2 @@ -0,0 +1,115 @@ +--- +################################################################################################################# +# Create an object store with settings for replication in a production environment. A minimum of 3 hosts with +# OSDs are required in this example. +# kubectl create -f object.yaml +################################################################################################################# + +apiVersion: ceph.rook.io/v1 +kind: CephObjectStore +metadata: + name: {{ name }} + namespace: {{ cluster_namespace }} +spec: + # The pool spec used to create the metadata pools. Must use replication. + metadataPool: + failureDomain: host + replicated: + size: {{ metadata_size }} + # Disallow setting pool with replica 1, this could lead to data loss without recovery. + # Make sure you're *ABSOLUTELY CERTAIN* that is what you want + requireSafeReplicaSize: true + parameters: + # Inline compression mode for the data pool + # Further reference: https://docs.ceph.com/docs/master/rados/configuration/bluestore-config-ref/#inline-compression + compression_mode: none + # gives a hint (%) to Ceph in terms of expected consumption of the total cluster capacity of a given pool + # for more info: https://docs.ceph.com/docs/master/rados/operations/placement-groups/#specifying-expected-pool-size + #target_size_ratio: ".5" + # The pool spec used to create the data pool. Can use replication or erasure coding. 
+ dataPool: + failureDomain: host + replicated: + size: {{ data_pool_size }} + # Disallow setting pool with replica 1, this could lead to data loss without recovery. + # Make sure you're *ABSOLUTELY CERTAIN* that is what you want + requireSafeReplicaSize: true + parameters: + # Inline compression mode for the data pool + # Further reference: https://docs.ceph.com/docs/master/rados/configuration/bluestore-config-ref/#inline-compression + compression_mode: none + # gives a hint (%) to Ceph in terms of expected consumption of the total cluster capacity of a given pool + # for more info: https://docs.ceph.com/docs/master/rados/operations/placement-groups/#specifying-expected-pool-size + #target_size_ratio: ".5" + # Whether to preserve metadata and data pools on object store deletion + preservePoolsOnDelete: false + # The gateway service configuration + gateway: + # A reference to the secret in the rook namespace where the ssl certificate is stored + # sslCertificateRef: + # A reference to the secret in the rook namespace where the ca bundle is stored + # caBundleRef: + # The port that RGW pods will listen on (http) + port: 80 + # The port that RGW pods will listen on (https). An ssl certificate is required. + # securePort: 443 + # The number of pods in the rgw deployment + instances: 1 + # The affinity rules to apply to the rgw deployment. + placement: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: app + operator: In + values: + - rook-ceph-rgw + # topologyKey: */zone can be used to spread RGW across different AZ + # Use in k8s cluster if your cluster is v1.16 or lower + # Use in k8s cluster is v1.17 or upper + topologyKey: kubernetes.io/hostname + # A key/value list of annotations + # nodeAffinity: + # requiredDuringSchedulingIgnoredDuringExecution: + # nodeSelectorTerms: + # - matchExpressions: + # - key: role + # operator: In + # values: + # - rgw-node + # topologySpreadConstraints: + # tolerations: + # - key: rgw-node + # operator: Exists + # podAffinity: + # podAntiAffinity: + # A key/value list of annotations + annotations: + # key: value + # A key/value list of labels + labels: + # key: value + resources: + # The requests and limits set here, allow the object store gateway Pod(s) to use half of one CPU core and 1 gigabyte of memory + # limits: + # memory: "1024Mi" + # requests: + # cpu: "500m" + # memory: "1024Mi" + priorityClassName: system-cluster-critical + #zone: + #name: zone-a + # service endpoint healthcheck + healthCheck: + # Configure the pod probes for the rgw daemon + startupProbe: + disabled: false + readinessProbe: + disabled: false + # hosting: + # The list of subdomain names for virtual hosting of buckets. 
+ # dnsNames: + # - "mystore.example.com" From 111b1d8231238d91d12c09333ec321d0854da508 Mon Sep 17 00:00:00 2001 From: Tobias Wolf Date: Mon, 1 Jul 2024 20:59:35 +0200 Subject: [PATCH 7/7] Add support to migrate Ceph OSD pools Signed-off-by: Tobias Wolf --- .../modules/migrate_osd_pools/__init__.py | 3 + src/rookify/modules/migrate_osd_pools/main.py | 76 +++++++++++++++++++ .../migrate_osd_pools/templates/pool.yaml.j2 | 67 ++++++++++++++++ 3 files changed, 146 insertions(+) create mode 100644 src/rookify/modules/migrate_osd_pools/__init__.py create mode 100644 src/rookify/modules/migrate_osd_pools/main.py create mode 100644 src/rookify/modules/migrate_osd_pools/templates/pool.yaml.j2 diff --git a/src/rookify/modules/migrate_osd_pools/__init__.py b/src/rookify/modules/migrate_osd_pools/__init__.py new file mode 100644 index 0000000..cd48c3c --- /dev/null +++ b/src/rookify/modules/migrate_osd_pools/__init__.py @@ -0,0 +1,3 @@ +# -*- coding: utf-8 -*- + +from .main import MigrateOSDPoolsHandler as ModuleHandler # noqa diff --git a/src/rookify/modules/migrate_osd_pools/main.py b/src/rookify/modules/migrate_osd_pools/main.py new file mode 100644 index 0000000..69bdd18 --- /dev/null +++ b/src/rookify/modules/migrate_osd_pools/main.py @@ -0,0 +1,76 @@ +# -*- coding: utf-8 -*- + +from typing import Any, Dict +from ..machine import Machine +from ..module import ModuleHandler + + +class MigrateOSDPoolsHandler(ModuleHandler): + REQUIRES = ["analyze_ceph", "migrate_mds_pools", "migrate_rgw_pools"] + + def execute(self) -> None: + state_data = self.machine.get_preflight_state("AnalyzeCephHandler").data + + migrated_mds_pools = getattr( + self.machine.get_execution_state("MigrateMdsPoolsHandler"), + "migrated_pools", + [], + ) + + migrated_rgw_pools = getattr( + self.machine.get_execution_state("MigrateRgwPoolsHandler"), + "migrated_pools", + [], + ) + + migrated_pools = migrated_mds_pools + migrated_rgw_pools + + osd_pool_configurations = self.ceph.get_osd_pool_configurations_from_osd_dump( + state_data["osd"]["dump"] + ) + + pools = [] + + for pool in osd_pool_configurations.values(): + if ( + not pool["pool_name"].startswith(".") + and pool["pool_name"] not in migrated_pools + ): + pools.append(pool) + + for pool in pools: + self._migrate_pool(pool) + + def _migrate_pool(self, pool: Dict[str, Any]) -> None: + migrated_pools = getattr( + self.machine.get_execution_state("MigrateOSDPoolsHandler"), + "migrated_pools", + [], + ) + + if pool["pool_name"] in migrated_pools: + return + + self.logger.debug("Migrating Ceph OSD pool '{0}'".format(pool["pool_name"])) + + pool_definition_values = { + "cluster_namespace": self._config["rook"]["cluster"]["namespace"], + "name": pool["pool_name"], + "size": pool["size"], + } + + # Render cluster config from template + pool_definition = self.load_template("pool.yaml.j2", **pool_definition_values) + + self.k8s.crd_api_apply(pool_definition.yaml) + migrated_pools.append(pool["pool_name"]) + + self.logger.info("Migrated Ceph OSD pool '{0}'".format(pool["pool_name"])) + + @staticmethod + def register_execution_state( + machine: Machine, state_name: str, handler: ModuleHandler, **kwargs: Any + ) -> None: + ModuleHandler.register_execution_state( + machine, state_name, handler, tags=["migrated_pools"] + ) diff --git a/src/rookify/modules/migrate_osd_pools/templates/pool.yaml.j2 b/src/rookify/modules/migrate_osd_pools/templates/pool.yaml.j2 new file mode 100644 index 0000000..d8e46f4 --- /dev/null +++ b/src/rookify/modules/migrate_osd_pools/templates/pool.yaml.j2 @@ -0,0 
+1,67 @@ +--- +################################################################################################################# +# Create a Ceph pool with settings for replication in production environments. A minimum of 3 OSDs on +# different hosts are required in this example. +# kubectl create -f pool.yaml +################################################################################################################# + +apiVersion: ceph.rook.io/v1 +kind: CephBlockPool +metadata: + name: {{ name }} + namespace: {{ cluster_namespace }} +spec: + # The failure domain will spread the replicas of the data across different failure zones + failureDomain: host + # For a pool based on raw copies, specify the number of copies. A size of 1 indicates no redundancy. + replicated: + size: {{ size }} + # Disallow setting pool with replica 1, this could lead to data loss without recovery. + # Make sure you're *ABSOLUTELY CERTAIN* that is what you want + requireSafeReplicaSize: true + # hybridStorage: + # primaryDeviceClass: ssd + # secondaryDeviceClass: hdd + # The number for replicas per failure domain, the value must be a divisor of the replica count. If specified, the most common value is 2 for stretch clusters, where the replica count would be 4. + # replicasPerFailureDomain: 2 + # The name of the failure domain to place further down replicas + # subFailureDomain: host + # Ceph CRUSH root location of the rule + # For reference: https://docs.ceph.com/docs/master/rados/operations/crush-map/#types-and-buckets + #crushRoot: my-root + # The Ceph CRUSH device class associated with the CRUSH replicated rule + # For reference: https://docs.ceph.com/docs/master/rados/operations/crush-map/#device-classes + # If device classes are specified, ensure this property is added to every pool in the cluster, + # otherwise Ceph will warn about pools with overlapping roots. + #deviceClass: my-class + # Enables collecting RBD per-image IO statistics by enabling dynamic OSD performance counters. Defaults to false. + # For reference: https://docs.ceph.com/docs/master/mgr/prometheus/#rbd-io-statistics + # enableRBDStats: true + # Set any property on a given pool + # see https://docs.ceph.com/docs/master/rados/operations/pools/#set-pool-values + parameters: + # Inline compression mode for the data pool + # Further reference: https://docs.ceph.com/docs/master/rados/configuration/bluestore-config-ref/#inline-compression + compression_mode: none + # gives a hint (%) to Ceph in terms of expected consumption of the total cluster capacity of a given pool + # for more info: https://docs.ceph.com/docs/master/rados/operations/placement-groups/#specifying-expected-pool-size + #target_size_ratio: ".5" + mirroring: + enabled: false + # mirroring mode: pool level or per image + # for more details see: https://docs.ceph.com/docs/master/rbd/rbd-mirroring/#enable-mirroring + mode: image + # specify the schedule(s) on which snapshots should be taken + # snapshotSchedules: + # - interval: 24h # daily snapshots + # startTime: 14:00:00-05:00 + # reports pool mirroring status if enabled + statusCheck: + mirror: + disabled: false + interval: 60s + # quota in bytes and/or objects, default value is 0 (unlimited) + # see https://docs.ceph.com/en/latest/rados/operations/pools/#set-pool-quotas + # quotas: + # maxSize: "10Gi" # valid suffixes include k, M, G, T, P, E, Ki, Mi, Gi, Ti, Pi, Ei + # maxObjects: 1000000000 # 1 billion objects
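Usage sketch (illustrative, not part of the patches above): patches 1/7 and 2/7 route `mon_command()`, `mgr_command()`, and `osd_command()` through the shared `_json_command()` helper in src/rookify/modules/ceph.py and add `get_osd_pool_configurations_from_osd_dump()`, which the pool migration handlers in patches 5/7 through 7/7 consume via the "osd dump" data collected by `AnalyzeCephHandler` (patch 4/7). The sketch below shows how that wrapper API might be exercised on its own; the class name `Ceph`, the import path, and the configuration keys passed to the constructor are assumptions inferred from the file path and the constructor signature visible in the diffs, not something the patches confirm.

# Hedged sketch of the Ceph wrapper API touched by patches 1/7 and 2/7.
# Assumptions: the wrapper class is named `Ceph`, it is importable from
# rookify.modules.ceph, and its config dict carries librados connection
# settings (the exact keys are not visible in the diffs and may differ).
from rookify.modules.ceph import Ceph

config = {
    "ceph": {
        "config": "/etc/ceph/ceph.conf",  # assumed key layout
        "keyring": "/etc/ceph/ceph.client.admin.keyring",  # assumed key layout
    }
}
ceph = Ceph(config)

# mon_command() builds {"prefix": <command>, "format": "json"}, merges any
# keyword arguments, and returns the JSON-decoded result, raising
# ModuleException on a non-zero return code (patch 1/7).
osd_dump = ceph.mon_command("osd dump")

# get_osd_pool_configurations_from_osd_dump() (patch 2/7) indexes the pools
# from an "osd dump" result by pool name and attaches the matching erasure
# code profile under "erasure_code_configuration".
osd_pools = ceph.get_osd_pool_configurations_from_osd_dump(osd_dump)

for pool_name, pool in osd_pools.items():
    print(pool_name, pool["size"], pool["erasure_code_configuration"])

# osd_command() targets a single OSD and takes the OSD id as its first
# argument (patch 1/7); "version" is used here only as an illustrative
# OSD-level command.
osd_version = ceph.osd_command(0, "version")

Funnelling all three command paths through `_json_command()` keeps the JSON decoding and error handling in one place, which is what makes the manager- and OSD-level variants in patch 2/7 cheap to add, and it is the same "osd dump" payload stored by `AnalyzeCephHandler` that lets the MDS, RGW, and OSD pool handlers share `get_osd_pool_configurations_from_osd_dump()` instead of re-querying the cluster.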