From b189ed19c9e2b77d98ba41999de76c264e1028fb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jan-Marten=20Br=C3=BCggemann?=
Date: Mon, 11 Mar 2024 17:01:48 +0100
Subject: [PATCH] implement create_cluster module
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Jan-Marten Brüggemann
---
 .gitignore                                    |   1 +
 src/config.example.yaml                       |  10 +-
 src/rookify/modules/analyze_ceph/main.py      |   2 +-
 .../modules/create_cluster/__init__.py        |   1 +
 src/rookify/modules/create_cluster/main.py    | 102 +++++
 .../create_cluster/templates/cluster.yaml.j2  | 347 ++++++++++++++++++
 src/rookify/modules/example/main.py           |   4 +-
 src/rookify/modules/migrate_osds/main.py      |   2 +-
 src/rookify/modules/module.py                 |  69 +++-
 9 files changed, 517 insertions(+), 21 deletions(-)
 create mode 100644 src/rookify/modules/create_cluster/main.py
 create mode 100644 src/rookify/modules/create_cluster/templates/cluster.yaml.j2

diff --git a/.gitignore b/.gitignore
index d8dd9a3..8174be4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -89,3 +89,4 @@ cython_debug/
 data.yaml
 config.yaml
 .ceph
+.k8s
diff --git a/src/config.example.yaml b/src/config.example.yaml
index 7e60f64..08c759b 100644
--- a/src/config.example.yaml
+++ b/src/config.example.yaml
@@ -19,8 +19,14 @@ ssh:
   user: dragon
 
 kubernetes:
-  host: 192.168.22.10
-  api_key: abc
+  config: ../.k8s/config
+
+rook:
+  cluster:
+    name: osism-ceph
+    namespace: osism-ceph
+  ceph:
+    image: quay.io/ceph/ceph:v18.2.1
 
 migration_modules:
   - migrate_osds
diff --git a/src/rookify/modules/analyze_ceph/main.py b/src/rookify/modules/analyze_ceph/main.py
index cc90105..6cb023f 100644
--- a/src/rookify/modules/analyze_ceph/main.py
+++ b/src/rookify/modules/analyze_ceph/main.py
@@ -6,7 +6,7 @@
 
 
 class AnalyzeCephHandler(ModuleHandler):
-    def run(self) -> Dict[str, Any]:
+    def run(self) -> Any:
         commands = ["mon dump", "osd dump", "device ls", "fs dump", "node ls"]
 
         results: Dict[str, Any] = dict()
diff --git a/src/rookify/modules/create_cluster/__init__.py b/src/rookify/modules/create_cluster/__init__.py
index 5726e6f..8bb0ef5 100644
--- a/src/rookify/modules/create_cluster/__init__.py
+++ b/src/rookify/modules/create_cluster/__init__.py
@@ -1,4 +1,5 @@
 # -*- coding: utf-8 -*-
+# type: ignore
 
 from .main import CreateClusterHandler
 
diff --git a/src/rookify/modules/create_cluster/main.py b/src/rookify/modules/create_cluster/main.py
new file mode 100644
index 0000000..1733fad
--- /dev/null
+++ b/src/rookify/modules/create_cluster/main.py
@@ -0,0 +1,102 @@
+# -*- coding: utf-8 -*-
+
+from ..module import ModuleHandler, ModuleException
+
+from typing import Any
+import kubernetes
+
+
+class CreateClusterHandler(ModuleHandler):
+    def __create_cluster_definition(self) -> Any:
+        try:
+            node_ls_data = self._data["analyze_ceph"]["node"]["ls"]
+
+            # Get monitor count
+            mon_count = 0
+            for node, mons in node_ls_data["mon"].items():
+                mon_count += 1
+                if len(mons) > 1:
+                    raise ModuleException(
+                        f"There is more than one mon running on node {node}"
+                    )
+
+            # Get manager count
+            mgr_count = 0
+            for node, mgrs in node_ls_data["mgr"].items():
+                mgr_count += 1
+                if len(mgrs) > 1:
+                    raise ModuleException(
+                        f"There is more than one mgr running on node {node}"
+                    )
+
+            # Render cluster config from template
+            self.__cluster_name = self._config["rook"]["cluster"]["name"]
+            self.__cluster_namespace = self._config["rook"]["cluster"]["namespace"]
+            self.__cluster_image = self._config["rook"]["ceph"]["image"]
+            self.__mon_placement_label = (
+                self._config["rook"]["cluster"]["mon_placement_label"]
+                if "mon_placement_label" in self._config["rook"]["cluster"]
+                else f"placement-{self.__cluster_name}-mon"
+            )
+            self.__mgr_placement_label = (
+                self._config["rook"]["cluster"]["mgr_placement_label"]
+                if "mgr_placement_label" in self._config["rook"]["cluster"]
+                else f"placement-{self.__cluster_name}-mgr"
+            )
+            self.__cluster_definition = self.load_template(
+                "cluster.yaml.j2",
+                cluster_name=self.__cluster_name,
+                cluster_namespace=self.__cluster_namespace,
+                ceph_image=self.__cluster_image,
+                mon_count=mon_count,
+                mgr_count=mgr_count,
+                mon_placement_label=self.__mon_placement_label,
+                mgr_placement_label=self.__mgr_placement_label,
+            )
+
+        except KeyError:
+            raise ModuleException("Ceph monitor data is incomplete")
+
+    def __check_k8s_prerequisites(self) -> None:
+        # We have to check if our placement labels are disabled or unset
+        nodes = self.k8s.CoreV1Api.list_node().items
+        for node in nodes:
+            node_labels = node.metadata.labels
+            if (
+                self.__mon_placement_label in node_labels
+                and node_labels[self.__mon_placement_label] == "enabled"
+            ):
+                raise ModuleException(
+                    f"Label {self.__mon_placement_label} is set on node {node.metadata.name}"
+                )
+            if (
+                self.__mgr_placement_label in node_labels
+                and node_labels[self.__mgr_placement_label] == "enabled"
+            ):
+                raise ModuleException(
+                    f"Label {self.__mgr_placement_label} is set on node {node.metadata.name}"
+                )
+
+    def preflight(self) -> None:
+        self.__create_cluster_definition()
+        self.__check_k8s_prerequisites()
+
+    def run(self) -> Any:
+        # Create Namespace
+        namespace = kubernetes.client.V1Namespace(
+            metadata=kubernetes.client.V1ObjectMeta(name=self.__cluster_namespace)
+        )
+        try:
+            self.k8s.CoreV1Api.create_namespace(namespace)
+        except kubernetes.client.exceptions.ApiException as err:
+            if err.reason != "Conflict":
+                raise ModuleException(f"Could not create namespace: {err.reason}")
+
+        # Create CephCluster
+        result = self.k8s.crd_api_apply(
+            manifest=self.__cluster_definition.yaml
+        ).to_dict()
+
+        # Check Cluster
+
+        return result
diff --git a/src/rookify/modules/create_cluster/templates/cluster.yaml.j2 b/src/rookify/modules/create_cluster/templates/cluster.yaml.j2
new file mode 100644
index 0000000..f49ac6b
--- /dev/null
+++ b/src/rookify/modules/create_cluster/templates/cluster.yaml.j2
@@ -0,0 +1,347 @@
+#################################################################################################################
+# Define the settings for the rook-ceph cluster with common settings for a production cluster.
+# All nodes with available raw devices will be used for the Ceph cluster. At least three nodes are required
+# in this example. See the documentation for more details on storage settings available.
+
+# For example, to create the cluster:
+# kubectl create -f crds.yaml -f common.yaml -f operator.yaml
+# kubectl create -f cluster.yaml
+#################################################################################################################
+
+apiVersion: ceph.rook.io/v1
+kind: CephCluster
+metadata:
+  name: {{ cluster_name }}
+  namespace: {{ cluster_namespace }}
+spec:
+  cephVersion:
+    # The container image used to launch the Ceph daemon pods (mon, mgr, osd, mds, rgw).
+    # v17 is Quincy, v18 is Reef.
+    # RECOMMENDATION: In production, use a specific version tag instead of the general v17 flag, which pulls the latest release and could result in different
+    # versions running within the cluster. See tags available at https://hub.docker.com/r/ceph/ceph/tags/.
+ # If you want to be more precise, you can always use a timestamp tag such as quay.io/ceph/ceph:v18.2.1-20240103 + # This tag might not contain a new Ceph version, just security fixes from the underlying operating system, which will reduce vulnerabilities + image: {{ ceph_image }} + # Whether to allow unsupported versions of Ceph. Currently `quincy` and `reef` are supported. + # Future versions such as `squid` (v19) would require this to be set to `true`. + # Do not set to true in production. + allowUnsupported: false + # The path on the host where configuration files will be persisted. Must be specified. + # Important: if you reinstall the cluster, make sure you delete this directory from each host or else the mons will fail to start on the new cluster. + # In Minikube, the '/data' directory is configured to persist across reboots. Use "/data/rook" in Minikube environment. + dataDirHostPath: /var/lib/rook + # Whether or not upgrade should continue even if a check fails + # This means Ceph's status could be degraded and we don't recommend upgrading but you might decide otherwise + # Use at your OWN risk + # To understand Rook's upgrade process of Ceph, read https://rook.io/docs/rook/latest/ceph-upgrade.html#ceph-version-upgrades + skipUpgradeChecks: false + # Whether or not continue if PGs are not clean during an upgrade + continueUpgradeAfterChecksEvenIfNotHealthy: false + # WaitTimeoutForHealthyOSDInMinutes defines the time (in minutes) the operator would wait before an OSD can be stopped for upgrade or restart. + # If the timeout exceeds and OSD is not ok to stop, then the operator would skip upgrade for the current OSD and proceed with the next one + # if `continueUpgradeAfterChecksEvenIfNotHealthy` is `false`. If `continueUpgradeAfterChecksEvenIfNotHealthy` is `true`, then operator would + # continue with the upgrade of an OSD even if its not ok to stop after the timeout. This timeout won't be applied if `skipUpgradeChecks` is `true`. + # The default wait timeout is 10 minutes. + waitTimeoutForHealthyOSDInMinutes: 10 + mon: + # Set the number of mons to be started. Generally recommended to be 3. + # For highest availability, an odd number of mons should be specified. + count: {{ mon_count }} + # The mons should be on unique nodes. For production, at least 3 nodes are recommended for this reason. + # Mons should only be allowed on the same node for test environments where data loss is acceptable. + allowMultiplePerNode: false + mgr: + # When higher availability of the mgr is needed, increase the count to 2. + # In that case, one mgr will be active and one in standby. When Ceph updates which + # mgr is active, Rook will update the mgr services to match the active mgr. + count: {{ mgr_count }} + allowMultiplePerNode: false + modules: + # List of modules to optionally enable or disable. + # Note the "dashboard" and "monitoring" modules are already configured by other settings in the cluster CR. + # - name: rook + # enabled: true + # enable the ceph dashboard for viewing cluster status + dashboard: + enabled: true + # serve the dashboard under a subpath (useful when you are accessing the dashboard via a reverse proxy) + # urlPrefix: /ceph-dashboard + # serve the dashboard at the given port. 
+ # port: 8443 + # serve the dashboard using SSL + ssl: true + # The url of the Prometheus instance + # prometheusEndpoint: ://: + # Whether SSL should be verified if the Prometheus server is using https + # prometheusEndpointSSLVerify: false + # enable prometheus alerting for cluster + monitoring: + # requires Prometheus to be pre-installed + enabled: false + # Whether to disable the metrics reported by Ceph. If false, the prometheus mgr module and Ceph exporter are enabled. + # If true, the prometheus mgr module and Ceph exporter are both disabled. Default is false. + metricsDisabled: false + network: + connections: + # Whether to encrypt the data in transit across the wire to prevent eavesdropping the data on the network. + # The default is false. When encryption is enabled, all communication between clients and Ceph daemons, or between Ceph daemons will be encrypted. + # When encryption is not enabled, clients still establish a strong initial authentication and data integrity is still validated with a crc check. + # IMPORTANT: Encryption requires the 5.11 kernel for the latest nbd and cephfs drivers. Alternatively for testing only, + # you can set the "mounter: rbd-nbd" in the rbd storage class, or "mounter: fuse" in the cephfs storage class. + # The nbd and fuse drivers are *not* recommended in production since restarting the csi driver pod will disconnect the volumes. + encryption: + enabled: false + # Whether to compress the data in transit across the wire. The default is false. + # See the kernel requirements above for encryption. + compression: + enabled: false + # Whether to require communication over msgr2. If true, the msgr v1 port (6789) will be disabled + # and clients will be required to connect to the Ceph cluster with the v2 port (3300). + # Requires a kernel that supports msgr v2 (kernel 5.11 or CentOS 8.4 or newer). + requireMsgr2: false + # enable host networking + provider: host + # enable the Multus network provider + #provider: multus + #selectors: + # The selector keys are required to be `public` and `cluster`. + # Based on the configuration, the operator will do the following: + # 1. if only the `public` selector key is specified both public_network and cluster_network Ceph settings will listen on that interface + # 2. if both `public` and `cluster` selector keys are specified the first one will point to 'public_network' flag and the second one to 'cluster_network' + # + # In order to work, each selector value must match a NetworkAttachmentDefinition object in Multus + # + # public: public-conf --> NetworkAttachmentDefinition object name in Multus + # cluster: cluster-conf --> NetworkAttachmentDefinition object name in Multus + # Provide internet protocol version. IPv6, IPv4 or empty string are valid options. Empty string would mean IPv4 + #ipFamily: "IPv6" + # Ceph daemons to listen on both IPv4 and Ipv6 networks + #dualStack: false + # Enable multiClusterService to export the mon and OSD services to peer cluster. + # This is useful to support RBD mirroring between two clusters having overlapping CIDRs. + # Ensure that peer clusters are connected using an MCS API compatible application, like Globalnet Submariner. + #multiClusterService: + # enabled: false + + # enable the crash collector for ceph daemon crash collection + crashCollector: + disable: false + # Uncomment daysToRetain to prune ceph crash entries older than the + # specified number of days. 
+ #daysToRetain: 30 + # enable log collector, daemons will log on files and rotate + logCollector: + enabled: true + periodicity: daily # one of: hourly, daily, weekly, monthly + maxLogSize: 500M # SUFFIX may be 'M' or 'G'. Must be at least 1M. + # automate [data cleanup process](https://github.com/rook/rook/blob/master/Documentation/Storage-Configuration/ceph-teardown.md#delete-the-data-on-hosts) in cluster destruction. + cleanupPolicy: + # Since cluster cleanup is destructive to data, confirmation is required. + # To destroy all Rook data on hosts during uninstall, confirmation must be set to "yes-really-destroy-data". + # This value should only be set when the cluster is about to be deleted. After the confirmation is set, + # Rook will immediately stop configuring the cluster and only wait for the delete command. + # If the empty string is set, Rook will not destroy any data on hosts during uninstall. + confirmation: "" + # sanitizeDisks represents settings for sanitizing OSD disks on cluster deletion + sanitizeDisks: + # method indicates if the entire disk should be sanitized or simply ceph's metadata + # in both case, re-install is possible + # possible choices are 'complete' or 'quick' (default) + method: quick + # dataSource indicate where to get random bytes from to write on the disk + # possible choices are 'zero' (default) or 'random' + # using random sources will consume entropy from the system and will take much more time then the zero source + dataSource: zero + # iteration overwrite N times instead of the default (1) + # takes an integer value + iteration: 1 + # allowUninstallWithVolumes defines how the uninstall should be performed + # If set to true, cephCluster deletion does not wait for the PVs to be deleted. + allowUninstallWithVolumes: false + # To control where various services will be scheduled by kubernetes, use the placement configuration sections below. + # The example under 'all' would have all services scheduled on kubernetes nodes labeled with 'role=storage-node' and + # tolerate taints with a key of 'storage-node'. + # placement: + # all: + # nodeAffinity: + # requiredDuringSchedulingIgnoredDuringExecution: + # nodeSelectorTerms: + # - matchExpressions: + # - key: role + # operator: In + # values: + # - storage-node + # podAffinity: + # podAntiAffinity: + # topologySpreadConstraints: + # tolerations: + # - key: storage-node + # operator: Exists + # The above placement information can also be specified for mon, osd, and mgr components + # mon: + placement: + mon: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: {{ mon_placement_label }} + operator: In + values: + - enabled + mgr: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: {{ mgr_placement_label }} + operator: In + values: + - enabled + + # Monitor deployments may contain an anti-affinity rule for avoiding monitor + # collocation on the same node. This is a required rule when host network is used + # or when AllowMultiplePerNode is false. Otherwise this anti-affinity rule is a + # preferred rule with weight: 50. + # osd: + # prepareosd: + # mgr: + # cleanup: + annotations: + # all: + # mon: + # osd: + # cleanup: + # prepareosd: + # clusterMetadata annotations will be applied to only `rook-ceph-mon-endpoints` configmap and the `rook-ceph-mon` and `rook-ceph-admin-keyring` secrets. + # And clusterMetadata annotations will not be merged with `all` annotations. 
+ # clusterMetadata: + # kubed.appscode.com/sync: "true" + # If no mgr annotations are set, prometheus scrape annotations will be set by default. + # mgr: + labels: + # all: + # mon: + # osd: + # cleanup: + # mgr: + # prepareosd: + # monitoring is a list of key-value pairs. It is injected into all the monitoring resources created by operator. + # These labels can be passed as LabelSelector to Prometheus + # monitoring: + # crashcollector: + resources: + #The requests and limits set here, allow the mgr pod to use half of one CPU core and 1 gigabyte of memory + # mgr: + # limits: + # memory: "1024Mi" + # requests: + # cpu: "500m" + # memory: "1024Mi" + # The above example requests/limits can also be added to the other components + # mon: + # osd: + # For OSD it also is a possible to specify requests/limits based on device class + # osd-hdd: + # osd-ssd: + # osd-nvme: + # prepareosd: + # mgr-sidecar: + # crashcollector: + # logcollector: + # cleanup: + # exporter: + # The option to automatically remove OSDs that are out and are safe to destroy. + removeOSDsIfOutAndSafeToRemove: false + priorityClassNames: + #all: rook-ceph-default-priority-class + mon: system-node-critical + osd: system-node-critical + mgr: system-cluster-critical + #crashcollector: rook-ceph-crashcollector-priority-class + storage: # cluster level storage configuration and selection + useAllNodes: false + useAllDevices: false + #deviceFilter: + config: + # crushRoot: "custom-root" # specify a non-default root label for the CRUSH map + # metadataDevice: "md0" # specify a non-rotational storage so ceph-volume will use it as block db device of bluestore. + # databaseSizeMB: "1024" # uncomment if the disks are smaller than 100 GB + osdsPerDevice: "1" # this value can be overridden at the node or device level + encryptedDevice: "true" # the default value for this option is "false" + # Individual nodes and their config can be specified as well, but 'useAllNodes' above must be set to false. Then, only the named + # nodes below will be used as storage resources. Each node's 'name' field should match their 'kubernetes.io/hostname' label. + # nodes: + # - name: "172.17.4.201" + # devices: # specific devices to use for storage can be specified for each node + # - name: "sdb" + # - name: "nvme01" # multiple osds can be created on high performance devices + # config: + # osdsPerDevice: "5" + # - name: "/dev/disk/by-id/ata-ST4000DM004-XXXX" # devices can be specified using full udev paths + # config: # configuration can be specified at the node level which overrides the cluster level config + # - name: "172.17.4.301" + # deviceFilter: "^sd." + # when onlyApplyOSDPlacement is false, will merge both placement.All() and placement.osd + onlyApplyOSDPlacement: false + # Time for which an OSD pod will sleep before restarting, if it stopped due to flapping + # flappingRestartIntervalHours: 24 + # The section for configuring management of daemon disruptions during upgrade or fencing. + disruptionManagement: + # If true, the operator will create and manage PodDisruptionBudgets for OSD, Mon, RGW, and MDS daemons. OSD PDBs are managed dynamically + # via the strategy outlined in the [design](https://github.com/rook/rook/blob/master/design/ceph/ceph-managed-disruptionbudgets.md). The operator will + # block eviction of OSDs by default and unblock them safely when drains are detected. 
+ managePodBudgets: true + # A duration in minutes that determines how long an entire failureDomain like `region/zone/host` will be held in `noout` (in addition to the + # default DOWN/OUT interval) when it is draining. This is only relevant when `managePodBudgets` is `true`. The default value is `30` minutes. + osdMaintenanceTimeout: 30 + # A duration in minutes that the operator will wait for the placement groups to become healthy (active+clean) after a drain was completed and OSDs came back up. + # Operator will continue with the next drain if the timeout exceeds. It only works if `managePodBudgets` is `true`. + # No values or 0 means that the operator will wait until the placement groups are healthy before unblocking the next drain. + pgHealthCheckTimeout: 0 + + # csi defines CSI Driver settings applied per cluster. + csi: + readAffinity: + # Enable read affinity to enable clients to optimize reads from an OSD in the same topology. + # Enabling the read affinity may cause the OSDs to consume some extra memory. + # For more details see this doc: + # https://rook.io/docs/rook/latest/Storage-Configuration/Ceph-CSI/ceph-csi-drivers/#enable-read-affinity-for-rbd-volumes + enabled: false + + # cephfs driver specific settings. + cephfs: + # Set CephFS Kernel mount options to use https://docs.ceph.com/en/latest/man/8/mount.ceph/#options. + # kernelMountOptions: "" + # Set CephFS Fuse mount options to use https://docs.ceph.com/en/quincy/man/8/ceph-fuse/#options. + # fuseMountOptions: "" + + # healthChecks + # Valid values for daemons are 'mon', 'osd', 'status' + healthCheck: + daemonHealth: + mon: + disabled: false + interval: 45s + osd: + disabled: false + interval: 60s + status: + disabled: false + interval: 60s + # Change pod liveness probe timing or threshold values. Works for all mon,mgr,osd daemons. + livenessProbe: + mon: + disabled: false + mgr: + disabled: false + osd: + disabled: false + # Change pod startup probe timing or threshold values. Works for all mon,mgr,osd daemons. 
+ startupProbe: + mon: + disabled: false + mgr: + disabled: false + osd: + disabled: false diff --git a/src/rookify/modules/example/main.py b/src/rookify/modules/example/main.py index e52f1e8..e62394c 100644 --- a/src/rookify/modules/example/main.py +++ b/src/rookify/modules/example/main.py @@ -2,7 +2,7 @@ from ..module import ModuleHandler, ModuleException -from typing import Any, Dict +from typing import Any class ExampleHandler(ModuleHandler): @@ -10,6 +10,6 @@ def preflight(self) -> None: # Do something for checking if all needed preconditions are met else throw ModuleException raise ModuleException("Example module was loaded, so aborting!") - def run(self) -> Dict[str, Any]: + def run(self) -> Any: # Run the migration tasks return {} diff --git a/src/rookify/modules/migrate_osds/main.py b/src/rookify/modules/migrate_osds/main.py index 7a8f2d1..5377671 100644 --- a/src/rookify/modules/migrate_osds/main.py +++ b/src/rookify/modules/migrate_osds/main.py @@ -11,7 +11,7 @@ def preflight(self) -> None: # result = self.ceph.mon_command("osd dump") # raise ModuleException('test error') - def run(self) -> Dict[str, Any]: + def run(self) -> Any: osd_config: Dict[str, Any] = dict() for node, osds in self._data["analyze_ceph"]["node"]["ls"]["osd"].items(): osd_config[node] = {"osds": {}} diff --git a/src/rookify/modules/module.py b/src/rookify/modules/module.py index a4b65fd..a9e3d79 100644 --- a/src/rookify/modules/module.py +++ b/src/rookify/modules/module.py @@ -24,7 +24,7 @@ class __Ceph: def __init__(self, config: Dict[str, Any]): try: self.__ceph = rados.Rados( - conffile=config["conf_file"], conf={"keyring": config["keyring"]} + conffile=config["config"], conf={"keyring": config["keyring"]} ) self.__ceph.connect() except rados.ObjectNotFound as err: @@ -44,10 +44,11 @@ def mon_command( class __K8s: def __init__(self, config: Dict[str, Any]): - k8s_config = kubernetes.client.Configuration() - k8s_config.api_key = config["api_key"] - k8s_config.host = config["host"] + k8s_config = kubernetes.config.load_kube_config( + config_file=config["config"] + ) self.__client = kubernetes.client.ApiClient(k8s_config) + self.__dynamic_client: Optional[kubernetes.dynamic.DynamicClient] = None @property def CoreV1Api(self) -> kubernetes.client.CoreV1Api: @@ -61,6 +62,39 @@ def AppsV1Api(self) -> kubernetes.client.AppsV1Api: def NodeV1Api(self) -> kubernetes.client.NodeV1Api: return kubernetes.client.NodeV1Api(self.__client) + @property + def DynamicClient(self) -> kubernetes.dynamic.DynamicClient: + if not self.__dynamic_client: + self.__dynamic_client = kubernetes.dynamic.DynamicClient(self.__client) + return self.__dynamic_client + + def CRDApi( + self, api_version: str, kind: str + ) -> kubernetes.dynamic.resource.Resource: + return self.DynamicClient.resources.get(api_version=api_version, kind=kind) + + def crd_api_apply( + self, manifest: Dict[Any, Any] + ) -> kubernetes.dynamic.resource.ResourceInstance: + """ + This applies a manifest for custom CRDs + See https://github.com/kubernetes-client/python/issues/1792 for more information + :param manifest: Dict of the kubernetes manifest + """ + api_version = manifest["apiVersion"] + kind = manifest["kind"] + resource_name = manifest["metadata"]["name"] + namespace = manifest["metadata"]["namespace"] + crd_api = self.CRDApi(api_version=api_version, kind=kind) + + try: + crd_api.get(namespace=namespace, name=resource_name) + return crd_api.patch( + body=manifest, content_type="application/merge-patch+json" + ) + except 
kubernetes.dynamic.exceptions.NotFoundError: + return crd_api.create(body=manifest, namespace=namespace) + class __SSH: def __init__(self, config: Dict[str, Any]): self.__config = config @@ -87,35 +121,40 @@ def command(self, host: str, command: str) -> fabric.runners.Result: class __Template: def __init__(self, template_path: str): - self.__result_raw = None - self.__result_yaml = None - self.__template_path = template_path + self.__result_raw: Optional[str] = None + self.__result_yaml: Optional[Any] = None + self.__template_path: str = template_path with open(template_path) as file: self.__template = jinja2.Template(file.read()) - def render(self, **variables): + def render(self, **variables: Any) -> None: self.__result_raw = self.__template.render(**variables) self.__result_yaml = None @property - def raw(self): + def raw(self) -> str: + if not self.__result_raw: + raise ModuleException("Template was not rendered") return self.__result_raw @property - def yaml(self): + def yaml(self) -> Any: if not self.__result_yaml: - self.__result_yaml = yaml.safe_load(self.__result_raw) + self.__result_yaml = yaml.safe_load(self.raw) return self.__result_yaml - def __init__(self, config: dict, data: dict, module_path: str): + def __init__(self, config: Dict[str, Any], data: Dict[str, Any], module_path: str): """ Construct a new 'ModuleHandler' object. - :param module_data: The config and results from modules + :param config: The global config file + :param data: The output of modules required by this module + :param module_path: The filesystem path of this module :return: returns nothing """ self._config = config self._data = data + self.__module_path = module_path self.__ceph: Optional[ModuleHandler.__Ceph] = None self.__k8s: Optional[ModuleHandler.__K8s] = None self.__ssh: Optional[ModuleHandler.__SSH] = None @@ -154,8 +193,8 @@ def ssh(self) -> __SSH: self.__ssh = ModuleHandler.__SSH(self._config["ssh"]) return self.__ssh - def load_template(self, filename: str, **variables) -> __Template: + def load_template(self, filename: str, **variables: Any) -> __Template: template_path = os.path.join(self.__module_path, "templates", filename) - template = self.__Template(template_path) + template = ModuleHandler.__Template(template_path) template.render(**variables) return template
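
Usage note (not part of the commit): the flow added above can be exercised standalone against a cluster. The snippet below is a minimal sketch of the same "create namespace, then get/merge-patch-or-create the CephCluster" pattern that CreateClusterHandler.run() and ModuleHandler.crd_api_apply() implement, using only kubernetes client calls that already appear in this patch. The function name apply_ceph_cluster and its kubeconfig/manifest_path parameters are illustrative assumptions, not part of the module.

# Sketch only: mirrors the namespace + CephCluster apply flow of this patch.
import yaml
import kubernetes.client
import kubernetes.config
from kubernetes.dynamic import DynamicClient
from kubernetes.dynamic.exceptions import NotFoundError


def apply_ceph_cluster(kubeconfig: str, manifest_path: str) -> None:
    # Build an ApiClient from a kubeconfig file, matching the new "kubernetes: config:" setting.
    client = kubernetes.config.new_client_from_config(config_file=kubeconfig)
    core_v1 = kubernetes.client.CoreV1Api(client)

    with open(manifest_path) as fd:
        manifest = yaml.safe_load(fd)

    namespace = manifest["metadata"]["namespace"]
    name = manifest["metadata"]["name"]

    # Namespace creation tolerates 409 Conflict, so repeated runs are harmless.
    try:
        core_v1.create_namespace(
            kubernetes.client.V1Namespace(
                metadata=kubernetes.client.V1ObjectMeta(name=namespace)
            )
        )
    except kubernetes.client.exceptions.ApiException as err:
        if err.reason != "Conflict":
            raise

    # "Get, then merge-patch or create" keeps the CRD apply idempotent,
    # which is what crd_api_apply() provides to module handlers.
    crd_api = DynamicClient(client).resources.get(
        api_version=manifest["apiVersion"], kind=manifest["kind"]
    )
    try:
        crd_api.get(namespace=namespace, name=name)
        crd_api.patch(body=manifest, content_type="application/merge-patch+json")
    except NotFoundError:
        crd_api.create(body=manifest, namespace=namespace)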