From 28ea84441a5c88f036437e6a156ab699da58151d Mon Sep 17 00:00:00 2001 From: "Jiawei \"Tyler\" Gu" <47795840+tylergu@users.noreply.github.com> Date: Mon, 29 Jan 2024 02:24:43 -0600 Subject: [PATCH 1/4] Prepare system state interface and collection (#313) * Prepare system state interface and collection Signed-off-by: Tyler Gu <jiaweig3@illinois.edu> * Fix system state test Signed-off-by: Tyler Gu <jiaweig3@illinois.edu> * Fix temporary file creation in test Signed-off-by: Tyler Gu <jiaweig3@illinois.edu> --------- Signed-off-by: Tyler Gu <jiaweig3@illinois.edu> --- acto/cli/collect_system_state.py | 58 ++++ acto/system_state/__init__.py | 0 acto/system_state/cluster_role.py | 34 +++ acto/system_state/cluster_role_binding.py | 35 +++ acto/system_state/config_map.py | 35 +++ acto/system_state/cron_job.py | 35 +++ acto/system_state/daemon_set.py | 68 +++++ acto/system_state/deployment.py | 74 ++++++ acto/system_state/endpoints.py | 35 +++ acto/system_state/ingress.py | 41 +++ acto/system_state/job.py | 35 +++ acto/system_state/kubernetes_object.py | 250 ++++++++++++++++++ acto/system_state/kubernetes_system_state.py | 234 ++++++++++++++++ acto/system_state/network_policy.py | 37 +++ acto/system_state/persistent_volume.py | 32 +++ acto/system_state/persistent_volume_claim.py | 37 +++ acto/system_state/pod.py | 64 +++++ acto/system_state/replica_set.py | 56 ++++ acto/system_state/role.py | 37 +++ acto/system_state/role_binding.py | 37 +++ acto/system_state/secret.py | 35 +++ acto/system_state/service.py | 40 +++ acto/system_state/service_account.py | 37 +++ acto/system_state/stateful_set.py | 71 +++++ acto/system_state/storage_class.py | 32 +++ .../test_kubernetes_system_state.py | 66 +++++ 26 files changed, 1515 insertions(+) create mode 100644 acto/cli/collect_system_state.py create mode 100644 acto/system_state/__init__.py create mode 100644 acto/system_state/cluster_role.py create mode 100644 acto/system_state/cluster_role_binding.py create mode 100644 acto/system_state/config_map.py create mode 100644 acto/system_state/cron_job.py create mode 100644 acto/system_state/daemon_set.py create mode 100644 acto/system_state/deployment.py create mode 100644 acto/system_state/endpoints.py create mode 100644 acto/system_state/ingress.py create mode 100644 acto/system_state/job.py create mode 100644 acto/system_state/kubernetes_object.py create mode 100644 acto/system_state/kubernetes_system_state.py create mode 100644 acto/system_state/network_policy.py create mode 100644 acto/system_state/persistent_volume.py create mode 100644 acto/system_state/persistent_volume_claim.py create mode 100644 acto/system_state/pod.py create mode 100644 acto/system_state/replica_set.py create mode 100644 acto/system_state/role.py create mode 100644 acto/system_state/role_binding.py create mode 100644 acto/system_state/secret.py create mode 100644 acto/system_state/service.py create mode 100644 acto/system_state/service_account.py create mode 100644 acto/system_state/stateful_set.py create mode 100644 acto/system_state/storage_class.py create mode 100644 test/integration_tests/test_kubernetes_system_state.py diff --git a/acto/cli/collect_system_state.py b/acto/cli/collect_system_state.py new file mode 100644 index 0000000000..59ee9233a4 --- /dev/null +++ b/acto/cli/collect_system_state.py @@ -0,0 +1,58 @@ +import argparse +import logging +import os + +from acto.common import kubernetes_client +from acto.system_state.kubernetes_system_state import KubernetesSystemState + + +def main(): + """Main function""" + parser = 
argparse.ArgumentParser( + description="Collect the system state of a Kubernetes cluster under a namespace" + " and dump it to a file. Check the health of the system state." + ) + parser.add_argument( + "--output", + required=False, + default="system_state.json", + help="Path to dump the system state to", + ) + parser.add_argument( + "--kubeconfig", + required=False, + default=f"{os.environ['HOME']}/.kube/config", + help="Path to the kubeconfig file", + ) + parser.add_argument( + "--kubecontext", + required=False, + default="kind-kind", + help="Name of the Kubernetes context to use", + ) + parser.add_argument( + "--namespace", + required=False, + default="default", + help="Namespace to collect the system state under", + ) + args = parser.parse_args() + + api_client = kubernetes_client(args.kubeconfig, args.kubecontext) + + system_state = KubernetesSystemState.from_api_client( + api_client, args.namespace + ) + system_state.dump(args.output) + logging.info("System state dumped to %s", args.output) + + health_status = system_state.check_health() + if health_status.is_healthy() is False: + logging.error( + "System state is not healthy with errors: \n%s", + str(health_status), + ) + + +if __name__ == "__main__": + main() diff --git a/acto/system_state/__init__.py b/acto/system_state/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/acto/system_state/cluster_role.py b/acto/system_state/cluster_role.py new file mode 100644 index 0000000000..f786162b6f --- /dev/null +++ b/acto/system_state/cluster_role.py @@ -0,0 +1,34 @@ +"""ClusterRole state model""" + +import kubernetes +import kubernetes.client.models as kubernetes_models +import pydantic +from typing_extensions import Self + +from acto.system_state.kubernetes_object import ( + KubernetesDictObject, + list_object_helper, +) + + +class ClusterRoleState(KubernetesDictObject): + """ClusterRole state object.""" + + root: dict[str, kubernetes_models.V1ClusterRole] + + @classmethod + def from_api_client(cls, api_client: kubernetes.client.ApiClient) -> Self: + data = list_object_helper( + kubernetes.client.RbacAuthorizationV1Api( + api_client + ).list_cluster_role, + ) + return cls.model_validate(data) + + def check_health(self) -> tuple[bool, str]: + """Check ClusterRole health""" + return True, "" + + @pydantic.model_serializer + def serialize(self): + return {key: value.to_dict() for key, value in self.root.items()} diff --git a/acto/system_state/cluster_role_binding.py b/acto/system_state/cluster_role_binding.py new file mode 100644 index 0000000000..097ea6fc50 --- /dev/null +++ b/acto/system_state/cluster_role_binding.py @@ -0,0 +1,35 @@ +"""ClusterRoleBinding state model""" + +import kubernetes +import kubernetes.client.models as kubernetes_models +import pydantic +from kubernetes.client.api_client import ApiClient +from typing_extensions import Self + +from acto.system_state.kubernetes_object import ( + KubernetesDictObject, + list_object_helper, +) + + +class ClusterRoleBindingState(KubernetesDictObject): + """ClusterRoleBinding state object.""" + + root: dict[str, kubernetes_models.V1ClusterRoleBinding] + + @classmethod + def from_api_client(cls, api_client: ApiClient) -> Self: + data = list_object_helper( + kubernetes.client.RbacAuthorizationV1Api( + api_client + ).list_cluster_role_binding, + ) + return cls.model_validate(data) + + def check_health(self) -> tuple[bool, str]: + """Check ClusterRoleBinding health""" + return True, "" + + @pydantic.model_serializer + def serialize(self): + return {key: 
value.to_dict() for key, value in self.root.items()} diff --git a/acto/system_state/config_map.py b/acto/system_state/config_map.py new file mode 100644 index 0000000000..bfa9fd45ab --- /dev/null +++ b/acto/system_state/config_map.py @@ -0,0 +1,35 @@ +"""ConfigMap state model""" + +import kubernetes +import kubernetes.client.models as kubernetes_models +import pydantic +from typing_extensions import Self + +from acto.system_state.kubernetes_object import ( + KubernetesNamespacedDictObject, + list_namespaced_object_helper, +) + + +class ConfigMapState(KubernetesNamespacedDictObject): + """ConfigMap state object.""" + + root: dict[str, kubernetes_models.V1ConfigMap] + + @classmethod + def from_api_client_namespaced( + cls, api_client: kubernetes.client.ApiClient, namespace: str + ) -> Self: + data = list_namespaced_object_helper( + kubernetes.client.CoreV1Api(api_client).list_namespaced_config_map, + namespace, + ) + return cls.model_validate(data) + + def check_health(self) -> tuple[bool, str]: + """Check if ConfigMap is healthy""" + return True, "" + + @pydantic.model_serializer + def serialize(self): + return {key: value.to_dict() for key, value in self.root.items()} diff --git a/acto/system_state/cron_job.py b/acto/system_state/cron_job.py new file mode 100644 index 0000000000..b51fc9df75 --- /dev/null +++ b/acto/system_state/cron_job.py @@ -0,0 +1,35 @@ +"""CronJob state model""" + +import kubernetes +import kubernetes.client.models as kubernetes_models +import pydantic +from typing_extensions import Self + +from acto.system_state.kubernetes_object import ( + KubernetesNamespacedDictObject, + list_namespaced_object_helper, +) + + +class CronJobState(KubernetesNamespacedDictObject): + """CronJob state object.""" + + root: dict[str, kubernetes_models.V1CronJob] + + @classmethod + def from_api_client_namespaced( + cls, api_client: kubernetes.client.ApiClient, namespace: str + ) -> Self: + data = list_namespaced_object_helper( + kubernetes.client.BatchV1Api(api_client).list_namespaced_cron_job, + namespace, + ) + return cls.model_validate(data) + + def check_health(self) -> tuple[bool, str]: + """Check CronJob health""" + return True, "" + + @pydantic.model_serializer + def serialize(self): + return {key: value.to_dict() for key, value in self.root.items()} diff --git a/acto/system_state/daemon_set.py b/acto/system_state/daemon_set.py new file mode 100644 index 0000000000..0f2fef6477 --- /dev/null +++ b/acto/system_state/daemon_set.py @@ -0,0 +1,68 @@ +"""DaemonSet state model""" +import kubernetes +import kubernetes.client.models as kubernetes_models +import pydantic +from typing_extensions import Self + +from acto.system_state.kubernetes_object import ( + KubernetesNamespacedDictObject, + list_namespaced_object_helper, +) + + +class DaemonSetState(KubernetesNamespacedDictObject): + """DaemonSet state object.""" + + root: dict[str, kubernetes_models.V1DaemonSet] + + @classmethod + def from_api_client_namespaced( + cls, api_client: kubernetes.client.ApiClient, namespace: str + ) -> Self: + data = list_namespaced_object_helper( + kubernetes.client.AppsV1Api(api_client).list_namespaced_daemon_set, + namespace, + ) + return cls.model_validate(data) + + def check_health(self) -> tuple[bool, str]: + """Check if DaemonSet is healthy + + Returns: + tuple[bool, str]: (is_healthy, reason) + """ + + for name, daemon_set in self.root.items(): + if ( + daemon_set.status.observed_generation + != daemon_set.metadata.generation + ): + return False, f"DaemonSet[{name}] generation mismatch" + + if ( + 
daemon_set.status.desired_number_scheduled + != daemon_set.status.number_ready + ): + return ( + False, + f"DaemonSet[{name}] replicas mismatch, " + + f"desired[{daemon_set.status.desired_number_scheduled}] " + + f"!= ready[{daemon_set.status.number_ready}]", + ) + + if daemon_set.status.conditions is not None: + for condition in daemon_set.status.conditions: + if ( + condition.type == "Progressing" + and condition.status != "True" + ): + return ( + False, + f"DaemonSet[{name}] is not progressing: {condition.message}", + ) + + return True, "" + + @pydantic.model_serializer + def serialize(self): + return {key: value.to_dict() for key, value in self.root.items()} diff --git a/acto/system_state/deployment.py b/acto/system_state/deployment.py new file mode 100644 index 0000000000..76189ae9f1 --- /dev/null +++ b/acto/system_state/deployment.py @@ -0,0 +1,74 @@ +"""Deployment state model""" +import kubernetes +import kubernetes.client.models as kubernetes_models +import pydantic +from typing_extensions import Self + +from acto.system_state.kubernetes_object import ( + KubernetesNamespacedDictObject, + list_namespaced_object_helper, +) + + +class DeploymentState(KubernetesNamespacedDictObject): + """Deployment state model""" + + root: dict[str, kubernetes_models.V1Deployment] + + @classmethod + def from_api_client_namespaced( + cls, api_client: kubernetes.client.ApiClient, namespace: str + ) -> Self: + data = list_namespaced_object_helper( + kubernetes.client.AppsV1Api(api_client).list_namespaced_deployment, + namespace, + ) + return cls.model_validate(data) + + def check_health(self) -> tuple[bool, str]: + """Check if deployment is healthy + + Returns: + tuple[bool, str]: (is_healthy, reason) + """ + + for name, deployment in self.root.items(): + if ( + deployment.status.observed_generation + != deployment.metadata.generation + ): + return False, f"Deployment[{name}] generation mismatch" + + if deployment.spec.replicas != deployment.status.ready_replicas: + return False, f"Deployment[{name}] replicas mismatch" + + if deployment.status.conditions is not None: + for condition in deployment.status.conditions: + if ( + condition.type == "Available" + and condition.status != "True" + ): + return False, f"Deployment[{name}] is not available" + if ( + condition.type == "Progressing" + and condition.status != "True" + ): + return False, f"Deployment[{name}] is not progressing" + + if deployment.status.replicas != deployment.status.ready_replicas: + return False, f"Deployment[{name}] replicas mismatch" + + if ( + deployment.status.unavailable_replicas != 0 + and deployment.status.unavailable_replicas is not None + ): + return ( + False, + f"[{name}] [{deployment.status.unavailable_replicas}] pods are unavailable", + ) + + return True, "" + + @pydantic.model_serializer + def serialize(self): + return {key: value.to_dict() for key, value in self.root.items()} diff --git a/acto/system_state/endpoints.py b/acto/system_state/endpoints.py new file mode 100644 index 0000000000..f2ae0bc006 --- /dev/null +++ b/acto/system_state/endpoints.py @@ -0,0 +1,35 @@ +"""Endpoints state model""" + +import kubernetes +import kubernetes.client.models as kubernetes_models +import pydantic +from typing_extensions import Self + +from acto.system_state.kubernetes_object import ( + KubernetesNamespacedDictObject, + list_namespaced_object_helper, +) + + +class EndpointsState(KubernetesNamespacedDictObject): + """Endpoints state object.""" + + root: dict[str, kubernetes_models.V1Endpoints] + + @classmethod + def 
from_api_client_namespaced( + cls, api_client: kubernetes.client.ApiClient, namespace: str + ) -> Self: + data = list_namespaced_object_helper( + kubernetes.client.CoreV1Api(api_client).list_namespaced_endpoints, + namespace, + ) + return cls.model_validate(data) + + def check_health(self) -> tuple[bool, str]: + """Check Endpoints health""" + return True, "" + + @pydantic.model_serializer + def serialize(self): + return {key: value.to_dict() for key, value in self.root.items()} diff --git a/acto/system_state/ingress.py b/acto/system_state/ingress.py new file mode 100644 index 0000000000..ceb3f93a1f --- /dev/null +++ b/acto/system_state/ingress.py @@ -0,0 +1,41 @@ +"""Ingress state model""" +import kubernetes +import kubernetes.client.models as kubernetes_models +import pydantic +from typing_extensions import Self + +from acto.system_state.kubernetes_object import ( + KubernetesNamespacedDictObject, + list_namespaced_object_helper, +) + + +class IngressState(KubernetesNamespacedDictObject): + """Ingress state object.""" + + root: dict[str, kubernetes_models.V1Ingress] + + @classmethod + def from_api_client_namespaced( + cls, api_client: kubernetes.client.ApiClient, namespace: str + ) -> Self: + data = list_namespaced_object_helper( + kubernetes.client.NetworkingV1Api( + api_client + ).list_namespaced_ingress, + namespace, + ) + return cls.model_validate(data) + + def check_health(self) -> tuple[bool, str]: + """Check if Ingress is healthy + + Returns: + tuple[bool, str]: (is_healthy, reason) + """ + + return True, "" + + @pydantic.model_serializer + def serialize(self): + return {key: value.to_dict() for key, value in self.root.items()} diff --git a/acto/system_state/job.py b/acto/system_state/job.py new file mode 100644 index 0000000000..e87c8a0975 --- /dev/null +++ b/acto/system_state/job.py @@ -0,0 +1,35 @@ +"""Job state model""" + +import kubernetes +import kubernetes.client.models as kubernetes_models +import pydantic +from typing_extensions import Self + +from acto.system_state.kubernetes_object import ( + KubernetesNamespacedDictObject, + list_namespaced_object_helper, +) + + +class JobState(KubernetesNamespacedDictObject): + """Job state object.""" + + root: dict[str, kubernetes_models.V1Job] + + @classmethod + def from_api_client_namespaced( + cls, api_client: kubernetes.client.ApiClient, namespace: str + ) -> Self: + data = list_namespaced_object_helper( + kubernetes.client.BatchV1Api(api_client).list_namespaced_job, + namespace, + ) + return cls.model_validate(data) + + def check_health(self) -> tuple[bool, str]: + """Check Job health""" + return True, "" + + @pydantic.model_serializer + def serialize(self): + return {key: value.to_dict() for key, value in self.root.items()} diff --git a/acto/system_state/kubernetes_object.py b/acto/system_state/kubernetes_object.py new file mode 100644 index 0000000000..9738a235a1 --- /dev/null +++ b/acto/system_state/kubernetes_object.py @@ -0,0 +1,250 @@ +"""Base class for all Kubernetes objects.""" + + +import abc +from typing import Any, Callable, Literal, TypeAlias + +import deepdiff +import deepdiff.model as deepdiff_model +import kubernetes +import kubernetes.client +import kubernetes.client.models as kubernetes_models +import pydantic +from deepdiff.helper import NotPresent +from typing_extensions import Self + +from acto.common import ( + EXCLUDE_PATH_REGEX, + Diff, + PropertyPath, + flatten_dict, + flatten_list, +) + + +class KubernetesResourceInterface(pydantic.BaseModel): + """Helper interface shared by all Kubernetes resources""" + 
+    model_config = pydantic.ConfigDict(arbitrary_types_allowed=True)
+
+    metadata: kubernetes_models.V1ObjectMeta
+
+
+class KubernetesListNamespacedObjectMethodReturnType(abc.ABC):
+    """Kubernetes list namespaced object method return type"""
+
+    @property
+    @abc.abstractmethod
+    def items(self) -> list[KubernetesResourceInterface]:
+        """Return Kubernetes list namespaced object method items"""
+
+
+KubernetesListNamespacedObjectMethod: TypeAlias = Callable[
+    ..., KubernetesListNamespacedObjectMethodReturnType
+]
+
+DiffType: TypeAlias = Literal[
+    "type_changes",
+    "values_changed",
+    "iterable_item_added",
+    "iterable_item_removed",
+    "dictionary_item_added",
+    "dictionary_item_removed",
+    "set_item_added",
+    "set_item_removed",
+    "attribute_added",
+    "attribute_removed",
+]
+
+# ObjectDiff: TypeAlias = dict[DiffType, dict[str, Diff]]
+
+
+class ObjectDiff(pydantic.RootModel):
+    """Object diff, based on TreeView of deepdiff"""
+
+    root: dict[DiffType, dict[str, Diff]]
+
+    @classmethod
+    def from_deepdiff(cls, diff: deepdiff_model.TreeResult) -> Self:
+        """Create ObjectDiff from deepdiff.DeepDiff"""
+        data: dict[DiffType, dict[str, Diff]] = {}
+
+        for category, changes in diff.items():
+            data[category] = {}
+            for change in changes:
+                # Heuristic
+                # When an entire dict/list is added or removed, flatten this
+                # dict/list to help field matching and value comparison in oracle
+
+                if (isinstance(change.t1, (dict, list))) and (
+                    change.t2 is None or isinstance(change.t2, NotPresent)
+                ):
+                    if isinstance(change.t1, dict):
+                        flattened_changes = flatten_dict(change.t1, [])
+                    else:
+                        flattened_changes = flatten_list(change.t1, [])
+                    for path, value in flattened_changes:
+                        if value is None or isinstance(value, NotPresent):
+                            continue
+                        str_path = change.path()
+                        for i in path:
+                            str_path += f"[{i}]"
+                        data[category][str_path] = Diff(
+                            prev=value,
+                            curr=change.t2,
+                            path=PropertyPath(
+                                change.path(output_format="list") + path
+                            ),
+                        )
+                elif (isinstance(change.t2, (dict, list))) and (
+                    change.t1 is None or isinstance(change.t1, NotPresent)
+                ):
+                    if isinstance(change.t2, dict):
+                        flattened_changes = flatten_dict(change.t2, [])
+                    else:
+                        flattened_changes = flatten_list(change.t2, [])
+                    for path, value in flattened_changes:
+                        if value is None or isinstance(value, NotPresent):
+                            continue
+                        str_path = change.path()
+                        for i in path:
+                            str_path += f"[{i}]"
+                        data[category][str_path] = Diff(
+                            prev=change.t1,
+                            curr=value,
+                            path=PropertyPath(
+                                change.path(output_format="list") + path
+                            ),
+                        )
+                else:
+                    data[category][change.path()] = Diff(
+                        prev=change.t1,
+                        curr=change.t2,
+                        path=PropertyPath(change.path(output_format="list")),
+                    )
+
+        return cls.model_validate(data)
+
+
+class KubernetesObject(abc.ABC, pydantic.RootModel):
+    """Base class for all Kubernetes objects."""
+
+    model_config = pydantic.ConfigDict(arbitrary_types_allowed=True)
+
+    root: Any
+
+    @classmethod
+    @abc.abstractmethod
+    def from_api_client(cls, api_client: kubernetes.client.ApiClient) -> Self:
+        """Create Kubernetes object from ApiClient"""
+        raise NotImplementedError()
+
+    def diff_from(self, other: Self) -> ObjectDiff:
+        """Diff with other Kubernetes object"""
+        return ObjectDiff.from_deepdiff(
+            deepdiff.DeepDiff(
+                self,
+                other,
+                exclude_regex_paths=EXCLUDE_PATH_REGEX,
+                view="tree",
+            )
+        )
+
+    @abc.abstractmethod
+    def check_health(self) -> tuple[bool, str]:
+        """Check if object is healthy
+
+        Returns:
+            tuple[bool, str]: (is_healthy, reason)
+        """
+        raise NotImplementedError()
+
+    @pydantic.model_serializer
+    def
serialize(self) -> dict: + """Serialize Kubernetes object""" + raise NotImplementedError() + + +class KubernetesNamespacedObject(KubernetesObject): + """Base class for all Kubernetes namespaced objects.""" + + @classmethod + @abc.abstractmethod + def from_api_client_namespaced( + cls, api_client: kubernetes.client.ApiClient, namespace: str + ) -> Self: + """Create Kubernetes object from ApiClient""" + raise NotImplementedError() + + @classmethod + def from_api_client(cls, api_client: kubernetes.client.ApiClient) -> Self: + return cls.from_api_client_namespaced(api_client, "default") + + +class KubernetesListObject(KubernetesObject): + """Base class for all Kubernetes objects stored as a list.""" + + root: list[Any] + + def __iter__(self): + return iter(self.root) + + def __getitem__(self, key: int) -> Any: + return self.root[key] + + +class KubernetesDictObject(KubernetesObject): + """Base class for all Kubernetes objects stored as a dict.""" + + root: dict[str, Any] + + def __iter__(self): + return iter(self.root) + + def __getitem__(self, key: str) -> Any: + return self.root[key] + + +class KubernetesNamespacedListObject(KubernetesNamespacedObject): + """Base class for all Kubernetes namespaced objects stored as a list.""" + + root: list[Any] + + def __iter__(self): + return iter(self.root) + + def __getitem__(self, key: int) -> Any: + return self.root[key] + + +class KubernetesNamespacedDictObject(KubernetesNamespacedObject): + """Base class for all Kubernetes namespaced objects stored as a dict.""" + + root: dict[str, Any] + + def __iter__(self): + return iter(self.root) + + def __getitem__(self, key: str) -> Any: + return self.root[key] + + +def list_object_helper( + method: KubernetesListNamespacedObjectMethod, +) -> dict[str, KubernetesResourceInterface]: + """List object helper""" + result = {} + for obj in method(watch=False).items: + result[obj.metadata.name] = obj + return result + + +def list_namespaced_object_helper( + method: KubernetesListNamespacedObjectMethod, + namespace: str, +) -> dict[str, KubernetesResourceInterface]: + """List namespaced object helper""" + result = {} + for obj in method(namespace=namespace, watch=False).items: + result[obj.metadata.name] = obj + return result diff --git a/acto/system_state/kubernetes_system_state.py b/acto/system_state/kubernetes_system_state.py new file mode 100644 index 0000000000..7205f9bf94 --- /dev/null +++ b/acto/system_state/kubernetes_system_state.py @@ -0,0 +1,234 @@ +"""Kubernetes system state model""" + +import json + +import kubernetes +import pydantic +from typing_extensions import Self + +from acto.serialization import ActoEncoder +from acto.system_state.cluster_role import ClusterRoleState +from acto.system_state.cluster_role_binding import ClusterRoleBindingState +from acto.system_state.config_map import ConfigMapState +from acto.system_state.cron_job import CronJobState +from acto.system_state.daemon_set import DaemonSetState +from acto.system_state.deployment import DeploymentState +from acto.system_state.endpoints import EndpointsState +from acto.system_state.ingress import IngressState +from acto.system_state.job import JobState +from acto.system_state.kubernetes_object import ObjectDiff +from acto.system_state.network_policy import NetworkPolicyState +from acto.system_state.persistent_volume import PersistentVolumeState +from acto.system_state.persistent_volume_claim import PersistentVolumeClaimState +from acto.system_state.pod import PodState +from acto.system_state.replica_set import ReplicaSetState +from 
acto.system_state.role import RoleState +from acto.system_state.role_binding import RoleBindingState +from acto.system_state.secret import SecretState +from acto.system_state.service import ServiceState +from acto.system_state.service_account import ServiceAccountState +from acto.system_state.stateful_set import StatefulSetState +from acto.system_state.storage_class import StorageClassState + + +class KubernetesSystemDiff(pydantic.BaseModel): + """Kubernetes system diff model""" + + cluster_role_binding: ObjectDiff + cluster_role: ObjectDiff + config_map: ObjectDiff + cron_job: ObjectDiff + daemon_set: ObjectDiff + deployment: ObjectDiff + endpoint: ObjectDiff + ingress: ObjectDiff + job: ObjectDiff + network_policy: ObjectDiff + persistent_volume_claim: ObjectDiff + persistent_volume: ObjectDiff + pod: ObjectDiff + replica_set: ObjectDiff + role_binding: ObjectDiff + role: ObjectDiff + secret: ObjectDiff + service_account: ObjectDiff + service: ObjectDiff + stateful_set: ObjectDiff + storage_class: ObjectDiff + + +class KubernetesSystemHealth(pydantic.BaseModel): + """Kubernetes system health status model""" + + daemon_set: tuple[bool, str] + deployment: tuple[bool, str] + job: tuple[bool, str] + pod: tuple[bool, str] + replica_set: tuple[bool, str] + stateful_set: tuple[bool, str] + + def is_healthy(self) -> bool: + """Check if Kubernetes system is healthy""" + return all( + [ + self.daemon_set[0], + self.deployment[0], + self.job[0], + self.pod[0], + self.replica_set[0], + self.stateful_set[0], + ] + ) + + def __str__(self) -> str: + ret = "" + if not self.daemon_set[0]: + ret += f"DaemonSet: {self.daemon_set[1]}\n" + if not self.deployment[0]: + ret += f"Deployment: {self.deployment[1]}\n" + if not self.job[0]: + ret += f"Job: {self.job[1]}\n" + if not self.pod[0]: + ret += f"Pod: {self.pod[1]}\n" + if not self.replica_set[0]: + ret += f"ReplicaSet: {self.replica_set[1]}\n" + if not self.stateful_set[0]: + ret += f"StatefulSet: {self.stateful_set[1]}\n" + return ret + + +class KubernetesSystemState(pydantic.BaseModel): + """System state of the cluster, including all Kubernetes resources""" + + cluster_role_binding: ClusterRoleBindingState + cluster_role: ClusterRoleState + config_map: ConfigMapState + cron_job: CronJobState + daemon_set: DaemonSetState + deployment: DeploymentState + endpoint: EndpointsState + ingress: IngressState + job: JobState + network_policy: NetworkPolicyState + persistent_volume_claim: PersistentVolumeClaimState + persistent_volume: PersistentVolumeState + pod: PodState + replica_set: ReplicaSetState + role_binding: RoleBindingState + role: RoleState + secret: SecretState + service_account: ServiceAccountState + service: ServiceState + stateful_set: StatefulSetState + storage_class: StorageClassState + + @classmethod + def from_api_client( + cls, api_client: kubernetes.client.ApiClient, namespace: str + ) -> Self: + """Initialize Kubernetes system state by fetching all objects from + Kubernetes API server. 
+ """ + return cls( + cluster_role_binding=ClusterRoleBindingState.from_api_client( + api_client + ), + cluster_role=ClusterRoleState.from_api_client(api_client), + config_map=ConfigMapState.from_api_client_namespaced( + api_client, namespace + ), + cron_job=CronJobState.from_api_client_namespaced( + api_client, namespace + ), + daemon_set=DaemonSetState.from_api_client_namespaced( + api_client, namespace + ), + deployment=DeploymentState.from_api_client_namespaced( + api_client, namespace + ), + endpoint=EndpointsState.from_api_client_namespaced( + api_client, namespace + ), + ingress=IngressState.from_api_client_namespaced( + api_client, namespace + ), + job=JobState.from_api_client_namespaced(api_client, namespace), + network_policy=NetworkPolicyState.from_api_client_namespaced( + api_client, namespace + ), + persistent_volume_claim=PersistentVolumeClaimState.from_api_client_namespaced( + api_client, namespace + ), + persistent_volume=PersistentVolumeState.from_api_client(api_client), + pod=PodState.from_api_client_namespaced(api_client, namespace), + replica_set=ReplicaSetState.from_api_client_namespaced( + api_client, namespace + ), + role_binding=RoleBindingState.from_api_client_namespaced( + api_client, namespace + ), + role=RoleState.from_api_client_namespaced(api_client, namespace), + secret=SecretState.from_api_client_namespaced( + api_client, namespace + ), + service_account=ServiceAccountState.from_api_client_namespaced( + api_client, namespace + ), + service=ServiceState.from_api_client_namespaced( + api_client, namespace + ), + stateful_set=StatefulSetState.from_api_client_namespaced( + api_client, namespace + ), + storage_class=StorageClassState.from_api_client(api_client), + ) + + def diff_from(self, other: Self) -> KubernetesSystemDiff: + """Diff with other Kubernetes system state""" + return KubernetesSystemDiff( + cluster_role_binding=self.cluster_role_binding.diff_from( + other.cluster_role_binding + ), + cluster_role=self.cluster_role.diff_from(other.cluster_role), + config_map=self.config_map.diff_from(other.config_map), + cron_job=self.cron_job.diff_from(other.cron_job), + daemon_set=self.daemon_set.diff_from(other.daemon_set), + deployment=self.deployment.diff_from(other.deployment), + endpoint=self.endpoint.diff_from(other.endpoint), + ingress=self.ingress.diff_from(other.ingress), + job=self.job.diff_from(other.job), + network_policy=self.network_policy.diff_from(other.network_policy), + persistent_volume_claim=self.persistent_volume_claim.diff_from( + other.persistent_volume_claim + ), + persistent_volume=self.persistent_volume.diff_from( + other.persistent_volume + ), + pod=self.pod.diff_from(other.pod), + replica_set=self.replica_set.diff_from(other.replica_set), + role_binding=self.role_binding.diff_from(other.role_binding), + role=self.role.diff_from(other.role), + secret=self.secret.diff_from(other.secret), + service_account=self.service_account.diff_from( + other.service_account + ), + service=self.service.diff_from(other.service), + stateful_set=self.stateful_set.diff_from(other.stateful_set), + storage_class=self.storage_class.diff_from(other.storage_class), + ) + + def dump(self, path: str) -> None: + """Dump Kubernetes system state to a file""" + with open(path, "w", encoding="utf-8") as file: + json.dump(self.model_dump(), file, indent=4, cls=ActoEncoder) + + def check_health(self) -> KubernetesSystemHealth: + """Check if Kubernetes system state is healthy""" + return KubernetesSystemHealth( + daemon_set=self.daemon_set.check_health(), + 
deployment=self.deployment.check_health(), + job=self.job.check_health(), + pod=self.pod.check_health(), + replica_set=self.replica_set.check_health(), + stateful_set=self.stateful_set.check_health(), + ) diff --git a/acto/system_state/network_policy.py b/acto/system_state/network_policy.py new file mode 100644 index 0000000000..9b11c1bcb6 --- /dev/null +++ b/acto/system_state/network_policy.py @@ -0,0 +1,37 @@ +"""NetworkPolicy state model""" + +import kubernetes +import kubernetes.client.models as kubernetes_models +import pydantic +from typing_extensions import Self + +from acto.system_state.kubernetes_object import ( + KubernetesNamespacedDictObject, + list_namespaced_object_helper, +) + + +class NetworkPolicyState(KubernetesNamespacedDictObject): + """NetworkPolicy state object.""" + + root: dict[str, kubernetes_models.V1NetworkPolicy] + + @classmethod + def from_api_client_namespaced( + cls, api_client: kubernetes.client.ApiClient, namespace: str + ) -> Self: + data = list_namespaced_object_helper( + kubernetes.client.NetworkingV1Api( + api_client + ).list_namespaced_network_policy, + namespace, + ) + return cls.model_validate(data) + + def check_health(self) -> tuple[bool, str]: + """Check NetworkPolicy health""" + return True, "" + + @pydantic.model_serializer + def serialize(self): + return {key: value.to_dict() for key, value in self.root.items()} diff --git a/acto/system_state/persistent_volume.py b/acto/system_state/persistent_volume.py new file mode 100644 index 0000000000..5012bd7560 --- /dev/null +++ b/acto/system_state/persistent_volume.py @@ -0,0 +1,32 @@ +"""PersistentVolume state model""" + +import kubernetes +import kubernetes.client.models as kubernetes_models +import pydantic +from typing_extensions import Self + +from acto.system_state.kubernetes_object import ( + KubernetesDictObject, + list_object_helper, +) + + +class PersistentVolumeState(KubernetesDictObject): + """PersistentVolume state object.""" + + root: dict[str, kubernetes_models.V1PersistentVolume] + + @classmethod + def from_api_client(cls, api_client: kubernetes.client.ApiClient) -> Self: + data = list_object_helper( + kubernetes.client.CoreV1Api(api_client).list_persistent_volume, + ) + return cls.model_validate(data) + + def check_health(self) -> tuple[bool, str]: + """Check if PersistentVolume is healthy""" + return True, "" + + @pydantic.model_serializer + def serialize(self): + return {key: value.to_dict() for key, value in self.root.items()} diff --git a/acto/system_state/persistent_volume_claim.py b/acto/system_state/persistent_volume_claim.py new file mode 100644 index 0000000000..3cbee72adc --- /dev/null +++ b/acto/system_state/persistent_volume_claim.py @@ -0,0 +1,37 @@ +"""PersistentVolumeClaim state model""" + +import kubernetes +import kubernetes.client.models as kubernetes_models +import pydantic +from typing_extensions import Self + +from acto.system_state.kubernetes_object import ( + KubernetesNamespacedDictObject, + list_namespaced_object_helper, +) + + +class PersistentVolumeClaimState(KubernetesNamespacedDictObject): + """PersistentVolumeClaim state object.""" + + root: dict[str, kubernetes_models.V1PersistentVolumeClaim] + + @classmethod + def from_api_client_namespaced( + cls, api_client: kubernetes.client.ApiClient, namespace: str + ) -> Self: + data = list_namespaced_object_helper( + kubernetes.client.CoreV1Api( + api_client + ).list_namespaced_persistent_volume_claim, + namespace, + ) + return cls.model_validate(data) + + def check_health(self) -> tuple[bool, str]: + """Check 
health of PersistentVolumeClaim""" + return True, "" + + @pydantic.model_serializer + def serialize(self): + return {key: value.to_dict() for key, value in self.root.items()} diff --git a/acto/system_state/pod.py b/acto/system_state/pod.py new file mode 100644 index 0000000000..0b47babef5 --- /dev/null +++ b/acto/system_state/pod.py @@ -0,0 +1,64 @@ +"""Pod state model""" +import kubernetes +import kubernetes.client.models as kubernetes_models +import pydantic +from typing_extensions import Self + +from acto.system_state.kubernetes_object import ( + KubernetesNamespacedDictObject, + list_namespaced_object_helper, +) + + +class PodState(KubernetesNamespacedDictObject): + """Pod state model""" + + root: dict[str, kubernetes_models.V1Pod] + + @classmethod + def from_api_client_namespaced( + cls, api_client: kubernetes.client.ApiClient, namespace: str + ) -> Self: + data = list_namespaced_object_helper( + kubernetes.client.CoreV1Api(api_client).list_namespaced_pod, + namespace, + ) + return cls.model_validate(data) + + def check_health(self) -> tuple[bool, str]: + """Check if pod is healthy + + Returns: + tuple[bool, str]: (is_healthy, reason) + """ + + for name, pod in self.root.items(): + if pod.status.conditions is not None: + for condition in pod.status.conditions: + if condition.type == "Ready" and condition.status != "True": + return ( + False, + f"Pod[{name}] is not ready: {condition.message}", + ) + + if pod.status.container_statuses is not None: + for container_status in pod.status.container_statuses: + if container_status.ready is not True: + return ( + False, + f"Container {container_status.name} is not ready", + ) + + if pod.status.init_container_statuses is not None: + for container_status in pod.status.init_container_statuses: + if container_status.ready is not True: + return ( + False, + f"Init container {container_status.name} is not ready", + ) + + return True, "" + + @pydantic.model_serializer + def serialize(self): + return {key: value.to_dict() for key, value in self.root.items()} diff --git a/acto/system_state/replica_set.py b/acto/system_state/replica_set.py new file mode 100644 index 0000000000..7cf40816ef --- /dev/null +++ b/acto/system_state/replica_set.py @@ -0,0 +1,56 @@ +"""ReplicaSet state model.""" +import kubernetes +import kubernetes.client.models as kubernetes_models +import pydantic +from typing_extensions import Self + +from acto.system_state.kubernetes_object import ( + KubernetesNamespacedDictObject, + list_namespaced_object_helper, +) + + +class ReplicaSetState(KubernetesNamespacedDictObject): + """ReplicaSet state object.""" + + root: dict[str, kubernetes_models.V1ReplicaSet] + + @classmethod + def from_api_client_namespaced( + cls, api_client: kubernetes.client.ApiClient, namespace: str + ) -> Self: + data = list_namespaced_object_helper( + kubernetes.client.AppsV1Api(api_client).list_namespaced_replica_set, + namespace, + ) + return cls.model_validate(data) + + def check_health(self) -> tuple[bool, str]: + """Check if ReplicaSet is healthy + + Returns: + tuple[bool, str]: (is_healthy, reason) + """ + for name, replica_set in self.root.items(): + if ( + replica_set.status.observed_generation + != replica_set.metadata.generation + ): + return False, f"ReplicaSet[{name}] generation mismatch" + + if replica_set.spec.replicas != replica_set.status.ready_replicas: + return False, f"ReplicaSet[{name}] replicas mismatch" + + if replica_set.status.conditions is not None: + for condition in replica_set.status.conditions: + if ( + condition.type == "Available" + 
and condition.status != "True" + ): + return False, f"ReplicaSet[{name}] is not available" + + return True, "" + + @pydantic.model_serializer + def serialize(self): + return {key: value.to_dict() for key, value in self.root.items()} diff --git a/acto/system_state/role.py b/acto/system_state/role.py new file mode 100644 index 0000000000..ff63b60f31 --- /dev/null +++ b/acto/system_state/role.py @@ -0,0 +1,37 @@ +"""Role state model""" + +import kubernetes +import kubernetes.client.models as kubernetes_models +import pydantic +from typing_extensions import Self + +from acto.system_state.kubernetes_object import ( + KubernetesNamespacedDictObject, + list_namespaced_object_helper, +) + + +class RoleState(KubernetesNamespacedDictObject): + """Role state object.""" + + root: dict[str, kubernetes_models.V1Role] + + @classmethod + def from_api_client_namespaced( + cls, api_client: kubernetes.client.ApiClient, namespace: str + ) -> Self: + data = list_namespaced_object_helper( + kubernetes.client.RbacAuthorizationV1Api( + api_client + ).list_namespaced_role, + namespace, + ) + return cls.model_validate(data) + + def check_health(self) -> tuple[bool, str]: + """Check Role health""" + return True, "" + + @pydantic.model_serializer + def serialize(self): + return {key: value.to_dict() for key, value in self.root.items()} diff --git a/acto/system_state/role_binding.py b/acto/system_state/role_binding.py new file mode 100644 index 0000000000..19b6f5eccc --- /dev/null +++ b/acto/system_state/role_binding.py @@ -0,0 +1,37 @@ +"""RoleBinding state model""" + +import kubernetes +import kubernetes.client.models as kubernetes_models +import pydantic +from typing_extensions import Self + +from acto.system_state.kubernetes_object import ( + KubernetesNamespacedDictObject, + list_namespaced_object_helper, +) + + +class RoleBindingState(KubernetesNamespacedDictObject): + """RoleBinding state object.""" + + root: dict[str, kubernetes_models.V1RoleBinding] + + @classmethod + def from_api_client_namespaced( + cls, api_client: kubernetes.client.ApiClient, namespace: str + ) -> Self: + data = list_namespaced_object_helper( + kubernetes.client.RbacAuthorizationV1Api( + api_client + ).list_namespaced_role_binding, + namespace, + ) + return cls.model_validate(data) + + def check_health(self) -> tuple[bool, str]: + """Check RoleBinding health""" + return True, "" + + @pydantic.model_serializer + def serialize(self): + return {key: value.to_dict() for key, value in self.root.items()} diff --git a/acto/system_state/secret.py b/acto/system_state/secret.py new file mode 100644 index 0000000000..57e44ece7f --- /dev/null +++ b/acto/system_state/secret.py @@ -0,0 +1,35 @@ +"""Secret state model""" + +import kubernetes +import kubernetes.client.models as kubernetes_models +import pydantic +from typing_extensions import Self + +from acto.system_state.kubernetes_object import ( + KubernetesNamespacedDictObject, + list_namespaced_object_helper, +) + + +class SecretState(KubernetesNamespacedDictObject): + """Secret state object.""" + + root: dict[str, kubernetes_models.V1Secret] + + @classmethod + def from_api_client_namespaced( + cls, api_client: kubernetes.client.ApiClient, namespace: str + ) -> Self: + data = list_namespaced_object_helper( + kubernetes.client.CoreV1Api(api_client).list_namespaced_secret, + namespace, + ) + return cls.model_validate(data) + + def check_health(self) -> tuple[bool, str]: + """Check if Secret is healthy""" + return True, "" + + @pydantic.model_serializer + def serialize(self): + return {key: 
value.to_dict() for key, value in self.root.items()} diff --git a/acto/system_state/service.py b/acto/system_state/service.py new file mode 100644 index 0000000000..b31eda30b3 --- /dev/null +++ b/acto/system_state/service.py @@ -0,0 +1,40 @@ +"""Service state model""" + +import kubernetes +import kubernetes.client.models as kubernetes_models +import pydantic +from typing_extensions import Self + +from acto.system_state.kubernetes_object import ( + KubernetesNamespacedDictObject, + list_namespaced_object_helper, +) + + +class ServiceState(KubernetesNamespacedDictObject): + """Service state object.""" + + root: dict[str, kubernetes_models.V1Service] + + @classmethod + def from_api_client_namespaced( + cls, api_client: kubernetes.client.ApiClient, namespace: str + ) -> Self: + data = list_namespaced_object_helper( + kubernetes.client.CoreV1Api(api_client).list_namespaced_service, + namespace, + ) + return cls.model_validate(data) + + def check_health(self) -> tuple[bool, str]: + """Check if Service is healthy + + Returns: + tuple[bool, str]: (is_healthy, reason) + """ + + return True, "" + + @pydantic.model_serializer + def serialize(self): + return {key: value.to_dict() for key, value in self.root.items()} diff --git a/acto/system_state/service_account.py b/acto/system_state/service_account.py new file mode 100644 index 0000000000..44441bb4b0 --- /dev/null +++ b/acto/system_state/service_account.py @@ -0,0 +1,37 @@ +"""ServiceAccount state model""" + +import kubernetes +import kubernetes.client.models as kubernetes_models +import pydantic +from typing_extensions import Self + +from acto.system_state.kubernetes_object import ( + KubernetesNamespacedDictObject, + list_namespaced_object_helper, +) + + +class ServiceAccountState(KubernetesNamespacedDictObject): + """ServiceAccount state object.""" + + root: dict[str, kubernetes_models.V1ServiceAccount] + + @classmethod + def from_api_client_namespaced( + cls, api_client: kubernetes.client.ApiClient, namespace: str + ) -> Self: + data = list_namespaced_object_helper( + kubernetes.client.CoreV1Api( + api_client + ).list_namespaced_service_account, + namespace, + ) + return cls.model_validate(data) + + def check_health(self) -> tuple[bool, str]: + """Check ServiceAccount health""" + return True, "" + + @pydantic.model_serializer + def serialize(self): + return {key: value.to_dict() for key, value in self.root.items()} diff --git a/acto/system_state/stateful_set.py b/acto/system_state/stateful_set.py new file mode 100644 index 0000000000..7af2410323 --- /dev/null +++ b/acto/system_state/stateful_set.py @@ -0,0 +1,71 @@ +"""StatefulSet state model.""" + +import kubernetes +import kubernetes.client.models as kubernetes_models +import pydantic +from typing_extensions import Self + +from acto.system_state.kubernetes_object import ( + KubernetesNamespacedDictObject, + list_namespaced_object_helper, +) + + +class StatefulSetState(KubernetesNamespacedDictObject): + """StatefulSet state object.""" + + root: dict[str, kubernetes_models.V1StatefulSet] + + @classmethod + def from_api_client_namespaced( + cls, api_client: kubernetes.client.ApiClient, namespace: str + ) -> Self: + data = list_namespaced_object_helper( + kubernetes.client.AppsV1Api( + api_client + ).list_namespaced_stateful_set, + namespace, + ) + return cls.model_validate(data) + + def check_health(self) -> tuple[bool, str]: + """Check if StatefulSet is healthy + + Returns: + tuple[bool, str]: (is_healthy, reason) + """ + + for name, stateful_set in self.root.items(): + if ( + 
stateful_set.status.observed_generation
+                != stateful_set.metadata.generation
+            ):
+                return False, f"StatefulSet[{name}] generation mismatch"
+
+            if (
+                stateful_set.status.current_revision
+                != stateful_set.status.update_revision
+            ):
+                return (
+                    False,
+                    f"StatefulSet[{name}] revision mismatch, "
+                    + f"current[{stateful_set.status.current_revision}] "
+                    + f"!= update[{stateful_set.status.update_revision}]",
+                )
+
+            if stateful_set.spec.replicas != stateful_set.status.ready_replicas:
+                return False, f"StatefulSet[{name}] replicas mismatch"
+
+            if stateful_set.status.conditions is not None:
+                for condition in stateful_set.status.conditions:
+                    if condition.type == "Ready" and condition.status != "True":
+                        return (
+                            False,
+                            f"StatefulSet[{name}] is not ready: {condition.message}",
+                        )
+
+        return True, ""
+
+    @pydantic.model_serializer
+    def serialize(self):
+        return {key: value.to_dict() for key, value in self.root.items()}
diff --git a/acto/system_state/storage_class.py b/acto/system_state/storage_class.py
new file mode 100644
index 0000000000..02b3cba3fb
--- /dev/null
+++ b/acto/system_state/storage_class.py
@@ -0,0 +1,32 @@
+"""StorageClass state model"""
+
+import kubernetes
+import kubernetes.client.models as kubernetes_models
+import pydantic
+from typing_extensions import Self
+
+from acto.system_state.kubernetes_object import (
+    KubernetesDictObject,
+    list_object_helper,
+)
+
+
+class StorageClassState(KubernetesDictObject):
+    """StorageClass state object."""
+
+    root: dict[str, kubernetes_models.V1StorageClass]
+
+    @classmethod
+    def from_api_client(cls, api_client: kubernetes.client.ApiClient) -> Self:
+        data = list_object_helper(
+            kubernetes.client.StorageV1Api(api_client).list_storage_class,
+        )
+        return cls.model_validate(data)
+
+    def check_health(self) -> tuple[bool, str]:
+        """Check health of StorageClass"""
+        return True, ""
+
+    @pydantic.model_serializer
+    def serialize(self):
+        return {key: value.to_dict() for key, value in self.root.items()}
diff --git a/test/integration_tests/test_kubernetes_system_state.py b/test/integration_tests/test_kubernetes_system_state.py
new file mode 100644
index 0000000000..34707b1cc8
--- /dev/null
+++ b/test/integration_tests/test_kubernetes_system_state.py
@@ -0,0 +1,66 @@
+"""Integration tests for Kubernetes system state collection."""
+import os
+import pathlib
+import tempfile
+import unittest
+
+from acto.common import kubernetes_client
+from acto.kubernetes_engine import kind
+from acto.system_state.kubernetes_system_state import KubernetesSystemState
+
+test_dir = pathlib.Path(__file__).parent.resolve()
+test_data_dir = os.path.join(test_dir, "test_data")
+
+
+class TestKubernetesSystemState(unittest.TestCase):
+    """Test Kubernetes system state collection."""
+
+    def setUp(self):
+        config_path = os.path.join(os.path.expanduser("~"), ".kube/test-config")
+        name = "test-cluster"
+        num_nodes = 1
+        version = "v1.26.0"
+
+        cluster_instance = kind.Kind(acto_namespace=0)
+
+        cluster_instance.configure_cluster(num_nodes, version)
+        print(
+            f"Creating cluster {name} with {num_nodes} nodes, version {version}, "
+            + f"configPath {config_path}"
+        )
+        cluster_instance.create_cluster(name, config_path)
+
+        self.kubeconfig = config_path
+        self.cluster_name = name
+        self.cluster_instance = cluster_instance
+
+    def tearDown(self):
+        self.cluster_instance.delete_cluster(self.cluster_name, self.kubeconfig)
+
+    def test_collect_and_serialization(self):
+        """Test collect and serialization of Kubernetes system state."""
+
+        api_client =
kubernetes_client( + self.kubeconfig, + self.cluster_instance.get_context_name(self.cluster_name), + ) + + # check collection works + state = KubernetesSystemState.from_api_client(api_client, "kube-system") + assert "kindnet" in state.daemon_set + assert "kube-proxy" in state.daemon_set + assert "coredns" in state.deployment + assert "kube-dns" in state.service + assert "standard" in state.storage_class + assert "coredns" in state.config_map + assert "admin" in state.cluster_role + assert "cluster-admin" in state.cluster_role_binding + + # check serialization works + with tempfile.TemporaryDirectory() as tempdir: + filepath = os.path.join(tempdir, "system_state.json") + state.dump(filepath) + + +if __name__ == "__main__": + unittest.main() From 2489c4b8dcb3b3270ef31b22dbaac9cb830e8a41 Mon Sep 17 00:00:00 2001 From: "Jiawei \"Tyler\" Gu" <47795840+tylergu@users.noreply.github.com> Date: Wed, 31 Jan 2024 00:46:38 -0600 Subject: [PATCH 2/4] Feat: CLI for getting total number of schema nodes from CRD (#316) Signed-off-by: Tyler Gu <jiaweig3@illinois.edu> --- acto/schema/anyof.py | 38 +++-- acto/schema/array.py | 71 +++++++--- acto/schema/base.py | 116 ++++++++++------ acto/schema/boolean.py | 22 ++- acto/schema/get_total_number_schemas.py | 93 +++++++++++++ acto/schema/get_total_number_schemas_test.py | 22 +++ acto/schema/integer.py | 43 +++++- acto/schema/number.py | 55 ++++++-- acto/schema/object.py | 134 +++++++++++++----- acto/schema/oneof.py | 36 +++-- acto/schema/opaque.py | 19 ++- acto/schema/schema.py | 138 +++---------------- acto/schema/string.py | 49 +++++-- 13 files changed, 555 insertions(+), 281 deletions(-) create mode 100644 acto/schema/get_total_number_schemas.py create mode 100644 acto/schema/get_total_number_schemas_test.py diff --git a/acto/schema/anyof.py b/acto/schema/anyof.py index a69d6c2460..7fd8fd57ab 100644 --- a/acto/schema/anyof.py +++ b/acto/schema/anyof.py @@ -1,34 +1,42 @@ +import random from copy import deepcopy from typing import List, Tuple from .base import BaseSchema, TreeNode -from .schema import extract_schema class AnyOfSchema(BaseSchema): - '''Representing a schema with AnyOf keyword in it - ''' + """Representing a schema with AnyOf keyword in it""" def __init__(self, path: list, schema: dict) -> None: + # This is to fix the circular import + # pylint: disable=import-outside-toplevel, cyclic-import + from .schema import extract_schema + super().__init__(path, schema) self.possibilities: List[BaseSchema] = [] - for index, v in enumerate(schema['anyOf']): + for index, v in enumerate(schema["anyOf"]): base_schema = deepcopy(schema) - del base_schema['anyOf'] + del base_schema["anyOf"] base_schema.update(v) - self.possibilities.append(extract_schema(self.path + ['%s' % str(index)], base_schema)) + self.possibilities.append( + extract_schema(self.path + [str(index)], base_schema) + ) def get_possibilities(self): + """Return all possibilities of the anyOf schema""" return self.possibilities def get_all_schemas(self) -> Tuple[list, list, list]: if self.problematic: return [], [], [] return [self], [], [] - - def get_normal_semantic_schemas(self) -> Tuple[List['BaseSchema'], List['BaseSchema']]: - normal_schemas = [self] - semantic_schemas = [] + + def get_normal_semantic_schemas( + self, + ) -> Tuple[List["BaseSchema"], List["BaseSchema"]]: + normal_schemas: list[BaseSchema] = [self] + semantic_schemas: list[BaseSchema] = [] for possibility in self.possibilities: possibility_tuple = possibility.get_normal_semantic_schemas() @@ -51,10 +59,14 @@ def 
load_examples(self, example: list):
 
     def set_default(self, instance):
         self.default = instance
 
+    def gen(self, exclude_value=None, minimum: bool = False, **kwargs):
+        schema = random.choice(self.possibilities)
+        return schema.gen(exclude_value=exclude_value, minimum=minimum)
+
     def __str__(self) -> str:
-        ret = '['
+        ret = "["
         for i in self.possibilities:
             ret += str(i)
-            ret += ', '
+            ret += ", "
-        ret += ']'
+        ret += "]"
         return ret
diff --git a/acto/schema/array.py b/acto/schema/array.py
index ddda23c85e..24b5522902 100644
--- a/acto/schema/array.py
+++ b/acto/schema/array.py
@@ -1,29 +1,45 @@
+import random
 from typing import List, Tuple
 
 from .base import BaseSchema, TreeNode
-from .schema import extract_schema
 
 
 class ArraySchema(BaseSchema):
-    '''Representation of an array node
-
+    """Representation of an array node
+
     It handles
         - minItems
         - maxItems
        - items
        - uniqueItems
-    '''
+    """
+
     default_min_items = 0
     default_max_items = 5
 
     def __init__(self, path: list, schema: dict) -> None:
-        super().__init__(path, schema)
-        self.item_schema = extract_schema(self.path + ['ITEM'], schema['items'])
-        self.min_items = self.default_min_items if 'minItems' not in schema else schema['minItems']
-        self.max_items = self.default_max_items if 'maxItems' not in schema else schema['maxItems']
-        self.unique_items = None if 'uniqueItems' not in schema else schema['exclusiveMinimum']
+        # This is to fix the circular import
+        # pylint: disable=import-outside-toplevel, cyclic-import
+        from .schema import extract_schema
+
+        super().__init__(path, schema)
+        self.item_schema = extract_schema(self.path + ["ITEM"], schema["items"])
+        self.min_items = (
+            self.default_min_items
+            if "minItems" not in schema
+            else schema["minItems"]
+        )
+        self.max_items = (
+            self.default_max_items
+            if "maxItems" not in schema
+            else schema["maxItems"]
+        )
+        self.unique_items = (
+            None if "uniqueItems" not in schema else schema["uniqueItems"]
+        )
 
-    def get_item_schema(self):
+    def get_item_schema(self) -> BaseSchema:
+        """Get the schema of the items in the array"""
         return self.item_schema
 
     def get_all_schemas(self) -> Tuple[list, list, list]:
@@ -66,10 +82,12 @@ def get_all_schemas(self) -> Tuple[list, list, list]:
         normal_schemas.append(self)
 
         return normal_schemas, pruned_by_overspecified, pruned_by_copiedover
-
-    def get_normal_semantic_schemas(self) -> Tuple[List['BaseSchema'], List['BaseSchema']]:
-        normal_schemas = [self]
-        semantic_schemas = []
+
+    def get_normal_semantic_schemas(
+        self,
+    ) -> Tuple[List["BaseSchema"], List["BaseSchema"]]:
+        normal_schemas: list[BaseSchema] = [self]
+        semantic_schemas: list[BaseSchema] = []
 
         child_schema_tuple = self.item_schema.get_normal_semantic_schemas()
         normal_schemas.extend(child_schema_tuple[0])
@@ -79,7 +97,7 @@ def get_normal_semantic_schemas(self) -> Tuple[List['BaseSchema'], List['BaseSch
 
     def to_tree(self) -> TreeNode:
         node = TreeNode(self.path)
-        node.add_child('ITEM', self.item_schema.to_tree())
+        node.add_child("ITEM", self.item_schema.to_tree())
         return node
 
     def load_examples(self, example: list):
@@ -93,8 +111,29 @@ def set_default(self, instance):
         self.default = instance
 
     def empty_value(self):
         return []
 
+    def gen(self, exclude_value=None, minimum: bool = False, **kwargs) -> list:
+        if self.enum is not None:
+            if exclude_value is not None:
+                return random.choice(
+                    [x for x in self.enum if x != exclude_value]
+                )
+            else:
+                return random.choice(self.enum)
+        else:
+            # XXX: need to handle exclude_value, but not important for now for array types
+            result = []
+            if "size" in kwargs and kwargs["size"] is not
None: + num = kwargs["size"] + elif minimum: + num = self.min_items + else: + num = random.randint(self.min_items, self.max_items) + for _ in range(num): + result.append(self.item_schema.gen(minimum=minimum)) + return result + def __str__(self) -> str: - return 'Array' + return "Array" def __getitem__(self, key): return self.item_schema diff --git a/acto/schema/base.py b/acto/schema/base.py index 701e501d40..0e12a9d273 100644 --- a/acto/schema/base.py +++ b/acto/schema/base.py @@ -1,28 +1,32 @@ -import random from abc import abstractmethod -from typing import List, Tuple +from typing import Any, Callable, List, Optional, Tuple -from jsonschema import validate +import jsonschema +import jsonschema.exceptions from acto.utils.thread_logger import get_thread_logger -class TreeNode(): +class TreeNode: + """Tree node for schema tree""" def __init__(self, path: list) -> None: self.path = list(path) - self.parent: TreeNode = None - self.children = {} + self.parent: Optional[TreeNode] = None + self.children: dict[str, "TreeNode"] = {} - def add_child(self, key: str, child: 'TreeNode'): + def add_child(self, key: str, child: "TreeNode"): + """Add a child to the node""" self.children[key] = child child.set_parent(self) child.path = self.path + [key] - def set_parent(self, parent: 'TreeNode'): + def set_parent(self, parent: "TreeNode"): + """Set parent of the node""" self.parent = parent - def get_node_by_path(self, path: list) -> 'TreeNode': + def get_node_by_path(self, path: list) -> Optional["TreeNode"]: + """Get node by path""" logger = get_thread_logger(with_prefix=True) if len(path) == 0: @@ -32,41 +36,48 @@ def get_node_by_path(self, path: list) -> 'TreeNode': if key in self: return self[key].get_node_by_path(path) else: - logger.error('%s not in children', key) - logger.error('%s', self.children) + logger.error("%s not in children", key) + logger.error("%s", self.children) return None def get_children(self) -> dict: + """Get children of the node""" return self.children def get_path(self) -> list: + """Get path of the node""" return self.path - def traverse_func(self, func: callable): + def traverse_func(self, func: Callable): + """Traverse the tree and apply func to each node""" if func(self): for child in self.children.values(): child.traverse_func(func) def __getitem__(self, key): if key not in self.children: - if 'ITEM' in self.children and key == 'INDEX': - return self.children['ITEM'] - if isinstance(key, int) and 'ITEM' in self.children: - return self.children['ITEM'] - elif 'additional_properties' in self.children and isinstance(key, str): - return self.children['additional_properties'] + if "ITEM" in self.children and key == "INDEX": + return self.children["ITEM"] + if isinstance(key, int) and "ITEM" in self.children: + return self.children["ITEM"] + elif "additional_properties" in self.children and isinstance( + key, str + ): + return self.children["additional_properties"] else: - raise KeyError('key: %s' % key) + raise KeyError(f"key: {key}") else: return self.children[key] def __contains__(self, key) -> bool: if key not in self.children: - if 'ITEM' in self.children and key == 'INDEX': + if "ITEM" in self.children and key == "INDEX": return True - if 'ITEM' in self.children and isinstance(key, int): + if "ITEM" in self.children and isinstance(key, int): return True - elif 'additional_properties' in self.children and isinstance(key, str): + elif "additional_properties" in self.children and isinstance( + key, str + ): return True else: return False @@ -77,73 +88,88 @@ def 
__str__(self) -> str: return str(self.path) def deepcopy(self, path: list): + """Deep copy the node and its children""" ret = TreeNode(path) for key, child in self.children.items(): ret.add_child(key, child.deepcopy(path + [key])) - ret.testcases = list(self.testcases) - return ret -class SchemaInterface(): +class SchemaInterface: + """Interface for schemas""" + @abstractmethod - def get_all_schemas(self) -> Tuple[List['BaseSchema'], List['BaseSchema'], List['BaseSchema']]: - '''Returns a tuple of normal schemas, schemas pruned by over-specified, schemas pruned by - copied-over''' - #FIXME: this method needs to be redefined. Its return type is a legacy from previous abandoned design + def get_all_schemas( + self, + ) -> Tuple[List["BaseSchema"], List["BaseSchema"], List["BaseSchema"]]: + """Returns a tuple of normal schemas, schemas pruned by over-specified, schemas pruned by + copied-over""" + # FIXME: this method needs to be redefined. + # Its return type is a legacy from previous abandoned design raise NotImplementedError @abstractmethod - def get_normal_semantic_schemas(self) -> Tuple[List['BaseSchema'], List['BaseSchema']]: - '''Returns a tuple of normal schemas, semantic schemas''' + def get_normal_semantic_schemas( + self, + ) -> Tuple[List["BaseSchema"], List["BaseSchema"]]: + """Returns a tuple of normal schemas, semantic schemas""" raise NotImplementedError @abstractmethod def to_tree(self) -> TreeNode: - '''Returns tree structure, used for input generation''' + """Returns tree structure, used for input generation""" raise NotImplementedError @abstractmethod - def load_examples(self, example): - '''Load example into schema and subschemas''' + def load_examples(self, example: Any): + """Load example into schema and subschemas""" raise NotImplementedError @abstractmethod def set_default(self, instance): + """Set default value of the schema""" raise NotImplementedError @abstractmethod def empty_value(self): + """Get empty value of the schema""" raise NotImplementedError class BaseSchema(SchemaInterface): - '''Base class for schemas - + """Base class for schemas + Handles some keywords used for any types - ''' + """ def __init__(self, path: list, schema: dict) -> None: self.path = path self.raw_schema = schema - self.default = None if 'default' not in schema else schema['default'] - self.enum = None if 'enum' not in schema else schema['enum'] - self.examples = [] + self.default = None if "default" not in schema else schema["default"] + self.enum = None if "enum" not in schema else schema["enum"] + self.examples: list[Any] = [] self.copied_over = False self.over_specified = False self.problematic = False self.patch = False self.mapped = False - self.used_fields = [] + self.used_fields: list[SchemaInterface] = [] def get_path(self) -> list: + """Get path of the schema""" return self.path - def validate(self, instance) -> bool: + def validate(self, instance: Any) -> bool: + """Validate instance against schema""" try: - validate(instance, self.raw_schema) + jsonschema.validate(instance, self.raw_schema) return True - except: - return False \ No newline at end of file + except jsonschema.exceptions.ValidationError: + return False + + @abstractmethod + def gen(self, exclude_value=None, minimum: bool = False, **kwargs): + """Generate instance from schema""" + raise NotImplementedError diff --git a/acto/schema/boolean.py b/acto/schema/boolean.py index e3c971c48e..4691b6c386 100644 --- a/acto/schema/boolean.py +++ b/acto/schema/boolean.py @@ -1,22 +1,25 @@ +import random from typing import 
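`BaseSchema.validate()` pairs naturally with the new abstract `gen()`: every generated value should validate against the raw schema it came from. A hedged sanity-check sketch, using a toy stand-in for the integer generator rather than the real class:

```python
import random
import jsonschema

schema = {"type": "integer", "minimum": 0, "maximum": 5}

def gen(s: dict) -> int:
    # toy stand-in for IntegerSchema.gen; randrange's stop is exclusive,
    # hence the +1 so that `maximum` itself is reachable
    return random.randrange(s["minimum"], s["maximum"] + 1)

for _ in range(100):
    value = gen(schema)
    jsonschema.validate(value, schema)  # raises ValidationError on a bad value
```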
List, Tuple
 
 from .base import BaseSchema, TreeNode
 
 
 class BooleanSchema(BaseSchema):
+    """Representation of a boolean node"""
 
     def __init__(self, path: list, schema: dict) -> None:
         super().__init__(path, schema)
-        if self.default == None:
+        if self.default is None:
             self.default = False
-        pass
 
     def get_all_schemas(self) -> Tuple[list, list, list]:
         if self.problematic:
             return [], [], []
         return [self], [], []
-
-    def get_normal_semantic_schemas(self) -> Tuple[List['BaseSchema'], List['BaseSchema']]:
+
+    def get_normal_semantic_schemas(
+        self,
+    ) -> Tuple[List["BaseSchema"], List["BaseSchema"]]:
         return [self], []
 
     def to_tree(self) -> TreeNode:
@@ -29,10 +32,17 @@ def set_default(self, instance):
         if isinstance(instance, bool):
             self.default = instance
         elif isinstance(instance, str):
-            self.default = instance.lower() in ['true', 'True']
+            self.default = instance.lower() == "true"
 
     def empty_value(self):
         return False
 
+    def gen(self, exclude_value=None, minimum: bool = False, **kwargs):
+        """Generate a random boolean value"""
+        if exclude_value is not None:
+            return not exclude_value
+        else:
+            return random.choice([True, False])
+
     def __str__(self) -> str:
-        return 'boolean'
+        return "boolean"
diff --git a/acto/schema/get_total_number_schemas.py b/acto/schema/get_total_number_schemas.py
new file mode 100644
index 0000000000..8dd78bb947
--- /dev/null
+++ b/acto/schema/get_total_number_schemas.py
@@ -0,0 +1,92 @@
+import argparse
+import logging
+import sys
+
+import yaml
+
+from .schema import extract_schema
+
+
+def get_total_number_schemas(raw_schema: dict) -> int:
+    """Get the total number of schema nodes in a raw schema"""
+    root = extract_schema([], raw_schema)
+    base, over_specified, copied_over = root.get_all_schemas()
+    return len(base) + len(over_specified) + len(copied_over)
+
+
+def main():
+    """Main function"""
+    parser = argparse.ArgumentParser(
+        description="Counts the total number of schema nodes in a CRD"
+    )
+    parser.add_argument(
+        "--crd-file", help="Path to the CRD file", type=str, required=True
+    )
+    parser.add_argument(
+        "--crd-name",
+        help="Name of the CRD defined in metadata.name",
+        type=str,
+        required=False,
+    )
+    parser.add_argument(
+        "--version",
+        help="Version of the schema in the CRD",
+        type=str,
+        required=False,
+    )
+    args = parser.parse_args()
+
+    with open(args.crd_file, "r", encoding="utf-8") as crd_file:
+        documents = yaml.load_all(crd_file, Loader=yaml.FullLoader)
+
+        crd_candidates = {}
+        for document in documents:
+            if "kind" in document:
+                if document["kind"] == "CustomResourceDefinition":
+                    crd_candidates[document["metadata"]["name"]] = document
+            else:
+                raise RuntimeError("Document contains non-Kubernetes objects")
+
+    if crd_candidates:
+        if args.crd_name is not None and args.crd_name not in crd_candidates:
+            logging.error(
+                "CRD %s not found, available CRDs: %s",
+                args.crd_name,
+                str(crd_candidates.keys()),
+            )
+            sys.exit(1)
+
+        if len(crd_candidates) > 1:
+            if args.crd_name is None:
+                logging.error(
+                    "Multiple CRDs found, please specify one through the --crd-name argument"
+                )
+                logging.error("Available CRDs: %s", str(crd_candidates.keys()))
+                sys.exit(1)
+            else:
+                crd = crd_candidates[args.crd_name]
+        else:
+            crd = list(crd_candidates.values())[0]
+
+    if args.version is None:
+        if len(crd["spec"]["versions"]) > 1:
+            logging.warning(
+                "Multiple versions found in CRD, using the latest version"
+            )
+        raw_schema = crd["spec"]["versions"][-1]["schema"]["openAPIV3Schema"]
+    else:
+        for version in crd["spec"]["versions"]:
+            if 
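For the multiple-CRD branch above to ever fire, the collection loop has to scan the whole manifest rather than stop at the first match. A self-contained sketch of that collection step (the manifest contents are made up for illustration):

```python
import yaml

manifest = """
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
  name: first.example.com
---
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
  name: second.example.com
"""

# collect every CRD keyed by metadata.name, skipping other kinds
candidates = {
    doc["metadata"]["name"]: doc
    for doc in yaml.safe_load_all(manifest)
    if doc.get("kind") == "CustomResourceDefinition"
}
assert sorted(candidates) == ["first.example.com", "second.example.com"]
```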
version["name"] == args.version: + raw_schema = version["schema"]["openAPIV3Schema"] + break + else: + raise RuntimeError(f"Version {args.version} not found in CRD") + + print( + "Total number of schema nodes in the CRD:", + get_total_number_schemas(raw_schema), + ) + + +if __name__ == "__main__": + main() diff --git a/acto/schema/get_total_number_schemas_test.py b/acto/schema/get_total_number_schemas_test.py new file mode 100644 index 0000000000..cf540b3460 --- /dev/null +++ b/acto/schema/get_total_number_schemas_test.py @@ -0,0 +1,22 @@ +import unittest + +from .get_total_number_schemas import get_total_number_schemas + + +class TestGetTotalNumberSchemas(unittest.TestCase): + """Tests for get_total_number_schemas().""" + + def test_simple_get_total_number_schemas(self): + """Test simple case of getting total number of schemas.""" + + schema = { + "properties": { + "a": {"type": "number"}, + "b": {"type": "number"}, + "c": {"type": "number"}, + }, + "type": "object", + } + num = get_total_number_schemas(schema) + + assert num == 4 diff --git a/acto/schema/integer.py b/acto/schema/integer.py index 3efcc12b98..b0d054ef95 100644 --- a/acto/schema/integer.py +++ b/acto/schema/integer.py @@ -1,15 +1,16 @@ -from typing import List, Tuple +import random +from typing import Any, List, Tuple from .base import BaseSchema, TreeNode from .number import NumberSchema class IntegerSchema(NumberSchema): - '''Special case of NumberSchema''' + """Special case of NumberSchema""" def __init__(self, path: list, schema: dict) -> None: super().__init__(path, schema) - if self.default == None: + if self.default is None: self.default = 0 def get_all_schemas(self) -> Tuple[list, list, list]: @@ -17,7 +18,9 @@ def get_all_schemas(self) -> Tuple[list, list, list]: return [], [], [] return [self], [], [] - def get_normal_semantic_schemas(self) -> Tuple[List['BaseSchema'], List['BaseSchema']]: + def get_normal_semantic_schemas( + self, + ) -> Tuple[List["BaseSchema"], List["BaseSchema"]]: if self.problematic: return [], [] return [self], [] @@ -25,8 +28,9 @@ def get_normal_semantic_schemas(self) -> Tuple[List['BaseSchema'], List['BaseSch def to_tree(self) -> TreeNode: return TreeNode(self.path) - def load_examples(self, example: int): - self.examples.append(example) + def load_examples(self, example: Any): + if isinstance(example, int): + self.examples.append(example) def set_default(self, instance): self.default = int(instance) @@ -34,5 +38,30 @@ def set_default(self, instance): def empty_value(self): return 0 + def gen(self, exclude_value=None, minimum: bool = False, **kwargs) -> int: + # TODO: Use exclusive_minimum, exclusive_maximum + if self.enum is not None: + if exclude_value is not None: + return random.choice( + [x for x in self.enum if x != exclude_value] + ) + else: + return random.choice(self.enum) + elif self.multiple_of is not None: + return random.randrange( + self.minimum, self.maximum + 1, self.multiple_of + ) + else: + if exclude_value is not None: + return random.choice( + [ + x + for x in range(self.minimum, self.maximum + 1) + if x != exclude_value + ] + ) + else: + return random.randrange(self.minimum, self.maximum + 1) + def __str__(self) -> str: - return 'Integer' \ No newline at end of file + return "Integer" diff --git a/acto/schema/number.py b/acto/schema/number.py index 147329fcf9..434ff056fd 100644 --- a/acto/schema/number.py +++ b/acto/schema/number.py @@ -1,37 +1,57 @@ +import random from typing import List, Tuple from .base import BaseSchema, TreeNode class NumberSchema(BaseSchema): - 
'''Representation of a number node - + """Representation of a number node + It handles - minimum - maximum - exclusiveMinimum - exclusiveMaximum - multipleOf - ''' + """ + default_minimum = 0 default_maximum = 5 def __init__(self, path: list, schema: dict) -> None: super().__init__(path, schema) - self.minimum = self.default_minimum if 'minimum' not in schema else schema['minimum'] - self.maximum = self.default_maximum if 'maximum' not in schema else schema['maximum'] - self.exclusive_minimum = None if 'exclusiveMinimum' not in schema else schema[ - 'exclusiveMinimum'] - self.exclusive_maximum = None if 'exclusiveMaximum' not in schema else schema[ - 'exclusiveMaximum'] - self.multiple_of = None if 'multipleOf' not in schema else schema['multipleOf'] + self.minimum = ( + self.default_minimum + if "minimum" not in schema + else schema["minimum"] + ) + self.maximum = ( + self.default_maximum + if "maximum" not in schema + else schema["maximum"] + ) + self.exclusive_minimum = ( + None + if "exclusiveMinimum" not in schema + else schema["exclusiveMinimum"] + ) + self.exclusive_maximum = ( + None + if "exclusiveMaximum" not in schema + else schema["exclusiveMaximum"] + ) + self.multiple_of = ( + None if "multipleOf" not in schema else schema["multipleOf"] + ) def get_all_schemas(self) -> Tuple[list, list, list]: if self.problematic: return [], [], [] return [self], [], [] - def get_normal_semantic_schemas(self) -> Tuple[List['BaseSchema'], List['BaseSchema']]: + def get_normal_semantic_schemas( + self, + ) -> Tuple[List["BaseSchema"], List["BaseSchema"]]: if self.problematic: return [], [] return [self], [] @@ -48,5 +68,16 @@ def set_default(self, instance): def empty_value(self): return 0 + def gen(self, exclude_value=None, minimum=False, **kwargs) -> float: + # TODO: Use exclusive_minimum, exclusive_maximum, multiple_of + if self.enum is not None: + if exclude_value is not None: + return random.choice( + [x for x in self.enum if x != exclude_value] + ) + else: + return random.choice(self.enum) + return random.uniform(self.minimum, self.maximum) + def __str__(self) -> str: - return 'Number' \ No newline at end of file + return "Number" diff --git a/acto/schema/object.py b/acto/schema/object.py index e036dc90d0..7137ceb9ef 100644 --- a/acto/schema/object.py +++ b/acto/schema/object.py @@ -1,6 +1,6 @@ +import random from typing import List, Tuple -from acto.schema.schema import extract_schema from acto.utils.thread_logger import get_thread_logger from .base import BaseSchema, TreeNode @@ -8,8 +8,8 @@ class ObjectSchema(BaseSchema): - '''Representation of an object node - + """Representation of an object node + It handles - properties - additionalProperties @@ -20,46 +20,55 @@ class ObjectSchema(BaseSchema): - dependencies - patternProperties - regexp - ''' + """ def __init__(self, path: list, schema: dict) -> None: + # This is to fix the circular import + # pylint: disable=import-outside-toplevel + from .schema import extract_schema + super().__init__(path, schema) self.properties = {} self.additional_properties = None self.required = [] logger = get_thread_logger(with_prefix=True) - if 'properties' not in schema and 'additionalProperties' not in schema: - logger.warning('Object schema %s does not have properties nor additionalProperties' % - self.path) - if 'properties' in schema: - for property_key, property_schema in schema['properties'].items(): - self.properties[property_key] = extract_schema(self.path + [property_key], - property_schema) - if 'additionalProperties' in schema: - 
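The TODO in `NumberSchema.gen` below leaves `exclusiveMinimum`/`exclusiveMaximum` unused. One possible way to honor them, assuming the boolean OpenAPI-v3.0 form that CRD schemas typically use (the helper and its epsilon nudge are illustrative choices, not the project's API):

```python
import random

def gen_number(minimum: float, maximum: float,
               exclusive_minimum: bool = False,
               exclusive_maximum: bool = False,
               eps: float = 1e-9) -> float:
    """Uniform draw that respects exclusive bounds (sketch for the TODO)."""
    lo = minimum + eps if exclusive_minimum else minimum
    hi = maximum - eps if exclusive_maximum else maximum
    return random.uniform(lo, hi)

x = gen_number(0, 5, exclusive_minimum=True)
assert 0 < x <= 5
```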
self.additional_properties = extract_schema(self.path + ['additional_properties'], - schema['additionalProperties']) - if 'required' in schema: - self.required = schema['required'] - if 'minProperties' in schema: - self.min_properties = schema['minProperties'] - if 'maxProperties' in schema: - self.max_properties = schema['maxProperties'] + if "properties" not in schema and "additionalProperties" not in schema: + logger.warning( + "Object schema %s does not have properties nor additionalProperties", + self.path, + ) + if "properties" in schema: + for property_key, property_schema in schema["properties"].items(): + self.properties[property_key] = extract_schema( + self.path + [property_key], property_schema + ) + if "additionalProperties" in schema: + self.additional_properties = extract_schema( + self.path + ["additional_properties"], + schema["additionalProperties"], + ) + if "required" in schema: + self.required = schema["required"] + if "minProperties" in schema: + self.min_properties = schema["minProperties"] + if "maxProperties" in schema: + self.max_properties = schema["maxProperties"] def get_all_schemas(self) -> Tuple[list, list, list]: - '''Return all the subschemas as a list''' + """Return all the subschemas as a list""" if self.problematic: return [], [], [] normal_schemas: List[BaseSchema] = [] pruned_by_overspecified: List[BaseSchema] = [] pruned_by_copiedover: List[BaseSchema] = [] - if self.properties != None: + if self.properties is not None: for value in self.properties.values(): child_schema_tuple = value.get_all_schemas() normal_schemas.extend(child_schema_tuple[0]) pruned_by_overspecified.extend(child_schema_tuple[1]) pruned_by_copiedover.extend(child_schema_tuple[2]) - if self.additional_properties != None: + if self.additional_properties is not None: normal_schemas.append(self.additional_properties) if self.copied_over: @@ -90,32 +99,36 @@ def get_all_schemas(self) -> Tuple[list, list, list]: return normal_schemas, pruned_by_overspecified, pruned_by_copiedover - def get_normal_semantic_schemas(self) -> Tuple[List['BaseSchema'], List['BaseSchema']]: + def get_normal_semantic_schemas( + self, + ) -> Tuple[List["BaseSchema"], List["BaseSchema"]]: if self.problematic: return [], [] normal_schemas: List[BaseSchema] = [self] semantic_schemas: List[BaseSchema] = [] - if self.properties != None: + if self.properties is not None: for value in self.properties.values(): child_schema_tuple = value.get_normal_semantic_schemas() normal_schemas.extend(child_schema_tuple[0]) semantic_schemas.extend(child_schema_tuple[1]) - if self.additional_properties != None: + if self.additional_properties is not None: normal_schemas.append(self.additional_properties) return normal_schemas, semantic_schemas def to_tree(self) -> TreeNode: node = TreeNode(self.path) - if self.properties != None: + if self.properties is not None: for key, value in self.properties.items(): node.add_child(key, value.to_tree()) - if self.additional_properties != None: - node.add_child('additional_properties', self.additional_properties.to_tree()) + if self.additional_properties is not None: + node.add_child( + "additional_properties", self.additional_properties.to_tree() + ) return node @@ -132,33 +145,82 @@ def empty_value(self): return {} def get_property_schema(self, key): + """Get the schema of a property""" logger = get_thread_logger(with_prefix=True) if key in self.properties: return self.properties[key] - elif self.additional_properties != None: + elif self.additional_properties is not None: return 
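`get_all_schemas()` flattens the schema tree into per-node lists, which is also what `get_total_number_schemas()` counts earlier in this patch. A rough self-contained sketch of that flattening, ignoring the copied-over/over-specified pruning categories:

```python
def count_nodes(schema: dict) -> int:
    """Count schema nodes the way the flattening traversal does (rough sketch)."""
    total = 1  # the node itself
    for sub in schema.get("properties", {}).values():
        total += count_nodes(sub)
    if isinstance(schema.get("additionalProperties"), dict):
        total += count_nodes(schema["additionalProperties"])
    if "items" in schema:
        total += count_nodes(schema["items"])
    return total

crd_like = {
    "type": "object",
    "properties": {
        "replicas": {"type": "integer"},
        "storage": {"type": "object",
                    "properties": {"size": {"type": "string"}}},
    },
}
assert count_nodes(crd_like) == 4  # root + replicas + storage + size
```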
self.additional_properties
         else:
-            logger.warning('Field [%s] does not have a schema, using opaque schema', key)
+            logger.warning(
+                "Field [%s] does not have a schema, using opaque schema", key
+            )
             return OpaqueSchema(self.path + [key], {})
 
     def get_properties(self) -> dict:
+        """Get the properties of the object"""
         return self.properties
 
     def get_additional_properties(self):
+        """Get the additional properties of the object"""
         return self.additional_properties
 
+    def gen(self, exclude_value=None, minimum: bool = False, **kwargs):
+        # TODO: Use constraints: minProperties, maxProperties
+        logger = get_thread_logger(with_prefix=True)
+
+        if self.enum is not None:
+            if exclude_value is not None:
+                return random.choice(
+                    [x for x in self.enum if x != exclude_value]
+                )
+            else:
+                return random.choice(self.enum)
+
+        # XXX: need to handle exclude_value, but not important for now for object types
+        result = {}
+        if len(self.properties) == 0:
+            if self.additional_properties is None:
+                # raise TypeError('[%s]: No properties and no additional properties' % self.path)
+                logger.warning(
+                    "[%s]: No properties and no additional properties",
+                    self.path,
+                )
+                return {}
+            key = "ACTOKEY"
+            result[key] = self.additional_properties.gen(minimum=minimum)
+        else:
+            for k, v in self.properties.items():
+                if minimum:
+                    if k in self.required:
+                        result[k] = v.gen(minimum=True)
+                    else:
+                        continue
+                else:
+                    if random.uniform(0, 1) < 0.1 and k not in self.required:
+                        # 10% of the chance this child will be null
+                        result[k] = None
+                    else:
+                        result[k] = v.gen(minimum=minimum)
+        if "enabled" in self.properties:
+            result["enabled"] = True
+        return result
+
     def __str__(self) -> str:
-        ret = '{'
+        ret = "{"
         for k, v in self.properties.items():
             ret += str(k)
-            ret += ': '
+            ret += ": "
             ret += str(v)
-            ret += ', '
-        ret += '}'
+            ret += ", "
+        ret += "}"
         return ret
 
     def __getitem__(self, key):
-        if self.additional_properties != None and key not in self.properties:
+        if (
+            self.additional_properties is not None
+            and key not in self.properties
+        ):
             # if the object schema has additionalProperties, and the key is not in the properties,
             # return the additionalProperties schema
             return self.additional_properties
diff --git a/acto/schema/oneof.py b/acto/schema/oneof.py
index 61c3938321..6ea33f2203 100644
--- a/acto/schema/oneof.py
+++ b/acto/schema/oneof.py
@@ -1,24 +1,30 @@
+import random
 from copy import deepcopy
 from typing import List, Tuple
 
 from .base import BaseSchema, TreeNode
-from .schema import extract_schema
 
 
 class OneOfSchema(BaseSchema):
-    '''Representing a schema with AnyOf keyword in it
-    '''
+    """Representing a schema with a oneOf keyword in it"""
 
     def __init__(self, path: list, schema: dict) -> None:
+        # This is to fix the circular import
+        # pylint: disable=import-outside-toplevel, cyclic-import
+        from .schema import extract_schema
+
         super().__init__(path, schema)
         self.possibilities = []
-        for index, v in enumerate(schema['oneOf']):
+        for index, v in enumerate(schema["oneOf"]):
             base_schema = deepcopy(schema)
-            del base_schema['oneOf']
+            del base_schema["oneOf"]
             base_schema.update(v)
-            self.possibilities.append(extract_schema(self.path + ['%s' % str(index)], base_schema))
+            self.possibilities.append(
+                extract_schema(self.path + [str(index)], base_schema)
+            )
 
     def get_possibilities(self):
+        """Return all possibilities of the oneOf schema"""
         return self.possibilities
 
     def get_all_schemas(self) -> Tuple[list, list, list]:
@@ -26,9 +32,11 @@ def get_all_schemas(self) -> Tuple[list, list, list]:
         return [], [], []
         return [self], 
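`ObjectSchema.gen()` above has two modes: `minimum=True` keeps only required keys, while the default mode occasionally nulls an optional child. A toy sketch of that key-selection policy (`gen_object` and the property defaults are illustrative stand-ins):

```python
import random

def gen_object(properties: dict, required: list, minimum: bool = False) -> dict:
    """Sketch of ObjectSchema.gen's key-selection policy."""
    result = {}
    for key, default in properties.items():
        if minimum and key not in required:
            continue  # minimum mode keeps only required keys
        if not minimum and key not in required and random.random() < 0.1:
            result[key] = None  # ~10% chance an optional child is null
        else:
            result[key] = default
    return result

props = {"name": "ACTOKEY", "replicas": 3, "debug": False}
assert gen_object(props, required=["name"], minimum=True) == {"name": "ACTOKEY"}
```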
[], [] - def get_normal_semantic_schemas(self) -> Tuple[List['BaseSchema'], List['BaseSchema']]: - normal_schemas = [self] - semantic_schemas = [] + def get_normal_semantic_schemas( + self, + ) -> Tuple[List["BaseSchema"], List["BaseSchema"]]: + normal_schemas: list[BaseSchema] = [self] + semantic_schemas: list[BaseSchema] = [] for possibility in self.possibilities: possibility_tuple = possibility.get_normal_semantic_schemas() @@ -51,10 +59,14 @@ def load_examples(self, example: list): def set_default(self, instance): self.default = instance + def gen(self, exclude_value=None, minimum: bool = False, **kwargs): + schema = random.choice(self.possibilities) + return schema.gen(exclude_value=exclude_value, minimum=minimum) + def __str__(self) -> str: - ret = '[' + ret = "[" for i in self.possibilities: ret += str(i) - ret += ', ' - ret += ']' + ret += ", " + ret += "]" return ret diff --git a/acto/schema/opaque.py b/acto/schema/opaque.py index b06795030e..94d24761d4 100644 --- a/acto/schema/opaque.py +++ b/acto/schema/opaque.py @@ -4,15 +4,14 @@ class OpaqueSchema(BaseSchema): - '''Opaque schema to handle the fields that do not have a schema''' - - def __init__(self, path: list, schema: dict) -> None: - super().__init__(path, schema) + """Opaque schema to handle the fields that do not have a schema""" def get_all_schemas(self) -> Tuple[list, list, list]: return [], [], [] - - def get_normal_semantic_schemas(self) -> Tuple[List['BaseSchema'], List['BaseSchema']]: + + def get_normal_semantic_schemas( + self, + ) -> Tuple[List["BaseSchema"], List["BaseSchema"]]: return [], [] def to_tree(self) -> TreeNode: @@ -21,8 +20,14 @@ def to_tree(self) -> TreeNode: def load_examples(self, example): pass + def set_default(self, instance): + self.default = instance + def empty_value(self): return None + def gen(self, exclude_value=None, minimum: bool = False, **kwargs): + return None + def __str__(self) -> str: - return 'any' + return "any" diff --git a/acto/schema/schema.py b/acto/schema/schema.py index f60f679dfe..d01d13676f 100644 --- a/acto/schema/schema.py +++ b/acto/schema/schema.py @@ -1,147 +1,51 @@ from acto.utils import get_thread_logger +from .anyof import AnyOfSchema +from .array import ArraySchema from .base import BaseSchema from .boolean import BooleanSchema from .integer import IntegerSchema from .number import NumberSchema +from .object import ObjectSchema +from .oneof import OneOfSchema from .opaque import OpaqueSchema from .string import StringSchema def extract_schema(path: list, schema: dict) -> BaseSchema: - from .anyof import AnyOfSchema - from .array import ArraySchema - from .object import ObjectSchema - from .oneof import OneOfSchema + """Extract a schema from a dict""" + logger = get_thread_logger(with_prefix=True) - if 'anyOf' in schema: + if "anyOf" in schema: return AnyOfSchema(path, schema) - elif 'oneOf' in schema: + elif "oneOf" in schema: return OneOfSchema(path, schema) - if 'type' not in schema: - if 'properties' in schema: + if "type" not in schema: + if "properties" in schema: return ObjectSchema(path, schema) else: - logger.warn('No type found in schema: %s' % str(schema)) + logger.warning("No type found in schema: %s", str(schema)) return OpaqueSchema(path, schema) - t = schema['type'] + t = schema["type"] if isinstance(t, list): - if 'null' in t: - t.remove('null') + if "null" in t: + t.remove("null") if len(t) == 1: t = t[0] - if t == 'string': + if t == "string": return StringSchema(path, schema) - elif t == 'number': + elif t == "number": return NumberSchema(path, 
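`extract_schema()` collapses nullable type unions before dispatching; a list-valued `type` that still has more than one member falls through to the `RuntimeError` at the end of the function. A small mirror of that normalization step:

```python
def normalize_type(t):
    """Mirror of extract_schema's handling of list-typed `type` fields."""
    if isinstance(t, list):
        t = [x for x in t if x != "null"]  # nullable unions collapse to the base type
        if len(t) == 1:
            t = t[0]
    return t

assert normalize_type(["string", "null"]) == "string"
assert normalize_type("integer") == "integer"
assert normalize_type(["string", "integer"]) == ["string", "integer"]  # still ambiguous
```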
schema) - elif t == 'integer': + elif t == "integer": return IntegerSchema(path, schema) - elif t == 'boolean': + elif t == "boolean": return BooleanSchema(path, schema) - elif t == 'array': + elif t == "array": return ArraySchema(path, schema) - elif t == 'object': + elif t == "object": return ObjectSchema(path, schema) else: - logger.error('Unsupported type %s' % t) - return None - - -# if __name__ == '__main__': -# with open('data/rabbitmq-operator/operator.yaml', -# 'r') as operator_yaml: -# parsed_operator_documents = yaml.load_all(operator_yaml, Loader=yaml.FullLoader) -# for document in parsed_operator_documents: -# if document['kind'] == 'CustomResourceDefinition': -# spec_schema = ObjectSchema( -# ['root'], document['spec']['versions'][0]['schema']['openAPIV3Schema'] -# ['properties']['spec']) -# print(str(spec_schema)) -# print(spec_schema.gen()) -# print(spec_schema.num_fields()) -# for k, v in spec_schema.properties.items(): -# print('%s has %d fields' % (k, v.num_fields())) -# print(spec_schema.num_cases()) - -# schema_list = spec_schema.get_all_schemas() -# test_plan = {} -# for schema in schema_list: -# test_plan[str(schema.path)] = schema.test_cases() -# with open('test_plan.json', 'w') as fout: -# json.dump(test_plan, fout, cls=ActoEncoder, indent=4) - - # ss = StringSchema(None, {"type": "string"}) - # print(ss.gen()) - - # schemas = { - # 'configmap': '/home/tyler/k8s_resources/configmap-v1.json', - # 'cronjob': '/home/tyler/k8s_resources/cronjob-batch-v2alpha1.json', - # 'deployment': '/home/tyler/k8s_resources/deployment-apps-v1.json', - # 'ingress': '/home/tyler/k8s_resources/ingress-networking-v1.json', - # 'pvc': '/home/tyler/k8s_resources/persistentvolumeclaim-v1.json', - # 'pdb': '/home/tyler/k8s_resources/poddisruptionbudget-policy-v1beta1.json', - # 'pod': '/home/tyler/k8s_resources/pod-v1.json', - # 'secret': '/home/tyler/k8s_resources/secret-v1.json', - # 'service': '/home/tyler/k8s_resources/service-v1.json', - # 'statefulset': '/home/tyler/k8s_resources/statefulset-apps-v1.json', - # } - - # resource_num_fields = {} - - # for resource, schema_path in schemas.items(): - # with open(schema_path, 'r') as schema_file: - # schema = json.load(schema_file) - # spec_schema = extract_schema([], schema) - - # resource_num_fields[resource] = len(spec_schema.get_all_schemas()[0]) - - # print(resource_num_fields) - - # used_resource_in_operators = { - # 'cass-operator': [ - # 'configmap', 'deployment', 'pvc', 'pdb', 'secret', 'service', 'statefulset' - # ], - # 'cockroach-operator': [ - # 'configmap', 'deployment', 'pvc', 'pdb', 'secret', 'service', 'statefulset' - # ], - # 'knative-operator': ['configmap', 'deployment', 'pdb', 'secret', 'service'], - # 'mongodb-community-operator': [ - # 'configmap', 'deployment', 'pvc', 'secret', 'service', 'statefulset' - # ], - # 'percona-server-mongodb-operator': [ - # 'configmap', 'deployment', 'ingress', 'pvc', 'pdb', 'secret', 'service', 'statefulset' - # ], - # 'percona-xtradb-cluster-operator': [ - # 'configmap', 'deployment', 'pvc', 'pdb', 'secret', 'service', 'statefulset' - # ], - # 'rabbitmq-operator': ['configmap', 'deployment', 'pvc', 'secret', 'service', 'statefulset'], - # 'redis-operator': ['configmap', 'deployment', 'pdb', 'secret', 'service', 'statefulset'], - # 'redis-ot-operator': ['configmap', 'deployment', 'pvc', 'secret', 'service', 'statefulset'], - # 'tidb-operator': ['configmap', 'deployment', 'pvc', 'secret', 'service', 'statefulset'], - # 'zookeeper-operator': [ - # 'configmap', 'deployment', 'pvc', 'pdb', 
'secret', 'service', 'statefulset' - # ], - # } - - # for operator, resources in used_resource_in_operators.items(): - # num_system_fields = 0 - # for resource in resources: - # num_system_fields += resource_num_fields[resource] - - # print('%s has %d system fields' % (operator, num_system_fields)) - # print(num_system_fields) - - # with open('data/cass-operator/bundle.yaml', - # 'r') as operator_yaml: - # parsed_operator_documents = yaml.load_all(operator_yaml, Loader=yaml.FullLoader) - # for document in parsed_operator_documents: - # if document['kind'] == 'CustomResourceDefinition': - # spec_schema = ObjectSchema( - # ['root'], document['spec']['versions'][0]['schema']['openAPIV3Schema'] - # ['properties']['spec']) - # print(spec_schema.num_fields()) - # for k, v in spec_schema.properties.items(): - # print('%s has %d fields' % (k, len(v.get_all_schemas()[0]))) \ No newline at end of file + raise RuntimeError(f"Unknown type: {t}") diff --git a/acto/schema/string.py b/acto/schema/string.py index 097e4fddc0..bad2e6fa81 100644 --- a/acto/schema/string.py +++ b/acto/schema/string.py @@ -1,31 +1,44 @@ +import random from typing import List, Tuple +import exrex + +from acto.common import random_string + from .base import BaseSchema, TreeNode class StringSchema(BaseSchema): - '''Representation of a string node - + """Representation of a string node + It handles - minLength - maxLength - pattern - ''' + """ + default_max_length = 10 def __init__(self, path: list, schema: dict) -> None: super().__init__(path, schema) - self.min_length = None if 'minLength' not in schema else schema['minLength'] - self.max_length = self.default_max_length if 'maxLength' not in schema else schema[ - 'maxLength'] - self.pattern = None if 'pattern' not in schema else schema['pattern'] + self.min_length = ( + None if "minLength" not in schema else schema["minLength"] + ) + self.max_length = ( + self.default_max_length + if "maxLength" not in schema + else schema["maxLength"] + ) + self.pattern = None if "pattern" not in schema else schema["pattern"] def get_all_schemas(self) -> Tuple[list, list, list]: if self.problematic: return [], [], [] return [self], [], [] - - def get_normal_semantic_schemas(self) -> Tuple[List['BaseSchema'], List['BaseSchema']]: + + def get_normal_semantic_schemas( + self, + ) -> Tuple[List["BaseSchema"], List["BaseSchema"]]: if self.problematic: return [], [] return [self], [] @@ -42,5 +55,21 @@ def set_default(self, instance): def empty_value(self): return "" + def gen(self, exclude_value=None, minimum: bool = False, **kwargs): + # TODO: Use minLength: the exrex does not support minLength + if self.enum is not None: + if exclude_value is not None: + return random.choice( + [x for x in self.enum if x != exclude_value] + ) + else: + return random.choice(self.enum) + if self.pattern is not None: + # XXX: since it's random, we don't need to exclude the value + return exrex.getone(self.pattern, self.max_length) + if minimum: + return random_string(self.min_length) # type: ignore + return "ACTOKEY" + def __str__(self) -> str: - return 'String' \ No newline at end of file + return "String" From ef08dab35b9d5a9f3c1fcc6efe4da73f74e39b32 Mon Sep 17 00:00:00 2001 From: Tyler Gu <jiaweig3@illinois.edu> Date: Wed, 31 Jan 2024 12:16:50 -0600 Subject: [PATCH 3/4] Update bug numbers Signed-off-by: Tyler Gu <jiaweig3@illinois.edu> --- bugs.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bugs.md b/bugs.md index 52aeaad929..65d98c41f1 100644 --- a/bugs.md +++ b/bugs.md @@ -63,6 +63,8 @@ | 
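The pattern branch of `StringSchema.gen` relies on `exrex.getone()`; its second argument caps the expansion of unbounded quantifiers such as `*` and `+`, so it is not a strict `maxLength` (and, as the TODO notes, `minLength` is not enforced either). A quick usage sketch:

```python
import re
import exrex

pattern = r"[a-z]{3}-[0-9]{2}"

# getone draws a single string matching the pattern; the limit argument
# only bounds unbounded repetitions, which this pattern does not have
candidate = exrex.getone(pattern, 10)
assert re.fullmatch(pattern, candidate)
```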
pravega/zookeeper-operator | [https://github.com/pravega/zookeeper-operator/issues/540](https://github.com/pravega/zookeeper-operator/issues/540) | | | pravega/zookeeper-operator | [https://github.com/pravega/zookeeper-operator/issues/541](https://github.com/pravega/zookeeper-operator/issues/541) | | | pravega/zookeeper-operator | [https://github.com/pravega/zookeeper-operator/issues/547](https://github.com/pravega/zookeeper-operator/issues/547) | | +| cloudnative-pg/cloudnative-pg | [https://github.com/cloudnative-pg/cloudnative-pg/issues/3623](https://github.com/cloudnative-pg/cloudnative-pg/issues/3623) | fixed | +| cloudnative-pg/cloudnative-pg | [https://github.com/cloudnative-pg/cloudnative-pg/issues/3541](https://github.com/cloudnative-pg/cloudnative-pg/issues/3541) | | # Byproduct bugs From 5a89170b75530c7b043776e261c660a9d7d21449 Mon Sep 17 00:00:00 2001 From: "Jiawei \"Tyler\" Gu" <47795840+tylergu@users.noreply.github.com> Date: Wed, 31 Jan 2024 12:20:09 -0600 Subject: [PATCH 4/4] Update counter.yml --- .github/workflows/counter.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/counter.yml b/.github/workflows/counter.yml index ec84bd650e..87f3146036 100644 --- a/.github/workflows/counter.yml +++ b/.github/workflows/counter.yml @@ -37,7 +37,7 @@ jobs: - name: Checkout uses: actions/checkout@v2 - name: 'Set up Python' - uses: actions/setup-python@v1 + uses: actions/setup-python@v4 with: python-version: 3.10 - name: Change bugs.md
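Since the workflow bump touches setup-python, the unquoted `python-version: 3.10` context line above is worth double-checking: YAML readers coerce a bare `3.10` to the float `3.1`, which is why the value is usually quoted in workflow files. A quick demonstration with PyYAML (illustrative, not part of the patch):

```python
import yaml

# Unquoted, YAML parses 3.10 as the float 3.1; quoting preserves the string.
assert yaml.safe_load("python-version: 3.10") == {"python-version": 3.1}
assert yaml.safe_load('python-version: "3.10"') == {"python-version": "3.10"}
```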