From 28ea84441a5c88f036437e6a156ab699da58151d Mon Sep 17 00:00:00 2001 From: "Jiawei \"Tyler\" Gu" <47795840+tylergu@users.noreply.github.com> Date: Mon, 29 Jan 2024 02:24:43 -0600 Subject: [PATCH 1/4] Prepare system state interface and collection (#313) * Prepare system state interface and collection Signed-off-by: Tyler Gu <jiaweig3@illinois.edu> * Fix system state test Signed-off-by: Tyler Gu <jiaweig3@illinois.edu> * Fix temporary file creation in test Signed-off-by: Tyler Gu <jiaweig3@illinois.edu> --------- Signed-off-by: Tyler Gu <jiaweig3@illinois.edu> --- acto/cli/collect_system_state.py | 58 ++++ acto/system_state/__init__.py | 0 acto/system_state/cluster_role.py | 34 +++ acto/system_state/cluster_role_binding.py | 35 +++ acto/system_state/config_map.py | 35 +++ acto/system_state/cron_job.py | 35 +++ acto/system_state/daemon_set.py | 68 +++++ acto/system_state/deployment.py | 74 ++++++ acto/system_state/endpoints.py | 35 +++ acto/system_state/ingress.py | 41 +++ acto/system_state/job.py | 35 +++ acto/system_state/kubernetes_object.py | 250 ++++++++++++++++++ acto/system_state/kubernetes_system_state.py | 234 ++++++++++++++++ acto/system_state/network_policy.py | 37 +++ acto/system_state/persistent_volume.py | 32 +++ acto/system_state/persistent_volume_claim.py | 37 +++ acto/system_state/pod.py | 64 +++++ acto/system_state/replica_set.py | 56 ++++ acto/system_state/role.py | 37 +++ acto/system_state/role_binding.py | 37 +++ acto/system_state/secret.py | 35 +++ acto/system_state/service.py | 40 +++ acto/system_state/service_account.py | 37 +++ acto/system_state/stateful_set.py | 71 +++++ acto/system_state/storage_class.py | 32 +++ .../test_kubernetes_system_state.py | 66 +++++ 26 files changed, 1515 insertions(+) create mode 100644 acto/cli/collect_system_state.py create mode 100644 acto/system_state/__init__.py create mode 100644 acto/system_state/cluster_role.py create mode 100644 acto/system_state/cluster_role_binding.py create mode 100644 acto/system_state/config_map.py create mode 100644 acto/system_state/cron_job.py create mode 100644 acto/system_state/daemon_set.py create mode 100644 acto/system_state/deployment.py create mode 100644 acto/system_state/endpoints.py create mode 100644 acto/system_state/ingress.py create mode 100644 acto/system_state/job.py create mode 100644 acto/system_state/kubernetes_object.py create mode 100644 acto/system_state/kubernetes_system_state.py create mode 100644 acto/system_state/network_policy.py create mode 100644 acto/system_state/persistent_volume.py create mode 100644 acto/system_state/persistent_volume_claim.py create mode 100644 acto/system_state/pod.py create mode 100644 acto/system_state/replica_set.py create mode 100644 acto/system_state/role.py create mode 100644 acto/system_state/role_binding.py create mode 100644 acto/system_state/secret.py create mode 100644 acto/system_state/service.py create mode 100644 acto/system_state/service_account.py create mode 100644 acto/system_state/stateful_set.py create mode 100644 acto/system_state/storage_class.py create mode 100644 test/integration_tests/test_kubernetes_system_state.py diff --git a/acto/cli/collect_system_state.py b/acto/cli/collect_system_state.py new file mode 100644 index 0000000000..59ee9233a4 --- /dev/null +++ b/acto/cli/collect_system_state.py @@ -0,0 +1,58 @@ +import argparse +import logging +import os + +from acto.common import kubernetes_client +from acto.system_state.kubernetes_system_state import KubernetesSystemState + + +def main(): + """Main function""" + parser = 
argparse.ArgumentParser( + description="Collect the system state of a Kubernetes cluster under a namespace" + " and dump it to a file. Check the health of the system state." + ) + parser.add_argument( + "--output", + required=False, + default="system_state.json", + help="Path to dump the system state to", + ) + parser.add_argument( + "--kubeconfig", + required=False, + default=f"{os.environ['HOME']}/.kube/config", + help="Path to the kubeconfig file", + ) + parser.add_argument( + "--kubecontext", + required=False, + default="kind-kind", + help="Name of the Kubernetes context to use", + ) + parser.add_argument( + "--namespace", + required=False, + default="default", + help="Namespace to collect the system state under", + ) + args = parser.parse_args() + + api_client = kubernetes_client(args.kubeconfig, args.kubecontext) + + system_state = KubernetesSystemState.from_api_client( + api_client, args.namespace + ) + system_state.dump(args.output) + logging.info("System state dumped to %s", args.output) + + health_status = system_state.check_health() + if health_status.is_healthy() is False: + logging.error( + "System state is not healthy with errors: \n%s", + str(health_status), + ) + + +if __name__ == "__main__": + main() diff --git a/acto/system_state/__init__.py b/acto/system_state/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/acto/system_state/cluster_role.py b/acto/system_state/cluster_role.py new file mode 100644 index 0000000000..f786162b6f --- /dev/null +++ b/acto/system_state/cluster_role.py @@ -0,0 +1,34 @@ +"""ClusterRole state model""" + +import kubernetes +import kubernetes.client.models as kubernetes_models +import pydantic +from typing_extensions import Self + +from acto.system_state.kubernetes_object import ( + KubernetesDictObject, + list_object_helper, +) + + +class ClusterRoleState(KubernetesDictObject): + """ClusterRole state object.""" + + root: dict[str, kubernetes_models.V1ClusterRole] + + @classmethod + def from_api_client(cls, api_client: kubernetes.client.ApiClient) -> Self: + data = list_object_helper( + kubernetes.client.RbacAuthorizationV1Api( + api_client + ).list_cluster_role, + ) + return cls.model_validate(data) + + def check_health(self) -> tuple[bool, str]: + """Check ClusterRole health""" + return True, "" + + @pydantic.model_serializer + def serialize(self): + return {key: value.to_dict() for key, value in self.root.items()} diff --git a/acto/system_state/cluster_role_binding.py b/acto/system_state/cluster_role_binding.py new file mode 100644 index 0000000000..097ea6fc50 --- /dev/null +++ b/acto/system_state/cluster_role_binding.py @@ -0,0 +1,35 @@ +"""ClusterRoleBinding state model""" + +import kubernetes +import kubernetes.client.models as kubernetes_models +import pydantic +from kubernetes.client.api_client import ApiClient +from typing_extensions import Self + +from acto.system_state.kubernetes_object import ( + KubernetesDictObject, + list_object_helper, +) + + +class ClusterRoleBindingState(KubernetesDictObject): + """ClusterRoleBinding state object.""" + + root: dict[str, kubernetes_models.V1ClusterRoleBinding] + + @classmethod + def from_api_client(cls, api_client: ApiClient) -> Self: + data = list_object_helper( + kubernetes.client.RbacAuthorizationV1Api( + api_client + ).list_cluster_role_binding, + ) + return cls.model_validate(data) + + def check_health(self) -> tuple[bool, str]: + """Check ClusterRoleBinding health""" + return True, "" + + @pydantic.model_serializer + def serialize(self): + return {key: 
value.to_dict() for key, value in self.root.items()} diff --git a/acto/system_state/config_map.py b/acto/system_state/config_map.py new file mode 100644 index 0000000000..bfa9fd45ab --- /dev/null +++ b/acto/system_state/config_map.py @@ -0,0 +1,35 @@ +"""ConfigMap state model""" + +import kubernetes +import kubernetes.client.models as kubernetes_models +import pydantic +from typing_extensions import Self + +from acto.system_state.kubernetes_object import ( + KubernetesNamespacedDictObject, + list_namespaced_object_helper, +) + + +class ConfigMapState(KubernetesNamespacedDictObject): + """ConfigMap state object.""" + + root: dict[str, kubernetes_models.V1ConfigMap] + + @classmethod + def from_api_client_namespaced( + cls, api_client: kubernetes.client.ApiClient, namespace: str + ) -> Self: + data = list_namespaced_object_helper( + kubernetes.client.CoreV1Api(api_client).list_namespaced_config_map, + namespace, + ) + return cls.model_validate(data) + + def check_health(self) -> tuple[bool, str]: + """Check if ConfigMap is healthy""" + return True, "" + + @pydantic.model_serializer + def serialize(self): + return {key: value.to_dict() for key, value in self.root.items()} diff --git a/acto/system_state/cron_job.py b/acto/system_state/cron_job.py new file mode 100644 index 0000000000..b51fc9df75 --- /dev/null +++ b/acto/system_state/cron_job.py @@ -0,0 +1,35 @@ +"""CronJob state model""" + +import kubernetes +import kubernetes.client.models as kubernetes_models +import pydantic +from typing_extensions import Self + +from acto.system_state.kubernetes_object import ( + KubernetesNamespacedDictObject, + list_namespaced_object_helper, +) + + +class CronJobState(KubernetesNamespacedDictObject): + """CronJob state object.""" + + root: dict[str, kubernetes_models.V1CronJob] + + @classmethod + def from_api_client_namespaced( + cls, api_client: kubernetes.client.ApiClient, namespace: str + ) -> Self: + data = list_namespaced_object_helper( + kubernetes.client.BatchV1Api(api_client).list_namespaced_cron_job, + namespace, + ) + return cls.model_validate(data) + + def check_health(self) -> tuple[bool, str]: + """Check CronJob health""" + return True, "" + + @pydantic.model_serializer + def serialize(self): + return {key: value.to_dict() for key, value in self.root.items()} diff --git a/acto/system_state/daemon_set.py b/acto/system_state/daemon_set.py new file mode 100644 index 0000000000..0f2fef6477 --- /dev/null +++ b/acto/system_state/daemon_set.py @@ -0,0 +1,68 @@ +"""DaemonSet state model""" +import kubernetes +import kubernetes.client.models as kubernetes_models +import pydantic +from typing_extensions import Self + +from acto.system_state.kubernetes_object import ( + KubernetesNamespacedDictObject, + list_namespaced_object_helper, +) + + +class DaemonSetState(KubernetesNamespacedDictObject): + """DaemonSet state object.""" + + root: dict[str, kubernetes_models.V1DaemonSet] + + @classmethod + def from_api_client_namespaced( + cls, api_client: kubernetes.client.ApiClient, namespace: str + ) -> Self: + data = list_namespaced_object_helper( + kubernetes.client.AppsV1Api(api_client).list_namespaced_daemon_set, + namespace, + ) + return cls.model_validate(data) + + def check_health(self) -> tuple[bool, str]: + """Check if DaemonSet is healthy + + Returns: + tuple[bool, str]: (is_healthy, reason) + """ + + for name, daemon_set in self.root.items(): + if ( + daemon_set.status.observed_generation + != daemon_set.metadata.generation + ): + return False, f"DaemonSet[{name}] generation mismatch" + + if ( + 
daemon_set.status.desired_number_scheduled + != daemon_set.status.number_ready + ): + return ( + False, + f"DaemonSet[{name}] replicas mismatch, " + + f"desired[{daemon_set.status.desired_number_scheduled}] " + + f"!= ready[{daemon_set.status.number_ready}]", + ) + + if daemon_set.status.conditions is not None: + for condition in daemon_set.status.conditions: + if ( + condition.type == "Progressing" + and condition.status != "True" + ): + return ( + False, + f"DaemonSet[{name}] is not progressing: {condition.message}", + ) + + return True, "" + + @pydantic.model_serializer + def serialize(self): + return {key: value.to_dict() for key, value in self.root.items()} diff --git a/acto/system_state/deployment.py b/acto/system_state/deployment.py new file mode 100644 index 0000000000..76189ae9f1 --- /dev/null +++ b/acto/system_state/deployment.py @@ -0,0 +1,74 @@ +"""Deployment state model""" +import kubernetes +import kubernetes.client.models as kubernetes_models +import pydantic +from typing_extensions import Self + +from acto.system_state.kubernetes_object import ( + KubernetesNamespacedDictObject, + list_namespaced_object_helper, +) + + +class DeploymentState(KubernetesNamespacedDictObject): + """Deployment state model""" + + root: dict[str, kubernetes_models.V1Deployment] + + @classmethod + def from_api_client_namespaced( + cls, api_client: kubernetes.client.ApiClient, namespace: str + ) -> Self: + data = list_namespaced_object_helper( + kubernetes.client.AppsV1Api(api_client).list_namespaced_deployment, + namespace, + ) + return cls.model_validate(data) + + def check_health(self) -> tuple[bool, str]: + """Check if deployment is healthy + + Returns: + tuple[bool, str]: (is_healthy, reason) + """ + + for name, deployment in self.root.items(): + if ( + deployment.status.observed_generation + != deployment.metadata.generation + ): + return False, f"Deployment[{name}] generation mismatch" + + if deployment.spec.replicas != deployment.status.ready_replicas: + return False, f"Deployment[{name}] replicas mismatch" + + if deployment.status.conditions is not None: + for condition in deployment.status.conditions: + if ( + condition.type == "Available" + and condition.status != "True" + ): + return False, f"Deployment[{name}] is not available" + if ( + condition.type == "Progressing" + and condition.status != "True" + ): + return False, f"Deployment[{name}] is not progressing" + + if deployment.status.replicas != deployment.status.ready_replicas: + return False, f"Deployment[{name}] replicas mismatch" + + if ( + deployment.status.unavailable_replicas != 0 + and deployment.status.unavailable_replicas is not None + ): + return ( + False, + f"[{name}] [{deployment.status.unavailable_replicas}] pods are unavailable", + ) + + return True, "" + + @pydantic.model_serializer + def serialize(self): + return {key: value.to_dict() for key, value in self.root.items()} diff --git a/acto/system_state/endpoints.py b/acto/system_state/endpoints.py new file mode 100644 index 0000000000..f2ae0bc006 --- /dev/null +++ b/acto/system_state/endpoints.py @@ -0,0 +1,35 @@ +"""Endpoints state model""" + +import kubernetes +import kubernetes.client.models as kubernetes_models +import pydantic +from typing_extensions import Self + +from acto.system_state.kubernetes_object import ( + KubernetesNamespacedDictObject, + list_namespaced_object_helper, +) + + +class EndpointsState(KubernetesNamespacedDictObject): + """Endpoints state object.""" + + root: dict[str, kubernetes_models.V1Endpoints] + + @classmethod + def 
from_api_client_namespaced( + cls, api_client: kubernetes.client.ApiClient, namespace: str + ) -> Self: + data = list_namespaced_object_helper( + kubernetes.client.CoreV1Api(api_client).list_namespaced_endpoints, + namespace, + ) + return cls.model_validate(data) + + def check_health(self) -> tuple[bool, str]: + """Check Endpoints health""" + return True, "" + + @pydantic.model_serializer + def serialize(self): + return {key: value.to_dict() for key, value in self.root.items()} diff --git a/acto/system_state/ingress.py b/acto/system_state/ingress.py new file mode 100644 index 0000000000..ceb3f93a1f --- /dev/null +++ b/acto/system_state/ingress.py @@ -0,0 +1,41 @@ +"""Ingress state model""" +import kubernetes +import kubernetes.client.models as kubernetes_models +import pydantic +from typing_extensions import Self + +from acto.system_state.kubernetes_object import ( + KubernetesNamespacedDictObject, + list_namespaced_object_helper, +) + + +class IngressState(KubernetesNamespacedDictObject): + """Ingress state object.""" + + root: dict[str, kubernetes_models.V1Ingress] + + @classmethod + def from_api_client_namespaced( + cls, api_client: kubernetes.client.ApiClient, namespace: str + ) -> Self: + data = list_namespaced_object_helper( + kubernetes.client.NetworkingV1Api( + api_client + ).list_namespaced_ingress, + namespace, + ) + return cls.model_validate(data) + + def check_health(self) -> tuple[bool, str]: + """Check if Ingress is healthy + + Returns: + tuple[bool, str]: (is_healthy, reason) + """ + + return True, "" + + @pydantic.model_serializer + def serialize(self): + return {key: value.to_dict() for key, value in self.root.items()} diff --git a/acto/system_state/job.py b/acto/system_state/job.py new file mode 100644 index 0000000000..e87c8a0975 --- /dev/null +++ b/acto/system_state/job.py @@ -0,0 +1,35 @@ +"""Job state model""" + +import kubernetes +import kubernetes.client.models as kubernetes_models +import pydantic +from typing_extensions import Self + +from acto.system_state.kubernetes_object import ( + KubernetesNamespacedDictObject, + list_namespaced_object_helper, +) + + +class JobState(KubernetesNamespacedDictObject): + """Job state object.""" + + root: dict[str, kubernetes_models.V1Job] + + @classmethod + def from_api_client_namespaced( + cls, api_client: kubernetes.client.ApiClient, namespace: str + ) -> Self: + data = list_namespaced_object_helper( + kubernetes.client.BatchV1Api(api_client).list_namespaced_job, + namespace, + ) + return cls.model_validate(data) + + def check_health(self) -> tuple[bool, str]: + """Check Job health""" + return True, "" + + @pydantic.model_serializer + def serialize(self): + return {key: value.to_dict() for key, value in self.root.items()} diff --git a/acto/system_state/kubernetes_object.py b/acto/system_state/kubernetes_object.py new file mode 100644 index 0000000000..9738a235a1 --- /dev/null +++ b/acto/system_state/kubernetes_object.py @@ -0,0 +1,250 @@ +"""Base class for all Kubernetes objects.""" + + +import abc +from typing import Any, Callable, Literal, TypeAlias + +import deepdiff +import deepdiff.model as deepdiff_model +import kubernetes +import kubernetes.client +import kubernetes.client.models as kubernetes_models +import pydantic +from deepdiff.helper import NotPresent +from typing_extensions import Self + +from acto.common import ( + EXCLUDE_PATH_REGEX, + Diff, + PropertyPath, + flatten_dict, + flatten_list, +) + + +class KubernetesResourceInterface(pydantic.BaseModel): + """Helper interface shared by all Kubernetes resources""" + 
+    model_config = pydantic.ConfigDict(arbitrary_types_allowed=True)
+
+    metadata: kubernetes_models.V1ObjectMeta
+
+
+class KubernetesListNamespacedObjectMethodReturnType(abc.ABC):
+    """Kubernetes list namespaced object method return type"""
+
+    @property
+    @abc.abstractmethod
+    def items(self) -> list[KubernetesResourceInterface]:
+        """Return Kubernetes list namespaced object method items"""
+
+
+KubernetesListNamespacedObjectMethod: TypeAlias = Callable[
+    ..., KubernetesListNamespacedObjectMethodReturnType
+]
+
+DiffType: TypeAlias = Literal[
+    "type_changes",
+    "values_changed",
+    "iterable_item_added",
+    "iterable_item_removed",
+    "dictionary_item_added",
+    "dictionary_item_removed",
+    "set_item_added",
+    "set_item_removed",
+    "attribute_added",
+    "attribute_removed",
+]
+
+# ObjectDiff: TypeAlias = dict[DiffType, dict[str, Diff]]
+
+
+class ObjectDiff(pydantic.RootModel):
+    """Object diff, based on TreeView of deepdiff"""
+
+    root: dict[DiffType, dict[str, Diff]]
+
+    @classmethod
+    def from_deepdiff(cls, diff: deepdiff_model.TreeResult) -> Self:
+        """Create ObjectDiff from deepdiff.DeepDiff"""
+        data: dict[DiffType, dict[str, Diff]] = {}
+
+        for category, changes in diff.items():
+            data[category] = {}
+            for change in changes:
+                # Heuristic
+                # When an entire dict/list is added or removed, flatten this
+                # dict/list to help field matching and value comparison in oracle
+
+                if (isinstance(change.t1, (dict, list))) and (
+                    change.t2 is None or isinstance(change.t2, NotPresent)
+                ):
+                    if isinstance(change.t1, dict):
+                        flattened_changes = flatten_dict(change.t1, [])
+                    else:
+                        flattened_changes = flatten_list(change.t1, [])
+                    for path, value in flattened_changes:
+                        if value is None or isinstance(value, NotPresent):
+                            continue
+                        str_path = change.path()
+                        for i in path:
+                            str_path += f"[{i}]"
+                        data[category][str_path] = Diff(
+                            prev=value,
+                            curr=change.t2,
+                            path=PropertyPath(
+                                change.path(output_format="list") + path
+                            ),
+                        )
+                elif (isinstance(change.t2, (dict, list))) and (
+                    change.t1 is None or isinstance(change.t1, NotPresent)
+                ):
+                    if isinstance(change.t2, dict):
+                        flattened_changes = flatten_dict(change.t2, [])
+                    else:
+                        flattened_changes = flatten_list(change.t2, [])
+                    for path, value in flattened_changes:
+                        if value is None or isinstance(value, NotPresent):
+                            continue
+                        str_path = change.path()
+                        for i in path:
+                            str_path += f"[{i}]"
+                        data[category][str_path] = Diff(
+                            prev=change.t1,
+                            curr=value,
+                            path=PropertyPath(
+                                change.path(output_format="list") + path
+                            ),
+                        )
+                else:
+                    data[category][change.path()] = Diff(
+                        prev=change.t1,
+                        curr=change.t2,
+                        path=PropertyPath(change.path(output_format="list")),
+                    )
+
+        return cls.model_validate(data)
+
+
+class KubernetesObject(abc.ABC, pydantic.RootModel):
+    """Base class for all Kubernetes objects."""
+
+    model_config = pydantic.ConfigDict(arbitrary_types_allowed=True)
+
+    root: Any
+
+    @classmethod
+    @abc.abstractmethod
+    def from_api_client(cls, api_client: kubernetes.client.ApiClient) -> Self:
+        """Create Kubernetes object from ApiClient"""
+        raise NotImplementedError()
+
+    def diff_from(self, other: Self) -> ObjectDiff:
+        """Diff with other Kubernetes object"""
+        return ObjectDiff.from_deepdiff(
+            deepdiff.DeepDiff(
+                self,
+                other,
+                exclude_regex_paths=EXCLUDE_PATH_REGEX,
+                view="tree",
+            )
+        )
+
+    @abc.abstractmethod
+    def check_health(self) -> tuple[bool, str]:
+        """Check if object is healthy
+
+        Returns:
+            tuple[bool, str]: (is_healthy, reason)
+        """
+        raise NotImplementedError()
+
+    @pydantic.model_serializer
+    def
serialize(self) -> dict: + """Serialize Kubernetes object""" + raise NotImplementedError() + + +class KubernetesNamespacedObject(KubernetesObject): + """Base class for all Kubernetes namespaced objects.""" + + @classmethod + @abc.abstractmethod + def from_api_client_namespaced( + cls, api_client: kubernetes.client.ApiClient, namespace: str + ) -> Self: + """Create Kubernetes object from ApiClient""" + raise NotImplementedError() + + @classmethod + def from_api_client(cls, api_client: kubernetes.client.ApiClient) -> Self: + return cls.from_api_client_namespaced(api_client, "default") + + +class KubernetesListObject(KubernetesObject): + """Base class for all Kubernetes objects stored as a list.""" + + root: list[Any] + + def __iter__(self): + return iter(self.root) + + def __getitem__(self, key: int) -> Any: + return self.root[key] + + +class KubernetesDictObject(KubernetesObject): + """Base class for all Kubernetes objects stored as a dict.""" + + root: dict[str, Any] + + def __iter__(self): + return iter(self.root) + + def __getitem__(self, key: str) -> Any: + return self.root[key] + + +class KubernetesNamespacedListObject(KubernetesNamespacedObject): + """Base class for all Kubernetes namespaced objects stored as a list.""" + + root: list[Any] + + def __iter__(self): + return iter(self.root) + + def __getitem__(self, key: int) -> Any: + return self.root[key] + + +class KubernetesNamespacedDictObject(KubernetesNamespacedObject): + """Base class for all Kubernetes namespaced objects stored as a dict.""" + + root: dict[str, Any] + + def __iter__(self): + return iter(self.root) + + def __getitem__(self, key: str) -> Any: + return self.root[key] + + +def list_object_helper( + method: KubernetesListNamespacedObjectMethod, +) -> dict[str, KubernetesResourceInterface]: + """List object helper""" + result = {} + for obj in method(watch=False).items: + result[obj.metadata.name] = obj + return result + + +def list_namespaced_object_helper( + method: KubernetesListNamespacedObjectMethod, + namespace: str, +) -> dict[str, KubernetesResourceInterface]: + """List namespaced object helper""" + result = {} + for obj in method(namespace=namespace, watch=False).items: + result[obj.metadata.name] = obj + return result diff --git a/acto/system_state/kubernetes_system_state.py b/acto/system_state/kubernetes_system_state.py new file mode 100644 index 0000000000..7205f9bf94 --- /dev/null +++ b/acto/system_state/kubernetes_system_state.py @@ -0,0 +1,234 @@ +"""Kubernetes system state model""" + +import json + +import kubernetes +import pydantic +from typing_extensions import Self + +from acto.serialization import ActoEncoder +from acto.system_state.cluster_role import ClusterRoleState +from acto.system_state.cluster_role_binding import ClusterRoleBindingState +from acto.system_state.config_map import ConfigMapState +from acto.system_state.cron_job import CronJobState +from acto.system_state.daemon_set import DaemonSetState +from acto.system_state.deployment import DeploymentState +from acto.system_state.endpoints import EndpointsState +from acto.system_state.ingress import IngressState +from acto.system_state.job import JobState +from acto.system_state.kubernetes_object import ObjectDiff +from acto.system_state.network_policy import NetworkPolicyState +from acto.system_state.persistent_volume import PersistentVolumeState +from acto.system_state.persistent_volume_claim import PersistentVolumeClaimState +from acto.system_state.pod import PodState +from acto.system_state.replica_set import ReplicaSetState +from 
acto.system_state.role import RoleState +from acto.system_state.role_binding import RoleBindingState +from acto.system_state.secret import SecretState +from acto.system_state.service import ServiceState +from acto.system_state.service_account import ServiceAccountState +from acto.system_state.stateful_set import StatefulSetState +from acto.system_state.storage_class import StorageClassState + + +class KubernetesSystemDiff(pydantic.BaseModel): + """Kubernetes system diff model""" + + cluster_role_binding: ObjectDiff + cluster_role: ObjectDiff + config_map: ObjectDiff + cron_job: ObjectDiff + daemon_set: ObjectDiff + deployment: ObjectDiff + endpoint: ObjectDiff + ingress: ObjectDiff + job: ObjectDiff + network_policy: ObjectDiff + persistent_volume_claim: ObjectDiff + persistent_volume: ObjectDiff + pod: ObjectDiff + replica_set: ObjectDiff + role_binding: ObjectDiff + role: ObjectDiff + secret: ObjectDiff + service_account: ObjectDiff + service: ObjectDiff + stateful_set: ObjectDiff + storage_class: ObjectDiff + + +class KubernetesSystemHealth(pydantic.BaseModel): + """Kubernetes system health status model""" + + daemon_set: tuple[bool, str] + deployment: tuple[bool, str] + job: tuple[bool, str] + pod: tuple[bool, str] + replica_set: tuple[bool, str] + stateful_set: tuple[bool, str] + + def is_healthy(self) -> bool: + """Check if Kubernetes system is healthy""" + return all( + [ + self.daemon_set[0], + self.deployment[0], + self.job[0], + self.pod[0], + self.replica_set[0], + self.stateful_set[0], + ] + ) + + def __str__(self) -> str: + ret = "" + if not self.daemon_set[0]: + ret += f"DaemonSet: {self.daemon_set[1]}\n" + if not self.deployment[0]: + ret += f"Deployment: {self.deployment[1]}\n" + if not self.job[0]: + ret += f"Job: {self.job[1]}\n" + if not self.pod[0]: + ret += f"Pod: {self.pod[1]}\n" + if not self.replica_set[0]: + ret += f"ReplicaSet: {self.replica_set[1]}\n" + if not self.stateful_set[0]: + ret += f"StatefulSet: {self.stateful_set[1]}\n" + return ret + + +class KubernetesSystemState(pydantic.BaseModel): + """System state of the cluster, including all Kubernetes resources""" + + cluster_role_binding: ClusterRoleBindingState + cluster_role: ClusterRoleState + config_map: ConfigMapState + cron_job: CronJobState + daemon_set: DaemonSetState + deployment: DeploymentState + endpoint: EndpointsState + ingress: IngressState + job: JobState + network_policy: NetworkPolicyState + persistent_volume_claim: PersistentVolumeClaimState + persistent_volume: PersistentVolumeState + pod: PodState + replica_set: ReplicaSetState + role_binding: RoleBindingState + role: RoleState + secret: SecretState + service_account: ServiceAccountState + service: ServiceState + stateful_set: StatefulSetState + storage_class: StorageClassState + + @classmethod + def from_api_client( + cls, api_client: kubernetes.client.ApiClient, namespace: str + ) -> Self: + """Initialize Kubernetes system state by fetching all objects from + Kubernetes API server. 
+ """ + return cls( + cluster_role_binding=ClusterRoleBindingState.from_api_client( + api_client + ), + cluster_role=ClusterRoleState.from_api_client(api_client), + config_map=ConfigMapState.from_api_client_namespaced( + api_client, namespace + ), + cron_job=CronJobState.from_api_client_namespaced( + api_client, namespace + ), + daemon_set=DaemonSetState.from_api_client_namespaced( + api_client, namespace + ), + deployment=DeploymentState.from_api_client_namespaced( + api_client, namespace + ), + endpoint=EndpointsState.from_api_client_namespaced( + api_client, namespace + ), + ingress=IngressState.from_api_client_namespaced( + api_client, namespace + ), + job=JobState.from_api_client_namespaced(api_client, namespace), + network_policy=NetworkPolicyState.from_api_client_namespaced( + api_client, namespace + ), + persistent_volume_claim=PersistentVolumeClaimState.from_api_client_namespaced( + api_client, namespace + ), + persistent_volume=PersistentVolumeState.from_api_client(api_client), + pod=PodState.from_api_client_namespaced(api_client, namespace), + replica_set=ReplicaSetState.from_api_client_namespaced( + api_client, namespace + ), + role_binding=RoleBindingState.from_api_client_namespaced( + api_client, namespace + ), + role=RoleState.from_api_client_namespaced(api_client, namespace), + secret=SecretState.from_api_client_namespaced( + api_client, namespace + ), + service_account=ServiceAccountState.from_api_client_namespaced( + api_client, namespace + ), + service=ServiceState.from_api_client_namespaced( + api_client, namespace + ), + stateful_set=StatefulSetState.from_api_client_namespaced( + api_client, namespace + ), + storage_class=StorageClassState.from_api_client(api_client), + ) + + def diff_from(self, other: Self) -> KubernetesSystemDiff: + """Diff with other Kubernetes system state""" + return KubernetesSystemDiff( + cluster_role_binding=self.cluster_role_binding.diff_from( + other.cluster_role_binding + ), + cluster_role=self.cluster_role.diff_from(other.cluster_role), + config_map=self.config_map.diff_from(other.config_map), + cron_job=self.cron_job.diff_from(other.cron_job), + daemon_set=self.daemon_set.diff_from(other.daemon_set), + deployment=self.deployment.diff_from(other.deployment), + endpoint=self.endpoint.diff_from(other.endpoint), + ingress=self.ingress.diff_from(other.ingress), + job=self.job.diff_from(other.job), + network_policy=self.network_policy.diff_from(other.network_policy), + persistent_volume_claim=self.persistent_volume_claim.diff_from( + other.persistent_volume_claim + ), + persistent_volume=self.persistent_volume.diff_from( + other.persistent_volume + ), + pod=self.pod.diff_from(other.pod), + replica_set=self.replica_set.diff_from(other.replica_set), + role_binding=self.role_binding.diff_from(other.role_binding), + role=self.role.diff_from(other.role), + secret=self.secret.diff_from(other.secret), + service_account=self.service_account.diff_from( + other.service_account + ), + service=self.service.diff_from(other.service), + stateful_set=self.stateful_set.diff_from(other.stateful_set), + storage_class=self.storage_class.diff_from(other.storage_class), + ) + + def dump(self, path: str) -> None: + """Dump Kubernetes system state to a file""" + with open(path, "w", encoding="utf-8") as file: + json.dump(self.model_dump(), file, indent=4, cls=ActoEncoder) + + def check_health(self) -> KubernetesSystemHealth: + """Check if Kubernetes system state is healthy""" + return KubernetesSystemHealth( + daemon_set=self.daemon_set.check_health(), + 
deployment=self.deployment.check_health(), + job=self.job.check_health(), + pod=self.pod.check_health(), + replica_set=self.replica_set.check_health(), + stateful_set=self.stateful_set.check_health(), + ) diff --git a/acto/system_state/network_policy.py b/acto/system_state/network_policy.py new file mode 100644 index 0000000000..9b11c1bcb6 --- /dev/null +++ b/acto/system_state/network_policy.py @@ -0,0 +1,37 @@ +"""NetworkPolicy state model""" + +import kubernetes +import kubernetes.client.models as kubernetes_models +import pydantic +from typing_extensions import Self + +from acto.system_state.kubernetes_object import ( + KubernetesNamespacedDictObject, + list_namespaced_object_helper, +) + + +class NetworkPolicyState(KubernetesNamespacedDictObject): + """NetworkPolicy state object.""" + + root: dict[str, kubernetes_models.V1NetworkPolicy] + + @classmethod + def from_api_client_namespaced( + cls, api_client: kubernetes.client.ApiClient, namespace: str + ) -> Self: + data = list_namespaced_object_helper( + kubernetes.client.NetworkingV1Api( + api_client + ).list_namespaced_network_policy, + namespace, + ) + return cls.model_validate(data) + + def check_health(self) -> tuple[bool, str]: + """Check NetworkPolicy health""" + return True, "" + + @pydantic.model_serializer + def serialize(self): + return {key: value.to_dict() for key, value in self.root.items()} diff --git a/acto/system_state/persistent_volume.py b/acto/system_state/persistent_volume.py new file mode 100644 index 0000000000..5012bd7560 --- /dev/null +++ b/acto/system_state/persistent_volume.py @@ -0,0 +1,32 @@ +"""PersistentVolume state model""" + +import kubernetes +import kubernetes.client.models as kubernetes_models +import pydantic +from typing_extensions import Self + +from acto.system_state.kubernetes_object import ( + KubernetesDictObject, + list_object_helper, +) + + +class PersistentVolumeState(KubernetesDictObject): + """PersistentVolume state object.""" + + root: dict[str, kubernetes_models.V1PersistentVolume] + + @classmethod + def from_api_client(cls, api_client: kubernetes.client.ApiClient) -> Self: + data = list_object_helper( + kubernetes.client.CoreV1Api(api_client).list_persistent_volume, + ) + return cls.model_validate(data) + + def check_health(self) -> tuple[bool, str]: + """Check if PersistentVolume is healthy""" + return True, "" + + @pydantic.model_serializer + def serialize(self): + return {key: value.to_dict() for key, value in self.root.items()} diff --git a/acto/system_state/persistent_volume_claim.py b/acto/system_state/persistent_volume_claim.py new file mode 100644 index 0000000000..3cbee72adc --- /dev/null +++ b/acto/system_state/persistent_volume_claim.py @@ -0,0 +1,37 @@ +"""PersistentVolumeClaim state model""" + +import kubernetes +import kubernetes.client.models as kubernetes_models +import pydantic +from typing_extensions import Self + +from acto.system_state.kubernetes_object import ( + KubernetesNamespacedDictObject, + list_namespaced_object_helper, +) + + +class PersistentVolumeClaimState(KubernetesNamespacedDictObject): + """PersistentVolumeClaim state object.""" + + root: dict[str, kubernetes_models.V1PersistentVolumeClaim] + + @classmethod + def from_api_client_namespaced( + cls, api_client: kubernetes.client.ApiClient, namespace: str + ) -> Self: + data = list_namespaced_object_helper( + kubernetes.client.CoreV1Api( + api_client + ).list_namespaced_persistent_volume_claim, + namespace, + ) + return cls.model_validate(data) + + def check_health(self) -> tuple[bool, str]: + """Check 
health of PersistentVolumeClaim""" + return True, "" + + @pydantic.model_serializer + def serialize(self): + return {key: value.to_dict() for key, value in self.root.items()} diff --git a/acto/system_state/pod.py b/acto/system_state/pod.py new file mode 100644 index 0000000000..0b47babef5 --- /dev/null +++ b/acto/system_state/pod.py @@ -0,0 +1,64 @@ +"""Pod state model""" +import kubernetes +import kubernetes.client.models as kubernetes_models +import pydantic +from typing_extensions import Self + +from acto.system_state.kubernetes_object import ( + KubernetesNamespacedDictObject, + list_namespaced_object_helper, +) + + +class PodState(KubernetesNamespacedDictObject): + """Pod state model""" + + root: dict[str, kubernetes_models.V1Pod] + + @classmethod + def from_api_client_namespaced( + cls, api_client: kubernetes.client.ApiClient, namespace: str + ) -> Self: + data = list_namespaced_object_helper( + kubernetes.client.CoreV1Api(api_client).list_namespaced_pod, + namespace, + ) + return cls.model_validate(data) + + def check_health(self) -> tuple[bool, str]: + """Check if pod is healthy + + Returns: + tuple[bool, str]: (is_healthy, reason) + """ + + for name, pod in self.root.items(): + if pod.status.conditions is not None: + for condition in pod.status.conditions: + if condition.type == "Ready" and condition.status != "True": + return ( + False, + f"Pod[{name}] is not ready: {condition.message}", + ) + + if pod.status.container_statuses is not None: + for container_status in pod.status.container_statuses: + if container_status.ready is not True: + return ( + False, + f"Container {container_status.name} is not ready", + ) + + if pod.status.init_container_statuses is not None: + for container_status in pod.status.init_container_statuses: + if container_status.ready is not True: + return ( + False, + f"Init container {container_status.name} is not ready", + ) + + return True, "" + + @pydantic.model_serializer + def serialize(self): + return {key: value.to_dict() for key, value in self.root.items()} diff --git a/acto/system_state/replica_set.py b/acto/system_state/replica_set.py new file mode 100644 index 0000000000..7cf40816ef --- /dev/null +++ b/acto/system_state/replica_set.py @@ -0,0 +1,56 @@ +"""ReplicaSet state model.""" +import kubernetes +import kubernetes.client.models as kubernetes_models +import pydantic +from typing_extensions import Self + +from acto.system_state.kubernetes_object import ( + KubernetesNamespacedDictObject, + list_namespaced_object_helper, +) + + +class ReplicaSetState(KubernetesNamespacedDictObject): + """ReplicaSet state object.""" + + root: dict[str, kubernetes_models.V1ReplicaSet] + + @classmethod + def from_api_client_namespaced( + cls, api_client: kubernetes.client.ApiClient, namespace: str + ) -> Self: + data = list_namespaced_object_helper( + kubernetes.client.AppsV1Api(api_client).list_namespaced_replica_set, + namespace, + ) + return cls.model_validate(data) + + def check_health(self) -> tuple[bool, str]: + """Check if ReplicaSet is healthy + + Returns: + tuple[bool, str]: (is_healthy, reason) + """ + for name, replica_set in self.root.items(): + if ( + replica_set.status.observed_generation + != replica_set.metadata.generation + ): + return False, f"ReplicaSet[{name}] generation mismatch" + + if replica_set.spec.replicas != replica_set.status.ready_replicas: + return False, f"ReplicaSet[{name}] replicas mismatch" + + if replica_set.status.conditions is not None: + for condition in replica_set.status.conditions: + if ( + condition.type == "Available" + 
and condition.status != "True" + ): + return False, f"ReplicaSet[{name}] is not available" + + return True, "" + + @pydantic.model_serializer + def serialize(self): + return {key: value.to_dict() for key, value in self.root.items()} diff --git a/acto/system_state/role.py b/acto/system_state/role.py new file mode 100644 index 0000000000..ff63b60f31 --- /dev/null +++ b/acto/system_state/role.py @@ -0,0 +1,37 @@ +"""Role state model""" + +import kubernetes +import kubernetes.client.models as kubernetes_models +import pydantic +from typing_extensions import Self + +from acto.system_state.kubernetes_object import ( + KubernetesNamespacedDictObject, + list_namespaced_object_helper, +) + + +class RoleState(KubernetesNamespacedDictObject): + """Role state object.""" + + root: dict[str, kubernetes_models.V1Role] + + @classmethod + def from_api_client_namespaced( + cls, api_client: kubernetes.client.ApiClient, namespace: str + ) -> Self: + data = list_namespaced_object_helper( + kubernetes.client.RbacAuthorizationV1Api( + api_client + ).list_namespaced_role, + namespace, + ) + return cls.model_validate(data) + + def check_health(self) -> tuple[bool, str]: + """Check Role health""" + return True, "" + + @pydantic.model_serializer + def serialize(self): + return {key: value.to_dict() for key, value in self.root.items()} diff --git a/acto/system_state/role_binding.py b/acto/system_state/role_binding.py new file mode 100644 index 0000000000..19b6f5eccc --- /dev/null +++ b/acto/system_state/role_binding.py @@ -0,0 +1,37 @@ +"""RoleBinding state model""" + +import kubernetes +import kubernetes.client.models as kubernetes_models +import pydantic +from typing_extensions import Self + +from acto.system_state.kubernetes_object import ( + KubernetesNamespacedDictObject, + list_namespaced_object_helper, +) + + +class RoleBindingState(KubernetesNamespacedDictObject): + """RoleBinding state object.""" + + root: dict[str, kubernetes_models.V1RoleBinding] + + @classmethod + def from_api_client_namespaced( + cls, api_client: kubernetes.client.ApiClient, namespace: str + ) -> Self: + data = list_namespaced_object_helper( + kubernetes.client.RbacAuthorizationV1Api( + api_client + ).list_namespaced_role_binding, + namespace, + ) + return cls.model_validate(data) + + def check_health(self) -> tuple[bool, str]: + """Check RoleBinding health""" + return True, "" + + @pydantic.model_serializer + def serialize(self): + return {key: value.to_dict() for key, value in self.root.items()} diff --git a/acto/system_state/secret.py b/acto/system_state/secret.py new file mode 100644 index 0000000000..57e44ece7f --- /dev/null +++ b/acto/system_state/secret.py @@ -0,0 +1,35 @@ +"""Secret state model""" + +import kubernetes +import kubernetes.client.models as kubernetes_models +import pydantic +from typing_extensions import Self + +from acto.system_state.kubernetes_object import ( + KubernetesNamespacedDictObject, + list_namespaced_object_helper, +) + + +class SecretState(KubernetesNamespacedDictObject): + """Secret state object.""" + + root: dict[str, kubernetes_models.V1Secret] + + @classmethod + def from_api_client_namespaced( + cls, api_client: kubernetes.client.ApiClient, namespace: str + ) -> Self: + data = list_namespaced_object_helper( + kubernetes.client.CoreV1Api(api_client).list_namespaced_secret, + namespace, + ) + return cls.model_validate(data) + + def check_health(self) -> tuple[bool, str]: + """Check if Secret is healthy""" + return True, "" + + @pydantic.model_serializer + def serialize(self): + return {key: 
value.to_dict() for key, value in self.root.items()} diff --git a/acto/system_state/service.py b/acto/system_state/service.py new file mode 100644 index 0000000000..b31eda30b3 --- /dev/null +++ b/acto/system_state/service.py @@ -0,0 +1,40 @@ +"""Service state model""" + +import kubernetes +import kubernetes.client.models as kubernetes_models +import pydantic +from typing_extensions import Self + +from acto.system_state.kubernetes_object import ( + KubernetesNamespacedDictObject, + list_namespaced_object_helper, +) + + +class ServiceState(KubernetesNamespacedDictObject): + """Service state object.""" + + root: dict[str, kubernetes_models.V1Service] + + @classmethod + def from_api_client_namespaced( + cls, api_client: kubernetes.client.ApiClient, namespace: str + ) -> Self: + data = list_namespaced_object_helper( + kubernetes.client.CoreV1Api(api_client).list_namespaced_service, + namespace, + ) + return cls.model_validate(data) + + def check_health(self) -> tuple[bool, str]: + """Check if Service is healthy + + Returns: + tuple[bool, str]: (is_healthy, reason) + """ + + return True, "" + + @pydantic.model_serializer + def serialize(self): + return {key: value.to_dict() for key, value in self.root.items()} diff --git a/acto/system_state/service_account.py b/acto/system_state/service_account.py new file mode 100644 index 0000000000..44441bb4b0 --- /dev/null +++ b/acto/system_state/service_account.py @@ -0,0 +1,37 @@ +"""ServiceAccount state model""" + +import kubernetes +import kubernetes.client.models as kubernetes_models +import pydantic +from typing_extensions import Self + +from acto.system_state.kubernetes_object import ( + KubernetesNamespacedDictObject, + list_namespaced_object_helper, +) + + +class ServiceAccountState(KubernetesNamespacedDictObject): + """ServiceAccount state object.""" + + root: dict[str, kubernetes_models.V1ServiceAccount] + + @classmethod + def from_api_client_namespaced( + cls, api_client: kubernetes.client.ApiClient, namespace: str + ) -> Self: + data = list_namespaced_object_helper( + kubernetes.client.CoreV1Api( + api_client + ).list_namespaced_service_account, + namespace, + ) + return cls.model_validate(data) + + def check_health(self) -> tuple[bool, str]: + """Check ServiceAccount health""" + return True, "" + + @pydantic.model_serializer + def serialize(self): + return {key: value.to_dict() for key, value in self.root.items()} diff --git a/acto/system_state/stateful_set.py b/acto/system_state/stateful_set.py new file mode 100644 index 0000000000..7af2410323 --- /dev/null +++ b/acto/system_state/stateful_set.py @@ -0,0 +1,71 @@ +"""StatefulSet state model.""" + +import kubernetes +import kubernetes.client.models as kubernetes_models +import pydantic +from typing_extensions import Self + +from acto.system_state.kubernetes_object import ( + KubernetesNamespacedDictObject, + list_namespaced_object_helper, +) + + +class StatefulSetState(KubernetesNamespacedDictObject): + """StatefulSet state object.""" + + root: dict[str, kubernetes_models.V1StatefulSet] + + @classmethod + def from_api_client_namespaced( + cls, api_client: kubernetes.client.ApiClient, namespace: str + ) -> Self: + data = list_namespaced_object_helper( + kubernetes.client.AppsV1Api( + api_client + ).list_namespaced_stateful_set, + namespace, + ) + return cls.model_validate(data) + + def check_health(self) -> tuple[bool, str]: + """Check if StatefulSet is healthy + + Returns: + tuple[bool, str]: (is_healthy, reason) + """ + + for name, stateful_set in self.root.items(): + if ( + 
stateful_set.status.observed_generation
+                != stateful_set.metadata.generation
+            ):
+                return False, f"StatefulSet[{name}] generation mismatch"
+
+            if (
+                stateful_set.status.current_revision
+                != stateful_set.status.update_revision
+            ):
+                return (
+                    False,
+                    f"StatefulSet[{name}] revision mismatch, "
+                    + f"current[{stateful_set.status.current_revision}] "
+                    + f"!= update[{stateful_set.status.update_revision}]",
+                )
+
+            if stateful_set.spec.replicas != stateful_set.status.ready_replicas:
+                return False, f"StatefulSet[{name}] replicas mismatch"
+
+            if stateful_set.status.conditions is not None:
+                for condition in stateful_set.status.conditions:
+                    if condition.type == "Ready" and condition.status != "True":
+                        return (
+                            False,
+                            f"StatefulSet[{name}] is not ready: {condition.message}",
+                        )
+
+        return True, ""
+
+    @pydantic.model_serializer
+    def serialize(self):
+        return {key: value.to_dict() for key, value in self.root.items()}
diff --git a/acto/system_state/storage_class.py b/acto/system_state/storage_class.py
new file mode 100644
index 0000000000..02b3cba3fb
--- /dev/null
+++ b/acto/system_state/storage_class.py
@@ -0,0 +1,32 @@
+"""StorageClass state model"""
+
+import kubernetes
+import kubernetes.client.models as kubernetes_models
+import pydantic
+from typing_extensions import Self
+
+from acto.system_state.kubernetes_object import (
+    KubernetesDictObject,
+    list_object_helper,
+)
+
+
+class StorageClassState(KubernetesDictObject):
+    """StorageClass state object."""
+
+    root: dict[str, kubernetes_models.V1StorageClass]
+
+    @classmethod
+    def from_api_client(cls, api_client: kubernetes.client.ApiClient) -> Self:
+        data = list_object_helper(
+            kubernetes.client.StorageV1Api(api_client).list_storage_class,
+        )
+        return cls.model_validate(data)
+
+    def check_health(self) -> tuple[bool, str]:
+        """Check health of StorageClass"""
+        return True, ""
+
+    @pydantic.model_serializer
+    def serialize(self):
+        return {key: value.to_dict() for key, value in self.root.items()}
diff --git a/test/integration_tests/test_kubernetes_system_state.py b/test/integration_tests/test_kubernetes_system_state.py
new file mode 100644
index 0000000000..34707b1cc8
--- /dev/null
+++ b/test/integration_tests/test_kubernetes_system_state.py
@@ -0,0 +1,66 @@
+"""Integration tests for Kubernetes system state collection."""
+import os
+import pathlib
+import tempfile
+import unittest
+
+from acto.common import kubernetes_client
+from acto.kubernetes_engine import kind
+from acto.system_state.kubernetes_system_state import KubernetesSystemState
+
+test_dir = pathlib.Path(__file__).parent.resolve()
+test_data_dir = os.path.join(test_dir, "test_data")
+
+
+class TestKubernetesSystemState(unittest.TestCase):
+    """Test Kubernetes system state collection."""
+
+    def setUp(self):
+        config_path = os.path.join(os.path.expanduser("~"), ".kube/test-config")
+        name = "test-cluster"
+        num_nodes = 1
+        version = "v1.26.0"
+
+        cluster_instance = kind.Kind(acto_namespace=0)
+
+        cluster_instance.configure_cluster(num_nodes, version)
+        print(
+            f"Creating cluster {name} with {num_nodes} nodes, version {version}, "
+            + f"configPath {config_path}"
+        )
+        cluster_instance.create_cluster(name, config_path)
+
+        self.kubeconfig = config_path
+        self.cluster_name = name
+        self.cluster_instance = cluster_instance
+
+    def tearDown(self):
+        self.cluster_instance.delete_cluster(self.cluster_name, self.kubeconfig)
+
+    def test_collect_and_serialization(self):
+        """Test collect and serialization of Kubernetes system state."""
+
+        api_client =
kubernetes_client( + self.kubeconfig, + self.cluster_instance.get_context_name(self.cluster_name), + ) + + # check collection works + state = KubernetesSystemState.from_api_client(api_client, "kube-system") + assert "kindnet" in state.daemon_set + assert "kube-proxy" in state.daemon_set + assert "coredns" in state.deployment + assert "kube-dns" in state.service + assert "standard" in state.storage_class + assert "coredns" in state.config_map + assert "admin" in state.cluster_role + assert "cluster-admin" in state.cluster_role_binding + + # check serialization works + with tempfile.TemporaryDirectory() as tempdir: + filepath = os.path.join(tempdir, "system_state.json") + state.dump(filepath) + + +if __name__ == "__main__": + unittest.main() From 2489c4b8dcb3b3270ef31b22dbaac9cb830e8a41 Mon Sep 17 00:00:00 2001 From: "Jiawei \"Tyler\" Gu" <47795840+tylergu@users.noreply.github.com> Date: Wed, 31 Jan 2024 00:46:38 -0600 Subject: [PATCH 2/4] Feat: CLI for getting total number of schema nodes from CRD (#316) Signed-off-by: Tyler Gu <jiaweig3@illinois.edu> --- acto/schema/anyof.py | 38 +++-- acto/schema/array.py | 71 +++++++--- acto/schema/base.py | 116 ++++++++++------ acto/schema/boolean.py | 22 ++- acto/schema/get_total_number_schemas.py | 93 +++++++++++++ acto/schema/get_total_number_schemas_test.py | 22 +++ acto/schema/integer.py | 43 +++++- acto/schema/number.py | 55 ++++++-- acto/schema/object.py | 134 +++++++++++++----- acto/schema/oneof.py | 36 +++-- acto/schema/opaque.py | 19 ++- acto/schema/schema.py | 138 +++---------------- acto/schema/string.py | 49 +++++-- 13 files changed, 555 insertions(+), 281 deletions(-) create mode 100644 acto/schema/get_total_number_schemas.py create mode 100644 acto/schema/get_total_number_schemas_test.py diff --git a/acto/schema/anyof.py b/acto/schema/anyof.py index a69d6c2460..7fd8fd57ab 100644 --- a/acto/schema/anyof.py +++ b/acto/schema/anyof.py @@ -1,34 +1,42 @@ +import random from copy import deepcopy from typing import List, Tuple from .base import BaseSchema, TreeNode -from .schema import extract_schema class AnyOfSchema(BaseSchema): - '''Representing a schema with AnyOf keyword in it - ''' + """Representing a schema with AnyOf keyword in it""" def __init__(self, path: list, schema: dict) -> None: + # This is to fix the circular import + # pylint: disable=import-outside-toplevel, cyclic-import + from .schema import extract_schema + super().__init__(path, schema) self.possibilities: List[BaseSchema] = [] - for index, v in enumerate(schema['anyOf']): + for index, v in enumerate(schema["anyOf"]): base_schema = deepcopy(schema) - del base_schema['anyOf'] + del base_schema["anyOf"] base_schema.update(v) - self.possibilities.append(extract_schema(self.path + ['%s' % str(index)], base_schema)) + self.possibilities.append( + extract_schema(self.path + [str(index)], base_schema) + ) def get_possibilities(self): + """Return all possibilities of the anyOf schema""" return self.possibilities def get_all_schemas(self) -> Tuple[list, list, list]: if self.problematic: return [], [], [] return [self], [], [] - - def get_normal_semantic_schemas(self) -> Tuple[List['BaseSchema'], List['BaseSchema']]: - normal_schemas = [self] - semantic_schemas = [] + + def get_normal_semantic_schemas( + self, + ) -> Tuple[List["BaseSchema"], List["BaseSchema"]]: + normal_schemas: list[BaseSchema] = [self] + semantic_schemas: list[BaseSchema] = [] for possibility in self.possibilities: possibility_tuple = possibility.get_normal_semantic_schemas() @@ -51,10 +59,14 @@ def 
load_examples(self, example: list):
 
     def set_default(self, instance):
         self.default = instance
 
+    def gen(self, exclude_value=None, minimum: bool = False, **kwargs):
+        schema = random.choice(self.possibilities)
+        return schema.gen(exclude_value=exclude_value, minimum=minimum)
+
     def __str__(self) -> str:
-        ret = '['
+        ret = "["
         for i in self.possibilities:
             ret += str(i)
-            ret += ', '
+            ret += ", "
-        ret += ']'
+        ret += "]"
         return ret
diff --git a/acto/schema/array.py b/acto/schema/array.py
index ddda23c85e..24b5522902 100644
--- a/acto/schema/array.py
+++ b/acto/schema/array.py
@@ -1,29 +1,45 @@
+import random
 from typing import List, Tuple
 
 from .base import BaseSchema, TreeNode
-from .schema import extract_schema
 
 
 class ArraySchema(BaseSchema):
-    '''Representation of an array node
-
+    """Representation of an array node
+
     It handles
         - minItems
         - maxItems
        - items
        - uniqueItems
-    '''
+    """
+
     default_min_items = 0
     default_max_items = 5
 
     def __init__(self, path: list, schema: dict) -> None:
-        super().__init__(path, schema)
-        self.item_schema = extract_schema(self.path + ['ITEM'], schema['items'])
-        self.min_items = self.default_min_items if 'minItems' not in schema else schema['minItems']
-        self.max_items = self.default_max_items if 'maxItems' not in schema else schema['maxItems']
-        self.unique_items = None if 'uniqueItems' not in schema else schema['exclusiveMinimum']
+        # This is to fix the circular import
+        # pylint: disable=import-outside-toplevel, cyclic-import
+        from .schema import extract_schema
+
+        super().__init__(path, schema)
+        self.item_schema = extract_schema(self.path + ["ITEM"], schema["items"])
+        self.min_items = (
+            self.default_min_items
+            if "minItems" not in schema
+            else schema["minItems"]
+        )
+        self.max_items = (
+            self.default_max_items
+            if "maxItems" not in schema
+            else schema["maxItems"]
+        )
+        self.unique_items = (
+            None if "uniqueItems" not in schema else schema["uniqueItems"]
+        )
 
-    def get_item_schema(self):
+    def get_item_schema(self) -> BaseSchema:
+        """Get the schema of the items in the array"""
         return self.item_schema
 
     def get_all_schemas(self) -> Tuple[list, list, list]:
@@ -66,10 +82,12 @@ def get_all_schemas(self) -> Tuple[list, list, list]:
         normal_schemas.append(self)
 
         return normal_schemas, pruned_by_overspecified, pruned_by_copiedover
-
-    def get_normal_semantic_schemas(self) -> Tuple[List['BaseSchema'], List['BaseSchema']]:
-        normal_schemas = [self]
-        semantic_schemas = []
+
+    def get_normal_semantic_schemas(
+        self,
+    ) -> Tuple[List["BaseSchema"], List["BaseSchema"]]:
+        normal_schemas: list[BaseSchema] = [self]
+        semantic_schemas: list[BaseSchema] = []
 
         child_schema_tuple = self.item_schema.get_normal_semantic_schemas()
         normal_schemas.extend(child_schema_tuple[0])
@@ -79,7 +97,7 @@ def get_normal_semantic_schemas(self) -> Tuple[List['BaseSchema'], List['BaseSch
 
     def to_tree(self) -> TreeNode:
         node = TreeNode(self.path)
-        node.add_child('ITEM', self.item_schema.to_tree())
+        node.add_child("ITEM", self.item_schema.to_tree())
         return node
 
     def load_examples(self, example: list):
@@ -93,8 +111,29 @@ def set_default(self, instance):
         self.default = instance
 
     def empty_value(self):
         return []
 
+    def gen(self, exclude_value=None, minimum: bool = False, **kwargs) -> list:
+        if self.enum is not None:
+            if exclude_value is not None:
+                return random.choice(
+                    [x for x in self.enum if x != exclude_value]
+                )
+            else:
+                return random.choice(self.enum)
+        else:
+            # XXX: need to handle exclude_value, but not important for now for array types
+            result = []
+            if "size" in kwargs and kwargs["size"] is not
None: + num = kwargs["size"] + elif minimum: + num = self.min_items + else: + num = random.randint(self.min_items, self.max_items) + for _ in range(num): + result.append(self.item_schema.gen(minimum=minimum)) + return result + def __str__(self) -> str: - return 'Array' + return "Array" def __getitem__(self, key): return self.item_schema diff --git a/acto/schema/base.py b/acto/schema/base.py index 701e501d40..0e12a9d273 100644 --- a/acto/schema/base.py +++ b/acto/schema/base.py @@ -1,28 +1,32 @@ -import random from abc import abstractmethod -from typing import List, Tuple +from typing import Any, Callable, List, Optional, Tuple -from jsonschema import validate +import jsonschema +import jsonschema.exceptions from acto.utils.thread_logger import get_thread_logger -class TreeNode(): +class TreeNode: + """Tree node for schema tree""" def __init__(self, path: list) -> None: self.path = list(path) - self.parent: TreeNode = None - self.children = {} + self.parent: Optional[TreeNode] = None + self.children: dict[str, "TreeNode"] = {} - def add_child(self, key: str, child: 'TreeNode'): + def add_child(self, key: str, child: "TreeNode"): + """Add a child to the node""" self.children[key] = child child.set_parent(self) child.path = self.path + [key] - def set_parent(self, parent: 'TreeNode'): + def set_parent(self, parent: "TreeNode"): + """Set parent of the node""" self.parent = parent - def get_node_by_path(self, path: list) -> 'TreeNode': + def get_node_by_path(self, path: list) -> Optional["TreeNode"]: + """Get node by path""" logger = get_thread_logger(with_prefix=True) if len(path) == 0: @@ -32,41 +36,48 @@ def get_node_by_path(self, path: list) -> 'TreeNode': if key in self: return self[key].get_node_by_path(path) else: - logger.error('%s not in children', key) - logger.error('%s', self.children) + logger.error("%s not in children", key) + logger.error("%s", self.children) return None def get_children(self) -> dict: + """Get children of the node""" return self.children def get_path(self) -> list: + """Get path of the node""" return self.path - def traverse_func(self, func: callable): + def traverse_func(self, func: Callable): + """Traverse the tree and apply func to each node""" if func(self): for child in self.children.values(): child.traverse_func(func) def __getitem__(self, key): if key not in self.children: - if 'ITEM' in self.children and key == 'INDEX': - return self.children['ITEM'] - if isinstance(key, int) and 'ITEM' in self.children: - return self.children['ITEM'] - elif 'additional_properties' in self.children and isinstance(key, str): - return self.children['additional_properties'] + if "ITEM" in self.children and key == "INDEX": + return self.children["ITEM"] + if isinstance(key, int) and "ITEM" in self.children: + return self.children["ITEM"] + elif "additional_properties" in self.children and isinstance( + key, str + ): + return self.children["additional_properties"] else: - raise KeyError('key: %s' % key) + raise KeyError(f"key: {key}") else: return self.children[key] def __contains__(self, key) -> bool: if key not in self.children: - if 'ITEM' in self.children and key == 'INDEX': + if "ITEM" in self.children and key == "INDEX": return True - if 'ITEM' in self.children and isinstance(key, int): + if "ITEM" in self.children and isinstance(key, int): return True - elif 'additional_properties' in self.children and isinstance(key, str): + elif "additional_properties" in self.children and isinstance( + key, str + ): return True else: return False @@ -77,73 +88,88 @@ def 
__str__(self) -> str: return str(self.path) def deepcopy(self, path: list): + """Deep copy the node and its children""" ret = TreeNode(path) for key, child in self.children.items(): ret.add_child(key, child.deepcopy(path + [key])) - ret.testcases = list(self.testcases) - return ret -class SchemaInterface(): +class SchemaInterface: + """Interface for schemas""" + @abstractmethod - def get_all_schemas(self) -> Tuple[List['BaseSchema'], List['BaseSchema'], List['BaseSchema']]: - '''Returns a tuple of normal schemas, schemas pruned by over-specified, schemas pruned by - copied-over''' - #FIXME: this method needs to be redefined. Its return type is a legacy from previous abandoned design + def get_all_schemas( + self, + ) -> Tuple[List["BaseSchema"], List["BaseSchema"], List["BaseSchema"]]: + """Returns a tuple of normal schemas, schemas pruned by over-specified, schemas pruned by + copied-over""" + # FIXME: this method needs to be redefined. + # Its return type is a legacy from previous abandoned design raise NotImplementedError @abstractmethod - def get_normal_semantic_schemas(self) -> Tuple[List['BaseSchema'], List['BaseSchema']]: - '''Returns a tuple of normal schemas, semantic schemas''' + def get_normal_semantic_schemas( + self, + ) -> Tuple[List["BaseSchema"], List["BaseSchema"]]: + """Returns a tuple of normal schemas, semantic schemas""" raise NotImplementedError @abstractmethod def to_tree(self) -> TreeNode: - '''Returns tree structure, used for input generation''' + """Returns tree structure, used for input generation""" raise NotImplementedError @abstractmethod - def load_examples(self, example): - '''Load example into schema and subschemas''' + def load_examples(self, example: Any): + """Load example into schema and subschemas""" raise NotImplementedError @abstractmethod def set_default(self, instance): + """Set default value of the schema""" raise NotImplementedError @abstractmethod def empty_value(self): + """Get empty value of the schema""" raise NotImplementedError class BaseSchema(SchemaInterface): - '''Base class for schemas - + """Base class for schemas + Handles some keywords used for any types - ''' + """ def __init__(self, path: list, schema: dict) -> None: self.path = path self.raw_schema = schema - self.default = None if 'default' not in schema else schema['default'] - self.enum = None if 'enum' not in schema else schema['enum'] - self.examples = [] + self.default = None if "default" not in schema else schema["default"] + self.enum = None if "enum" not in schema else schema["enum"] + self.examples: list[Any] = [] self.copied_over = False self.over_specified = False self.problematic = False self.patch = False self.mapped = False - self.used_fields = [] + self.used_fields: list[SchemaInterface] = [] def get_path(self) -> list: + """Get path of the schema""" return self.path - def validate(self, instance) -> bool: + def validate(self, instance: Any) -> bool: + """Validate instance against schema""" try: - validate(instance, self.raw_schema) + jsonschema.validate(instance, self.raw_schema) return True - except: - return False \ No newline at end of file + except jsonschema.exceptions.ValidationError: + return False + + @abstractmethod + def gen(self, exclude_value=None, minimum: bool = False, **kwargs): + """Generate instance from schema""" + raise NotImplementedError diff --git a/acto/schema/boolean.py b/acto/schema/boolean.py index e3c971c48e..4691b6c386 100644 --- a/acto/schema/boolean.py +++ b/acto/schema/boolean.py @@ -1,22 +1,25 @@ +import random from typing import 
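`BaseSchema.validate()` pairs naturally with the new abstract `gen()`: every generated value should validate against the raw schema it came from. A hedged sanity-check sketch, using a toy stand-in for the integer generator rather than the real class:

```python
import random
import jsonschema

schema = {"type": "integer", "minimum": 0, "maximum": 5}

def gen(s: dict) -> int:
    # toy stand-in for IntegerSchema.gen; randrange's stop is exclusive,
    # hence the +1 so that `maximum` itself is reachable
    return random.randrange(s["minimum"], s["maximum"] + 1)

for _ in range(100):
    value = gen(schema)
    jsonschema.validate(value, schema)  # raises ValidationError on a bad value
```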
List, Tuple
 
 from .base import BaseSchema, TreeNode
 
 
 class BooleanSchema(BaseSchema):
+    """Representation of a boolean node"""
 
     def __init__(self, path: list, schema: dict) -> None:
         super().__init__(path, schema)
-        if self.default == None:
+        if self.default is None:
             self.default = False
-        pass
 
     def get_all_schemas(self) -> Tuple[list, list, list]:
         if self.problematic:
             return [], [], []
         return [self], [], []
-
-    def get_normal_semantic_schemas(self) -> Tuple[List['BaseSchema'], List['BaseSchema']]:
+
+    def get_normal_semantic_schemas(
+        self,
+    ) -> Tuple[List["BaseSchema"], List["BaseSchema"]]:
         return [self], []
 
     def to_tree(self) -> TreeNode:
@@ -29,10 +32,17 @@ def set_default(self, instance):
         if isinstance(instance, bool):
             self.default = instance
         elif isinstance(instance, str):
-            self.default = instance.lower() in ['true', 'True']
+            self.default = instance.lower() == "true"
 
     def empty_value(self):
         return False
 
+    def gen(self, exclude_value=None, minimum: bool = False, **kwargs):
+        """Generate a random boolean value"""
+        if exclude_value is not None:
+            return not exclude_value
+        else:
+            return random.choice([True, False])
+
     def __str__(self) -> str:
-        return 'boolean'
+        return "boolean"
diff --git a/acto/schema/get_total_number_schemas.py b/acto/schema/get_total_number_schemas.py
new file mode 100644
index 0000000000..8dd78bb947
--- /dev/null
+++ b/acto/schema/get_total_number_schemas.py
@@ -0,0 +1,92 @@
+import argparse
+import logging
+import sys
+
+import yaml
+
+from .schema import extract_schema
+
+
+def get_total_number_schemas(raw_schema: dict) -> int:
+    """Get the total number of schema nodes in a raw schema"""
+    root = extract_schema([], raw_schema)
+    base, over_specified, copied_over = root.get_all_schemas()
+    return len(base) + len(over_specified) + len(copied_over)
+
+
+def main():
+    """Main function"""
+    parser = argparse.ArgumentParser(
+        description="Counts the total number of schema nodes in a CRD"
+    )
+    parser.add_argument(
+        "--crd-file", help="Path to the CRD file", type=str, required=True
+    )
+    parser.add_argument(
+        "--crd-name",
+        help="Name of the CRD defined in metadata.name",
+        type=str,
+        required=False,
+    )
+    parser.add_argument(
+        "--version",
+        help="Version of the schema in the CRD",
+        type=str,
+        required=False,
+    )
+    args = parser.parse_args()
+
+    with open(args.crd_file, "r", encoding="utf-8") as crd_file:
+        documents = yaml.load_all(crd_file, Loader=yaml.FullLoader)
+
+        crd_candidates = {}
+        for document in documents:
+            if "kind" in document:
+                if document["kind"] == "CustomResourceDefinition":
+                    crd_candidates[document["metadata"]["name"]] = document
+            else:
+                raise RuntimeError("Document contains non-Kubernetes objects")
+
+    if crd_candidates:
+        if args.crd_name is not None and args.crd_name not in crd_candidates:
+            logging.error(
+                "CRD %s not found, available CRDs: %s",
+                args.crd_name,
+                str(crd_candidates.keys()),
+            )
+            sys.exit(1)
+
+        if len(crd_candidates) > 1:
+            if args.crd_name is None:
+                logging.error(
+                    "Multiple CRDs found, please specify one through the --crd-name argument"
+                )
+                logging.error("Available CRDs: %s", str(crd_candidates.keys()))
+                sys.exit(1)
+            else:
+                crd = crd_candidates[args.crd_name]
+        else:
+            crd = list(crd_candidates.values())[0]
+
+    if args.version is None:
+        if len(crd["spec"]["versions"]) > 1:
+            logging.warning(
+                "Multiple versions found in CRD, using the latest version"
+            )
+        raw_schema = crd["spec"]["versions"][-1]["schema"]["openAPIV3Schema"]
+    else:
+        for version in crd["spec"]["versions"]:
+            if 
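For the multiple-CRD branch above to ever fire, the collection loop has to scan the whole manifest rather than stop at the first match. A self-contained sketch of that collection step (the manifest contents are made up for illustration):

```python
import yaml

manifest = """
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
  name: first.example.com
---
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
  name: second.example.com
"""

# collect every CRD keyed by metadata.name, skipping other kinds
candidates = {
    doc["metadata"]["name"]: doc
    for doc in yaml.safe_load_all(manifest)
    if doc.get("kind") == "CustomResourceDefinition"
}
assert sorted(candidates) == ["first.example.com", "second.example.com"]
```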
version["name"] == args.version: + raw_schema = version["schema"]["openAPIV3Schema"] + break + else: + raise RuntimeError(f"Version {args.version} not found in CRD") + + print( + "Total number of schema nodes in the CRD:", + get_total_number_schemas(raw_schema), + ) + + +if __name__ == "__main__": + main() diff --git a/acto/schema/get_total_number_schemas_test.py b/acto/schema/get_total_number_schemas_test.py new file mode 100644 index 0000000000..cf540b3460 --- /dev/null +++ b/acto/schema/get_total_number_schemas_test.py @@ -0,0 +1,22 @@ +import unittest + +from .get_total_number_schemas import get_total_number_schemas + + +class TestGetTotalNumberSchemas(unittest.TestCase): + """Tests for get_total_number_schemas().""" + + def test_simple_get_total_number_schemas(self): + """Test simple case of getting total number of schemas.""" + + schema = { + "properties": { + "a": {"type": "number"}, + "b": {"type": "number"}, + "c": {"type": "number"}, + }, + "type": "object", + } + num = get_total_number_schemas(schema) + + assert num == 4 diff --git a/acto/schema/integer.py b/acto/schema/integer.py index 3efcc12b98..b0d054ef95 100644 --- a/acto/schema/integer.py +++ b/acto/schema/integer.py @@ -1,15 +1,16 @@ -from typing import List, Tuple +import random +from typing import Any, List, Tuple from .base import BaseSchema, TreeNode from .number import NumberSchema class IntegerSchema(NumberSchema): - '''Special case of NumberSchema''' + """Special case of NumberSchema""" def __init__(self, path: list, schema: dict) -> None: super().__init__(path, schema) - if self.default == None: + if self.default is None: self.default = 0 def get_all_schemas(self) -> Tuple[list, list, list]: @@ -17,7 +18,9 @@ def get_all_schemas(self) -> Tuple[list, list, list]: return [], [], [] return [self], [], [] - def get_normal_semantic_schemas(self) -> Tuple[List['BaseSchema'], List['BaseSchema']]: + def get_normal_semantic_schemas( + self, + ) -> Tuple[List["BaseSchema"], List["BaseSchema"]]: if self.problematic: return [], [] return [self], [] @@ -25,8 +28,9 @@ def get_normal_semantic_schemas(self) -> Tuple[List['BaseSchema'], List['BaseSch def to_tree(self) -> TreeNode: return TreeNode(self.path) - def load_examples(self, example: int): - self.examples.append(example) + def load_examples(self, example: Any): + if isinstance(example, int): + self.examples.append(example) def set_default(self, instance): self.default = int(instance) @@ -34,5 +38,30 @@ def set_default(self, instance): def empty_value(self): return 0 + def gen(self, exclude_value=None, minimum: bool = False, **kwargs) -> int: + # TODO: Use exclusive_minimum, exclusive_maximum + if self.enum is not None: + if exclude_value is not None: + return random.choice( + [x for x in self.enum if x != exclude_value] + ) + else: + return random.choice(self.enum) + elif self.multiple_of is not None: + return random.randrange( + self.minimum, self.maximum + 1, self.multiple_of + ) + else: + if exclude_value is not None: + return random.choice( + [ + x + for x in range(self.minimum, self.maximum + 1) + if x != exclude_value + ] + ) + else: + return random.randrange(self.minimum, self.maximum + 1) + def __str__(self) -> str: - return 'Integer' \ No newline at end of file + return "Integer" diff --git a/acto/schema/number.py b/acto/schema/number.py index 147329fcf9..434ff056fd 100644 --- a/acto/schema/number.py +++ b/acto/schema/number.py @@ -1,37 +1,57 @@ +import random from typing import List, Tuple from .base import BaseSchema, TreeNode class NumberSchema(BaseSchema): - 
'''Representation of a number node - + """Representation of a number node + It handles - minimum - maximum - exclusiveMinimum - exclusiveMaximum - multipleOf - ''' + """ + default_minimum = 0 default_maximum = 5 def __init__(self, path: list, schema: dict) -> None: super().__init__(path, schema) - self.minimum = self.default_minimum if 'minimum' not in schema else schema['minimum'] - self.maximum = self.default_maximum if 'maximum' not in schema else schema['maximum'] - self.exclusive_minimum = None if 'exclusiveMinimum' not in schema else schema[ - 'exclusiveMinimum'] - self.exclusive_maximum = None if 'exclusiveMaximum' not in schema else schema[ - 'exclusiveMaximum'] - self.multiple_of = None if 'multipleOf' not in schema else schema['multipleOf'] + self.minimum = ( + self.default_minimum + if "minimum" not in schema + else schema["minimum"] + ) + self.maximum = ( + self.default_maximum + if "maximum" not in schema + else schema["maximum"] + ) + self.exclusive_minimum = ( + None + if "exclusiveMinimum" not in schema + else schema["exclusiveMinimum"] + ) + self.exclusive_maximum = ( + None + if "exclusiveMaximum" not in schema + else schema["exclusiveMaximum"] + ) + self.multiple_of = ( + None if "multipleOf" not in schema else schema["multipleOf"] + ) def get_all_schemas(self) -> Tuple[list, list, list]: if self.problematic: return [], [], [] return [self], [], [] - def get_normal_semantic_schemas(self) -> Tuple[List['BaseSchema'], List['BaseSchema']]: + def get_normal_semantic_schemas( + self, + ) -> Tuple[List["BaseSchema"], List["BaseSchema"]]: if self.problematic: return [], [] return [self], [] @@ -48,5 +68,16 @@ def set_default(self, instance): def empty_value(self): return 0 + def gen(self, exclude_value=None, minimum=False, **kwargs) -> float: + # TODO: Use exclusive_minimum, exclusive_maximum, multiple_of + if self.enum is not None: + if exclude_value is not None: + return random.choice( + [x for x in self.enum if x != exclude_value] + ) + else: + return random.choice(self.enum) + return random.uniform(self.minimum, self.maximum) + def __str__(self) -> str: - return 'Number' \ No newline at end of file + return "Number" diff --git a/acto/schema/object.py b/acto/schema/object.py index e036dc90d0..7137ceb9ef 100644 --- a/acto/schema/object.py +++ b/acto/schema/object.py @@ -1,6 +1,6 @@ +import random from typing import List, Tuple -from acto.schema.schema import extract_schema from acto.utils.thread_logger import get_thread_logger from .base import BaseSchema, TreeNode @@ -8,8 +8,8 @@ class ObjectSchema(BaseSchema): - '''Representation of an object node - + """Representation of an object node + It handles - properties - additionalProperties @@ -20,46 +20,55 @@ class ObjectSchema(BaseSchema): - dependencies - patternProperties - regexp - ''' + """ def __init__(self, path: list, schema: dict) -> None: + # This is to fix the circular import + # pylint: disable=import-outside-toplevel + from .schema import extract_schema + super().__init__(path, schema) self.properties = {} self.additional_properties = None self.required = [] logger = get_thread_logger(with_prefix=True) - if 'properties' not in schema and 'additionalProperties' not in schema: - logger.warning('Object schema %s does not have properties nor additionalProperties' % - self.path) - if 'properties' in schema: - for property_key, property_schema in schema['properties'].items(): - self.properties[property_key] = extract_schema(self.path + [property_key], - property_schema) - if 'additionalProperties' in schema: - 
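The TODO in `NumberSchema.gen` below leaves `exclusiveMinimum`/`exclusiveMaximum` unused. One possible way to honor them, assuming the boolean OpenAPI-v3.0 form that CRD schemas typically use (the helper and its epsilon nudge are illustrative choices, not the project's API):

```python
import random

def gen_number(minimum: float, maximum: float,
               exclusive_minimum: bool = False,
               exclusive_maximum: bool = False,
               eps: float = 1e-9) -> float:
    """Uniform draw that respects exclusive bounds (sketch for the TODO)."""
    lo = minimum + eps if exclusive_minimum else minimum
    hi = maximum - eps if exclusive_maximum else maximum
    return random.uniform(lo, hi)

x = gen_number(0, 5, exclusive_minimum=True)
assert 0 < x <= 5
```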
self.additional_properties = extract_schema(self.path + ['additional_properties'], - schema['additionalProperties']) - if 'required' in schema: - self.required = schema['required'] - if 'minProperties' in schema: - self.min_properties = schema['minProperties'] - if 'maxProperties' in schema: - self.max_properties = schema['maxProperties'] + if "properties" not in schema and "additionalProperties" not in schema: + logger.warning( + "Object schema %s does not have properties nor additionalProperties", + self.path, + ) + if "properties" in schema: + for property_key, property_schema in schema["properties"].items(): + self.properties[property_key] = extract_schema( + self.path + [property_key], property_schema + ) + if "additionalProperties" in schema: + self.additional_properties = extract_schema( + self.path + ["additional_properties"], + schema["additionalProperties"], + ) + if "required" in schema: + self.required = schema["required"] + if "minProperties" in schema: + self.min_properties = schema["minProperties"] + if "maxProperties" in schema: + self.max_properties = schema["maxProperties"] def get_all_schemas(self) -> Tuple[list, list, list]: - '''Return all the subschemas as a list''' + """Return all the subschemas as a list""" if self.problematic: return [], [], [] normal_schemas: List[BaseSchema] = [] pruned_by_overspecified: List[BaseSchema] = [] pruned_by_copiedover: List[BaseSchema] = [] - if self.properties != None: + if self.properties is not None: for value in self.properties.values(): child_schema_tuple = value.get_all_schemas() normal_schemas.extend(child_schema_tuple[0]) pruned_by_overspecified.extend(child_schema_tuple[1]) pruned_by_copiedover.extend(child_schema_tuple[2]) - if self.additional_properties != None: + if self.additional_properties is not None: normal_schemas.append(self.additional_properties) if self.copied_over: @@ -90,32 +99,36 @@ def get_all_schemas(self) -> Tuple[list, list, list]: return normal_schemas, pruned_by_overspecified, pruned_by_copiedover - def get_normal_semantic_schemas(self) -> Tuple[List['BaseSchema'], List['BaseSchema']]: + def get_normal_semantic_schemas( + self, + ) -> Tuple[List["BaseSchema"], List["BaseSchema"]]: if self.problematic: return [], [] normal_schemas: List[BaseSchema] = [self] semantic_schemas: List[BaseSchema] = [] - if self.properties != None: + if self.properties is not None: for value in self.properties.values(): child_schema_tuple = value.get_normal_semantic_schemas() normal_schemas.extend(child_schema_tuple[0]) semantic_schemas.extend(child_schema_tuple[1]) - if self.additional_properties != None: + if self.additional_properties is not None: normal_schemas.append(self.additional_properties) return normal_schemas, semantic_schemas def to_tree(self) -> TreeNode: node = TreeNode(self.path) - if self.properties != None: + if self.properties is not None: for key, value in self.properties.items(): node.add_child(key, value.to_tree()) - if self.additional_properties != None: - node.add_child('additional_properties', self.additional_properties.to_tree()) + if self.additional_properties is not None: + node.add_child( + "additional_properties", self.additional_properties.to_tree() + ) return node @@ -132,33 +145,82 @@ def empty_value(self): return {} def get_property_schema(self, key): + """Get the schema of a property""" logger = get_thread_logger(with_prefix=True) if key in self.properties: return self.properties[key] - elif self.additional_properties != None: + elif self.additional_properties is not None: return 
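`get_all_schemas()` flattens the schema tree into per-node lists, which is also what `get_total_number_schemas()` counts earlier in this patch. A rough self-contained sketch of that flattening, ignoring the copied-over/over-specified pruning categories:

```python
def count_nodes(schema: dict) -> int:
    """Count schema nodes the way the flattening traversal does (rough sketch)."""
    total = 1  # the node itself
    for sub in schema.get("properties", {}).values():
        total += count_nodes(sub)
    if isinstance(schema.get("additionalProperties"), dict):
        total += count_nodes(schema["additionalProperties"])
    if "items" in schema:
        total += count_nodes(schema["items"])
    return total

crd_like = {
    "type": "object",
    "properties": {
        "replicas": {"type": "integer"},
        "storage": {"type": "object",
                    "properties": {"size": {"type": "string"}}},
    },
}
assert count_nodes(crd_like) == 4  # root + replicas + storage + size
```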
self.additional_properties
         else:
-            logger.warning('Field [%s] does not have a schema, using opaque schema', key)
+            logger.warning(
+                "Field [%s] does not have a schema, using opaque schema", key
+            )
             return OpaqueSchema(self.path + [key], {})
 
     def get_properties(self) -> dict:
+        """Get the properties of the object"""
         return self.properties
 
     def get_additional_properties(self):
+        """Get the additional properties of the object"""
         return self.additional_properties
 
+    def gen(self, exclude_value=None, minimum: bool = False, **kwargs):
+        # TODO: Use constraints: minProperties, maxProperties
+        logger = get_thread_logger(with_prefix=True)
+
+        if self.enum is not None:
+            if exclude_value is not None:
+                return random.choice(
+                    [x for x in self.enum if x != exclude_value]
+                )
+            else:
+                return random.choice(self.enum)
+
+        # XXX: need to handle exclude_value, but not important for now for object types
+        result = {}
+        if len(self.properties) == 0:
+            if self.additional_properties is None:
+                # raise TypeError('[%s]: No properties and no additional properties' % self.path)
+                logger.warning(
+                    "[%s]: No properties and no additional properties",
+                    self.path,
+                )
+                return {}
+            key = "ACTOKEY"
+            result[key] = self.additional_properties.gen(minimum=minimum)
+        else:
+            for k, v in self.properties.items():
+                if minimum:
+                    if k in self.required:
+                        result[k] = v.gen(minimum=True)
+                    else:
+                        continue
+                else:
+                    if random.uniform(0, 1) < 0.1 and k not in self.required:
+                        # 10% of the chance this child will be null
+                        result[k] = None
+                    else:
+                        result[k] = v.gen(minimum=minimum)
+        if "enabled" in self.properties:
+            result["enabled"] = True
+        return result
+
     def __str__(self) -> str:
-        ret = '{'
+        ret = "{"
         for k, v in self.properties.items():
             ret += str(k)
-            ret += ': '
+            ret += ": "
             ret += str(v)
-            ret += ', '
-        ret += '}'
+            ret += ", "
+        ret += "}"
         return ret
 
     def __getitem__(self, key):
-        if self.additional_properties != None and key not in self.properties:
+        if (
+            self.additional_properties is not None
+            and key not in self.properties
+        ):
             # if the object schema has additionalProperties, and the key is not in the properties,
             # return the additionalProperties schema
             return self.additional_properties
diff --git a/acto/schema/oneof.py b/acto/schema/oneof.py
index 61c3938321..6ea33f2203 100644
--- a/acto/schema/oneof.py
+++ b/acto/schema/oneof.py
@@ -1,24 +1,30 @@
+import random
 from copy import deepcopy
 from typing import List, Tuple
 
 from .base import BaseSchema, TreeNode
-from .schema import extract_schema
 
 
 class OneOfSchema(BaseSchema):
-    '''Representing a schema with AnyOf keyword in it
-    '''
+    """Representing a schema with a oneOf keyword in it"""
 
     def __init__(self, path: list, schema: dict) -> None:
+        # This is to fix the circular import
+        # pylint: disable=import-outside-toplevel, cyclic-import
+        from .schema import extract_schema
+
         super().__init__(path, schema)
         self.possibilities = []
-        for index, v in enumerate(schema['oneOf']):
+        for index, v in enumerate(schema["oneOf"]):
             base_schema = deepcopy(schema)
-            del base_schema['oneOf']
+            del base_schema["oneOf"]
             base_schema.update(v)
-            self.possibilities.append(extract_schema(self.path + ['%s' % str(index)], base_schema))
+            self.possibilities.append(
+                extract_schema(self.path + [str(index)], base_schema)
+            )
 
     def get_possibilities(self):
+        """Return all possibilities of the oneOf schema"""
         return self.possibilities
 
     def get_all_schemas(self) -> Tuple[list, list, list]:
@@ -26,9 +32,11 @@ def get_all_schemas(self) -> Tuple[list, list, list]:
         return [], [], []
         return [self], 
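`ObjectSchema.gen()` above has two modes: `minimum=True` keeps only required keys, while the default mode occasionally nulls an optional child. A toy sketch of that key-selection policy (`gen_object` and the property defaults are illustrative stand-ins):

```python
import random

def gen_object(properties: dict, required: list, minimum: bool = False) -> dict:
    """Sketch of ObjectSchema.gen's key-selection policy."""
    result = {}
    for key, default in properties.items():
        if minimum and key not in required:
            continue  # minimum mode keeps only required keys
        if not minimum and key not in required and random.random() < 0.1:
            result[key] = None  # ~10% chance an optional child is null
        else:
            result[key] = default
    return result

props = {"name": "ACTOKEY", "replicas": 3, "debug": False}
assert gen_object(props, required=["name"], minimum=True) == {"name": "ACTOKEY"}
```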
[], [] - def get_normal_semantic_schemas(self) -> Tuple[List['BaseSchema'], List['BaseSchema']]: - normal_schemas = [self] - semantic_schemas = [] + def get_normal_semantic_schemas( + self, + ) -> Tuple[List["BaseSchema"], List["BaseSchema"]]: + normal_schemas: list[BaseSchema] = [self] + semantic_schemas: list[BaseSchema] = [] for possibility in self.possibilities: possibility_tuple = possibility.get_normal_semantic_schemas() @@ -51,10 +59,14 @@ def load_examples(self, example: list): def set_default(self, instance): self.default = instance + def gen(self, exclude_value=None, minimum: bool = False, **kwargs): + schema = random.choice(self.possibilities) + return schema.gen(exclude_value=exclude_value, minimum=minimum) + def __str__(self) -> str: - ret = '[' + ret = "[" for i in self.possibilities: ret += str(i) - ret += ', ' - ret += ']' + ret += ", " + ret += "]" return ret diff --git a/acto/schema/opaque.py b/acto/schema/opaque.py index b06795030e..94d24761d4 100644 --- a/acto/schema/opaque.py +++ b/acto/schema/opaque.py @@ -4,15 +4,14 @@ class OpaqueSchema(BaseSchema): - '''Opaque schema to handle the fields that do not have a schema''' - - def __init__(self, path: list, schema: dict) -> None: - super().__init__(path, schema) + """Opaque schema to handle the fields that do not have a schema""" def get_all_schemas(self) -> Tuple[list, list, list]: return [], [], [] - - def get_normal_semantic_schemas(self) -> Tuple[List['BaseSchema'], List['BaseSchema']]: + + def get_normal_semantic_schemas( + self, + ) -> Tuple[List["BaseSchema"], List["BaseSchema"]]: return [], [] def to_tree(self) -> TreeNode: @@ -21,8 +20,14 @@ def to_tree(self) -> TreeNode: def load_examples(self, example): pass + def set_default(self, instance): + self.default = instance + def empty_value(self): return None + def gen(self, exclude_value=None, minimum: bool = False, **kwargs): + return None + def __str__(self) -> str: - return 'any' + return "any" diff --git a/acto/schema/schema.py b/acto/schema/schema.py index f60f679dfe..d01d13676f 100644 --- a/acto/schema/schema.py +++ b/acto/schema/schema.py @@ -1,147 +1,51 @@ from acto.utils import get_thread_logger +from .anyof import AnyOfSchema +from .array import ArraySchema from .base import BaseSchema from .boolean import BooleanSchema from .integer import IntegerSchema from .number import NumberSchema +from .object import ObjectSchema +from .oneof import OneOfSchema from .opaque import OpaqueSchema from .string import StringSchema def extract_schema(path: list, schema: dict) -> BaseSchema: - from .anyof import AnyOfSchema - from .array import ArraySchema - from .object import ObjectSchema - from .oneof import OneOfSchema + """Extract a schema from a dict""" + logger = get_thread_logger(with_prefix=True) - if 'anyOf' in schema: + if "anyOf" in schema: return AnyOfSchema(path, schema) - elif 'oneOf' in schema: + elif "oneOf" in schema: return OneOfSchema(path, schema) - if 'type' not in schema: - if 'properties' in schema: + if "type" not in schema: + if "properties" in schema: return ObjectSchema(path, schema) else: - logger.warn('No type found in schema: %s' % str(schema)) + logger.warning("No type found in schema: %s", str(schema)) return OpaqueSchema(path, schema) - t = schema['type'] + t = schema["type"] if isinstance(t, list): - if 'null' in t: - t.remove('null') + if "null" in t: + t.remove("null") if len(t) == 1: t = t[0] - if t == 'string': + if t == "string": return StringSchema(path, schema) - elif t == 'number': + elif t == "number": return NumberSchema(path, 
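`extract_schema()` collapses nullable type unions before dispatching; a list-valued `type` that still has more than one member falls through to the `RuntimeError` at the end of the function. A small mirror of that normalization step:

```python
def normalize_type(t):
    """Mirror of extract_schema's handling of list-typed `type` fields."""
    if isinstance(t, list):
        t = [x for x in t if x != "null"]  # nullable unions collapse to the base type
        if len(t) == 1:
            t = t[0]
    return t

assert normalize_type(["string", "null"]) == "string"
assert normalize_type("integer") == "integer"
assert normalize_type(["string", "integer"]) == ["string", "integer"]  # still ambiguous
```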
schema) - elif t == 'integer': + elif t == "integer": return IntegerSchema(path, schema) - elif t == 'boolean': + elif t == "boolean": return BooleanSchema(path, schema) - elif t == 'array': + elif t == "array": return ArraySchema(path, schema) - elif t == 'object': + elif t == "object": return ObjectSchema(path, schema) else: - logger.error('Unsupported type %s' % t) - return None - - -# if __name__ == '__main__': -# with open('data/rabbitmq-operator/operator.yaml', -# 'r') as operator_yaml: -# parsed_operator_documents = yaml.load_all(operator_yaml, Loader=yaml.FullLoader) -# for document in parsed_operator_documents: -# if document['kind'] == 'CustomResourceDefinition': -# spec_schema = ObjectSchema( -# ['root'], document['spec']['versions'][0]['schema']['openAPIV3Schema'] -# ['properties']['spec']) -# print(str(spec_schema)) -# print(spec_schema.gen()) -# print(spec_schema.num_fields()) -# for k, v in spec_schema.properties.items(): -# print('%s has %d fields' % (k, v.num_fields())) -# print(spec_schema.num_cases()) - -# schema_list = spec_schema.get_all_schemas() -# test_plan = {} -# for schema in schema_list: -# test_plan[str(schema.path)] = schema.test_cases() -# with open('test_plan.json', 'w') as fout: -# json.dump(test_plan, fout, cls=ActoEncoder, indent=4) - - # ss = StringSchema(None, {"type": "string"}) - # print(ss.gen()) - - # schemas = { - # 'configmap': '/home/tyler/k8s_resources/configmap-v1.json', - # 'cronjob': '/home/tyler/k8s_resources/cronjob-batch-v2alpha1.json', - # 'deployment': '/home/tyler/k8s_resources/deployment-apps-v1.json', - # 'ingress': '/home/tyler/k8s_resources/ingress-networking-v1.json', - # 'pvc': '/home/tyler/k8s_resources/persistentvolumeclaim-v1.json', - # 'pdb': '/home/tyler/k8s_resources/poddisruptionbudget-policy-v1beta1.json', - # 'pod': '/home/tyler/k8s_resources/pod-v1.json', - # 'secret': '/home/tyler/k8s_resources/secret-v1.json', - # 'service': '/home/tyler/k8s_resources/service-v1.json', - # 'statefulset': '/home/tyler/k8s_resources/statefulset-apps-v1.json', - # } - - # resource_num_fields = {} - - # for resource, schema_path in schemas.items(): - # with open(schema_path, 'r') as schema_file: - # schema = json.load(schema_file) - # spec_schema = extract_schema([], schema) - - # resource_num_fields[resource] = len(spec_schema.get_all_schemas()[0]) - - # print(resource_num_fields) - - # used_resource_in_operators = { - # 'cass-operator': [ - # 'configmap', 'deployment', 'pvc', 'pdb', 'secret', 'service', 'statefulset' - # ], - # 'cockroach-operator': [ - # 'configmap', 'deployment', 'pvc', 'pdb', 'secret', 'service', 'statefulset' - # ], - # 'knative-operator': ['configmap', 'deployment', 'pdb', 'secret', 'service'], - # 'mongodb-community-operator': [ - # 'configmap', 'deployment', 'pvc', 'secret', 'service', 'statefulset' - # ], - # 'percona-server-mongodb-operator': [ - # 'configmap', 'deployment', 'ingress', 'pvc', 'pdb', 'secret', 'service', 'statefulset' - # ], - # 'percona-xtradb-cluster-operator': [ - # 'configmap', 'deployment', 'pvc', 'pdb', 'secret', 'service', 'statefulset' - # ], - # 'rabbitmq-operator': ['configmap', 'deployment', 'pvc', 'secret', 'service', 'statefulset'], - # 'redis-operator': ['configmap', 'deployment', 'pdb', 'secret', 'service', 'statefulset'], - # 'redis-ot-operator': ['configmap', 'deployment', 'pvc', 'secret', 'service', 'statefulset'], - # 'tidb-operator': ['configmap', 'deployment', 'pvc', 'secret', 'service', 'statefulset'], - # 'zookeeper-operator': [ - # 'configmap', 'deployment', 'pvc', 'pdb', 
'secret', 'service', 'statefulset' - # ], - # } - - # for operator, resources in used_resource_in_operators.items(): - # num_system_fields = 0 - # for resource in resources: - # num_system_fields += resource_num_fields[resource] - - # print('%s has %d system fields' % (operator, num_system_fields)) - # print(num_system_fields) - - # with open('data/cass-operator/bundle.yaml', - # 'r') as operator_yaml: - # parsed_operator_documents = yaml.load_all(operator_yaml, Loader=yaml.FullLoader) - # for document in parsed_operator_documents: - # if document['kind'] == 'CustomResourceDefinition': - # spec_schema = ObjectSchema( - # ['root'], document['spec']['versions'][0]['schema']['openAPIV3Schema'] - # ['properties']['spec']) - # print(spec_schema.num_fields()) - # for k, v in spec_schema.properties.items(): - # print('%s has %d fields' % (k, len(v.get_all_schemas()[0]))) \ No newline at end of file + raise RuntimeError(f"Unknown type: {t}") diff --git a/acto/schema/string.py b/acto/schema/string.py index 097e4fddc0..bad2e6fa81 100644 --- a/acto/schema/string.py +++ b/acto/schema/string.py @@ -1,31 +1,44 @@ +import random from typing import List, Tuple +import exrex + +from acto.common import random_string + from .base import BaseSchema, TreeNode class StringSchema(BaseSchema): - '''Representation of a string node - + """Representation of a string node + It handles - minLength - maxLength - pattern - ''' + """ + default_max_length = 10 def __init__(self, path: list, schema: dict) -> None: super().__init__(path, schema) - self.min_length = None if 'minLength' not in schema else schema['minLength'] - self.max_length = self.default_max_length if 'maxLength' not in schema else schema[ - 'maxLength'] - self.pattern = None if 'pattern' not in schema else schema['pattern'] + self.min_length = ( + None if "minLength" not in schema else schema["minLength"] + ) + self.max_length = ( + self.default_max_length + if "maxLength" not in schema + else schema["maxLength"] + ) + self.pattern = None if "pattern" not in schema else schema["pattern"] def get_all_schemas(self) -> Tuple[list, list, list]: if self.problematic: return [], [], [] return [self], [], [] - - def get_normal_semantic_schemas(self) -> Tuple[List['BaseSchema'], List['BaseSchema']]: + + def get_normal_semantic_schemas( + self, + ) -> Tuple[List["BaseSchema"], List["BaseSchema"]]: if self.problematic: return [], [] return [self], [] @@ -42,5 +55,21 @@ def set_default(self, instance): def empty_value(self): return "" + def gen(self, exclude_value=None, minimum: bool = False, **kwargs): + # TODO: Use minLength: the exrex does not support minLength + if self.enum is not None: + if exclude_value is not None: + return random.choice( + [x for x in self.enum if x != exclude_value] + ) + else: + return random.choice(self.enum) + if self.pattern is not None: + # XXX: since it's random, we don't need to exclude the value + return exrex.getone(self.pattern, self.max_length) + if minimum: + return random_string(self.min_length) # type: ignore + return "ACTOKEY" + def __str__(self) -> str: - return 'String' \ No newline at end of file + return "String" From ef08dab35b9d5a9f3c1fcc6efe4da73f74e39b32 Mon Sep 17 00:00:00 2001 From: Tyler Gu <jiaweig3@illinois.edu> Date: Wed, 31 Jan 2024 12:16:50 -0600 Subject: [PATCH 3/4] Update bug numbers Signed-off-by: Tyler Gu <jiaweig3@illinois.edu> --- bugs.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bugs.md b/bugs.md index 52aeaad929..65d98c41f1 100644 --- a/bugs.md +++ b/bugs.md @@ -63,6 +63,8 @@ | 
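The pattern branch of `StringSchema.gen` relies on `exrex.getone()`; its second argument caps the expansion of unbounded quantifiers such as `*` and `+`, so it is not a strict `maxLength` (and, as the TODO notes, `minLength` is not enforced either). A quick usage sketch:

```python
import re
import exrex

pattern = r"[a-z]{3}-[0-9]{2}"

# getone draws a single string matching the pattern; the limit argument
# only bounds unbounded repetitions, which this pattern does not have
candidate = exrex.getone(pattern, 10)
assert re.fullmatch(pattern, candidate)
```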
pravega/zookeeper-operator | [https://github.com/pravega/zookeeper-operator/issues/540](https://github.com/pravega/zookeeper-operator/issues/540) | | | pravega/zookeeper-operator | [https://github.com/pravega/zookeeper-operator/issues/541](https://github.com/pravega/zookeeper-operator/issues/541) | | | pravega/zookeeper-operator | [https://github.com/pravega/zookeeper-operator/issues/547](https://github.com/pravega/zookeeper-operator/issues/547) | | +| cloudnative-pg/cloudnative-pg | [https://github.com/cloudnative-pg/cloudnative-pg/issues/3623](https://github.com/cloudnative-pg/cloudnative-pg/issues/3623) | fixed | +| cloudnative-pg/cloudnative-pg | [https://github.com/cloudnative-pg/cloudnative-pg/issues/3541](https://github.com/cloudnative-pg/cloudnative-pg/issues/3541) | | # Byproduct bugs From 5a89170b75530c7b043776e261c660a9d7d21449 Mon Sep 17 00:00:00 2001 From: "Jiawei \"Tyler\" Gu" <47795840+tylergu@users.noreply.github.com> Date: Wed, 31 Jan 2024 12:20:09 -0600 Subject: [PATCH 4/4] Update counter.yml --- .github/workflows/counter.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/counter.yml b/.github/workflows/counter.yml index ec84bd650e..87f3146036 100644 --- a/.github/workflows/counter.yml +++ b/.github/workflows/counter.yml @@ -37,7 +37,7 @@ jobs: - name: Checkout uses: actions/checkout@v2 - name: 'Set up Python' - uses: actions/setup-python@v1 + uses: actions/setup-python@v4 with: python-version: 3.10 - name: Change bugs.md
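Since the workflow bump touches setup-python, the unquoted `python-version: 3.10` context line above is worth double-checking: YAML readers coerce a bare `3.10` to the float `3.1`, which is why the value is usually quoted in workflow files. A quick demonstration with PyYAML (illustrative, not part of the patch):

```python
import yaml

# Unquoted, YAML parses 3.10 as the float 3.1; quoting preserves the string.
assert yaml.safe_load("python-version: 3.10") == {"python-version": 3.1}
assert yaml.safe_load('python-version: "3.10"') == {"python-version": "3.10"}
```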