krkn-chaos · jtydlack · Aug 28, 2024 · Sep 26, 2024 · Nov 15, 2024 · Nov 15, 2024
diff --git a/krkn/scenario_plugins/node_actions/abstract_node_scenarios.py b/krkn/scenario_plugins/node_actions/abstract_node_scenarios.py
@@ -36,6 +36,16 @@ def helper_node_stop_start_scenario(self, instance_kill_count, node, timeout):
         self.helper_node_start_scenario(instance_kill_count, node, timeout)
         logging.info("helper_node_stop_start_scenario has been successfully injected!")
 
+    # Node scenario to detach and attach the disk
+    def disk_detach_attach_scenario(self, instance_kill_count, node, timeout, duration):
+        logging.info("Starting disk_detach_attach_scenario injection")
+        attachments = self.disk_attachment_info(instance_kill_count, node)  # captured before detaching
+        self.disk_detach_scenario(instance_kill_count, node, timeout)
+        logging.info("Waiting for %s seconds before attaching the disk" % (duration))
+        time.sleep(duration)
+        self.disk_attach_scenario(instance_kill_count, attachments, timeout)
+        logging.info("disk_detach_attach_scenario has been successfully injected!")
+
     # Node scenario to terminate the node
     def node_termination_scenario(self, instance_kill_count, node, timeout):
         pass

diff --git a/krkn/scenario_plugins/node_actions/aws_node_scenarios.py b/krkn/scenario_plugins/node_actions/aws_node_scenarios.py
@@ -13,6 +13,7 @@ class AWS:
     def __init__(self):
         self.boto_client = boto3.client("ec2")
         self.boto_instance = boto3.resource("ec2").Instance("id")
+        self.boto_resource = boto3.resource("ec2")
 
     # Get the instance ID of the node
     def get_instance_id(self, node):
@@ -179,6 +180,60 @@ def delete_network_acl(self, acl_id):
 
             raise RuntimeError()
 
+    # Detach volumes
+    def detach_volumes(self, volumes_ids: list):
+        """Force-detach each listed volume via the EC2 client; failures are logged, not raised."""
+        for volume_id in volumes_ids:
+            try:
+                self.boto_client.detach_volume(VolumeId=volume_id, Force=True)
+            except Exception as error:
+                # Best effort: keep going so the remaining volumes are still processed.
+                message = "Detaching volume %s failed with exception: %s"
+                logging.error(message % (volume_id, error))
+
+    # Attach volume
+    def attach_volume(self, attachment: dict):
+        """Attach a volume to its instance unless it is "in-use"; errors are only logged."""
+        try:
+            if self.get_volume_state(attachment["VolumeId"]) != "in-use":
+                logging.info(
+                    "Attaching the %s volumes to instance %s."
+                    % (attachment["VolumeId"], attachment["InstanceId"])
+                )
+                self.boto_client.attach_volume(
+                    InstanceId=attachment["InstanceId"],
+                    Device=attachment["Device"],
+                    VolumeId=attachment["VolumeId"],
+                )
+        except Exception as error:
+            # Best effort: the failure is reported but not raised to the caller.
+            logging.error(
+                "Failed attaching disk %s to the %s instance. "
+                "Encountered following exception: %s"
+                % (attachment['VolumeId'], attachment['InstanceId'], error)
+            )
+
+    # Get IDs of node volumes
+    def get_volumes_ids(self, instance_id: list):
+        """Return the volume IDs found in the first described instance's block device mappings.
+
+        `instance_id` is passed unchanged as the `InstanceIds` list of `describe_instances`.
+        """
+        reply = self.boto_client.describe_instances(InstanceIds=instance_id)
+        mappings = reply["Reservations"][0]["Instances"][0]["BlockDeviceMappings"]
+        return [mapping["Ebs"]["VolumeId"] for mapping in mappings]
+
+    # Get volumes attachment details
+    def get_volume_attachment_details(self, volume_ids: list):
+        """Return the "Volumes" entries that `describe_volumes` reports for `volume_ids`."""
+        described = self.boto_client.describe_volumes(VolumeIds=volume_ids)
+        return described["Volumes"]
+
+    # Get volume state
+    def get_volume_state(self, volume_id: str):
+        """Return the `state` attribute of the EC2 volume resource for `volume_id`."""
+        volume_resource = self.boto_resource.Volume(volume_id)
+        return volume_resource.state
 
 # krkn_lib
 class aws_node_scenarios(abstract_node_scenarios):
@@ -290,3 +345,47 @@ def node_reboot_scenario(self, instance_kill_count, node, timeout):
                 logging.error("node_reboot_scenario injection failed!")
 
                 raise RuntimeError()
+
+    # Get volume attachment info
+    def disk_attachment_info(self, instance_kill_count, node):
+        """Return the volume attachment details of `node`; raises RuntimeError on failure."""
+        for _ in range(instance_kill_count):
+            try:
+                logging.info("Obtaining disk attachment information")
+                # split() turns the ID string into the list get_volumes_ids passes as InstanceIds.
+                instance_id = (self.aws.get_instance_id(node)).split()
+                volumes_ids = self.aws.get_volumes_ids(instance_id)
+                # Returns on the first pass, so the details are collected only once.
+                return self.aws.get_volume_attachment_details(volumes_ids)
+            except Exception as e:
+                logging.error(
+                    "Failed to obtain disk attachment information of %s node. "
+                    "Encountered following exception: %s." % (node, e)
+                )
+                raise RuntimeError()
+
+    # Node scenario to detach the volume
+    def disk_detach_scenario(self, instance_kill_count, node, timeout):
+        """Detach all mapped volumes of `node` per run (`timeout` is unused); raises RuntimeError on failure."""
+        for _ in range(instance_kill_count):
+            try:
+                logging.info("Starting disk_detach_scenario injection")
+                instance_id = (self.aws.get_instance_id(node)).split()
+                volumes_ids = self.aws.get_volumes_ids(instance_id)
+                logging.info(
+                    "Detaching the %s volumes from instance %s "
+                    % (volumes_ids, node)
+                )
+                self.aws.detach_volumes(volumes_ids)
+            except Exception as e:
+                logging.error(
+                    "Failed to detach disk from %s node. Encountered following "
+                    "exception: %s." % (node, e)
+                )
+                raise RuntimeError()
+
+    # Node scenario to attach the volume
+    def disk_attach_scenario(self, instance_kill_count, attachment_details, timeout):
+        for _run in range(instance_kill_count):  # repeat the attach pass once per run
+            for volume_details in attachment_details:  # first attachment entry of each volume
+                self.aws.attach_volume(volume_details["Attachments"][0])
diff --git a/krkn/scenario_plugins/node_actions/node_actions_scenario_plugin.py b/krkn/scenario_plugins/node_actions/node_actions_scenario_plugin.py
@@ -163,7 +163,7 @@ def run_node(self, single_node, node_scenario_object, action, node_scenario):
         logging.info("action" + str(action))
         # Get the scenario specifics for running action nodes
         run_kill_count = get_yaml_item_value(node_scenario, "runs", 1)
-        if action == "node_stop_start_scenario":
+        if action in ("node_stop_start_scenario", "disk_detach_attach_scenario"):
             duration = get_yaml_item_value(node_scenario, "duration", 120)
 
         timeout = get_yaml_item_value(node_scenario, "timeout", 120)
@@ -200,6 +200,10 @@ def run_node(self, single_node, node_scenario_object, action, node_scenario):
                 node_scenario_object.node_reboot_scenario(
                     run_kill_count, single_node, timeout
                 )
+            elif action == "disk_detach_attach_scenario":
+                node_scenario_object.disk_detach_attach_scenario(
+                    run_kill_count, single_node, timeout, duration
+                )
             elif action == "stop_start_kubelet_scenario":
                 node_scenario_object.stop_start_kubelet_scenario(
                     run_kill_count, single_node, timeout

diff --git a/run_kraken.py b/run_kraken.py
@@ -467,7 +467,7 @@ def main(cfg) -> int:
                     end_time,
                     alert_profile,
                     elastic_colllect_alerts,
-                    elastic_alerts_index,
+                    elastic_alerts_index
                 )
 
             else: