From 2687314f85b3fe7b85f36e5649c705a2e8c92ac6 Mon Sep 17 00:00:00 2001 From: Naga Ravi Chaitanya Elluri Date: Tue, 20 Aug 2024 09:01:52 -0400 Subject: [PATCH] Add support to run baremetal node disruptions (#202) This commit enables users to run disrupt baremetal using IPMI to be able to understand the impact on OpenShift as well as applications running on it. Signed-off-by: Naga Ravi Chaitanya Elluri --- docs/node-scenarios.md | 17 +++++++++++------ node-scenarios/Dockerfile | 5 +++++ .../baremetal_node_scenario.yaml.template | 14 ++++++++++++++ node-scenarios/env.sh | 5 +++++ node-scenarios/run.sh | 5 ++++- 5 files changed, 39 insertions(+), 7 deletions(-) create mode 100644 node-scenarios/baremetal_node_scenario.yaml.template diff --git a/docs/node-scenarios.md b/docs/node-scenarios.md index 7367c7a..4debb38 100644 --- a/docs/node-scenarios.md +++ b/docs/node-scenarios.md @@ -36,11 +36,14 @@ LABEL_SELECTOR | Node label to target NODE_NAME | Node name to inject faults in case of targeting a specific node; Can set multiple node names separated by a comma | "" | INSTANCE_COUNT | Targeted instance count matching the label selector | 1 | RUNS | Iterations to perform action on a single node | 1 | -CLOUD_TYPE | Cloud platform on top of which cluster is running, supported platforms - aws or vmware | aws | +CLOUD_TYPE | Cloud platform on top of which cluster is running, supported platforms - aws, vmware, ibmcloud, bm | aws | TIMEOUT | Duration to wait for completion of node scenario injection | 180 | DURATION | Duration to stop the node before running the start action - not supported for vmware and ibm cloud type | 120 | VERIFY_SESSION | Only needed for vmware - Set to True if you want to verify the vSphere client session using certificates | False | SKIP_OPENSHIFT_CHECKS | Only needed for vmware - Set to True if you don't want to wait for the status of the nodes to change on OpenShift before passing the scenario | False | +BMC_USER | Only needed for Baremetal ( bm ) - 
IPMI/bmc username | "" | +BMC_PASSWORD | Only needed for Baremetal ( bm ) - IPMI/bmc password | "" | +BMC_ADDR | Only needed for Baremetal ( bm ) - IPMI/bmc address | "" | #### Demo You can find a link to a demo of the scenario [here](https://asciinema.org/a/ANZY7HhPdWTNaWt4xMFanF6Q5) @@ -74,6 +77,13 @@ $ export IBMC_APIKEY= ``` +Baremetal +``` +$ export BMC_USER= +$ export BMC_PASSWORD= +$ export BMC_ADDR= +``` + Google Cloud Platform ``` TBD @@ -93,11 +103,6 @@ OpenStack TBD ``` -Baremetal -``` -TBD -``` - **NOTE** In case of using custom metrics profile or alerts profile when `CAPTURE_METRICS` or `ENABLE_ALERTS` is enabled, mount the metrics profile from the host on which the container is run using podman/docker under `/home/krkn/kraken/config/metrics-aggregated.yaml` and `/home/krkn/kraken/config/alerts`. For example: ``` $ podman run --name= --net=host --env-host=true -v :/home/krkn/kraken/config/metrics-aggregated.yaml -v :/home/krkn/kraken/config/alerts -v :/home/krkn/.kube/config:Z -d quay.io/krkn-chaos/krkn-hub:container-scenarios diff --git a/node-scenarios/Dockerfile b/node-scenarios/Dockerfile index b6ed807..951f585 100644 --- a/node-scenarios/Dockerfile +++ b/node-scenarios/Dockerfile @@ -4,6 +4,10 @@ FROM quay.io/krkn-chaos/krkn:latest ENV KUBECONFIG /home/krkn/.kube/config +USER root +RUN yum install -y OpenIPMI ipmitool +USER krkn + # Copy configurations COPY config.yaml.template /home/krkn/kraken/config/config.yaml.template COPY metrics_config.yaml.template /home/krkn/kraken/config/kube_burner.yaml.template @@ -13,4 +17,5 @@ COPY node-scenarios/run.sh /home/krkn/run.sh COPY common_run.sh /home/krkn/common_run.sh COPY node-scenarios/node_scenario.yaml.template /home/krkn/kraken/scenarios/node_scenario.yaml.template COPY node-scenarios/plugin_node_scenario.yaml.template /home/krkn/kraken/scenarios/plugin_node_scenario.yaml.template +COPY node-scenarios/baremetal_node_scenario.yaml.template 
/home/krkn/kraken/scenarios/baremetal_node_scenario.yaml.template ENTRYPOINT /home/krkn/run.sh diff --git a/node-scenarios/baremetal_node_scenario.yaml.template b/node-scenarios/baremetal_node_scenario.yaml.template new file mode 100644 index 0000000..33b812b --- /dev/null +++ b/node-scenarios/baremetal_node_scenario.yaml.template @@ -0,0 +1,14 @@ +node_scenarios: + - actions: # Node chaos scenarios to be injected. + - $ACTION + node_name: $NODE_NAME # Node on which scenario has to be injected. + label_selector: $LABEL_SELECTOR # When node_name is not specified, a node with matching label_selector is selected for node chaos scenario injection. + instance_count: $INSTANCE_COUNT # Number of nodes to perform action/select that match the label selector. + runs: $RUNS # Number of times to inject each scenario under actions (will perform on same node each time). + timeout: $TIMEOUT # Duration to wait for completion of node scenario injection. + cloud_type: $CLOUD_TYPE # Cloud type on which Kubernetes/OpenShift runs. + bmc_user: $BMC_USER # For baremetal (bm) cloud type. The default IPMI username. Optional if specified for all machines. + bmc_password: $BMC_PASSWORD # For baremetal (bm) cloud type. The default IPMI password. Optional if specified for all machines. + bmc_info: # This section is here to specify baremetal per-machine info, so it is optional if there is no per-machine info. + $NODE_NAME: # The node name for the baremetal machine + bmc_addr: $BMC_ADDR # Optional. For baremetal nodes with the IPMI BMC address missing from 'oc get bmh'. 
diff --git a/node-scenarios/env.sh b/node-scenarios/env.sh index 5a541c2..6102681 100755 --- a/node-scenarios/env.sh +++ b/node-scenarios/env.sh @@ -14,3 +14,8 @@ export SCENARIO_FILE=${SCENARIO_FILE:=scenarios/node_scenario.yaml} export SCENARIO_POST_ACTION=${SCENARIO_POST_ACTION:=""} export VERIFY_SESSION=${VERIFY_SESSION:="false"} export SKIP_OPENSHIFT_CHECKS=${SKIP_OPENSHIFT_CHECKS:="false"} + +# Baremetal vars +export BMC_USER=${BMC_USER:=""} +export BMC_PASSWORD=${BMC_PASSWORD:=""} +export BMC_ADDR=${BMC_ADDR:=""} diff --git a/node-scenarios/run.sh b/node-scenarios/run.sh index 0c7cf11..a14db63 100755 --- a/node-scenarios/run.sh +++ b/node-scenarios/run.sh @@ -26,10 +26,13 @@ if [[ "$CLOUD_TYPE" == "vmware" || "$CLOUD_TYPE" == "ibmcloud" ]]; then fi envsubst < /home/krkn/kraken/scenarios/plugin_node_scenario.yaml.template > /home/krkn/kraken/scenarios/node_scenario.yaml - +elif [[ "$CLOUD_TYPE" == "bm" ]]; then + envsubst < /home/krkn/kraken/scenarios/baremetal_node_scenario.yaml.template > /home/krkn/kraken/scenarios/node_scenario.yaml else envsubst < /home/krkn/kraken/scenarios/node_scenario.yaml.template > /home/krkn/kraken/scenarios/node_scenario.yaml fi + +# Setup config envsubst < /home/krkn/kraken/config/config.yaml.template > /home/krkn/kraken/config/node_scenario_config.yaml # Run Kraken