Skip to content

Commit

Permalink
Merge branch 'krkn-chaos:main' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
tsebastiani authored Oct 24, 2024
2 parents cc575d3 + e00bba4 commit 4159b26
Show file tree
Hide file tree
Showing 38 changed files with 1,324 additions and 68 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/docker-image.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ jobs:
QUAY_TOKEN: ${{ secrets.QUAY_PASSWORD }}
# Fetch the repository so build.sh and the scenario directories are available to the later steps.
- name: Check out code
uses: actions/checkout@main
# Render each scenario's Dockerfile from its Dockerfile.template, embedding krknctl-input.json as an image label.
# NOTE(review): build.sh itself ends with "docker compose build --parallel", so the images appear to be
# built both by this step and by the next one - confirm whether the duplicate build is intended.
- name: Add krknctl metadata to Dockerfiles
run: |
bash build.sh
- name: Build the Docker images
run: docker compose build --parallel
- name: Push the Docker images
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,12 @@ COPY application-outages/run.sh /home/krkn/run.sh
COPY common_run.sh /home/krkn/common_run.sh
COPY application-outages/app_outages.yaml.template /home/krkn/kraken/scenarios/app_outage.yaml.template

# krknctl metadata, exposed as image labels.
LABEL krknctl.kubeconfig_path="/home/krkn/.kube/config"
LABEL krknctl.title="Application Outages"
LABEL krknctl.description="Scenario to block the traffic ( Ingress/Egress ) of an application matching the labels for the specified duration of time to understand the behavior of the service/other services which depend on it during downtime. This helps with planning the requirements accordingly, be it improving the timeouts or tweaking the alerts etc."

# KRKNCTL_INPUT is a placeholder: build.sh runs envsubst over this file and fills it with the
# scenario's krknctl-input.json stripped of newlines, so the JSON fits on this single LABEL line.
LABEL krknctl.inputFields='$KRKNCTL_INPUT'



ENTRYPOINT /home/krkn/run.sh
39 changes: 39 additions & 0 deletions application-outages/krknctl-input.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
[
{
"name":"chaos-duration",
"short_description":"Chaos Duration",
"description":"Set chaos duration (in sec) as desired",
"variable":"DURATION",
"type":"number",
"default":"600",
"required":"false"
},
{
"name":"namespace",
"short_description":"Namespace",
"description":"Namespace to target - all application routes will go inaccessible if pod selector is empty ( Required )",
"variable":"NAMESPACE",
"type":"string",
"required":"true"
},
{
"name":"pod-selector",
"short_description":"Pod Selector",
"description": "Pods to target. For example \"{app: foo}\"",
"type":"string",
"variable":"POD_SELECTOR",
"validator": "\\{[a-zA-Z0-9-_]+\\: [a-zA-Z0-9-_]+\\}",
"required": "true"
},
{
"name":"block-traffic-type",
"short_description":"Block Traffic Type",
"description": "It can be [Ingress] or [Egress] or [Ingress, Egress]",
"type":"string",
"variable":"BLOCK_TRAFFIC_TYPE",
"validator": "^\\[Ingress\\]$|^\\[Egress\\]$|^\\[Ingress, Egress\\]$",
"default":"[Ingress, Egress]",
"required": "false"
}

]
9 changes: 9 additions & 0 deletions build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/usr/bin/env bash
# Renders <scenario>/Dockerfile from <scenario>/Dockerfile.template for every
# scenario listed below, embedding the scenario's krknctl-input.json (collapsed
# to a single line) wherever the template references KRKNCTL_INPUT, and then
# builds all images with docker compose.

# Fail fast: a missing JSON file or template must abort the run instead of
# silently producing a Dockerfile with an empty inputFields label.
set -euo pipefail

SCENARIOS=( application-outages container-scenarios network-chaos node-cpu-hog node-io-hog \
node-memory-hog node-scenarios pod-network-chaos pod-scenarios power-outages pvc-scenario \
service-disruption-scenarios service-hijacking syn-flood time-scenarios zone-outages )

for scenario in "${SCENARIOS[@]}"; do
    # Newlines are stripped so the JSON fits inside a one-line LABEL instruction.
    # Assignment and export are split so a failing read is not masked by export.
    KRKNCTL_INPUT=$(tr -d "\n" < "$scenario/krknctl-input.json")
    export KRKNCTL_INPUT
    # Limit substitution to KRKNCTL_INPUT; without the explicit list envsubst
    # would also rewrite any other dollar-prefixed name found in a template.
    envsubst '$KRKNCTL_INPUT' < "$scenario/Dockerfile.template" > "$scenario/Dockerfile"
done

docker compose build --parallel
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,11 @@ COPY container-scenarios/run.sh /home/krkn/run.sh
COPY container-scenarios/container_scenario.yaml.template /home/krkn/kraken/scenarios/container_scenario.yaml.template
COPY common_run.sh /home/krkn/common_run.sh
USER krkn

# krknctl metadata, exposed as image labels.
LABEL krknctl.kubeconfig_path="/home/krkn/.kube/config"
LABEL krknctl.title="Container Scenarios"
# Describes the container disruption scenario; keep it in sync with the fields
# declared in container-scenarios/krknctl-input.json.
LABEL krknctl.description="Scenario to disrupt the containers matching the given label selector and container name in the targeted namespace by sending them a kill signal, and to verify that the affected containers recover within the expected recovery time."

# KRKNCTL_INPUT is a placeholder: build.sh runs envsubst over this file and fills it with the
# scenario's krknctl-input.json stripped of newlines, so the JSON fits on this single LABEL line.
LABEL krknctl.inputFields='$KRKNCTL_INPUT'

ENTRYPOINT /home/krkn/run.sh
61 changes: 61 additions & 0 deletions container-scenarios/krknctl-input.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
[

{
"name":"namespace",
"short_description":"Namespace",
"description":"Targeted namespace in the cluster",
"variable":"NAMESPACE",
"type":"string",
"default":"openshift-etcd",
"required":"false"
},
{
"name":"label-selector",
"short_description":"Label Selector",
"description":"Label of the container(s) to target",
"variable":"LABEL_SELECTOR",
"type":"string",
"default":"k8s-app=etcd",
"validator":"^.+\\=.+$",
"required":"false"
},
{
"name":"disruption-count",
"short_description":"Disruption Count",
"description": "Number of containers to disrupt",
"type":"number",
"variable":"DISRUPTION_COUNT",
"default":"1",
"required": "false"
},
{
"name":"container-name",
"short_description":"Container Name",
"description": "Name of the container to disrupt",
"type":"string",
"variable":"CONTAINER_NAME",
"default":"etcd",
"required": "false"
},
{
"name":"action",
"short_description":"Action",
"description":"kill signal to run. For example 1 ( hang up ) or 9",
"variable":"ACTION",
"type":"string",
"default":"1",
"validator":"^[1-9]$",
"required":"false"
},
{
"name":"expected-recovery-time",
"short_description":"Expected Recovery Time",
"description":"Time to wait before checking if all containers that were affected recover properly",
"variable":"EXPECTED_RECOVERY_TIME",
"type":"number",
"default":"60",
"required":"false"
}


]
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,9 @@ COPY common_run.sh /home/krkn/common_run.sh
# Both the egress and the ingress scenario templates are copied into the image.
COPY network-chaos/network_chaos_egress.yaml.template /home/krkn/kraken/scenarios/network_chaos_egress.yaml.template
COPY network-chaos/network_chaos_ingress.yaml.template /home/krkn/kraken/scenarios/network_chaos_ingress.yaml.template

# krknctl metadata, exposed as image labels.
LABEL krknctl.kubeconfig_path="/home/krkn/.kube/config"
LABEL krknctl.title="Network Chaos"
LABEL krknctl.description="Scenario to introduce network latency, packet loss, and bandwidth restriction in the Node's host network interface. The purpose of this scenario is to observe faults caused by random variations in the network."
# KRKNCTL_INPUT is a placeholder: build.sh runs envsubst over this file and fills it with the
# scenario's krknctl-input.json stripped of newlines, so the JSON fits on this single LABEL line.
LABEL krknctl.inputFields='$KRKNCTL_INPUT'

ENTRYPOINT /home/krkn/run.sh
110 changes: 110 additions & 0 deletions network-chaos/krknctl-input.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
[
{
"name":"traffic-type",
"short_description":"Traffic Type",
"description":"Selects the network chaos scenario type can be ingress or egress",
"variable":"TRAFFIC_TYPE",
"type":"enum",
"allowed_values":"ingress,egress",
"separator":",",
"required":"true"
},
{
"name":"duration",
"short_description":"Duration [common]",
"description":"Duration in seconds - during which network chaos will be applied.",
"variable":"DURATION",
"type":"number",
"default":"300",
"required":"false"
},
{
"name":"label-selector",
"short_description":"Label Selector [common]",
"description": "When NODE_NAME is not specified, a node with matching label_selector is selected for running.",
"type":"string",
"variable":"LABEL_SELECTOR",
"default":"node-role.kubernetes.io/master",
"required": "false"
},
{
"name":"execution",
"short_description":"Execution [common]",
"description": "Execute each of the egress option as a single scenario(parallel) or as separate scenario(serial).",
"type":"enum",
"variable":"EXECUTION",
"allowed_values":"parallel,serial",
"separator":",",
"default":"parallel",
"required": "false"
},
{
"name":"instance-count",
"short_description":"Instance Count [common]",
"description": "Targeted instance count matching the label selector.",
"type":"number",
"variable":"INSTANCE_COUNT",
"default":"1",
"required": "false"
},
{
"name":"node-name",
"short_description":"Node Name [*Egress only*]",
"description": "Node name to inject faults in case of targeting a specific node; Can set multiple node names separated by a comma",
"type":"string",
"variable":"NODE_NAME",
"default":"",
"required": "false"
},
{
"name":"interfaces",
"short_description":"Interfaces [*Egress only*]",
"description": "List of interface on which to apply the network restriction. eg. [eth0,eth1,eth2]",
"type":"string",
"variable":"INTERFACES",
"default":"[]",
"validator":"^\\[\\]$|^\\[[a-zA-Z0-9]+(, ?[a-zA-Z0-9]+)*\\]$",
"required": "false"
},
{
"name":"egress",
"short_description":"Egress [*Egress only*]",
"description": "Dictionary of values to set network latency(latency: 50ms), packet loss(loss: 0.02), bandwidth restriction(bandwidth: 100mbit) eg. {bandwidth: 100mbit}",
"type":"string",
"variable":"EGRESS",
"default":"{bandwidth: 100mbit}",
"validator":"^\\{(latency|loss|bandwidth): [a-zA-Z0-9.]+(, ?(latency|loss|bandwidth): [a-zA-Z0-9.]+)*\\}$",
"required": "false"
},
{
"name":"target-node-interface",
"short_description":"Target Node and Interfaces [*Ingress only*]",
"description": "Dictionary with key as node name(s) and value as a list of its interfaces to test. For example: {ip-10-0-216-2.us-west-2.compute.internal: [ens5]}",
"type":"string",
"variable":"TARGET_NODE_AND_INTERFACE",
"default":"",
"validator":"^\\{[a-zA-Z0-9.-]+: \\[[A-Za-z0-9]+(, ?[A-Za-z0-9]+)*\\](, ?[a-zA-Z0-9.-]+: \\[[A-Za-z0-9]+(, ?[A-Za-z0-9]+)*\\])*\\}$",
"required": "false"
},
{
"name":"network-params",
"short_description":"Network Params [*Ingress only*]",
"description": "latency, loss and bandwidth are the three supported network parameters to alter for the chaos test. For example: {latency: 50ms, loss: 0.02}",
"type":"string",
"variable":"NETWORK_PARAMS",
"default":"",
"validator":"^\\{(latency|loss|bandwidth): [a-zA-Z0-9.]+(, ?(latency|loss|bandwidth): [a-zA-Z0-9.]+)*\\}$",
"required": "false"
},
{
"name":"wait-duration",
"short_description":"Wait Duration [*Ingress only*]",
"description": "Ensure that it is at least about twice of test_duration",
"type":"number",
"variable":"WAIT_DURATION",
"default":"300",
"required": "false"
}


]
16 changes: 0 additions & 16 deletions node-cpu-hog/Dockerfile

This file was deleted.

20 changes: 20 additions & 0 deletions node-cpu-hog/Dockerfile.template
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Image for the node CPU hog scenario, layered on top of the base krkn image.
# build.sh renders this template into node-cpu-hog/Dockerfile with envsubst.
# NOTE(review): the base image is tracked through the floating "latest" tag, so
# rebuilds are not reproducible - consider pinning a version tag or digest.
FROM quay.io/krkn-chaos/krkn:latest

# key=value form; the space-separated ENV form is deprecated.
ENV KUBECONFIG=/home/krkn/.kube/config

# Copy configurations
COPY metrics_config.yaml.template /home/krkn/kraken/config/kube_burner.yaml.template
COPY config.yaml.template /home/krkn/kraken/config/config.yaml.template
COPY node-cpu-hog/env.sh /home/krkn/env.sh
COPY env.sh /home/krkn/main_env.sh
COPY node-cpu-hog/run.sh /home/krkn/run.sh
COPY node-cpu-hog/input.yaml.template /home/krkn/kraken/scenarios/kube/cpu-hog/input.yaml.template
COPY common_run.sh /home/krkn/common_run.sh

# krknctl metadata, exposed as image labels.
LABEL krknctl.kubeconfig_path="/home/krkn/.kube/config"
LABEL krknctl.title="CPU Hog"
LABEL krknctl.description="This scenario is based on the arcaflow arcaflow-plugin-stressng plugin. The purpose of this scenario is to create cpu pressure on a particular node of the Kubernetes/OpenShift cluster for a time span. To enable this plugin add the pointer to the scenario input file scenarios/arcaflow/cpu-hog/input.yaml as described in the Usage section. This scenario takes a list of objects named input_list"

# KRKNCTL_INPUT is a placeholder: build.sh fills it with node-cpu-hog/krknctl-input.json
# stripped of newlines, so the JSON fits on this single LABEL line.
LABEL krknctl.inputFields='$KRKNCTL_INPUT'

# Exec-form equivalent of the former shell-form entrypoint: still launched through
# /bin/sh -c, but the shell is replaced by run.sh so the script itself receives
# the signals sent to the container (for example on docker stop).
ENTRYPOINT ["/bin/sh", "-c", "exec /home/krkn/run.sh"]
45 changes: 45 additions & 0 deletions node-cpu-hog/krknctl-input.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
[
{
"name":"chaos-duration",
"short_description":"Chaos Duration",
"description":"Set chaos duration (in sec) as desired",
"variable":"TOTAL_CHAOS_DURATION",
"type":"number",
"default":"60"
},
{
"name":"cores",
"short_description":"Number of cores",
"description":"Number of cores (workers) of node CPU to be consumed",
"variable":"NODE_CPU_CORE",
"type":"number",
"default":"2"
},
{
"name":"cpu-percentage",
"short_description":"Cpu percentage",
"description":"Percentage of total cpu to be consumed",
"variable":"NODE_CPU_PERCENTAGE",
"type":"number",
"default":"50"
},
{
"name":"namespace",
"short_description":"Namespace",
"description":"Namespace where the scenario container will be deployed",
"variable":"NAMESPACE",
"type":"string",
"default":"default"
},
{
"name":"node-selectors",
"short_description":"Node Selectors",
"description":"Node selectors where the scenario containers will be scheduled in the format \"<selector>=<value>\". NOTE: This value can be specified as a list of node selectors separated by \";\". Will be instantiated a container per each node selector with the same scenario options. This option is meant to run one or more stress scenarios simultaneously on different nodes, kubernetes will schedule the pods on the target node accordingly with the selector specified. Specifying the same selector multiple times will instantiate as many scenario container as the number of times the selector is specified on the same node",
"variable":"NODE_SELECTORS",
"type":"string",
"validator": "^$|^[a-zA-Z0-9._-]+=[a-zA-Z0-9._-]+(;[a-zA-Z0-9._-]+=[a-zA-Z0-9._-]+)*$",
"default":"",
"required": "false"
}

]
7 changes: 7 additions & 0 deletions node-io-hog/Dockerfile → node-io-hog/Dockerfile.template
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,11 @@ COPY node-io-hog/run.sh /home/krkn/run.sh
COPY node-io-hog/input.yaml.template /home/krkn/kraken/scenarios/kube/io-hog/input.yaml.template
COPY common_run.sh /home/krkn/common_run.sh

# krknctl metadata, exposed as image labels.
LABEL krknctl.kubeconfig_path="/home/krkn/.kube/config"
LABEL krknctl.title="I/O Hog"
LABEL krknctl.description="This scenario is based on the arcaflow arcaflow-plugin-stressng plugin. The purpose of this scenario is to create disk pressure on a particular node of the Kubernetes/OpenShift cluster for a time span. The scenario allows to attach a node path to the pod as a hostPath volume."

# KRKNCTL_INPUT is a placeholder: build.sh runs envsubst over this file and fills it with the
# scenario's krknctl-input.json stripped of newlines, so the JSON fits on this single LABEL line.
LABEL krknctl.inputFields='$KRKNCTL_INPUT'


ENTRYPOINT /home/krkn/run.sh
Loading

0 comments on commit 4159b26

Please sign in to comment.