forked from microsoft/AIOpsLab
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathinject_virtual.py
329 lines (271 loc) · 14 KB
/
inject_virtual.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
"""Inject faults at the virtualization layer: K8S, Docker, etc."""
import yaml
import time
from aiopslab.service.kubectl import KubeCtl
from aiopslab.service.helm import Helm
from aiopslab.generators.fault.base import FaultInjector
from aiopslab.service.apps.base import Application
from aiopslab.paths import TARGET_MICROSERVICES
class VirtualizationFaultInjector(FaultInjector):
def __init__(self, namespace: str):
super().__init__(namespace)
self.namespace = namespace
self.kubectl = KubeCtl()
self.mongo_service_pod_map = {
"url-shorten-mongodb": "url-shorten-service",
}
def delete_service_pods(self, target_service_pods: list[str]):
"""Kill the corresponding service pod to enforce the fault."""
for pod in target_service_pods:
delete_pod_command = f"kubectl delete pod {pod} -n {self.namespace}"
delete_result = self.kubectl.exec_command(delete_pod_command)
print(f"Deleted service pod {pod} to enforce the fault: {delete_result}")
############# FAULT LIBRARY ################
# V.1 - misconfig_k8s: Misconfigure service port in Kubernetes - Misconfig
def inject_misconfig_k8s(self, microservices: list[str]):
"""Inject a fault to misconfigure service's target port in Kubernetes."""
for service in microservices:
service_config = self._modify_target_port_config(
from_port=9090,
to_port=9999,
configs=self.kubectl.get_service_json(service, self.testbed),
)
print(f"Misconfig fault for service: {service} | namespace: {self.testbed}")
self.kubectl.patch_service(service, self.testbed, service_config)
def recover_misconfig_k8s(self, microservices: list[str]):
for service in microservices:
service_config = self._modify_target_port_config(
from_port=9999,
to_port=9090,
configs=self.kubectl.get_service_json(service, self.testbed),
)
print(f"Recovering for service: {service} | namespace: {self.testbed}")
self.kubectl.patch_service(service, self.testbed, service_config)
# V.2 - auth_miss_mongodb: Authentication missing for MongoDB - Auth
def inject_auth_miss_mongodb(self, microservices: list[str]):
"""Inject a fault to enable TLS for a MongoDB service.
NOTE: modifies the values.yaml file for the service. The fault is created
by forcing the service to require TLS for connections, which will fail if
the certificate is not provided.
NOTE: mode: requireTLS, certificateKeyFile, and CAFile are required fields.
"""
for service in microservices:
# Prepare the set values for helm upgrade
set_values = {
"url-shorten-mongodb.tls.mode": "requireTLS",
"url-shorten-mongodb.tls.certificateKeyFile": "/etc/tls/tls.pem",
"url-shorten-mongodb.tls.CAFile": "/etc/tls/ca.crt",
}
# Define Helm upgrade configurations
helm_args = {
"release_name": "social-network",
"chart_path": TARGET_MICROSERVICES
/ "socialNetwork/helm-chart/socialnetwork/",
"namespace": self.namespace,
"values_file": TARGET_MICROSERVICES
/ "socialNetwork/helm-chart/socialnetwork/values.yaml",
"set_values": set_values,
}
Helm.upgrade(**helm_args)
pods = self.kubectl.list_pods(self.namespace)
target_service_pods = [
pod.metadata.name
for pod in pods.items
if self.mongo_service_pod_map[service] in pod.metadata.name
]
print(f"Target Service Pods: {target_service_pods}")
self.delete_service_pods(target_service_pods)
self.kubectl.exec_command(
f"kubectl rollout restart deployment {service} -n {self.namespace}"
)
def recover_auth_miss_mongodb(self, microservices: list[str]):
for service in microservices:
set_values = {
"url-shorten-mongodb.tls.mode": "disabled",
"url-shorten-mongodb.tls.certificateKeyFile": "",
"url-shorten-mongodb.tls.CAFile": "",
}
helm_args = {
"release_name": "social-network",
"chart_path": TARGET_MICROSERVICES
/ "socialNetwork/helm-chart/socialnetwork/",
"namespace": self.namespace,
"values_file": TARGET_MICROSERVICES
/ "socialNetwork/helm-chart/socialnetwork/values.yaml",
"set_values": set_values,
}
Helm.upgrade(**helm_args)
pods = self.kubectl.list_pods(self.namespace)
target_service_pods = [
pod.metadata.name
for pod in pods.items
if self.mongo_service_pod_map[service] in pod.metadata.name
]
print(f"Target Service Pods: {target_service_pods}")
self.delete_service_pods(target_service_pods)
self.kubectl.exec_command(
f"kubectl rollout restart deployment {service} -n {self.namespace}"
)
# V.3 - scale_pods_to_zero: Scale pods to zero - Deploy/Operation
def inject_scale_pods_to_zero(self, microservices: list[str]):
"""Inject a fault to scale pods to zero for a service."""
for service in microservices:
self.kubectl.exec_command(
f"kubectl scale deployment {service} --replicas=0 -n {self.namespace}"
)
print(
f"Scaled deployment {service} to 0 replicas | namespace: {self.namespace}"
)
def recover_scale_pods_to_zero(self, microservices: list[str]):
for service in microservices:
self.kubectl.exec_command(
f"kubectl scale deployment {service} --replicas=1 -n {self.namespace}"
)
print(
f"Scaled deployment {service} back to 1 replica | namespace: {self.namespace}"
)
# V.4 - assign_to_non_existent_node: Assign to non-existent or NotReady node - Dependency
def inject_assign_to_non_existent_node(self, microservices: list[str]):
"""Inject a fault to assign a service to a non-existent or NotReady node."""
non_existent_node_name = "extra-node"
for service in microservices:
deployment_yaml = self._get_deployment_yaml(service)
deployment_yaml["spec"]["template"]["spec"]["nodeSelector"] = {
"kubernetes.io/hostname": non_existent_node_name
}
# Write the modified YAML to a temporary file
modified_yaml_path = self._write_yaml_to_file(service, deployment_yaml)
delete_command = f"kubectl delete deployment {service} -n {self.namespace}"
self.kubectl.exec_command(delete_command)
apply_command = f"kubectl apply -f {modified_yaml_path} -n {self.namespace}"
self.kubectl.exec_command(apply_command)
print(f"Redeployed {service} to node {non_existent_node_name}.")
def recover_assign_to_non_existent_node(self, microservices: list[str]):
for service in microservices:
deployment_yaml = self._get_deployment_yaml(service)
if "nodeSelector" in deployment_yaml["spec"]["template"]["spec"]:
del deployment_yaml["spec"]["template"]["spec"]["nodeSelector"]
modified_yaml_path = self._write_yaml_to_file(service, deployment_yaml)
delete_command = f"kubectl delete deployment {service} -n {self.namespace}"
self.kubectl.exec_command(delete_command)
apply_command = f"kubectl apply -f {modified_yaml_path} -n {self.namespace}"
self.kubectl.exec_command(apply_command)
print(f"Removed nodeSelector for service {service} and redeployed.")
# V.5 - redeploy without deleting the PV - only for HotelReservation
def inject_redepoly_without_pv(self, app: Application):
"""Inject a fault to delete the namespace without deleting the PV."""
self.kubectl.delete_namespace(self.namespace)
print(f"Deleting namespace {self.namespace} without deleting the PV.")
time.sleep(15)
print(f"Redeploying {self.namespace}.")
app = type(app)()
app.deploy()
def recover_redepoly_without_pv(self, app: Application):
app.cleanup()
# pass
# V.6 - wrong binary usage incident
def inject_wrong_bin_usage(self, microservices: list[str]):
"""Inject a fault to use the wrong binary of a service."""
for service in microservices:
deployment_yaml = self._get_deployment_yaml(service)
# Modify the deployment YAML to use the 'geo' binary instead of the 'profile' binary
containers = deployment_yaml["spec"]["template"]["spec"]["containers"]
for container in containers:
if "command" in container and "profile" in container["command"]:
print(
f"Changing binary for container {container['name']} from 'profile' to 'geo'."
)
container["command"] = ["geo"] # Replace 'profile' with 'geo'
modified_yaml_path = self._write_yaml_to_file(service, deployment_yaml)
# Delete the deployment and re-apply
delete_command = f"kubectl delete deployment {service} -n {self.namespace}"
apply_command = f"kubectl apply -f {modified_yaml_path} -n {self.namespace}"
self.kubectl.exec_command(delete_command)
self.kubectl.exec_command(apply_command)
print(f"Injected wrong binary usage fault for service: {service}")
def recover_wrong_bin_usage(self, microservices: list[str]):
for service in microservices:
deployment_yaml = self._get_deployment_yaml(service)
containers = deployment_yaml["spec"]["template"]["spec"]["containers"]
for container in containers:
if "command" in container and "geo" in container["command"]:
print(
f"Reverting binary for container {container['name']} from 'geo' to 'profile'."
)
container["command"] = [
"profile"
] # Restore 'geo' back to 'profile'
modified_yaml_path = self._write_yaml_to_file(service, deployment_yaml)
delete_command = f"kubectl delete deployment {service} -n {self.namespace}"
apply_command = f"kubectl apply -f {modified_yaml_path} -n {self.namespace}"
self.kubectl.exec_command(delete_command)
self.kubectl.exec_command(apply_command)
print(f"Recovered from wrong binary usage fault for service: {service}")
############# HELPER FUNCTIONS ################
def _wait_for_pods_ready(self, microservices: list[str], timeout: int = 30):
for service in microservices:
command = f"kubectl wait --for=condition=ready pod -l app={service} -n {self.namespace} --timeout={timeout}s"
result = self.kubectl.exec_command(command)
print(f"Wait result for {service}: {result}")
def _modify_target_port_config(self, from_port: int, to_port: int, configs: dict):
for port in configs["spec"]["ports"]:
if port.get("targetPort") == from_port:
port["targetPort"] = to_port
return configs
def _get_values_yaml(self, service_name: str):
kubectl = KubeCtl()
values_yaml = kubectl.exec_command(
f"kubectl get configmap {service_name} -n {self.testbed} -o yaml"
)
return yaml.safe_load(values_yaml)
def _enable_tls(self, values_yaml: dict):
values_yaml["net"] = {
"tls": {
"mode": "requireTLS",
"certificateKeyFile": "/etc/tls/tls.pem",
"CAFile": "/etc/tls/ca.crt",
}
}
return yaml.dump(values_yaml)
def _apply_modified_yaml(self, service_name: str, modified_yaml: str):
modified_yaml_path = f"/tmp/{service_name}-values.yaml"
with open(modified_yaml_path, "w") as f:
f.write(modified_yaml)
kubectl = KubeCtl()
kubectl.exec_command(
f"kubectl create configmap {service_name} -n {self.testbed} --from-file=values.yaml={modified_yaml_path} --dry-run=client -o yaml | kubectl apply -f -"
)
kubectl.exec_command(
f"kubectl rollout restart deployment {service_name} -n {self.testbed}"
)
def _get_deployment_yaml(self, service_name: str):
deployment_yaml = self.kubectl.exec_command(
f"kubectl get deployment {service_name} -n {self.namespace} -o yaml"
)
return yaml.safe_load(deployment_yaml)
def _change_node_selector(self, deployment_yaml: dict, node_name: str):
if "spec" in deployment_yaml and "template" in deployment_yaml["spec"]:
deployment_yaml["spec"]["template"]["spec"]["nodeSelector"] = {
"kubernetes.io/hostname": node_name
}
return yaml.dump(deployment_yaml)
def _write_yaml_to_file(self, service_name: str, yaml_content: dict):
"""Helper function to write YAML content to a temporary file."""
import yaml
file_path = f"/tmp/{service_name}_modified.yaml"
with open(file_path, "w") as file:
yaml.dump(yaml_content, file)
return file_path
if __name__ == "__main__":
namespace = "test-social-network"
microservices = ["mongodb-geo"]
# microservices = ["geo"]
fault_type = "auth_miss_mongodb"
# fault_type = "misconfig_app"
# fault_type = "revoke_auth"
print("Start injection ...")
injector = VirtualizationFaultInjector(namespace)
# injector._inject(fault_type, microservices)
injector._recover(fault_type, microservices)