Skip to content

Commit

Permalink
Add support to ignore checking pods in ns
Browse files Browse the repository at this point in the history
This commit adds a way for the user to specify regex patterns for pods
that should be skipped when checking the watched namespaces.

Fixes https://github.com/redhat-chaos/cerberus/issues/177.
  • Loading branch information
chaitanyaenr committed Aug 14, 2022
1 parent ad30a23 commit 0ce6f37
Show file tree
Hide file tree
Showing 5 changed files with 15 additions and 3 deletions.
13 changes: 10 additions & 3 deletions cerberus/kubernetes/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,14 +261,21 @@ def namespace_sleep_tracker(namespace, pods_tracker):

# Monitor the status of the pods in the specified namespace
# and set the status to true or false
def monitor_namespace(namespace):
def monitor_namespace(namespace, ignore_pattern=None):
notready_pods = set()
match = False
notready_containers = defaultdict(list)
all_pod_info_list = get_all_pod_info(namespace)
if all_pod_info_list is not None and len(all_pod_info_list) > 0:
for all_pod_info in all_pod_info_list:
for pod_info in all_pod_info.items:
pod = pod_info.metadata.name
if ignore_pattern:
for pattern in ignore_pattern:
if re.match(pattern, pod):
match = True
if match:
continue
pod_status = pod_info.status
pod_status_phase = pod_status.phase
if pod_status_phase != "Running" and pod_status_phase != "Succeeded":
Expand All @@ -295,8 +302,8 @@ def monitor_namespace(namespace):
return status, notready_pods, notready_containers


def process_namespace(iteration, namespace, failed_pods_components, failed_pod_containers):
watch_component_status, failed_component_pods, failed_containers = monitor_namespace(namespace)
def process_namespace(iteration, namespace, failed_pods_components, failed_pod_containers, ignore_pattern):
watch_component_status, failed_component_pods, failed_containers = monitor_namespace(namespace, ignore_pattern)
logging.info("Iteration %s: %s: %s" % (iteration, namespace, watch_component_status))
if not watch_component_status:
failed_pods_components[namespace] = failed_component_pods
Expand Down
1 change: 1 addition & 0 deletions config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ cerberus:
- openshift-kube-scheduler
- openshift-ingress
- openshift-sdn # When enabled, it will check for the cluster sdn and monitor that namespace
watch_namespaces_ignore_pattern: [^installer*] # List of regex patterns; pods whose names match any of them are ignored in the namespaces specified under watch_namespaces
cerberus_publish_status: True # When enabled, cerberus starts a light weight http server and publishes the status
inspect_components: False # Enable it only when OpenShift client is supported to run
# When enabled, cerberus collects logs, events and metrics of failed components
Expand Down
1 change: 1 addition & 0 deletions config/kubernetes_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ cerberus:
label: node-role.kubernetes.io/master
watch_namespaces: # List of namespaces to be monitored
- kube-system
watch_namespaces_ignore_pattern: [] # List of regex patterns; pods whose names match any of them are ignored in the namespaces specified under watch_namespaces
cerberus_publish_status: True # When enabled, cerberus starts a light weight http server and publishes the status
inspect_components: False # Enable it only when OpenShift client is supported to run
# When enabled, cerberus collects logs, events and metrics of failed components
Expand Down
1 change: 1 addition & 0 deletions docs/config.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ cerberus:
- openshift-kube-scheduler
- openshift-ingress
- openshift-sdn # When enabled, it will check for the cluster sdn and monitor that namespace
watch_namespaces_ignore_pattern: [] # List of regex patterns; pods whose names match any of them are ignored in the namespaces specified under watch_namespaces
cerberus_publish_status: True # When enabled, cerberus starts a light weight http server and publishes the status
inspect_components: False # Enable it only when OpenShift client is supported to run
# When enabled, cerberus collects logs, events and metrics of failed components
Expand Down
2 changes: 2 additions & 0 deletions start_cerberus.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ def main(cfg):
watch_nodes = config["cerberus"].get("watch_nodes", False)
watch_cluster_operators = config["cerberus"].get("watch_cluster_operators", False)
watch_namespaces = config["cerberus"].get("watch_namespaces", [])
watch_namespaces_ignore_pattern = config["cerberus"].get("watch_namespaces_ignore_pattern", [])
watch_terminating_namespaces = config["cerberus"].get("watch_terminating_namespaces", True)
watch_url_routes = config["cerberus"].get("watch_url_routes", [])
watch_master_schedulable = config["cerberus"].get("watch_master_schedulable", {})
Expand Down Expand Up @@ -288,6 +289,7 @@ def main(cfg):
watch_namespaces,
repeat(failed_pods_components),
repeat(failed_pod_containers),
repeat(watch_namespaces_ignore_pattern),
),
)

Expand Down

0 comments on commit 0ce6f37

Please sign in to comment.