@@ -24,63 +24,65 @@ def check_sudo():
2424 return True
2525
def _merge_status(overall_status, result):
    """Return the status check_all should carry forward after seeing `result`."""
    if is_error(result):
        # An error must always outrank a non-error status, regardless of how
        # the two codes compare numerically. Among errors the larger code wins
        # (the original code treats higher error codes as more severe).
        if not is_error(overall_status) or result > overall_status:
            return result
        return overall_status
    # Keep the first non-error, non-OK status; never displace a recorded one.
    if result != NO_ERROR and overall_status == NO_ERROR:
        return result
    return overall_status


def _describe_result(check_name, result):
    """Return the one-line report for a check that returned `result`."""
    if is_error(result):
        return "[ERROR] {0}: ERROR (code {1})".format(check_name, result)
    if result != NO_ERROR:
        return "[WARN] {0}: WARNING (code {1})".format(check_name, result)
    return "[OK] {0}: OK".format(check_name)


def check_all(interactive):
    """
    Run all troubleshooter checks, continuing even if errors occur.

    Every check is called with `interactive`, its outcome is printed
    immediately, and a summary of all outcomes is printed at the end.

    Returns the status code to report for the whole run:
      - the largest error code (per is_error) returned by any check, if any;
      - otherwise ERR_FOUND if a check raised an exception after no other
        non-OK status had been recorded;
      - otherwise the first non-NO_ERROR status returned by a check;
      - otherwise NO_ERROR.
    """
    checks = [
        ("Installation", check_installation),
        ("Connection", check_connection),
        ("General Health", check_general_health),
        ("High CPU/Memory Usage", check_high_cpu_memory),
        ("Syslog", check_syslog),
        ("Custom logs", check_custom_logs),
        ("Metrics", run_metrics_troubleshooter),
    ]
    total_checks = len(checks)
    separator = "================================================================================"

    # One pre-formatted summary line per check, in execution order.
    summary_lines = []
    overall_status = NO_ERROR

    for index, (check_name, check_func) in enumerate(checks, 1):
        print(separator)
        print("Running check {0}/{1}: {2}...".format(index, total_checks, check_name))

        try:
            result = check_func(interactive)
            # Computed inside the try so an unexpected return value (one that
            # is_error or the comparison cannot handle) is reported as a
            # failure of this check instead of aborting the remaining checks.
            merged_status = _merge_status(overall_status, result)
            report_line = _describe_result(check_name, result)
        except Exception as e:
            # Deliberately broad: one broken check must not stop the others.
            error_text = str(e)
            print("[EXCEPTION] {0}: EXCEPTION - {1}".format(check_name, error_text))
            summary_lines.append(
                "[EXCEPTION] {0}: EXCEPTION: {1}".format(check_name, error_text))
            # Fall back to the generic code only when nothing worse is already
            # recorded, so an earlier specific error code is not lost.
            if not is_error(overall_status):
                overall_status = ERR_FOUND
            continue

        overall_status = merged_status
        summary_lines.append(report_line)
        # Print immediate result for this check
        print(report_line)

    # Summary of all results
    print("\n ================================================================================")
    print("SUMMARY OF ALL CHECKS:")
    print(separator)
    for report_line in summary_lines:
        print(report_line)

    return overall_status
8486
8587def collect_logs ():
8688 # get output directory for logs
0 commit comments