From 5b870293fefa3fac92bbf1d2b525bf0499e4683e Mon Sep 17 00:00:00 2001 From: pagmatt Date: Wed, 25 May 2022 08:31:42 +0200 Subject: [PATCH 1/9] Log error return code --- sem/runner.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sem/runner.py b/sem/runner.py index 28e6016..2b4e0ef 100644 --- a/sem/runner.py +++ b/sem/runner.py @@ -345,6 +345,8 @@ def run_simulations(self, parameter_list, data_folder, stop_on_errors=False): if stop_on_errors: raise Exception(error_message) print(error_message) + print('Return code:') + print(return_code) current_result['meta']['elapsed_time'] = end-start current_result['meta']['exitcode'] = return_code From 15febefa27543e9371b9ae74e526166bbf2b3695 Mon Sep 17 00:00:00 2001 From: pagmatt Date: Wed, 25 May 2022 09:49:50 +0200 Subject: [PATCH 2/9] Identify SIGKILL and log ad hoc error msg --- sem/runner.py | 45 +++++++++++++++++++++++++-------------------- 1 file changed, 25 insertions(+), 20 deletions(-) diff --git a/sem/runner.py b/sem/runner.py index 2b4e0ef..9172672 100644 --- a/sem/runner.py +++ b/sem/runner.py @@ -1,6 +1,7 @@ import importlib import os import re +from signal import SIGKILL import subprocess import time import uuid @@ -327,26 +328,30 @@ def run_simulations(self, parameter_list, data_folder, stop_on_errors=False): if return_code != 0: with open(stdout_file_path, 'r') as stdout_file, open( stderr_file_path, 'r') as stderr_file: - complete_command = sem.utils.get_command_from_result(self.script, current_result) - complete_command_debug = sem.utils.get_command_from_result(self.script, current_result, debug=True) - error_message = ('\nSimulation exited with an error.\n' - 'Params: %s\n' - 'Stderr: %s\n' - 'Stdout: %s\n' - 'Use this command to reproduce:\n' - '%s\n' - 'Debug with gdb:\n' - '%s' - % (parameter, - stderr_file.read(), - stdout_file.read(), - complete_command, - complete_command_debug)) - if stop_on_errors: - raise Exception(error_message) - print(error_message) - print('Return code:') - 
print(return_code) + common_error_message = ('\nSimulation exited with an error.\n' + 'Params: %s\n' + 'Stderr: %s\n' + 'Stdout: %s\n' + % (parameter, + stderr_file.read(), + stdout_file.read())) + if return_code == SIGKILL: + error_message = common_error_message + \ + 'Simulation likely killed due to an out of memory error.\n' + \ + 'Check kernel logs (dmesg, for instance) to confirm.' + else: + complete_command = sem.utils.get_command_from_result(self.script, current_result) + complete_command_debug = sem.utils.get_command_from_result(self.script, current_result, debug=True) + error_message = common_error_message + \ + ('Use this command to reproduce:\n' + '%s\n' + 'Debug with gdb:\n' + '%s' + % (complete_command, + complete_command_debug)) + if stop_on_errors: + raise Exception(error_message) + print(error_message) current_result['meta']['elapsed_time'] = end-start current_result['meta']['exitcode'] = return_code From 838d5af30d96a788eb9dbda53d6f7623accab9f7 Mon Sep 17 00:00:00 2001 From: pagmatt Date: Wed, 25 May 2022 10:04:25 +0200 Subject: [PATCH 3/9] Switch to self-def ret code constant --- sem/runner.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sem/runner.py b/sem/runner.py index 9172672..d1d85cf 100644 --- a/sem/runner.py +++ b/sem/runner.py @@ -1,14 +1,15 @@ import importlib import os import re -from signal import SIGKILL import subprocess import time import uuid import sem.utils from tqdm import tqdm +from typing import Final +SIGKILL_CODE: Final = -9 # Return code used to identify out of memory events. class SimulationRunner(object): """ @@ -335,7 +336,7 @@ def run_simulations(self, parameter_list, data_folder, stop_on_errors=False): % (parameter, stderr_file.read(), stdout_file.read())) - if return_code == SIGKILL: + if return_code == SIGKILL_CODE: error_message = common_error_message + \ 'Simulation likely killed due to an out of memory error.\n' + \ 'Check kernel logs (dmesg, for instance) to confirm.' 
From 93867cf5aab447329fa1c69a890fc88a28eea269 Mon Sep 17 00:00:00 2001 From: pagmatt Date: Wed, 25 May 2022 10:06:37 +0200 Subject: [PATCH 4/9] Fix indent --- sem/runner.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sem/runner.py b/sem/runner.py index d1d85cf..6648920 100644 --- a/sem/runner.py +++ b/sem/runner.py @@ -350,9 +350,10 @@ def run_simulations(self, parameter_list, data_folder, stop_on_errors=False): '%s' % (complete_command, complete_command_debug)) - if stop_on_errors: - raise Exception(error_message) - print(error_message) + + if stop_on_errors: + raise Exception(error_message) + print(error_message) current_result['meta']['elapsed_time'] = end-start current_result['meta']['exitcode'] = return_code From 3d03c1fd8ff997395be61dc797633bfe8e0606f6 Mon Sep 17 00:00:00 2001 From: pagmatt Date: Wed, 25 May 2022 12:14:04 +0200 Subject: [PATCH 5/9] Improve err msg --- sem/runner.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sem/runner.py b/sem/runner.py index 6648920..25d9fe2 100644 --- a/sem/runner.py +++ b/sem/runner.py @@ -329,8 +329,7 @@ def run_simulations(self, parameter_list, data_folder, stop_on_errors=False): if return_code != 0: with open(stdout_file_path, 'r') as stdout_file, open( stderr_file_path, 'r') as stderr_file: - common_error_message = ('\nSimulation exited with an error.\n' - 'Params: %s\n' + common_error_message = ('Params: %s\n' 'Stderr: %s\n' 'Stdout: %s\n' % (parameter, @@ -338,12 +337,13 @@ def run_simulations(self, parameter_list, data_folder, stop_on_errors=False): stdout_file.read())) if return_code == SIGKILL_CODE: error_message = common_error_message + \ - 'Simulation likely killed due to an out of memory error.\n' + \ - 'Check kernel logs (dmesg, for instance) to confirm.' + '\nSimulation likely killed due to an out of memory error.\n' + \ + 'Check kernel logs (dmesg, for instance) to confirm\n.' 
else: complete_command = sem.utils.get_command_from_result(self.script, current_result) complete_command_debug = sem.utils.get_command_from_result(self.script, current_result, debug=True) - error_message = common_error_message + \ + error_message = '\nSimulation exited with an error.\n' + \ + common_error_message + \ ('Use this command to reproduce:\n' '%s\n' 'Debug with gdb:\n' From 8e863e2ea618974faed2c2efc9cc078a199d5019 Mon Sep 17 00:00:00 2001 From: pagmatt Date: Wed, 25 May 2022 13:12:25 +0200 Subject: [PATCH 6/9] Further improve err msg --- sem/runner.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sem/runner.py b/sem/runner.py index 25d9fe2..2f2c695 100644 --- a/sem/runner.py +++ b/sem/runner.py @@ -336,9 +336,9 @@ def run_simulations(self, parameter_list, data_folder, stop_on_errors=False): stderr_file.read(), stdout_file.read())) if return_code == SIGKILL_CODE: - error_message = common_error_message + \ - '\nSimulation likely killed due to an out of memory error.\n' + \ - 'Check kernel logs (dmesg, for instance) to confirm\n.' + error_message = '\nSimulation likely killed due to an out of memory error.\n' + \ + 'Check kernel logs (dmesg, for instance) to confirm\n.' + \ + common_error_message else: complete_command = sem.utils.get_command_from_result(self.script, current_result) complete_command_debug = sem.utils.get_command_from_result(self.script, current_result, debug=True) From 09452d1b31de705e036cf46454ba999ef9854677 Mon Sep 17 00:00:00 2001 From: pagmatt Date: Wed, 25 May 2022 13:22:09 +0200 Subject: [PATCH 7/9] Self-review --- sem/runner.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sem/runner.py b/sem/runner.py index 2f2c695..a57010f 100644 --- a/sem/runner.py +++ b/sem/runner.py @@ -9,7 +9,7 @@ from tqdm import tqdm from typing import Final -SIGKILL_CODE: Final = -9 # Return code used to identify out of memory events. 
+SIGKILL_CODE: Final = -9 # POSIX return code which usually corresponds to out of memory events. class SimulationRunner(object): """ @@ -337,7 +337,7 @@ def run_simulations(self, parameter_list, data_folder, stop_on_errors=False): stdout_file.read())) if return_code == SIGKILL_CODE: error_message = '\nSimulation likely killed due to an out of memory error.\n' + \ - 'Check kernel logs (dmesg, for instance) to confirm\n.' + \ + 'Check kernel logs (dmesg, for instance) to confirm.\n' + \ common_error_message else: complete_command = sem.utils.get_command_from_result(self.script, current_result) From 0b3678507d86435d34db1b35217d9c322e5fd548 Mon Sep 17 00:00:00 2001 From: pagmatt Date: Mon, 30 May 2022 10:27:03 +0200 Subject: [PATCH 8/9] Most DM comments --- sem/runner.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sem/runner.py b/sem/runner.py index a57010f..8492eef 100644 --- a/sem/runner.py +++ b/sem/runner.py @@ -332,11 +332,13 @@ def run_simulations(self, parameter_list, data_folder, stop_on_errors=False): common_error_message = ('Params: %s\n' 'Stderr: %s\n' 'Stdout: %s\n' + 'Return code: %s\n' % (parameter, stderr_file.read(), - stdout_file.read())) + stdout_file.read(), + return_code)) if return_code == SIGKILL_CODE: - error_message = '\nSimulation likely killed due to an out of memory error.\n' + \ + error_message = '\nSimulation was killed. 
Possible causes may include an out of memory error.\n' + \ 'Check kernel logs (dmesg, for instance) to confirm.\n' + \ common_error_message else: @@ -353,7 +355,7 @@ def run_simulations(self, parameter_list, data_folder, stop_on_errors=False): if stop_on_errors: raise Exception(error_message) - print(error_message) + print(error_message) current_result['meta']['elapsed_time'] = end-start current_result['meta']['exitcode'] = return_code From 805cf723a56364d925f8fdf43591250089cfa057 Mon Sep 17 00:00:00 2001 From: pagmatt Date: Mon, 30 May 2022 10:28:35 +0200 Subject: [PATCH 9/9] Proper string formatting --- sem/runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sem/runner.py b/sem/runner.py index 8492eef..330121e 100644 --- a/sem/runner.py +++ b/sem/runner.py @@ -332,7 +332,7 @@ def run_simulations(self, parameter_list, data_folder, stop_on_errors=False): common_error_message = ('Params: %s\n' 'Stderr: %s\n' 'Stdout: %s\n' - 'Return code: %s\n' + 'Return code: %d\n' % (parameter, stderr_file.read(), stdout_file.read(),