
Commit

minor
antoinefalisse committed Jul 5, 2024
1 parent d20f7fa commit 356195b
Showing 2 changed files with 7 additions and 2 deletions.
5 changes: 5 additions & 0 deletions app.py
@@ -149,6 +149,11 @@
r = requests.patch(trial_url, data={"status": "error"},
headers = {"Authorization": "Token {}".format(API_TOKEN)})
traceback.print_exc()

# Antoine: Removing this, it is too often causing the machines to stop. Not because
# the machines are failing, but because for instance the video is very long with a lot
# of people in it. We should not stop the machine for that. Originally the check was
# to catch a bug where the machine would hang, I have not seen this bug in a long time.
# args_as_strings = [str(arg) for arg in e.args]
# if len(args_as_strings) > 1 and 'pose detection timed out' in args_as_strings[1].lower():
# logging.info("Worker failed. Stopping machine.")
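
For context, a minimal sketch of what the worker's exception handler looks like after this change; the handler name and argument list are illustrative rather than the exact opencap-core code, and only the requests.patch call, the traceback call, and the commented-out check are taken from the diff above.

import traceback
import requests

def handle_worker_error(e, trial_url, api_token):
    # Mark the trial as errored on the backend so the failure is visible,
    # but keep the machine running so it can pick up the next trial.
    requests.patch(trial_url, data={"status": "error"},
                   headers={"Authorization": "Token {}".format(api_token)})
    traceback.print_exc()
    # The removed check inspected the exception message and stopped the machine
    # whenever pose detection timed out:
    #   args_as_strings = [str(arg) for arg in e.args]
    #   if len(args_as_strings) > 1 and 'pose detection timed out' in args_as_strings[1].lower():
    #       logging.info("Worker failed. Stopping machine.")
    # Long videos with many people in them can legitimately trip that timeout,
    # so the machine is no longer stopped for it.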
4 changes: 2 additions & 2 deletions utils.py
@@ -1537,7 +1537,7 @@ def checkResourceUsage(stop_machine_and_email=True):

if stop_machine_and_email and resourceUsage['disk_perc'] > 95:

message = "Disc is full on an OpenCap machine backend machine. It has been stopped. Data: " \
message = "Disc is full on an OpenCap backend machine. It has been stopped. Data: " \
+ json.dumps(resourceUsage)
sendStatusEmail(message=message)
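
A minimal sketch of the disk-usage guard touched by this hunk, assuming psutil for the resource numbers; sendStatusEmail is stubbed here, and the real checkResourceUsage in utils.py may collect more data and actually stop the machine.

import json
import psutil

def sendStatusEmail(message):
    # Stand-in for the utils.py email helper; the real one sends an email.
    print("STATUS EMAIL:", message)

def checkResourceUsage(stop_machine_and_email=True):
    # Gather basic usage figures for the machine.
    resourceUsage = {
        'cpu_perc': psutil.cpu_percent(),
        'mem_perc': psutil.virtual_memory().percent,
        'disk_perc': psutil.disk_usage('/').percent,
    }
    if stop_machine_and_email and resourceUsage['disk_perc'] > 95:
        message = "Disc is full on an OpenCap backend machine. It has been stopped. Data: " \
            + json.dumps(resourceUsage)
        sendStatusEmail(message=message)
        # ...stopping the machine is omitted in this sketch...
    return resourceUsage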

@@ -1554,7 +1554,7 @@ def checkCudaTF():
for gpu in gpus:
print(f"GPU: {gpu.name}")
else:
message = "Cuda check failed on an OpenCap machine backend machine. It has been stopped."
message = "Cuda check failed on an OpenCap backend machine. It has been stopped."
sendStatusEmail(message=message)
raise Exception("No GPU detected. Exiting.")
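
Similarly, a sketch of the GPU check in checkCudaTF, assuming TensorFlow 2.x's tf.config.list_physical_devices; the sendStatusEmail stub from the previous sketch stands in for the real helper.

import tensorflow as tf

def checkCudaTF():
    # List the GPUs TensorFlow can see; an empty list means CUDA is not usable.
    gpus = tf.config.list_physical_devices('GPU')
    if gpus:
        for gpu in gpus:
            print(f"GPU: {gpu.name}")
    else:
        message = "Cuda check failed on an OpenCap backend machine. It has been stopped."
        sendStatusEmail(message=message)
        raise Exception("No GPU detected. Exiting.")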

