Skip to content

Commit

Permalink
Merge pull request #64 from brianhlin/sw2929_fix_mem_parsing
Browse files: browse the repository at this point in the history
Fix Slurm/PBS memory parsing (SOFTWARE-2929)
  • Loading branch information
brianhlin authored Oct 30, 2017
2 parents af091f9 + e4dbcd4 commit 5fa75e4
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 22 deletions.
45 changes: 34 additions & 11 deletions src/scripts/pbs_status.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,25 +330,48 @@ def get_finished_job_stats(jobid):
except Exception, e:
log("Unable to read in CSV output from sacct: %s" % str(e))
return return_dict

sacct_parser = {'RemoteUserCpu': lambda orig, results: orig + \
convert_cpu_to_seconds(results["AveCPU"]) * int(results["AllocCPUS"]),
'ImageSize': lambda orig, results: orig + int(results["MaxRSS"].replace('K', '')),
'ExitCode': lambda orig, results: int(results["ExitCode"].split(":")[0])}

# Slurm can return more than one row for a single job,
# so sum up the relevant values across all rows.
for row in reader:
for attr, func in sacct_parser.items():
if row["AveCPU"] is not "":
try:
return_dict[attr] = func(return_dict[attr], row)
except (ValueError, KeyError), exc:
log("Could not parse %s for Jobid %s: %s" % (attr, jobid, exc))

# PBS completion
return_dict['RemoteUserCpu'] += convert_cpu_to_seconds(row["AveCPU"]) * int(row["AllocCPUS"])
except:
log("Failed to parse CPU usage for job id %s: %s, %s" % (jobid, row["AveCPU"], row["AllocCPUS"]))
raise
if row["MaxRSS"] is not "":
# Remove the trailing [KMGTP] suffix and scale the value accordingly.
# Note: we assume that every value carries a suffix, and we want
# the result in kilobytes.
try:
value = row["MaxRSS"]
factor = 1
if value[-1] == 'M':
factor = 1024
elif value[-1] == 'G':
factor = 1024 * 1024
elif value[-1] == 'T':
factor = 1024 * 1024 * 1024
elif value[-1] == 'P':
factor = 1024 * 1024 * 1024 * 1024
return_dict["ImageSize"] += int(value.strip('KMGTP')) * factor
except:
log("Failed to parse memory usage for job id %s: %s" % (jobid, row["MaxRSS"]))
raise
if row["ExitCode"] is not "":
try:
return_dict["ExitCode"] = int(row["ExitCode"].split(":")[0])
except:
log("Failed to parse ExitCode for job id %s: %s" % (jobid, row["ExitCode"]))
raise

# PBS completion
elif _cluster_type_cache == "pbs":
pass

return return_dict


_qstat_location_cache = None
def get_qstat_location():
Expand Down
43 changes: 32 additions & 11 deletions src/scripts/slurm_status.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,22 +319,43 @@ def get_finished_job_stats(jobid):
except Exception, e:
log("Unable to read in CSV output from sacct: %s" % str(e))
return return_dict

sacct_parser = {'RemoteUserCpu': lambda orig, results: orig + \
convert_cpu_to_seconds(results["AveCPU"]) * int(results["AllocCPUS"]),
'ImageSize': lambda orig, results: orig + int(results["MaxRSS"].replace('K', '')),
'ExitCode': lambda orig, results: int(results["ExitCode"].split(":")[0])}

# Slurm can return more than one row for a single job,
# so sum up the relevant values across all rows.
for row in reader:
for attr, func in sacct_parser.items():
if row["AveCPU"] is not "":
try:
return_dict[attr] = func(return_dict[attr], row)
except (ValueError, KeyError), exc:
log("Could not parse %s for Jobid %s: %s" % (attr, jobid, exc))

return_dict['RemoteUserCpu'] += convert_cpu_to_seconds(row["AveCPU"]) * int(row["AllocCPUS"])
except:
log("Failed to parse CPU usage for job id %s: %s, %s" % (jobid, row["AveCPU"], row["AllocCPUS"]))
raise
if row["MaxRSS"] is not "":
# Remove the trailing [KMGTP] suffix and scale the value accordingly.
# Note: we assume that every value carries a suffix, and we want
# the result in kilobytes.
try:
value = row["MaxRSS"]
factor = 1
if value[-1] == 'M':
factor = 1024
elif value[-1] == 'G':
factor = 1024 * 1024
elif value[-1] == 'T':
factor = 1024 * 1024 * 1024
elif value[-1] == 'P':
factor = 1024 * 1024 * 1024 * 1024
return_dict["ImageSize"] += int(value.strip('KMGTP')) * factor
except:
log("Failed to parse memory usage for job id %s: %s" % (jobid, row["MaxRSS"]))
raise
if row["ExitCode"] is not "":
try:
return_dict["ExitCode"] = int(row["ExitCode"].split(":")[0])
except:
log("Failed to parse memory usage for job id %s: %s" % (jobid, row["MaxRSS"]))
raise
return return_dict


_slurm_location_cache = None
def get_slurm_location(program):
Expand Down

0 comments on commit 5fa75e4

Please sign in to comment.