Skip to content

Commit

Permalink
fix gpu allocation bug in hp_sweeper
Browse files: browse the repository at this point in the history
  • Loading branch information
taochenshh committed Jul 15, 2020
1 parent 5240b10 commit df05b5d
Showing 1 changed file with 6 additions and 6 deletions.
12 changes: 6 additions & 6 deletions easyrl/utils/hp_sweeper.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -87,20 +87,20 @@ def get_sweep_cmds(yaml_file):
num_exps = len(cmds)
gpus_free_mem = [all_gpus_stats[x].memoryFree for x in gpus_to_use]
allowable_gpu_jobs = [int(math.floor(x / gpu_mem_per_job)) for x in gpus_free_mem]
jobs_run_on_gpu = [0 for i in range(len(gpus_free_mem))]
can_run_on_gpu = [True for i in range(len(gpus_free_mem))]
jobs_run_on_gpu = [0 for i in range(len(gpus_to_use))]
can_run_on_gpu = [True for i in range(len(gpus_to_use))]
gpu_id = 0
final_cmds = []
for idx in range(num_exps):
if not any(can_run_on_gpu):
logger.warning(f'Run out of GPUs!')
break
while not can_run_on_gpu[gpu_id]:
gpu_id = (gpu_id + 1) % len(gpus_free_mem)
final_cmds.append(cmds[idx] + f' --device=cuda:{gpu_id}')
gpu_id = (gpu_id + 1) % len(gpus_to_use)
final_cmds.append(cmds[idx] + f' --device=cuda:{gpus_to_use[gpu_id]}')
jobs_run_on_gpu[gpu_id] += 1
can_run_on_gpu[gpu_id] = jobs_run_on_gpu[gpu_id] < allowable_gpu_jobs[gpu_id]
gpu_id = (gpu_id + 1) % len(gpus_free_mem)
gpu_id = (gpu_id + 1) % len(gpus_to_use)
return final_cmds


Expand Down Expand Up @@ -149,7 +149,7 @@ def run_sweep_cmds(cmds):

def main():
parser = argparse.ArgumentParser()
parser.add_argument('--cfg_file', type=str,
parser.add_argument('--cfg_file', '-f', type=str,
required=True, help='config file (yaml)')
args = parser.parse_args()
cmds = get_sweep_cmds(args.cfg_file)
Expand Down

0 comments on commit df05b5d

Please sign in to comment.