
Commit ae055bc

adding cvar for first element of the quantile
manila95 committed Mar 27, 2024
1 parent b2b518c commit ae055bc
Showing 1 changed file with 9 additions and 2 deletions.
11 changes: 9 additions & 2 deletions cleanrl/ppo_continuous_action_wandb.py
@@ -512,7 +512,7 @@ def train(cfg):
cfg.use_risk = False if cfg.risk_model_path == "None" else True

import wandb
-run = wandb.init(config=vars(cfg), entity="kaustubh95",
+run = wandb.init(config=vars(cfg), entity="kaustubh_umontreal",
                project="risk_aware_exploration",
                monitor_gym=True,
                sync_tensorboard=True, save_code=True)
@@ -696,7 +696,7 @@ def train(cfg):
    risk_penalty = torch.Tensor([0.]).to(device)
else:
    risk_penalty = torch.sum(torch.div(risk_prob*cfg.risk_penalty, quantile_means))
-f_risk_penalty.append(risk_penalty.item())
+#f_risk_penalty.append(risk_penalty.item())
# print(next_risk)
risks[step] = next_risk
all_risks[global_step] = risk_prob#, axis=-1)
@@ -716,6 +716,13 @@ def train(cfg):
# TRY NOT TO MODIFY: execute the game and log data.
next_obs, reward, terminated, truncated, infos = envs.step(action.cpu().numpy())
done = np.logical_or(terminated, truncated)
+
+if cfg.use_risk:
+    risk_penalty = risk_prob[:,0] * cfg.risk_penalty
+
+    #print(risk_penalty)
+    f_risk_penalty.append(risk_penalty.item())
+
rewards[step] = torch.tensor(reward).to(device).view(-1) - risk_penalty

info_dict = {'reward': reward, 'done': done, 'cost': cost, 'obs': obs}
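In plain terms, the commit replaces the earlier normalized, summed penalty with a CVaR-style one: the penalty is now the probability mass in the first quantile bin of the risk model's output, scaled by `cfg.risk_penalty`, and subtracted from the environment reward. Below is a minimal sketch of that shaping step, assuming `risk_prob` has shape `(num_envs, num_quantiles)` with the first column holding the most imminent risk bin; the dummy tensors, `num_quantiles`, and the coefficient value are illustrative stand-ins, not values confirmed by the diff.

```python
import torch

# Illustrative stand-ins for values produced elsewhere in train():
# per-quantile risk probabilities for a single environment (shape assumed).
num_quantiles = 5
risk_prob = torch.softmax(torch.randn(1, num_quantiles), dim=-1)  # (num_envs, num_quantiles)
risk_penalty_coef = 1.0  # plays the role of cfg.risk_penalty (value assumed)

# CVaR-flavored shaping as in the commit: penalize by the probability
# mass in the first (most imminent) risk quantile, scaled by the coefficient.
risk_penalty = risk_prob[:, 0] * risk_penalty_coef

reward = torch.tensor([1.0])            # raw env reward, one env
shaped_reward = reward - risk_penalty   # mirrors rewards[step] = ... - risk_penalty
print(shaped_reward, risk_penalty.item())
```

One caveat visible in the diff itself: `risk_penalty.item()` only succeeds when a single environment is running, since with vectorized envs `risk_prob[:, 0]` has more than one element and `.item()` raises a RuntimeError; logging would then need something like `.tolist()` instead.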
