Skip to content

Commit

Permalink
logging cost
Browse files (browse the repository at this point in the history)
  • Loading branch information
manila95 committed Mar 12, 2024
1 parent: ae6ebdc; commit: d99febf
Showing 1 changed file with 10 additions and 4 deletions.
14 changes: 10 additions & 4 deletions cleanrl/ppo_rnd_envpool.py
Original file line number | Diff line number | Diff line change
@@ -33,9 +33,9 @@ def parse_args():
help="if toggled, cuda will be enabled by default")
parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
help="if toggled, this experiment will be tracked with Weights and Biases")
parser.add_argument("--wandb-project-name", type=str, default="cleanRL",
parser.add_argument("--wandb-project-name", type=str, default="risk-aware-exploration",
help="the wandb's project name")
parser.add_argument("--wandb-entity", type=str, default=None,
parser.add_argument("--wandb-entity", type=str, default="manila95",
help="the entity (team) of wandb's project")
parser.add_argument("--reward-goal", type=float, default=1.0,
help="reward to give when the goal is achieved")
@@ -266,7 +266,7 @@ def update(self, rews):
if __name__ == "__main__":
args = parse_args()
run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}"
if args.track:
if True:
import wandb

wandb.init(
@@ -344,6 +344,8 @@ def update(self, rews):
next_ob = []
print("End to initialize...")

total_cost = 0

for update in range(1, num_updates + 1):
# Annealing the rate if instructed to do so.
if args.anneal_lr:
@@ -405,6 +407,8 @@ def update(self, rews):
# Skip the envs that are not done
if info is None:
continue
ep_cost = info["cost"]
total_cost += ep_cost
avg_returns.append(info["episode"]["r"])
epi_ret = np.average(avg_returns)
print(f"Episodic Return: {info['episode']['r']}")
@@ -416,7 +420,9 @@ def update(self, rews):
global_step,
)
writer.add_scalar("charts/avg_episodic_return", epi_ret, global_step)

writer.add_scalar("charts/episodic_cost", ep_cost, global_step)
writer.add_scalar("charts/total_cost", total_cost, global_step)

curiosity_reward_per_env = np.array(
[discounted_reward.update(reward_per_step) for reward_per_step in curiosity_rewards.cpu().data.numpy().T]
)
Expand Down

0 comments on commit d99febf

Please sign in to comment.