creating submission
bri25yu committed Nov 2, 2022
1 parent 5c3a36e commit 845a082
Showing 8 changed files with 299 additions and 0 deletions.
240 changes: 240 additions & 0 deletions hw4/cs285/scripts/create_graphs.py
@@ -0,0 +1,240 @@
from typing import List, Tuple

import os
from pathlib import Path

import tensorflow as tf
tf.get_logger().setLevel("ERROR")

from tensorflow.python.summary.summary_iterator import summary_iterator

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns


run_logs_dir = os.path.join(*Path(__file__).parts[:-3], "run_logs")

def load_eventfile_by_folder_prefix(prefix: str) -> List:
# Find the appropriate full file name
is_prefix = lambda s: s.startswith(prefix)
# We take the first element by default
full_folder_name = list(filter(is_prefix, os.listdir(run_logs_dir)))[0]

# Get the full path of the eventfile directory
eventfile_dir = os.path.join(run_logs_dir, full_folder_name)

# Get the eventfile_path
eventfile_name = os.listdir(eventfile_dir)[0]
eventfile_path = os.path.join(eventfile_dir, eventfile_name)

return list(summary_iterator(eventfile_path))


def filter_summaries_by_tag(summaries: List, tag: str) -> List[Tuple]:
"""
Filters summaries for all events
"""
value_is_tag = lambda v: v.tag == tag
get_value_tag_from_event = lambda e: next(filter(value_is_tag, e.summary.value), None)

filtered = []
for event in summaries:
value = get_value_tag_from_event(event)
if value is None:
continue

filtered.append((event, value))

return filtered


def get_first_simple_value(summaries: List[Tuple]) -> float:
"""
Takes in the output of `filter_summaries_by_tag`
"""
return next(iter(summaries))[1].simple_value


def get_first_tag_simple_value(summaries: List, tag: str) -> float:
filtered = filter_summaries_by_tag(summaries, tag)
return get_first_simple_value(filtered)


def get_property_and_steps(experiment_prefix: str, property_name: str) -> Tuple[List[float], List[float]]:
"""
Returns a tuple of steps and property values.
The arrays are sorted ascending in steps.
"""
experiment_summary = load_eventfile_by_folder_prefix(experiment_prefix)

train_returns = filter_summaries_by_tag(experiment_summary, property_name)
steps = [r[0].step for r in train_returns]
returns = [r[1].simple_value for r in train_returns]

steps = np.array(steps)
returns = np.array(returns)

sorted_idxs = steps.argsort()

steps = steps[sorted_idxs]
returns = returns[sorted_idxs]

return steps, returns
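
# Hypothetical usage of the helpers above (assumes a matching folder exists under run_logs/):
#   steps, eval_returns = get_property_and_steps("hw4_q2_obstacles", "Eval_AverageReturn")
#   plt.plot(steps, eval_returns)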


def get_train_averagereturns(experiment_prefix: str) -> Tuple[List[float], List[float]]:
return get_property_and_steps(experiment_prefix, "Train_AverageReturn")


def get_eval_averagereturns(experiment_prefix: str) -> Tuple[List[float], List[float]]:
return get_property_and_steps(experiment_prefix, "Eval_AverageReturn")


def get_train_bestreturns(experiment_prefix: str) -> Tuple[List[float], List[float]]:
return get_property_and_steps(experiment_prefix, "Train_BestReturn")


def q2():
config_prefix = "hw4_q2_obstacles_singleiteration_obstacles-cs285-v0"

rows, cols = 1, 1
fig, ax = plt.subplots(rows, cols, figsize=(10 * cols, 8 * rows))

def scatter_and_label(steps, returns, label, expected_return):
points = ax.scatter(steps, returns, label=label)
for xy in zip(steps, returns):
ax.annotate(f"({xy[0]}, {xy[1]:.1f})", xy=xy, textcoords="data")

ax.hlines(y=expected_return, xmin=-0.3, xmax=0.3, label=f"Expected {label.lower()}", linestyles=["--"], color=points.get_facecolor())

scatter_and_label(*get_eval_averagereturns(config_prefix), "Eval returns", -50)
scatter_and_label(*get_train_averagereturns(config_prefix), "Train returns", -160)

ax.set_xlabel("Train iterations")
ax.set_ylabel("Return")
ax.legend()

fig.suptitle("Single iteration MPC policy performance on the obstacles environment")
fig.tight_layout()
fig.savefig("report_resources/q2.png")


def q3():
# Use a list (not a set) so the plotting order is deterministic
configs = [
    ("Obstacles", "hw4_q3_obstacles_obstacles-cs285-v0", -20),
    ("Reacher", "hw4_q3_reacher_reacher-cs285-v0", -250),
    ("Cheetah", "hw4_q3_cheetah_cheetah-cs285-v0", 350),
]

rows, cols = 1, 3
fig, axs = plt.subplots(rows, cols, figsize=(10 * cols, 8 * rows))

for ax, (config_name, config_prefix, expected_return) in zip(axs, configs):
steps, eval_returns = get_eval_averagereturns(config_prefix)

ax.plot(steps, eval_returns)
ax.hlines(y=expected_return, xmin=min(steps), xmax=max(steps), label="Expected eval return", color="red")
ax.set_title(f"MBRL performance on {config_name} environment")
ax.set_xlabel("Train iterations")
ax.set_ylabel("Eval average return")
ax.legend()

fig.suptitle("Model based RL (MBRL) performance on various environments")
fig.tight_layout()
fig.savefig("report_resources/q3.png")


def q4():
prefix_template = "hw4_q4_reacher_{key}{value}_reacher-cs285-v0"
configs = [
{
"name": "Ensemble size",
"key": "ensemble",
"values": [1, 3, 5],
},
{
"name": "Horizon",
"key": "horizon",
"values": [5, 15, 30],
},
{
"name": "Num candidate sequences",
"key": "numseq",
"values": [100, 1000],
},
]

rows, cols = 1, 3
fig, axs = plt.subplots(rows, cols, figsize=(10 * cols, 8 * rows))

for ax, config in zip(axs, configs):
name, key, values = config["name"], config["key"], config["values"]
for value in values:
config_prefix = prefix_template.format(key=key, value=value)
steps, eval_returns = get_eval_averagereturns(config_prefix)

ax.plot(steps, eval_returns, label=f"{name}={value}")

ax.set_title(f"Ablation over {name.lower()}")
ax.set_xlabel("Train iterations")
ax.set_ylabel("Eval average return")
ax.legend()

fig.suptitle("Ablation of model-based RL (MBRL) performance on reacher environment")
fig.tight_layout()
fig.savefig("report_resources/q4.png")


def q5():
configs = {
"CEM 2 iterations": "hw4_q5_cheetah_cem_2_cheetah-cs285-v0",
"CEM 4 iterations": "hw4_q5_cheetah_cem_4_cheetah-cs285-v0",
"Random shooting": "hw4_q5_cheetah_random_cheetah-cs285-v0",
}

rows, cols = 1, 1
fig, ax = plt.subplots(rows, cols, figsize=(10 * cols, 8 * rows))

for config_name, config_prefix in configs.items():
steps, eval_returns = get_eval_averagereturns(config_prefix)

ax.plot(steps, eval_returns, label=config_name)

ax.set_xlabel("Train iterations")
ax.set_ylabel("Eval average return")
ax.legend()

fig.suptitle("Comparison of sampling methods for Model based RL (MBRL) performance on cheetah environment")
fig.tight_layout()
fig.savefig("report_resources/q5.png")


def q6():
configs = {
"MBPO rollout length 0": "hw4_q6_cheetah_rlenl0_cheetah-cs285-v0",
"MBPO rollout length 1": "hw4_q6_cheetah_rlen1_cheetah-cs285-v0",
"MBPO rollout length 10": "hw4_q6_cheetah_rlen10_cheetah-cs285-v0",
}

rows, cols = 1, 1
fig, ax = plt.subplots(rows, cols, figsize=(10 * cols, 8 * rows))

for config_name, config_prefix in configs.items():
steps, eval_returns = get_eval_averagereturns(config_prefix)

ax.plot(steps, eval_returns, label=config_name)

ax.set_xlabel("Train iterations")
ax.set_ylabel("Eval average return")
ax.legend()

fig.suptitle("Comparison of rollout lengths for model-based policy optimization (MBPO) performance on cheetah environment")
fig.tight_layout()
fig.savefig("report_resources/q6.png")


if __name__ == "__main__":
q3()
59 changes: 59 additions & 0 deletions hw4/report.md
@@ -0,0 +1,59 @@
# Problem 1
<div style="text-align: center">
<img src="run_logs/hw4_q1_cheetah_n500_arch1x32_cheetah-cs285-v0_01-11-2022_22-54-53/itr_0_predictions.png" width="250" height="200" />
<img src="run_logs/hw4_q1_cheetah_n500_arch2x250_cheetah-cs285-v0_01-11-2022_22-55-11/itr_0_predictions.png" width="250" height="200" />
<img src="run_logs/hw4_q1_cheetah_n5_arch2x250_cheetah-cs285-v0_01-11-2022_22-55-04/itr_0_predictions.png" width="250" height="200" />
</div>

Prediction results for a small model (1 layer of size 32) trained for 500 iterations (left), a large model (2 layers of size 250) trained for 500 iterations (center), and a large model (2 layers of size 250) trained for 5 iterations (right).

The large model trained for 500 iterations clearly performs best, with the lowest mean prediction error (MPE) of 0.028, outclassing the small model trained for the same number of iterations. However, the large model needs many iterations of training: the same architecture trained for only 5 iterations performed far worse than either of the other two configurations.

This environment may be too complex for smaller models to fit accurately, and also too complex for larger models to learn well without sufficient data.
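For reference, a minimal sketch of the two architectures being compared (the input/output sizes and activation here are placeholders, not the assignment's exact dynamics model):

```python
import torch.nn as nn

# Placeholder dimensions, not the actual half-cheetah observation/action sizes.
obs_dim, ac_dim = 20, 6

small_model = nn.Sequential(   # "arch 1x32": one hidden layer of size 32
    nn.Linear(obs_dim + ac_dim, 32), nn.Tanh(),
    nn.Linear(32, obs_dim),
)

large_model = nn.Sequential(   # "arch 2x250": two hidden layers of size 250
    nn.Linear(obs_dim + ac_dim, 250), nn.Tanh(),
    nn.Linear(250, 250), nn.Tanh(),
    nn.Linear(250, obs_dim),
)
```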

<div style="page-break-after: always;"></div>

# Problem 2
<div style="text-align: center">
<img src="report_resources/q2.png" width="250" height="200" />
</div>

<div style="page-break-after: always;"></div>

# Problem 3
<div style="text-align: center">
<img src="report_resources/q3.png" width="750" height="200" />
</div>

<div style="page-break-after: always;"></div>

# Problem 4
<div style="text-align: center">
<img src="report_resources/q4.png" width="750" height="200" />
</div>

All three ablated ensemble sizes reached roughly the same eval average return; an ensemble size of 3 did so most consistently. An ensemble size of 5 had the lowest initial performance, probably because averaging the initial estimates of more networks is more variable early in training.

The ablation with a horizon length of 5 was wildly variable, jumping up and down, which suggests that a horizon of 5 is too short. The ablation with a horizon length of 30 performed much worse than both of the other configs, signaling that a horizon of 30 is too long. This is probably due to distributional drift: model error compounds over the rollout, and past a certain horizon length the planned trajectory is no longer useful. The best horizon length was 15, the middle ground between the two. The optimal horizon length probably depends on the maximum episode length of a particular environment and on how frequently rewards are received.

The ablation over the number of generated candidate action sequences tells us that the more candidate sequences we generate, the better our estimate of the true "optimal" action: the config with 1000 candidate action sequences was both less variable and better performing than the config with 100.
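As a rough sketch of how these three hyperparameters (ensemble size, horizon, number of candidate sequences) enter random-shooting MPC, assuming placeholder model and reward interfaces rather than the homework's actual API:

```python
import numpy as np

def random_shooting_action(obs, dynamics_ensemble, reward_fn, ac_dim,
                           num_sequences=1000, horizon=15, ac_low=-1.0, ac_high=1.0):
    """Pick the first action of the best of `num_sequences` random action sequences.

    `dynamics_ensemble` is a list of learned models mapping a batch of (obs, ac)
    to predicted next obs, and `reward_fn(obs, ac)` returns per-candidate rewards;
    both are placeholders here.
    """
    # Sample N candidate action sequences of length `horizon`.
    candidates = np.random.uniform(ac_low, ac_high, size=(num_sequences, horizon, ac_dim))

    # Evaluate every sequence under each ensemble member and average the predicted returns.
    returns = np.zeros(num_sequences)
    for model in dynamics_ensemble:
        obs_pred = np.tile(obs, (num_sequences, 1))
        for t in range(horizon):
            returns += reward_fn(obs_pred, candidates[:, t])
            obs_pred = model(obs_pred, candidates[:, t])  # model error compounds as t grows
    returns /= len(dynamics_ensemble)

    # MPC executes only the first action of the best sequence, then replans.
    return candidates[returns.argmax(), 0]
```

More candidate sequences give a better maximizer over the same distribution of random plans, which matches the lower variance seen with 1000 sequences.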

<div style="page-break-after: always;"></div>

# Problem 5
<div style="text-align: center">
<img src="report_resources/q5.png" width="250" height="200" />
</div>

The cross-entropy method (CEM) is much more consistent than random shooting, both in absolute average return and in improvement over time.

CEM with 4 iterations is much better than CEM with 2 iterations: the former makes large performance jumps between individual training iterations and ends at almost double the final score of the latter. This is probably because CEM iteratively refits the sampling distribution to the elites; more refinement iterations reduce variance and improve the estimate when sampling from the normal distribution with elite mean and variance.
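A minimal sketch of the CEM refinement loop described above (names, defaults, and the smoothing factor are assumptions, not the assignment's exact implementation):

```python
import numpy as np

def cem_action(evaluate_sequences, ac_dim, horizon=15, num_sequences=1000,
               num_elites=10, cem_iterations=4, alpha=1.0):
    """Iteratively refit a Gaussian over action sequences toward the elites.

    `evaluate_sequences(seqs)` returns a predicted return per sequence (placeholder).
    """
    mean = np.zeros((horizon, ac_dim))
    std = np.ones((horizon, ac_dim))

    for _ in range(cem_iterations):
        # Sample candidate sequences from the current Gaussian.
        seqs = mean + std * np.random.randn(num_sequences, horizon, ac_dim)
        returns = evaluate_sequences(seqs)

        # Keep the top `num_elites` sequences and move the distribution toward them.
        elites = seqs[np.argsort(returns)[-num_elites:]]
        mean = alpha * elites.mean(axis=0) + (1 - alpha) * mean
        std = alpha * elites.std(axis=0) + (1 - alpha) * std

    # More iterations concentrate the Gaussian on high-return sequences,
    # consistent with the larger gains seen for 4 iterations above.
    return mean[0]
```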

<div style="page-break-after: always;"></div>

# Problem 6
<div style="text-align: center">
<img src="report_resources/q6.png" width="250" height="200" />
</div>

MBPO with rollout lengths of 0 and 1 makes progress toward non-negative return but is unable to move much past 0 eval return, i.e. it reaches roughly 0 return and then oscillates around it. This is probably because the rollout length is too short to generate experience that is informative to the policy in this particular environment. The config with rollout length 10 steadily improves over iterations, reaching an impressive eval average return of around 2000. MBPO with rollouts of length 10 makes significant progress in far fewer policy training iterations than model-free (rollout length 0) or Dyna-style (rollout length 1) policy optimization.
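A minimal sketch of the rollout-length knob being varied here (the buffer, model, and policy interfaces are placeholders, not the homework's actual classes):

```python
def add_model_rollouts(real_buffer, model_buffer, dynamics_model, policy, rollout_length=10):
    """Branch short model-based rollouts from real states and store them for policy training.

    rollout_length=0 reduces to training the policy on real data only (model-free);
    rollout_length=1 is Dyna-style; longer rollouts give the policy more imagined
    experience per real environment step.
    """
    if rollout_length == 0:
        return
    obs = real_buffer.sample_states()                  # start from states seen in the real env
    for _ in range(rollout_length):
        ac = policy(obs)
        next_obs, rew = dynamics_model.step(obs, ac)   # imagined transition under the learned model
        model_buffer.add(obs, ac, rew, next_obs)
        obs = next_obs
```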
Empty file added hw4/report_resources/.gitkeep
Empty file.
Binary file added hw4/report_resources/q2.png
Binary file added hw4/report_resources/q3.png
Binary file added hw4/report_resources/q4.png
Binary file added hw4/report_resources/q5.png
Binary file added hw4/report_resources/q6.png
