combine_plotter.py
import os

import matplotlib.pyplot as plt
import numpy as np
import torch


def combined_rewards_plot(rewards_list_1, rewards_list_2, labels, environment_type, window_size=10):
    """
    Plots rewards per episode for two separate reward lists with optional smoothing.

    Args:
        rewards_list_1 (dict): A dictionary where keys are episodes and values are rewards for the first set.
        rewards_list_2 (dict): A dictionary where keys are episodes and values are rewards for the second set.
        labels (tuple): A tuple containing the labels for the two reward lists (e.g., ('Model A', 'Model B')).
        environment_type (str): The type of environment (for plot title).
        window_size (int, optional): The window size for moving average smoothing. Default is 10.
    """
    # Extract episode and reward values for both reward lists
    episodes_1 = np.array(list(rewards_list_1.keys())).flatten()
    rewards_1 = np.array(list(rewards_list_1.values())).flatten()
    episodes_2 = np.array(list(rewards_list_2.keys())).flatten()
    rewards_2 = np.array(list(rewards_list_2.values())).flatten()

    plt.figure(figsize=(12, 7))

    # Plot first reward list
    plt.plot(episodes_1, rewards_1, marker='o', linestyle='-', markersize=4, label=f'{labels[0]} Rewards')
    # Plot moving average for the first reward list
    if window_size > 1:
        moving_avg_1 = np.convolve(rewards_1, np.ones(window_size) / window_size, mode='valid')
        plt.plot(episodes_1[:len(moving_avg_1)], moving_avg_1, linestyle='--', color='r', alpha=0.8, label=f'{labels[0]} Moving Avg')

    # Plot second reward list
    plt.plot(episodes_2, rewards_2, marker='s', linestyle='-', markersize=4, label=f'{labels[1]} Rewards')
    # Plot moving average for the second reward list
    if window_size > 1:
        moving_avg_2 = np.convolve(rewards_2, np.ones(window_size) / window_size, mode='valid')
        plt.plot(episodes_2[:len(moving_avg_2)], moving_avg_2, linestyle='--', color='b', alpha=0.8, label=f'{labels[1]} Moving Avg')

    plt.title(f'Comparison of Rewards per Episode: {environment_type}', fontsize=20)
    plt.xlabel('Episode', fontsize=14)
    plt.ylabel('Reward', fontsize=14)
    plt.grid(True)
    plt.legend(fontsize=12)
    plt.tight_layout()
    plt.show()
    return plt
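

# Illustrative usage sketch (not part of the original script): builds two synthetic
# {episode: reward} dictionaries and compares them with combined_rewards_plot.
# The data, labels, and function name below are made-up placeholders; call it manually if needed.
def _demo_combined_rewards_plot(n_episodes=100):
    rng = np.random.default_rng(seed=0)
    # Two noisy, upward-trending reward series keyed by episode number
    rewards_a = {ep: float(ep * 0.10 + rng.normal(scale=2.0)) for ep in range(n_episodes)}
    rewards_b = {ep: float(ep * 0.15 + rng.normal(scale=2.0)) for ep in range(n_episodes)}
    combined_rewards_plot(rewards_a, rewards_b, ('Model A', 'Model B'),
                          'Synthetic Environment', window_size=10)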


def combined_rewards_plot2(rewards_list_1, rewards_list_2, rewards_list_3, labels, environment_type, window_size=10):
    """
    Plots rewards per episode for three separate reward lists with optional smoothing.

    Args:
        rewards_list_1 (dict): A dictionary where keys are episodes and values are rewards for the first set.
        rewards_list_2 (dict): A dictionary where keys are episodes and values are rewards for the second set.
        rewards_list_3 (dict): A dictionary where keys are episodes and values are rewards for the third set.
        labels (tuple): A tuple containing the labels for the three reward lists (e.g., ('Model A', 'Model B', 'Model C')).
        environment_type (str): The type of environment (for plot title).
        window_size (int, optional): The window size for moving average smoothing. Default is 10.
    """
    # Extract episode and reward values for all three reward lists
    episodes_1 = np.array(list(rewards_list_1.keys())).flatten()
    rewards_1 = np.array(list(rewards_list_1.values())).flatten()
    episodes_2 = np.array(list(rewards_list_2.keys())).flatten()
    rewards_2 = np.array(list(rewards_list_2.values())).flatten()
    episodes_3 = np.array(list(rewards_list_3.keys())).flatten()
    rewards_3 = np.array(list(rewards_list_3.values())).flatten()

    plt.figure(figsize=(12, 7))

    # Plot first reward list
    plt.plot(episodes_1, rewards_1, marker='o', linestyle='-', markersize=4, label=f'{labels[0]} Rewards')
    # Plot moving average for the first reward list
    if window_size > 1:
        moving_avg_1 = np.convolve(rewards_1, np.ones(window_size) / window_size, mode='valid')
        plt.plot(episodes_1[:len(moving_avg_1)], moving_avg_1, linestyle='--', color='r', alpha=0.8, label=f'{labels[0]} Moving Avg')

    # Plot second reward list
    plt.plot(episodes_2, rewards_2, marker='s', linestyle='-', markersize=4, label=f'{labels[1]} Rewards')
    # Plot moving average for the second reward list
    if window_size > 1:
        moving_avg_2 = np.convolve(rewards_2, np.ones(window_size) / window_size, mode='valid')
        plt.plot(episodes_2[:len(moving_avg_2)], moving_avg_2, linestyle='--', color='b', alpha=0.8, label=f'{labels[1]} Moving Avg')

    # Plot third reward list (distinct marker and color so it is distinguishable from the second)
    plt.plot(episodes_3, rewards_3, marker='^', linestyle='-', markersize=4, label=f'{labels[2]} Rewards')
    # Plot moving average for the third reward list
    if window_size > 1:
        moving_avg_3 = np.convolve(rewards_3, np.ones(window_size) / window_size, mode='valid')
        plt.plot(episodes_3[:len(moving_avg_3)], moving_avg_3, linestyle='--', color='g', alpha=0.8, label=f'{labels[2]} Moving Avg')

    plt.title(f'Comparison of Rewards per Episode: {environment_type}', fontsize=20)
    plt.xlabel('Episode', fontsize=14)
    plt.ylabel('Reward', fontsize=14)
    plt.grid(True)
    plt.legend(fontsize=12)
    plt.tight_layout()
    plt.savefig('All plots combined SUMO.png')
    plt.show()
    return plt


def load_variables(file_path):
    try:
        # Load the variables from the specified file
        loaded_variables = torch.load(file_path)
        print(f"Loaded variables from {file_path}.")
        return loaded_variables
    except FileNotFoundError:
        print(f"Error: File not found at {file_path}.")
        return None
    except Exception as e:
        print(f"An error occurred while loading variables: {e}")
        return None
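

# Illustrative sketch (assumption: the saved files hold {episode: reward} dicts,
# as the plotting functions above expect). Shows how load_variables could feed
# combined_rewards_plot; the file names reuse those from the __main__ block below,
# and the function name is a placeholder.
def _demo_load_and_plot(dir_path='saved_variables'):
    dqn = load_variables(os.path.join(dir_path, 'dqn2way_rewards_per_episode.pkl'))
    ddqn = load_variables(os.path.join(dir_path, 'ddqn2way_rewards_per_episode.pkl'))
    if dqn is not None and ddqn is not None:
        combined_rewards_plot(dqn, ddqn, ('DQN', 'DDQN'), 'Traffic Simulation', window_size=10)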


if __name__ == "__main__":
    # Example usage
    dir_path = 'saved_variables'

    # Load DDQN variables
    ddqn_rewards = torch.load(os.path.join(dir_path, 'ddqn2way_rewards.pkl'))
    ddqn_losses = torch.load(os.path.join(dir_path, 'ddqn2way_losses.pkl'))
    ddqn_rewards_per_episode = torch.load(os.path.join(dir_path, 'ddqn2way_rewards_per_episode.pkl'))
    ddqn_epsilon_values = torch.load(os.path.join(dir_path, 'ddqn2way_epsilon_values.pkl'))

    # Load DQN variables
    dqn_rewards = torch.load(os.path.join(dir_path, 'dqn2way_rewards.pkl'))
    dqn_losses = torch.load(os.path.join(dir_path, 'dqn2way_losses.pkl'))
    dqn_rewards_per_episode = torch.load(os.path.join(dir_path, 'dqn2way_rewards_per_episode.pkl'))
    dqn_epsilon_values = torch.load(os.path.join(dir_path, 'dqn2way_epsilon_values.pkl'))

    labels = ('DQN', 'DDQN')
    environment_type = 'Traffic Simulation'
    window_size = 10

    # Similarly load for A2C and A3C
    a3c_rewards_per_episode = torch.load(os.path.join(dir_path, 'a3c_rewards_per_episode.pth'))
    a2c_rewards_per_episode = torch.load(os.path.join(dir_path, 'sum2way_a2c_rewards.pkl'))
    labels2 = ('A2C', 'A3C')
    # a3c_average_rewards = torch.load(os.path.join(dir_path, 'a3c_average_rewards.pkl'))

    # Convert the A3C reward sequence into an {episode: reward} dict to match the plotting functions
    a3c_rewards_per_ep_ = {i: rew for i, rew in enumerate(a3c_rewards_per_episode)}

    # combined_rewards_plot(dqn_rewards_per_episode, ddqn_rewards_per_episode, labels, environment_type, window_size)
    combined_rewards_plot2(a3c_rewards_per_ep_, dqn_rewards_per_episode, ddqn_rewards_per_episode, ('A3C', 'DQN', 'DDQN'), environment_type, window_size)