-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathHelper.py
81 lines (69 loc) · 2.48 KB
/
Helper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Practical for course 'Reinforcement Learning',
Leiden University, The Netherlands
By Thomas Moerland
"""
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import savgol_filter
class LearningCurvePlot:
    """Convenience wrapper around a matplotlib figure for learning curves.

    Holds one figure/axes pair labelled 'Timestep' vs 'Episode Return';
    curves and reference lines are added incrementally, then written to
    disk with save().
    """

    def __init__(self, title=None):
        # One axes per plot; axis labels are fixed for RL learning curves.
        self.fig, self.ax = plt.subplots()
        self.ax.set_xlabel('Timestep')
        self.ax.set_ylabel('Episode Return')
        if title is not None:
            self.ax.set_title(title)

    def add_curve(self, x, y, label=None):
        """Plot y against x; label (if given) appears in the legend."""
        extra = {} if label is None else {'label': label}
        self.ax.plot(x, y, **extra)

    def set_ylim(self, lower, upper):
        """Fix the y-axis range to [lower, upper]."""
        self.ax.set_ylim([lower, upper])

    def add_hline(self, height, label):
        """Draw a dashed black horizontal reference line at the given height."""
        self.ax.axhline(height, ls='--', c='k', label=label)

    def save(self, name='test.png'):
        """Attach the legend and write the figure to *name* at 300 dpi."""
        self.ax.legend()
        self.fig.savefig(name, dpi=300)
def smooth(y, window, poly=2):
    """Smooth vector y with a Savitzky-Golay filter.

    y: vector to be smoothed
    window: size of the smoothing window
    poly: order of the polynomial fitted within each window (default 2)
    """
    smoothed = savgol_filter(y, window, polyorder=poly)
    return smoothed
def softmax(x, temp):
    """Return the softmax distribution of vector x at temperature 'temp'.

    Lower temperatures sharpen the distribution; higher ones flatten it.
    """
    scaled = x / temp
    # Shift by the maximum so np.exp cannot overflow; softmax is
    # invariant under a constant shift of its inputs.
    weights = np.exp(scaled - max(scaled))
    return weights / np.sum(weights)
def argmax(x):
    """Variant of np.argmax that breaks ties uniformly at random.

    Returns the index of a maximal element of x; when several elements
    share the maximum, one of their indices is chosen at random.
    Falls back to plain np.argmax if the tie-breaking path fails.
    """
    try:
        return np.random.choice(np.where(x == np.max(x))[0])
    # Fix: the original used a bare `except:`, which also swallows
    # KeyboardInterrupt/SystemExit; `Exception` keeps the same fallback
    # behaviour for ordinary errors without masking interpreter exits.
    except Exception:
        return np.argmax(x)
def linear_anneal(t, T, start, final, percentage):
    """Linear annealing scheduler.

    t: current timestep
    T: total timesteps
    start: initial value
    final: value after percentage*T steps
    percentage: percentage of T after which annealing finishes

    Returns the linearly interpolated value between start and final,
    clamped to final once t exceeds percentage*T.
    """
    final_from_T = int(percentage * T)
    # Fix: guard the zero-length annealing window (percentage*T < 1),
    # which previously raised ZeroDivisionError at t == 0.
    if final_from_T <= 0 or t > final_from_T:
        return final
    return final + (start - final) * (final_from_T - t) / final_from_T
if __name__ == '__main__':
    # Smoke test: build and save a learning-curve plot.
    x = np.arange(100)
    y = 0.01 * x + np.random.rand(100) - 0.4  # generate some learning curve y
    LCTest = LearningCurvePlot(title="Test Learning Curve")
    # Fix: add_curve(x, y, label) requires both coordinates; the original
    # calls passed only y, raising TypeError (missing positional 'y').
    LCTest.add_curve(x, y, label='method 1')
    LCTest.add_curve(x, smooth(y, window=35), label='method 1 smoothed')
    LCTest.save(name='learning_curve_test.png')