-
Notifications
You must be signed in to change notification settings - Fork 9
/
AGCRLEnvWater.py
71 lines (66 loc) · 2.88 KB
/
AGCRLEnvWater.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import numpy as np
class AGCRLEnv:
    """Replay environment that scores an agent against recorded team actions.

    The environment walks row by row through one recorded trajectory per
    "team".  At every step the agent picks an index into ``action_space``;
    the reward compares that choice with the action recorded for the same
    row in ``actions``.

    Args:
        observations: Indexable collection with one entry per team.  Each
            entry must support ``len()`` and ``.iloc[row]``
            (presumably a pandas DataFrame - confirm against the caller).
        actions: Indexable collection with one entry per team; each entry is
            indexed as ``actions[team][action_parameter][row]``.
        action_parameter: Key selecting the recorded action series to imitate.
        action_space: Ordered sequence of the discrete action values the
            agent can choose from (needs at least two entries).
        num_teams: Number of teams to cycle through on ``reset``.  Defaults
            to ``min(5, len(observations))`` which keeps the historical
            five-team cycle while no longer failing on smaller data sets.
    """

    # Number of teams the environment historically cycled through.
    DEFAULT_NUM_TEAMS = 5

    def __init__(self, observations, actions, action_parameter, action_space,
                 *, num_teams=None):
        """Store the data sets and position the environment on team 0, row 0."""
        self.action_parameter = action_parameter
        self.action_space = action_space
        self.observations = observations
        self.actions = actions
        if num_teams is None:
            num_teams = min(self.DEFAULT_NUM_TEAMS, len(observations))
        self.num_teams = num_teams
        # Spacing between the first two action values; kept as a public
        # attribute for callers that read it.
        self.interval = action_space[1] - action_space[0]
        # Reward of the most recent step; defined up front so it can be read
        # before the first call to ``step``.
        self.reward = 0
        self.teamindex = 0
        self._start_episode()

    def _load_observations(self):
        """Refresh ``curr_obs``/``next_obs`` from the current team and row."""
        team_obs = self.observations[self.teamindex]
        # Two separate assignments: a chained ``next = curr = ...`` would
        # overwrite the current observation with the following row.
        self.curr_obs = team_obs.iloc[self.index]
        self.next_obs = team_obs.iloc[self.index + 1]

    def _start_episode(self):
        """Rewind to row 0 of the current team and clear the reward counters.

        Returns:
            The first observation of the current team's trajectory.
        """
        self.index = 0
        self.observation_space = self.observations[self.teamindex].iloc[0].shape
        self._load_observations()
        self.curr_reward = 0
        self.ep_reward = 0
        return self.curr_obs

    def estimate_closest_as(self, value):
        """Return the entry of ``action_space`` that is nearest to ``value``.

        Ties are resolved in favour of the entry that appears first.  Values
        outside the covered range map to the nearest end of the action space.
        """
        candidates = np.asarray(self.action_space, dtype=float)
        nearest = int(np.argmin(np.abs(candidates - value)))
        return self.action_space[nearest]

    def step(self, action):
        """Score ``action`` for the current row and advance by one row.

        Args:
            action: Index into ``action_space`` chosen by the agent.

        Returns:
            Tuple ``(observation, reward, done)``.  When the trajectory is
            exhausted ``done`` is True, the environment has already been
            ``reset`` to the next team, and ``observation`` is that team's
            first observation.
        """
        self.reward = self.rewardfunc(action)
        self.curr_reward = self.reward
        self.ep_reward += self.reward
        self.index += 1
        # The episode ends once the row index reaches ``len - 2``, so the
        # final two rows of a trajectory are never served.
        if self.index >= len(self.observations[self.teamindex]) - 2:
            self.reset()
            return self.curr_obs, self.reward, True
        self._load_observations()
        return self.curr_obs, self.reward, False

    def rewardfunc(self, action):
        """Compare the chosen action with the action recorded for this row.

        Returns:
            1000 when the chosen action value equals the action-space entry
            closest to the recorded action; otherwise the negated absolute
            difference between the chosen value and the recorded action.
        """
        chosen = self.action_space[action]
        # NOTE(review): this is plain ``[row]`` indexing, while observations
        # use ``.iloc``; if the recorded actions are a pandas Series with a
        # non-default index the two may not line up - confirm with the caller.
        recorded = self.actions[self.teamindex][self.action_parameter][self.index]
        if chosen == self.estimate_closest_as(recorded):
            return 1000
        return -1 * abs(chosen - recorded)

    def reset(self):
        """Move to the next team (wrapping to team 0) and restart at row 0.

        Returns:
            The first observation of the newly selected team.
        """
        self.teamindex += 1
        if self.teamindex >= self.num_teams:
            self.teamindex = 0
        return self._start_episode()

    def resetinit(self):
        """Go back to team 0, row 0 and clear the reward counters.

        Returns:
            The first observation of team 0.
        """
        self.teamindex = 0
        return self._start_episode()