! ddpg.f90
subroutine ddpg(state_1,reward,Done,Simu_Step_In,action_1,Simu_Step_Out)
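  !! One DDPG (Deep Deterministic Policy Gradient) agent step, called once
  !! per PSCAD simulation step. Each call rebuilds the networks and the
  !! replay buffer from files, so all learned state (weights, buffer
  !! contents, episode statistics) is persisted to disk between calls.
  !! Inputs:  state_1 (observation), reward, Done (currently unused),
  !!          Simu_Step_In (step counter)
  !! Outputs: action_1 (control action), Simu_Step_Out (= Simu_Step_In + 1)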
  use mod_OUActionNoise, only: noise_type
  use mod_network, only: network_type
  use mod_buffer, only: buffer_type
  implicit none
  type(buffer_type) :: buffer
  integer :: buffer_capacity, batch_size, num_states, num_actions, Simu_Step_In, Simu_Step_Out
  real :: state_1, action_1
  real :: state(1), action(1)
  real :: prev_state(1)
  real :: reward, Done, episodic_reward
  real :: episodic_reward_store(100)
  real :: critic_lr, actor_lr, gamma, tau
  type(network_type) :: actor_model, critic_model_1, critic_model_2, critic_model, &
                        target_actor, target_critic_1, target_critic_2, target_critic
  type(noise_type) :: ou_noise
  real :: mean, std_dev, lower_bound, upper_bound
  logical :: Train, alive
  integer :: FID, i, episode_counter
  !! Environment settings
  num_states = 1
  num_actions = 1
  upper_bound = 5
  lower_bound = -5
  !! Training hyperparameters
  mean = 0
  std_dev = 0.2
  ou_noise = noise_type(mean, std_dev)
  ! Actor: state -> action, with a tanh output layer in [-1, 1]
  ! (note: there is a small bug here -- the product of the neuron counts of
  ! two adjacent layers must not be too large)
  actor_model = network_type([num_states, 256, 200, num_actions], activation='relu')
  call actor_model%layers(4)%set_activation('tanh')
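  ! The critic is built from three sub-networks: a state branch
  ! (critic_model_1, 32 output features), an action branch (critic_model_2,
  ! 32 output features), and a head (critic_model) whose 64 inputs match the
  ! two concatenated branch outputs and whose single linear output is the
  ! Q-value. The concatenation itself presumably happens inside buffer%learn.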
  critic_model_1 = network_type([num_states, 16, 32], activation='relu')
  critic_model_2 = network_type([num_actions, 32], activation='relu')
  critic_model = network_type([64, 256, 200, 1], activation='relu')
  call critic_model%layers(4)%set_activation('linear')
  target_actor = network_type([num_states, 256, 200, num_actions], activation='relu')
  call target_actor%layers(4)%set_activation('tanh')
  target_critic_1 = network_type([num_states, 16, 32], activation='relu')
  target_critic_2 = network_type([num_actions, 32], activation='relu')
  target_critic = network_type([64, 256, 200, 1], activation='relu')
  call target_critic%layers(4)%set_activation('linear')
  ! Initialize the target networks with the same weights as the online networks
  do i = 1, size(actor_model%layers)
    target_actor%layers(i)%b = actor_model%layers(i)%b
    target_actor%layers(i)%w = actor_model%layers(i)%w
  end do
  do i = 1, size(critic_model_1%layers)
    target_critic_1%layers(i)%b = critic_model_1%layers(i)%b
    target_critic_1%layers(i)%w = critic_model_1%layers(i)%w
  end do
  do i = 1, size(critic_model_2%layers)
    target_critic_2%layers(i)%b = critic_model_2%layers(i)%b
    target_critic_2%layers(i)%w = critic_model_2%layers(i)%w
  end do
  do i = 1, size(critic_model%layers)
    target_critic%layers(i)%b = critic_model%layers(i)%b
    target_critic%layers(i)%w = critic_model%layers(i)%w
  end do
  ! Learning rates for the actor and critic
  critic_lr = 0.0002
  actor_lr = 0.0001
  ! Discount factor for future rewards
  gamma = 0.9
  ! Soft-update rate for the target networks (Polyak averaging)
  tau = 0.0005
  ! Create the experience replay buffer
  buffer_capacity = 50000
  batch_size = 64
  buffer = buffer_type(buffer_capacity, batch_size, num_states, num_actions)
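  ! Note: this subroutine's locals are re-created on every call, so the
  ! buffer contents and its write counter survive only through the *_store
  ! files read and written below.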
  episodic_reward = 0
  ! Train, or just run the saved policy
  Train = .true.
  !! Start training
  if (Train) then
    if (Simu_Step_In == 0) then
      INQUIRE (file='D:\PSCAD_RL_Test\Fortran_DDPG\PSCAD_DDPG.if15\PSCAD_actor.txt', exist=alive)
      if (alive) then
        !/// Not the first episode: load the weights
        call actor_model%load("PSCAD_actor.txt")
        call critic_model_1%load("PSCAD_critic_1.txt")
        call critic_model_2%load("PSCAD_critic_2.txt")
        call critic_model%load("PSCAD_critic.txt")
        call target_actor%load("PSCAD_target_actor.txt")
        call target_critic_1%load("PSCAD_target_critic_1.txt")
        call target_critic_2%load("PSCAD_target_critic_2.txt")
        call target_critic%load("PSCAD_target_critic.txt")
      else
        !/// First episode: save the freshly initialized weights
        call actor_model%save("PSCAD_actor.txt")
        call critic_model_1%save("PSCAD_critic_1.txt")
        call critic_model_2%save("PSCAD_critic_2.txt")
        call critic_model%save("PSCAD_critic.txt")
        call target_actor%save("PSCAD_target_actor.txt")
        call target_critic_1%save("PSCAD_target_critic_1.txt")
        call target_critic_2%save("PSCAD_target_critic_2.txt")
        call target_critic%save("PSCAD_target_critic.txt")
        ! Save the (still empty) experience buffer
        Open(NewUnit=FID, File="buffer_counter_store", action='readwrite', form='unformatted', access='stream')
        Write(FID) buffer%buffer_counter
        Close(FID)
        Open(NewUnit=FID, File="state_buffer_store", action='readwrite', form='unformatted', access='stream')
        Write(FID) buffer%state_buffer
        Close(FID)
        Open(NewUnit=FID, File="action_buffer_store", action='readwrite', form='unformatted', access='stream')
        Write(FID) buffer%action_buffer
        Close(FID)
        Open(NewUnit=FID, File="reward_buffer_store", action='readwrite', form='unformatted', access='stream')
        Write(FID) buffer%reward_buffer
        Close(FID)
        Open(NewUnit=FID, File="next_state_buffer_store", action='readwrite', form='unformatted', access='stream')
        Write(FID) buffer%next_state_buffer
        Close(FID)
        ! Save the episode reward history for final plotting
        episode_counter = 1
        Open(NewUnit=FID, File="episode_counter", action='readwrite', form='unformatted', access='stream')
        Write(FID) episode_counter
        Close(FID)
        episodic_reward_store(episode_counter) = 0
        Open(NewUnit=FID, File="episodic_reward_store", action='readwrite', form='unformatted', access='stream')
        Write(FID) episodic_reward_store
        Close(FID)
      end if
      !/// First step of each episode: store 'episodic_reward' and 'prev_state'
      Open(NewUnit=FID, File="episodic_reward", action='readwrite', form='unformatted', access='stream')
      Write(FID) episodic_reward
      Close(FID)
      ! The current state becomes prev_state for the next call
      state(1) = state_1
      prev_state = state
      Open(NewUnit=FID, File="prev_state", action='readwrite', form='unformatted', access='stream')
      Write(FID) prev_state
      Close(FID)
      ! Execute the action and save it
      action = policy(state, lower_bound, upper_bound, actor_model, ou_noise)
      Open(NewUnit=FID, File="action", action='readwrite', form='unformatted', access='stream')
      Write(FID) action
      Close(FID)
      action_1 = action(1)
      Simu_Step_Out = Simu_Step_In + 1
    else
      ! Load the previous state and previous action
      Open(NewUnit=FID, File="prev_state", action='readwrite', form='unformatted', access='stream')
      Read(FID) prev_state
      Close(FID)
      Open(NewUnit=FID, File="action", action='readwrite', form='unformatted', access='stream')
      Read(FID) action
      Close(FID)
      ! Load the weights
      call actor_model%load("PSCAD_actor.txt")
      call critic_model_1%load("PSCAD_critic_1.txt")
      call critic_model_2%load("PSCAD_critic_2.txt")
      call critic_model%load("PSCAD_critic.txt")
      call target_actor%load("PSCAD_target_actor.txt")
      call target_critic_1%load("PSCAD_target_critic_1.txt")
      call target_critic_2%load("PSCAD_target_critic_2.txt")
      call target_critic%load("PSCAD_target_critic.txt")
      ! Load the experience buffer
      Open(NewUnit=FID, File="buffer_counter_store", action='readwrite', form='unformatted', access='stream')
      Read(FID) buffer%buffer_counter
      Close(FID)
      Open(NewUnit=FID, File="state_buffer_store", action='readwrite', form='unformatted', access='stream')
      Read(FID) buffer%state_buffer
      Close(FID)
      Open(NewUnit=FID, File="action_buffer_store", action='readwrite', form='unformatted', access='stream')
      Read(FID) buffer%action_buffer
      Close(FID)
      Open(NewUnit=FID, File="reward_buffer_store", action='readwrite', form='unformatted', access='stream')
      Read(FID) buffer%reward_buffer
      Close(FID)
      Open(NewUnit=FID, File="next_state_buffer_store", action='readwrite', form='unformatted', access='stream')
      Read(FID) buffer%next_state_buffer
      Close(FID)
      ! Build the current state and record the completed transition
      ! (prev_state, action, reward, state) in the replay buffer
      state(1) = state_1
      call buffer%nrecord(prev_state, action, reward, state)
      ! Accumulate the episode reward
      Open(NewUnit=FID, File="episodic_reward", action='readwrite', form='unformatted', access='stream')
      Read(FID) episodic_reward
      episodic_reward = episodic_reward + reward
      Write(FID, pos=1) episodic_reward ! overwrite in place rather than appending after the read
      Close(FID)
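      ! One training update: buffer%learn samples a minibatch (batch_size
      ! transitions) and takes a gradient step on the critics and the actor
      ! (implementation in mod_buffer)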
      call buffer%learn(actor_model, critic_model_1, critic_model_2, critic_model, &
                        target_actor, target_critic_1, target_critic_2, target_critic, &
                        critic_lr, actor_lr, gamma)
      ! Soft-update the target networks: target = tau*model + (1 - tau)*target
      do i = 1, size(actor_model%layers)
        target_actor%layers(i)%b = actor_model%layers(i)%b * tau + target_actor%layers(i)%b * (1 - tau)
        target_actor%layers(i)%w = actor_model%layers(i)%w * tau + target_actor%layers(i)%w * (1 - tau)
      end do
      do i = 1, size(critic_model_1%layers)
        target_critic_1%layers(i)%b = critic_model_1%layers(i)%b * tau + target_critic_1%layers(i)%b * (1 - tau)
        target_critic_1%layers(i)%w = critic_model_1%layers(i)%w * tau + target_critic_1%layers(i)%w * (1 - tau)
      end do
      do i = 1, size(critic_model_2%layers)
        target_critic_2%layers(i)%b = critic_model_2%layers(i)%b * tau + target_critic_2%layers(i)%b * (1 - tau)
        target_critic_2%layers(i)%w = critic_model_2%layers(i)%w * tau + target_critic_2%layers(i)%w * (1 - tau)
      end do
      do i = 1, size(critic_model%layers)
        target_critic%layers(i)%b = critic_model%layers(i)%b * tau + target_critic%layers(i)%b * (1 - tau)
        target_critic%layers(i)%w = critic_model%layers(i)%w * tau + target_critic%layers(i)%w * (1 - tau)
      end do
      ! Save the experience buffer
      Open(NewUnit=FID, File="buffer_counter_store", action='readwrite', form='unformatted', access='stream')
      Write(FID) buffer%buffer_counter
      Close(FID)
      Open(NewUnit=FID, File="state_buffer_store", action='readwrite', form='unformatted', access='stream')
      Write(FID) buffer%state_buffer
      Close(FID)
      Open(NewUnit=FID, File="action_buffer_store", action='readwrite', form='unformatted', access='stream')
      Write(FID) buffer%action_buffer
      Close(FID)
      Open(NewUnit=FID, File="reward_buffer_store", action='readwrite', form='unformatted', access='stream')
      Write(FID) buffer%reward_buffer
      Close(FID)
      Open(NewUnit=FID, File="next_state_buffer_store", action='readwrite', form='unformatted', access='stream')
      Write(FID) buffer%next_state_buffer
      Close(FID)
      ! Save the weights
      call actor_model%save("PSCAD_actor.txt")
      call critic_model_1%save("PSCAD_critic_1.txt")
      call critic_model_2%save("PSCAD_critic_2.txt")
      call critic_model%save("PSCAD_critic.txt")
      call target_actor%save("PSCAD_target_actor.txt")
      call target_critic_1%save("PSCAD_target_critic_1.txt")
      call target_critic_2%save("PSCAD_target_critic_2.txt")
      call target_critic%save("PSCAD_target_critic.txt")
      ! Execute the action and save it
      action = policy(state, lower_bound, upper_bound, actor_model, ou_noise)
      Open(NewUnit=FID, File="action", action='readwrite', form='unformatted', access='stream')
      Write(FID) action
      Close(FID)
      ! The current state becomes prev_state for the next call
      prev_state = state
      Open(NewUnit=FID, File="prev_state", action='readwrite', form='unformatted', access='stream')
      Write(FID) prev_state
      Close(FID)
      ! Last step of the episode (500 steps): save the episode reward to 'episodic_reward_store'
      if (Simu_Step_In == 500) then
        Open(NewUnit=FID, File="episode_counter", action='readwrite', form='unformatted', access='stream')
        Read(FID) episode_counter
        Close(FID)
        Open(NewUnit=FID, File="episodic_reward_store", action='readwrite', form='unformatted', access='stream')
        Read(FID) episodic_reward_store
        episodic_reward_store(episode_counter) = episodic_reward
        Write(FID, pos=1) episodic_reward_store ! overwrite in place rather than appending after the read
        Close(FID)
        episode_counter = episode_counter + 1
        Open(NewUnit=FID, File="episode_counter", action='readwrite', form='unformatted', access='stream')
        Write(FID) episode_counter
        Close(FID)
      end if
      action_1 = action(1)
      Simu_Step_Out = Simu_Step_In + 1
    end if
  !! Not training: just run the saved policy
  else
    state(1) = state_1
    ! Load the actor weights
    call actor_model%load("PSCAD_actor.txt")
    ! Execute the action (note that OU exploration noise is still added by policy)
    action = policy(state, lower_bound, upper_bound, actor_model, ou_noise)
    action_1 = action(1)
    Simu_Step_Out = Simu_Step_In + 1
  end if
contains
  ! Policy: run the actor, scale its output to the action bound, add OU
  ! exploration noise, and clamp the result to [lower_bound, upper_bound]
  function policy(state, lower_bound, upper_bound, actor_model, ou_noise) result(legal_action)
    implicit none
    real, intent(in) :: state(:)
    real, intent(in) :: lower_bound, upper_bound
    type(network_type), intent(in) :: actor_model
    type(noise_type) :: ou_noise
    real, allocatable :: sampled_action(:), legal_action(:)
    real :: noise
    allocate(sampled_action(actor_model%dims(size(actor_model%dims))))
    allocate(legal_action(actor_model%dims(size(actor_model%dims))))
    ! The actor's tanh output lies in [-1, 1]; scale it to the action bound
    sampled_action = actor_model%output(state)
    sampled_action = sampled_action * upper_bound
    ! Add Ornstein-Uhlenbeck noise to the action for exploration
    noise = ou_noise%ncall()
    sampled_action = sampled_action + noise
    ! Make sure the action is within bounds (only the first component is
    ! checked, since num_actions = 1)
    if (sampled_action(1) < lower_bound) then
      legal_action = lower_bound
    elseif (sampled_action(1) > upper_bound) then
      legal_action = upper_bound
    else
      legal_action = sampled_action
    end if
  end function policy
end subroutine ddpg