Skip to content

Commit

Permalink
- started adjusting ddpg
Browse files Browse the repository at this point in the history
  • Loading branch information
Daniel Weber committed Jan 26, 2021
1 parent a62b9ec commit a9f27b4
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 8 deletions.
48 changes: 43 additions & 5 deletions experiments/issue51_new/stable_baselinesDDPG.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@
from os import makedirs
from typing import List, Optional

import gym
import numpy as np
import torch as th
import torch.nn as nn
from stable_baselines3 import DDPG
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
Expand Down Expand Up @@ -110,13 +114,47 @@ def _on_step(self) -> bool:
# Size of the last axis of the action space; used to dimension the exploration noise.
n_actions = env.action_space.shape[-1]
# Zero-mean Gaussian action noise with a standard deviation of 0.1 per action component.
# NOTE(review): `action_noise` is not passed to the DDPG constructor in the code visible
# here — confirm whether training without exploration noise is intended.
action_noise = NormalActionNoise(mean=np.zeros(n_actions), sigma=0.1 * np.ones(n_actions))

model = DDPG('MlpPolicy', env, verbose=1, tensorboard_log=f'{timestamp}/')
checkpoint_on_event = CheckpointCallback(save_freq=100000, save_path=f'{timestamp}/checkpoints/')

class CustomMPL(BaseFeaturesExtractor):
    """MLP features extractor for flat (1-D) ``Box`` observations.

    The observation vector is passed through a stack of ``Linear`` + ``ReLU``
    hidden layers and then projected to ``features_dim`` by a final
    ``Linear`` + ``ReLU``.

    :param observation_space: observation space; ``shape[0]`` is used as the
        width of the first linear layer.
    :param features_dim: width of the feature vector returned by ``forward``.
    :param net_arch: widths of the hidden layers. ``None`` keeps the original
        hard-coded 32 -> 64 stack. Accepted as a keyword so that it can be
        supplied through ``features_extractor_kwargs``.
    """

    def __init__(
        self,
        observation_space: gym.spaces.Box,
        features_dim: int = 256,
        net_arch: Optional[List[int]] = None,
    ):
        super(CustomMPL, self).__init__(observation_space, features_dim)
        hidden_sizes = [32, 64] if net_arch is None else list(net_arch)

        # Observations are treated as flat vectors: one input unit per entry.
        in_width = observation_space.shape[0]
        layers = []
        for out_width in hidden_sizes:
            layers.append(nn.Linear(in_width, out_width))
            layers.append(nn.ReLU())
            in_width = out_width

        # The attribute keeps its historical name ``cnn`` (although it only
        # holds linear layers) so parameter names in saved models stay stable.
        self.cnn = nn.Sequential(*layers)

        # A linear stack's output width equals the width of its last layer, so
        # no dummy forward pass is needed to size the projection head.
        self.linear = nn.Sequential(nn.Linear(in_width, features_dim), nn.ReLU())

    def forward(self, observations: th.Tensor) -> th.Tensor:
        """Map a batch of observations to a batch of feature vectors."""
        return self.linear(self.cnn(observations))


# Policy configuration: use CustomMPL as the features extractor.
# NOTE(review): the entries of `features_extractor_kwargs` are presumably passed as keyword
# arguments to CustomMPL.__init__ — confirm that the constructor accepts `net_arch`,
# otherwise building the policy raises a TypeError.
policy_kwargs = dict(
features_extractor_class=CustomMPL,
features_extractor_kwargs=dict(features_dim=128, net_arch=[32, 32]),
)

# Alternative policy configurations that were tried; kept for reference.
# policy_kwargs = dict(net_arch=dict(pi=[5, 5], qf=[10, 10]))
# policy_kwargs = dict( activation_fn=th.nn.LeakyReLU, net_arch=[32, 32])
# Build the DDPG agent with the policy configuration above; TensorBoard logs go to the
# `timestamp` directory.
model = DDPG('MlpPolicy', env, verbose=1, tensorboard_log=f'{timestamp}/', policy_kwargs=policy_kwargs)
# Checkpoint callback configured with save_freq=10000, writing below `{timestamp}/checkpoints/`.
checkpoint_on_event = CheckpointCallback(save_freq=10000, save_path=f'{timestamp}/checkpoints/')
record_env = RecordEnvCallback()
plot_callback = EveryNTimesteps(n_steps=50000, callback=record_env)
model.learn(total_timesteps=500000, callback=[checkpoint_on_event, plot_callback])
plot_callback = EveryNTimesteps(n_steps=10000, callback=record_env)
model.learn(total_timesteps=50000, callback=[checkpoint_on_event, plot_callback])

model.save('ddpg_CC')
model.save('ddpg_CC2')

del model # remove to demonstrate saving and loading

Expand Down
6 changes: 3 additions & 3 deletions net/net_single-inv-curr.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
v_nom: 230*sqrt(2)
#freq_nom: 50
ts: .5e-4
freq_nom: 50
ts: 0.5e-4

components:
inv1:
Expand All @@ -10,7 +10,7 @@ components:
v_DC: 1000
cls: MasterInverterCurrentSourcing
in:
u: [i1p1, i1p2, i1p3] # names of the inputs
u: [ i1p1, i1p2, i1p3 ] # names of the inputs
out:
v: [lc1.capacitor1.v, lc1.capacitor2.v, lc1.capacitor3.v]
i: [lc1.inductor1.i, lc1.inductor2.i, lc1.inductor3.i]
Expand Down

0 comments on commit a9f27b4

Please sign in to comment.