Added opensim-rl environment, extended the DQN agent to a multi-dimensional action space, and added a sample configuration and options for training an agent in opensim-rl #13

Open · wants to merge 4 commits into master
1 change: 1 addition & 0 deletions README.md
100644 → 100755
@@ -108,6 +108,7 @@ NOTE: we follow the exact code structure as [pytorch-dnc](https://github.com/jin
- [Visdom](https://github.com/facebookresearch/visdom)
- [OpenAI Gym >=v0.9.0 (for lower versions, just change to the games available in that release, e.g. change PongDeterministic-v4 to PongDeterministic-v3)](https://github.com/openai/gym)
- [mujoco-py (Optional: for training continuous version of a3c)](https://github.com/openai/mujoco-py)
- [opensim-rl (Optional: for training in the OpenSim-RL environment)](https://github.com/stanfordnmbl/osim-rl)
*******


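A quick way to verify the optional opensim-rl dependency is set up (a minimal sketch, not part of this PR; it assumes osim-rl v1.x, where `RunEnv` lives under `osim.env` as used in `opensim.py` below):

```python
# Hypothetical sanity check for the optional osim-rl dependency (not repo code).
try:
    from osim.env import RunEnv
except ImportError:
    print("osim-rl not installed; see https://github.com/stanfordnmbl/osim-rl")
else:
    env = RunEnv(visualize=False)                        # headless check
    print("action dim:", env.action_space.shape[0])      # 18 muscle activations
    print("state dim:", env.observation_space.shape[0])
```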
4 changes: 2 additions & 2 deletions core/agents/dqn.py
100644 → 100755
@@ -150,7 +150,7 @@ def _epsilon_greedy(self, q_values_ts):
self.eps = self.eps_eval
# choose action
if np.random.uniform() < self.eps: # then we choose a random action
action = random.randrange(self.action_dim)
action = np.random.rand(self.action_dim).tolist()
else: # then we choose the greedy action
if self.use_cuda:
action = np.argmax(q_values_ts.cpu().numpy())
@@ -164,7 +164,7 @@ def _forward(self, observation):
state_ts = torch.from_numpy(np.array(state)).unsqueeze(0).type(self.dtype)
q_values_ts = self.model(Variable(state_ts, volatile=True)).data # NOTE: only doing inference here, so volatile=True
if self.training and self.step < self.learn_start: # then we don't do any learning, just accumulate experiences into replay memory
action = random.randrange(self.action_dim) # thus we only randomly sample actions here, since the model hasn't been updated at all till now
action = np.random.rand(self.action_dim).tolist() # thus we only randomly sample actions here, since the model hasn't been updated at all till now
else:
action = self._epsilon_greedy(q_values_ts)

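The two changed lines above replace the discrete `random.randrange(self.action_dim)` exploration with a uniform random activation vector, while the greedy branch keeps its `argmax`. A standalone sketch of that selection rule (a hypothetical helper, not code from this PR):

```python
import numpy as np

def epsilon_greedy_multidim(q_values, action_dim, eps):
    """Exploration rule mirroring the change above: with probability eps return a
    random activation vector in [0, 1)^action_dim, otherwise act greedily."""
    if np.random.uniform() < eps:
        return np.random.rand(action_dim).tolist()  # random per-dimension activations
    return int(np.argmax(q_values))                 # greedy branch kept as in the diff
```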
75 changes: 75 additions & 0 deletions opensim.py
@@ -0,0 +1,75 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from copy import deepcopy
from gym.spaces.box import Box
import inspect

from utils.helpers import Experience # NOTE: here state0 is always "None"
from utils.helpers import preprocessAtari, rgb2gray, rgb2y, scale
from core.env import Env

class OpenSim(Env): # low dimensional observations
""" Class to setup the OpenSim-RL environment (https://github.com/praveen-palanisamy/pytorch-rl.git) Where the agent has to learn to run! Continuous (18 dim) action space."""
def __init__(self, args, env_ind=0):
super(OpenSim, self).__init__(args, env_ind)

assert self.env_type == "opensim"
try: from osim.env import RunEnv
except ImportError as e: self.logger.warning("WARNING: opensim not found")

self.env = RunEnv(visualize= True)
#self.env.seed(self.seed) # NOTE: so each env would be different

# action space setup
self.actions = range(self.action_dim)
self.logger.warning("Action Space: %s", self.env.action_space)

# state space setup
self.logger.warning("State Space: %s", self.state_shape)

# continuous space
#if args.agent_type == "a3c":
self.enable_continuous = True #args.enable_continuous

def _preprocessState(self, state): # NOTE: here no preprecessing is needed
return state

@property
def action_dim(self):
return self.env.action_space.shape[0]

@property
def state_shape(self):
return self.env.observation_space.shape[0]

def render(self):
#if self.mode == 2:
# frame = self.env.render(mode='rgb_array')
# frame_name = self.img_dir + "frame_%04d.jpg" % self.frame_ind
# self.imsave(frame_name, frame)
# self.logger.warning("Saved Frame @ Step: " + str(self.frame_ind) + " To: " + frame_name)
# self.frame_ind += 1
# return frame
#else:
# return self.env.render()
return


def visual(self):
pass

def sample_random_action(self):
return self.env.action_space.sample()

def reset(self):
self._reset_experience()
self.exp_state1 = self.env.reset()
return self._get_experience()

def step(self, action):
self.exp_action = action
if self.enable_continuous:
self.exp_state1, self.exp_reward, self.exp_terminal1, _ = self.env.step(self.exp_action)
return self._get_experience()
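The wrapper above delegates to osim-rl's `RunEnv`; for reference, a minimal interaction loop against `RunEnv` directly (a sketch assuming osim-rl v1.x, independent of this repo):

```python
from osim.env import RunEnv

env = RunEnv(visualize=False)
observation = env.reset()
total_reward = 0.0
for _ in range(100):
    action = env.action_space.sample()                   # 18-dim muscle activations
    observation, reward, done, info = env.step(action)
    total_reward += reward
    if done:
        observation = env.reset()
print("return over 100 random steps:", total_reward)
```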
5 changes: 4 additions & 1 deletion utils/factory.py
100644 → 100755
@@ -6,10 +6,12 @@
from core.envs.atari_ram import AtariRamEnv
from core.envs.atari import AtariEnv
from core.envs.lab import LabEnv
from core.envs.opensim import OpenSim
EnvDict = {"gym": GymEnv, # classic control games from openai w/ low-level input
"atari-ram": AtariRamEnv, # atari integrations from openai, with low-level input
"atari": AtariEnv, # atari integrations from openai, with pixel-level input
"lab": LabEnv}
"lab": LabEnv,
"opensim": OpenSim}

from core.models.empty import EmptyModel
from core.models.dqn_mlp import DQNMlpModel
@@ -20,6 +22,7 @@
from core.models.acer_cnn_dis import ACERCnnDisModel
ModelDict = {"empty": EmptyModel, # contains nothing, only should be used w/ EmptyAgent
"dqn-mlp": DQNMlpModel, # for dqn low-level input
"dqn-mlp-con": DQNMlpModel, # for dqn low-level input
"dqn-cnn": DQNCnnModel, # for dqn pixel-level input
"a3c-mlp-con": A3CMlpConModel, # for a3c low-level input (NOTE: continuous must end in "-con")
"a3c-cnn-dis": A3CCnnDisModel, # for a3c pixel-level input
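`EnvDict` and `ModelDict` act as string-keyed registries; the new entries let a config select the OpenSim wrapper and reuse `DQNMlpModel` for the continuous setup. A self-contained sketch of the lookup pattern (the classes here are placeholders, not the repo's real ones):

```python
# Minimal registry sketch mirroring how EnvDict / ModelDict are consumed.
class OpenSim:                         # placeholder for core.envs.opensim.OpenSim
    def __init__(self, env_ind=0):
        self.env_ind = env_ind

class DQNMlpModel:                     # placeholder for core.models.dqn_mlp.DQNMlpModel
    pass

EnvDict = {"opensim": OpenSim}
ModelDict = {"dqn-mlp-con": DQNMlpModel}

env = EnvDict["opensim"](env_ind=0)    # string key -> class -> instance
model = ModelDict["dqn-mlp-con"]()
print(type(env).__name__, type(model).__name__)
```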
42 changes: 36 additions & 6 deletions utils/options.py
100644 → 100755
@@ -22,19 +22,20 @@
[ "dqn", "atari", "BreakoutDeterministic-v4", "dqn-cnn", "sequential"], # 4
[ "a3c", "atari", "PongDeterministic-v4", "a3c-cnn-dis", "none" ], # 5
[ "a3c", "gym", "InvertedPendulum-v1", "a3c-mlp-con", "none" ], # 6
[ "acer", "gym", "MountainCar-v0", "acer-mlp-dis", "episodic" ] # 7 # NOTE: acer under testing
[ "acer", "gym", "MountainCar-v0", "acer-mlp-dis", "episodic" ], # 7 # NOTE: acer under testing
[ "dqn", "opensim", "opensim", "dqn-mlp-con", "sequential"] # 8
]

class Params(object): # NOTE: shared across all modules
def __init__(self):
self.verbose = 0 # 0(warning) | 1(info) | 2(debug)

# training signature
self.machine = "aisdaim" # "machine_id"
self.timestamp = "17082400" # "yymmdd##"
self.machine = "hpc011" # "machine_id"
self.timestamp = "1" # "yymmdd##"
# training configuration
self.mode = 1 # 1(train) | 2(test model_file)
self.config = 7
self.config = 8

self.seed = 123
self.render = False # whether render the window from the original envs or not
@@ -53,7 +54,7 @@ def __init__(self):
self.hidden_dim = 16
else:
self.hist_len = 4
self.hidden_dim = 256
self.hidden_dim = 512#256

self.use_cuda = torch.cuda.is_available()
self.dtype = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor
@@ -139,8 +140,12 @@ def __init__(self):
self.wid_state = 80
self.preprocess_mode = 3 # 0(nothing) | 1(rgb2gray) | 2(rgb2y) | 3(crop&resize depth)
self.img_encoding_type = "passthrough"

elif self.env_type == "opensim":
pass

else:
assert False, "env_type must be: gym | atari-ram | atari | lab"
assert False, "env_type must be: gym | atari-ram | atari | lab | opensim"

class ModelParams(Params): # settings for network architecture
def __init__(self):
@@ -228,6 +233,31 @@ def __init__(self):
self.action_repetition = 4
self.memory_interval = 1
self.train_interval = 4
elif self.agent_type == "dqn" and self.env_type == "opensim":
self.steps = 50000000 # max #iterations
self.early_stop = None # max #steps per episode
self.gamma = 0.99
self.clip_grad = 40.#np.inf
self.lr = 0.00025
self.lr_decay = False
self.weight_decay = 0.
self.eval_freq = 250000#12500 # NOTE: here means every this many steps
self.eval_steps = 125000#2500
self.prog_freq = 10000#self.eval_freq
self.test_nepisodes = 1

self.learn_start = 50000 # start update params after this many steps
self.batch_size = 32
self.valid_size = 500
self.eps_start = 1
self.eps_end = 0.1
self.eps_eval = 0.#0.05
self.eps_decay = 1000000
self.target_model_update = 10000
self.action_repetition = 4
self.memory_interval = 1
self.train_interval = 4

elif self.agent_type == "a3c":
self.steps = 20000000 # max #iterations
self.early_stop = None # max #steps per episode
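Each `CONFIGS` row is a positional tuple of agent, env, game, model, and memory type; row 8 added above wires the DQN agent to the opensim env with the `dqn-mlp-con` model. A self-contained sketch of how such a row is unpacked (field names inferred from the table; the actual unpacking site is not shown in this diff):

```python
CONFIGS = [
    # agent_type, env_type,  game,      model_type,    memory_type
    ["dqn",       "opensim", "opensim", "dqn-mlp-con", "sequential"],  # config 8 in the full table
]

agent_type, env_type, game, model_type, memory_type = CONFIGS[0]
print(agent_type, env_type, game, model_type, memory_type)
```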