Pull request #9: Noise Benchmarking, LSTM sb3_zoo hyperpar (Issue #1)

Merged (24 commits, Mar 7, 2024)
1 change: 1 addition & 0 deletions hyperpars/ppo-asm-v0-1.yml
@@ -9,3 +9,4 @@ config: {}
 use_sde: True
 id: "1"
 repo: "cboettig/rl-ecology"
+save_path: "/home/rstudio/rl4greencrab/saved_agents"
3 changes: 2 additions & 1 deletion hyperpars/ppo-asm2o-v0-1.yml
@@ -3,9 +3,10 @@
 algo: "PPO"
 env_id: "Asm2o-v0"
 n_envs: 12
-tensorboard: "/home/jovyan/logs"
+tensorboard: "/home/rstudio/logs"
 total_timesteps: 6000000
 config: {}
 use_sde: True
 id: "1"
 repo: "cboettig/rl-ecology"
+save_path: "/home/rstudio/rl4greencrab/saved_agents"
128 changes: 121 additions & 7 deletions hyperpars/rppo-asm2o.yml
@@ -1,13 +1,127 @@
# stable-baselines3 configuration template.
# template for using sb3_zoo hyperparameter yamls

# algo overall
algo: "RPPO"
policyType: "MlpLstmPolicy"
total_timesteps: 10000000

additional_imports: ["torch"]

# env overall
env_id: "Asm2o-v0"
n_envs: 12
tensorboard: "/home/rstudio/logs"
total_timesteps: 500000
config: {}
use_sde: True
id: "1"
n_envs: 32

# io
repo: "cboettig/rl-ecology"
save_path: "/home/rstudio/rl4fisheries/saved_agents"

# # MINIMAL CONFIG
# id: "minimal"
# algo_config:
# policy: 'MlpLstmPolicy'
# tensorboard_log: "/home/rstudio/logs"

# # SLOW LEARN
# id: "slow"
# algo_config:
# policy: 'MlpLstmPolicy'
# tensorboard_log: "/home/rstudio/logs"
# learning_rate: 0.0001
# # default learning rate = 0.0003

# # EXTRA SLOW LEARN
id: "extra-slow"
algo_config:
policy: 'MlpLstmPolicy'
tensorboard_log: "/home/rstudio/logs"
learning_rate: 0.00003


# algo hyperpars taken from:
# https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo_lstm.yml


# # BIPEDAL WALKER
# id: "bipedal"
# algo_config:
# # normalize: True # not clear what this one actually does -- from the source code it seems to 'activate' VecNormalize, but more care & examination needed
# policy: 'MlpLstmPolicy'
# tensorboard_log: "/home/rstudio/logs"
# n_steps: 256
# batch_size: 256
# gae_lambda: 0.95
# gamma: 0.999
# n_epochs: 10
# ent_coef: 0.0
# learning_rate: !!float 3e-4
# clip_range: 0.18
# policy_kwargs: "dict(
# ortho_init=False,
# activation_fn=torch.nn.ReLU,
# lstm_hidden_size=64,
# enable_critic_lstm=True,
# net_arch=dict(pi=[64], vf=[64])
# )"

# # HALF CHEETAH V4

# id: "cheetah"
# algo_config:
# policy: 'MlpLstmPolicy'
# tensorboard_log: "/home/rstudio/logs"
# batch_size: 64
# n_steps: 512
# gamma: 0.98
# learning_rate: 2.0633e-05
# ent_coef: 0.000401762
# clip_range: 0.1
# n_epochs: 20
# gae_lambda: 0.92
# max_grad_norm: 0.8
# vf_coef: 0.58096
# policy_kwargs: "dict(
# log_std_init=-2,
# ortho_init=False,
# activation_fn=torch.nn.ReLU,
# net_arch=dict(pi=[256, 256], vf=[256, 256])
# )"



# # INVERTED PENDULUM
# id: "inv_pend"
# algo_config:
# tensorboard_log: "/home/rstudio/logs"
# policy: 'MlpLstmPolicy'
# n_steps: 2048
# batch_size: 64
# gae_lambda: 0.95
# gamma: 0.99
# n_epochs: 10
# ent_coef: 0.0
# learning_rate: 2.5e-4
# clip_range: 0.2


# # MOUNTAIN CAR NO VEL

# id: "mount_car"
# algo_config:
# tensorboard_log: "/home/rstudio/logs"
# policy: 'MlpLstmPolicy'
# batch_size: 256
# n_steps: 1024
# gamma: 0.9999
# learning_rate: !!float 7.77e-05
# ent_coef: 0.00429
# clip_range: 0.1
# n_epochs: 10
# gae_lambda: 0.9
# max_grad_norm: 5
# vf_coef: 0.19
# use_sde: True
# sde_sample_freq: 8
# policy_kwargs: "dict(log_std_init=0.0, ortho_init=False,
# lstm_hidden_size=32,
# enable_critic_lstm=True,
# net_arch=dict(pi=[64], vf=[64]))"
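For orientation: the active block above ("extra-slow", MlpLstmPolicy, learning rate 3e-5) is the configuration the run actually uses, while the commented presets are sb3_zoo hyperparameters kept for reference. The sketch below shows one way such a config could be turned into a RecurrentPPO run with sb3_contrib. It is a minimal sketch, not the repository's sb3_train_v2: the YAML keys used (algo_config, env_id, n_envs, total_timesteps, save_path, id) are taken from the file above, rl4fisheries is assumed to register the Asm2o-v0 environment id, and the eval() of policy_kwargs mirrors how the sb3_zoo-style presets embed torch objects as strings.

import yaml
import torch  # imported so that eval'd policy_kwargs strings can reference torch.nn
from sb3_contrib import RecurrentPPO
from stable_baselines3.common.env_util import make_vec_env
import rl4fisheries  # assumed to register Asm2o-v0 with gymnasium

with open("hyperpars/rppo-asm2o.yml") as f:
    cfg = yaml.safe_load(f)

algo_cfg = dict(cfg["algo_config"])
if isinstance(algo_cfg.get("policy_kwargs"), str):
    # sb3_zoo yamls store policy_kwargs as a string such as "dict(ortho_init=False, ...)"
    algo_cfg["policy_kwargs"] = eval(algo_cfg["policy_kwargs"])

env = make_vec_env(cfg["env_id"], n_envs=cfg["n_envs"])
model = RecurrentPPO(env=env, **algo_cfg)  # algo_config supplies policy, tensorboard_log, learning_rate
model.learn(total_timesteps=cfg["total_timesteps"], progress_bar=True)
model.save(f"{cfg['save_path']}/rppo-{cfg['id']}")  # illustrative save name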
3 changes: 2 additions & 1 deletion hyperpars/tqc-asm2o-v0-1.yml
@@ -3,11 +3,12 @@
 algo: "TQC"
 env_id: "Asm2o-v0"
 n_envs: 6
-tensorboard: "/home/jovyan/logs"
+tensorboard: "/home/rstudio/logs"
 total_timesteps: 12000000
 config: {"learning_rate": 0.0001,
          "learning_starts": 1000,
          }
 use_sde: True
 id: "1"
 repo: "cboettig/rl-ecology"
+save_path: "/home/rstudio/rl4greencrab/saved_agents"
18 changes: 18 additions & 0 deletions scripts/benchmark_noise.py
@@ -0,0 +1,18 @@
#!/opt/venv/bin/python
import argparse
parser = argparse.ArgumentParser()
parser.add_argument("-f", "--file", help="Path config file", type=str)
parser.add_argument("-pb", "--progress_bar", help="Use progress bar for training", type=bool)
parser.add_argument("-noise", "--noise", help="sigma for the noise level to use", type=float)
args = parser.parse_args()

import rl4fisheries


from rl4fisheries.utils import sb3_train
sb3_train(
    args.file,
    progress_bar = args.progress_bar,
    config={"sigma": args.noise},
    id=f"noise_{args.noise}",
)
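For context: each run launched by this script overrides only the observation-noise level, passing config={"sigma": args.noise} to sb3_train together with an id of the form noise_<sigma>. The intent, consistent with the asm.py changes later in this PR, is presumably that the override is forwarded to the environment constructor, where Asm.__init__ now reads it via config.get("sigma", ...), while the noise-specific id keeps the saved agents and logs for different sigma values from overwriting one another.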
16 changes: 16 additions & 0 deletions scripts/benchmark_noise.sh
@@ -0,0 +1,16 @@
#!/bin/bash

# move to script directory for normalized relative paths.
scriptdir="$(dirname "$0")"
cd "$scriptdir"

# original noise setting
python benchmark_noise.py -f ../hyperpars/ppo-asm2o-v0-1.yml -noise 1.5 &

# lower noise setting
python benchmark_noise.py -f ../hyperpars/ppo-asm2o-v0-1.yml -noise 1.00 &
python benchmark_noise.py -f ../hyperpars/ppo-asm2o-v0-1.yml -noise 0.75 &
python benchmark_noise.py -f ../hyperpars/ppo-asm2o-v0-1.yml -noise 0.50 &
python benchmark_noise.py -f ../hyperpars/ppo-asm2o-v0-1.yml -noise 0.25 &
python benchmark_noise.py -f ../hyperpars/ppo-asm2o-v0-1.yml -noise 0.10 &
python benchmark_noise.py -f ../hyperpars/ppo-asm2o-v0-1.yml -noise 0.05 &
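Note that all seven training runs are launched in the background with &, so they proceed concurrently and the script returns as soon as the last one is started; if a blocking script is preferred (for example under a batch scheduler), appending a final "wait" command would make it pause until every background job has finished.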
10 changes: 7 additions & 3 deletions scripts/train.py
@@ -3,10 +3,14 @@
 parser = argparse.ArgumentParser()
 parser.add_argument("-f", "--file", help="Path config file", type=str)
 parser.add_argument("-pb", "--progress_bar", help="Use progress bar for training", type=bool)
+parser.add_argument("-rppo", "--recurrent-ppo", help="Hyperpar structure for recurrent ppo.", type=bool, default=False)
 args = parser.parse_args()
 
 import rl4fisheries
 
-
-from rl4fisheries.utils import sb3_train
-sb3_train(args.file)
+if args.recurrent_ppo:
+    from rl4fisheries.utils import sb3_train_v2
+    sb3_train_v2(args.file, progress_bar = args.progress_bar)
+else:
+    from rl4fisheries.utils import sb3_train
+    sb3_train(args.file, progress_bar = args.progress_bar)
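Usage note: with this flag, a command along the lines of python scripts/train.py -f hyperpars/rppo-asm2o.yml -rppo True routes training through sb3_train_v2, while omitting -rppo keeps the original sb3_train path. One caveat: argparse's type=bool converts any non-empty string to True, so passing -rppo False (or -pb False) still evaluates to True; leaving the flag off entirely is the only way to get the False default.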
59 changes: 29 additions & 30 deletions src/rl4fisheries/envs/asm.py
@@ -39,30 +39,29 @@ class Asm(gym.Env):
 
     def __init__(self, render_mode: Optional[str] = 'rgb_array', config={}):
         config = config or {}
-        parameters = {
+        self.parameters = {
             "n_age": 20, # number of age classes
-            "vbk": np.float32(0.23), # von Bertalanffy kappa
-            "s": np.float32(0.86), # average survival
-            "cr": np.float32(6.0), # Goodyear compensation ratio
-            "rinit": np.float32(0.01), # initial number age-1 recruits
-            "ro": np.float32(1.0), # average unfished recruitment
-            "uo": np.float32(0.12), # average historical exploitation rate
-            "asl": np.float32(0.5), # vul par 1
-            "ahv": np.float32(5.0), # vul par 2
-            "ahm": np.float32(6.0), # age 50% maturity
-            "upow": np.float32(1.0), # 1 = max yield objective, < 1 = HARA ## both 0.6 and 1.0 -- this describes risk adversion in the utility function
-            "p_big": np.float32(0.05), # probability of big year class
-            "sdr": np.float32(0.3), # recruit sd given stock-recruit relationship
-            "rho": np.float32(0.0), # autocorrelation in recruitment sequence
-            "sdv": np.float32(1e-9), # sd in vulnerable biomass (survey)
-            "sigma": np.float32(1.5),
+            "vbk": config.get("vbk" , np.float32(0.23)), # von Bertalanffy kappa
+            "s": config.get("s" , np.float32(0.86)), # average survival
+            "cr": config.get("cr" , np.float32(6.0)), # Goodyear compensation ratio
+            "rinit": config.get("rinit" , np.float32(0.01)), # initial number age-1 recruits
+            "ro": config.get("ro" , np.float32(1.0)), # average unfished recruitment
+            "uo": config.get("uo" , np.float32(0.12)), # average historical exploitation rate
+            "asl": config.get("asl" , np.float32(0.5)), # vul par 1
+            "ahv": config.get("ahv" , np.float32(5.0)), # vul par 2
+            "ahm": config.get("ahm" , np.float32(6.0)), # age 50% maturity
+            "upow": config.get("upow" , np.float32(1.0)), # 1 = max yield objective, < 1 = HARA
+            "p_big": config.get("p_big" , np.float32(0.05)), # probability of big year class
+            "sdr": config.get("sdr" , np.float32(0.3)), # recruit sd given stock-recruit relationship
+            "rho": config.get("rho" , np.float32(0.0)), # autocorrelation in recruitment sequence
+            "sdv": config.get("sdv" , np.float32(1e-9)), # sd in vulnerable biomass (survey)
+            "sigma": config.get("sigma" , np.float32(1.5)),
         }
         # these parameters can be specified in config
         self.n_year = config.get("n_year", 1000)
         self.Tmax = self.n_year
         self.threshold = config.get("threshold", np.float32(1e-4))
         self.training = config.get("training", True)
-        self.parameters = config.get("parameters", parameters)
         self.timestep = 0
         self.bound = 50 # a rescaling parameter
         self.parameters["ages"] = range(
@@ -165,7 +164,7 @@ def render(self):
     def initialize_population(self):
         p = self.parameters # snag those pars
         ninit = np.float32([0] * p["n_age"]) # initial numbers
-        vul = ninit.copy() # vulnerability
+        survey_vul = ninit.copy() # vulnerability
         wt = ninit.copy() # weight
         mat = ninit.copy() # maturity
         Lo = ninit.copy() # survivorship unfished
@@ -174,7 +173,7 @@ def initialize_population(self):
 
         # leading array calculations to get vul-at-age, wt-at-age, etc.
         for a in range(0, p["n_age"], 1):
-            vul[a] = 1 / (1 + np.exp(-p["asl"] * (p["ages"][a] - p["ahv"])))
+            survey_vul[a] = 1 / (1 + np.exp(-p["asl"] * (p["ages"][a] - p["ahv"])))

Review comment on the line above: once we have a harvest_vul, it's going to need to be initialized here too. Presumably it's identical but has different values for p["asl"] and p["ahv"]. (We'll need @CarlJwalters or @ChrisFishCahill to figure out some reasonable choices there)

             wt[a] = pow(
                 (1 - np.exp(-p["vbk"] * p["ages"][a])), 3
             ) # 3 --> isometric growth
@@ -184,14 +183,14 @@ def initialize_population(self):
                 Lf[a] = 1
             elif a > 0 and a < (p["n_age"] - 1):
                 Lo[a] = Lo[a - 1] * p["s"]
-                Lf[a] = Lf[a - 1] * p["s"] * (1 - vul[a - 1] * p["uo"])
+                Lf[a] = Lf[a - 1] * p["s"] * (1 - survey_vul[a - 1] * p["uo"])
             elif a == (p["n_age"] - 1):
                 Lo[a] = Lo[a - 1] * p["s"] / (1 - p["s"])
                 Lf[a] = (
                     Lf[a - 1]
                     * p["s"]
-                    * (1 - vul[a - 1] * p["uo"])
-                    / (1 - p["s"] * (1 - vul[a - 1] * p["uo"]))
+                    * (1 - survey_vul[a - 1] * p["uo"])
+                    / (1 - p["s"] * (1 - survey_vul[a - 1] * p["uo"]))
                 )
         ninit = np.array(p["rinit"]) * Lf
         mwt = mat * np.array(wt)
@@ -202,7 +201,7 @@ def initialize_population(self):
         # put it all in self so we can reference later
         self.parameters["Lo"] = Lo
         self.parameters["Lf"] = Lf
-        self.parameters["vul"] = vul
+        self.parameters["survey_vul"] = survey_vul
         self.parameters["wt"] = wt
         self.parameters["mwt"] = mwt
         self.parameters["bha"] = bha
@@ -216,18 +215,18 @@ def initialize_population(self):
 
     def harvest(self, n, mortality):
         p = self.parameters
-        self.vulb = sum(p["vul"] * n * p["mwt"])
+        self.vulb = sum(p["survey_vul"] * n * p["wt"])
         self.vbobs = self.vulb # could multiply this by random deviate
         self.ssb = sum(p["mwt"] * n)
         if sum(n) > 0:
-            self.abar = sum(p["vul"] * np.array(p["ages"]) * n) / sum(n)
-            self.wbar = sum(p["vul"] * n * p["wt"]) / sum(n * p["wt"])
+            self.abar = sum(p["survey_vul"] * np.array(p["ages"]) * n) / sum(n)
+            self.wbar = sum(p["survey_vul"] * n * p["wt"]) / sum(n * p["wt"])

Review comment on the lines above: same as above, eventually this should be harvest_vul

         else:
             self.abar = 0
             self.wbar = 0
         self.yieldf = mortality[0] * self.vulb # fishery yield
         reward = self.yieldf ** p["upow"] # this is utility
-        n = p["s"] * n * (1 - p["vul"] * mortality) # eat fish
+        n = p["s"] * n * (1 - p["survey_vul"] * mortality) # eat fish - TBD, change survey to fishery vulnerability
         return n, reward
 
     def population_growth(self, n):
@@ -250,9 +249,9 @@ def population_growth(self, n):
         return n
 
     def observe(self):
-        total = np.array([sum(self.state)])
-        observation = 2 * total / self.bound - 1
-        observation = np.clip(observation, [-1.0], [1.0])
+        self.vulb = sum(self.parameters["survey_vul"] * self.state * self.parameters["wt"]) # update vulnerable biomass
+        observation = 2 * np.array([self.vulb]) / self.bound - 1
+        observation = np.clip(observation, -1.0, 1.0)
         return np.float32(observation)
 
     def population_units(self):
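Finally, a minimal usage sketch for the reworked, config-driven environment parameters. The import path follows src/rl4fisheries/envs/asm.py from this diff; the specific values and the worked observation scaling are illustrative only.

import numpy as np
from rl4fisheries.envs.asm import Asm  # module path taken from this PR

# Override a single parameter; everything not supplied falls back to the defaults above.
env = Asm(config={"sigma": 0.5})
print(env.parameters["sigma"])   # 0.5 rather than the default 1.5

# The new observe() reports vulnerable biomass rescaled to [-1, 1]:
# obs = clip(2 * vulb / bound - 1, -1, 1) with bound = 50,
# so vulb = 25 maps to 0.0 and vulb >= 50 saturates at 1.0.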