This repository has been archived by the owner on May 28, 2024. It is now read-only.

push trained model weights for 0_baseline_LSTM and 2_multitask_dense … #178

Merged 3 commits on Dec 20, 2022
22 changes: 12 additions & 10 deletions 2a_model.R
@@ -189,14 +189,14 @@ p2a_targets_list <- list(
list(model_id = "0_baseline_LSTM",
snakefile_dir = "0_baseline_LSTM",
config_path = stringr::str_remove(p2a_config_baseline_LSTM_yml, "2a_model/src/models/")),
# the 1_ models use the same model and therefore the same Snakefile
# as the 0_baseline_LSTM run
list(model_id = "1_metab_multitask",
snakefile_dir = "0_baseline_LSTM",
config_path = stringr::str_remove(p2a_config_metab_multitask_yml, "2a_model/src/models/")),
list(model_id = "1a_multitask_do_gpp_er",
snakefile_dir = "0_baseline_LSTM",
config_path = stringr::str_remove(p2a_config_1a_metab_multitask_yml, "2a_model/src/models/")),
#the 1_ models use the same model and therefore the same Snakefile
#as the 0_baseline_LSTM run
#list(model_id = "1_metab_multitask",
#snakefile_dir = "0_baseline_LSTM",
#config_path = stringr::str_remove(p2a_config_metab_multitask_yml, "2a_model/src/models/")),
#list(model_id = "1a_multitask_do_gpp_er",
#snakefile_dir = "0_baseline_LSTM",
#config_path = stringr::str_remove(p2a_config_1a_metab_multitask_yml, "2a_model/src/models/")),
Comment on lines +192 to +199
Collaborator:
I commented these lines out since we didn't train new model weights for these models.

This undoes 0dbac60. @galengorski - was there a reason you uncommented the 1 and 1a models?

list(model_id = "2_multitask_dense",
snakefile_dir = "2_multitask_dense",
config_path = stringr::str_remove(p2a_config_multitask_dense_yml, "2a_model/src/models/"))
@@ -219,16 +219,18 @@ p2a_targets_list <- list(
# this path is relative to the Snakefile
prepped_data_file <- file.path("../../../out/models",p2a_model_ids$model_id, "prepped.npz")

# make sure the directory is unlocked (this has been a hangup for me)
system(stringr::str_glue("snakemake -s {snakefile_path} --configfile {config_path} --unlock"))
# First create the prepped data files if they don't already exist.
# These are needed to make the predictions.
system(stringr::str_glue("snakemake {prepped_data_file} -s {snakefile_path} --configfile {config_path} -j"))

# Then touch all of the existing files. This makes the weights "up-to-date"
# so snakemake doesn't train the models again
system(stringr::str_glue("snakemake -s {snakefile_path} --configfile {config_path} -j --touch"))
system(stringr::str_glue("snakemake -s {snakefile_path} --configfile {config_path} -j --touch --rerun-incomplete"))

# then run the snakemake pipeline to produce the predictions and metric files
system(stringr::str_glue("snakemake -s {snakefile_path} --configfile {config_path} -j --rerun-incomplete"))
system(stringr::str_glue("snakemake -s {snakefile_path} --configfile {config_path} -j --rerun-incomplete "))

# print out the metrics file name for the target
file.path("2a_model/out/models", p2a_model_ids$model_id, "exp_overall_metrics.csv")
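For reference, the four system(str_glue(...)) calls in this target expand to an unlock, prep, touch, run sequence of snakemake commands. A minimal sketch (not part of the pipeline; the model id and relative paths are illustrative assumptions) that prints that sequence for a single model:

library(stringr)

# illustrative values; in the pipeline these come from p2a_model_ids and the config targets
snakefile_path <- "0_baseline_LSTM/Snakefile"
config_path <- "0_baseline_LSTM/config.yml"
prepped_data_file <- file.path("../../../out/models", "0_baseline_LSTM", "prepped.npz")

cmds <- c(
  # unlock the snakemake working directory in case a previous run left it locked
  str_glue("snakemake -s {snakefile_path} --configfile {config_path} --unlock"),
  # build the prepped data file needed for making predictions
  str_glue("snakemake {prepped_data_file} -s {snakefile_path} --configfile {config_path} -j"),
  # touch existing outputs so the pushed weights count as up-to-date and are not retrained
  str_glue("snakemake -s {snakefile_path} --configfile {config_path} -j --touch --rerun-incomplete"),
  # produce the predictions and metric files from the existing weights
  str_glue("snakemake -s {snakefile_path} --configfile {config_path} -j --rerun-incomplete")
)
cat(cmds, sep = "\n")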
Binary weight files: multiple binary model weight files added (contents not shown).
New checkpoint files: each of the newly added checkpoint files contains the same two lines:
@@ -0,0 +1,2 @@
model_checkpoint_path: "."
all_model_checkpoint_paths: "."
2 changes: 1 addition & 1 deletion 2a_model/out/models/add_weights.sh
@@ -1,6 +1,6 @@
files=(checkpoint .data-00000-of-00001 .index .snakemake_timestamp)

models=(0_baseline_LSTM 1_metab_multitask 1a_multitask_do_gpp_er)
models=(0_baseline_LSTM 2_multitask_dense)

for f in ${files[@]}; do
for d in ${models[@]}; do
8 changes: 4 additions & 4 deletions 2a_model/src/models/0_baseline_LSTM/config.yml
@@ -1,8 +1,8 @@
exp_name: "0_baseline_LSTM"
exp_name: 0_baseline_LSTM
Collaborator:
I specify that these should be quoted in the function that builds the config files, so I was surprised to see that the quoted formatting gets lost when built on Tallgrass. I've played with this a bit before, and I think if we updated the version of the {yaml} R package and added it to the container, these config files would keep the quoted format as before. I've added a note in #158.

It's good to know that the models still run even though these new config files don't have quotes around exp_name and y_vars.

Collaborator:
I think Python will automatically read these as strings since they aren't numbers (see the sketch just after this file's diff).

y_vars:
- "do_min"
- "do_mean"
- "do_max"
- do_min
- do_mean
- do_max
lambdas:
- 1
- 1
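Following up on the quoting discussion above, here is a quick sketch (not project code) of how unquoted scalars like these parse on the R side with the {yaml} package. The literal values are taken from the configs in this PR; the leading-zero behaviour is the part worth double-checking on whichever parser ends up reading the --configfile.

library(yaml)

# plain scalars that don't look like numbers load as character
str(yaml.load("exp_name: 0_baseline_LSTM"))
str(yaml.load("y_vars: [do_min, do_mean, do_max]"))

# Leading-zero site ids are the case to watch: under YAML 1.1 resolution a
# scalar made only of octal digits (e.g. 01472104) may be read as an integer
# unless quoted, while one containing an 8 or 9 (e.g. 01481500) cannot match
# the int patterns and stays a string. That would explain why the writer
# quotes '01472104' but leaves 01481500 unquoted in config_base.yml.
str(yaml.load("validation_sites: ['01472104', 01481500]"))

Either way, quoting the site ids (or bumping {yaml} in the container, as noted in #158) keeps the configs unambiguous.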
18 changes: 9 additions & 9 deletions 2a_model/src/models/1_metab_multitask/1a_multitask_do_gpp_er.yml
@@ -1,13 +1,13 @@
exp_name: "1a_multitask_do_gpp_er"
exp_name: 1a_multitask_do_gpp_er
y_vars:
- "do_min"
- "do_mean"
- "do_max"
- "GPP"
- "ER"
- "K600"
- "depth"
- "temp.water"
- do_min
- do_mean
- do_max
- GPP
- ER
- K600
- depth
- temp.water
lambdas:
- 1
- 1
18 changes: 9 additions & 9 deletions 2a_model/src/models/1_metab_multitask/config.yml
@@ -1,13 +1,13 @@
exp_name: "1_metab_multitask"
exp_name: 1_metab_multitask
y_vars:
- "do_min"
- "do_mean"
- "do_max"
- "GPP"
- "ER"
- "K600"
- "depth"
- "temp.water"
- do_min
- do_mean
- do_max
- GPP
- ER
- K600
- depth
- temp.water
lambdas:
- 1
- 1
18 changes: 9 additions & 9 deletions 2a_model/src/models/2_multitask_dense/config.yml
@@ -1,13 +1,13 @@
exp_name: "2_multitask_dense"
exp_name: 2_multitask_dense
y_vars:
- "do_min"
- "do_mean"
- "do_max"
- "GPP"
- "ER"
- "K600"
- "depth"
- "temp.water"
- do_min
- do_mean
- do_max
- GPP
- ER
- K600
- depth
- temp.water
lambdas:
- 1
- 1
54 changes: 37 additions & 17 deletions 2a_model/src/models/config_base.yml
@@ -1,6 +1,6 @@
out_dir: "../../../out/models"
out_dir: ../../../out/models
seed: False
num_replicates: 1
num_replicates: 10
trn_offset: 1.0
tst_val_offset: 1.0
epochs: 100
@@ -10,27 +10,47 @@ recurrent_dropout: 0.2
finetune_learning_rate: 0.01
early_stopping: False
validation_sites:
- "01472104"
- "01473500"
- "01481500"
- '01472104'
- '01473500'
- 01481500
test_sites:
- "01475530"
- "01475548"
- '01475530'
- 01475548
train_start_date: '1980-01-01'
train_end_date: '2014-10-01'
val_start_date: '2014-10-01'
val_end_date: '2015-10-01'
test_start_date: '2015-10-01'
test_end_date: '2022-10-01'
x_vars:
- "pr"
- "SLOPE"
- "tmmx"
- "tmmn"
- "srad"
- "CAT_BASIN_SLOPE"
- "CAT_ELEV_MEAN"
- "CAT_IMPV11"
- "CAT_CNPY11_BUFF100"
- "CAT_TWI"
- tmmn
- tmmx
- pr
- srad
- SLOPE
- TOTDASQKM
- CAT_BASIN_SLOPE
- TOT_BASIN_SLOPE
- CAT_ELEV_MEAN
- CAT_RDX
- CAT_BFI
- CAT_EWT
- CAT_TWI
- CAT_PPT7100_ANN
- TOT_PPT7100_ANN
- CAT_RUN7100
- CAT_CNPY11_BUFF100
- CAT_IMPV11
- TOT_IMPV11
- CAT_NLCD11_wetland
- TOT_NLCD11_wetland
- CAT_SANDAVE
- CAT_PERMAVE
- TOT_PERMAVE
- CAT_RFACT
- CAT_WTDEP
- TOT_WTDEP
- CAT_NPDES_MAJ
- CAT_NDAMS2010
- CAT_NORM_STORAGE2010

2 changes: 1 addition & 1 deletion _targets.R
@@ -110,7 +110,7 @@ base_config_options <- list(
out_dir = "../../../out/models",
# random seed for training; If FALSE, no seed. Otherwise, specify the seed:
seed = FALSE,
num_replicates = 1,
num_replicates = 10,
trn_offset = 1,
tst_val_offset = 1,
epochs = 100,