Updated to run Example 4, which includes the CMA-ES optimization rout…

…ine in the calibrate method of the marrmot_model class
RTIInternational · May 23, 2024 · 5337dee · 5337dee
1 parent f465b05
commit 5337dee
Show file tree

Hide file tree

Showing 60 changed files with 476 additions and 85 deletions.
diff --git a/.vscode/launch.json b/.vscode/launch.json
@@ -10,7 +10,8 @@
             "type": "debugpy",
             "request": "launch",
             "program": "${file}",
-            "console": "integratedTerminal"
+            "console": "integratedTerminal",
+            "justMyCode": true
         }
     ]
 }
diff --git a/examples/example_3.py b/examples/example_3.py
@@ -0,0 +1,89 @@
+"""
+Copyright (C) 2019, 2021 Wouter J.M. Knoben, Luca Trotter
+This file is part of the Modular Assessment of Rainfall-Runoff Models
+Toolbox (MARRMoT).
+MARRMoT is a free software (GNU GPL v3) and distributed WITHOUT ANY
+WARRANTY. See <https://www.gnu.org/licenses/> for details.
+
+Contact: [email protected]
+
+This example workflow  contains an example application of multiple models
+to a single catchment.
+It includes 6 steps:
+
+1. Data preparation
+2. Model choice and setup
+3. Model solver settings
+For each model in list
+    4. Model generation and set-up
+    5. Model runs
+6. Output visualization
+"""
+
+import numpy as np
+import matplotlib.pyplot as plt
+import pandas as pd
+from pymarrmot.models.models.m_29_hymod_5p_5s import m_29_hymod_5p_5s
+from pymarrmot.models.models.m_01_collie1_1p_1s import m_01_collie1_1p_1s
+from pymarrmot.models.models.m_27_tank_12p_4s import m_27_tank_12p_4s
+
+# 1. Prepare data
+df = pd.read_csv('c:/users/ssheeder/repos/pymarrmot/examples/Example_DataSet.csv')
+
+# Create a climatology data input structure
+input_climatology = {
+    'dates': df['Date'].to_numpy(),      # Daily data: date in ??? format
+    'precip': df['Precip'].to_numpy(),   # Daily data: P rate [mm/d]
+    'temp': df['Temp'].to_numpy(),       # Daily data: mean T [degree C]
+    'pet': df['PET'].to_numpy(),         # Daily data: Ep rate [mm/d]
+}
+
+# 2. Define the model settings
+model_list = [m_29_hymod_5p_5s,
+              m_01_collie1_1p_1s,
+              m_27_tank_12p_4s]
+
+# 3. Define the solver settings
+input_solver_opts = {
+    'resnorm_tolerance': 0.1,
+    'resnorm_maxiter': 6
+}
+
+# Create and run all model objects
+results_sampling = []
+
+results_sampling.append(['model name', 'parameter_values', 'output_ex', 'output_in', 'output_ss', 'output_wb'])
+
+for model in model_list:
+    print(f"Now starting model {model}.")
+    m = model()
+    m.delta_t = 1
+    model_range = m.parRanges
+    numPar = m.numParams
+    numStore = m.numStores
+
+    input_theta = model_range[:, 0] + np.random.rand(numPar) * (model_range[:, 1] - model_range[:, 0])
+    input_s0 = np.zeros(numStore)
+
+    output_ex, output_in, output_ss, output_waterbalance = m.get_output(4,input_climatology, input_s0, input_theta, input_solver_opts)
+
+    results_sampling.append([model, input_theta, output_ex, output_in, output_ss, output_waterbalance])
+
+# 6. Analyze the outputs
+t = input_climatology['dates']
+streamflow_observed = df['Q'].to_numpy()
+
+plt.figure(figsize=(10, 6))
+plt.plot(t, streamflow_observed, 'k', linewidth=2)
+for i in range(len(model_list)):
+    plt.plot(t, results_sampling[i + 1][2]['Q'])
+
+plt.legend(['Observed'] + model_list, loc='upper right')
+plt.title('Model sampling results')
+plt.ylabel('Streamflow [mm/d]')
+plt.xlabel('Time [d]')
+plt.grid(True)
+plt.xticks(rotation=45)
+plt.ylim([0, 70])
+plt.tight_layout()
+plt.show()
diff --git a/examples/example_4.py b/examples/example_4.py
@@ -0,0 +1,198 @@
+"""
+Copyright (C) 2019, 2021 Wouter J.M. Knoben, Luca Trotter
+This file is part of the Modular Assessment of Rainfall-Runoff Models
+Toolbox (MARRMoT).
+MARRMoT is a free software (GNU GPL v3) and distributed WITHOUT ANY
+WARRANTY. See <https://www.gnu.org/licenses/> for details.
+
+Contact: [email protected]
+
+This example workflow contains an example application of calibration of a
+single models to a single catchment.
+It includes 7 steps:
+
+1. Data preparation
+2. Model choice and setup
+3. Model solver settings and time-stepping scheme
+4. Calibration settings
+5. Model calibration
+6. Evaluation of calibration results
+7. Output visualization
+
+NOTE: this example uses a custom function 'my_cmaes' to perform the
+optimization, it is a wrapper around 'cmaes' to ensure inputs and outputs
+are consistent with other MATLAB's optimization algorithms (e.g.
+'fminsearch' or 'fminsearchbnd').
+While the wrapper is provided as part of MARRMoT, it requires the source
+code to 'cmaes' to function, it is available at:
+http://cma.gforge.inria.fr/cmaes.m
+The wrapper is necessary for the optimizer to function within the
+MARRMoT_model.calibrate method.
+Alternatively any model can be calibrated using any optimization
+algorithm using the MARRMoT_model.calc_par_fitness method which returns
+the value of an objective function and can be used as input to an
+optimizer.
+"""
+
+import numpy as np
+import matplotlib.pyplot as plt
+import pandas as pd
+from datetime import datetime
+from pymarrmot.models.models.m_29_hymod_5p_5s import m_29_hymod_5p_5s
+import pymarrmot.functions.objective_functions as objective_functions
+
+# 1. Prepare data
+df = pd.read_csv('c:/users/ssheeder/repos/pymarrmot/examples/Example_DataSet.csv')
+
+# Create a climatology data input structure
+input_climatology = {
+    'dates': df['Date'].to_numpy(),      # Daily data: date in ??? format
+    'precip': df['Precip'].to_numpy(),   # Daily data: P rate [mm/d]
+    'temp': df['Temp'].to_numpy(),       # Daily data: mean T [degree C]
+    'pet': df['PET'].to_numpy(),         # Daily data: Ep rate [mm/d]
+}
+
+
+
+# Extract observed streamflow
+Q_obs = df['Q'].to_numpy()
+
+# 2. Define the model settings and create the model object
+m = m_29_hymod_5p_5s()  # Create an instance of the model object
+parRanges = m.parRanges  # Parameter ranges
+numParams = m.numParams  # Number of parameters
+numStores = m.numStores  # Number of stores
+input_s0 = np.zeros(numStores)  # Initial storages (see note in paragraph 5 on model warm-up)
+
+# 3. Define the solver settings
+input_solver_opts = {
+    'resnorm_tolerance': 0.1,  # Root-finding convergence tolerance;
+    # users have reported differences in simulation accuracy (KGE scores) during calibration between Matlab and Octave for a given tolerance.
+    # In certain cases, Octave seems to require tighter tolerances to obtain the same KGE scores as Matlab does.
+    'resnorm_maxiter': 6  # Maximum number of re-runs
+}
+
+# 4. Define calibration settings
+# Settings for 'my_cmaes'
+# the opts struct is made up of two fields, the names are hardcoded, so
+# they cannot be changed:
+#    .sigma0:     initial value of sigma
+#    .cmaes_opts: struct of options for cmaes, see cmaes documentation
+#                 or type cmaes to see list of options and default values
+
+# starting sigma
+optim_opts = {'insigma': .3 * (parRanges[:, 1] - parRanges[:, 0])}  # starting sigma (this is default, could have left it blank)
+
+# other options
+optim_opts['LBounds'] = parRanges[:, 0]  # lower bounds of parameters
+optim_opts['UBounds'] = parRanges[:, 1]  # upper bounds of parameters
+optim_opts['PopSize'] = 4 + np.floor(3 * np.log(numParams))  # population size (default)
+optim_opts['TolX'] = 1e-6 * np.min(optim_opts['insigma'])  # stopping criterion on changes to parameters
+optim_opts['TolFun'] = 1e-6  # stopping criterion on changes to fitness function
+optim_opts['TolHistFun'] = 1e-6  # stopping criterion on changes to fitness function
+optim_opts['SaveFilename'] = 'wf_ex_4_cmaesvars.txt'  # output file of cmaes variables
+optim_opts['LogFilenamePrefix'] = 'wf_ex_4_'  # prefix for cmaes log-files
+
+# Other useful options
+optim_opts['EvalParallel'] = False  # change to true to run in parallel on a pool of CPUs (e.g. on a cluster)
+# uncomment to restart r-times until the condition
+# r = 2
+# optim_opts['Restarts'] = r
+# optim_opts['RestartIf'] = 'fmin > -.8' # OF is inverted, so this restarts
+#                                        unless the OF (KGE here) > 0.8
+
+# debugging options
+#optim_opts['MaxIter'] = 5  # just do 5 iterations, to check if it works
+#optim_opts['Seed'] = 1234  # for reproducibility
+
+# initial parameter set
+par_ini = np.mean(parRanges, axis=1)  # same as default value
+m.theta = par_ini  # set the initial parameter set
+
+# Choose the objective function
+of_name = 'of_kge'  # This function is provided as part of MARRMoT. See ./MARRMoT/Functions/Objective functions
+weights = [1, 1, 1]  # Weights for the three KGE components
+
+# Time periods for calibration.
+# Indices of timestep to use for calibration, here we are using 1 year for
+# warm-up, and 2 years for calibration, leaving the rest of the dataset for
+# independent evaluation.
+n = len(Q_obs)
+warmup = 365
+cal_idx = list(range(warmup + 1, warmup + 365 * 2 + 1))
+eval_idx = list(range(max(cal_idx), n))
+
+# 5. Calibrate the model
+# MARRMoT model objects have a "calibrate" method that takes uses a chosen
+# optimization algorithm and objective function to optimize the parameter
+# set. See MARRMoT_model class for details.
+
+# first set up the model
+m.delta_t = 1
+m.input_climate = input_climatology
+m.solver_opts = input_solver_opts
+m.S0 = input_s0
+
+par_opt, of_cal, stopflag, output = m.calibrate(
+    Q_obs,  # observed streamflow
+    cal_idx,  # timesteps to use for model calibration
+    'my_cmaes',  # function to use for optimization (must have same structure as fminsearch)
+    par_ini,  # initial parameter estimates
+    optim_opts,  # options to optim_fun
+    of_name,  # name of objective function to use
+    True, True,  # should the OF be inversed?   Should I display details about the calibration?
+    weights  # additional arguments to of_name
+)
+
+# 6. Evaluate the calibrated parameters on unseen data
+# Run the model with calibrated parameters, get only the streamflow
+Q_sim = m.get_streamflow(par_opt)
+
+# Compute evaluation performance
+obj_func = getattr(objective_functions, of_name) # Objective function (here 'of_KGE')
+of_eval = obj_func(Q_obs,       # Observed flow during evaluation period
+                   Q_sim,       # Simulated flow during evaluation period, using calibrated parameters
+                   eval_idx,    # Indices of evaluation period
+                   weights)     # KGE component weights
+of_eval = of_eval[0]  # Extract the KGE value from the tuple
+
+# 7. Visualize the results
+# Prepare a time vector
+t = [datetime.fromtimestamp(t) for t in m.input_climate['dates']]
+
+# Compare simulated and observed streamflow
+
+plt.figure(figsize=(10, 6), facecolor='w')
+plt.box(True)
+
+# Flows
+plt.plot(t, Q_obs, 'k', label='Q_obs')
+plt.plot(t, Q_sim, 'r', label='Q_sim')
+
+# Dividing line
+plt.axvline(t[max(cal_idx)], color='b', linestyle='--', linewidth=2, label='Cal // Eval')
+plt.axvline(t[warmup], color='g', linestyle='--', linewidth=2, label='warmup // Cal')
+
+# Legend & text
+plt.legend(loc='upper left')
+plt.title('Model calibration and evaluation results')
+plt.ylabel('Streamflow [mm/d]')
+plt.xlabel('Time [d]')
+
+max_obs = np.max(Q_obs)
+max_sim = np.max(Q_sim)
+max_val = 1.05 * max(max_obs, max_sim)
+max_t = max(t)
+min_t = min(t)
+del_t = max(t) - min(t)
+
+plt_txt = 'Cal ' + of_name + ' = %.2f' % round(of_cal,2)
+plt_txt += '\nEval ' + of_name + ' = %.2f' % round(of_eval,2)
+
+plt.text(min_t + del_t *.85, max_val * .85, plt_txt, fontsize = 10, bbox=dict(facecolor='white', edgecolor='red'))
+plt.xticks(rotation=45)
+
+plt.ylim([0, max_val])
+plt.tick_params(axis='both', which='major', labelsize=12)
+plt.tight_layout()
+plt.show(block=True)
diff --git a/outcmaes/axlen.dat b/outcmaes/axlen.dat
@@ -0,0 +1,2 @@
+% # columns="iteration, evaluation, sigma, max axis length,  min axis length, all principal axes lengths  (sorted square roots of eigenvalues of C)", seed=581622, Thu May 23 13:25:19 2024
+1 8 0.914647981032298 1.0000400008000108 1.0 1.0 1.00001000005 1.0000200002000015 1.0000300004500045 1.0000400008000108
diff --git a/outcmaes/axlencorr.dat b/outcmaes/axlencorr.dat
@@ -0,0 +1,2 @@
+% # columns="iteration, evaluation, min max(neg(.)) min(pos(.)) max correlation, correlation matrix principal axes lengths  (sorted square roots of eigenvalues of correlation matrix)", seed=581622, Thu May 23 13:25:19 2024
+1 8 -0.10816836668420154 -0.027037173283884528 0.04466017330713045 0.08447965524701906 0.9154832908288328 0.9685323926232423 1.0016151054911655 1.0242153450485072 1.082398011856912
diff --git a/outcmaes/axlenprec.dat b/outcmaes/axlenprec.dat
@@ -0,0 +1,2 @@
+% # columns="iteration, evaluation, min max(neg(.)) min(pos(.)) max correlation, correlation matrix principal axes lengths  (sorted square roots of eigenvalues of correlation matrix)", seed=581622, Thu May 23 13:25:19 2024
+1 8 -0.08454984751796392 -0.04660240315446251 0.0303716355834803 0.10588756675618491 0.9163263717187823 0.970760339100305 0.9957568698812483 1.0254102610826064 1.0839614370785302
diff --git a/outcmaes/fit.dat b/outcmaes/fit.dat
@@ -0,0 +1,2 @@
+% # columns="iteration, evaluation, sigma, axis ratio, bestever, best, median, worst objective function value, further objective values of best", seed=581622, Thu May 23 13:25:19 2024, <python>{}</python>
+1 8 0.914647981032298 1.0000400008000108 -0.03268569325229487 -3.2685693252294867e-02 -0.03268569325229487 -0.03268569325229487   
diff --git a/outcmaes/sigvec.dat b/outcmaes/sigvec.dat
@@ -0,0 +1,2 @@
+% # columns="iteration, evaluation, sigma, beta, void,  sigvec==sigma_vec.scaling factors from diagonal decoding", seed=581622, Thu May 23 13:25:19 2024, <python>{}</python>
+1 8 0.914647981032298 1 0 599.6999999999999 3.0 0.3 0.3 0.3
diff --git a/outcmaes/stddev.dat b/outcmaes/stddev.dat
@@ -0,0 +1,2 @@
+% # columns="iteration, evaluation, sigma, void, void,  stds==sigma*sigma_vec.scaling*sqrt(diag(C))", seed=581622, Thu May 23 13:25:19 2024, <python>{}</python>
+1 8 0.914647981032298 0 0 548.3099360911514 2.805167756156038 0.2614078771696393 0.2624990947176959 0.2713936714567164
diff --git a/outcmaes/xmean.dat b/outcmaes/xmean.dat
@@ -0,0 +1,2 @@
+% # columns="iteration, evaluation, void, void, void, xmean", seed=581622, Thu May 23 13:25:19 2024, <python>{}</python> # scaling_of_variables: 1, typical_x: 0
+1 8 0 0 0 654.5122932599646 3.661683907932788 0.2777202634101034 0.439071381950522 0.6169564844827764
diff --git a/outcmaes/xrecentbest.dat b/outcmaes/xrecentbest.dat
@@ -0,0 +1,2 @@
+% # columns="iter, evals, sigma, 0, fitness, xbest" seed=581622, Thu May 23 13:25:19 2024, <python>{}</python>
+1 8 0.914647981032298 0 -0.03268569325229487 266.7118468801151 6.042852169155995 0.13597395782092375 0.23869199917925976 0.7612633551792338
diff --git a/src/pymarrmot/Functions/Objective_functions/__init__.py b/src/pymarrmot/Functions/Objective_functions/__init__.py
@@ -0,0 +1,13 @@
+from pymarrmot.functions.objective_functions.of_bias_penalised_log import of_bias_penalised_log as of_bias_penalised_log
+from pymarrmot.functions.objective_functions.of_inverse_kge import of_inverse_kge as of_inverse_kge
+from pymarrmot.functions.objective_functions.of_inverse_nse import of_inverse_nse as of_inverse_nse
+from pymarrmot.functions.objective_functions.of_kge import of_kge as of_kge
+from pymarrmot.functions.objective_functions.of_log_nse import of_log_nse as of_log_nse
+from pymarrmot.functions.objective_functions.of_mare import of_mare as of_mare
+from pymarrmot.functions.objective_functions.of_mean_hilo_kge import of_mean_hilo_kge as of_mean_hilo_kge
+from pymarrmot.functions.objective_functions.of_mean_hilo_root5_kge import of_mean_hilo_root5_kge as of_mean_hilo_root5_kge
+from pymarrmot.functions.objective_functions.of_nrmse import of_nrmse as of_nrmse
+from pymarrmot.functions.objective_functions.of_nse import of_nse as of_nse
+from pymarrmot.functions.objective_functions.of_pcmare import of_pcmare as of_pcmare
+from pymarrmot.functions.objective_functions.of_rmse import of_rmse as of_rmse
+from pymarrmot.functions.objective_functions.of_root5_kge import of_root5_kge as of_root5_kge
diff --git a/src/pymarrmot/Functions/Objective_functions/of_inverse_nse.py b/src/pymarrmot/Functions/Objective_functions/of_inverse_nse.py
@@ -2,7 +2,7 @@
 from pymarrmot.functions.objective_functions.check_and_select import check_and_select
 from typing import Tuple
 
-def of_inverse_NSE(obs: np.array, sim: np.array, idx: np.array=None) -> Tuple[float, np.array]:
+def of_inverse_nse(obs: np.array, sim: np.array, idx: np.array=None) -> Tuple[float, np.array]:
     """
     Calculates the Nash-Sutcliffe Efficiency (Nash & Sutcliffe, 1970) of the log of simulated streamflow.
     Ignores time steps with negative flow values. Adds a constant e of 1/100 of mean(obs) to avoid issues with zero

diff --git a/src/pymarrmot/Functions/Objective_functions/of_kge.py b/src/pymarrmot/Functions/Objective_functions/of_kge.py
@@ -55,8 +55,8 @@ def of_kge(obs: np.array, sim: np.array, idx: np.array=None, w=None):
     if w is None:
         w = w_default
     else:
-        if not (w.size == 3 and np.ndim(w) == 1):
-            raise ValueError('Weights should be a 3x1 or 1x3 vector.')
+        if not (len(w) == 3 and np.ndim(w) == 1):
+            raise ValueError('Weights should be a 1x3 vector.')
 
     # Calculate components
     c = np.array([np.corrcoef(obs, sim)[0, 1], np.std(sim) / np.std(obs), np.mean(sim) / np.mean(obs)])

diff --git a/src/pymarrmot/Functions/Objective_functions/of_mean_hilo_root5_kge.py b/src/pymarrmot/Functions/Objective_functions/of_mean_hilo_root5_kge.py
@@ -1,10 +1,10 @@
 import numpy as np
 from pymarrmot.functions.objective_functions.check_and_select import check_and_select
 from pymarrmot.functions.objective_functions.of_kge import of_kge
-from pymarrmot.functions.objective_functions.of_mean_hilo_root5_kge import of_root5_kge
+from pymarrmot.functions.objective_functions.of_root5_kge import of_root5_kge
 from typing import Tuple
 
-def of_mean_hilo_root5_KGE(obs: np.array, sim: np.array, idx: np.array=None, w: np.array=None) -> Tuple[float, np.array, np.array, np.array]:
+def of_mean_hilo_root5_kge(obs: np.array, sim: np.array, idx: np.array=None, w: np.array=None) -> Tuple[float, np.array, np.array, np.array]:
     """
     Calculates the average Kling-Gupta Efficiency of the untransformed and fifth root of streamflow
     (Gupta et al., 2009) of the untransformed and fifth root (Chiew et al., 1993) of

diff --git a/src/pymarrmot/Functions/solver_functions/NewtonRaphson.py b/src/pymarrmot/Functions/solver_functions/NewtonRaphson.py
@@ -43,7 +43,6 @@ def NewtonRaphson(fun, x0, options=None):
     # Initialize
     x0 = np.array(x0).astype(float)
     x0 = np.array(x0).reshape(-1, 1)  # Ensure column vector
-    #x0 = np.array(x0).reshape(-1, 1)  # Ensure column vector
     defaultopt = {'TolX': 1e-12, 'TolFun': 1e-6, 'MaxIter': 1000}
     if options is None:
         options = defaultopt
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		% # columns="iteration, evaluation, sigma, max axis length, min axis length, all principal axes lengths (sorted square roots of eigenvalues of C)", seed=581622, Thu May 23 13:25:19 2024
		1 8 0.914647981032298 1.0000400008000108 1.0 1.0 1.00001000005 1.0000200002000015 1.0000300004500045 1.0000400008000108
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		% # columns="iteration, evaluation, min max(neg(.)) min(pos(.)) max correlation, correlation matrix principal axes lengths (sorted square roots of eigenvalues of correlation matrix)", seed=581622, Thu May 23 13:25:19 2024
		1 8 -0.10816836668420154 -0.027037173283884528 0.04466017330713045 0.08447965524701906 0.9154832908288328 0.9685323926232423 1.0016151054911655 1.0242153450485072 1.082398011856912
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		% # columns="iteration, evaluation, min max(neg(.)) min(pos(.)) max correlation, correlation matrix principal axes lengths (sorted square roots of eigenvalues of correlation matrix)", seed=581622, Thu May 23 13:25:19 2024
		1 8 -0.08454984751796392 -0.04660240315446251 0.0303716355834803 0.10588756675618491 0.9163263717187823 0.970760339100305 0.9957568698812483 1.0254102610826064 1.0839614370785302
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		% # columns="iteration, evaluation, sigma, beta, void, sigvec==sigma_vec.scaling factors from diagonal decoding", seed=581622, Thu May 23 13:25:19 2024, <python>{}</python>
		1 8 0.914647981032298 1 0 599.6999999999999 3.0 0.3 0.3 0.3
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		% # columns="iteration, evaluation, sigma, void, void, stds==sigmasigma_vec.scalingsqrt(diag(C))", seed=581622, Thu May 23 13:25:19 2024, <python>{}</python>
		1 8 0.914647981032298 0 0 548.3099360911514 2.805167756156038 0.2614078771696393 0.2624990947176959 0.2713936714567164
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		% # columns="iteration, evaluation, void, void, void, xmean", seed=581622, Thu May 23 13:25:19 2024, <python>{}</python> # scaling_of_variables: 1, typical_x: 0
		1 8 0 0 0 654.5122932599646 3.661683907932788 0.2777202634101034 0.439071381950522 0.6169564844827764
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		% # columns="iter, evals, sigma, 0, fitness, xbest" seed=581622, Thu May 23 13:25:19 2024, <python>{}</python>
		1 8 0.914647981032298 0 -0.03268569325229487 266.7118468801151 6.042852169155995 0.13597395782092375 0.23869199917925976 0.7612633551792338