cleaned repo

cerea-daml · Jan 10, 2024 · 6658d3f · 6658d3f
1 parent a24d8fc
commit 6658d3f
Show file tree

Hide file tree

Showing 7 changed files with 557 additions and 279 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,2 +1,3 @@
 .ipynb_checkpoints
+__pycache__/
 *.swp
diff --git a/CITATION.cff b/CITATION.cff
@@ -0,0 +1,13 @@
+cff-version: 1.2.0
+message: "If you use this software, please cite it as below."
+authors:
+- family-names: "Farchi"
+  given-names: "Alban"
+  orcid: "https://orcid.org/0000-0002-4162-8289"
+- family-names: "Bocquet"
+  given-names: "Marc"
+title: "Introduction to surrogate modelling in the geosciences."
+version: 1.0.0
+doi: 
+date-released: 2024-01-10
+url: "https://github.com/cerea-daml/tdma-practical-session"
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2023 Alban Farchi and Marc Bocquet
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
@@ -0,0 +1,31 @@
+# Introduction to surrogate modelling in the geosciences
+
+#### Marc Bocquet¹ [[email protected]](mailto:[email protected]) and Alban Farchi¹ [[email protected]](mailto:[email protected])
+##### (1) CEREA, École des Ponts and EdF R&D, IPSL, Île-de-France, France
+
+During this session, we will apply standard machine learning methods to learn the dynamics of the Lorenz 1996 model. 
+The objective here is to get a preview of how machine learning can be applied to geoscientific models in a low-order models where testing is quick.
+
+These practical sessions are part of the 
+[TDMA summer school](https://tdma2023.sciencesconf.org) 
+held in 2023 in Grenoble, France.
+
+## Installation
+
+Install conda, for example through [miniconda](https://docs.conda.io/en/latest/miniconda.html) or through [mamba](https://mamba.readthedocs.io/en/latest/installation.html).
+
+Clone the repertory:
+
+    $ git clone [email protected]:cerea-daml/tdma-practical-session.git
+
+Go to the repertory. Once there, create a dedicated anaconda environment for the sessions:
+
+    $ conda env create -f environment.yaml
+
+Activate the newly created environment:
+
+    $ conda activate tdma
+
+Open the notebook (e.g. with Jupyter) and follow the instructions:
+
+    $ jupyter-notebook questions.ipynb
diff --git a/answers.py b/answers.py
@@ -0,0 +1,235 @@
+
+class Lorenz1996Model:
+    """Implementation of the Lorenz 1996 model.
+    
+    Use the `tendency()` method to compute the model tendencies (i.e., dx/dt)
+    and use the `forward()` method to apply an integration step forward in time,
+    using a fourth order Runge--Kutta scheme.
+    
+    Attributes
+    ----------
+    Nx : int
+        The number of variables in the model.
+    F : float
+        The model forcing.
+    dt : float
+        The model integration time step.
+    """
+
+    def __init__(self, Nx, F, dt):
+        """Initialise the model."""
+        self.Nx = Nx
+        self.F = F
+        self.dt = dt
+
+    def tendency(self, x):
+        """Compute the model tendencies dx/dt.
+        
+        The tendencies are computed by batch using
+        `numpy` vectorisation.
+        
+        Parameters
+        ----------
+        x : np.ndarray, shape (..., Nx)
+            Batch of input states.
+            
+        Returns
+        -------
+        dx_dt : np.ndarray, shape (..., Nx)
+            Model tendencies computed at the input states.
+        """
+        # TODO: implement it!
+        xp = np.roll(x, shift=-1, axis=-1)
+        xmm = np.roll(x, shift=+2, axis=-1)
+        xm = np.roll(x, shift=+1, axis=-1)
+        return (xp - xmm)*xm - x + self.F
+
+    def forward(self, x):
+        """Apply an integration step forward in time.
+        
+        This method uses a fourth-order Runge--Kutta scheme:
+        k1 <- dx/dt at x
+        k2 <- dx/dt at x + dt/2*k1
+        k3 <- dx/dt at x + dt/2*k2
+        k4 <- dx/dt at x + dt*k3
+        k <- (k1 + 2*k2 + 2*k3 + k4)/6
+        x <- x + dt*k
+        
+        Parameters
+        ----------
+        x : np.ndarray, shape (..., Nx)
+            Batch of input states.
+            
+        Returns
+        -------
+        integrated_x : np.ndarray, shape (..., Nx)
+            The integrated states after one step.
+        """
+        # TODO: implement it!
+        k1 = self.tendency(x)
+        k2 = self.tendency(x+self.dt/2*k1)
+        k3 = self.tendency(x+self.dt/2*k2)
+        k4 = self.tendency(x+self.dt*k3)
+        k = (k1 + 2*k2 + 2*k3 + k4)/6
+        return x + self.dt*k
+
+def perform_true_model_integration(Nt, Ne=1, seed=None):
+    """Perform an integration in time using the true model.
+    
+    The initial state is a batch of random fields.
+    
+    Parameters
+    ----------
+    Nt : int
+        The number of integration steps to perform.
+    Ne : int
+        The batch size.
+    seed : int
+        The random seed for the initialisation.
+        
+    Returns
+    -------
+    xr : np.ndarray, shape (Nt+1, Ne, Nx)
+        The integrated batch of trajectories.
+    """
+    # define rng
+    rng = np.random.default_rng(seed=seed)
+
+    # allocate memory
+    xt = np.zeros((Nt+1, Ne, true_model.Nx))
+
+    # initialisation
+    xt[0] = rng.normal(loc=3, scale=1, size=(Ne, true_model.Nx))
+
+    # TODO: implement the model integration for Nt steps
+    for t in trange(Nt, desc='model integration'):
+        xt[t+1] = true_model.forward(xt[t])
+
+    # return the trajectory
+    return xt
+
+def extract_input_output(xt):
+    # TODO: extract x (input)
+    x = xt[:-1]
+    # TODO: extract y (output)
+    y = xt[1:]
+    # return input/output
+    return (x, y)
+
+def make_dense_network(seed, num_layers, num_nodes, activation):
+    """Build a sequential neural network using dense layers.
+    
+    Parameters
+    ----------
+    seed : int
+        The random seed.
+    num_layers : int
+        The number of hidden layers.
+    num_nodes : int
+        The number of nodes per hidden layer.
+    activation : str
+        The activation function for the hidden layers.
+        
+    Returns
+    -------
+    network : tf.keras.Sequential
+    """
+    # set seed
+    tf.keras.utils.set_random_seed(seed=seed)
+    # TODO: create a sequential network
+    network = tf.keras.models.Sequential()
+    # TODO: add the input layer
+    network.add(tf.keras.Input(shape=(true_model.Nx,)))
+    # TODO: add the hidden layers
+    for i in range(num_layers):
+        network.add(tf.keras.layers.Dense(num_nodes, activation=activation))
+    # TODO: add the output layer
+    network.add(tf.keras.layers.Dense(true_model.Nx))
+    # compile the neural network
+    network.compile(loss='mse', optimizer='adam')
+    # print short summary
+    network.summary()
+    # return the network
+    return network
+
+def compute_trajectories(network):
+    """Compute the forecast skill trajectories.
+    
+    Parameters
+    ----------
+    network : tf.keras.Model
+        The model to evaluate.
+        
+    Returns
+    -------
+    xt : np.ndarray, shape (Nt, Ne, Nx)
+        The trajectories.
+    """
+    # allocate memory
+    (Nt, Ne, Nx) = xt_fs.shape
+    xt = np.zeros((Nt, Ne, Nx))
+
+    # initialisation
+    xt[0] = xt_fs[0]
+
+    # TODO: implement the neural network integration
+    for t in trange(Nt-1, desc='surrogate model integration'):
+        x_norm = normalise_x(xt[t])
+        y_norm = network.predict(x_norm, batch_size=Ne, verbose=0)
+        xt[t+1] = denormalise_y(y_norm)
+
+    return xt
+
+def make_convolutional_network(seed, num_layers, num_filters, kernel_size, activation):
+    """Build a sequential neural network with convolutional layers.
+    
+    Parameters
+    ----------
+    seed : int
+        The random seed.
+    num_layers : int
+        The number of hidden layers.
+    num_filters : int
+        The number of convolution filters per hidden layer.
+    kernel_size : int
+        The convolution kernel size for the hidden layer.
+    activation : str
+        The activation function for the hidden layers.
+        
+    Returns
+    -------
+    network : tf.keras.Sequential
+    """
+    # set seed
+    tf.keras.utils.set_random_seed(seed=seed)
+    # reshape layers
+    reshape_input = tf.keras.layers.Reshape((true_model.Nx, 1))
+    reshape_output = tf.keras.layers.Reshape((true_model.Nx,))
+    # padding layer
+    border = kernel_size//2
+    def apply_padding(x):
+        x_left = x[..., -border:, :]
+        x_right = x[..., :border, :]
+        return tf.concat([x_left, x, x_right], axis=-2)
+    padding_layer = tf.keras.layers.Lambda(apply_padding)   
+    # TODO: create a sequential network
+    network = tf.keras.models.Sequential()
+    # TODO: add the input layer
+    network.add(tf.keras.Input(shape=(true_model.Nx,)))
+    # TODO: add the reshape_input layer
+    network.add(reshape_input)
+    # TODO: add the hidden layers
+    for i in range(num_layers):
+        network.add(padding_layer)
+        network.add(tf.keras.layers.Conv1D(num_filters, kernel_size, activation=activation))
+    # TODO: add the output layer
+    network.add(tf.keras.layers.Conv1D(1, 1))
+    # TODO: add the reshape_output layer
+    network.add(reshape_output)
+    # compile the neural network
+    network.compile(loss='mse', optimizer='adam')
+    # print short summary
+    network.summary()
+    # return the network
+    return network
+