From ea60a1607c203eef98987823bb357d83604d3f97 Mon Sep 17 00:00:00 2001 From: Iglesia Dolci Date: Thu, 7 Sep 2023 15:29:04 +0100 Subject: [PATCH] notebooks' review --- checkpoint_schedules/basic_schedules.py | 2 +- checkpoint_schedules/schedule.py | 2 +- docs/notebooks/tutorial.ipynb | 560 ++++++++++++++++++------ 3 files changed, 421 insertions(+), 143 deletions(-) diff --git a/checkpoint_schedules/basic_schedules.py b/checkpoint_schedules/basic_schedules.py index 6a66230..15c337e 100644 --- a/checkpoint_schedules/basic_schedules.py +++ b/checkpoint_schedules/basic_schedules.py @@ -125,7 +125,7 @@ def _iterator(self): while True: while self._r < self._max_n: n1 = self._max_n - self._r - n0 = ((n1 - 1) // sys.maxsize) * sys.maxsize + n0 = n1 - 1 self._n = n0 if self._move_data: diff --git a/checkpoint_schedules/schedule.py b/checkpoint_schedules/schedule.py index 6ba3ae5..3c838bb 100644 --- a/checkpoint_schedules/schedule.py +++ b/checkpoint_schedules/schedule.py @@ -17,7 +17,7 @@ "EndForward", "EndReverse", "CheckpointSchedule", - "StorageType" + "StorageType", ] diff --git a/docs/notebooks/tutorial.ipynb b/docs/notebooks/tutorial.ipynb index 7a0a25c..13bf028 100644 --- a/docs/notebooks/tutorial.ipynb +++ b/docs/notebooks/tutorial.ipynb @@ -6,21 +6,22 @@ "source": [ "# Using *checkpoint_schedules*\n", "\n", - "This tutorial aims to introduce the usage of *checkpoint_schedules* for step based incremental checkpointing of the adjoints to computer models. This tutorial aims to reach illustrative purposes only. However, the code is fully functional and can be used as a starting point for more complex applications.\n", + "This notebook aims to present an illustrative example explaining the usage of *checkpoint_schedules* for step-based incremental checkpointing of the adjoints to computer models. 
While it is an illustrative example, this code can also be employed for real applications.\n", "\n", "## Managing the forward and adjoint executions with schedules\n", - "We initially write the `CheckpointingManager` class, which is intended to manage the execution of forward and adjoint models using a checkpointing schedule. `CheckpointingManager` constructor takes the argument `max_n`, which represents the maximum number of steps for the models execution. The attributes `index_action` and `list_actions` are used only for illustration matter.\n", + "Firstly, we write the `CheckpointingManager` class, which manages the execution of forward and adjoint models using a checkpointing schedule. The `CheckpointingManager` constructor takes the maximum number of steps for the models' execution, `max_n`. The `index_action` attribute is a counter of the actions executed, and `list_actions` is a list of the actions executed. The attributes `index_action` and `list_actions` are used here for illustration purposes only.\n", "\n", - "`CheckpointingManager` has the method `execute` to manage the step executions of the forward and adjoint models. `execute` takes the `cp_schedule` parameter that expects to be a generator given by *checkpoint_schedules* package. Inside of `execute`, the code iterates over elements in the `cp_schedule` by using a `for` loop. The iteration reached with `enumerate(cp_schedule)` returns a tuple `(count, cp_action)` where `count` is the list index, and `cp_action` is a *checkpoint_schedules* action. The latter is argument of a single-dispatch generic function `action` designed to handle different types of actions using specific functions. A specific function is for instance the `action_forward` that is registered to handle the `Forward` action. Hence, if `cp_action` is the *Forward* action, the `action_forward` function is called and inside of this specific function we can either implement or call any code required to execute the forward model. 
Analogously with the other *checkpoint_schedules* actions *Reverse*, *Copy*, *Move*, *EndForward* and *EndReverse*.\n", + "In `CheckpointingManager`, we have the method `execute` able to make the step executions of the forward and adjoint models. `execute` takes the `cp_schedule` argument, which expects to be a generator provided by the *checkpoint_schedules* package. In the `execute` method, we iterate over the elements of the `cp_schedule` using `enumerate(cp_schedule)`, which yields a tuple `(count, cp_action)`. Here, `count` represents the index of the action within the list, and `cp_action` is a checkpoint action provided by *checkpoint_schedules*.\n", + "\n", + "`cp_action` is the argument to a single-dispatch generic function named `action`. The purpose of this function is to process different types of checkpoint actions using specific function. The overloading of the `action` function is given by its `register()` attribute employed as a decorator in the specific functions, e.g., we have the `@action.register(Forward)` decorator for the `action_forward` function. Thus, `action` is overloaded by `action_forward` if `cp_action` is the `Forward` action. Inside the `action_forward`, we can implement the necessary code to the step execution of the forward model. \n", "\n", "**Notes:**\n", - "* The *checkpoint_schedules* action will treated with more details in the following section of this tutorial.\n", - "* The codes insider of the specific functions are intended to be illustrative only. Hence, we only added symbolic print statements to illustrate the execution of the forward and adjoint models.\n" + "* The `action.register` decorator takes *checkpoint_schedules* actions as the arguments. 
These actions will be presented with more detail in the following sections of this tutorial.\n" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -158,12 +159,12 @@ "source": [ "## A trivial schedule for forward computation\n", "\n", - "Firstly, let us define the maximum solvers time steps `max_n = 4`. Next, we instantiate an object named `solver_manager` of the `CheckpointingManager` class, using the `max_n` value.\n" + "We start with a trivial checkpoint schedule used to execute only the forward solver, excluding any data storage. Hence, let us define the maximum solver time steps as `max_n = 4` and the object `solver_manager` used to manage the solver (in this case, a forward solver)." ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -175,14 +176,25 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The `NoneCheckpointSchedule` class provides a schedule object providing execute` method. In this case, the schedule is built to execute the forward solver exclusively, excluding any data storage.\n" + "In this current case, `NoneCheckpointSchedule` class provides the checkpoint schedule to the time execution of the forward model. " ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Action index: Run-time illustration Action:\n", + "--------------- ----------------------- -------------------------------------------------------\n", + " 0 ---▷---▷---▷---▷ Forward(0, sys.maxsize, False, False, StorageType.NONE)\n", + " 1 End Forward EndForward()\n" + ] + } + ], "source": [ "cp_schedule = NoneCheckpointSchedule() # Checkpoint schedule object\n", "solver_manager.execute(cp_schedule) # Execute the forward solver by following the schedule." 
@@ -194,41 +206,31 @@ "source": [ "When executing `solver_manager.execute(cp_schedule)`, the output provides a visual representation of the three distinct informations: \n", "\n", - "1. An index linked to each action,\n", + "- An index linked to each action,\n", "\n", - "2. A visualisation demonstrating the advancing of time-steps,\n", + "- A visualisation showing the steps advancing,\n", "\n", - "3. Actions associated with each step.\n", + "- The actions associated with each step.\n", "\n", - "Notice in the output that we have two actions: *Forward* and *EndForward()*. The fundamental structure of the *Forward* action is given by:\n", + "Notice in the output that we have two actions: *Forward* and *EndForward()*. The latter indicates the forward solver has reached the end of the step interval. Whereas the *Forward* action is fundamentally given by:\n", "```python\n", "Forward(n0, n1, write_ics, write_adj_deps, storage_type)\n", "```\n", "This action is read as:\n", "\n", - " - Advance the forward solver from step `n0` to the start of any step `n1`.\n", - "\n", - " - `write_ics` and `write_adj_deps` are booleans that indicate whether the forward solver should store the forward restart data and the forward data required for the adjoint computation, respectively.\n", - "\n", - " - `storage_type` indicates the type of storage required for the forward restart data and the forward data required for the adjoint computation.\n", - "\n", - "Therefore, for the current example, the `Forward` action indicates the following:\n", - "\n", - " - Advance the forward solver from step `n0 = 0` to the start of any step `n1`.\n", + "- Advance the forward from the start of step `n0` to the start of a step `n1`. \n", + " In this case, `n1 = sys. 
max size is used because it is not a prerequisite to specify `n1` for the `NoneCheckpointSchedule` schedule, which leads to the flexibility to determine the desired steps during the forward execution.\n", "\n", - " - Both `write_ics` and `write_adj_deps` are set to `'False'`, indicating no storage of the forward restart data and the forward data required for the adjoint computation. \n", + "- `write_ics` and `write_adj_deps` are booleans that indicate whether the forward solver should store the forward restarting data and the forward data required for the adjoint computation, respectively.\n", "\n", - " - The storage type is `StorageType.NONE`, indicating that no specific storage type is required. \n", + "- `storage_type` indicates the type of storage type, which can be `StorageType.NONE`, `StorageType.RAM`, `StorageType.DISK` or `StorageType.WORK`.\n", "\n", - "*This schedule is built without specifying a maximum step for the forward solver execution. Therefore, using the `NoneCheckpointSchedule` schedule offers the flexibility to determine the desired steps while the forward solver is time advancing.*\n", "\n", - "In the current example, we determine the maximum step `max_n = 4`, an attribute within the `CheckpointingManager`. Next, we conclude the forward solver execution with the following python script:\n", + "As mentioned above, `NoneCheckpointSchedule` schedule is flexible to specify the desired steps during the forward execution. In this case, we can specify the steps by using the `finalize` method as shown below:\n", "```python\n", " cp_schedule.finalize(n1)\n", "```\n", - "where `n1 = max_n = 4`. This line is incorporated in the `action_forward` that is `singledispatch` registered function from `CheckpointingManager.execute`.\n", - "\n", - "Another action provided by the current schedule is the `EndForward()`, which indicates the forward solver has reached the end of the time interval.\n" + "where `n1 = max_n = 4`. 
This code line is incorporated in the `action_forward`.\n" ] }, { @@ -237,20 +239,29 @@ "source": [ "## Trivial Schedule for all storage data\n", "\n", - "We now begin to present the schedules when there is the adjoint solver computation. \n", - "\n", - "The following code is valuable for the cases where the user intend to store the forward data for all time-steps. This schedule is achieved by using the `SingleMemoryStorageSchedule` class.\n", - "\n", - "Storing the forward restart data is unnecessary by this schedule, as there is no need to recompute the forward solver while time advancing the adjoint solver.\n", + "The following code is practical for cases where the user intends to store the forward data for all steps. This schedule is given by `SingleMemoryStorageSchedule`.\n", "\n", - "*The `SingleMemoryStorageSchedule` schedule offers the flexibility to determine the desired steps while the forward solver is time advancing.*" + "*The `SingleMemoryStorageSchedule` schedule does not require the `n1` step. 
Analagous to `NoneCheckpointSchedule`, `SingleMemoryStorageSchedule` can create its schedule without prerequisite of specifying the `n1`.*" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Action index: Run-time illustration Action:\n", + "--------------- ----------------------- ------------------------------------------------------\n", + " 0 ---▶---▶---▶---▶ Forward(0, sys.maxsize, False, True, StorageType.WORK)\n", + " 1 End Forward EndForward()\n", + " 2 ◀---◀---◀---◀--- Reverse(4, 0, True)\n", + " 3 End Reverse EndReverse()\n" + ] + } + ], "source": [ "cp_schedule = SingleMemoryStorageSchedule()\n", "solver_manager.execute(cp_schedule)\n" @@ -262,45 +273,65 @@ "source": [ "In this particular case, the *Forward* action is given by:\n", "\n", - " - Advance the forward solver from the step `n0 = 0` to the start of any step `n1`.\n", + "- Advance the forward solver from the step `n0 = 0` to the start of any step `n1`.\n", + "\n", + "- Do not store the forward restart data once `write_ics` is `'False'`.\n", "\n", - " - Do not store the forward restart data once if `write_ics` is `'False'`.\n", + "- Store the forward data required for the adjoint computation once `write_adj_deps` is `'True'`.\n", "\n", - " - Store the forward data required for the adjoint computation once `write_adj_deps` is `'True'`.\n", - " \n", - " - Storage type is ``, which indicates the storage that has imediate usage. 
I this case the usage is the adjoint computation.\n", + "- Storage type is ``, which is the working memory location for the adjoint.\n", "\n", - "When the adjoint computation is considered in the schedule, we have the *Reverse* action that is fundamentally given by:\n", + "For the adjoint computation, we have the *Reverse* action that has the base form:\n", "```python\n", "Reverse(n0, n1, clear_adj_deps)\n", "```\n", "This is interpreted as follows:\n", "\n", - " - Advance the adjoint solver from the step `n0` to the start of the step `n1`.\n", + "- Advance the adjoint model from the step `n0` to the start of the step `n1`.\n", "\n", - " - Clear the adjoint dependency data if `clear_adj_deps` is `'True'`.\n", + "- Clear the adjoint dependency data if `clear_adj_deps` is `'True'`.\n", "\n", - "In the current example, the *Reverse* action reads:\n", + "Thus, in the current example, the *Reverse* action reads:\n", "\n", - " - Advance the forward solver from the step `4` to the start of the step `0`.\n", + "- Advance the adjoint from the start of step 4 to the start of the step 0 (i.e. over step 0).\n", "\n", - " - Clear the adjoint dependency (forward data) once `clear_adj_deps` is `'True'`.\n", + "- Clear the forward data used by the adjoint (`clear_adj_deps` is `'True'`).\n", "\n", - "When adjoint computations are taken into account in the schedules, an additional action referred to a `EndReverse(True)` is required to indicate the end of the adjoint advancing." + "Lastly, the `EndReverse()` is an action used to inform the finalisation of the adjoint model executions." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "The *checkpoint_schedules* additionally allows users to execute forward and adjoint solvers while storing all adjoint dependencies on `'disk'`. The following code shows this schedule applied in the forward and adjoint executions with the object generated by the `SingleDiskStorageSchedule` class." 
+ "*checkpoint_schedules* allows the forward data storage on `'disk'`. The storage of all forward data used for adjoint computation on `'disk'` is reached with `SingleDiskStorageSchedule`." ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Action index: Run-time illustration Action:\n", + "--------------- ----------------------- ------------------------------------------------------\n", + " 0 ---▶---▶---▶---▶ Forward(0, sys.maxsize, False, True, StorageType.DISK)\n", + " 1 End Forward EndForward()\n", + " 2 Copy(3, StorageType.DISK, StorageType.WORK)\n", + " 3 . . . ◀--- Reverse(4, 3, True)\n", + " 4 Copy(2, StorageType.DISK, StorageType.WORK)\n", + " 5 . . ◀--- Reverse(3, 2, True)\n", + " 6 Copy(1, StorageType.DISK, StorageType.WORK)\n", + " 7 . ◀--- Reverse(2, 1, True)\n", + " 8 Copy(0, StorageType.DISK, StorageType.WORK)\n", + " 9 ◀--- Reverse(1, 0, True)\n", + " 10 End Reverse EndReverse()\n" + ] + } + ], "source": [ "cp_schedule = SingleDiskStorageSchedule()\n", "solver_manager.execute(cp_schedule)\n" @@ -310,36 +341,56 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "In the case illustrated above, forward and adjoint executions with `SingleDiskStorageSchedule` have the *Copy* action (see the outputs associated with the indexes 2, 4, 6, 8) which indicates copying of the forward data from one storage type to another. \n", + "In this case, forward and adjoint executions with `SingleDiskStorageSchedule` have the *Copy* action (see the outputs associated with the indexes 2, 4, 6, 8) which indicates copying of the forward data from one storage type to another. 
\n", "\n", - "The *Copy* action has the fundamental structure:\n", + "The *Copy* action has the general form:\n", "```python\n", "Copy(n, from_storage, to_storage)\n", "```\n", "which reads:\n", "\n", - " - Copy the data associated with step `n`.\n", + "- Copy the data associated with step `n`.\n", "\n", - " - The term `from_storage` denotes the storage type responsible for retaining forward data at step n, while `to_storage` refers to the designated storage type for storing this forward data.\n", + "- The term `from_storage` denotes the storage type responsible for retaining forward data at step `n`, while `to_storage` refers to the designated storage type for storing this forward data.\n", "\n", "Hence, on considering the *Copy* action associated with the output `Action index 4`, we have:\n", - " - Copy the data associated with step `4`.\n", "\n", - " - The forward data is copied from `'disk'` storage, and the specified storage type for coping (`StorageType.WORK`) refers to the storage type that indicates a prompt usage for the adjoint computation." + "- Copy the data associated with step `n = 2`, which is stored in `StorageType.DISK`, to working storage for use \n", + "by the adjoint model." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Now, let us consider the case where the objective is to move the data from one storage type to another insteady of copying it. To achieve this, the optional `move_data` parameter within the `SingleDiskStorageSchedule` need to be set as `True`. This configuration is illustrated in the following code example:" + "Instead of copying the data, we can move the data from one storage type to another. To do so, *checkpoint_schedules* has a *Move* action used to indicate that the data, once moved, is no longer accessible in the original storage type. In `SingleDiskStorageSchedule`, we can move the forward data by setting the optional `move_data` parameter as `True`." 
] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Action index: Run-time illustration Action:\n", + "--------------- ----------------------- ------------------------------------------------------\n", + " 0 ---▶---▶---▶---▶ Forward(0, sys.maxsize, False, True, StorageType.DISK)\n", + " 1 End Forward EndForward()\n", + " 2 Move(3, StorageType.DISK, StorageType.WORK)\n", + " 3 . . . ◀--- Reverse(4, 3, True)\n", + " 4 Move(2, StorageType.DISK, StorageType.WORK)\n", + " 5 . . ◀--- Reverse(3, 2, True)\n", + " 6 Move(1, StorageType.DISK, StorageType.WORK)\n", + " 7 . ◀--- Reverse(2, 1, True)\n", + " 8 Move(0, StorageType.DISK, StorageType.WORK)\n", + " 9 ◀--- Reverse(1, 0, True)\n", + " 10 End Reverse EndReverse()\n" + ] + } + ], "source": [ "cp_schedule = SingleDiskStorageSchedule(move_data=True)\n", "solver_manager.execute(cp_schedule)" @@ -349,47 +400,67 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The *Move* action follows a basic structure:\n", + "The *Move* action follows a basic form:\n", "```python\n", "Move(n, from_storage, to_storage)\n", "```\n", "\n", - "This can be understood as:\n", - "\n", - " - Move the data associated with step `n`.\n", + "which can be read as:\n", "\n", - " - The terms `from_storage` and `to_storage` hold the same significance as in the *Copy* action.\n", + "- Move the data associated with step `n`.\n", "\n", - "Now, on considering one of the *Move* action associated with the output `Action index: 4`:\n", + "- The terms `from_storage` and `to_storage` are the storage types from and to which the data should be moved, respectively.\n", "\n", - " - Move the data associated with the step `4`.\n", - " \n", - " - The forward data is moved from `'disk'` storage to a storage used for the adjoint computation.\n", + "Thus, the *Move* action associated with the output 
`Action index: 4` reads:\n", "\n", - "**The *Move* action entails that the data, once moved, becomes no longer accessible in the original storage type. Whereas the *Copy* action means that the copied data remains available in the original storage type.**" + "- Move the data associated with step `n = 2`, which is stored in `StorageType.DISK`, to working storage for use \n", + "by the adjoint model." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Schedules given by checkointing methods\n", + "## Schedules given by checkpointing algorithms\n", + "Here, we present the schedules obtained with checkpointing algorithms.\n", "### Revolve\n", - "Now, let us consider the schedules given by the checkpointing strategies. We begin by employing the Revolve approach, according to introduced in reference [1].\n", - "\n", - "The Revolve checkpointing strategy generates a schedule that only uses `'RAM'` storage type. \n", + "The Revolve strategy, as introduced in reference [1], generates a schedule that uses only the `'RAM'` storage type. \n", "\n", - "The `Revolve` class gives a schedule according to two essential parameters: the total count of forward time steps (`max_n = 4`) and the number of checkpoints to store in `'RAM'` (`snaps_in_ram = 2`).\n", - "\n", - "The code below shows the execution of the forward and adjoint solvers with the the `Revolve` schedule." + "The `Revolve` class gives a schedule according to two parameters: the total number of forward steps (`max_n = 4`) and the number of checkpoints to store in `'RAM'` (`snaps_in_ram = 2`)." 
] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Action index: Run-time illustration Action:\n", + "--------------- ----------------------- ---------------------------------------------\n", + " 0 *---▷---▷ Forward(0, 2, True, False, StorageType.RAM)\n", + " 1 . . *---▷ Forward(2, 3, True, False, StorageType.RAM)\n", + " 2 . . . ---▶ Forward(3, 4, False, True, StorageType.WORK)\n", + " 3 End Forward EndForward()\n", + " 4 . . . ◀--- Reverse(4, 3, True)\n", + " 5 Move(2, StorageType.RAM, StorageType.WORK)\n", + " 6 . . ---▶ Forward(2, 3, False, True, StorageType.WORK)\n", + " 7 . . ◀--- Reverse(3, 2, True)\n", + " 8 Copy(0, StorageType.RAM, StorageType.WORK)\n", + " 9 ---▷ Forward(0, 1, False, False, StorageType.WORK)\n", + " 10 . ---▶ Forward(1, 2, False, True, StorageType.WORK)\n", + " 11 . ◀--- Reverse(2, 1, True)\n", + " 12 Move(0, StorageType.RAM, StorageType.WORK)\n", + " 13 ---▶ Forward(0, 1, False, True, StorageType.WORK)\n", + " 14 ◀--- Reverse(1, 0, True)\n", + " 15 End Reverse EndReverse()\n" + ] + } + ], + "source": [ + "from checkpoint_schedules import StorageType\n", "snaps_in_ram = 2 \n", "solver_manager = CheckpointingManager(max_n) # manager object\n", "cp_schedule = Revolve(max_n, snaps_in_ram) \n", @@ -400,7 +471,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The employment of the checkpointing strategies within an adjoint-based gradient requires the forward solver recomputation. As demonstrated in the output above, we have the *Forward* action associated with the `Action index: 0` that is read as follows:\n", + "The employment of the checkpointing strategies in the an adjoint-based gradient requires the forward solver recomputation. 
As demonstrated in the output above, we have the *Forward* action associated with the `Action index: 0` that reads as follows:\n", "\n", " - Advance from time step 0 to the start of the time step 2.\n", "\n", @@ -413,14 +484,12 @@ "```python\n", "Forward(0, 2, True, False, )\n", "```\n", - "The symbol `'*'` indicates that the forward data necessary for restarting the forward computation from step 0 is stored in `'RAM'`. In the time illustrations, we have `'−−−▷'` that indicates the forward data used for the adjoint computation is **not** stored. On the other hand, the illustration `'−−−▶'` indicates that the forward data is stored.\n", + "The symbolic illustration of the step advancing reads:\n", + "- `'*'`: Forward data for restarting the forward solver is stored in `'RAM'`.\n", "\n", - "To summarize:\n", - " - `'*'`: Forward data for restarting the forward solver is stored in `'RAM'`.\n", + "- `'−−−▷'`: Forward data used for adjoint computation is not stored.\n", "\n", - " - `'−−−▷'`: Forward data used for adjoint computation is not stored.\n", - " \n", - " - `'−−−▶'`: Forward data used for adjoint computation is stored.*" + "- `'−−−▶'`: Forward data used for adjoint computation is stored." ] }, { @@ -429,20 +498,41 @@ "source": [ "### Multistage checkpoiting \n", "\n", - "The schedule as depicted below, employes a *MultiStage* distribution of checkpoints between `'RAM'` and `'disk'` as described in [2]. This checkpointing allows exclusively the memory storage (`'RAM'`), or exclusively the `'disk'` storage, or in both storage locations. \n", - "\n", - "The following code use two types of storage, `'RAM'` and `'disk'`. \n", - "\n", - "*MultiStage* checkpointing schedule is given by `MultistageCheckpointSchedule`, which requires the parameters: number of checkpoints stored in `'RAM'` and `'disk'`. \n", + "The schedule as depicted below, employes a *MultiStage* distribution of checkpoints between `'RAM'` and `'disk'` as described in [2]. 
This checkpointing allows only memory storage (`'RAM'`), or only `'disk'` storage, or both `'RAM'` and `'disk'`.\n", "\n", - "See the forward and adjoint executions with `MultistageCheckpointSchedule` in the following example:" + "The following code uses two types of storage, `'RAM'` and `'disk'`." ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Action index: Run-time illustration Action:\n", + "--------------- ----------------------- ---------------------------------------------\n", + " 0 *---▷---▷ Forward(0, 2, True, False, StorageType.RAM)\n", + " 1 . . +---▷ Forward(2, 3, True, False, StorageType.DISK)\n", + " 2 . . . ---▶ Forward(3, 4, False, True, StorageType.WORK)\n", + " 3 End Forward EndForward()\n", + " 4 . . . ◀--- Reverse(4, 3, True)\n", + " 5 Move(2, StorageType.DISK, StorageType.WORK)\n", + " 6 . . ---▶ Forward(2, 3, False, True, StorageType.WORK)\n", + " 7 . . ◀--- Reverse(3, 2, True)\n", + " 8 Copy(0, StorageType.RAM, StorageType.WORK)\n", + " 9 ---▷ Forward(0, 1, False, False, StorageType.WORK)\n", + " 10 . ---▶ Forward(1, 2, False, True, StorageType.WORK)\n", + " 11 . ◀--- Reverse(2, 1, True)\n", + " 12 Move(0, StorageType.RAM, StorageType.WORK)\n", + " 13 ---▶ Forward(0, 1, False, True, StorageType.WORK)\n", + " 14 ◀--- Reverse(1, 0, True)\n", + " 15 End Reverse EndReverse()\n" + ] + } + ], "source": [ "snaps_in_ram = 1 # number of checkpoints stored in RAM\n", "snaps_on_disk = 1 # number of checkpoints stored in disk\n", @@ -454,7 +544,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The symbol `'*'` indicates that the forward data necessary for restarting the forward computation from step 0 is stored in `'RAM'`." + "The symbol `'+'` indicates that the forward data necessary for restarting the forward computation from step 2 is stored on `'disk'`." 
] }, { @@ -462,18 +552,43 @@ "metadata": {}, "source": [ "### Disk-Revolve\n", - "The following code shows the the execution of a solver over time using the *Disk-Revolve* schedule, as described in reference [3]. This schedule considers two type of storage: memory (`'RAM'`) and `'disk'`. \n", + "The following code shows the forward and adjoint executions using the *Disk-Revolve* schedule [3]. This schedule considers two types of storage: memory (`'RAM'`) and `'disk'`. \n", "\n", "The *Disk-Revolve* algorithm, available within the *checkpoint_schedules*, requires the definition of checkpoints stored in memory to be greater than 0 (`'snap_in_ram > 0'`). Specifying the checkpoints stored on `'disk'` is not required, as the algorithm itself calculates this value.\n", "\n", - "The number of checkpoints stored in `'disk'` is determined according the costs associated with advancing the backward and forward solvers in a single time-step, and the costs of writing and reading the checkpoints saved on disk. Additional details of the definition of these parameters can be found in the references [3], [4] and [5]." + "The number of checkpoints stored in `'disk'` is determined according to the costs associated with advancing the backward and forward solvers in a single step and the costs of writing and reading the checkpoints saved on disk. Additional details of the *Disk-Revolve* algorithm are available in references [3], [4] and [5]." ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Action index: Run-time illustration Action:\n", + "--------------- ----------------------- ---------------------------------------------\n", + " 0 *---▷---▷---▷ Forward(0, 3, True, False, StorageType.RAM)\n", + " 1 . . . ---▶ Forward(3, 4, False, True, StorageType.WORK)\n", + " 2 End Forward EndForward()\n", + " 3 . . . 
◀--- Reverse(4, 3, True)\n", + " 4 Copy(0, StorageType.RAM, StorageType.WORK)\n", + " 5 ---▷---▷ Forward(0, 2, False, False, StorageType.WORK)\n", + " 6 . . ---▶ Forward(2, 3, False, True, StorageType.WORK)\n", + " 7 . . ◀--- Reverse(3, 2, True)\n", + " 8 Copy(0, StorageType.RAM, StorageType.WORK)\n", + " 9 ---▷ Forward(0, 1, False, False, StorageType.WORK)\n", + " 10 . ---▶ Forward(1, 2, False, True, StorageType.WORK)\n", + " 11 . ◀--- Reverse(2, 1, True)\n", + " 12 Move(0, StorageType.RAM, StorageType.WORK)\n", + " 13 ---▶ Forward(0, 1, False, True, StorageType.WORK)\n", + " 14 ◀--- Reverse(1, 0, True)\n", + " 15 End Reverse EndReverse()\n" + ] + } + ], "source": [ "snaps_in_ram = 1 # number of checkpoints stored in RAM\n", "cp_schedule = DiskRevolve(max_n, snapshots_in_ram=snaps_in_ram) # checkpointing schedule object\n", @@ -486,16 +601,42 @@ "source": [ "### Periodic Disk Revolve\n", "\n", - "The schedule used in the following code was presented in reference [4]. It is a two type hierarchical schedule and it is referred here to as *Periodic Disk Revolve*. Analogously to the *Disk Revolve* schedule, this approach requires the specification of the maximum number of steps (`max_n`) and the number of checkpoints saved in memory (`snaps_in_ram`). The *Periodic Disk Revolve* computes automatically the number of checkpoint stored in disk.\n", + "*Periodic Disk Revolve* is a two type hierarchical schedule [4]. This strategy requires the specification of the maximum number of steps (`max_n`) and the number of checkpoints stored in memory (`snaps_in_ram`) and computes automatically the number of checkpoint stored in disk.\n", "\n", - "*It is essential for the number of checkpoints in `'RAM'` to be greater than zero (`'snap_in_ram > 0'`)*" + "*Periodic Disk Revolve* schedule is generated with `PeriodicDiskRevolve` class. This schedule is contrained to `'snap_in_ram > 0'`." 
] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "We use periods of size 3\n", + " Action index: Run-time illustration Action:\n", + "--------------- ----------------------- ---------------------------------------------\n", + " 0 *---▷---▷---▷ Forward(0, 3, True, False, StorageType.RAM)\n", + " 1 . . . ---▶ Forward(3, 4, False, True, StorageType.WORK)\n", + " 2 End Forward EndForward()\n", + " 3 . . . ◀--- Reverse(4, 3, True)\n", + " 4 Copy(0, StorageType.RAM, StorageType.WORK)\n", + " 5 ---▷---▷ Forward(0, 2, False, False, StorageType.WORK)\n", + " 6 . . ---▶ Forward(2, 3, False, True, StorageType.WORK)\n", + " 7 . . ◀--- Reverse(3, 2, True)\n", + " 8 Copy(0, StorageType.RAM, StorageType.WORK)\n", + " 9 ---▷ Forward(0, 1, False, False, StorageType.WORK)\n", + " 10 . ---▶ Forward(1, 2, False, True, StorageType.WORK)\n", + " 11 . ◀--- Reverse(2, 1, True)\n", + " 12 Move(0, StorageType.RAM, StorageType.WORK)\n", + " 13 ---▶ Forward(0, 1, False, True, StorageType.WORK)\n", + " 14 ◀--- Reverse(1, 0, True)\n", + " 15 End Reverse EndReverse()\n" + ] + } + ], "source": [ "snaps_in_ram = 1\n", "cp_schedule = PeriodicDiskRevolve(max_n, snaps_in_ram)\n", @@ -509,14 +650,39 @@ "### H-Revolve \n", "The following code illustrates the forward and adjoint computations using the checkpointing given by H-Revolve strategy [5]. This checkpointing schedule is generated with `HRevolve` class, which requires the following parameters: maximum steps stored in RAM (`snap_in_ram`), maximum steps stored on disk (`snap_on_disk`), and the number of time steps (`max_n`). 
\n", "\n", - "*It is essential for the number of checkpoints in `'RAM'` to be greater than zero (`'snap_in_ram > 0'`)*" + "*`HRevolve` is constrained for the number of checkpoints in `'RAM'` to be greater than zero (`'snap_in_ram > 0'`)*" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Action index: Run-time illustration Action:\n", + "--------------- ----------------------- ---------------------------------------------\n", + " 0 *---▷---▷---▷ Forward(0, 3, True, False, StorageType.RAM)\n", + " 1 . . . ---▶ Forward(3, 4, False, True, StorageType.WORK)\n", + " 2 End Forward EndForward()\n", + " 3 . . . ◀--- Reverse(4, 3, True)\n", + " 4 Copy(0, StorageType.RAM, StorageType.WORK)\n", + " 5 ---▷---▷ Forward(0, 2, False, False, StorageType.WORK)\n", + " 6 . . ---▶ Forward(2, 3, False, True, StorageType.WORK)\n", + " 7 . . ◀--- Reverse(3, 2, True)\n", + " 8 Copy(0, StorageType.RAM, StorageType.WORK)\n", + " 9 ---▷ Forward(0, 1, False, False, StorageType.WORK)\n", + " 10 . ---▶ Forward(1, 2, False, True, StorageType.WORK)\n", + " 11 . ◀--- Reverse(2, 1, True)\n", + " 12 Move(0, StorageType.RAM, StorageType.WORK)\n", + " 13 ---▶ Forward(0, 1, False, True, StorageType.WORK)\n", + " 14 ◀--- Reverse(1, 0, True)\n", + " 15 End Reverse EndReverse()\n" + ] + } + ], "source": [ "snaps_on_disk = 1\n", "snaps_in_ram = 1\n", @@ -530,16 +696,40 @@ "source": [ "### Mixed checkpointing\n", "\n", - "The *Mixed* checkpointing strategy works under the assumption that the data required to restart the forward computation is of the same size as the data required to advance the adjoint computation in one step. 
Further details into the *Mixed* checkpointing schedule was discussed in reference [6].\n", + "The *Mixed* checkpointing strategy works under the assumption that the data required to restart the forward computation is of the same size as the data required to advance the adjoint model in one step. Further details of the *Mixed* checkpointing schedule are discussed in reference [6].\n", "\n", "This specific schedule provides the flexibility to store the forward restart data either in `'RAM'` or on `'disk'`, but not both simultaneously within the same schedule." ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Action index: Run-time illustration Action:\n", + "--------------- ----------------------- ---------------------------------------------\n", + " 0 +---▷---▷---▷ Forward(0, 3, True, False, StorageType.DISK)\n", + " 1 . . . ---▶ Forward(3, 4, False, True, StorageType.WORK)\n", + " 2 End Forward EndForward()\n", + " 3 . . . ◀--- Reverse(4, 3, True)\n", + " 4 Copy(0, StorageType.DISK, StorageType.WORK)\n", + " 5 ---▷---▷ Forward(0, 2, False, False, StorageType.WORK)\n", + " 6 . . ---▶ Forward(2, 3, False, True, StorageType.WORK)\n", + " 7 . . ◀--- Reverse(3, 2, True)\n", + " 8 Move(0, StorageType.DISK, StorageType.WORK)\n", + " 9 ---▶ Forward(0, 1, False, True, StorageType.DISK)\n", + " 10 . ---▶ Forward(1, 2, False, True, StorageType.WORK)\n", + " 11 . 
◀--- Reverse(2, 1, True)\n", + " 12 Move(0, StorageType.DISK, StorageType.WORK)\n", + " 13 ◀--- Reverse(1, 0, True)\n", + " 14 End Reverse EndReverse()\n" + ] + } + ], "source": [ "snaps_on_disk = 1\n", "max_n = 4\n", @@ -556,9 +746,33 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Action index: Run-time illustration Action:\n", + "--------------- ----------------------- ---------------------------------------------\n", + " 0 *---▷---▷---▷ Forward(0, 3, True, False, StorageType.RAM)\n", + " 1 . . . ---▶ Forward(3, 4, False, True, StorageType.WORK)\n", + " 2 End Forward EndForward()\n", + " 3 . . . ◀--- Reverse(4, 3, True)\n", + " 4 Copy(0, StorageType.RAM, StorageType.WORK)\n", + " 5 ---▷---▷ Forward(0, 2, False, False, StorageType.WORK)\n", + " 6 . . ---▶ Forward(2, 3, False, True, StorageType.WORK)\n", + " 7 . . ◀--- Reverse(3, 2, True)\n", + " 8 Move(0, StorageType.RAM, StorageType.WORK)\n", + " 9 ---▶ Forward(0, 1, False, True, StorageType.RAM)\n", + " 10 . ---▶ Forward(1, 2, False, True, StorageType.WORK)\n", + " 11 . ◀--- Reverse(2, 1, True)\n", + " 12 Move(0, StorageType.RAM, StorageType.WORK)\n", + " 13 ◀--- Reverse(1, 0, True)\n", + " 14 End Reverse EndReverse()\n" + ] + } + ], "source": [ "snaps_in_ram = 1\n", "cp_schedule = MixedCheckpointSchedule(max_n, snaps_on_disk, storage=StorageType.RAM)\n", @@ -573,19 +787,46 @@ "\n", "Two-level binomial schedule was presented in reference [6], and its application was performed in the work [7]. \n", "\n", - "The two-level binomial checkpointing stores the forward restart data based on the user-defined `period`. In this schedule, the user also define the limite for additional storage of the forward restart data to use during the advancing of the adjoint between periodic storage checkpoints. 
The default sotrage type is `'disk'`.\n", + "The two-level binomial checkpointing stores the forward restart data based on the user-defined `period`. In this schedule, we can define the limit for additional storage of the forward restart data during the step advancing of the adjoint model. The default storage type is `'disk'`.\n", "\n", - "Now, let us define the period of storage `period = 2` and the extra forward restart data storage `add_snaps = 1`. The code displayed below shows the execution in time illustration for this setup." + "The two-level binomial schedule is provided by `TwoLevelCheckpointSchedule`. To obtain this schedule we need the period `period = 3` and the extra forward restart data storage `add_snaps = 1`. " ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "add_snaps = 1 # of additional storage of the forward restart data\n", - "period = 2\n", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Action index: Run-time illustration Action:\n", + "--------------- ----------------------- ---------------------------------------------\n", + " 0 +---▷---▷---▷ Forward(0, 3, True, False, StorageType.DISK)\n", + " 1 . . . +---▷ Forward(3, 6, True, False, StorageType.DISK)\n", + " 2 End Forward EndForward()\n", + " 3 Copy(3, StorageType.DISK, StorageType.WORK)\n", + " 4 . . . ---▶ Forward(3, 4, False, True, StorageType.WORK)\n", + " 5 . . . ◀--- Reverse(4, 3, True)\n", + " 6 Copy(0, StorageType.DISK, StorageType.WORK)\n", + " 7 ---▷ Forward(0, 1, False, False, StorageType.WORK)\n", + " 8 . +---▷ Forward(1, 2, True, False, StorageType.DISK)\n", + " 9 . . ---▶ Forward(2, 3, False, True, StorageType.WORK)\n", + " 10 . . ◀--- Reverse(3, 2, True)\n", + " 11 Move(1, StorageType.DISK, StorageType.WORK)\n", + " 12 . ---▶ Forward(1, 2, False, True, StorageType.WORK)\n", + " 13 . 
◀--- Reverse(2, 1, True)\n", + " 14 Copy(0, StorageType.DISK, StorageType.WORK)\n", + " 15 ---▶ Forward(0, 1, False, True, StorageType.WORK)\n", + " 16 ◀--- Reverse(1, 0, True)\n", + " 17 End Reverse EndReverse()\n" + ] + } + ], + "source": [ + "add_snaps = 1 # additional storage of the forward restart data\n", + "period = 3\n", "revolver = TwoLevelCheckpointSchedule(period, add_snaps)\n", "solver_manager.execute(revolver)" ] @@ -594,20 +835,57 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Now, let us modify the storage type to `'RAM'` of the additional forward restart checkpointing by setting the optional `TwoLevelCheckpointSchedule` argument `binomial_storage = StorageType.RAM`. Thus, on the example above, ones notices that the action associated with `Action index: 8` implies the forward restart data storage should be on `'disk'`. On the other hand, the example below displays that the action associated to `Action index: 8` indicates that the forward restart data storage should be in `'RAM'`.\n" + "The output above shows the forward and adjoint executions using the two-level binomial checkpointing. Notice that the action associated with `Action index: 8` shows that the additional forward restart data storage is stored on `'disk'`.\n", + "\n", + "We can also store the additional forward restart checkpointing in `'RAM'` by setting the optional argument `binomial_storage = StorageType.RAM`. 
The output below displays the action associated with `Action index: 8` showing that the forward restart data storage is in `'RAM'`.\n" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "revolver = TwoLevelCheckpointSchedule(3, binomial_snapshots=snaps_on_disk, \n", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Action index: Run-time illustration Action:\n", + "--------------- ----------------------- ---------------------------------------------\n", + " 0 +---▷---▷---▷ Forward(0, 3, True, False, StorageType.DISK)\n", + " 1 . . . +---▷ Forward(3, 6, True, False, StorageType.DISK)\n", + " 2 End Forward EndForward()\n", + " 3 Copy(3, StorageType.DISK, StorageType.WORK)\n", + " 4 . . . ---▶ Forward(3, 4, False, True, StorageType.WORK)\n", + " 5 . . . ◀--- Reverse(4, 3, True)\n", + " 6 Copy(0, StorageType.DISK, StorageType.WORK)\n", + " 7 ---▷ Forward(0, 1, False, False, StorageType.WORK)\n", + " 8 . *---▷ Forward(1, 2, True, False, StorageType.RAM)\n", + " 9 . . ---▶ Forward(2, 3, False, True, StorageType.WORK)\n", + " 10 . . ◀--- Reverse(3, 2, True)\n", + " 11 Move(1, StorageType.RAM, StorageType.WORK)\n", + " 12 . ---▶ Forward(1, 2, False, True, StorageType.WORK)\n", + " 13 . ◀--- Reverse(2, 1, True)\n", + " 14 Copy(0, StorageType.DISK, StorageType.WORK)\n", + " 15 ---▶ Forward(0, 1, False, True, StorageType.WORK)\n", + " 16 ◀--- Reverse(1, 0, True)\n", + " 17 End Reverse EndReverse()\n" + ] + } + ], + "source": [ + "revolver = TwoLevelCheckpointSchedule(period, binomial_snapshots=snaps_on_disk, \n", " binomial_storage=StorageType.RAM)\n", "solver_manager.execute(revolver)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Final remarks\n", + "This notebook focused on a visual illustration of the employment of the schedules available in the *checkpoint_schedules* package. The specific function (e.g. 
`action_forward`) is intended only to illustrate the step execution of the forward and adjoint models. However, the user can replace the illustrative code with the code required to execute the forward and adjoint steps and to perform the copy and move operations." + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -650,7 +928,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.3" + "version": "3.11.4" } }, "nbformat": 4,