dolfin-adjoint · dham · Jun 20, 2024 · Jun 21, 2024 · Jun 21, 2024 · Jun 24, 2024
diff --git a/pyadjoint/adjfloat.py b/pyadjoint/adjfloat.py
@@ -91,7 +91,12 @@ def __rsub__(self, other):
     def __pow__(self, power):
         return PowBlock(self, power)
 
-    def _ad_convert_type(self, value, options={}):
+    def _ad_init_zero(self, dual=False):
+        return type(self)(0.)
+
+    def _ad_convert_riesz(self, value, riesz_map=None):
+        if riesz_map is not None:
+            raise ValueError(f"Unexpected Riesz map for Adjfloat: {riesz_map}")
         return AdjFloat(value)
 
     def _ad_create_checkpoint(self):

diff --git a/pyadjoint/control.py b/pyadjoint/control.py
@@ -1,13 +1,14 @@
+from typing import Any
 from .overloaded_type import OverloadedType, create_overloaded_object
 import logging
 
 
 class Control(object):
     """Defines a control variable from an OverloadedType.
 
-    The control object references a specific node on the Tape.
-    For mutable OverloadedType instances the Control only represents
-    the value at the time of initialization.
+    The control object references a specific node on the Tape. For mutable
+    OverloadedType instances the Control only represents the value at the time
+    of initialization.
 
     Example:
         Given a mutable OverloadedType instance u.
@@ -25,18 +26,22 @@ class Control(object):
         >>> c2.data()
         3.0
 
-        Now c1 represents the node prior to the add_in_place Block,
-        while c2 represents the node after the add_in_place Block.
-        Creating a `ReducedFunctional` with c2 as Control results in
-        a reduced problem without the add_in_place Block, while a ReducedFunctional
-        with c1 as Control results in a forward model including the add_in_place.
+        Now c1 represents the node prior to the add_in_place Block, while c2
+        represents the node after the add_in_place Block. Creating a
+        `ReducedFunctional` with c2 as Control results in a reduced problem
+        without the add_in_place Block, while a ReducedFunctional with c1 as
+        Control results in a forward model including the add_in_place.
 
     Args:
-        control (OverloadedType): The OverloadedType instance to define this control from.
+        control: The OverloadedType instance to define this control from.
+        riesz_map: Parameters controlling how to find the Riesz representer of
+            a dual (adjoint) variable to this control. The permitted values are
+            type-dependent.
 
     """
-    def __init__(self, control):
+    def __init__(self, control: OverloadedType, riesz_map: Any = None):
         self.control = control
+        self.riesz_map = None
         self.block_variable = control.block_variable
 
     def data(self):
@@ -45,17 +50,27 @@ def data(self):
     def tape_value(self):
         return create_overloaded_object(self.block_variable.saved_output)
 
-    def get_derivative(self, options={}):
+    def get_derivative(self, apply_riesz=False):
         if self.block_variable.adj_value is None:
             logging.warning("Adjoint value is None, is the functional independent of the control variable?")
-            return self.control._ad_convert_type(0., options=options)
-        return self.control._ad_convert_type(self.block_variable.adj_value, options=options)
+            return self.control._ad_init_zero(dual=not apply_riesz)
+        elif apply_riesz:
+            return self.control._ad_convert_riesz(
+                self.block_variable.adj_value, riesz_map=self.riesz_map)
+        else:
+            return self.control._ad_init_object(self.block_variable.adj_value)
 
-    def get_hessian(self, options={}):
+    def get_hessian(self, apply_riesz=False):
         if self.block_variable.adj_value is None:
             logging.warning("Hessian value is None, is the functional independent of the control variable?")
-            return self.control._ad_convert_type(0., options=options)
-        return self.control._ad_convert_type(self.block_variable.hessian_value, options=options)
+            return self.control._ad_init_zero(dual=not apply_riesz)
+        elif apply_riesz:
+            return self.control._ad_convert_riesz(
+                self.block_variable.hessian_value, riesz_map=self.riesz_map)
+        else:
+            return self.control._ad_init_object(
+                self.block_variable.hessian_value
+            )
 
     def update(self, value):
         # In the future we might want to call a static method

diff --git a/pyadjoint/drivers.py b/pyadjoint/drivers.py
@@ -2,23 +2,28 @@
 from .tape import get_working_tape, stop_annotating
 
 
-def compute_gradient(J, m, options=None, tape=None, adj_value=1.0):
+def compute_gradient(J, m, tape=None, adj_value=1.0, apply_riesz=False):
     """
     Compute the gradient of J with respect to the initialisation value of m,
     that is the value of m at its creation.
 
     Args:
-        J (AdjFloat):  The objective functional.
+        J (OverloadedType):  The objective functional.
         m (list or instance of Control): The (list of) controls.
-        options (dict): A dictionary of options. To find a list of available options
-            have a look at the specific control type.
         tape: The tape to use. Default is the current tape.
+        adj_value: The adjoint value to the result. Required if the functional
+            is not scalar-valued, or if the functional is not the final stage 
+            in the computation of an outer functional.
+        apply_riesz: If True, apply the Riesz map of each control in order
+            to return a primal gradient rather than a derivative in the
+            dual space.
 
     Returns:
-        OverloadedType: The derivative with respect to the control. Should be an instance of the same type as
-            the control.
+        OverloadedType: The derivative with respect to the control.
+            If apply_riesz is False, should be an instance of the type dual
+            to that of the control. If apply_riesz is true should have the
+            same type as the control.
     """
-    options = options or {}
     tape = tape or get_working_tape()
     tape.reset_variables()
     J.block_variable.adj_value = adj_value
@@ -29,28 +34,31 @@ def compute_gradient(J, m, options=None, tape=None, adj_value=1.0):
             with marked_controls(m):
                 tape.evaluate_adj(markings=True)
 
-    grads = [i.get_derivative(options=options) for i in m]
+    grads = [i.get_derivative(apply_riesz=apply_riesz) for i in m]
     return m.delist(grads)
 
 
-def compute_hessian(J, m, m_dot, options=None, tape=None):
+def compute_hessian(J, m, m_dot, tape=None, apply_riesz=False):
     """
     Compute the Hessian of J in a direction m_dot at the current value of m
 
     Args:
         J (AdjFloat):  The objective functional.
         m (list or instance of Control): The (list of) controls.
-        m_dot (list or instance of the control type): The direction in which to compute the Hessian.
-        options (dict): A dictionary of options. To find a list of available options
-            have a look at the specific control type.
+        m_dot (list or instance of the control type): The direction in which to
+            compute the Hessian.
         tape: The tape to use. Default is the current tape.
+        apply_riesz: If True, apply the Riesz map of each control in order
+            to return the (primal) Riesz representer of the Hessian
+            action.
 
     Returns:
-        OverloadedType: The second derivative with respect to the control in direction m_dot. Should be an instance of
-            the same type as the control.
+        OverloadedType: The action of the Hessian in the direction m_dot.
+            If apply_riesz is False, should be an instance of the type dual
+            to that of the control. If apply_riesz is true should have the
+            same type as the control.
     """
     tape = tape or get_working_tape()
-    options = options or {}
 
     tape.reset_tlm_values()
     tape.reset_hessian_values()
@@ -68,7 +76,7 @@ def compute_hessian(J, m, m_dot, options=None, tape=None):
         with tape.marked_nodes(m):
             tape.evaluate_hessian(markings=True)
 
-    r = [v.get_hessian(options=options) for v in m]
+    r = [v.get_hessian(apply_riesz=apply_riesz) for v in m]
     return m.delist(r)
 
 

diff --git a/pyadjoint/optimization/optimization.py b/pyadjoint/optimization/optimization.py
@@ -33,7 +33,7 @@ def serialise_bounds(rf_np, bounds):
     return np.array(bounds_arr).T
 
 
-def minimize_scipy_generic(rf_np, method, bounds=None, derivative_options=None, **kwargs):
+def minimize_scipy_generic(rf_np, method, bounds=None, **kwargs):
     """Interface to the generic minimize method in scipy
 
     """
@@ -51,17 +51,10 @@ def minimize_scipy_generic(rf_np, method, bounds=None, derivative_options=None,
 
         raise
 
-    if method in ["Newton-CG"]:
-        forget = None
-    else:
-        forget = False
-
-    project = kwargs.pop("project", False)
-
     m = [p.tape_value() for p in rf_np.controls]
     m_global = rf_np.obj_to_array(m)
     J = rf_np.__call__
-    dJ = lambda m: rf_np.derivative(m, forget=forget, project=project, options=derivative_options)
+    dJ = lambda m: rf_np.derivative(apply_riesz=True)
     H = rf_np.hessian
 
     if "options" not in kwargs:
@@ -136,7 +129,7 @@ def jac(x):
     return m
 
 
-def minimize_custom(rf_np, bounds=None, derivative_options=None, **kwargs):
+def minimize_custom(rf_np, bounds=None, **kwargs):
     """ Interface to the user-provided minimisation method """
 
     try:
@@ -152,7 +145,7 @@ def minimize_custom(rf_np, bounds=None, derivative_options=None, **kwargs):
     m_global = rf_np.obj_to_array(m)
     J = rf_np.__call__
 
-    dJ = lambda m: rf_np.derivative(m, forget=None, options=derivative_options)
+    dJ = lambda m: rf_np.derivative(m, apply_riesz=True)
     H = rf_np.hessian
 
     if bounds is not None:
@@ -255,7 +248,7 @@ def minimize(rf, method='L-BFGS-B', scale=1.0, **kwargs):
         return opt
 
 
-def maximize(rf, method='L-BFGS-B', scale=1.0, derivative_options=None, **kwargs):
+def maximize(rf, method='L-BFGS-B', scale=1.0, **kwargs):
     """ Solves the maximisation problem with PDE constraint:
 
            max_m func(u, m)
@@ -274,7 +267,6 @@ def maximize(rf, method='L-BFGS-B', scale=1.0, derivative_options=None, **kwargs
         * 'method' specifies the optimization method to be used to solve the problem.
             The available methods can be listed with the print_optimization_methods function.
         * 'scale' is a factor to scale to problem (default: 1.0).
-        * 'derivative_options' is a dictionary of options that will be passed to the `rf.derivative`.
         * 'bounds' is an optional keyword parameter to support control constraints: bounds = (lb, ub).
             lb and ub must be of the same type than the parameters m.
 
@@ -283,7 +275,7 @@ def maximize(rf, method='L-BFGS-B', scale=1.0, derivative_options=None, **kwargs
         For detailed information about which arguments are supported for each optimization method,
         please refer to the documentaton of the optimization algorithm.
         """
-    return minimize(rf, method, scale=-scale, derivative_options=derivative_options, **kwargs)
+    return minimize(rf, method, scale=-scale, **kwargs)
 
 
 minimise = minimize

diff --git a/pyadjoint/overloaded_type.py b/pyadjoint/overloaded_type.py
@@ -64,7 +64,7 @@ def register_overloaded_type(overloaded_type, classes=None):
     return overloaded_type
 
 
-class OverloadedType(object):
+class OverloadedType:
     """Base class for OverloadedType types.
 
     The purpose of each OverloadedType is to extend a type such that
@@ -93,22 +93,36 @@ def _ad_init_object(cls, obj):
         """
         return cls(obj)
 
+    def _ad_init_zero(self, dual=False):
+        """This method must be overridden.
+
+        Return a new overloaded zero of the appropriate type.
+
+        If `dual` is `True`, return a zero of the dual type to this type. If
+        the type is self-dual, this parameter is ignored. Note that by
+        linearity there is no need to provide a riesz map in this case.
+
+        Args:
+            dual: Whether to return a primal or dual zero.
+
+        Returns:
+            OverloadedType: An object of the relevant type with the value zero.
+
+        """
+        raise NotImplementedError
+
     def create_block_variable(self):
         self.block_variable = BlockVariable(self)
         return self.block_variable
 
-    def _ad_convert_type(self, value, options={}):
-        """This method must be overridden.
-
-        Should implement a way to convert the result of an adjoint computation, `value`,
-        into the same type as `self`.
+    def _ad_convert_riesz(self, value, riesz_map=None):
+        """Apply a Riesz map to convert an adjoint result to a primal variable.
 
         Args:
-            value (Any): The value to convert. Should be a result of an adjoint computation.
-            options (dict): A dictionary with options that may be supplied by the user.
-                If the convert type functionality offers some options on how to convert,
-                this is the dictionary that should be used.
-                For an example see fenics_adjoint.types.Function
+            value (Any): The value to convert. Should be a result of an adjoint
+                computation.
+            riesz_map: Parameters controlling how to find the Riesz
+                representer. The permitted values are type-dependent.
 
         Returns:
             OverloadedType: An instance of the same type as `self`.