diff --git a/pyadjoint/adjfloat.py b/pyadjoint/adjfloat.py index 57e633fa..6d5a0dbe 100644 --- a/pyadjoint/adjfloat.py +++ b/pyadjoint/adjfloat.py @@ -91,7 +91,12 @@ def __rsub__(self, other): def __pow__(self, power): return PowBlock(self, power) - def _ad_convert_type(self, value, options={}): + def _ad_init_zero(self, dual=False): + return type(self)(0.) + + def _ad_convert_riesz(self, value, riesz_map=None): + if riesz_map is not None: + raise ValueError(f"Unexpected Riesz map for Adjfloat: {riesz_map}") return AdjFloat(value) def _ad_create_checkpoint(self): diff --git a/pyadjoint/control.py b/pyadjoint/control.py index f8e626be..28153d51 100644 --- a/pyadjoint/control.py +++ b/pyadjoint/control.py @@ -1,3 +1,4 @@ +from typing import Any from .overloaded_type import OverloadedType, create_overloaded_object import logging @@ -5,9 +6,9 @@ class Control(object): """Defines a control variable from an OverloadedType. - The control object references a specific node on the Tape. - For mutable OverloadedType instances the Control only represents - the value at the time of initialization. + The control object references a specific node on the Tape. For mutable + OverloadedType instances the Control only represents the value at the time + of initialization. Example: Given a mutable OverloadedType instance u. @@ -25,18 +26,22 @@ class Control(object): >>> c2.data() 3.0 - Now c1 represents the node prior to the add_in_place Block, - while c2 represents the node after the add_in_place Block. - Creating a `ReducedFunctional` with c2 as Control results in - a reduced problem without the add_in_place Block, while a ReducedFunctional - with c1 as Control results in a forward model including the add_in_place. + Now c1 represents the node prior to the add_in_place Block, while c2 + represents the node after the add_in_place Block. Creating a + `ReducedFunctional` with c2 as Control results in a reduced problem + without the add_in_place Block, while a ReducedFunctional with c1 as + Control results in a forward model including the add_in_place. Args: - control (OverloadedType): The OverloadedType instance to define this control from. + control: The OverloadedType instance to define this control from. + riesz_map: Parameters controlling how to find the Riesz representer of + a dual (adjoint) variable to this control. The permitted values are + type-dependent. """ - def __init__(self, control): + def __init__(self, control: OverloadedType, riesz_map: Any = None): self.control = control + self.riesz_map = None self.block_variable = control.block_variable def data(self): @@ -45,17 +50,27 @@ def data(self): def tape_value(self): return create_overloaded_object(self.block_variable.saved_output) - def get_derivative(self, options={}): + def get_derivative(self, apply_riesz=False): if self.block_variable.adj_value is None: logging.warning("Adjoint value is None, is the functional independent of the control variable?") - return self.control._ad_convert_type(0., options=options) - return self.control._ad_convert_type(self.block_variable.adj_value, options=options) + return self.control._ad_init_zero(dual=not apply_riesz) + elif apply_riesz: + return self.control._ad_convert_riesz( + self.block_variable.adj_value, riesz_map=self.riesz_map) + else: + return self.control._ad_init_object(self.block_variable.adj_value) - def get_hessian(self, options={}): + def get_hessian(self, apply_riesz=False): if self.block_variable.adj_value is None: logging.warning("Hessian value is None, is the functional independent of the control variable?") - return self.control._ad_convert_type(0., options=options) - return self.control._ad_convert_type(self.block_variable.hessian_value, options=options) + return self.control._ad_init_zero(dual=not apply_riesz) + elif apply_riesz: + return self.control._ad_convert_riesz( + self.block_variable.hessian_value, riesz_map=self.riesz_map) + else: + return self.control._ad_init_object( + self.block_variable.hessian_value + ) def update(self, value): # In the future we might want to call a static method diff --git a/pyadjoint/drivers.py b/pyadjoint/drivers.py index 9bf03b0c..dad1f77d 100644 --- a/pyadjoint/drivers.py +++ b/pyadjoint/drivers.py @@ -2,23 +2,28 @@ from .tape import get_working_tape, stop_annotating -def compute_gradient(J, m, options=None, tape=None, adj_value=1.0): +def compute_gradient(J, m, tape=None, adj_value=1.0, apply_riesz=False): """ Compute the gradient of J with respect to the initialisation value of m, that is the value of m at its creation. Args: - J (AdjFloat): The objective functional. + J (OverloadedType): The objective functional. m (list or instance of Control): The (list of) controls. - options (dict): A dictionary of options. To find a list of available options - have a look at the specific control type. tape: The tape to use. Default is the current tape. + adj_value: The adjoint value to the result. Required if the functional + is not scalar-valued, or if the functional is not the final stage + in the computation of an outer functional. + apply_riesz: If True, apply the Riesz map of each control in order + to return a primal gradient rather than a derivative in the + dual space. Returns: - OverloadedType: The derivative with respect to the control. Should be an instance of the same type as - the control. + OverloadedType: The derivative with respect to the control. + If apply_riesz is False, should be an instance of the type dual + to that of the control. If apply_riesz is true should have the + same type as the control. """ - options = options or {} tape = tape or get_working_tape() tape.reset_variables() J.block_variable.adj_value = adj_value @@ -29,28 +34,31 @@ def compute_gradient(J, m, options=None, tape=None, adj_value=1.0): with marked_controls(m): tape.evaluate_adj(markings=True) - grads = [i.get_derivative(options=options) for i in m] + grads = [i.get_derivative(apply_riesz=apply_riesz) for i in m] return m.delist(grads) -def compute_hessian(J, m, m_dot, options=None, tape=None): +def compute_hessian(J, m, m_dot, tape=None, apply_riesz=False): """ Compute the Hessian of J in a direction m_dot at the current value of m Args: J (AdjFloat): The objective functional. m (list or instance of Control): The (list of) controls. - m_dot (list or instance of the control type): The direction in which to compute the Hessian. - options (dict): A dictionary of options. To find a list of available options - have a look at the specific control type. + m_dot (list or instance of the control type): The direction in which to + compute the Hessian. tape: The tape to use. Default is the current tape. + apply_riesz: If True, apply the Riesz map of each control in order + to return the (primal) Riesz representer of the Hessian + action. Returns: - OverloadedType: The second derivative with respect to the control in direction m_dot. Should be an instance of - the same type as the control. + OverloadedType: The action of the Hessian in the direction m_dot. + If apply_riesz is False, should be an instance of the type dual + to that of the control. If apply_riesz is true should have the + same type as the control. """ tape = tape or get_working_tape() - options = options or {} tape.reset_tlm_values() tape.reset_hessian_values() @@ -68,7 +76,7 @@ def compute_hessian(J, m, m_dot, options=None, tape=None): with tape.marked_nodes(m): tape.evaluate_hessian(markings=True) - r = [v.get_hessian(options=options) for v in m] + r = [v.get_hessian(apply_riesz=apply_riesz) for v in m] return m.delist(r) diff --git a/pyadjoint/optimization/optimization.py b/pyadjoint/optimization/optimization.py index 1b08b50c..492cbb7f 100644 --- a/pyadjoint/optimization/optimization.py +++ b/pyadjoint/optimization/optimization.py @@ -33,7 +33,7 @@ def serialise_bounds(rf_np, bounds): return np.array(bounds_arr).T -def minimize_scipy_generic(rf_np, method, bounds=None, derivative_options=None, **kwargs): +def minimize_scipy_generic(rf_np, method, bounds=None, **kwargs): """Interface to the generic minimize method in scipy """ @@ -51,17 +51,10 @@ def minimize_scipy_generic(rf_np, method, bounds=None, derivative_options=None, raise - if method in ["Newton-CG"]: - forget = None - else: - forget = False - - project = kwargs.pop("project", False) - m = [p.tape_value() for p in rf_np.controls] m_global = rf_np.obj_to_array(m) J = rf_np.__call__ - dJ = lambda m: rf_np.derivative(m, forget=forget, project=project, options=derivative_options) + dJ = lambda m: rf_np.derivative(apply_riesz=True) H = rf_np.hessian if "options" not in kwargs: @@ -136,7 +129,7 @@ def jac(x): return m -def minimize_custom(rf_np, bounds=None, derivative_options=None, **kwargs): +def minimize_custom(rf_np, bounds=None, **kwargs): """ Interface to the user-provided minimisation method """ try: @@ -152,7 +145,7 @@ def minimize_custom(rf_np, bounds=None, derivative_options=None, **kwargs): m_global = rf_np.obj_to_array(m) J = rf_np.__call__ - dJ = lambda m: rf_np.derivative(m, forget=None, options=derivative_options) + dJ = lambda m: rf_np.derivative(m, apply_riesz=True) H = rf_np.hessian if bounds is not None: @@ -255,7 +248,7 @@ def minimize(rf, method='L-BFGS-B', scale=1.0, **kwargs): return opt -def maximize(rf, method='L-BFGS-B', scale=1.0, derivative_options=None, **kwargs): +def maximize(rf, method='L-BFGS-B', scale=1.0, **kwargs): """ Solves the maximisation problem with PDE constraint: max_m func(u, m) @@ -274,7 +267,6 @@ def maximize(rf, method='L-BFGS-B', scale=1.0, derivative_options=None, **kwargs * 'method' specifies the optimization method to be used to solve the problem. The available methods can be listed with the print_optimization_methods function. * 'scale' is a factor to scale to problem (default: 1.0). - * 'derivative_options' is a dictionary of options that will be passed to the `rf.derivative`. * 'bounds' is an optional keyword parameter to support control constraints: bounds = (lb, ub). lb and ub must be of the same type than the parameters m. @@ -283,7 +275,7 @@ def maximize(rf, method='L-BFGS-B', scale=1.0, derivative_options=None, **kwargs For detailed information about which arguments are supported for each optimization method, please refer to the documentaton of the optimization algorithm. """ - return minimize(rf, method, scale=-scale, derivative_options=derivative_options, **kwargs) + return minimize(rf, method, scale=-scale, **kwargs) minimise = minimize diff --git a/pyadjoint/overloaded_type.py b/pyadjoint/overloaded_type.py index a6a02f68..f3fbd914 100644 --- a/pyadjoint/overloaded_type.py +++ b/pyadjoint/overloaded_type.py @@ -64,7 +64,7 @@ def register_overloaded_type(overloaded_type, classes=None): return overloaded_type -class OverloadedType(object): +class OverloadedType: """Base class for OverloadedType types. The purpose of each OverloadedType is to extend a type such that @@ -93,22 +93,36 @@ def _ad_init_object(cls, obj): """ return cls(obj) + def _ad_init_zero(self, dual=False): + """This method must be overridden. + + Return a new overloaded zero of the appropriate type. + + If `dual` is `True`, return a zero of the dual type to this type. If + the type is self-dual, this parameter is ignored. Note that by + linearity there is no need to provide a riesz map in this case. + + Args: + dual: Whether to return a primal or dual zero. + + Returns: + OverloadedType: An object of the relevant type with the value zero. + + """ + raise NotImplementedError + def create_block_variable(self): self.block_variable = BlockVariable(self) return self.block_variable - def _ad_convert_type(self, value, options={}): - """This method must be overridden. - - Should implement a way to convert the result of an adjoint computation, `value`, - into the same type as `self`. + def _ad_convert_riesz(self, value, riesz_map=None): + """Apply a Riesz map to convert an adjoint result to a primal variable. Args: - value (Any): The value to convert. Should be a result of an adjoint computation. - options (dict): A dictionary with options that may be supplied by the user. - If the convert type functionality offers some options on how to convert, - this is the dictionary that should be used. - For an example see fenics_adjoint.types.Function + value (Any): The value to convert. Should be a result of an adjoint + computation. + riesz_map: Parameters controlling how to find the Riesz + representer. The permitted values are type-dependent. Returns: OverloadedType: An instance of the same type as `self`. diff --git a/pyadjoint/reduced_functional.py b/pyadjoint/reduced_functional.py index 8b2af45f..452d8d54 100644 --- a/pyadjoint/reduced_functional.py +++ b/pyadjoint/reduced_functional.py @@ -1,13 +1,100 @@ +"""Provide the abstract reduced functional, and a vanilla implementation.""" +from abc import ABC, abstractmethod +from contextlib import contextmanager from .drivers import compute_gradient, compute_hessian from .enlisting import Enlist from .tape import get_working_tape, stop_annotating, no_annotations from .overloaded_type import OverloadedType, create_overloaded_object +from .control import Control -def _get_extract_derivative_components(derivative_components): +class AbstractReducedFunctional(ABC): + """Base class for reduced functionals. + + An object which encompasses computations of the form:: + + J(u(m), m) + + Where `u` is the system state and `m` is a `pyadjoint.Control` or list of + `pyadjoint.Control`. + + A reduced functional is callable and takes as arguments the value(s) of the + control(s) at which it is to be evaluated. + + Despite the name, the value of a reduced functional need not be scalar. If + the functional is non-scalar valued then derivative calculations will need + to be seeded with an input adjoint value of the adjoint type matching the + result of the functional. """ - Construct a function to pass as a pre derivative callback - when derivative components are required. + + @property + @abstractmethod + def controls(self) -> list[Control]: + """Return the list of controls on which this functional depends.""" + + @abstractmethod + def __call__( + self, values: OverloadedType | list[OverloadedType] + ) -> OverloadedType: + """Evalute the functional at a new control value.""" + raise NotImplementedError + + @abstractmethod + def derivative(self, adj_input=1.0, apply_reisz=False): + """Return the derivative of the functional w.r.t. the control. + + Using the adjoint method, the derivative of the functional with respect + to the control, around the last supplied value of the control, is + computed and returned. + + Args: + adj_input: The adjoint value to the functional result. Required if + the functional is not scalar-valued, or if the functional is an + intermediate result in the computation of an outer functional. + apply_riesz: If True, apply the Riesz map of each control in order + to return a primal gradient rather than a derivative in the + dual space. + + Returns: + OverloadedType: The derivative with respect to the control. + If apply_riesz is False, should be an instance of the type dual + to that of the control. If apply_riesz is true should have the + same type as the control. + + """ + raise NotImplementedError + + @abstractmethod + def hessian(self, m_dot, apply_reisz=False): + """Return the action of the Hessian of the functional. + + The Hessian is evaluate w.r.t. the control on a vector m_dot. + + Using the second-order adjoint method, the action of the Hessian of the + functional with respect to the control, around the last supplied value + of the control, is computed and returned. + + Args: + m_dot ([OverloadedType]): The direction in which to compute the + action of the Hessian. + apply_riesz: If True, apply the Riesz map of each control in order + to return the (primal) Riesz representer of the Hessian + action. + + Returns: + OverloadedType: The action of the Hessian in the direction m_dot. + If apply_riesz is False, should be an instance of the type dual + to that of the control. If apply_riesz is true should have the + same type as the control. + + """ + raise NotImplementedError + + +def _get_extract_derivative_components(derivative_components): + """Construct a function to pass as a pre derivative callback. + + Used when derivative components are required. """ def extract_derivative_components(controls): controls_out = Enlist([controls[i] @@ -17,9 +104,9 @@ def extract_derivative_components(controls): def _get_pack_derivative_components(controls, derivative_components): - """ - Construct a function to pass as a post derivative callback - when derivative components are required. + """Construct a function to pass as a post derivative callback. + + Used when derivative components are required. """ def pack_derivative_components(checkpoint, derivatives, values): derivatives_out = [] @@ -36,7 +123,7 @@ def pack_derivative_components(checkpoint, derivatives, values): return pack_derivative_components -class ReducedFunctional(object): +class ReducedFunctional(AbstractReducedFunctional): """Class representing the reduced functional. A reduced functional maps a control value to the provided functional. @@ -71,14 +158,15 @@ def __init__(self, functional, controls, eval_cb_pre=lambda *args: None, eval_cb_post=lambda *args: None, derivative_cb_pre=lambda controls: controls, - derivative_cb_post=lambda checkpoint, derivative_components, controls: derivative_components, + derivative_cb_post=lambda checkpoint, derivative_components, + controls: derivative_components, hessian_cb_pre=lambda *args: None, hessian_cb_post=lambda *args: None): if not isinstance(functional, OverloadedType): raise TypeError("Functional must be an OverloadedType.") self.functional = functional self.tape = get_working_tape() if tape is None else tape - self.controls = Enlist(controls) + self._controls = Enlist(controls) self.derivative_components = derivative_components self.scale = scale self.eval_cb_pre = eval_cb_pre @@ -96,23 +184,11 @@ def __init__(self, functional, controls, self.derivative_cb_post = _get_pack_derivative_components( controls, derivative_components) - def derivative(self, adj_input=1.0, options={}): - """Returns the derivative of the functional w.r.t. the control. - Using the adjoint method, the derivative of the functional with - respect to the control, around the last supplied value of the - control, is computed and returned. - - Args: - options (dict): A dictionary of options. To find a list of - available options have a look at the specific control type. - - Returns: - OverloadedType: The derivative with respect to the control. - Should be an instance of the same type as the control. + @property + def controls(self) -> list[Control]: + return self._controls - """ - - # Call callback + def derivative(self, adj_input=1.0, apply_riesz=False): values = [c.tape_value() for c in self.controls] controls = self.derivative_cb_pre(self.controls) @@ -129,9 +205,9 @@ def derivative(self, adj_input=1.0, options={}): derivatives = compute_gradient(self.functional, controls, - options=options, tape=self.tape, - adj_value=adj_value) + adj_value=adj_value, + apply_riesz=apply_riesz) # Call callback derivatives = self.derivative_cb_post( @@ -147,28 +223,13 @@ def derivative(self, adj_input=1.0, options={}): return self.controls.delist(derivatives) @no_annotations - def hessian(self, m_dot, options={}): - """Returns the action of the Hessian of the functional w.r.t. the control on a vector m_dot. - - Using the second-order adjoint method, the action of the Hessian of the - functional with respect to the control, around the last supplied value - of the control, is computed and returned. - - Args: - m_dot ([OverloadedType]): The direction in which to compute the - action of the Hessian. - options (dict): A dictionary of options. To find a list of - available options have a look at the specific control type. - - Returns: - OverloadedType: The action of the Hessian in the direction m_dot. - Should be an instance of the same type as the control. - """ + def hessian(self, m_dot, apply_riesz=False): # Call callback values = [c.tape_value() for c in self.controls] self.hessian_cb_pre(self.controls.delist(values)) - r = compute_hessian(self.functional, self.controls, m_dot, options=options, tape=self.tape) + r = compute_hessian(self.functional, self.controls, m_dot, + tape=self.tape, apply_riesz=apply_riesz) # Call callback self.hessian_cb_post(self.functional.block_variable.checkpoint, @@ -179,22 +240,26 @@ def hessian(self, m_dot, options={}): @no_annotations def __call__(self, values): - """Computes the reduced functional with supplied control value. + """Compute the reduced functional with supplied control value. Args: - values ([OverloadedType]): If you have multiple controls this should be a list of - new values for each control in the order you listed the controls to the constructor. - If you have a single control it can either be a list or a single object. - Each new value should have the same type as the corresponding control. + values ([OverloadedType]): If you have multiple controls this + should be a list of new values for each control in the order + you listed the controls to the constructor. If you have a + single control it can either be a list or a single object. + Each new value should have the same type as the corresponding + control. Returns: - :obj:`OverloadedType`: The computed value. Typically of instance - of :class:`AdjFloat`. + :obj:`OverloadedType`: The computed value. Typically of instance of + :class:`AdjFloat`. """ values = Enlist(values) if len(values) != len(self.controls): - raise ValueError("values should be a list of same length as controls.") + raise ValueError( + "values should be a list of same length as controls." + ) # Call callback. self.eval_cb_pre(self.controls.delist(values)) @@ -230,18 +295,13 @@ def optimize_tape(self): functionals=[self.functional] ) + @contextmanager def marked_controls(self): - return marked_controls(self) - - -class marked_controls(object): - def __init__(self, rf): - self.rf = rf - - def __enter__(self): - for control in self.rf.controls: + """Return a context manager which marks the active controls.""" + for control in self.controls: control.mark_as_control() - - def __exit__(self, *args): - for control in self.rf.controls: - control.unmark_as_control() + try: + yield + finally: + for control in self.controls: + control.unmark_as_control() diff --git a/pyadjoint/reduced_functional_numpy.py b/pyadjoint/reduced_functional_numpy.py index 34ac7f43..4228ddc0 100644 --- a/pyadjoint/reduced_functional_numpy.py +++ b/pyadjoint/reduced_functional_numpy.py @@ -1,5 +1,5 @@ from __future__ import print_function -from .reduced_functional import ReducedFunctional +from .reduced_functional import AbstractReducedFunctional, ReducedFunctional from .tape import no_annotations, get_working_tape from .enlisting import Enlist from .control import Control @@ -8,7 +8,7 @@ import numpy -class ReducedFunctionalNumPy(ReducedFunctional): +class ReducedFunctionalNumPy(AbstractReducedFunctional): """This class implements the reduced functional for given functional and controls based on numpy data structures. @@ -24,6 +24,10 @@ def __init__(self, functional, controls=None, tape=None): tape=tape) self.rf = functional + @property + def controls(self) -> list[Control]: + return self.rf._controls + def __getattr__(self, item): return getattr(self.rf, item) @@ -55,19 +59,14 @@ def get_global(self, m): return numpy.array(m_global, dtype="d") @no_annotations - def derivative(self, m_array=None, forget=True, project=False, options=None): - """ An implementation of the reduced functional derivative evaluation - that accepts the controls as an array of scalars. If no control values are given, - the result is derivative at the lastest forward run. - """ + def derivative(self, adj_input=1.0, apply_riesz=True): + + if not apply_riesz: + raise ValueError( + "ReducedFunctionalNumpy only returns primal gradients." + ) - # In the case that the control values have changed since the last forward run, - # we first need to rerun the forward model with the new controls to have the - # correct forward solutions - # TODO: No good way to check. Is it ok to always assume `m_array` is the same as used last in __call__? - # if m_array is not None: - # self.__call__(m_array) - dJdm = self.rf.derivative(options=options) + dJdm = self.rf.derivative(adj_input, apply_riesz) dJdm = Enlist(dJdm) m_global = [] @@ -79,14 +78,18 @@ def derivative(self, m_array=None, forget=True, project=False, options=None): return numpy.array(m_global, dtype="d") @no_annotations - def hessian(self, m_array, m_dot_array): - """ An implementation of the reduced functional hessian action evaluation - that accepts the controls as an array of scalars. If m_array is None, - the Hessian action at the latest forward run is returned. """ + def hessian(self, m_dot_array, apply_riesz=True): + + if not apply_riesz: + raise ValueError( + "ReducedFunctionalNumpy only returns primal gradients." + ) + # Calling derivative is needed, see i.e. examples/stokes-shape-opt self.derivative() m_copies = [control.copy_data() for control in self.controls] - Hm = self.rf.hessian(self.set_local(m_copies, m_dot_array)) + Hm = self.rf.hessian(self.set_local(m_copies, m_dot_array), + apply_riesz) Hm = Enlist(Hm) m_global = []