Tensorflow 1.x backend: multiple outputs extension of DeepONet #1410

Closed
wants to merge 45 commits into from

Changes from 7 commits

Commits (45)
9a7e38b
Tensorflow 1.x backend: multiple outputs extension of DeepONet
vl-dud Jul 31, 2023
ae48af0
Codacy Pylint fix
vl-dud Aug 3, 2023
76b7964
move vanilla deeponet building into a separate method
vl-dud Aug 4, 2023
c0e06a5
Remove unwanted method
vl-dud Aug 23, 2023
8338b81
Change `output_count` to `num_outputs`; format via Black
vl-dud Aug 26, 2023
1515c4b
add DeepONet building strategies
vl-dud Sep 15, 2023
ba8d2a0
Add docs for the strategy argument
vl-dud Sep 18, 2023
5087fc2
Format comments
vl-dud Sep 18, 2023
44dae05
Use maximum 88 characters per line
vl-dud Sep 18, 2023
4f23bf8
rename merge to merge_branch_trunk
vl-dud Sep 20, 2023
6f75d99
rename merge to merge_branch_trunk
vl-dud Oct 3, 2023
367905f
Change default deeponet strategy
vl-dud Oct 4, 2023
1d233c8
Change strategy to multi_output_strategy
vl-dud Oct 9, 2023
9fe3572
Codacy Pylint fix
vl-dud Oct 9, 2023
cb2b3fc
Update deeponet.py for tf2 multiple outputs
mitchelldaneker Oct 9, 2023
e7b7e5d
Update deeponet.py
mitchelldaneker Oct 10, 2023
9f41776
Update deeponet.py
mitchelldaneker Oct 10, 2023
97c3641
Add files via upload
mitchelldaneker Oct 10, 2023
85e5984
Update triple.py
mitchelldaneker Oct 10, 2023
b33f812
Merge remote-tracking branch 'origin/master' into deeponet-multiple-o…
vl-dud Oct 13, 2023
25bf219
Add DeepONet strategy classes to __init__.py
vl-dud Oct 13, 2023
44bfd0a
Update __init__.py
mitchelldaneker Oct 13, 2023
d11ab3a
Update deeponet.py
mitchelldaneker Oct 13, 2023
10ed010
Update __init__.py
mitchelldaneker Oct 13, 2023
3ccd772
Update deeponet.py
mitchelldaneker Oct 13, 2023
64ab358
Update antiderivative_aligned_UQ.py
mitchelldaneker Oct 13, 2023
68b2733
Update deeponet.py
mitchelldaneker Oct 13, 2023
569f94e
Revert "Add DeepONet strategy classes to __init__.py"
vl-dud Oct 16, 2023
7c4f750
Hide deeponet strategy classes
vl-dud Oct 16, 2023
ee3eccc
Update triple.py
mitchelldaneker Oct 16, 2023
91a07e9
Update deeponet.py
mitchelldaneker Oct 16, 2023
1eda936
Merge pull request #3 from mitchelldaneker/multiple-outputs-deeponet-tf2
vl-dud Oct 19, 2023
4c8c40e
Format a code with Black
vl-dud Oct 19, 2023
bed66e0
Codacy Pylint fix
vl-dud Oct 19, 2023
7d938c5
Codacy Pylint fix
vl-dud Oct 19, 2023
509f42c
Update triple.py
mitchelldaneker Oct 19, 2023
5f67bdd
Update deeponet.py
mitchelldaneker Oct 19, 2023
b9bf993
Update triple.py
mitchelldaneker Oct 19, 2023
5d66929
Update deeponet.py
mitchelldaneker Oct 20, 2023
f3bb8d2
Update deeponet.py
mitchelldaneker Oct 20, 2023
8102950
Update deeponet.py
mitchelldaneker Oct 20, 2023
54662db
Merge pull request #6 from mitchelldaneker/tf_multiple_outputs
vl-dud Oct 20, 2023
2459f80
Update deeponet.py
mitchelldaneker Oct 20, 2023
b79e0e0
Update triple.py
mitchelldaneker Oct 20, 2023
226ddac
Merge pull request #7 from mitchelldaneker/tf2_multiple_outputs
vl-dud Oct 20, 2023
3 changes: 1 addition & 2 deletions deepxde/data/pde_operator.py
@@ -236,8 +236,7 @@ def _losses(self, outputs, loss_fn, inputs, model, num_func):

losses = []
for i in range(num_func):
out = outputs[i][:, None]

out = outputs[i] if model.net.num_outputs > 1 else outputs[i][:, None]
f = []
if self.pde.pde is not None:
f = self.pde.pde(inputs[1], out, model.net.auxiliary_vars[i][:, None])
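
For shape intuition behind this change: with a single output, outputs[i] is 1-D per function and needs a trailing axis, while with num_outputs > 1 it is already 2-D and is used as-is. A minimal NumPy sketch (sizes are illustrative, not part of the diff):

    import numpy as np

    num_points = 100
    single = np.zeros(num_points)      # shape (100,); single[:, None] gives (100, 1)
    multi = np.zeros((num_points, 3))  # shape (100, 3); already one column per output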
249 changes: 218 additions & 31 deletions deepxde/nn/tensorflow_compat_v1/deeponet.py
@@ -7,6 +7,131 @@
from ... import config
from ...backend import tf
from ...utils import timing
from abc import ABC, abstractmethod


class DeepONetStrategy(ABC):
"""DeepONet building strategy.

See Section 3.1.6 in
L. Lu, X. Meng, S. Cai, Z. Mao, S. Goswami, Z. Zhang, & G. Karniadakis.
A comprehensive and fair comparison of two neural operators
(with practical extensions) based on FAIR data.
Computer Methods in Applied Mechanics and Engineering, 393, 114778, 2022.
"""

def __init__(self, net):
self.net = net

def _build_branch_and_trunk(self):
# Branch net to encode the input function
branch = self.net.build_branch_net()
# Trunk net to encode the domain of the output function
trunk = self.net.build_trunk_net()
return branch, trunk

@abstractmethod
def build(self):
pass


class VanillaStrategy(DeepONetStrategy):
def build(self):
branch, trunk = self._build_branch_and_trunk()
if branch.shape[-1] != trunk.shape[-1]:
raise AssertionError(
"Output sizes of branch net and trunk net do not match."
)
y = self.net.merge(branch, trunk)
return y


class IndependentStrategy(DeepONetStrategy):
"""Directly use n independent DeepONets,
and each DeepONet outputs only one function.
"""

def build(self):
vanilla_strategy = VanillaStrategy(self.net)
ys = []
for _ in range(self.net.num_outputs):
ys.append(vanilla_strategy.build())
return self.net.concatenate_outputs(ys)


class SplitBothStrategy(DeepONetStrategy):
"""Split the outputs of both the branch net and the trunk net into n groups,
and then the kth group outputs the kth solution.

For example, if n = 2 and both the branch and trunk nets have 100 output neurons,
then the dot product between the first 50 neurons of
the branch and trunk nets generates the first function,
and the remaining 50 neurons generate the second function.
"""

def build(self):
branch, trunk = self._build_branch_and_trunk()
if branch.shape[-1] != trunk.shape[-1]:
raise AssertionError(
"Output sizes of branch net and trunk net do not match."
)
if branch.shape[-1] % self.net.num_outputs != 0:
raise AssertionError(
f"Output size of the branch net is not evenly divisible by {self.net.num_outputs}."
)
branch_groups = tf.split(
branch, num_or_size_splits=self.net.num_outputs, axis=1
)
trunk_groups = tf.split(trunk, num_or_size_splits=self.net.num_outputs, axis=1)
ys = []
for i in range(self.net.num_outputs):
y = self.net.merge(branch_groups[i], trunk_groups[i])
ys.append(y)
return self.net.concatenate_outputs(ys)
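
To make the n = 2 example in the docstring concrete, here is a standalone sketch of the split-and-merge (assuming the TensorFlow 1.x compat API; the tensor names and sizes are illustrative):

    import tensorflow.compat.v1 as tf

    tf.disable_v2_behavior()

    branch = tf.placeholder(tf.float32, [None, 100])  # branch net output, 100 features
    trunk = tf.placeholder(tf.float32, [None, 100])   # trunk net output, 100 features

    # Two groups of 50 features each.
    branch_groups = tf.split(branch, num_or_size_splits=2, axis=1)
    trunk_groups = tf.split(trunk, num_or_size_splits=2, axis=1)

    # Per-group dot product, as in DeepONet.merge: each [None, 50] pair -> [None, 1].
    ys = [
        tf.expand_dims(tf.einsum("bi,bi->b", b, t), axis=1)
        for b, t in zip(branch_groups, trunk_groups)
    ]
    y = tf.concat(ys, axis=1)  # [None, 2]: one column per output function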


class SplitBranchStrategy(DeepONetStrategy):
"""Split the branch net and share the trunk net."""

def build(self):
branch, trunk = self._build_branch_and_trunk()
if branch.shape[-1] % self.net.num_outputs != 0:
raise AssertionError(
f"Output size of the branch net is not evenly divisible by {self.net.num_outputs}."
)
if branch.shape[-1] / self.net.num_outputs != trunk.shape[-1]:
raise AssertionError(
f"Output size of the trunk net does not equal to {branch.shape[-1] // self.net.num_outputs}."
)
branch_groups = tf.split(
branch, num_or_size_splits=self.net.num_outputs, axis=1
)
ys = []
for i in range(self.net.num_outputs):
y = self.net.merge(branch_groups[i], trunk)
ys.append(y)
return self.net.concatenate_outputs(ys)


class SplitTrunkStrategy(DeepONetStrategy):
"""Split the trunk net and share the branch net."""

def build(self):
branch, trunk = self._build_branch_and_trunk()
if trunk.shape[-1] % self.net.num_outputs != 0:
raise AssertionError(
f"Output size of the trunk net is not evenly divisible by {self.net.num_outputs}."
)
if trunk.shape[-1] / self.net.num_outputs != branch.shape[-1]:
raise AssertionError(
f"Output size of the branch net does not equal to {trunk.shape[-1] // self.net.num_outputs}."
)
trunk_groups = tf.split(trunk, num_or_size_splits=self.net.num_outputs, axis=1)
ys = []
for i in range(self.net.num_outputs):
y = self.net.merge(branch, trunk_groups[i])
ys.append(y)
return self.net.concatenate_outputs(ys)


class DeepONet(NN):
@@ -20,7 +145,7 @@ class DeepONet(NN):
layer_sizes_branch: A list of integers as the width of a fully connected
network, or `(dim, f)` where `dim` is the input dimension and `f` is a
network function. The width of the last layer in the branch and trunk net
should be equal.
should be equal, except when using the "split_branch" or "split_trunk" strategy.
layer_sizes_trunk (list): A list of integers as the width of a fully connected
network.
activation: If `activation` is a ``string``, then the same activation is used in
Expand All @@ -29,6 +154,15 @@ class DeepONet(NN):
`activation["branch"]`.
trainable_branch: Boolean.
trainable_trunk: Boolean or a list of booleans.
num_outputs (integer): Number of outputs.
strategy (str): "vanilla", "independent", "split_both", "split_branch" or "split_trunk".
This is only used when num_outputs > 1 (see the usage sketch after this docstring).

- Сhoose "vanilla" for classical implementation of DeepONet. Can not be used with num_outputs > 1.
- Сhoose "independent" to use num_outputs independent DeepONets, and each DeepONet outputs only one function.
- Сhoose "split_both" to split the outputs of both the branch net and the trunk net into num_outputs groups, and then the kth group outputs the kth solution.
- Сhoose "split_branch" to split the branch net and share the trunk net. The width of the last layer in the branch net should be equal to the one in the trunk net multiplied by the number of outputs.
- Сhoose "split_trunk" to split the trunk net and share the branch net. The width of the last layer in the trunk net should be equal to the one in the branch net multiplied by the number of outputs.
"""

def __init__(
@@ -42,6 +176,8 @@ def __init__(
stacked=False,
trainable_branch=True,
trainable_trunk=True,
num_outputs=1,
strategy="independent",
):
super().__init__()
if isinstance(trainable_trunk, (list, tuple)):
@@ -69,6 +205,22 @@ def __init__(
self._inputs = None
self._X_func_default = None

self.num_outputs = num_outputs
if self.num_outputs == 1:
if strategy != "vanilla":
strategy = "vanilla"
print('Strategy is forcibly changed to "vanilla".')
elif strategy == "vanilla":
strategy = "independent"
print('Strategy is forcibly changed to "independent".')
self.strategy = {
"independent": IndependentStrategy,
"split_both": SplitBothStrategy,
"split_branch": SplitBranchStrategy,
"split_trunk": SplitTrunkStrategy,
"vanilla": VanillaStrategy,
}.get(strategy, IndependentStrategy)(self)

@property
def inputs(self):
return self._inputs
@@ -101,7 +253,14 @@ def build(self):
self.X_loc = tf.placeholder(config.real(tf), [None, self.layer_size_loc[0]])
self._inputs = [self.X_func, self.X_loc]

# Branch net to encode the input function
self.y = self.strategy.build()
if self._output_transform is not None:
self.y = self._output_transform(self._inputs, self.y)

self.target = tf.placeholder(config.real(tf), [None, self.num_outputs])
self.built = True

def build_branch_net(self):
y_func = self.X_func
if callable(self.layer_size_func[1]):
# User-defined network
@@ -141,8 +300,9 @@ def build(self):
regularizer=self.regularizer,
trainable=self.trainable_branch,
)
return y_func

# Trunk net to encode the domain of the output function
def build_trunk_net(self):
y_loc = self.X_loc
if self._input_transform is not None:
y_loc = self._input_transform(y_loc)
@@ -156,24 +316,20 @@ def build(self):
if isinstance(self.trainable_trunk, (list, tuple))
else self.trainable_trunk,
)
return y_loc

def merge(self, branch, trunk):
# Dot product
if y_func.shape[-1] != y_loc.shape[-1]:
raise AssertionError(
"Output sizes of branch net and trunk net do not match."
)
self.y = tf.einsum("bi,bi->b", y_func, y_loc)
self.y = tf.expand_dims(self.y, axis=1)
# Add bias
y = tf.einsum("bi,bi->b", branch, trunk)
y = tf.expand_dims(y, axis=1)
if self.use_bias:
b = tf.Variable(tf.zeros(1, dtype=config.real(tf)))
self.y += b

if self._output_transform is not None:
self.y = self._output_transform(self._inputs, self.y)
y += b
return y

self.target = tf.placeholder(config.real(tf), [None, 1])
self.built = True
@staticmethod
def concatenate_outputs(ys):
return tf.concat(ys, axis=1)

def _dense(
self,
@@ -252,13 +408,22 @@ class DeepONetCartesianProd(NN):
layer_size_branch: A list of integers as the width of a fully connected network,
or `(dim, f)` where `dim` is the input dimension and `f` is a network
function. The width of the last layer in the branch and trunk net should be
equal.
equal, except when using the "split_branch" or "split_trunk" strategy.
layer_size_trunk (list): A list of integers as the width of a fully connected
network.
activation: If `activation` is a ``string``, then the same activation is used in
both trunk and branch nets. If `activation` is a ``dict``, then the trunk
net uses the activation `activation["trunk"]`, and the branch net uses
`activation["branch"]`.
num_outputs (integer): Number of outputs.
strategy (str): "vanilla", "independent", "split_both", "split_branch" or "split_trunk".
This is only used when num_outputs > 1 (see the usage sketch after this docstring).

- Сhoose "vanilla" for classical implementation of DeepONet. Can not be used with num_outputs > 1.
- Сhoose "independent" to use num_outputs independent DeepONets, and each DeepONet outputs only one function.
- Сhoose "split_both" to split the outputs of both the branch net and the trunk net into num_outputs groups, and then the kth group outputs the kth solution.
- Сhoose "split_branch" to split the branch net and share the trunk net. The width of the last layer in the branch net should be equal to the one in the trunk net multiplied by the number of outputs.
- Сhoose "split_trunk" to split the trunk net and share the branch net. The width of the last layer in the trunk net should be equal to the one in the branch net multiplied by the number of outputs.
"""

def __init__(
@@ -268,6 +433,8 @@ def __init__(
activation,
kernel_initializer,
regularization=None,
num_outputs=1,
strategy="independent",
):
super().__init__()
self.layer_size_func = layer_size_branch
@@ -279,9 +446,24 @@ def __init__(
self.activation_branch = self.activation_trunk = activations.get(activation)
self.kernel_initializer = initializers.get(kernel_initializer)
self.regularizer = regularizers.get(regularization)

self._inputs = None

self.num_outputs = num_outputs
if self.num_outputs == 1:
if strategy != "vanilla":
strategy = "vanilla"
print('Strategy is forcibly changed to "vanilla".')
elif strategy == "vanilla":
strategy = "independent"
print('Strategy is forcibly changed to "independent".')
self.strategy = {
"independent": IndependentStrategy,
"split_both": SplitBothStrategy,
"split_branch": SplitBranchStrategy,
"split_trunk": SplitTrunkStrategy,
"vanilla": VanillaStrategy,
}.get(strategy, IndependentStrategy)(self)

@property
def inputs(self):
return self._inputs
@@ -301,7 +483,14 @@ def build(self):
self.X_loc = tf.placeholder(config.real(tf), [None, self.layer_size_loc[0]])
self._inputs = [self.X_func, self.X_loc]

# Branch net to encode the input function
self.y = self.strategy.build()
if self._output_transform is not None:
self.y = self._output_transform(self._inputs, self.y)

self.target = tf.placeholder(config.real(tf), [None, None])
self.built = True

def build_branch_net(self):
y_func = self.X_func
if callable(self.layer_size_func[1]):
# User-defined network
@@ -322,7 +511,9 @@ def build(self):
kernel_initializer=self.kernel_initializer,
kernel_regularizer=self.regularizer,
)
return y_func

def build_trunk_net(self):
# Trunk net to encode the domain of the output function
y_loc = self.X_loc
if self._input_transform is not None:
@@ -335,19 +526,15 @@ def build(self):
kernel_initializer=self.kernel_initializer,
kernel_regularizer=self.regularizer,
)
return y_loc

# Dot product
if y_func.shape[-1] != y_loc.shape[-1]:
raise AssertionError(
"Output sizes of branch net and trunk net do not match."
)
self.y = tf.einsum("bi,ni->bn", y_func, y_loc)
def merge(self, branch, trunk):
y = tf.einsum("bi,ni->bn", branch, trunk)
# Add bias
b = tf.Variable(tf.zeros(1, dtype=config.real(tf)))
self.y += b
y += b
return y

if self._output_transform is not None:
self.y = self._output_transform(self._inputs, self.y)

self.target = tf.placeholder(config.real(tf), [None, None])
self.built = True
@staticmethod
def concatenate_outputs(ys):
return tf.stack(ys, axis=2)
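
The stack axis differs deliberately from DeepONet.concatenate_outputs: there each merged output has shape [batch, 1] and tf.concat along axis 1 yields [batch, num_outputs]; here each merged output has shape [num_functions, num_points], so tf.stack along axis 2 yields [num_functions, num_points, num_outputs]. A shape sketch (sizes are illustrative):

    import tensorflow.compat.v1 as tf

    tf.disable_v2_behavior()

    # DeepONet: k outputs of shape [batch, 1] -> concat -> [batch, k].
    ys_pointwise = [tf.zeros([8, 1]) for _ in range(3)]
    print(tf.concat(ys_pointwise, axis=1).shape)  # (8, 3)

    # DeepONetCartesianProd: k outputs of shape [num_funcs, num_points]
    # -> stack -> [num_funcs, num_points, k].
    ys_prod = [tf.zeros([8, 100]) for _ in range(3)]
    print(tf.stack(ys_prod, axis=2).shape)  # (8, 100, 3)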