Commit 232358b: tf conversion and edits
ASEM000 committed Sep 18, 2023
1 parent 0e73bca commit 232358b
Showing 7 changed files with 388 additions and 40 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -92,7 +92,7 @@ net = sk.tree_unmask(net)
|Attention| - `MultiHeadAttention`|
| Containers| - `Sequential`, `RandomApply`, `RandomChoice` |
| Convolution | - `{FFT,_}Conv{1D,2D,3D}` <br> - `{FFT,_}Conv{1D,2D,3D}Transpose` <br> - `Depthwise{FFT,_}Conv{1D,2D,3D}` <br> - `Separable{FFT,_}Conv{1D,2D,3D}` <br> - `Conv{1D,2D,3D}Local` |
-|Dropout|- `Dropout`<br> - `Dropout{1D,2D,3D}` <br> - `GeneralDropout` <br> - `RandomCutout{1D,2D}` |
+|Dropout|- `Dropout`<br> - `Dropout{1D,2D,3D}` <br> - `RandomCutout{1D,2D}` |
| Linear | - `Linear`, `Multilinear`, `GeneralLinear`, `Identity` |
|Densely connected| - `FNN` , <br> - `MLP` _compile time_ optimized |
|Normalization|- `{Layer,Instance,Group,Batch}Norm`|
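`GeneralDropout` disappears from the public API in this commit; a hedged migration sketch (the `drop_axes` keyword is taken from the dropout diff below):

```python
import serket as sk

# before this commit (now removed):
# drop = sk.nn.GeneralDropout(drop_rate=0.5, drop_axes=(0,))

# after: Dropout absorbs the axes argument; drop_axes=None keeps the old
# per-element Dropout behavior.
drop = sk.nn.Dropout(drop_rate=0.5, drop_axes=(0,))
```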
1 change: 1 addition & 0 deletions docs/index.rst
@@ -79,6 +79,7 @@ Install from github::
notebooks/evaluation
notebooks/mixed_precision
notebooks/checkpointing
+notebooks/convert_tensorflow
notebooks/regularization
notebooks/subset_training

361 changes: 361 additions & 0 deletions docs/notebooks/convert_tensorflow.ipynb

Large diffs are not rendered by default.
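Since the notebook is not rendered here, a minimal sketch of the kind of JAX-to-TensorFlow conversion it presumably demonstrates, using `jax.experimental.jax2tf`; the layer choice and the `tree_eval` freezing step are assumptions, not taken from the notebook:

```python
import jax.random as jr
import tensorflow as tf
from jax.experimental import jax2tf

import serket as sk

# Build a small serket layer; tree_eval swaps stochastic parts (e.g. Dropout)
# for Identity so the exported graph is deterministic.
layer = sk.tree_eval(sk.nn.Linear(in_features=3, out_features=2, key=jr.PRNGKey(0)))

# jax2tf.convert wraps the pure forward pass as a TF-traceable function.
tf_forward = tf.function(jax2tf.convert(lambda x: layer(x)), autograph=False)
print(tf_forward(tf.ones((3,))))
```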

2 changes: 1 addition & 1 deletion docs/notebooks/train_pinn_burgers.ipynb
@@ -457,7 +457,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
-"version": "3.10.0"
+"version": "3.11.0"
},
"orig_nbformat": 4
},
8 changes: 4 additions & 4 deletions serket/_src/nn/attention.py
@@ -69,7 +69,7 @@ def calculate_attention(
v_heads: jax.Array,
mask: jax.Array,
num_heads: int,
-drop_layer: sk.nn.GeneralDropout,
+drop_layer: sk.nn.Dropout,
key: jr.KeyArray,
) -> jax.Array:
"""Applies multi-head attention to the given inputs.
@@ -176,7 +176,7 @@ class MultiHeadAttention(sk.TreeClass):
>>> import serket as sk
>>> layer = sk.nn.MultiHeadAttention(1, 1, key=jr.PRNGKey(0))
>>> print(repr(layer.dropout))
-GeneralDropout(drop_rate=0.0, drop_axes=Ellipsis)
+Dropout(drop_rate=0.0, drop_axes=None)
>>> print(repr(sk.tree_eval(layer).dropout))
Identity()
@@ -247,8 +247,8 @@ def __init__(
qkey, kkey, vkey, okey = jr.split(key, 4)

self.num_heads = num_heads
-drop_axes = (-1, -2) if drop_broadcast else ...
-self.dropout = sk.nn.GeneralDropout(drop_rate, drop_axes)
+drop_axes = (-1, -2) if drop_broadcast else None
+self.dropout = sk.nn.Dropout(drop_rate, drop_axes)

self.q_projection = sk.nn.Linear(
in_features=q_features,
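A hedged sketch of the changed constructor path; the `drop_rate` and `drop_broadcast` keyword names are inferred from the `__init__` body above, and the exact repr is an assumption:

```python
import jax.random as jr
import serket as sk

# drop_broadcast=True shares one dropout mask over the last two axes,
# which now maps to Dropout(drop_rate, drop_axes=(-1, -2)).
layer = sk.nn.MultiHeadAttention(1, 1, drop_rate=0.5, drop_broadcast=True, key=jr.PRNGKey(0))
print(repr(layer.dropout))  # expected: Dropout(drop_rate=0.5, drop_axes=(-1, -2))
```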
52 changes: 20 additions & 32 deletions serket/_src/nn/dropout.py
@@ -37,13 +37,13 @@ def dropout_nd(
key: jr.KeyArray,
x: jax.Array,
drop_rate,
-drop_axes: Sequence[int] | Literal["..."] = ...,
+drop_axes: Sequence[int] | None = None,
) -> jax.Array:
"""Drop some elements of the input array."""
+# drop_axes = None means dropout is applied to all axes
shape = (
x.shape
-if drop_axes is ...
+if drop_axes is None
else (x.shape[i] if i in drop_axes else 1 for i in range(x.ndim))
)
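To make the mask-shape logic above concrete, a standalone sketch; the helper name `mask_shape` is illustrative, not part of serket:

```python
import jax.numpy as jnp

def mask_shape(x, drop_axes=None):
    # None -> one Bernoulli draw per element; otherwise keep size 1
    # (broadcast) on every axis not listed in drop_axes.
    if drop_axes is None:
        return x.shape
    return tuple(x.shape[i] if i in drop_axes else 1 for i in range(x.ndim))

x = jnp.ones((4, 8, 8))
print(mask_shape(x))        # (4, 8, 8): independent dropout per element
print(mask_shape(x, (0,)))  # (4, 1, 1): mask shared across axes 1 and 2
```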

@@ -132,40 +132,15 @@ def scan_step(x, key):


@sk.autoinit
-class GeneralDropout(sk.TreeClass):
-    """Drop some elements of the input array.
-
-    Args:
-        drop_rate: probability of an element to be zeroed. Default: 0.5
-        drop_axes: axes along which dropout is applied. default: ``...`` which means
-            dropout is applied to all axes.
-    """
-
-    drop_rate: float = sk.field(
-        default=0.5,
-        on_setattr=[IsInstance(float), Range(0, 1)],
-        on_getattr=[jax.lax.stop_gradient_p.bind],
-    )
-    drop_axes: tuple[int, ...] | Literal["..."] = ...
-
-    def __call__(self, x, *, key: jr.KeyArray):
-        """Drop some elements of the input array.
-
-        Args:
-            x: input array
-            key: random number generator key
-        """
-        return dropout_nd(key, x, self.drop_rate, self.drop_axes)
-
-
-class Dropout(GeneralDropout):
+class Dropout(sk.TreeClass):
"""Drop some elements of the input array.
Randomly zeroes some of the elements of the input array with
probability ``drop_rate`` using samples from a Bernoulli distribution.
Args:
drop_rate: probability of an element to be zeroed. Default: 0.5
+drop_axes: axes to apply dropout. Default: None to apply to all axes.
Example:
>>> import serket as sk
@@ -199,8 +174,21 @@ class Dropout(GeneralDropout):
)
"""

-    def __init__(self, drop_rate: float = 0.5):
-        super().__init__(drop_rate=drop_rate, drop_axes=...)
+    drop_rate: float = sk.field(
+        default=0.5,
+        on_setattr=[IsInstance(float), Range(0, 1)],
+        on_getattr=[jax.lax.stop_gradient_p.bind],
+    )
+    drop_axes: tuple[int, ...] | None = None
+
+    def __call__(self, x, *, key: jr.KeyArray):
+        """Drop some elements of the input array.
+
+        Args:
+            x: input array
+            key: random number generator key
+        """
+        return dropout_nd(key, x, self.drop_rate, self.drop_axes)


@sk.autoinit
@@ -467,7 +455,7 @@ def spatial_ndim(self) -> int:

@tree_eval.def_eval(RandomCutout1D)
@tree_eval.def_eval(RandomCutout2D)
-@tree_eval.def_eval(GeneralDropout)
+@tree_eval.def_eval(DropoutND)
@tree_eval.def_eval(Dropout)
def _(_) -> sk.nn.Identity:
return sk.nn.Identity()
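Taken together, the unified layer and its eval-mode rule behave like this hedged sketch (keyword usage follows the fields above):

```python
import jax.numpy as jnp
import jax.random as jr
import serket as sk

# drop_axes=None: independent per-element dropout (the old Dropout default).
# drop_axes=(0,): one mask entry per leading-axis slice, broadcast over the
# rest (what GeneralDropout previously provided).
drop = sk.nn.Dropout(drop_rate=0.5, drop_axes=(0,))
y = drop(jnp.ones((4, 4)), key=jr.PRNGKey(0))

# tree_eval swaps Dropout for Identity at inference time.
print(sk.tree_eval(drop))  # Identity()
```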
2 changes: 0 additions & 2 deletions serket/nn/__init__.py
@@ -80,7 +80,6 @@
Dropout1D,
Dropout2D,
Dropout3D,
-GeneralDropout,
RandomCutout1D,
RandomCutout2D,
)
@@ -233,7 +232,6 @@
"Dropout1D",
"Dropout2D",
"Dropout3D",
-"GeneralDropout",
"RandomCutout1D",
"RandomCutout2D",
# linear
