
Commit

Merge branch 'keras-team:master' into improve_CONTRIBUTING.md
Faisal-Alsrheed authored Jan 29, 2024
2 parents 573e93f + f7401d4 commit 8a95061
Showing 7 changed files with 148 additions and 29 deletions.
2 changes: 1 addition & 1 deletion keras/applications/imagenet_utils_test.py
@@ -6,7 +6,7 @@
 from keras import backend
 from keras import testing
 from keras.applications import imagenet_utils as utils
-from keras.mixed_precision import set_dtype_policy
+from keras.dtype_policies.dtype_policy import set_dtype_policy
 
 
 class TestImageNetUtils(testing.TestCase, parameterized.TestCase):
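
The only change in this test file is the import path; `set_dtype_policy` itself behaves the same. A minimal sketch of the move (the usage lines are illustrative, not part of the diff):

    # Old location (removed by this commit):
    #   from keras.mixed_precision import set_dtype_policy
    # New location:
    from keras.dtype_policies.dtype_policy import set_dtype_policy

    set_dtype_policy("mixed_float16")  # same behavior, only the module moved
    set_dtype_policy("float32")        # restore the default global policy
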
(file path not shown)
@@ -1,21 +1,19 @@
 from keras import backend
-from keras.mixed_precision.dtype_policy import DTypePolicy
-from keras.mixed_precision.dtype_policy import dtype_policy
-from keras.mixed_precision.dtype_policy import set_dtype_policy
+from keras.dtype_policies import dtype_policy
 from keras.saving import serialization_lib
 
 
-def resolve_policy(identifier):
+def get(identifier):
     if identifier is None:
-        return dtype_policy()
-    if isinstance(identifier, DTypePolicy):
+        return dtype_policy.dtype_policy()
+    if isinstance(identifier, dtype_policy.DTypePolicy):
         return identifier
     if isinstance(identifier, dict):
         return serialization_lib.deserialize_keras_object(identifier)
     if isinstance(identifier, str):
-        return DTypePolicy(identifier)
+        return dtype_policy.DTypePolicy(identifier)
     try:
-        return DTypePolicy(backend.standardize_dtype(identifier))
+        return dtype_policy.DTypePolicy(backend.standardize_dtype(identifier))
     except:
         raise ValueError(
             "Cannot interpret `dtype` argument. Expected a string "
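
The rename from `resolve_policy` to `get` matches the `get()` resolvers used elsewhere in Keras: it normalizes `None`, strings, backend dtypes, config dicts, and `DTypePolicy` instances into a `DTypePolicy`. A hedged sketch of how callers can use it (the `keras.dtype_policies` module path follows the import added to `keras/layers/layer.py` below):

    from keras import dtype_policies
    from keras.dtype_policies.dtype_policy import DTypePolicy

    # A policy name becomes a DTypePolicy instance.
    assert isinstance(dtype_policies.get("mixed_bfloat16"), DTypePolicy)
    # None falls back to the current global policy.
    assert isinstance(dtype_policies.get(None), DTypePolicy)
    # Existing instances pass through unchanged.
    policy = DTypePolicy("float32")
    assert dtype_policies.get(policy) is policy
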
(file path not shown)
@@ -5,8 +5,10 @@
 
 @keras_export(
     [
-        "keras.mixed_precision.DTypePolicy",
-        "keras.mixed_precision.Policy",
+        "keras.DTypePolicy",
+        "keras.dtype_policies.DTypePolicy",
+        "keras.mixed_precision.DTypePolicy",  # Legacy
+        "keras.mixed_precision.Policy",  # Legacy
     ]
 )
 class DTypePolicy:
@@ -15,7 +17,7 @@ class DTypePolicy:
     A dtype policy determines a layer's computation and variable dtypes. Each
     layer has a policy. Policies can be passed to the `dtype` argument of layer
     constructors, or a global policy can be set with
-    `keras.mixed_precision.set_dtype_policy`.
+    `keras.config.set_dtype_policy`.
 
     Args:
         name: The policy name, which determines the compute and variable dtypes.
@@ -32,7 +34,7 @@ class DTypePolicy:
     API name. Mixed precision can be enabled by passing `"mixed_float16"` or
     `"mixed_bfloat16"` to `keras.mixed_precision.set_dtype_policy()`.
 
-    >>> keras.mixed_precision.set_dtype_policy("mixed_float16")
+    >>> keras.config.set_dtype_policy("mixed_float16")
     >>> layer1 = keras.layers.Dense(10)
     >>> layer1.dtype_policy  # layer1 will automatically use mixed precision
     <DTypePolicy "mixed_float16">
@@ -42,11 +44,11 @@ class DTypePolicy:
     >>> layer2.dtype_policy
     <DTypePolicy "float32">
 
     >>> # Set policy back to initial float32.
-    >>> keras.mixed_precision.set_dtype_policy('float32')
+    >>> keras.config.set_dtype_policy('float32')
 
     In the example above, passing `dtype="float32"` to the layer is
     equivalent to passing
-    `dtype=keras.mixed_precision.DTypePolicy("float32")`.
+    `dtype=keras.config.DTypePolicy("float32")`.
 
     In general, passing a dtype policy name to a layer is equivalent
     to passing the corresponding policy, so it is never necessary
     to explicitly construct a `DTypePolicy` object.
@@ -142,16 +144,17 @@ def from_config(cls, config):
 
 @keras_export(
     [
-        "keras.mixed_precision.set_dtype_policy",
-        "keras.mixed_precision.set_global_policy",
+        "keras.config.set_dtype_policy",
+        "keras.mixed_precision.set_dtype_policy",  # Legacy
+        "keras.mixed_precision.set_global_policy",  # Legacy
     ]
 )
 def set_dtype_policy(policy):
     """Sets the default dtype policy globally.
 
     Example:
 
-    >>> keras.mixed_precision.set_dtype_policy("mixed_float16")
+    >>> keras.config.set_dtype_policy("mixed_float16")
     """
     if not isinstance(policy, DTypePolicy):
         if isinstance(policy, str):
@@ -169,8 +172,9 @@ def set_dtype_policy(policy):
 
 @keras_export(
     [
-        "keras.mixed_precision.dtype_policy",
-        "keras.mixed_precision.global_policy",
+        "keras.config.dtype_policy",
+        "keras.mixed_precision.dtype_policy",  # Legacy
+        "keras.mixed_precision.global_policy",  # Legacy
     ]
 )
 def dtype_policy():
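
With the expanded `keras_export` lists, the new `keras.config` endpoints and the legacy `keras.mixed_precision` names point at the same functions, so existing user code keeps working. A minimal sketch of both spellings at the public API level (assuming a plain `import keras`):

    import keras

    # New canonical endpoints.
    keras.config.set_dtype_policy("mixed_float16")
    assert keras.config.dtype_policy().name == "mixed_float16"

    # Legacy aliases remain importable for backward compatibility.
    keras.mixed_precision.set_global_policy("float32")
    assert keras.mixed_precision.global_policy().name == "float32"
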
(file path not shown)
@@ -1,6 +1,6 @@
-from keras.mixed_precision import DTypePolicy
-from keras.mixed_precision import dtype_policy
-from keras.mixed_precision import set_dtype_policy
+from keras.dtype_policies.dtype_policy import DTypePolicy
+from keras.dtype_policies.dtype_policy import dtype_policy
+from keras.dtype_policies.dtype_policy import set_dtype_policy
 from keras.testing import test_case
 
 
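
Since `set_dtype_policy` mutates process-global state, tests around it typically restore the default policy when they finish. A hedged sketch of that pattern with the updated imports (the test class itself is illustrative, not part of this diff):

    from keras.dtype_policies.dtype_policy import DTypePolicy
    from keras.dtype_policies.dtype_policy import dtype_policy
    from keras.dtype_policies.dtype_policy import set_dtype_policy
    from keras.testing import test_case


    class DTypePolicyGlobalStateTest(test_case.TestCase):
        def tearDown(self):
            # Restore the default policy so test order cannot matter.
            set_dtype_policy("float32")

        def test_set_and_read_back(self):
            set_dtype_policy("mixed_float16")
            self.assertEqual(dtype_policy().name, "mixed_float16")
            self.assertIsInstance(dtype_policy(), DTypePolicy)
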
117 changes: 117 additions & 0 deletions keras/layers/attention/attention_test.py
@@ -1,6 +1,7 @@
 import numpy as np
 
 from keras import layers
+from keras import ops
 from keras import testing
 
 

@@ -205,6 +206,7 @@ def test_attention_compute_mask_does_not_return_none_with_valid_mask(self):
         valid_mask = np.array([True, False, True])
         mask = [valid_mask, np.array([False, True, False])]
         computed_mask = layer.compute_mask(inputs=dummy_inputs, mask=mask)
+        computed_mask = ops.convert_to_numpy(computed_mask)
         self.assertIsNotNone(
             computed_mask,
             "compute_mask should not return None with a valid mask",
@@ -221,7 +223,122 @@ def test_attention_compute_mask_returns_correct_tensor_with_valid_mask(
         valid_mask = np.array([True, False, True])
         mask = [valid_mask, np.array([False, True, False])]
         computed_mask = layer.compute_mask(inputs=dummy_inputs, mask=mask)
+        computed_mask = ops.convert_to_numpy(computed_mask)
         self.assertTrue(
             np.array_equal(computed_mask, valid_mask),
             "compute_mask did not return the correct mask tensor",
         )
+
+    def test_attention_compute_mask_returns_correct_tensor_with_all_true_mask(
+        self,
+    ):
+        layer = layers.Attention()
+        dummy_inputs = [np.ones((2, 3, 4)), np.ones((2, 4, 4))]
+        valid_mask = np.array([True, True, True])
+        mask = [valid_mask, np.array([True, True, True])]
+        computed_mask = layer.compute_mask(inputs=dummy_inputs, mask=mask)
+        computed_mask = ops.convert_to_numpy(computed_mask)
+        expected_mask = np.array([True, True, True])
+        self.assertTrue(
+            np.array_equal(computed_mask, expected_mask),
+            "compute_mask did not return the correct mask tensor",
+        )
+
+    def test_attention_compute_mask_returns_correct_tensor_with_all_false_mask(
+        self,
+    ):
+        layer = layers.Attention()
+        dummy_inputs = [np.ones((2, 3, 4)), np.ones((2, 4, 4))]
+        valid_mask = np.array([False, False, False])
+        mask = [valid_mask, np.array([False, False, False])]
+        computed_mask = layer.compute_mask(inputs=dummy_inputs, mask=mask)
+        computed_mask = ops.convert_to_numpy(computed_mask)
+        expected_mask = np.array([False, False, False])
+        self.assertTrue(
+            np.array_equal(computed_mask, expected_mask),
+            "compute_mask did not return the correct mask tensor",
+        )
+
+    def test_attention_compute_mask_with_tolerance_1e_3(self):
+        layer = layers.Attention()
+        dummy_inputs = [np.ones((2, 3, 4)), np.ones((2, 4, 4))]
+        valid_mask = np.array([1.0, 0.0, 1.0], dtype=float)
+        mask = [valid_mask, np.array([0.0, 1.0, 0.0], dtype=float)]
+        computed_mask = layer.compute_mask(inputs=dummy_inputs, mask=mask)
+        computed_mask = ops.convert_to_numpy(computed_mask)
+        expected_mask = valid_mask
+        self.assertTrue(
+            np.allclose(computed_mask, expected_mask, atol=1e-3),
+            "Incorrect mask tensor within tolerance 1e-3",
+        )
+
+    def test_attention_compute_mask_with_tolerance_1e_5(self):
+        layer = layers.Attention()
+        dummy_inputs = [np.ones((2, 3, 4)), np.ones((2, 4, 4))]
+        valid_mask = np.array([1.0, 0.0, 1.0], dtype=float)
+        mask = [valid_mask, np.array([0.0, 1.0, 0.0], dtype=float)]
+        computed_mask = layer.compute_mask(inputs=dummy_inputs, mask=mask)
+        computed_mask = ops.convert_to_numpy(computed_mask)
+        expected_mask = valid_mask
+        self.assertTrue(
+            np.allclose(computed_mask, expected_mask, atol=1e-5),
+            "Incorrect mask tensor within tolerance 1e-5",
+        )
+
+    def test_attention_compute_mask_with_tolerance_1e_7(self):
+        layer = layers.Attention()
+        dummy_inputs = [np.ones((2, 3, 4)), np.ones((2, 4, 4))]
+        valid_mask = np.array([1.0, 0.0, 1.0], dtype=float)
+        mask = [valid_mask, np.array([0.0, 1.0, 0.0], dtype=float)]
+        computed_mask = layer.compute_mask(inputs=dummy_inputs, mask=mask)
+        computed_mask = ops.convert_to_numpy(computed_mask)
+        expected_mask = valid_mask
+        self.assertTrue(
+            np.allclose(computed_mask, expected_mask, atol=1e-7),
+            "Incorrect mask tensor within tolerance 1e-7",
+        )
+
+    def test_attention_compute_mask_with_single_element_masks(self):
+        layer = layers.Attention()
+        dummy_inputs = [np.ones((2, 3, 4)), np.ones((2, 4, 4))]
+        valid_mask = np.array([True])
+        mask = [valid_mask, np.array([False])]
+        computed_mask = layer.compute_mask(inputs=dummy_inputs, mask=mask)
+        computed_mask = ops.convert_to_numpy(computed_mask)
+        expected_shape = (1,)
+        self.assertEqual(computed_mask.shape, expected_shape)
+
+    def test_attention_compute_mask_with_non_boolean_masks(self):
+        layer = layers.Attention()
+        dummy_inputs = [np.ones((2, 3, 4)), np.ones((2, 4, 4))]
+        valid_mask = np.array([1, 0, 1])
+        mask = [valid_mask, np.array([0, 1, 0])]
+        computed_mask = layer.compute_mask(inputs=dummy_inputs, mask=mask)
+        computed_mask = ops.convert_to_numpy(computed_mask)
+        self.assertTrue(np.array_equal(computed_mask, valid_mask))
+
+    def test_attention_compute_mask_with_edge_case_masks(self):
+        layer = layers.Attention()
+        dummy_inputs = [np.ones((2, 3, 4)), np.ones((2, 4, 4))]
+        edge_case_masks = [
+            np.array([True, True, True]),
+            np.array([False, False, False]),
+            np.array([True, False, True]),
+        ]
+        for mask in edge_case_masks:
+            computed_mask = layer.compute_mask(
+                inputs=dummy_inputs, mask=[mask, mask]
+            )
+            computed_mask = ops.convert_to_numpy(computed_mask)
+            self.assertTrue(np.array_equal(computed_mask, mask))
+
+    def test_attention_compute_mask_with_different_input_shapes(self):
+        layer = layers.Attention()
+        input_shapes = [(2, 3, 4), (3, 2, 5), (4, 1, 6)]
+        valid_mask = np.array([True, False, True])
+        for shape in input_shapes:
+            dummy_inputs = [np.ones(shape), np.ones(shape)]
+            mask = [valid_mask, np.array([False, True, False])]
+            computed_mask = layer.compute_mask(inputs=dummy_inputs, mask=mask)
+            computed_mask = ops.convert_to_numpy(computed_mask)
+            self.assertTrue(np.array_equal(computed_mask, valid_mask))
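
The recurring `ops.convert_to_numpy(computed_mask)` line is what keeps these tests backend-agnostic: `compute_mask` may return a TensorFlow, JAX, or torch tensor depending on the active backend, while `np.array_equal` needs a concrete array. A condensed sketch of the pattern outside the test class:

    import numpy as np

    from keras import layers
    from keras import ops

    layer = layers.Attention()
    dummy_inputs = [np.ones((2, 3, 4)), np.ones((2, 4, 4))]
    mask = [np.array([True, False, True]), np.array([False, True, False])]

    computed_mask = layer.compute_mask(inputs=dummy_inputs, mask=mask)
    computed_mask = ops.convert_to_numpy(computed_mask)  # backend tensor -> ndarray
    assert np.array_equal(computed_mask, mask[0])  # the query mask is propagated
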
12 changes: 6 additions & 6 deletions keras/layers/layer.py
@@ -25,8 +25,8 @@
 
 from keras import backend
 from keras import constraints
+from keras import dtype_policies
 from keras import initializers
-from keras import mixed_precision
 from keras import ops
 from keras import regularizers
 from keras import utils
@@ -83,12 +83,12 @@ class Layer(BackendLayer, Operation):
         trainable: Boolean, whether the layer's variables should be trainable.
         name: String name of the layer.
         dtype: The dtype of the layer's computations and weights. Can also be a
-            `keras.mixed_precision.DTypePolicy`,
+            `keras.DTypePolicy`,
             which allows the computation and
             weight dtype to differ. Defaults to `None`. `None` means to use
-            `keras.mixed_precision.dtype_policy()`,
+            `keras.config.dtype_policy()`,
             which is a `float32` policy unless set to different value
-            (via `keras.mixed_precision.set_dtype_policy()`).
+            (via `keras.config.set_dtype_policy()`).
 
     Attributes:
         name: The name of the layer (string).
@@ -98,7 +98,7 @@ class Layer(BackendLayer, Operation):
             Layers automatically cast inputs to this dtype, which causes
             the computations and output to also be in this dtype.
             When mixed precision is used with a
-            `keras.mixed_precision.DTypePolicy`, this will be different
+            `keras.DTypePolicy`, this will be different
             than `variable_dtype`.
         trainable_weights: List of variables to be included in backprop.
         non_trainable_weights: List of variables that should not be
@@ -269,7 +269,7 @@ def __init__(
         )
 
         self.built = False
-        self.dtype_policy = mixed_precision.resolve_policy(dtype)
+        self.dtype_policy = dtype_policies.get(dtype)
        self.autocast = autocast
         self._input_spec = None
         self._called = False
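
Routing `__init__` through `dtype_policies.get(dtype)` means every accepted spelling of `dtype` funnels into one resolver. A hedged sketch of equivalent constructor calls (assuming the `keras.DTypePolicy` export added above):

    import keras
    from keras import layers

    d1 = layers.Dense(10, dtype="mixed_float16")  # policy name
    d2 = layers.Dense(10, dtype=keras.DTypePolicy("mixed_float16"))  # instance
    d3 = layers.Dense(10, dtype=None)  # falls back to the global policy

    assert d1.dtype_policy.name == d2.dtype_policy.name == "mixed_float16"
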
Empty file added keras/quantizers/__init__.py
