From b4d9567e28a8137c01511c69038b3c2e2040e264 Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Mon, 29 Jan 2024 09:36:45 -0800 Subject: [PATCH 1/4] Dtype policy refactor --- keras/applications/imagenet_utils_test.py | 2 +- .../dtype_policy.py | 32 ++++++++++++++++--- .../dtype_policy_test.py | 6 ++-- keras/layers/layer.py | 12 +++---- keras/mixed_precision/__init__.py | 23 ------------- 5 files changed, 37 insertions(+), 38 deletions(-) rename keras/{mixed_precision => layers}/dtype_policy.py (86%) rename keras/{mixed_precision => layers}/dtype_policy_test.py (97%) delete mode 100644 keras/mixed_precision/__init__.py diff --git a/keras/applications/imagenet_utils_test.py b/keras/applications/imagenet_utils_test.py index 3fd0af2523bd..e0aaae689d41 100644 --- a/keras/applications/imagenet_utils_test.py +++ b/keras/applications/imagenet_utils_test.py @@ -6,7 +6,7 @@ from keras import backend from keras import testing from keras.applications import imagenet_utils as utils -from keras.mixed_precision import set_dtype_policy +from keras.layers.dtype_policy import set_dtype_policy class TestImageNetUtils(testing.TestCase, parameterized.TestCase): diff --git a/keras/mixed_precision/dtype_policy.py b/keras/layers/dtype_policy.py similarity index 86% rename from keras/mixed_precision/dtype_policy.py rename to keras/layers/dtype_policy.py index 8eaec757ca27..fd7927d239c6 100644 --- a/keras/mixed_precision/dtype_policy.py +++ b/keras/layers/dtype_policy.py @@ -1,10 +1,12 @@ from keras import backend from keras.api_export import keras_export from keras.backend.common import global_state +from keras.saving import serialization_lib @keras_export( [ + "keras.DTypePolicy", "keras.mixed_precision.DTypePolicy", "keras.mixed_precision.Policy", ] @@ -15,7 +17,7 @@ class DTypePolicy: A dtype policy determines a layer's computation and variable dtypes. Each layer has a policy. Policies can be passed to the `dtype` argument of layer constructors, or a global policy can be set with - `keras.mixed_precision.set_dtype_policy`. + `keras.config.set_dtype_policy`. Args: name: The policy name, which determines the compute and variable dtypes. @@ -32,7 +34,7 @@ class DTypePolicy: API name. Mixed precision can be enabled by passing `"mixed_float16"` or `"mixed_bfloat16"` to `keras.mixed_precision.set_dtype_policy()`. - >>> keras.mixed_precision.set_dtype_policy("mixed_float16") + >>> keras.config.set_dtype_policy("mixed_float16") >>> layer1 = keras.layers.Dense(10) >>> layer1.dtype_policy # layer1 will automatically use mixed precision @@ -42,11 +44,11 @@ class DTypePolicy: >>> layer2.dtype_policy >>> # Set policy back to initial float32. - >>> keras.mixed_precision.set_dtype_policy('float32') + >>> keras.config.set_dtype_policy('float32') In the example above, passing `dtype="float32"` to the layer is equivalent to passing - `dtype=keras.mixed_precision.DTypePolicy("float32")`. + `dtype=keras.config.DTypePolicy("float32")`. In general, passing a dtype policy name to a layer is equivalent to passing the corresponding policy, so it is never necessary to explicitly construct a `DTypePolicy` object. 
@@ -142,6 +144,7 @@ def from_config(cls, config): @keras_export( [ + "keras.config.set_dtype_policy", "keras.mixed_precision.set_dtype_policy", "keras.mixed_precision.set_global_policy", ] @@ -151,7 +154,7 @@ def set_dtype_policy(policy): Example: - >>> keras.mixed_precision.set_dtype_policy("mixed_float16") + >>> keras.config.set_dtype_policy("mixed_float16") """ if not isinstance(policy, DTypePolicy): if isinstance(policy, str): @@ -169,6 +172,7 @@ def set_dtype_policy(policy): @keras_export( [ + "keras.config.dtype_policy", "keras.mixed_precision.dtype_policy", "keras.mixed_precision.global_policy", ] @@ -180,3 +184,21 @@ def dtype_policy(): policy = DTypePolicy(backend.floatx()) set_dtype_policy(policy) return policy + + +def get(identifier): + if identifier is None: + return dtype_policy() + if isinstance(identifier, DTypePolicy): + return identifier + if isinstance(identifier, dict): + return serialization_lib.deserialize_keras_object(identifier) + if isinstance(identifier, str): + return DTypePolicy(identifier) + try: + return DTypePolicy(backend.standardize_dtype(identifier)) + except: + raise ValueError( + "Cannot interpret `dtype` argument. Expected a string " + f"or an instance of DTypePolicy. Received: dtype={identifier}" + ) diff --git a/keras/mixed_precision/dtype_policy_test.py b/keras/layers/dtype_policy_test.py similarity index 97% rename from keras/mixed_precision/dtype_policy_test.py rename to keras/layers/dtype_policy_test.py index de9ddbfbb4f7..e881c5ef0df9 100644 --- a/keras/mixed_precision/dtype_policy_test.py +++ b/keras/layers/dtype_policy_test.py @@ -1,6 +1,6 @@ -from keras.mixed_precision import DTypePolicy -from keras.mixed_precision import dtype_policy -from keras.mixed_precision import set_dtype_policy +from keras.layers.dtype_policy import DTypePolicy +from keras.layers.dtype_policy import dtype_policy +from keras.layers.dtype_policy import set_dtype_policy from keras.testing import test_case diff --git a/keras/layers/layer.py b/keras/layers/layer.py index ae3f0f4b5f88..e1d770ae7950 100644 --- a/keras/layers/layer.py +++ b/keras/layers/layer.py @@ -26,7 +26,6 @@ from keras import backend from keras import constraints from keras import initializers -from keras import mixed_precision from keras import ops from keras import regularizers from keras import utils @@ -35,6 +34,7 @@ from keras.backend.common import global_state from keras.backend.common.name_scope import current_path from keras.distribution import distribution_lib +from keras.layers import dtype_policy from keras.layers import input_spec from keras.metrics.metric import Metric from keras.ops.operation import Operation @@ -83,12 +83,12 @@ class Layer(BackendLayer, Operation): trainable: Boolean, whether the layer's variables should be trainable. name: String name of the layer. dtype: The dtype of the layer's computations and weights. Can also be a - `keras.mixed_precision.DTypePolicy`, + `keras.DTypePolicy`, which allows the computation and weight dtype to differ. Defaults to `None`. `None` means to use - `keras.mixed_precision.dtype_policy()`, + `keras.config.dtype_policy()`, which is a `float32` policy unless set to different value - (via `keras.mixed_precision.set_dtype_policy()`). + (via `keras.config.set_dtype_policy()`). Attributes: name: The name of the layer (string). @@ -98,7 +98,7 @@ class Layer(BackendLayer, Operation): Layers automatically cast inputs to this dtype, which causes the computations and output to also be in this dtype. 
When mixed precision is used with a - `keras.mixed_precision.DTypePolicy`, this will be different + `keras.DTypePolicy`, this will be different than `variable_dtype`. trainable_weights: List of variables to be included in backprop. non_trainable_weights: List of variables that should not be @@ -269,7 +269,7 @@ def __init__( ) self.built = False - self.dtype_policy = mixed_precision.resolve_policy(dtype) + self.dtype_policy = dtype_policy.get(dtype) self.autocast = autocast self._input_spec = None self._called = False diff --git a/keras/mixed_precision/__init__.py b/keras/mixed_precision/__init__.py deleted file mode 100644 index 669d1540a19a..000000000000 --- a/keras/mixed_precision/__init__.py +++ /dev/null @@ -1,23 +0,0 @@ -from keras import backend -from keras.mixed_precision.dtype_policy import DTypePolicy -from keras.mixed_precision.dtype_policy import dtype_policy -from keras.mixed_precision.dtype_policy import set_dtype_policy -from keras.saving import serialization_lib - - -def resolve_policy(identifier): - if identifier is None: - return dtype_policy() - if isinstance(identifier, DTypePolicy): - return identifier - if isinstance(identifier, dict): - return serialization_lib.deserialize_keras_object(identifier) - if isinstance(identifier, str): - return DTypePolicy(identifier) - try: - return DTypePolicy(backend.standardize_dtype(identifier)) - except: - raise ValueError( - "Cannot interpret `dtype` argument. Expected a string " - f"or an instance of DTypePolicy. Received: dtype={identifier}" - ) From b6eb310f75fb9783951911b7525d7f71c20de4ff Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Mon, 29 Jan 2024 09:37:22 -0800 Subject: [PATCH 2/4] Add quantization folder --- keras/quantization/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 keras/quantization/__init__.py diff --git a/keras/quantization/__init__.py b/keras/quantization/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 From 419973ee15ecd0e2d085e077399ce3bd5437df15 Mon Sep 17 00:00:00 2001 From: Faisal Alsrheed <47912291+Faisal-Alsrheed@users.noreply.github.com> Date: Mon, 29 Jan 2024 20:41:07 +0300 Subject: [PATCH 3/4] Fix PyTorch GPU Compatibility for `compute_mask` Attention Layer Tests (#19112) * GPU tests in test_attention `compute_mask` and Fix code formatting and indentation * Fix `compute_mask` issue in AttentionTest if `backend.backend() == "torch"` and `computed_mask.is_cuda`py * Fix formatting issues in code * Refactor code * Fix formatting * Refactor attention_test.py to use keras.ops.convert_to_numpy() for converting computed_mask to numpy array --- keras/layers/attention/attention_test.py | 117 +++++++++++++++++++++++ 1 file changed, 117 insertions(+) diff --git a/keras/layers/attention/attention_test.py b/keras/layers/attention/attention_test.py index 73ca320d7508..c6010b674618 100644 --- a/keras/layers/attention/attention_test.py +++ b/keras/layers/attention/attention_test.py @@ -1,6 +1,7 @@ import numpy as np from keras import layers +from keras import ops from keras import testing @@ -205,6 +206,7 @@ def test_attention_compute_mask_does_not_return_none_with_valid_mask(self): valid_mask = np.array([True, False, True]) mask = [valid_mask, np.array([False, True, False])] computed_mask = layer.compute_mask(inputs=dummy_inputs, mask=mask) + computed_mask = ops.convert_to_numpy(computed_mask) self.assertIsNotNone( computed_mask, "compute_mask should not return None with a valid mask", @@ -221,7 +223,122 @@ def 
test_attention_compute_mask_returns_correct_tensor_with_valid_mask( valid_mask = np.array([True, False, True]) mask = [valid_mask, np.array([False, True, False])] computed_mask = layer.compute_mask(inputs=dummy_inputs, mask=mask) + computed_mask = ops.convert_to_numpy(computed_mask) self.assertTrue( np.array_equal(computed_mask, valid_mask), "compute_mask did not return the correct mask tensor", ) + + def test_attention_compute_mask_returns_correct_tensor_with_all_true_mask( + self, + ): + layer = layers.Attention() + dummy_inputs = [np.ones((2, 3, 4)), np.ones((2, 4, 4))] + valid_mask = np.array([True, True, True]) + mask = [valid_mask, np.array([True, True, True])] + computed_mask = layer.compute_mask(inputs=dummy_inputs, mask=mask) + computed_mask = ops.convert_to_numpy(computed_mask) + expected_mask = np.array([True, True, True]) + self.assertTrue( + np.array_equal(computed_mask, expected_mask), + "compute_mask did not return the correct mask tensor", + ) + + def test_attention_compute_mask_returns_correct_tensor_with_all_false_mask( + self, + ): + layer = layers.Attention() + dummy_inputs = [np.ones((2, 3, 4)), np.ones((2, 4, 4))] + valid_mask = np.array([False, False, False]) + mask = [valid_mask, np.array([False, False, False])] + computed_mask = layer.compute_mask(inputs=dummy_inputs, mask=mask) + computed_mask = ops.convert_to_numpy(computed_mask) + expected_mask = np.array([False, False, False]) + self.assertTrue( + np.array_equal(computed_mask, expected_mask), + "compute_mask did not return the correct mask tensor", + ) + + def test_attention_compute_mask_with_tolerance_1e_3(self): + layer = layers.Attention() + dummy_inputs = [np.ones((2, 3, 4)), np.ones((2, 4, 4))] + valid_mask = np.array([1.0, 0.0, 1.0], dtype=float) + mask = [valid_mask, np.array([0.0, 1.0, 0.0], dtype=float)] + computed_mask = layer.compute_mask(inputs=dummy_inputs, mask=mask) + computed_mask = ops.convert_to_numpy(computed_mask) + expected_mask = valid_mask + self.assertTrue( + np.allclose(computed_mask, expected_mask, atol=1e-3), + "Incorrect mask tensor within tolerance 1e-3", + ) + + def test_attention_compute_mask_with_tolerance_1e_5(self): + layer = layers.Attention() + dummy_inputs = [np.ones((2, 3, 4)), np.ones((2, 4, 4))] + valid_mask = np.array([1.0, 0.0, 1.0], dtype=float) + mask = [valid_mask, np.array([0.0, 1.0, 0.0], dtype=float)] + computed_mask = layer.compute_mask(inputs=dummy_inputs, mask=mask) + computed_mask = ops.convert_to_numpy(computed_mask) + expected_mask = valid_mask + self.assertTrue( + np.allclose(computed_mask, expected_mask, atol=1e-5), + "Incorrect mask tensor within tolerance 1e-5", + ) + + def test_attention_compute_mask_with_tolerance_1e_7(self): + layer = layers.Attention() + dummy_inputs = [np.ones((2, 3, 4)), np.ones((2, 4, 4))] + valid_mask = np.array([1.0, 0.0, 1.0], dtype=float) + mask = [valid_mask, np.array([0.0, 1.0, 0.0], dtype=float)] + computed_mask = layer.compute_mask(inputs=dummy_inputs, mask=mask) + computed_mask = ops.convert_to_numpy(computed_mask) + expected_mask = valid_mask + self.assertTrue( + np.allclose(computed_mask, expected_mask, atol=1e-7), + "Incorrect mask tensor within tolerance 1e-7 ", + ) + + def test_attention_compute_mask_with_single_element_masks(self): + layer = layers.Attention() + dummy_inputs = [np.ones((2, 3, 4)), np.ones((2, 4, 4))] + valid_mask = np.array([True]) + mask = [valid_mask, np.array([False])] + computed_mask = layer.compute_mask(inputs=dummy_inputs, mask=mask) + computed_mask = ops.convert_to_numpy(computed_mask) + 
expected_shape = (1,) + self.assertEqual(computed_mask.shape, expected_shape) + + def test_attention_compute_mask_with_non_boolean_masks(self): + layer = layers.Attention() + dummy_inputs = [np.ones((2, 3, 4)), np.ones((2, 4, 4))] + valid_mask = np.array([1, 0, 1]) + mask = [valid_mask, np.array([0, 1, 0])] + computed_mask = layer.compute_mask(inputs=dummy_inputs, mask=mask) + computed_mask = ops.convert_to_numpy(computed_mask) + self.assertTrue(np.array_equal(computed_mask, valid_mask)) + + def test_attention_compute_mask_with_edge_case_masks(self): + layer = layers.Attention() + dummy_inputs = [np.ones((2, 3, 4)), np.ones((2, 4, 4))] + edge_case_masks = [ + np.array([True, True, True]), + np.array([False, False, False]), + np.array([True, False, True]), + ] + for mask in edge_case_masks: + computed_mask = layer.compute_mask( + inputs=dummy_inputs, mask=[mask, mask] + ) + computed_mask = ops.convert_to_numpy(computed_mask) + self.assertTrue(np.array_equal(computed_mask, mask)) + + def test_attention_compute_mask_with_different_input_shapes(self): + layer = layers.Attention() + input_shapes = [(2, 3, 4), (3, 2, 5), (4, 1, 6)] + valid_mask = np.array([True, False, True]) + for shape in input_shapes: + dummy_inputs = [np.ones(shape), np.ones(shape)] + mask = [valid_mask, np.array([False, True, False])] + computed_mask = layer.compute_mask(inputs=dummy_inputs, mask=mask) + computed_mask = ops.convert_to_numpy(computed_mask) + self.assertTrue(np.array_equal(computed_mask, valid_mask)) From 4766e80b9a614378e0bdc44a0a38eadb3266b095 Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Mon, 29 Jan 2024 10:36:45 -0800 Subject: [PATCH 4/4] Dtype policy refactor --- keras/applications/imagenet_utils_test.py | 2 +- keras/dtype_policies/__init__.py | 21 ++++++++++++ .../dtype_policy.py | 32 ++++--------------- .../dtype_policy_test.py | 6 ++-- keras/layers/layer.py | 4 +-- .../{quantization => quantizers}/__init__.py | 0 6 files changed, 34 insertions(+), 31 deletions(-) create mode 100644 keras/dtype_policies/__init__.py rename keras/{layers => dtype_policies}/dtype_policy.py (87%) rename keras/{layers => dtype_policies}/dtype_policy_test.py (96%) rename keras/{quantization => quantizers}/__init__.py (100%) diff --git a/keras/applications/imagenet_utils_test.py b/keras/applications/imagenet_utils_test.py index e0aaae689d41..dc09911ed44f 100644 --- a/keras/applications/imagenet_utils_test.py +++ b/keras/applications/imagenet_utils_test.py @@ -6,7 +6,7 @@ from keras import backend from keras import testing from keras.applications import imagenet_utils as utils -from keras.layers.dtype_policy import set_dtype_policy +from keras.dtype_policies.dtype_policy import set_dtype_policy class TestImageNetUtils(testing.TestCase, parameterized.TestCase): diff --git a/keras/dtype_policies/__init__.py b/keras/dtype_policies/__init__.py new file mode 100644 index 000000000000..871491d5c7d5 --- /dev/null +++ b/keras/dtype_policies/__init__.py @@ -0,0 +1,21 @@ +from keras import backend +from keras.dtype_policies import dtype_policy +from keras.saving import serialization_lib + + +def get(identifier): + if identifier is None: + return dtype_policy.dtype_policy() + if isinstance(identifier, dtype_policy.DTypePolicy): + return identifier + if isinstance(identifier, dict): + return serialization_lib.deserialize_keras_object(identifier) + if isinstance(identifier, str): + return dtype_policy.DTypePolicy(identifier) + try: + return dtype_policy.DTypePolicy(backend.standardize_dtype(identifier)) + except: + raise 
ValueError( + "Cannot interpret `dtype` argument. Expected a string " + f"or an instance of DTypePolicy. Received: dtype={identifier}" + ) diff --git a/keras/layers/dtype_policy.py b/keras/dtype_policies/dtype_policy.py similarity index 87% rename from keras/layers/dtype_policy.py rename to keras/dtype_policies/dtype_policy.py index fd7927d239c6..6870a52d3011 100644 --- a/keras/layers/dtype_policy.py +++ b/keras/dtype_policies/dtype_policy.py @@ -1,14 +1,14 @@ from keras import backend from keras.api_export import keras_export from keras.backend.common import global_state -from keras.saving import serialization_lib @keras_export( [ "keras.DTypePolicy", - "keras.mixed_precision.DTypePolicy", - "keras.mixed_precision.Policy", + "keras.dtype_policies.DTypePolicy", + "keras.mixed_precision.DTypePolicy", # Legacy + "keras.mixed_precision.Policy", # Legacy ] ) class DTypePolicy: @@ -145,8 +145,8 @@ def from_config(cls, config): @keras_export( [ "keras.config.set_dtype_policy", - "keras.mixed_precision.set_dtype_policy", - "keras.mixed_precision.set_global_policy", + "keras.mixed_precision.set_dtype_policy", # Legacy + "keras.mixed_precision.set_global_policy", # Legacy ] ) def set_dtype_policy(policy): @@ -173,8 +173,8 @@ def set_dtype_policy(policy): @keras_export( [ "keras.config.dtype_policy", - "keras.mixed_precision.dtype_policy", - "keras.mixed_precision.global_policy", + "keras.mixed_precision.dtype_policy", # Legacy + "keras.mixed_precision.global_policy", # Legacy ] ) def dtype_policy(): @@ -184,21 +184,3 @@ def dtype_policy(): policy = DTypePolicy(backend.floatx()) set_dtype_policy(policy) return policy - - -def get(identifier): - if identifier is None: - return dtype_policy() - if isinstance(identifier, DTypePolicy): - return identifier - if isinstance(identifier, dict): - return serialization_lib.deserialize_keras_object(identifier) - if isinstance(identifier, str): - return DTypePolicy(identifier) - try: - return DTypePolicy(backend.standardize_dtype(identifier)) - except: - raise ValueError( - "Cannot interpret `dtype` argument. Expected a string " - f"or an instance of DTypePolicy. 
Received: dtype={identifier}" - ) diff --git a/keras/layers/dtype_policy_test.py b/keras/dtype_policies/dtype_policy_test.py similarity index 96% rename from keras/layers/dtype_policy_test.py rename to keras/dtype_policies/dtype_policy_test.py index e881c5ef0df9..f543226fe8fb 100644 --- a/keras/layers/dtype_policy_test.py +++ b/keras/dtype_policies/dtype_policy_test.py @@ -1,6 +1,6 @@ -from keras.layers.dtype_policy import DTypePolicy -from keras.layers.dtype_policy import dtype_policy -from keras.layers.dtype_policy import set_dtype_policy +from keras.dtype_policies.dtype_policy import DTypePolicy +from keras.dtype_policies.dtype_policy import dtype_policy +from keras.dtype_policies.dtype_policy import set_dtype_policy from keras.testing import test_case diff --git a/keras/layers/layer.py b/keras/layers/layer.py index e1d770ae7950..e22e833ede8c 100644 --- a/keras/layers/layer.py +++ b/keras/layers/layer.py @@ -25,6 +25,7 @@ from keras import backend from keras import constraints +from keras import dtype_policies from keras import initializers from keras import ops from keras import regularizers @@ -34,7 +35,6 @@ from keras.backend.common import global_state from keras.backend.common.name_scope import current_path from keras.distribution import distribution_lib -from keras.layers import dtype_policy from keras.layers import input_spec from keras.metrics.metric import Metric from keras.ops.operation import Operation @@ -269,7 +269,7 @@ def __init__( ) self.built = False - self.dtype_policy = dtype_policy.get(dtype) + self.dtype_policy = dtype_policies.get(dtype) self.autocast = autocast self._input_spec = None self._called = False diff --git a/keras/quantization/__init__.py b/keras/quantizers/__init__.py similarity index 100% rename from keras/quantization/__init__.py rename to keras/quantizers/__init__.py
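
A minimal usage sketch of the dtype-policy API as it stands after PATCH 4/4. It assumes the exports added in this series (`keras.DTypePolicy`, `keras.config.set_dtype_policy`, `keras.config.dtype_policy`) behave as written above, that the internal resolver is importable as `keras.dtype_policies.get()`, and that `keras.ops.convert_to_numpy()` is available. This is illustration only, not part of the patches.

import numpy as np

import keras
from keras import dtype_policies  # resolver package added in PATCH 4/4 (internal)
from keras import ops

# Global policy: layers constructed afterwards pick it up automatically.
keras.config.set_dtype_policy("mixed_float16")
layer = keras.layers.Dense(10)
assert layer.dtype_policy.name == "mixed_float16"
assert layer.compute_dtype == "float16"
assert layer.variable_dtype == "float32"

# Per-layer override: a dtype string and an explicit policy are equivalent.
layer_fp32 = keras.layers.Dense(10, dtype="float32")
same_fp32 = keras.layers.Dense(10, dtype=keras.DTypePolicy("float32"))
assert layer_fp32.dtype_policy.name == same_fp32.dtype_policy.name == "float32"

# The resolver used by Layer.__init__: None -> global policy, str -> policy,
# DTypePolicy -> unchanged, dict -> deserialized via serialization_lib.
assert dtype_policies.get(None).name == keras.config.dtype_policy().name
assert dtype_policies.get("bfloat16").name == "bfloat16"

keras.config.set_dtype_policy("float32")  # restore the default

# Pattern from PATCH 3/4: convert a backend tensor (which may live on a
# torch GPU) to NumPy before comparing it in tests.
attention = keras.layers.Attention()
mask = [np.array([True, False, True]), np.array([False, True, False])]
computed = attention.compute_mask(
    inputs=[np.ones((2, 3, 4)), np.ones((2, 4, 4))], mask=mask
)
computed = ops.convert_to_numpy(computed)
assert np.array_equal(computed, mask[0])

Note that the final series places the `get()` resolver in `keras/dtype_policies/__init__.py` rather than in `dtype_policy.py`, which keeps the `serialization_lib` import out of the core policy module.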