Skip to content

Commit

Permalink
Fix memory allocation in Keras tests (sony#840)
Browse files Browse the repository at this point in the history
Move the memory-sensitive tests out of the Keras function tests into a new package of non-parallel tests. In GitHub Actions this package runs first, on its own, and only afterwards are the remaining test packages run in parallel. This is a workaround for a memory issue that occurs because we use tf gradients in a multi-thread/multi-process CPU environment (tensorflow/tensorflow#41718).

Several modifications:

- Free the gradients and the gradient tape after using them for the score calculation.
- Free the hessian service in tests.
- Remove printing code that was accidentally committed in the previous PR.
- Speed up the non-parallel tests (smaller models, disabling gradient computation where it is not needed, etc.).

Co-authored-by: reuvenp <[email protected]>
  • Loading branch information
reuvenperetz and reuvenp authored Oct 23, 2023
1 parent c1bae64 commit 25570a0
Show file tree
Hide file tree
Showing 13 changed files with 87 additions and 34 deletions.
6 changes: 5 additions & 1 deletion .github/workflows/run_keras_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ jobs:
pip install -r requirements.txt
pip install tensorflow==${{ inputs.tf-version }}
- name: Run unittests
# Some tests are sensitive to memory because we use tf gradients in a multi-thread/multi-process
# CPU environment (https://github.com/tensorflow/tensorflow/issues/41718).
# For this reason, in such an environment these tests must run first, on their own (not in parallel), before the remaining test packages are launched in parallel.
run: |
for script in tests/keras_tests/exporter_tests tests/keras_tests/feature_networks_tests tests/keras_tests/function_tests tests/keras_tests/graph_tests tests/keras_tests/layer_tests; do python -m unittest discover $script -v & pids+=($!); done; for pid in ${pids[@]}; do wait $pid || exit 1; done
python -m unittest discover tests/keras_tests/non_parallel_tests -v
for script in tests/keras_tests/exporter_tests tests/keras_tests/feature_networks_tests tests/keras_tests/graph_tests tests/keras_tests/layer_tests; do python -m unittest discover $script -v & pids+=($!); done; for pid in ${pids[@]}; do wait $pid || exit 1; done
Original file line number Diff line number Diff line change
Expand Up @@ -148,12 +148,6 @@ def fetch_hessian(self,
The inner list length dependent on the granularity (1 for per-tensor,
OC for per-output-channel when the requested node has OC output-channels, etc.)
"""
num_keys = len(self.trace_hessian_request_to_score_list)
num_values = sum([len(list(v)) for v in self.trace_hessian_request_to_score_list.values()])
print(f"########### Keys: {num_keys}")
print(f"########### Values: {num_values}")


if required_size==0:
return []

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,10 @@ def compute(self) -> List[float]:
grad = tf.reshape(grad, [grad.shape[0], -1])
score_approx_per_output.append(tf.reduce_mean(tf.reduce_sum(tf.pow(grad, 2.0))))

# Free gradients
del grad
del gradients

# If the change to the mean approximation is insignificant (to all outputs)
# we stop the calculation.
if j > MIN_JACOBIANS_ITER:
Expand Down Expand Up @@ -133,7 +137,11 @@ def compute(self) -> List[float]:

trace_approx_by_node = tf.reduce_mean([trace_approx_by_node], axis=0) # Just to get one tensor instead of list of tensors with single element

return trace_approx_by_node.numpy().tolist()
# Free gradient tape
del g

return trace_approx_by_node.numpy().tolist()

else:
Logger.error(f"{self.hessian_request.granularity} is not supported for Keras activation hessian's trace approx calculator")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@

import numpy as np
import tensorflow as tf
from keras.layers import Conv2D, Dense, Conv2DTranspose, DepthwiseConv2D
from typing import List

from model_compression_toolkit.constants import HESSIAN_NUM_ITERATIONS, MIN_JACOBIANS_ITER, JACOBIANS_COMP_TOLERANCE
Expand Down Expand Up @@ -116,6 +115,9 @@ def compute(self) -> np.ndarray:
num_of_scores)
approx = tf.reduce_sum(tf.pow(gradients, 2.0), axis=1)

# Free gradients
del gradients

# If the change to the mean approximation is insignificant (to all outputs)
# we stop the calculation.
if j > MIN_JACOBIANS_ITER:
Expand All @@ -132,6 +134,9 @@ def compute(self) -> np.ndarray:
# Compute the mean of the approximations
final_approx = tf.reduce_mean(tf.stack(approximation_per_iteration), axis=0)

# Free gradient tape
del tape

if self.hessian_request.granularity == HessianInfoGranularity.PER_TENSOR:
if final_approx.shape != (1,):
Logger.error(f"In HessianInfoGranularity.PER_TENSOR the score shape is expected"
Expand Down
2 changes: 2 additions & 0 deletions model_compression_toolkit/gptq/keras/quantization_facade.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,8 @@ def keras_gradient_post_training_quantization_experimental(in_model: Model,
tb_w,
hessian_info_service=hessian_info_service)

del hessian_info_service

if core_config.debug_config.analyze_similarity:
analyzer_model_quantization(representative_data_gen, tb_w, tg_gptq, fw_impl, fw_info)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ def test_conv2d_granularity(self):
interest_points[1],
granularity=hessian_common.HessianInfoGranularity.PER_ELEMENT,
expected_shape=(3, 3, 3, 2))
del hessian_service

def test_dense_granularity(self):
input_shape = (1, 8)
Expand Down Expand Up @@ -160,6 +161,7 @@ def test_dense_granularity(self):
interest_points[1],
granularity=hessian_common.HessianInfoGranularity.PER_ELEMENT,
expected_shape=(8, 2))
del hessian_service

def test_conv2dtranspose_granularity(self):
input_shape = (1, 8, 8, 3)
Expand Down Expand Up @@ -191,6 +193,7 @@ def test_conv2dtranspose_granularity(self):
interest_points[1],
granularity=hessian_common.HessianInfoGranularity.PER_ELEMENT,
expected_shape=(3, 3, 2, 3))
del hessian_service

def test_depthwiseconv2d_granularity(self):
input_shape = (1, 8, 8, 3)
Expand Down Expand Up @@ -222,6 +225,7 @@ def test_depthwiseconv2d_granularity(self):
interest_points[1],
granularity=hessian_common.HessianInfoGranularity.PER_ELEMENT,
expected_shape=(3, 3, 3, 1))
del hessian_service

def test_reused_layer(self):
input_shape = (1, 8, 8, 3)
Expand Down Expand Up @@ -268,6 +272,7 @@ def test_reused_layer(self):
granularity=hessian_common.HessianInfoGranularity.PER_ELEMENT))
self.assertTrue(node2_count == 1)
self.assertTrue(len(hessian_service.trace_hessian_request_to_score_list)==1)
del hessian_service

#########################################################
# The following part checks different possible graph
Expand Down Expand Up @@ -308,6 +313,8 @@ def _test_advanced_graph(self, float_model, _repr_dataset):
granularity=hessian_common.HessianInfoGranularity.PER_ELEMENT,
expected_shape=(3, 3, 3, 2))

del hessian_service


def test_multiple_inputs(self):
input_shape = (1, 8, 8, 3)
Expand Down
2 changes: 2 additions & 0 deletions tests/keras_tests/function_tests/test_hessian_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ def representative_dataset(num_of_inputs=1):


class TestHessianService(unittest.TestCase):
def tearDown(self) -> None:
del self.hessian_service

def setUp(self):

Expand Down
1 change: 1 addition & 0 deletions tests/keras_tests/function_tests/test_model_gradients.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ def _get_normalized_hessian_trace_approx(graph, interest_points, keras_impl, alp
assert len(hessian_data_per_image) == 1
x.append(hessian_data_per_image[0])
x = hessian_common.hessian_utils.normalize_weights(x, alpha=alpha, outputs_indices=[len(interest_points) - 1])
del hessian_service
return x


Expand Down
19 changes: 19 additions & 0 deletions tests/keras_tests/non_parallel_tests/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Copyright 2023 Sony Semiconductor Israel, Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

# Some tests are sensitive to memory because we use tf gradients in a multi-thread/multi-process
# CPU environment (https://github.com/tensorflow/tensorflow/issues/41718).
# For this reason, in such an environment the tests in this package must run first, on their own, and not in parallel with any other tests.

Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import keras
import unittest
from functools import partial

Expand All @@ -24,10 +25,10 @@
from model_compression_toolkit.core.common import BaseNode

if version.parse(tf.__version__) >= version.parse("2.13"):
from keras.src.layers import Conv2D, Conv2DTranspose, ReLU, Activation
from keras.src.layers import Conv2D, Conv2DTranspose, ReLU, Activation, BatchNormalization
from keras.src import Input
else:
from keras.layers import Conv2D, Conv2DTranspose, ReLU, Activation
from keras.layers import Conv2D, Conv2DTranspose, ReLU, Activation, BatchNormalization
from keras import Input

import model_compression_toolkit as mct
Expand Down Expand Up @@ -229,18 +230,24 @@ def test_keras_fusing_patterns(self):
class TestGetKerasTPC(unittest.TestCase):
def test_get_keras_tpc(self):
tpc = mct.get_target_platform_capabilities(TENSORFLOW, DEFAULT_TP_MODEL)
model = MobileNetV2()
input_shape = (1, 8, 8, 3)
input_tensor = Input(shape=input_shape[1:])
conv = Conv2D(3, 3)(input_tensor)
bn = BatchNormalization()(conv)
relu = ReLU()(bn)
model = keras.Model(inputs=input_tensor, outputs=relu)

def rep_data():
yield [np.random.randn(1, 224, 224, 3)]
yield [np.random.randn(*input_shape)]

quantized_model, _ = mct.ptq.keras_post_training_quantization_experimental(model,
rep_data,
target_platform_capabilities=tpc,
new_experimental_exporter=True)

core_config = mct.core.CoreConfig(
mixed_precision_config=mct.core.MixedPrecisionQuantizationConfigV2(num_of_images=1))
mixed_precision_config=mct.core.MixedPrecisionQuantizationConfigV2(num_of_images=1,
use_grad_based_weights=False))
quantized_model, _ = mct.ptq.keras_post_training_quantization_experimental(model,
rep_data,
core_config=core_config,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,29 +14,30 @@
# ==============================================================================
import numpy as np
import unittest
from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2

import keras
from model_compression_toolkit.core import DEFAULTCONFIG
from model_compression_toolkit.core.common.mixed_precision.distance_weighting import get_average_weights, \
get_last_layer_weights
from model_compression_toolkit.core.common.mixed_precision.kpi_tools.kpi import KPI, KPITarget
from model_compression_toolkit.core.common.mixed_precision.mixed_precision_quantization_config import \
MixedPrecisionQuantizationConfigV2
from model_compression_toolkit.core.common.quantization.core_config import CoreConfig
from model_compression_toolkit.core.common.mixed_precision.mixed_precision_search_facade import search_bit_width, \
BitWidthSearchMethod
from model_compression_toolkit.core.common.mixed_precision.search_methods.linear_programming import \
mp_integer_programming_search
from model_compression_toolkit.core.common.model_collector import ModelCollector
from model_compression_toolkit.core.common.quantization.core_config import CoreConfig
from model_compression_toolkit.core.common.quantization.quantization_analyzer import analyzer_graph
from model_compression_toolkit.core.common.quantization.quantization_params_generation.qparams_computation import \
calculate_quantization_params
from model_compression_toolkit.core.common.quantization.set_node_quantization_config import \
set_quantization_configuration_to_graph
from model_compression_toolkit.core.common.model_collector import ModelCollector
from model_compression_toolkit.core import DEFAULTCONFIG
from model_compression_toolkit.core.common.similarity_analyzer import compute_mse
from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.latest import get_op_quantization_configs, generate_keras_tpc
from model_compression_toolkit.core.keras.default_framework_info import DEFAULT_KERAS_INFO
from model_compression_toolkit.core.keras.keras_implementation import KerasImplementation
from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.latest import \
get_op_quantization_configs
from tests.keras_tests.tpc_keras import get_weights_only_mp_tpc_keras


Expand Down Expand Up @@ -204,7 +205,12 @@ def run_search_bitwidth_config_test(self, core_config):
name="bitwidth_cfg_test")

fw_info = DEFAULT_KERAS_INFO
in_model = MobileNetV2()
input_shape = (1, 8, 8, 3)
input_tensor = keras.layers.Input(shape=input_shape[1:])
conv = keras.layers.Conv2D(3, 3)(input_tensor)
bn = keras.layers.BatchNormalization()(conv)
relu = keras.layers.ReLU()(bn)
in_model = keras.Model(inputs=input_tensor, outputs=relu)
keras_impl = KerasImplementation()

def dummy_representative_dataset():
Expand All @@ -230,19 +236,19 @@ def dummy_representative_dataset():
fw_info=DEFAULT_KERAS_INFO,
fw_impl=keras_impl)

for i in range(10):
mi.infer([np.random.randn(1, 224, 224, 3)])
for i in range(1):
mi.infer([np.random.randn(*input_shape)])

def representative_data_gen():
yield [np.random.random((1, 224, 224, 3))]
yield [np.random.random(input_shape)]

calculate_quantization_params(graph,
fw_info,
fw_impl=keras_impl)
keras_sens_eval = keras_impl.get_sensitivity_evaluator(graph,
core_config.mixed_precision_config,
representative_data_gen,
fw_info=fw_info)
keras_impl.get_sensitivity_evaluator(graph,
core_config.mixed_precision_config,
representative_data_gen,
fw_info=fw_info)

cfg = search_bit_width(graph_to_search_cfg=graph,
fw_info=DEFAULT_KERAS_INFO,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,8 @@ def test_steps_by_order(self):
def rep_data():
yield [np.random.randn(1, 8, 8, 3)]

mp_qc = mct.core.MixedPrecisionQuantizationConfigV2(num_of_images=1)
mp_qc = mct.core.MixedPrecisionQuantizationConfigV2(num_of_images=1,
use_grad_based_weights=False)
core_config = mct.core.CoreConfig(mixed_precision_config=mp_qc)
quantized_model, _ = mct.ptq.keras_post_training_quantization_experimental(self.model,
rep_data,
Expand Down
9 changes: 3 additions & 6 deletions tests/test_suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,6 @@
import importlib
import unittest

from packaging import version

from tests.common_tests.function_tests.test_collectors_manipulation import TestCollectorsManipulations
from tests.common_tests.function_tests.test_folder_image_loader import TestFolderLoader
# ---------------- Individual test suites
Expand All @@ -38,16 +36,15 @@
"torchvision") is not None

if found_tf:
import tensorflow as tf
from tests.keras_tests.function_tests.test_hessian_info_calculator_weights import TestHessianInfoCalculatorWeights
from tests.keras_tests.function_tests.test_hessian_service import TestHessianService
from tests.keras_tests.feature_networks_tests.test_features_runner import FeatureNetworkTest
from tests.keras_tests.function_tests.test_quantization_configurations import TestQuantizationConfigurations
from tests.keras_tests.function_tests.test_tensorboard_writer import TestFileLogger
from tests.keras_tests.non_parallel_tests.test_tensorboard_writer import TestFileLogger
from tests.keras_tests.function_tests.test_lut_quanitzer_params import TestLUTQuantizerParams
from tests.keras_tests.function_tests.test_lut_activation_quanitzer_params import TestLUTActivationsQuantizerParams
from tests.keras_tests.function_tests.test_lut_activation_quanitzer_fake_quant import TestLUTQuantizerFakeQuant
from tests.keras_tests.function_tests.test_lp_search_bitwidth import TestLpSearchBitwidth, \
from tests.keras_tests.non_parallel_tests.test_lp_search_bitwidth import TestLpSearchBitwidth, \
TestSearchBitwidthConfiguration
from tests.keras_tests.function_tests.test_bn_info_collection import TestBNInfoCollection
from tests.keras_tests.graph_tests.test_graph_reading import TestGraphReading
Expand All @@ -57,7 +54,7 @@
TestSymmetricThresholdSelectionWeights
from tests.keras_tests.function_tests.test_uniform_quantize_tensor import TestUniformQuantizeTensor
from tests.keras_tests.function_tests.test_uniform_range_selection_weights import TestUniformRangeSelectionWeights
from tests.keras_tests.function_tests.test_keras_tp_model import TestKerasTPModel
from tests.keras_tests.non_parallel_tests.test_keras_tp_model import TestKerasTPModel
from tests.keras_tests.function_tests.test_sensitivity_metric_interest_points import \
TestSensitivityMetricInterestPoints
from tests.keras_tests.function_tests.test_weights_activation_split_substitution import TestWeightsActivationSplit
Expand Down

0 comments on commit 25570a0

Please sign in to comment.