From 0cada4cbe9d8ccbfea74b21dabf263dbecb216a9 Mon Sep 17 00:00:00 2001 From: Naren Dasan Date: Wed, 11 Sep 2024 08:12:56 -0600 Subject: [PATCH] fix: distinguish engines based on compilation settings in addition to graph structure Signed-off-by: Naren Dasan --- py/torch_tensorrt/dynamo/_engine_cache.py | 13 +++++------ .../dynamo/conversion/_TRTInterpreter.py | 4 ++-- py/torch_tensorrt/dynamo/utils.py | 2 +- tests/py/dynamo/models/test_engine_cache.py | 22 ++++++++++++++----- tests/py/requirements.txt | 2 +- 5 files changed, 26 insertions(+), 17 deletions(-) diff --git a/py/torch_tensorrt/dynamo/_engine_cache.py b/py/torch_tensorrt/dynamo/_engine_cache.py index 7a640b96b0..f166b489cb 100644 --- a/py/torch_tensorrt/dynamo/_engine_cache.py +++ b/py/torch_tensorrt/dynamo/_engine_cache.py @@ -75,7 +75,6 @@ def get_hash( engine_specs_data = pickletools.optimize(engine_specs_data) engine_specs_hash = sha256_hash(engine_specs_data) - # TODO: Super first idea I had hash combination solution @Evan please iterate on this hash_val: str = graph_hash_val + input_specs_hash + engine_specs_hash return hash_val @@ -95,6 +94,8 @@ def pack( serialized_engine (bytes): serialized TRT engine input_names (List[str]): input names of TRT engine output_names (List[str]): output names of TRT engine + input_specs (Sequence[Input]): input specs of TRT engine + compilation_settings (CompilationSettings): compilation settings of TRT engine weight_name_map (Optional[Dict[Any, Any]]): weight name map for refitting Returns: @@ -121,7 +122,7 @@ def unpack(packed_obj: bytes) -> UnpackedCacheHit: packed_obj (bytes): packed blob Returns: - Tuple[bytes, List[str], List[str], CompilationSettings, Optional[Dict[str, Any]]]: serialized engine, input names, output names, CompilationSettings, weight name map + Tuple[bytes, List[str], List[str], Sequence[Input], CompilationSettings, Optional[Dict[str, Any]]]: serialized engine, input names, output names, input specs, CompilationSettings, weight name map """ 
unpacked = pickle.loads(packed_obj) return ( @@ -283,11 +284,7 @@ def LRU() -> None: else: LRU() - def save( - self, - hash: str, - blob: bytes, - ) -> None: + def save(self, hash: str, blob: bytes, *args: Any, **kwargs: Any) -> None: blob_size = len(blob) if blob_size > self.total_engine_cache_size: _LOGGER.warning( @@ -324,7 +321,7 @@ def save( f"The size {blob_size} is still larger than the available cache size {self.available_engine_cache_size}." ) - def load(self, hash: str) -> Optional[bytes]: + def load(self, hash: str, *args: Any, **kwargs: Any) -> Optional[bytes]: directory = os.path.join(self.engine_cache_dir, hash) if os.path.exists(directory): blob_path = os.path.join(directory, "blob.bin") diff --git a/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py b/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py index f1b68b5436..ff35bf39d7 100644 --- a/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py +++ b/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py @@ -557,7 +557,7 @@ def run( ) assert ( setting_compatiblity - ), f"Attempted to refit a prebuilt engine with incompatible settings: {incompattible_settings}, (old_settings: {engine_compilation_settings}, new_settings: {self.compilation_settings})" + ), f"Attempted to refit a cached engine with incompatible settings: {incompattible_settings}, (old_settings: {engine_compilation_settings}, new_settings: {self.compilation_settings})" for i, e in enumerate( [ @@ -567,7 +567,7 @@ def run( ): assert ( e - ), f"Found that cached engine was built for a different input size (input: {i}, cached size: {cached_engine_input_specs[i]}, new size: {self.input_specs[i]}" + ), f"Attempted to refit a cached engine built for a different input size (input: {i}, cached size: {cached_engine_input_specs[i]}, new size: {self.input_specs[i]}" _LOGGER.info( "Found the cached engine that corresponds to this graph. It is directly loaded." 
diff --git a/py/torch_tensorrt/dynamo/utils.py b/py/torch_tensorrt/dynamo/utils.py index d8aea04fbb..ba39ca923b 100644 --- a/py/torch_tensorrt/dynamo/utils.py +++ b/py/torch_tensorrt/dynamo/utils.py @@ -499,7 +499,7 @@ def parse_dynamo_kwargs( # If cache_built_engines and reuse_cached_engines are True but custom_engine_cache is not provided, # then create a default disk engine cache - # + engine_cache = None if kwargs.get("cache_built_engines") or kwargs.get("reuse_cached_engines"): assert kwargs.get( diff --git a/tests/py/dynamo/models/test_engine_cache.py b/tests/py/dynamo/models/test_engine_cache.py index 2a253924c5..367f68c1f6 100644 --- a/tests/py/dynamo/models/test_engine_cache.py +++ b/tests/py/dynamo/models/test_engine_cache.py @@ -9,7 +9,7 @@ import torch_tensorrt as torch_trt import torchvision.models as models from torch.testing._internal.common_utils import TestCase -from torch_tensorrt.dynamo._defaults import ENGINE_CACHE_DIR +from torch_tensorrt.dynamo._defaults import TIMING_CACHE_PATH from torch_tensorrt.dynamo._engine_cache import BaseEngineCache from torch_tensorrt.dynamo._settings import CompilationSettings from torch_tensorrt.dynamo.utils import COSINE_THRESHOLD, cosine_similarity @@ -160,9 +160,9 @@ def test_engine_settings_is_not_equal(self): ) input_specs2 = ( torch_trt.Input( - min_shape=(1, 3, 300, 300), - opt_shape=(100, 3, 300, 300), - max_shape=(200, 3, 300, 300), + min_shape=(1, 3, 224, 224), + opt_shape=(100, 3, 224, 224), + max_shape=(200, 3, 224, 224), ), ) settings2 = CompilationSettings( @@ -192,6 +192,10 @@ def test_dynamo_compile_with_default_disk_engine_cache(self): if os.path.exists(engine_cache_dir): shutil.rmtree(engine_cache_dir) + def remove_timing_cache(path=TIMING_CACHE_PATH): + if os.path.exists(path): + os.remove(path) + # The 1st iteration is to measure the compilation time without engine caching # The 2nd and 3rd iterations are to measure the compilation time with engine caching. 
# Since the 2nd iteration needs to compile and save the engine, it will be slower than the 1st iteration. @@ -202,6 +206,8 @@ def test_dynamo_compile_with_default_disk_engine_cache(self): start = torch.cuda.Event(enable_timing=True) end = torch.cuda.Event(enable_timing=True) for i in range(3): + remove_timing_cache() + torch._dynamo.reset() if i == 0: cache_built_engines = False reuse_cached_engines = False @@ -351,6 +357,10 @@ def test_torch_compile_with_default_disk_engine_cache(self): if os.path.exists(engine_cache_dir): shutil.rmtree(engine_cache_dir) + def remove_timing_cache(path=TIMING_CACHE_PATH): + if os.path.exists(path): + os.remove(path) + # The 1st iteration is to measure the compilation time without engine caching # The 2nd and 3rd iterations are to measure the compilation time with engine caching. # Since the 2nd iteration needs to compile and save the engine, it will be slower than the 1st iteration. @@ -361,7 +371,9 @@ def test_torch_compile_with_default_disk_engine_cache(self): start = torch.cuda.Event(enable_timing=True) end = torch.cuda.Event(enable_timing=True) for i in range(3): - # remove timing cache and reset dynamo for engine caching messurement + # remove timing cache and reset dynamo for engine caching measurement + remove_timing_cache() + torch._dynamo.reset() if i == 0: cache_built_engines = False reuse_cached_engines = False diff --git a/tests/py/requirements.txt b/tests/py/requirements.txt index 5e4842954e..5c26748804 100644 --- a/tests/py/requirements.txt +++ b/tests/py/requirements.txt @@ -10,5 +10,5 @@ pyyaml timm>=1.0.3 transformers==4.40.2 # TODO: once 0.16.1 is out, update it here -nvidia-modelopt>=0.15.1 +nvidia-modelopt[torch]>=0.16.1 --extra-index-url https://pypi.nvidia.com