From b614d9fc67b5a4ad833b63a4432ddd67097a61e1 Mon Sep 17 00:00:00 2001
From: dbogunowicz
Date: Fri, 19 Apr 2024 12:06:15 +0000
Subject: [PATCH] initial commit

---
 README.md                                  | 99 ++++++++++++++++++++++
 src/compressed_tensors/base.py             |  2 +-
 src/compressed_tensors/compressors/base.py |  4 +-
 src/compressed_tensors/utils/helpers.py    | 56 ++++++++++--
 tests/quantization/lifecycle/test_apply.py |  1 -
 tests/test_utils/test_helpers.py           | 79 +++++++++++++++++
 6 files changed, 232 insertions(+), 9 deletions(-)
 create mode 100644 tests/test_utils/test_helpers.py

diff --git a/README.md b/README.md
index 05fa83a3..fa3b812e 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,100 @@
 # compressed-tensors
+
+This repository extends the [safetensors](https://github.com/huggingface/safetensors) format to efficiently store sparse and/or quantized tensors on disk. The `compressed-tensors` format supports multiple compression types to minimize disk space and to simplify tensor manipulation.
+
+## Motivation
+
+### Reduce disk space by saving sparse tensors in a compressed format
+
+The compressed format stores the data much more efficiently by taking advantage of two properties of tensors:
+
+- Sparse tensors -> compressible due to the large number of entries that are equal to zero.
+- Quantized tensors -> compressible due to their low-precision representation.
+
+### Introduce an elegant interface to save/load compressed tensors
+
+The library lets the user compress and decompress tensors. The properties of the tensors are defined by human-readable configs, allowing users to understand the compression format at a glance.
+
+## Installation
+
+### Pip
+
+```bash
+pip install compressed-tensors
+```
+
+### From source
+
+```bash
+git clone https://github.com/neuralmagic/compressed-tensors
+cd compressed-tensors
+pip install -e .
+```
+
+## Getting started
+
+### Saving
+
+The function `save_compressed` returns an optional `compression_config` (if compression has been applied), which can be used to inspect the applied compression.
+
+```python
+from typing import Dict
+
+from compressed_tensors import save_compressed
+from torch import Tensor
+
+tensors: Dict[str, Tensor] = ...
+compression_config: Dict = save_compressed(tensors, "model.safetensors")
+```
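+
+A compression config can also be passed explicitly. The snippet below is an illustrative sketch rather than canonical usage; it assumes `BitmaskConfig` from `compressed_tensors.config` and uses toy values:
+
+```python
+import torch
+
+from compressed_tensors import save_compressed
+from compressed_tensors.config import BitmaskConfig
+
+# a toy tensor in which half of the entries are zero
+tensors = {"tensor_1": torch.Tensor([[0.0, 0.0, 0.0], [1.0, 1.0, 1.0]])}
+
+# describe the compression to apply (values here are illustrative)
+config = BitmaskConfig(
+    format="sparse_bitmask",
+    global_sparsity=0.5,
+    sparsity_structure="unstructured",
+)
+
+# returns a dict describing the applied compression
+# (None is returned when no compression_config is given)
+compression_config = save_compressed(
+    tensors,
+    save_path="model.safetensors",
+    compression_config=config,
+)
+```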
+
+### Loading
+
+```python
+from typing import Dict
+
+from compressed_tensors import load_compressed
+from torch import Tensor
+
+tensors: Dict[str, Tensor] = load_compressed("model.safetensors", device="cpu")
+```
+
+## Benefits
+TODO
+
+## SafeTensors File Format
+
+For each parameter in the uncompressed state_dict, we store the following attributes needed for decompression in the compressed state_dict:
+
+- Compressed tensor
+- Bitmask
+- Uncompressed shape
+- Row offsets
+
+```python
+# Dense
+{
+    PARAM_NAME: uncompressed_tensor
+}
+
+# Compressed
+{
+    PARAM_NAME.compressed: compressed_tensor,  # 1d tensor
+    PARAM_NAME.bitmask: value,  # 2d bitmask tensor (nrows x (ncols / 8))
+    PARAM_NAME.shape: value,  # Uncompressed shape tensor
+    PARAM_NAME.row_offsets: value  # 1d offsets tensor
+}
+```
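+
+To make the layout concrete, here is a rough sketch of how such a bitmask encoding could be produced and reversed with plain PyTorch and NumPy. It is illustrative only and is not the compressor implementation used by the library:
+
+```python
+import numpy as np
+import torch
+
+
+def bitmask_compress(tensor: torch.Tensor):
+    """Pack a 2d tensor into non-zero values, a packed bitmask, shape and row offsets."""
+    mask = tensor != 0
+    compressed = tensor[mask]  # 1d tensor of non-zero values, row-major order
+    row_counts = mask.sum(dim=1)
+    # start index of each row inside `compressed` (enables per-row slicing)
+    row_offsets = torch.cumsum(row_counts, dim=0) - row_counts
+    # pack 8 mask bits per byte -> shape (nrows, ceil(ncols / 8))
+    bitmask = torch.from_numpy(np.packbits(mask.numpy(), axis=-1, bitorder="little"))
+    shape = torch.tensor(tensor.shape)
+    return compressed, bitmask, shape, row_offsets
+
+
+def bitmask_decompress(compressed, bitmask, shape, row_offsets):
+    """Rebuild the dense tensor from the stored attributes."""
+    ncols = int(shape[1])
+    mask = torch.from_numpy(
+        np.unpackbits(bitmask.numpy(), axis=-1, count=ncols, bitorder="little")
+    ).bool()
+    dense = torch.zeros(*shape.tolist(), dtype=compressed.dtype)
+    dense[mask] = compressed  # boolean assignment follows the same row-major order
+    return dense
+```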
+
+The library provides pathways to automatically add the config information to the HF config file.
+
+```json
+// config.json
+{
+  "compression_config": {
+    "format": "sparse_bitmask", // "dense_sparsity" for the original tensor format
+
+    // Informational
+    "sparsity_structure": "unstructured", // Or 2:4, 8:16, etc.
+    "global_sparsity": 0.5
+  }
+}
+```
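+
+Given a model whose config file carries such a `compression_config` entry, a matching compressor can be inferred from it. A minimal sketch, assuming the helper is importable from `compressed_tensors.utils.helpers` and that `"path/to/model"` stands in for a real model directory or Hugging Face model id:
+
+```python
+from compressed_tensors.utils.helpers import infer_compressor_from_model_config
+
+# returns a ModelCompressor when the model config declares a compression config,
+# or None when the model is stored densely
+compressor = infer_compressor_from_model_config("path/to/model")
+```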
\ No newline at end of file
diff --git a/src/compressed_tensors/base.py b/src/compressed_tensors/base.py
index f01a055f..964b3087 100644
--- a/src/compressed_tensors/base.py
+++ b/src/compressed_tensors/base.py
@@ -12,4 +12,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-SPARSITY_CONFIG_NAME = "sparsity_config"
+CONFIG_NAME = "compression_config"
diff --git a/src/compressed_tensors/compressors/base.py b/src/compressed_tensors/compressors/base.py
index 9c205f93..aed54069 100644
--- a/src/compressed_tensors/compressors/base.py
+++ b/src/compressed_tensors/compressors/base.py
@@ -15,7 +15,7 @@
 import operator
 from typing import Dict, Generator, Tuple
 
-from compressed_tensors.base import SPARSITY_CONFIG_NAME
+from compressed_tensors.base import CONFIG_NAME
 from compressed_tensors.config import CompressionConfig
 from compressed_tensors.registry import RegistryMixin
 from torch import Tensor
@@ -70,4 +70,4 @@ def overwrite_weights(self, model_path: str, model: Module):
         data_old = operator.attrgetter(name)(model)
         data_old.data = data_new.data
 
-        setattr(model, SPARSITY_CONFIG_NAME, self.config)
+        setattr(model, CONFIG_NAME, self.config)
diff --git a/src/compressed_tensors/utils/helpers.py b/src/compressed_tensors/utils/helpers.py
index ac9ed229..e776f9f7 100644
--- a/src/compressed_tensors/utils/helpers.py
+++ b/src/compressed_tensors/utils/helpers.py
@@ -12,16 +12,18 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from pathlib import Path
+from typing import Dict, Optional, Union
 
-from typing import Optional
-
-from compressed_tensors.base import SPARSITY_CONFIG_NAME
+from compressed_tensors.base import CONFIG_NAME
 from compressed_tensors.compressors import ModelCompressor
 from compressed_tensors.config import CompressionConfig
+from safetensors.torch import save_file
+from torch import Tensor
 from transformers import AutoConfig
 
 
-__all__ = ["infer_compressor_from_model_config"]
+__all__ = ["infer_compressor_from_model_config", "load_compressed", "save_compressed"]
 
 
 def infer_compressor_from_model_config(
@@ -35,7 +37,7 @@
     :return: matching compressor if config contains a sparsity config
     """
    config = AutoConfig.from_pretrained(pretrained_model_name_or_path)
-    sparsity_config = getattr(config, SPARSITY_CONFIG_NAME, None)
+    sparsity_config = getattr(config, CONFIG_NAME, None)
 
     if sparsity_config is None:
         return None
@@ -43,3 +45,47 @@
     sparsity_config = CompressionConfig.load_from_registry(format, **sparsity_config)
     compressor = ModelCompressor.load_from_registry(format, config=sparsity_config)
     return compressor
+
+
+def save_compressed(
+    tensors: Dict[str, Tensor],
+    save_path: Union[str, Path],
+    compression_config: Optional[CompressionConfig] = None,
+) -> Optional[Dict]:
+    """
+    Save compressed tensors to disk. If tensors are not compressed,
+    save them as is.
+
+    :param tensors: dictionary of tensors to compress
+    :param save_path: path to save compressed tensors
+    :param compression_config: compression config to use for compressing tensors.
+        Can be either inferred from tensors or provided explicitly
+    :return: compression config as a dict, if tensors were compressed - None otherwise
+    """
+    if tensors is None or len(tensors) == 0:
+        raise ValueError("No tensors or empty tensors provided to compress")
+
+    # create compression config if not provided
+    # TODO: Not implemented, need to get this in ASAP
+    # compression_config = compression_config or infer_compression_config(tensors)
+
+    if compression_config is None:
+        # no compression applied
+        save_file(tensors, save_path)
+        return None
+
+    # compress
+    compression_format = compression_config.format
+    compressor = ModelCompressor.load_from_registry(
+        compression_format, config=compression_config
+    )
+    # save compressed tensors
+    compressed_tensors = compressor.compress(tensors)
+    save_file(compressed_tensors, save_path)
+
+    # return compression_config as dict
+    return {CONFIG_NAME: compression_config.model_dump(exclude_unset=True)}
+
+
+def load_compressed(compressed_tensors: Union[str, Path], device: str):
+    pass
diff --git a/tests/quantization/lifecycle/test_apply.py b/tests/quantization/lifecycle/test_apply.py
index 6a3d17af..7e4e0f77 100644
--- a/tests/quantization/lifecycle/test_apply.py
+++ b/tests/quantization/lifecycle/test_apply.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
 from compressed_tensors.quantization.lifecycle import apply_quantization_config
 from compressed_tensors.quantization.quant_config import (
     QuantizationConfig,
diff --git a/tests/test_utils/test_helpers.py b/tests/test_utils/test_helpers.py
new file mode 100644
index 00000000..d4d91e97
--- /dev/null
+++ b/tests/test_utils/test_helpers.py
@@ -0,0 +1,79 @@
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pytest
+import torch
+from compressed_tensors import save_compressed
+from compressed_tensors.config import BitmaskConfig
+
+
+@pytest.fixture
+def tensors_and_config_sparse():
+    tensors = {"tensor_1": torch.Tensor([[0.0, 0.0, 0.0], [1.0, 1.0, 1.0]])}
+    expected_config_json = {
+        "compression_config": {
+            "format": "sparse_bitmask",
+            "global_sparsity": (
+                (tensors["tensor_1"] == 0).sum() / tensors["tensor_1"].numel()
+            ).item(),
+            "sparsity_structure": "unstructured",
+        }
+    }
+    return tensors, expected_config_json
+
+
+@pytest.fixture
+def tensors_dense():
+    tensors = {"tensor_1": torch.Tensor([[1.0, 1.0, 1.0], [1.0, 1.0, 1.0]])}
+    return tensors
+
+
+def test_save_compressed_sparse(tmp_path, tensors_and_config_sparse):
+    tensors, expected_config_json = tensors_and_config_sparse
+
+    config_json = save_compressed(
+        tensors,
+        compression_config=BitmaskConfig(
+            format=expected_config_json["compression_config"]["format"],
+            global_sparsity=expected_config_json["compression_config"][
+                "global_sparsity"
+            ],
+            sparsity_structure=expected_config_json["compression_config"][
+                "sparsity_structure"
+            ],
+        ),
+        save_path=tmp_path / "model.safetensors",
+    )
+    assert (tmp_path / "model.safetensors").exists()
+    assert config_json == expected_config_json
+
+
+def test_save_compressed_dense(tmp_path, tensors_dense):
+    tensors = tensors_dense
+
+    config_json = save_compressed(
+        tensors,
+        save_path=tmp_path / "model.safetensors",
+    )
+    assert (tmp_path / "model.safetensors").exists()
+    assert config_json is None
+
+
+def test_save_compressed_empty():
+    # make sure the function raises an error on missing or empty input
+    with pytest.raises(Exception):
+        save_compressed({}, "")
+
+    with pytest.raises(Exception):
+        save_compressed(None, "")