
Commit

update test_run_compressed
horheynm committed Dec 11, 2024
1 parent 9f58887 commit b7a968e
Showing 5 changed files with 27 additions and 40 deletions.
FP8 dynamic config:

```diff
@@ -1,4 +1,4 @@
 cadence: "commit"
 test_type: "regression"
-model_stub: "nm-testing/tinyllama-fp8-dynamic-compressed"
-empty_model: "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"
+compressed_model_stub: horheynm/TinyLlama_1.1B_Chat_v1.0_FP8_Dynamic_compressed
+uncompressed_model_stub: horheynm/TinyLlama_1.1B_Chat_v1.0_FP8_Dynamic_uncompressed
```
W4A16 config:

```diff
@@ -1,4 +1,4 @@
 cadence: "commit"
 test_type: "regression"
-model_stub: "nm-testing/tinyllama-w4a16-compressed"
-empty_model: "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"
+compressed_model_stub: horheynm/TinyLlama_1.1B_Chat_v1.0_W4A16_G128_compressed
+uncompressed_model_stub: horheynm/TinyLlama_1.1B_Chat_v1.0_W4A16_G128_uncompressed
```
W8A16 config:

```diff
@@ -1,4 +1,4 @@
 cadence: "commit"
 test_type: "regression"
-model_stub: "nm-testing/tinyllama-w8a16-dense"
-empty_model: "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"
+compressed_model_stub: horheynm/TinyLlama_1.1B_Chat_v1.0_W8A16_G128_compressed
+uncompressed_model_stub: horheynm/TinyLlama_1.1B_Chat_v1.0_W8A16_G128_uncompressed
```
W8A8 dynamic per-token config:

```diff
@@ -1,4 +1,4 @@
 cadence: "commit"
 test_type: "regression"
-model_stub: "nm-testing/tinyllama-w8a8-compressed"
-empty_model: "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"
+compressed_model_stub: horheynm/TinyLlama_1.1B_Chat_v1.0_W8A8_Dynamic_Per_Token_compressed
+uncompressed_model_stub: horheynm/TinyLlama_1.1B_Chat_v1.0_W8A8_Dynamic_Per_Token_uncompressed
```
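
Each of these YAML files supplies one parameter set for the test class below: `parameterized_class(parse_params(CONFIG_DIR))` attaches the keys (`compressed_model_stub`, `uncompressed_model_stub`, and so on) as attributes on a generated copy of the test class. The real `parse_params` lives in `tests/testing_utils.py` and is not part of this diff; a minimal sketch of its assumed behavior:

```python
# Hypothetical sketch of parse_params (the real helper is in
# tests/testing_utils.py and may also filter by cadence, test_type, etc.).
import os
from typing import Dict, List

import yaml  # assumption: PyYAML is available


def parse_params(config_dir: str) -> List[Dict]:
    """Load every YAML config in config_dir into one parameter dict each.

    parameterized_class attaches each dict's keys (for example
    compressed_model_stub) to a generated copy of the test class.
    """
    params = []
    for name in sorted(os.listdir(config_dir)):
        if name.endswith((".yaml", ".yml")):
            with open(os.path.join(config_dir, name)) as f:
                params.append(yaml.safe_load(f))
    return params
```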
tests/llmcompressor/transformers/compression/test_run_compressed.py (19 additions, 32 deletions)
```diff
@@ -3,11 +3,9 @@
 import unittest
 
 import torch
-from compressed_tensors import QUANTIZATION_CONFIG_NAME
-from compressed_tensors.compressors import ModelCompressor
-from compressed_tensors.quantization import QuantizationStatus
 from parameterized import parameterized_class
-from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
+from transformers import AutoModelForCausalLM, AutoTokenizer
+from transformers.utils.quantization_config import CompressedTensorsConfig
 
 from tests.testing_utils import parse_params, requires_gpu
 
```
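
The import swap mirrors the substantive change in `setUpClass` below: instead of manually decompressing with compressed-tensors' `ModelCompressor`, the test now lets transformers decompress the checkpoint at load time. A standalone sketch of that loading path, reusing the FP8 stub from the configs above (as I understand it, `run_compressed=False` decompresses to ordinary dense modules, while the default `True` executes with compressed weights):

```python
from transformers import AutoModelForCausalLM
from transformers.utils.quantization_config import CompressedTensorsConfig

# run_compressed=False asks the HF quantizer to decompress the
# compressed-tensors checkpoint into dense weights at load time,
# instead of running inference directly on compressed weights.
model = AutoModelForCausalLM.from_pretrained(
    "horheynm/TinyLlama_1.1B_Chat_v1.0_FP8_Dynamic_compressed",
    torch_dtype="auto",
    device_map="auto",
    quantization_config=CompressedTensorsConfig(run_compressed=False),
)
```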

```diff
@@ -17,39 +15,28 @@
 @requires_gpu
 @parameterized_class(parse_params(CONFIG_DIR))
 class TestQuantizationMatches(unittest.TestCase):
-    model_stub = None
-    empty_model = None
+    compressed_model_stub = None
+    uncompressed_model_stub = None
 
     @classmethod
     def setUpClass(cls):
         cls.test_dir = tempfile.mkdtemp()
 
-        # TODO: Give option on HFQuantizer to run run_compressed True/False
-        # currently hardcoded to True
-        cls.compressed_model = AutoModelForCausalLM.from_pretrained(
-            cls.model_stub,
+        quantization_config = CompressedTensorsConfig(run_compressed=False)
+        cls.decompressed_model = AutoModelForCausalLM.from_pretrained(
+            cls.compressed_model_stub,
             torch_dtype="auto",
             device_map="auto",
-            # run_compressed=True, # TODO: Give option on HFQuantizer
+            quantization_config=quantization_config,
         )
-        # TODO: Use ModelCompressor until decompression is supported through
-        # HFQuant/run_compressed can be turned off.
-        cls.uncompressed_model = AutoModelForCausalLM.from_pretrained(
-            cls.empty_model,
-            torch_dtype=cls.compressed_model.dtype,
-            device_map=cls.compressed_model.device,
-        )
-        config = AutoConfig.from_pretrained(cls.model_stub)
-        compression_config = getattr(config, QUANTIZATION_CONFIG_NAME, None)
-        cls.compressor = ModelCompressor.from_compression_config(compression_config)
-        cls.compressor.quantization_config.quantization_status = (
-            QuantizationStatus.FROZEN
-        )
-        cls.compressor.decompress(
-            model_path=cls.model_stub, model=cls.uncompressed_model
+
+        cls.non_comp_model = AutoModelForCausalLM.from_pretrained(
+            cls.uncompressed_model_stub,
+            torch_dtype=cls.decompressed_model.dtype,
+            device_map=cls.decompressed_model.device,
         )
 
-        cls.tokenizer = AutoTokenizer.from_pretrained(cls.model_stub)
+        cls.tokenizer = AutoTokenizer.from_pretrained(cls.compressed_model_stub)
 
     def test_compressed_matches_uncompressed(self):
         SAMPLE_INPUT = [
```
```diff
@@ -59,13 +46,13 @@ def test_compressed_matches_uncompressed(self):
         ]
 
         inputs = self.tokenizer(SAMPLE_INPUT, return_tensors="pt", padding=True).to(
-            self.compressed_model.device
+            self.decompressed_model.device
         )
         compressed_output = self.tokenizer.batch_decode(
-            self.compressed_model.generate(**inputs, max_length=50)
+            self.decompressed_model.generate(**inputs, max_length=50)
         )
         uncompressed_output = self.tokenizer.batch_decode(
-            self.uncompressed_model.generate(**inputs, max_length=50)
+            self.non_comp_model.generate(**inputs, max_length=50)
         )
 
         for idx in range(len(SAMPLE_INPUT)):
```
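
The diff view truncates the loop body here; a hypothetical completion (assumed, not shown in this commit) would assert per-sample equality of the decoded generations. Since `generate()` defaults to greedy decoding, the decompressed checkpoint and its uncompressed counterpart should emit identical text:

```python
# Hypothetical loop body (the actual assertion is cut off in this view):
# with default greedy decoding, matching weights imply matching strings.
for idx in range(len(SAMPLE_INPUT)):
    self.assertEqual(compressed_output[idx], uncompressed_output[idx])
```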
```diff
@@ -74,6 +61,6 @@ def test_compressed_matches_uncompressed(self):
     @classmethod
     def tearDownClass(cls):
         shutil.rmtree(cls.test_dir)
-        del cls.compressed_model
-        del cls.uncompressed_model
+        del cls.decompressed_model
+        del cls.non_comp_model
         torch.cuda.empty_cache()
```
