From 50e47077430050056dfc14e7406172edbc35f745 Mon Sep 17 00:00:00 2001
From: George
Date: Tue, 19 Nov 2024 16:50:45 +0000
Subject: [PATCH 1/7] fix

---
 src/llmcompressor/core/session.py                  | 10 +++++--
 .../compressed_tensors_utils.py                    |  7 +++--
 tests/e2e/vLLM/test_vllm.py                        | 30 ++++++++++++-------
 3 files changed, 32 insertions(+), 15 deletions(-)

diff --git a/src/llmcompressor/core/session.py b/src/llmcompressor/core/session.py
index 41072feb9..9b846fc23 100644
--- a/src/llmcompressor/core/session.py
+++ b/src/llmcompressor/core/session.py
@@ -1,6 +1,8 @@
 from dataclasses import dataclass
 from typing import Any, Callable, Dict, List, Optional, Union
 
+from loguru import logger
+
 from llmcompressor.core.events import EventType
 from llmcompressor.core.helpers import log_model_info, should_log_model_info
 from llmcompressor.core.lifecycle import CompressionLifecycle
@@ -260,12 +262,16 @@ def reset_stage(self):
         self.lifecycle.initialized_ = False
         self.lifecycle.finalized = False
 
-    def get_serialized_recipe(self) -> str:
+    def get_serialized_recipe(self) -> Optional[str]:
         """
         :return: serialized string of the current compiled recipe
         """
         recipe = self.lifecycle.recipe_container.compiled_recipe
-        return recipe.yaml()
+
+        if recipe is not None and hasattr(recipe, "yaml"):
+            return recipe.yaml()
+
+        logger.warning("Recipe not found in session - may been reset")
 
     def _log_model_info(self):
         # Log model level logs if cadence reached
diff --git a/src/llmcompressor/transformers/sparsification/compressed_tensors_utils.py b/src/llmcompressor/transformers/sparsification/compressed_tensors_utils.py
index 001b43b0b..6de89dd8b 100644
--- a/src/llmcompressor/transformers/sparsification/compressed_tensors_utils.py
+++ b/src/llmcompressor/transformers/sparsification/compressed_tensors_utils.py
@@ -191,9 +191,10 @@ def skip(*args, **kwargs):
 
     recipe_path = os.path.join(save_directory, "recipe.yaml")
     session = active_session()
-    recipe_yaml_str = session.get_serialized_recipe()
-    with open(recipe_path, "w") as fp:
-        fp.write(recipe_yaml_str)
+
+    if (recipe_yaml_str := session.get_serialized_recipe()) is not None:
+        with open(recipe_path, "w") as fp:
+            fp.write(recipe_yaml_str)
 
     # copy python files from cache dir to save_path if any
     copy_python_files_from_model_cache(model, save_directory)
diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py
index 988793849..c141c34ef 100644
--- a/tests/e2e/vLLM/test_vllm.py
+++ b/tests/e2e/vLLM/test_vllm.py
@@ -1,9 +1,11 @@
+import os
 import shutil
 import unittest
 from typing import Callable
 
 import pytest
 from datasets import load_dataset
+from loguru import logger
 from parameterized import parameterized, parameterized_class
 from transformers import AutoTokenizer
 
@@ -22,6 +24,7 @@
     vllm_installed = True
 except ImportError:
     vllm_installed = False
+    logger.warning("vllm is not installed. This test will be skipped")
 
 # Defines the file paths to the directories containing the test configs
 # for each of the quantization schemes
@@ -32,6 +35,8 @@
 WNA16_2of4 = "tests/e2e/vLLM/configs/WNA16_2of4"
 CONFIGS = [WNA16, FP8, INT8, ACTORDER, WNA16_2of4]
 
+HF_MODEL_HUB_NAME = "nm-testing"
+
 
 def gen_test_name(testcase_func: Callable, param_num: int, param: dict) -> str:
     return "_".join(
@@ -76,8 +81,8 @@ class TestvLLM(unittest.TestCase):
     save_dir = None
 
     def setUp(self):
-        print("========== RUNNING ==============")
-        print(self.scheme)
+        logger.info("========== RUNNING ==============")
+        logger.debug(self.scheme)
 
         self.device = "cuda:0"
         self.oneshot_kwargs = {}
@@ -124,16 +129,18 @@ def test_vllm(self):
         )
 
         # Apply quantization.
-        print("ONESHOT KWARGS", self.oneshot_kwargs)
+        logger.debug("ONESHOT KWARGS", self.oneshot_kwargs)
         oneshot(
             **self.oneshot_kwargs,
             clear_sparse_session=True,
             oneshot_device=self.device,
         )
+
         self.oneshot_kwargs["model"].save_pretrained(self.save_dir)
         tokenizer.save_pretrained(self.save_dir)
+
         # Run vLLM with saved model
-        print("================= RUNNING vLLM =========================")
+        logger.info("================= RUNNING vLLM =========================")
         sampling_params = SamplingParams(temperature=0.80, top_p=0.95)
         if "W4A16_2of4" in self.scheme:
             # required by the kernel
@@ -141,16 +148,19 @@ def test_vllm(self):
         else:
             llm = LLM(model=self.save_dir)
         outputs = llm.generate(self.prompts, sampling_params)
-        print("================= vLLM GENERATION ======================")
+
+        logger.info("================= vLLM GENERATION ======================")
         for output in outputs:
             assert output
             prompt = output.prompt
             generated_text = output.outputs[0].text
-            print("PROMPT", prompt)
-            print("GENERATED TEXT", generated_text)
-            print("================= UPLOADING TO HUB ======================")
-        self.oneshot_kwargs["model"].push_to_hub(f"nm-testing/{self.save_dir}-e2e")
-        tokenizer.push_to_hub(f"nm-testing/{self.save_dir}-e2e")
+            logger.debug("PROMPT", prompt)
+            logger.debug("GENERATED TEXT", generated_text)
+
+        logger.info("================= UPLOADING TO HUB ======================")
+        hf_upload_path = os.path.join(HF_MODEL_HUB_NAME, f"{self.save_dir}-e2e")
+        self.oneshot_kwargs["model"].push_to_hub(hf_upload_path)
+        tokenizer.push_to_hub(hf_upload_path)
 
     def tearDown(self):
         if self.save_dir is not None:
             shutil.rmtree(self.save_dir)
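
The change to watch in PATCH 1 is that get_serialized_recipe() now returns Optional[str] instead of assuming a compiled recipe exists, and the caller in compressed_tensors_utils.py guards the file write with a walrus expression. Below is a minimal, self-contained sketch of that guarded-save pattern; FakeRecipe and save_recipe are hypothetical stand-ins for illustration, not llmcompressor API.

    import os
    import tempfile
    from dataclasses import dataclass
    from typing import Optional


    @dataclass
    class FakeRecipe:
        # Hypothetical stand-in for llmcompressor's compiled recipe object
        yaml_str: str

        def yaml(self) -> str:
            return self.yaml_str


    def get_serialized_recipe(recipe: Optional[FakeRecipe]) -> Optional[str]:
        # Mirror the patched method: return YAML only when a compiled
        # recipe with a yaml() method exists; otherwise return None.
        if recipe is not None and hasattr(recipe, "yaml"):
            return recipe.yaml()
        return None


    def save_recipe(recipe: Optional[FakeRecipe], save_directory: str) -> None:
        recipe_path = os.path.join(save_directory, "recipe.yaml")
        # Walrus-guarded write: when no recipe exists, skip creating
        # recipe.yaml entirely instead of crashing on None.yaml().
        if (recipe_yaml_str := get_serialized_recipe(recipe)) is not None:
            with open(recipe_path, "w") as fp:
                fp.write(recipe_yaml_str)


    with tempfile.TemporaryDirectory() as tmp:
        save_recipe(None, tmp)  # no-op instead of an AttributeError
        save_recipe(FakeRecipe("quant_stage: {}"), tmp)  # writes <tmp>/recipe.yaml
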
From 5c9559bcc4bc6c8c5f18231c69a46795143aded9 Mon Sep 17 00:00:00 2001
From: George
Date: Wed, 20 Nov 2024 16:36:32 +0000
Subject: [PATCH 2/7] fix grammar

---
 src/llmcompressor/core/session.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/llmcompressor/core/session.py b/src/llmcompressor/core/session.py
index 9b846fc23..7c489f36f 100644
--- a/src/llmcompressor/core/session.py
+++ b/src/llmcompressor/core/session.py
@@ -271,7 +271,7 @@ def get_serialized_recipe(self) -> Optional[str]:
         if recipe is not None and hasattr(recipe, "yaml"):
             return recipe.yaml()
 
-        logger.warning("Recipe not found in session - may been reset")
+        logger.warning("Recipe not found in session - it may have been reset")
 
     def _log_model_info(self):
         # Log model level logs if cadence reached

From 84ca3c8bc1d3ac80a3fefbad726007dc0602c9ee Mon Sep 17 00:00:00 2001
From: George
Date: Wed, 20 Nov 2024 22:27:46 -0500
Subject: [PATCH 3/7] comments

---
 tests/e2e/vLLM/test_vllm.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py
index c141c34ef..eba2a2c36 100644
--- a/tests/e2e/vLLM/test_vllm.py
+++ b/tests/e2e/vLLM/test_vllm.py
@@ -132,7 +132,7 @@ def test_vllm(self):
         logger.debug("ONESHOT KWARGS", self.oneshot_kwargs)
         oneshot(
             **self.oneshot_kwargs,
-            clear_sparse_session=True,
+            clear_sparse_session=False,
            oneshot_device=self.device,
         )
 
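
PATCH 3 stops clearing the session after oneshot so the compiled recipe remains available for the checks added later in the series; PATCH 4 below then drops the flag entirely to rely on the library default. A hedged sketch of what keeping the session intact enables, using the active_session / get_serialized_recipe API that this series itself uses (the surrounding test wiring is elided):

    from llmcompressor.core import active_session

    # Because oneshot() no longer clears the session, the compiled
    # recipe can be fetched and validated before the model is saved.
    session = active_session()
    recipe_yaml_str = session.get_serialized_recipe()
    assert recipe_yaml_str is not None, "recipe was cleared from the session"
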
From c719c5dab5151ec76287ba7d6981b8dfc25cedc0 Mon Sep 17 00:00:00 2001
From: George
Date: Thu, 21 Nov 2024 03:36:17 +0000
Subject: [PATCH 4/7] remove args, use default

---
 tests/e2e/vLLM/test_vllm.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py
index eba2a2c36..7803486b2 100644
--- a/tests/e2e/vLLM/test_vllm.py
+++ b/tests/e2e/vLLM/test_vllm.py
@@ -132,7 +132,6 @@ def test_vllm(self):
         logger.debug("ONESHOT KWARGS", self.oneshot_kwargs)
         oneshot(
             **self.oneshot_kwargs,
-            clear_sparse_session=False,
             oneshot_device=self.device,
         )
 

From cfa6c768c022750d35b07cbd5cf8d5f8332893c9 Mon Sep 17 00:00:00 2001
From: George
Date: Thu, 21 Nov 2024 17:06:50 +0000
Subject: [PATCH 5/7] add recipe check and expected file match tests

---
 tests/e2e/vLLM/test_vllm.py | 47 +++++++++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)

diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py
index 7803486b2..71f3d40ea 100644
--- a/tests/e2e/vLLM/test_vllm.py
+++ b/tests/e2e/vLLM/test_vllm.py
@@ -1,4 +1,5 @@
 import os
+import re
 import shutil
 import unittest
 from typing import Callable
@@ -9,6 +10,7 @@
 from parameterized import parameterized, parameterized_class
 from transformers import AutoTokenizer
 
+from llmcompressor.core import active_session
 from llmcompressor.modifiers.quantization import QuantizationModifier
 from llmcompressor.transformers import SparseAutoModelForCausalLM, oneshot
 from tests.testing_utils import (
@@ -37,6 +39,13 @@
 
 HF_MODEL_HUB_NAME = "nm-testing"
 
+EXPECTED_SAVED_FILES = [
+    "config.json",
+    r"^model(?:-\d{5}-of-\d{5})?\.safetensors$",
+    "recipe.yaml",
+    "tokenizer.json",
+]
+
 
 def gen_test_name(testcase_func: Callable, param_num: int, param: dict) -> str:
     return "_".join(
@@ -135,9 +144,15 @@ def test_vllm(self):
             oneshot_device=self.device,
         )
 
+        # check that session contains recipe
+        self._check_session_contains_recipe()
+
         self.oneshot_kwargs["model"].save_pretrained(self.save_dir)
         tokenizer.save_pretrained(self.save_dir)
 
+        # check that expected files exist
+        self._check_save_dir_has_expected_files()
+
         # Run vLLM with saved model
         logger.info("================= RUNNING vLLM =========================")
         sampling_params = SamplingParams(temperature=0.80, top_p=0.95)
@@ -164,3 +179,35 @@ def test_vllm(self):
     def tearDown(self):
         if self.save_dir is not None:
             shutil.rmtree(self.save_dir)
+
+    def _check_session_contains_recipe(self) -> None:
+        session = active_session()
+        recipe_yaml_str = session.get_serialized_recipe()
+        assert recipe_yaml_str is not None
+
+    def _check_save_dir_has_expected_files(self):
+        files = os.listdir(self.save_dir)
+        logger.debug("Saved files: ", files)
+
+        matched_patterns = set()
+
+        for expected in EXPECTED_SAVED_FILES:
+            # Find all files matching the expected pattern
+            matches = [
+                file
+                for file in files
+                if (
+                    re.fullmatch(expected, file)
+                    if expected.startswith("^")
+                    else file == expected
+                )
+            ]
+            if matches:
+                matched_patterns.add(expected)
+
+        assert len(matched_patterns) == len(EXPECTED_SAVED_FILES), (
+            "expected: ",
+            EXPECTED_SAVED_FILES,
+            "\n saved: ",
+            list(matched_patterns),
+        )
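
In PATCH 5, entries of EXPECTED_SAVED_FILES that start with "^" are treated as regular expressions, so both single-file and sharded safetensors checkpoints satisfy the check. A quick self-contained sketch of how that pattern behaves:

    import re

    pattern = r"^model(?:-\d{5}-of-\d{5})?\.safetensors$"

    # The optional group accepts an unsharded checkpoint...
    assert re.fullmatch(pattern, "model.safetensors")
    # ...or one shard of a sharded checkpoint.
    assert re.fullmatch(pattern, "model-00001-of-00002.safetensors")
    # Anything else is rejected.
    assert re.fullmatch(pattern, "pytorch_model.bin") is None

Note that re.fullmatch already anchors at both ends of the string, so the "^" and "$" in the pattern are redundant for matching; the leading "^" doubles as the marker that selects regex matching over exact filename comparison.
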
From c584f2af922ddd521ffcfbcc94f9921ed9ea207d Mon Sep 17 00:00:00 2001
From: George Ohashi
Date: Thu, 12 Dec 2024 19:17:26 -0500
Subject: [PATCH 6/7] comment

---
 tests/e2e/vLLM/test_vllm.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py
index aab2a7ed8..96c16951c 100644
--- a/tests/e2e/vLLM/test_vllm.py
+++ b/tests/e2e/vLLM/test_vllm.py
@@ -41,7 +41,7 @@ def record_config_file(record_testsuite_property: Callable[[str, object], None])
 # Will run each test case in its own process through run_tests.sh
 # emulating vLLM CI testing
 @requires_gpu_count(1)
-# @pytest.mark.skipif(not vllm_installed, reason="vLLM is not installed, skipping test")
+@pytest.mark.skipif(not vllm_installed, reason="vLLM is not installed, skipping test")
 class TestvLLM:
     """
     The following test quantizes a model using a preset scheme or recipe,

From 6a0961161879225367f4d25b5c954df9d090c423 Mon Sep 17 00:00:00 2001
From: George Ohashi
Date: Fri, 13 Dec 2024 14:29:19 -0500
Subject: [PATCH 7/7] remove redundant code

---
 tests/e2e/vLLM/test_vllm.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py
index 96c16951c..3d96fcbe2 100644
--- a/tests/e2e/vLLM/test_vllm.py
+++ b/tests/e2e/vLLM/test_vllm.py
@@ -119,9 +119,6 @@ def test_vllm(self):
         # check that expected files exist
         self._check_save_dir_has_expected_files()
 
-        # Reset after session info is extracted on save -- recipe
-        self.session.reset()
-
         # Use the session to fetch the recipe;
         # Reset session for next test case
         session = active_session()
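
PATCH 6 and PATCH 7 apply on top of an intermediate rebase (the blob hashes jump from 71f3d40ea to aab2a7ed8, and TestvLLM is now a plain pytest class rather than a unittest.TestCase). The deletion in PATCH 7 is about ordering: resetting the session immediately after save would discard the recipe before the test could fetch it. A hedged sketch of the fetch-then-reset sequence the remaining context lines point at; the exact code after session = active_session() falls outside this hunk, so this is an inferred illustration:

    from llmcompressor.core import active_session

    # Fetch the serialized recipe first, then reset once so session
    # state does not leak into the next parameterized test case.
    # Resetting earlier (the removed lines) would discard the recipe
    # before it could be read.
    session = active_session()
    recipe_yaml_str = session.get_serialized_recipe()
    assert recipe_yaml_str is not None
    session.reset()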