From 7a4642f7f60349c2a59784f6837780780f99d5c8 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Mon, 19 Aug 2024 09:15:48 +0100 Subject: [PATCH 01/53] feat (example/llm): Moved argument parser to separate function. --- src/brevitas_examples/llm/main.py | 311 +++++++++++++++--------------- 1 file changed, 157 insertions(+), 154 deletions(-) diff --git a/src/brevitas_examples/llm/main.py b/src/brevitas_examples/llm/main.py index 05d84f647..94f596529 100644 --- a/src/brevitas_examples/llm/main.py +++ b/src/brevitas_examples/llm/main.py @@ -3,6 +3,7 @@ # SPDX-License-Identifier: BSD-3-Clause """ +import sys import argparse import re @@ -40,157 +41,6 @@ from brevitas_examples.llm.llm_quant.run_utils import get_fx from brevitas_examples.llm.llm_quant.run_utils import modify_dataloader -parser = argparse.ArgumentParser() -parser.add_argument( - '--model', - type=str, - default="facebook/opt-125m", - help='HF model name. Default: facebook/opt-125m.') -parser.add_argument( - '--seed', type=int, default=0, help='Seed for sampling the calibration data. Default: 0.') -parser.add_argument( - '--nsamples', type=int, default=128, help='Number of calibration data samples. Default: 128.') -parser.add_argument('--seqlen', type=int, default=2048, help='Sequence length. Default: 2048.') -parser.add_argument('--eval', action='store_true', help='Eval model PPL on the chosen Dataset.') -parser.add_argument( - '--dataset', - type=str, - choices=['wikitext2', 'c4'], - default='wikitext2', - help='Dataset to use for quantization (default: %(default)s)') -parser.add_argument('--weight-bit-width', type=int, default=8, help='Weight bit width. Default: 8.') -parser.add_argument( - '--weight-param-method', - type=str, - default='stats', - choices=['stats', 'mse'], - help='How scales/zero-point are determined. Default: stats.') -parser.add_argument( - '--weight-scale-precision', - type=str, - default='float_scale', - choices=['float_scale', 'po2_scale'], - help='Whether scale is a float value or a po2. Default: po2.') -parser.add_argument( - '--weight-quant-type', - type=str, - default='sym', - choices=['sym', 'asym'], - help='Weight quantization type. Default: asym.') -parser.add_argument( - '--weight-quant-format', - type=quant_format_validator, - default='int', - help= - 'Weight quantization type. Either int or eXmY, with X+Y==weight_bit_width-1. It\'s possible to add float_ocp_ or float_fnuz_ before the exponent/mantissa bitwidth. Default: int.' -) -parser.add_argument( - '--weight-quant-granularity', - type=str, - default='per_group', - choices=['per_channel', 'per_tensor', 'per_group'], - help='Granularity for scales/zero-point of weights. Default: per_group.') -parser.add_argument( - '--weight-group-size', - type=int, - default=128, - help='Group size for per_group weight quantization. Default: 128.') -parser.add_argument( - '--quantize-weight-zero-point', action='store_true', help='Quantize weight zero-point.') -parser.add_argument( - '--input-bit-width', - type=int, - default=None, - help='Input bit width. Default: None (disables input quantization).') -parser.add_argument( - '--input-quant-format', - type=quant_format_validator, - default='int', - help= - 'Input quantization type. Either int or eXmY, with X+Y==weight_bit_width-1. It\'s possible to add float_ocp_ or float_fnuz_ before the exponent/mantissa bitwidth. Default: int.' -) -parser.add_argument( - '--input-param-method', - type=str, - default='stats', - choices=['stats', 'mse'], - help= - 'How scales/zero-point are determined. 
Default: stats (percentile for static, absmax or minmax for dynamic).' -) -parser.add_argument( - '--input-scale-precision', - type=str, - default='float_scale', - choices=['float_scale', 'po2_scale'], - help='Whether input scale is a float value or a po2. Default: float.') -parser.add_argument( - '--input-scale-type', - type=str, - default='static', - choices=['static', 'dynamic', 'no_scale'], - help='Whether input scale is a static value or a dynamic value.') -parser.add_argument( - '--input-quant-type', - type=str, - default='asym', - choices=['sym', 'asym'], - help='Input quantization type. Default: asym.') -parser.add_argument( - '--input-quant-granularity', - type=str, - default='per_tensor', - choices=['per_tensor', 'per_row', 'per_group'], - help='Granularity for scales/zero-point of inputs. Default: per_tensor.') -parser.add_argument( - '--input-group-size', - type=int, - default=64, - help='Group size for per_group input quantization. Default: 64.') -parser.add_argument( - '--quantize-input-zero-point', action='store_true', help='Quantize input zero-point.') -parser.add_argument( - '--quantize-last-layer', action='store_true', help='Quantize last nn.Linear layer.') -parser.add_argument('--gptq', action='store_true', help='Apply GPTQ.') -parser.add_argument('--act-calibration', action='store_true', help='Apply activation calibration.') -parser.add_argument('--bias-corr', action='store_true', help='Apply bias correction.') -parser.add_argument('--ln-affine-merge', action='store_true', help='Merge LN affine params.') -parser.add_argument('--no-quantize', action='store_true', help='Disable quantization.') -parser.add_argument( - '--no-float16', - action='store_true', - help='Disable float16 as base datatype and switch to float32.') -parser.add_argument( - '--replace-mha', - action='store_true', - help='Replace HuggingFace Attention with a quantizable version') -parser.add_argument( - '--weight-equalization', - action='store_true', - help='Apply weight equalization. Relevant to ReLU based models (e.g. OPT).') -parser.add_argument( - '--act-equalization', - default=None, - choices=[None, 'layerwise', 'fx'], - help='Apply activation equalization (SmoothQuant). Layerwise introduces standalone mul nodes,' - 'while fx merges them whenever possible into previous tensors, which is possible on ReLU based models (e.g. OPT).' -) -parser.add_argument('--load-awq', type=str, default=None, help="Load the awq search results.") -parser.add_argument( - '--export-target', - default=None, - choices=[ - None, - 'onnx_qcdq', - 'torch_qcdq', - 'sharded_torchmlir_group_weight', - 'sharded_packed_torchmlir_group_weight'], - help='Model export.') -parser.add_argument( - '--checkpoint-name', - type=str, - default=None, - help="Filename to save checkpoint. If `None`, no checkpoint is saved (default: %(default)s)") - def set_seed(seed): np.random.seed(seed) @@ -261,8 +111,7 @@ def validate(args): assert args.export_target != 'torch_qcdq', "Cannot export Torch QCDQ with FX" -def main(): - args = parser.parse_args() +def main(args): validate(args) set_seed(args.seed) @@ -448,5 +297,159 @@ def main(): model_export(model, calibration_loader[0], args) +def parse_args(args): + parser = argparse.ArgumentParser() + parser.add_argument( + '--model', + type=str, + default="facebook/opt-125m", + help='HF model name. Default: facebook/opt-125m.') + parser.add_argument( + '--seed', type=int, default=0, help='Seed for sampling the calibration data. 
Default: 0.') + parser.add_argument( + '--nsamples', type=int, default=128, help='Number of calibration data samples. Default: 128.') + parser.add_argument('--seqlen', type=int, default=2048, help='Sequence length. Default: 2048.') + parser.add_argument('--eval', action='store_true', help='Eval model PPL on the chosen Dataset.') + parser.add_argument( + '--dataset', + type=str, + choices=['wikitext2', 'c4'], + default='wikitext2', + help='Dataset to use for quantization (default: %(default)s)') + parser.add_argument('--weight-bit-width', type=int, default=8, help='Weight bit width. Default: 8.') + parser.add_argument( + '--weight-param-method', + type=str, + default='stats', + choices=['stats', 'mse'], + help='How scales/zero-point are determined. Default: stats.') + parser.add_argument( + '--weight-scale-precision', + type=str, + default='float_scale', + choices=['float_scale', 'po2_scale'], + help='Whether scale is a float value or a po2. Default: po2.') + parser.add_argument( + '--weight-quant-type', + type=str, + default='sym', + choices=['sym', 'asym'], + help='Weight quantization type. Default: asym.') + parser.add_argument( + '--weight-quant-format', + type=quant_format_validator, + default='int', + help= + 'Weight quantization type. Either int or eXmY, with X+Y==weight_bit_width-1. It\'s possible to add float_ocp_ or float_fnuz_ before the exponent/mantissa bitwidth. Default: int.' + ) + parser.add_argument( + '--weight-quant-granularity', + type=str, + default='per_group', + choices=['per_channel', 'per_tensor', 'per_group'], + help='Granularity for scales/zero-point of weights. Default: per_group.') + parser.add_argument( + '--weight-group-size', + type=int, + default=128, + help='Group size for per_group weight quantization. Default: 128.') + parser.add_argument( + '--quantize-weight-zero-point', action='store_true', help='Quantize weight zero-point.') + parser.add_argument( + '--input-bit-width', + type=int, + default=None, + help='Input bit width. Default: None (disables input quantization).') + parser.add_argument( + '--input-quant-format', + type=quant_format_validator, + default='int', + help= + 'Input quantization type. Either int or eXmY, with X+Y==weight_bit_width-1. It\'s possible to add float_ocp_ or float_fnuz_ before the exponent/mantissa bitwidth. Default: int.' + ) + parser.add_argument( + '--input-param-method', + type=str, + default='stats', + choices=['stats', 'mse'], + help= + 'How scales/zero-point are determined. Default: stats (percentile for static, absmax or minmax for dynamic).' + ) + parser.add_argument( + '--input-scale-precision', + type=str, + default='float_scale', + choices=['float_scale', 'po2_scale'], + help='Whether input scale is a float value or a po2. Default: float.') + parser.add_argument( + '--input-scale-type', + type=str, + default='static', + choices=['static', 'dynamic', 'no_scale'], + help='Whether input scale is a static value or a dynamic value.') + parser.add_argument( + '--input-quant-type', + type=str, + default='asym', + choices=['sym', 'asym'], + help='Input quantization type. Default: asym.') + parser.add_argument( + '--input-quant-granularity', + type=str, + default='per_tensor', + choices=['per_tensor', 'per_row', 'per_group'], + help='Granularity for scales/zero-point of inputs. Default: per_tensor.') + parser.add_argument( + '--input-group-size', + type=int, + default=64, + help='Group size for per_group input quantization. 
Default: 64.') + parser.add_argument( + '--quantize-input-zero-point', action='store_true', help='Quantize input zero-point.') + parser.add_argument( + '--quantize-last-layer', action='store_true', help='Quantize last nn.Linear layer.') + parser.add_argument('--gptq', action='store_true', help='Apply GPTQ.') + parser.add_argument('--act-calibration', action='store_true', help='Apply activation calibration.') + parser.add_argument('--bias-corr', action='store_true', help='Apply bias correction.') + parser.add_argument('--ln-affine-merge', action='store_true', help='Merge LN affine params.') + parser.add_argument('--no-quantize', action='store_true', help='Disable quantization.') + parser.add_argument( + '--no-float16', + action='store_true', + help='Disable float16 as base datatype and switch to float32.') + parser.add_argument( + '--replace-mha', + action='store_true', + help='Replace HuggingFace Attention with a quantizable version') + parser.add_argument( + '--weight-equalization', + action='store_true', + help='Apply weight equalization. Relevant to ReLU based models (e.g. OPT).') + parser.add_argument( + '--act-equalization', + default=None, + choices=[None, 'layerwise', 'fx'], + help='Apply activation equalization (SmoothQuant). Layerwise introduces standalone mul nodes,' + 'while fx merges them whenever possible into previous tensors, which is possible on ReLU based models (e.g. OPT).' + ) + parser.add_argument('--load-awq', type=str, default=None, help="Load the awq search results.") + parser.add_argument( + '--export-target', + default=None, + choices=[ + None, + 'onnx_qcdq', + 'torch_qcdq', + 'sharded_torchmlir_group_weight', + 'sharded_packed_torchmlir_group_weight'], + help='Model export.') + parser.add_argument( + '--checkpoint-name', + type=str, + default=None, + help="Filename to save checkpoint. If `None`, no checkpoint is saved (default: %(default)s)") + return parser.parse_args(args) + if __name__ == '__main__': - main() + args = parse_args(sys.argv[1:]) + main(args) From cd73b1409157c5f0c7f4025057b9ea480d47eafe Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Mon, 19 Aug 2024 17:07:14 +0100 Subject: [PATCH 02/53] Refactor (example/llm): Refactor to prepare for automated tests. 
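
With this refactor the entry point can be driven from tests: main()
now takes pre-parsed arguments and returns the measured perplexities
together with the (possibly quantized) model. A minimal sketch of the
intended usage (the model name and flags are illustrative only):

    from brevitas_examples.llm.main import main, parse_args

    args = parse_args(["--model", "facebook/opt-125m", "--eval"])
    float_ppl, quant_ppl, model = main(args)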
--- src/brevitas_examples/llm/main.py | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/src/brevitas_examples/llm/main.py b/src/brevitas_examples/llm/main.py index 94f596529..78cf8ba49 100644 --- a/src/brevitas_examples/llm/main.py +++ b/src/brevitas_examples/llm/main.py @@ -3,9 +3,9 @@ # SPDX-License-Identifier: BSD-3-Clause """ -import sys import argparse import re +import sys import numpy as np from optimum.amd.brevitas.accelerate_utils import offload_model @@ -130,6 +130,8 @@ def main(args): print("Model loaded.") model.eval() tokenizer = AutoTokenizer.from_pretrained(args.model) + float_ppl = None + quant_ppl = None if args.load_awq: from brevitas_examples.llm.llm_quant.awq.pre_quant import apply_awq @@ -174,10 +176,10 @@ def main(args): assert args.export_target != 'torch_qcdq', "TorchScript QCDQ export and Evaluation simultaneously" print("Float model eval...") model = offload_model(model) - ppl = compute_perplexity( + float_ppl = compute_perplexity( model, validation_loader, context_length=args.seqlen // 2, tokenizer=tokenizer) remove_hooks(model) - print(f"Float perplexity ({args.dataset}): {ppl}") + print(f"Float perplexity ({args.dataset}): {float_ppl}") if require_fx: model = get_fx(model) @@ -281,9 +283,9 @@ def main(args): if args.eval: print("Model eval...") - ppl = compute_perplexity( + quant_ppl = compute_perplexity( model, validation_loader, context_length=args.seqlen // 2, tokenizer=tokenizer) - print(f"Quantized perplexity ({args.dataset}): {ppl}") + print(f"Quantized perplexity ({args.dataset}): {quant_ppl}") remove_hooks(model) if args.checkpoint_name is not None: @@ -296,6 +298,8 @@ def main(args): model = model.to(dtype=torch.float32) model_export(model, calibration_loader[0], args) + return float_ppl, quant_ppl, model + def parse_args(args): parser = argparse.ArgumentParser() @@ -307,7 +311,10 @@ def parse_args(args): parser.add_argument( '--seed', type=int, default=0, help='Seed for sampling the calibration data. Default: 0.') parser.add_argument( - '--nsamples', type=int, default=128, help='Number of calibration data samples. Default: 128.') + '--nsamples', + type=int, + default=128, + help='Number of calibration data samples. Default: 128.') parser.add_argument('--seqlen', type=int, default=2048, help='Sequence length. Default: 2048.') parser.add_argument('--eval', action='store_true', help='Eval model PPL on the chosen Dataset.') parser.add_argument( @@ -316,7 +323,8 @@ def parse_args(args): choices=['wikitext2', 'c4'], default='wikitext2', help='Dataset to use for quantization (default: %(default)s)') - parser.add_argument('--weight-bit-width', type=int, default=8, help='Weight bit width. Default: 8.') + parser.add_argument( + '--weight-bit-width', type=int, default=8, help='Weight bit width. 
Default: 8.') parser.add_argument( '--weight-param-method', type=str, @@ -409,7 +417,8 @@ def parse_args(args): parser.add_argument( '--quantize-last-layer', action='store_true', help='Quantize last nn.Linear layer.') parser.add_argument('--gptq', action='store_true', help='Apply GPTQ.') - parser.add_argument('--act-calibration', action='store_true', help='Apply activation calibration.') + parser.add_argument( + '--act-calibration', action='store_true', help='Apply activation calibration.') parser.add_argument('--bias-corr', action='store_true', help='Apply bias correction.') parser.add_argument('--ln-affine-merge', action='store_true', help='Merge LN affine params.') parser.add_argument('--no-quantize', action='store_true', help='Disable quantization.') @@ -450,6 +459,7 @@ def parse_args(args): help="Filename to save checkpoint. If `None`, no checkpoint is saved (default: %(default)s)") return parser.parse_args(args) + if __name__ == '__main__': args = parse_args(sys.argv[1:]) main(args) From dcd6a782c310797a46957ae264bc4cbc7509e77f Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Mon, 19 Aug 2024 17:09:44 +0100 Subject: [PATCH 03/53] test (example/llm): Added inital end-to-end example for LLM entry-point. --- tests/brevitas_examples/llm.py | 113 +++++++++++++++++++++++++++++++++ 1 file changed, 113 insertions(+) create mode 100644 tests/brevitas_examples/llm.py diff --git a/tests/brevitas_examples/llm.py b/tests/brevitas_examples/llm.py new file mode 100644 index 000000000..5188141e3 --- /dev/null +++ b/tests/brevitas_examples/llm.py @@ -0,0 +1,113 @@ +# Copyright (C) 2024, Advanced Micro Devices, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause + +from dataclasses import dataclass +from argparse import Namespace +import logging +import shutil + +import pytest + +import numpy as np + +from brevitas_examples.llm.main import main +from brevitas_examples.llm.main import parse_args + + +def ptid2pathname(string): + return string.replace("/", "-").replace(":", "-") + + +def allclose(x, y): + return np.allclose(x, y, rtol=1e-02, atol=5e-01, equal_nan=False) + + +def allexact(x, y): + return np.allclose(x, y, rtol=0.0, atol=0.0, equal_nan=False) + + +def requires_fx(args): + return args.act_equalization == "fx" or args.weight_equalization or args.ln_affine_merge + + +@dataclass +class ModelAndPpl: + name: str + float_ppl: float + quant_ppl: float + supports_fx: bool + + +class UpdatableNamespace(Namespace): + def update(self, **kwargs): + self.__dict__.update(**kwargs) + + +@pytest.fixture(scope="session", params=[ + ModelAndPpl( + name="hf-internal-testing/tiny-random-LlamaForCausalLM", + float_ppl=None, + quant_ppl=None, + supports_fx=True, + ), + ModelAndPpl( + name="hf-internal-testing/tiny-random-OPTForCausalLM", + float_ppl=None, + quant_ppl=None, + supports_fx=True, + ), + ModelAndPpl( + name="hf-internal-testing/tiny-random-MistralForCausalLM", + float_ppl=None, + quant_ppl=None, + supports_fx=False, + ), +]) +def small_models_with_ppl(request): + yield request.param + + +@pytest.fixture() +def default_run_args(request): + args = UpdatableNamespace(**vars(parse_args([]))) + args.nsamples = 2 + args.seqlen = 2 + args.model = "hf-internal-testing/tiny-random-MistralForCausalLM" + args.dataset = "c4" + args.eval = True + #args.checkpoint = ptid2pathname(request.node.nodeid) + ".pth" # Example filename which won't clash + args.weight_bit_width = 8 + args.weight_quant_granularity = "per_channel" # "per_tensor", "per_channel", "per_group". 
+ args.input_bit_width = 8 + args.act_calibration = True + return args + + +@pytest.fixture(params=[ + {}, + {"bias_corr": True}, + {"act_equalization": "layerwise"}, + {"act_equalization": "fx"}, + {"weight_equalization": True}, + {"gptq": True}, + {"ln_affine_merge": True}, + {"replace_mha": True}, + ]) +def toggle_run_args(default_run_args, request): + args = default_run_args + args.update(**request.param) + yield args + + +@pytest.mark.examples +@pytest.mark.weekly +def test_small_models_toggle_run_args(caplog, toggle_run_args, small_models_with_ppl): + caplog.set_level(logging.INFO) + args = toggle_run_args + args.model = small_models_with_ppl.name + exp_float_ppl = small_models_with_ppl.float_ppl + exp_quant_ppl = small_models_with_ppl.quant_ppl + use_fx = requires_fx(args) + if use_fx and not small_models_with_ppl.supports_fx: + pytest.xfail(f"{small_models_with_ppl.name} does not support FX") + float_ppl, quant_ppl, model = main(args) From 3ef28d4a6f0d26fed71d557876c17fecef2fa5d1 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Mon, 19 Aug 2024 17:45:02 +0100 Subject: [PATCH 04/53] setup (requirements): added requirements file for LLM example for easier testing --- requirements/requirements-example-llm.txt | 1 + setup.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) create mode 100644 requirements/requirements-example-llm.txt diff --git a/requirements/requirements-example-llm.txt b/requirements/requirements-example-llm.txt new file mode 100644 index 000000000..a1a2c615f --- /dev/null +++ b/requirements/requirements-example-llm.txt @@ -0,0 +1 @@ +optimum-amd[brevitas] @ git+https://github.com/huggingface/optimum-amd.git@main diff --git a/setup.py b/setup.py index 10e920981..817c7f88e 100644 --- a/setup.py +++ b/setup.py @@ -41,7 +41,8 @@ def read_requirements(filename): "stt": read_requirements('requirements-stt.txt'), "vision": read_requirements('requirements-vision.txt'), "finn_integration": read_requirements('requirements-finn-integration.txt'), - "ort_integration": read_requirements('requirements-ort-integration.txt')}, + "ort_integration": read_requirements('requirements-ort-integration.txt'), + "example_llm": read_requirements('requirements-example-llm.txt')}, packages=find_packages('src'), package_dir={'': 'src'}, zip_safe=False, From 5aad69e37bc75deef150a0c6435f42981a3ff299 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Tue, 20 Aug 2024 17:44:45 +0100 Subject: [PATCH 05/53] Fix: pre-commit --- src/brevitas_examples/llm/main.py | 7 +-- tests/brevitas_examples/llm.py | 74 +++++++++++++++++-------------- 2 files changed, 45 insertions(+), 36 deletions(-) diff --git a/src/brevitas_examples/llm/main.py b/src/brevitas_examples/llm/main.py index 78cf8ba49..6c3c34dd5 100644 --- a/src/brevitas_examples/llm/main.py +++ b/src/brevitas_examples/llm/main.py @@ -349,7 +349,7 @@ def parse_args(args): default='int', help= 'Weight quantization type. Either int or eXmY, with X+Y==weight_bit_width-1. It\'s possible to add float_ocp_ or float_fnuz_ before the exponent/mantissa bitwidth. Default: int.' - ) + ) parser.add_argument( '--weight-quant-granularity', type=str, @@ -374,7 +374,7 @@ def parse_args(args): default='int', help= 'Input quantization type. Either int or eXmY, with X+Y==weight_bit_width-1. It\'s possible to add float_ocp_ or float_fnuz_ before the exponent/mantissa bitwidth. Default: int.' 
- ) + ) parser.add_argument( '--input-param-method', type=str, @@ -456,7 +456,8 @@ def parse_args(args): '--checkpoint-name', type=str, default=None, - help="Filename to save checkpoint. If `None`, no checkpoint is saved (default: %(default)s)") + help="Filename to save checkpoint. If `None`, no checkpoint is saved (default: %(default)s)" + ) return parser.parse_args(args) diff --git a/tests/brevitas_examples/llm.py b/tests/brevitas_examples/llm.py index 5188141e3..d251d040e 100644 --- a/tests/brevitas_examples/llm.py +++ b/tests/brevitas_examples/llm.py @@ -1,14 +1,13 @@ # Copyright (C) 2024, Advanced Micro Devices, Inc. All rights reserved. # SPDX-License-Identifier: BSD-3-Clause -from dataclasses import dataclass from argparse import Namespace +from dataclasses import dataclass import logging import shutil -import pytest - import numpy as np +import pytest from brevitas_examples.llm.main import main from brevitas_examples.llm.main import parse_args @@ -39,30 +38,32 @@ class ModelAndPpl: class UpdatableNamespace(Namespace): + def update(self, **kwargs): self.__dict__.update(**kwargs) -@pytest.fixture(scope="session", params=[ - ModelAndPpl( - name="hf-internal-testing/tiny-random-LlamaForCausalLM", - float_ppl=None, - quant_ppl=None, - supports_fx=True, - ), - ModelAndPpl( - name="hf-internal-testing/tiny-random-OPTForCausalLM", - float_ppl=None, - quant_ppl=None, - supports_fx=True, - ), - ModelAndPpl( - name="hf-internal-testing/tiny-random-MistralForCausalLM", - float_ppl=None, - quant_ppl=None, - supports_fx=False, - ), -]) +@pytest.fixture( + scope="session", + params=[ + ModelAndPpl( + name="hf-internal-testing/tiny-random-LlamaForCausalLM", + float_ppl=None, + quant_ppl=None, + supports_fx=True, + ), + ModelAndPpl( + name="hf-internal-testing/tiny-random-OPTForCausalLM", + float_ppl=None, + quant_ppl=None, + supports_fx=True, + ), + ModelAndPpl( + name="hf-internal-testing/tiny-random-MistralForCausalLM", + float_ppl=None, + quant_ppl=None, + supports_fx=False, + ),]) def small_models_with_ppl(request): yield request.param @@ -77,22 +78,29 @@ def default_run_args(request): args.eval = True #args.checkpoint = ptid2pathname(request.node.nodeid) + ".pth" # Example filename which won't clash args.weight_bit_width = 8 - args.weight_quant_granularity = "per_channel" # "per_tensor", "per_channel", "per_group". + args.weight_quant_granularity = "per_channel" # "per_tensor", "per_channel", "per_group". args.input_bit_width = 8 args.act_calibration = True return args -@pytest.fixture(params=[ +@pytest.fixture( + params=[ {}, - {"bias_corr": True}, - {"act_equalization": "layerwise"}, - {"act_equalization": "fx"}, - {"weight_equalization": True}, - {"gptq": True}, - {"ln_affine_merge": True}, - {"replace_mha": True}, - ]) + { + "bias_corr": True}, + { + "act_equalization": "layerwise"}, + { + "act_equalization": "fx"}, + { + "weight_equalization": True}, + { + "gptq": True}, + { + "ln_affine_merge": True}, + { + "replace_mha": True},]) def toggle_run_args(default_run_args, request): args = default_run_args args.update(**request.param) From 25d79e3efe99b805bba7d2537476b35a1fcabfcd Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Tue, 20 Aug 2024 18:13:53 +0100 Subject: [PATCH 06/53] test (ex/llm): reorg code a little bit. 
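
No functional change: UpdatableNamespace and requires_fx are grouped
with the other helpers, above the fixtures that use them.
UpdatableNamespace only adds dict-style updates on top of
argparse.Namespace, e.g. (values illustrative):

    args = UpdatableNamespace(**vars(parse_args([])))
    args.update(gptq=True, weight_bit_width=4)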
--- tests/brevitas_examples/llm.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/brevitas_examples/llm.py b/tests/brevitas_examples/llm.py index d251d040e..d8f5720d6 100644 --- a/tests/brevitas_examples/llm.py +++ b/tests/brevitas_examples/llm.py @@ -25,6 +25,12 @@ def allexact(x, y): return np.allclose(x, y, rtol=0.0, atol=0.0, equal_nan=False) +class UpdatableNamespace(Namespace): + + def update(self, **kwargs): + self.__dict__.update(**kwargs) + + def requires_fx(args): return args.act_equalization == "fx" or args.weight_equalization or args.ln_affine_merge @@ -37,12 +43,6 @@ class ModelAndPpl: supports_fx: bool -class UpdatableNamespace(Namespace): - - def update(self, **kwargs): - self.__dict__.update(**kwargs) - - @pytest.fixture( scope="session", params=[ From e3575f4ac961f7cc8ef5739fce9a6a409ccbdb1e Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Tue, 20 Aug 2024 18:18:03 +0100 Subject: [PATCH 07/53] test (ex/llm): removed quant_ppl to model definition --- tests/brevitas_examples/llm.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tests/brevitas_examples/llm.py b/tests/brevitas_examples/llm.py index d8f5720d6..a200b4386 100644 --- a/tests/brevitas_examples/llm.py +++ b/tests/brevitas_examples/llm.py @@ -39,7 +39,6 @@ def requires_fx(args): class ModelAndPpl: name: str float_ppl: float - quant_ppl: float supports_fx: bool @@ -49,19 +48,16 @@ class ModelAndPpl: ModelAndPpl( name="hf-internal-testing/tiny-random-LlamaForCausalLM", float_ppl=None, - quant_ppl=None, supports_fx=True, ), ModelAndPpl( name="hf-internal-testing/tiny-random-OPTForCausalLM", float_ppl=None, - quant_ppl=None, supports_fx=True, ), ModelAndPpl( name="hf-internal-testing/tiny-random-MistralForCausalLM", float_ppl=None, - quant_ppl=None, supports_fx=False, ),]) def small_models_with_ppl(request): @@ -114,7 +110,6 @@ def test_small_models_toggle_run_args(caplog, toggle_run_args, small_models_with args = toggle_run_args args.model = small_models_with_ppl.name exp_float_ppl = small_models_with_ppl.float_ppl - exp_quant_ppl = small_models_with_ppl.quant_ppl use_fx = requires_fx(args) if use_fx and not small_models_with_ppl.supports_fx: pytest.xfail(f"{small_models_with_ppl.name} does not support FX") From 6f8b18660440c7de7481010d5543360dc9c89d99 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Tue, 20 Aug 2024 18:55:30 +0100 Subject: [PATCH 08/53] test (ex/llm): removed replace MHA from toggle tests --- tests/brevitas_examples/llm.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/brevitas_examples/llm.py b/tests/brevitas_examples/llm.py index a200b4386..d40a62799 100644 --- a/tests/brevitas_examples/llm.py +++ b/tests/brevitas_examples/llm.py @@ -94,9 +94,7 @@ def default_run_args(request): { "gptq": True}, { - "ln_affine_merge": True}, - { - "replace_mha": True},]) + "ln_affine_merge": True},]) def toggle_run_args(default_run_args, request): args = default_run_args args.update(**request.param) From a5eedeb50ef23828a8f9da8a4e2a9bd2008f2186 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Tue, 20 Aug 2024 18:56:24 +0100 Subject: [PATCH 09/53] test (ex/llm): added accuracy tests for some basic configurations --- tests/brevitas_examples/llm.py | 44 ++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/tests/brevitas_examples/llm.py b/tests/brevitas_examples/llm.py index d40a62799..f389b780d 100644 --- a/tests/brevitas_examples/llm.py +++ b/tests/brevitas_examples/llm.py @@ -112,3 +112,47 @@ def 
test_small_models_toggle_run_args(caplog, toggle_run_args, small_models_with if use_fx and not small_models_with_ppl.supports_fx: pytest.xfail(f"{small_models_with_ppl.name} does not support FX") float_ppl, quant_ppl, model = main(args) + + +@pytest.fixture( + params=[ + { + "model": "hf-internal-testing/tiny-random-MistralForCausalLM", + "act_equalization": "layerwise", + "gptq": True, + "float_ppl": 31274.05078125, + "quant_ppl": 33139.23046875}, + { + "model": "hf-internal-testing/tiny-random-LlamaForCausalLM", + "act_equalization": "fx", + "bias_corr": True, + "float_ppl": 33239.5, + "quant_ppl": 33283.75390625}, + { + "model": "hf-internal-testing/tiny-random-OPTForCausalLM", + "weight_equalization": True, + "ln_affine_merge": True, + "replace_mha": True, + "float_ppl": 50016.0, + "quant_ppl": 50016.0},]) +def acc_args_and_acc(default_run_args, request): + args = default_run_args + run_dict = request.param + float_ppl = run_dict["float_ppl"] + quant_ppl = run_dict["quant_ppl"] + del run_dict["float_ppl"] + del run_dict["quant_ppl"] + args.update(**run_dict) + yield args, float_ppl, quant_ppl + + +@pytest.mark.examples +@pytest.mark.weekly +def test_small_models_acc(caplog, acc_args_and_acc): + caplog.set_level(logging.INFO) + args, exp_float_ppl, exp_quant_ppl = acc_args_and_acc + float_ppl, quant_ppl, model = main(args) + float_ppl = float_ppl.detach().cpu().numpy() + quant_ppl = quant_ppl.detach().cpu().numpy() + assert allexact(exp_float_ppl, float_ppl), f"Expected float PPL {exp_float_ppl}, measured PPL {float_ppl}" + assert allexact(exp_quant_ppl, quant_ppl), f"Expected quant PPL {exp_quant_ppl}, measured PPL {quant_ppl}" From 89ffa008dcca9bc57e054e151b645312270c5a0f Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Wed, 21 Aug 2024 12:13:39 +0100 Subject: [PATCH 10/53] test (ex/llm): Added initial tests that layers are inserted properly. 
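
The new fixture/test pair runs the entry point and then verifies that
selected submodules were swapped for their quantized counterparts. The
core of the check is a walk over named_modules(), roughly as follows
(the key and expected type string are illustrative):

    for name, layer in model.named_modules():
        if name == "lm_head":
            assert "QuantLinear" in str(type(layer))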
--- tests/brevitas_examples/llm.py | 39 ++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/tests/brevitas_examples/llm.py b/tests/brevitas_examples/llm.py index f389b780d..4e4f90f3a 100644 --- a/tests/brevitas_examples/llm.py +++ b/tests/brevitas_examples/llm.py @@ -25,6 +25,15 @@ def allexact(x, y): return np.allclose(x, y, rtol=0.0, atol=0.0, equal_nan=False) +def assert_layer_type(model, key, string): + for name, layer in model.named_modules(): + if name == key: + ltype = str(type(layer)) + assert ltype == string, f"Expected layer type: {string}, found {ltype} for key: {key}" + return + assert False, f"Layer key: {key} not found" + + class UpdatableNamespace(Namespace): def update(self, **kwargs): @@ -156,3 +165,33 @@ def test_small_models_acc(caplog, acc_args_and_acc): quant_ppl = quant_ppl.detach().cpu().numpy() assert allexact(exp_float_ppl, float_ppl), f"Expected float PPL {exp_float_ppl}, measured PPL {float_ppl}" assert allexact(exp_quant_ppl, quant_ppl), f"Expected quant PPL {exp_quant_ppl}, measured PPL {quant_ppl}" + + +@pytest.fixture( + params=[ + { + "model": "hf-internal-testing/tiny-random-MistralForCausalLM", + "quantize_last_layer": False, + "layer_key": "lm_head", + "layer_type": ""}, + { + "model": "hf-internal-testing/tiny-random-MistralForCausalLM", + "quantize_last_layer": True, + "layer_key": "lm_head", + "layer_type": ""},]) +def layer_args(default_run_args, request): + args = default_run_args + layer_dict = request.param + layer_key = layer_dict["layer_key"] + layer_type = layer_dict["layer_type"] + del layer_dict["layer_key"] + del layer_dict["layer_type"] + args.update(**layer_dict) + yield args, layer_key, layer_type + + +def test_small_models_quant_layer(caplog, layer_args): + caplog.set_level(logging.INFO) + args, layer_key, layer_type = layer_args + float_ppl, quant_ppl, model = main(args) + assert_layer_type(model, layer_key, layer_type) From cb149b4a8fec60f5fde419ab58eee891388da0d7 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Wed, 21 Aug 2024 12:44:45 +0100 Subject: [PATCH 11/53] test (ex/llm): allowed testing of multiple layer types --- tests/brevitas_examples/llm.py | 35 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/tests/brevitas_examples/llm.py b/tests/brevitas_examples/llm.py index 4e4f90f3a..4664523a0 100644 --- a/tests/brevitas_examples/llm.py +++ b/tests/brevitas_examples/llm.py @@ -25,13 +25,16 @@ def allexact(x, y): return np.allclose(x, y, rtol=0.0, atol=0.0, equal_nan=False) -def assert_layer_type(model, key, string): - for name, layer in model.named_modules(): - if name == key: - ltype = str(type(layer)) - assert ltype == string, f"Expected layer type: {string}, found {ltype} for key: {key}" - return - assert False, f"Layer key: {key} not found" +def assert_layer_types(model, exp_layer_types): + for key, string in exp_layer_types.items(): + matched = False + for name, layer in model.named_modules(): + if name == key: + matched = True + ltype = str(type(layer)) + assert ltype == string, f"Expected layer type: {string}, found {ltype} for key: {key}" + continue + assert matched, f"Layer key: {key} not found" class UpdatableNamespace(Namespace): @@ -172,26 +175,22 @@ def test_small_models_acc(caplog, acc_args_and_acc): { "model": "hf-internal-testing/tiny-random-MistralForCausalLM", "quantize_last_layer": False, - "layer_key": "lm_head", - "layer_type": ""}, + "exp_layer_types": {"lm_head": ""}}, { "model": "hf-internal-testing/tiny-random-MistralForCausalLM", 
"quantize_last_layer": True, - "layer_key": "lm_head", - "layer_type": ""},]) + "exp_layer_types": {"lm_head": ""}},]) def layer_args(default_run_args, request): args = default_run_args layer_dict = request.param - layer_key = layer_dict["layer_key"] - layer_type = layer_dict["layer_type"] - del layer_dict["layer_key"] - del layer_dict["layer_type"] + exp_layer_types = layer_dict["exp_layer_types"] + del layer_dict["exp_layer_types"] args.update(**layer_dict) - yield args, layer_key, layer_type + yield args, exp_layer_types def test_small_models_quant_layer(caplog, layer_args): caplog.set_level(logging.INFO) - args, layer_key, layer_type = layer_args + args, exp_layer_types = layer_args float_ppl, quant_ppl, model = main(args) - assert_layer_type(model, layer_key, layer_type) + assert_layer_types(model, exp_layer_types) From 5e7a7e753acf203d9a82e732210d8912dbbfa1d4 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Wed, 21 Aug 2024 13:20:56 +0100 Subject: [PATCH 12/53] test (ex/llm): Added extra layer replacement checks --- tests/brevitas_examples/llm.py | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/tests/brevitas_examples/llm.py b/tests/brevitas_examples/llm.py index 4664523a0..33f347699 100644 --- a/tests/brevitas_examples/llm.py +++ b/tests/brevitas_examples/llm.py @@ -28,13 +28,15 @@ def allexact(x, y): def assert_layer_types(model, exp_layer_types): for key, string in exp_layer_types.items(): matched = False + layer_names = [] for name, layer in model.named_modules(): + layer_names += [name] if name == key: matched = True ltype = str(type(layer)) assert ltype == string, f"Expected layer type: {string}, found {ltype} for key: {key}" continue - assert matched, f"Layer key: {key} not found" + assert matched, f"Layer key: {key} not found in {layer_names}" class UpdatableNamespace(Namespace): @@ -174,8 +176,26 @@ def test_small_models_acc(caplog, acc_args_and_acc): params=[ { "model": "hf-internal-testing/tiny-random-MistralForCausalLM", - "quantize_last_layer": False, - "exp_layer_types": {"lm_head": ""}}, + "exp_layer_types": { + "lm_head": "", + "model.layers.0.self_attn.q_proj": "", + "model.layers.0.self_attn.q_proj.input_quant.fused_activation_quant_proxy.tensor_quant": "", + "model.layers.0.self_attn.q_proj.weight_quant.tensor_quant": "", + }}, + { + "model": "hf-internal-testing/tiny-random-LlamaForCausalLM", + "act_equalization": "layerwise", + "exp_layer_types": { + "model.layers.0.self_attn.q_proj": "", + "model.layers.0.self_attn.q_proj.layer": "", + }}, + { + "model": "hf-internal-testing/tiny-random-OPTForCausalLM", + "replace_mha": True, + "exp_layer_types": { + "model.decoder.layers.0.self_attn": "", + "model.decoder.layers.0.self_attn.mha": "", + }}, { "model": "hf-internal-testing/tiny-random-MistralForCausalLM", "quantize_last_layer": True, From 9dd38aa73c2b44926ca9c2b80b9f3e7dfb9614ee Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Wed, 21 Aug 2024 15:50:58 +0100 Subject: [PATCH 13/53] Fix (ex/gen): Bugfix when applying MX as activation quantization type --- src/brevitas_examples/common/generative/quantize.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/brevitas_examples/common/generative/quantize.py b/src/brevitas_examples/common/generative/quantize.py index 57670f6f6..73831d5fa 100644 --- a/src/brevitas_examples/common/generative/quantize.py +++ b/src/brevitas_examples/common/generative/quantize.py @@ -153,7 +153,8 @@ 'sym': Int8DynamicActPerGroupFloat}}}, 'po2_scale': { 'stats': { - 
'per_group': MXInt8Act}}}}, + 'per_group': { + 'sym': MXInt8Act}}}}}, 'float': { 'static': { 'float_scale': { @@ -175,7 +176,8 @@ 'dynamic': { 'po2_scale': { 'stats': { - 'per_group': MXFloat8e4m3Act}}}}, + 'per_group': { + 'sym': MXFloat8e4m3Act}}}}}, 'float_fnuz': { 'static': { 'float_scale': { From 9940a9f9aa6de86c54462fa181fdfcc438f71c53 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Wed, 21 Aug 2024 16:44:36 +0100 Subject: [PATCH 14/53] test (ex/llm): Added tests for FP8, MX datatypes --- tests/brevitas_examples/llm.py | 45 ++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/tests/brevitas_examples/llm.py b/tests/brevitas_examples/llm.py index 33f347699..f261700cc 100644 --- a/tests/brevitas_examples/llm.py +++ b/tests/brevitas_examples/llm.py @@ -182,6 +182,51 @@ def test_small_models_acc(caplog, acc_args_and_acc): "model.layers.0.self_attn.q_proj.input_quant.fused_activation_quant_proxy.tensor_quant": "", "model.layers.0.self_attn.q_proj.weight_quant.tensor_quant": "", }}, + { + "model": "hf-internal-testing/tiny-random-MistralForCausalLM", + "weight_quant_format": "float_ocp_e4m3", + "weight_quant_type": "sym", + "input_quant_format": "float_ocp_e5m2", + "input_quant_type": "sym", + "exp_layer_types": { + "model.layers.0.self_attn.q_proj": "", + "model.layers.0.self_attn.q_proj.input_quant.fused_activation_quant_proxy.tensor_quant": "", + "model.layers.0.self_attn.q_proj.weight_quant.tensor_quant": "", + }}, + { + "model": "hf-internal-testing/tiny-random-MistralForCausalLM", + "weight_quant_format": "float_fnuz_e4m3", + "weight_quant_type": "sym", + "input_quant_format": "float_fnuz_e5m2", + "input_quant_type": "sym", + "exp_layer_types": { + "model.layers.0.self_attn.q_proj": "", + "model.layers.0.self_attn.q_proj.input_quant.fused_activation_quant_proxy.tensor_quant": "", + "model.layers.0.self_attn.q_proj.weight_quant.tensor_quant": "", + }}, + { + "model": "hf-internal-testing/tiny-random-LlamaForCausalLM", + "weight_quant_format": "float_ocp_e4m3", + "weight_scale_precision": "po2_scale", + "weight_param_method": "stats", + "weight_quant_granularity": "per_group", + "weight_group_size": 16, + "weight_quant_type": "sym", + "input_quant_format": "float_ocp_e5m2", + "input_scale_type": "dynamic", + "input_scale_precision": "po2_scale", + "input_param_method": "stats", + "input_quant_granularity": "per_group", + "input_group_size": 16, + "input_quant_type": "sym", + "act_calibration": False, + "exp_layer_types": { + "model.layers.0.self_attn.q_proj": "", + "model.layers.0.self_attn.q_proj.input_quant.fused_activation_quant_proxy.tensor_quant": "", + "model.layers.0.self_attn.q_proj.input_quant.fused_activation_quant_proxy.tensor_quant.scaling_impl.stats_input_view_shape_impl": "", + "model.layers.0.self_attn.q_proj.weight_quant.tensor_quant": "", + "model.layers.0.self_attn.q_proj.weight_quant.tensor_quant.scaling_impl.parameter_list_stats.first_tracked_param.view_shape_impl": "", + }}, { "model": "hf-internal-testing/tiny-random-LlamaForCausalLM", "act_equalization": "layerwise", From b84a645603b51e14b64390d64a07c47d7641f79d Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Wed, 21 Aug 2024 16:50:08 +0100 Subject: [PATCH 15/53] setup (ex/llm): Renamed requirements to match other examples. 
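
With the rename, the example's dependencies install through the same
extras mechanism as the other examples, e.g. (assuming an editable
checkout of the repository):

    pip install -e .[llm]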
--- .../{requirements-example-llm.txt => requirements-llm.txt} | 0 setup.py | 4 ++-- 2 files changed, 2 insertions(+), 2 deletions(-) rename requirements/{requirements-example-llm.txt => requirements-llm.txt} (100%) diff --git a/requirements/requirements-example-llm.txt b/requirements/requirements-llm.txt similarity index 100% rename from requirements/requirements-example-llm.txt rename to requirements/requirements-llm.txt diff --git a/setup.py b/setup.py index 817c7f88e..4a756962d 100644 --- a/setup.py +++ b/setup.py @@ -39,10 +39,10 @@ def read_requirements(filename): "test": read_requirements('requirements-test.txt'), "tts": read_requirements('requirements-tts.txt'), "stt": read_requirements('requirements-stt.txt'), + "llm": read_requirements('requirements-llm.txt'), "vision": read_requirements('requirements-vision.txt'), "finn_integration": read_requirements('requirements-finn-integration.txt'), - "ort_integration": read_requirements('requirements-ort-integration.txt'), - "example_llm": read_requirements('requirements-example-llm.txt')}, + "ort_integration": read_requirements('requirements-ort-integration.txt')}, packages=find_packages('src'), package_dir={'': 'src'}, zip_safe=False, From 9f23bfb90ba12b06abd956911b90b7c7c6319ad3 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Wed, 21 Aug 2024 16:55:06 +0100 Subject: [PATCH 16/53] req (ex/llm): Added explicit dependencies --- requirements/requirements-llm.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/requirements/requirements-llm.txt b/requirements/requirements-llm.txt index a1a2c615f..7070cc9c6 100644 --- a/requirements/requirements-llm.txt +++ b/requirements/requirements-llm.txt @@ -1 +1,3 @@ optimum-amd[brevitas] @ git+https://github.com/huggingface/optimum-amd.git@main +tqdm +transformers From af6af4bd9561e56becf821888b864f55331ed3d5 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Wed, 21 Aug 2024 17:18:33 +0100 Subject: [PATCH 17/53] test (ex/llm): added weight-only test --- tests/brevitas_examples/llm.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/brevitas_examples/llm.py b/tests/brevitas_examples/llm.py index f261700cc..94297343f 100644 --- a/tests/brevitas_examples/llm.py +++ b/tests/brevitas_examples/llm.py @@ -182,6 +182,15 @@ def test_small_models_acc(caplog, acc_args_and_acc): "model.layers.0.self_attn.q_proj.input_quant.fused_activation_quant_proxy.tensor_quant": "", "model.layers.0.self_attn.q_proj.weight_quant.tensor_quant": "", }}, + { + "model": "hf-internal-testing/tiny-random-MistralForCausalLM", + "input_bit_width": None, + "act_calibration": False, + "exp_layer_types": { + "model.layers.0.self_attn.q_proj": "", + "model.layers.0.self_attn.q_proj.input_quant": "", + "model.layers.0.self_attn.q_proj.weight_quant.tensor_quant": "", + }}, { "model": "hf-internal-testing/tiny-random-MistralForCausalLM", "weight_quant_format": "float_ocp_e4m3", From 70145da6d0d19b28efd27174008817058f580e51 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Wed, 21 Aug 2024 17:33:47 +0100 Subject: [PATCH 18/53] precommit --- tests/brevitas_examples/llm.py | 76 ++++++++++++++++++++-------------- 1 file changed, 46 insertions(+), 30 deletions(-) diff --git a/tests/brevitas_examples/llm.py b/tests/brevitas_examples/llm.py index 94297343f..675bf2df2 100644 --- a/tests/brevitas_examples/llm.py +++ b/tests/brevitas_examples/llm.py @@ -177,20 +177,25 @@ def test_small_models_acc(caplog, acc_args_and_acc): { "model": "hf-internal-testing/tiny-random-MistralForCausalLM", "exp_layer_types": { - "lm_head": "", - 
"model.layers.0.self_attn.q_proj": "", - "model.layers.0.self_attn.q_proj.input_quant.fused_activation_quant_proxy.tensor_quant": "", - "model.layers.0.self_attn.q_proj.weight_quant.tensor_quant": "", - }}, + "lm_head": + "", + "model.layers.0.self_attn.q_proj": + "", + "model.layers.0.self_attn.q_proj.input_quant.fused_activation_quant_proxy.tensor_quant": + "", + "model.layers.0.self_attn.q_proj.weight_quant.tensor_quant": + "",}}, { "model": "hf-internal-testing/tiny-random-MistralForCausalLM", "input_bit_width": None, "act_calibration": False, "exp_layer_types": { - "model.layers.0.self_attn.q_proj": "", - "model.layers.0.self_attn.q_proj.input_quant": "", - "model.layers.0.self_attn.q_proj.weight_quant.tensor_quant": "", - }}, + "model.layers.0.self_attn.q_proj": + "", + "model.layers.0.self_attn.q_proj.input_quant": + "", + "model.layers.0.self_attn.q_proj.weight_quant.tensor_quant": + "",}}, { "model": "hf-internal-testing/tiny-random-MistralForCausalLM", "weight_quant_format": "float_ocp_e4m3", @@ -198,10 +203,12 @@ def test_small_models_acc(caplog, acc_args_and_acc): "input_quant_format": "float_ocp_e5m2", "input_quant_type": "sym", "exp_layer_types": { - "model.layers.0.self_attn.q_proj": "", - "model.layers.0.self_attn.q_proj.input_quant.fused_activation_quant_proxy.tensor_quant": "", - "model.layers.0.self_attn.q_proj.weight_quant.tensor_quant": "", - }}, + "model.layers.0.self_attn.q_proj": + "", + "model.layers.0.self_attn.q_proj.input_quant.fused_activation_quant_proxy.tensor_quant": + "", + "model.layers.0.self_attn.q_proj.weight_quant.tensor_quant": + "",}}, { "model": "hf-internal-testing/tiny-random-MistralForCausalLM", "weight_quant_format": "float_fnuz_e4m3", @@ -209,10 +216,12 @@ def test_small_models_acc(caplog, acc_args_and_acc): "input_quant_format": "float_fnuz_e5m2", "input_quant_type": "sym", "exp_layer_types": { - "model.layers.0.self_attn.q_proj": "", - "model.layers.0.self_attn.q_proj.input_quant.fused_activation_quant_proxy.tensor_quant": "", - "model.layers.0.self_attn.q_proj.weight_quant.tensor_quant": "", - }}, + "model.layers.0.self_attn.q_proj": + "", + "model.layers.0.self_attn.q_proj.input_quant.fused_activation_quant_proxy.tensor_quant": + "", + "model.layers.0.self_attn.q_proj.weight_quant.tensor_quant": + "",}}, { "model": "hf-internal-testing/tiny-random-LlamaForCausalLM", "weight_quant_format": "float_ocp_e4m3", @@ -230,30 +239,37 @@ def test_small_models_acc(caplog, acc_args_and_acc): "input_quant_type": "sym", "act_calibration": False, "exp_layer_types": { - "model.layers.0.self_attn.q_proj": "", - "model.layers.0.self_attn.q_proj.input_quant.fused_activation_quant_proxy.tensor_quant": "", - "model.layers.0.self_attn.q_proj.input_quant.fused_activation_quant_proxy.tensor_quant.scaling_impl.stats_input_view_shape_impl": "", - "model.layers.0.self_attn.q_proj.weight_quant.tensor_quant": "", - "model.layers.0.self_attn.q_proj.weight_quant.tensor_quant.scaling_impl.parameter_list_stats.first_tracked_param.view_shape_impl": "", - }}, + "model.layers.0.self_attn.q_proj": + "", + "model.layers.0.self_attn.q_proj.input_quant.fused_activation_quant_proxy.tensor_quant": + "", + "model.layers.0.self_attn.q_proj.input_quant.fused_activation_quant_proxy.tensor_quant.scaling_impl.stats_input_view_shape_impl": + "", + "model.layers.0.self_attn.q_proj.weight_quant.tensor_quant": + "", + "model.layers.0.self_attn.q_proj.weight_quant.tensor_quant.scaling_impl.parameter_list_stats.first_tracked_param.view_shape_impl": + "",}}, { "model": 
"hf-internal-testing/tiny-random-LlamaForCausalLM", "act_equalization": "layerwise", "exp_layer_types": { - "model.layers.0.self_attn.q_proj": "", - "model.layers.0.self_attn.q_proj.layer": "", - }}, + "model.layers.0.self_attn.q_proj": + "", + "model.layers.0.self_attn.q_proj.layer": + "",}}, { "model": "hf-internal-testing/tiny-random-OPTForCausalLM", "replace_mha": True, "exp_layer_types": { - "model.decoder.layers.0.self_attn": "", - "model.decoder.layers.0.self_attn.mha": "", - }}, + "model.decoder.layers.0.self_attn": + "", + "model.decoder.layers.0.self_attn.mha": + "",}}, { "model": "hf-internal-testing/tiny-random-MistralForCausalLM", "quantize_last_layer": True, - "exp_layer_types": {"lm_head": ""}},]) + "exp_layer_types": { + "lm_head": ""}},]) def layer_args(default_run_args, request): args = default_run_args layer_dict = request.param From f0ec89e076af47f4bc596121d0fa326707fad86c Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Wed, 21 Aug 2024 18:19:43 +0100 Subject: [PATCH 19/53] Feat (ex/llm): Allow supplying a prefix for the exported ONNX/TS model --- src/brevitas_examples/llm/main.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/src/brevitas_examples/llm/main.py b/src/brevitas_examples/llm/main.py index 6c3c34dd5..6060ef498 100644 --- a/src/brevitas_examples/llm/main.py +++ b/src/brevitas_examples/llm/main.py @@ -63,16 +63,15 @@ def model_export(model, ref_input, args): export_manager = StdQCDQONNXManager export_manager.change_weight_export(export_weight_q_node=True) - print(f"Exporting the model in ./quantized_onnx/{args.model.replace('/', '-')}") + print(f"Exporting the model in ./{args.export_prefix}") with torch.no_grad(), brevitas_proxy_export_mode(model, export_manager=export_manager): onnx_export_from_model( model, - f"./quantized_onnx/{args.model.replace('/', '-')}", + f"./{args.export_prefix}", task="text-generation-with-past", do_validation=False) elif args.export_target == 'torch_qcdq': - export_torch_qcdq( - model, ref_input['input_ids'], export_path=f"{args.model.replace('/', '-')}.pt") + export_torch_qcdq(model, ref_input['input_ids'], export_path=f"{args.export_prefix}.pt") def validate(args): @@ -115,6 +114,9 @@ def main(args): validate(args) set_seed(args.seed) + if args.export_prefix is None: + args.export_prefix = f"{args.model.replace('/', '--')}" + if args.no_float16: dtype = torch.float32 else: @@ -452,6 +454,13 @@ def parse_args(args): 'sharded_torchmlir_group_weight', 'sharded_packed_torchmlir_group_weight'], help='Model export.') + parser.add_argument( + '--export-prefix', + type=str, + default=None, + help= + "Path prefix to use for the various export flows. If None, a path will be derived from the model name (default: %(default)s)" + ) parser.add_argument( '--checkpoint-name', type=str, From c9d9b666a00a99977a5e0b54dba792f7bd2ca59c Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Wed, 21 Aug 2024 18:20:33 +0100 Subject: [PATCH 20/53] test (ex/llm): Added ONNX export and torchscript tests. 
--- tests/brevitas_examples/llm.py | 61 ++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/tests/brevitas_examples/llm.py b/tests/brevitas_examples/llm.py index 675bf2df2..a05a2e2c5 100644 --- a/tests/brevitas_examples/llm.py +++ b/tests/brevitas_examples/llm.py @@ -4,10 +4,13 @@ from argparse import Namespace from dataclasses import dataclass import logging +import os import shutil import numpy as np +import onnx import pytest +import torch from brevitas_examples.llm.main import main from brevitas_examples.llm.main import parse_args @@ -87,6 +90,7 @@ def default_run_args(request): args.dataset = "c4" args.eval = True #args.checkpoint = ptid2pathname(request.node.nodeid) + ".pth" # Example filename which won't clash + args.export_prefix = ptid2pathname(request.node.nodeid) args.weight_bit_width = 8 args.weight_quant_granularity = "per_channel" # "per_tensor", "per_channel", "per_group". args.input_bit_width = 8 @@ -284,3 +288,60 @@ def test_small_models_quant_layer(caplog, layer_args): args, exp_layer_types = layer_args float_ppl, quant_ppl, model = main(args) assert_layer_types(model, exp_layer_types) + + +@pytest.fixture( + params=[ + { + "model": "hf-internal-testing/tiny-random-LlamaForCausalLM", + "quantize_weight_zero_point": True, + "quantize_input_zero_point": True, + "export_target": "onnx_qcdq",}, + { + "model": "hf-internal-testing/tiny-random-LlamaForCausalLM", + "weight_quant_type": "sym", + "input_quant_type": "sym", + "export_target": "onnx_qcdq",},]) +def onnx_export_args(default_run_args, request): + args = default_run_args + export_dict = request.param + args.update(**export_dict) + yield args + + +def test_small_models_onnx_export(caplog, onnx_export_args): + caplog.set_level(logging.INFO) + args = onnx_export_args + float_ppl, quant_ppl, model = main(args) + onnx_model = onnx.load(os.path.join(args.export_prefix, "model.onnx")) + shutil.rmtree(args.export_prefix) + + +@pytest.fixture( + params=[ + { + "model": "hf-internal-testing/tiny-random-LlamaForCausalLM", + "eval": False, + "quantize_weight_zero_point": True, + "quantize_input_zero_point": True, + "export_target": "torch_qcdq",}, + { + "model": "hf-internal-testing/tiny-random-LlamaForCausalLM", + "eval": False, + "weight_quant_type": "sym", + "input_quant_type": "sym", + "export_target": "torch_qcdq",},]) +def torch_export_args(default_run_args, request): + args = default_run_args + export_dict = request.param + args.update(**export_dict) + yield args + + +def test_small_models_torch_export(caplog, torch_export_args): + caplog.set_level(logging.INFO) + args = torch_export_args + float_ppl, quant_ppl, model = main(args) + filepath = args.export_prefix + ".pt" + torchscript_model = torch.jit.load(filepath) + os.remove(filepath) From 9a6e50916c01bbd189b80a95eb1c36e2673746c6 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Fri, 23 Aug 2024 11:22:37 +0100 Subject: [PATCH 21/53] test (ex/llm): marked llm tests. 
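
Replacing the generic examples/weekly markers with a dedicated llm
marker lets the LLM suite be selected (or excluded) on its own, e.g.:

    pytest -m llm tests/brevitas_examples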
--- tests/brevitas_examples/llm.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/brevitas_examples/llm.py b/tests/brevitas_examples/llm.py index a05a2e2c5..138591606 100644 --- a/tests/brevitas_examples/llm.py +++ b/tests/brevitas_examples/llm.py @@ -119,8 +119,7 @@ def toggle_run_args(default_run_args, request): yield args -@pytest.mark.examples -@pytest.mark.weekly +@pytest.mark.llm def test_small_models_toggle_run_args(caplog, toggle_run_args, small_models_with_ppl): caplog.set_level(logging.INFO) args = toggle_run_args @@ -164,8 +163,7 @@ def acc_args_and_acc(default_run_args, request): yield args, float_ppl, quant_ppl -@pytest.mark.examples -@pytest.mark.weekly +@pytest.mark.llm def test_small_models_acc(caplog, acc_args_and_acc): caplog.set_level(logging.INFO) args, exp_float_ppl, exp_quant_ppl = acc_args_and_acc @@ -283,6 +281,7 @@ def layer_args(default_run_args, request): yield args, exp_layer_types +@pytest.mark.llm def test_small_models_quant_layer(caplog, layer_args): caplog.set_level(logging.INFO) args, exp_layer_types = layer_args @@ -309,6 +308,7 @@ def onnx_export_args(default_run_args, request): yield args +@pytest.mark.llm def test_small_models_onnx_export(caplog, onnx_export_args): caplog.set_level(logging.INFO) args = onnx_export_args @@ -338,6 +338,7 @@ def torch_export_args(default_run_args, request): yield args +@pytest.mark.llm def test_small_models_torch_export(caplog, torch_export_args): caplog.set_level(logging.INFO) args = torch_export_args From c39749bce5554155fe08e024136b3b26fe20a25a Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Fri, 23 Aug 2024 11:33:02 +0100 Subject: [PATCH 22/53] test (ex/llm): Added commented-out mixtral model --- tests/brevitas_examples/llm.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/brevitas_examples/llm.py b/tests/brevitas_examples/llm.py index 138591606..3b70d266f 100644 --- a/tests/brevitas_examples/llm.py +++ b/tests/brevitas_examples/llm.py @@ -76,7 +76,13 @@ class ModelAndPpl: name="hf-internal-testing/tiny-random-MistralForCausalLM", float_ppl=None, supports_fx=False, - ),]) + ), + #ModelAndPpl( + # name="dacorvo/Mixtral-tiny", + # float_ppl=None, + # supports_fx=True, + #), + ]) def small_models_with_ppl(request): yield request.param From f214f7c53cf1904245b37554defe3f294af78178 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Fri, 23 Aug 2024 11:40:02 +0100 Subject: [PATCH 23/53] test (ex/llm): Renamed llm test file. 
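
pytest only collects files matching test_*.py by default, so the
rename makes the module discoverable without extra configuration:

    pytest tests/brevitas_examples/test_llm.py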
--- tests/brevitas_examples/{llm.py => test_llm.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/brevitas_examples/{llm.py => test_llm.py} (100%) diff --git a/tests/brevitas_examples/llm.py b/tests/brevitas_examples/test_llm.py similarity index 100% rename from tests/brevitas_examples/llm.py rename to tests/brevitas_examples/test_llm.py From 852714f90937f20ca6a03f14c85c215b221b5f46 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Fri, 23 Aug 2024 12:19:12 +0100 Subject: [PATCH 24/53] test (ex/llm): improved test compatibility with test infrastructure --- noxfile.py | 12 +++++++++++- pytest.ini | 3 +++ tests/brevitas_examples/test_llm.py | 15 ++++++++++++--- 3 files changed, 26 insertions(+), 4 deletions(-) diff --git a/noxfile.py b/noxfile.py index ffb1c5fbd..7af395105 100644 --- a/noxfile.py +++ b/noxfile.py @@ -105,7 +105,17 @@ def tests_brevitas_examples_cpu(session, pytorch, jit_status): install_pytorch(pytorch, session) install_torchvision(pytorch, session) # For CV eval scripts session.install('--upgrade', '.[test, tts, stt, vision]') - session.run('pytest', '-n', 'logical', 'tests/brevitas_examples') + session.run('pytest', '-n', 'logical', '-k', 'not llm', 'tests/brevitas_examples') + + +@nox.session(python=PYTHON_VERSIONS) +@nox.parametrize("pytorch", PYTORCH_VERSIONS, ids=PYTORCH_IDS) +@nox.parametrize("jit_status", JIT_STATUSES, ids=JIT_IDS) +def tests_brevitas_examples_llm(session, pytorch, jit_status): + session.env['BREVITAS_JIT'] = '{}'.format(int(jit_status == 'jit_enabled')) + install_pytorch(pytorch, session) + session.install('--upgrade', '-e', '.[test, llm, export]') + session.run('pytest', '-n', 'logical', '-k', 'llm', 'tests/brevitas_examples') @nox.session(python=PYTHON_VERSIONS) diff --git a/pytest.ini b/pytest.ini index a560d3e16..a3cd14b59 100644 --- a/pytest.ini +++ b/pytest.ini @@ -7,3 +7,6 @@ log_cli_format = %(message)s # pytest-mock should use Pypi's mock rather than Python's built-in mock_use_standalone_module = true + +markers = + llm: mark a test which tests brevitas_examples/llm diff --git a/tests/brevitas_examples/test_llm.py b/tests/brevitas_examples/test_llm.py index 3b70d266f..a646764f4 100644 --- a/tests/brevitas_examples/test_llm.py +++ b/tests/brevitas_examples/test_llm.py @@ -12,16 +12,19 @@ import pytest import torch +from brevitas import config from brevitas_examples.llm.main import main from brevitas_examples.llm.main import parse_args +from tests.marker import jit_disabled_for_export + def ptid2pathname(string): return string.replace("/", "-").replace(":", "-") def allclose(x, y): - return np.allclose(x, y, rtol=1e-02, atol=5e-01, equal_nan=False) + return np.allclose(x, y, rtol=1e-04, atol=3e-00, equal_nan=False) def allexact(x, y): @@ -176,8 +179,12 @@ def test_small_models_acc(caplog, acc_args_and_acc): float_ppl, quant_ppl, model = main(args) float_ppl = float_ppl.detach().cpu().numpy() quant_ppl = quant_ppl.detach().cpu().numpy() - assert allexact(exp_float_ppl, float_ppl), f"Expected float PPL {exp_float_ppl}, measured PPL {float_ppl}" - assert allexact(exp_quant_ppl, quant_ppl), f"Expected quant PPL {exp_quant_ppl}, measured PPL {quant_ppl}" + if config.JIT_ENABLED: + assert allclose(exp_float_ppl, float_ppl), f"Expected float PPL {exp_float_ppl}, measured PPL {float_ppl}" + assert allclose(exp_quant_ppl, quant_ppl), f"Expected quant PPL {exp_quant_ppl}, measured PPL {quant_ppl}" + else: + assert allexact(exp_float_ppl, float_ppl), f"Expected float PPL {exp_float_ppl}, measured PPL {float_ppl}" + assert 
allexact(exp_quant_ppl, quant_ppl), f"Expected quant PPL {exp_quant_ppl}, measured PPL {quant_ppl}" @pytest.fixture( From c778818946ddb6fe223127176a00d0b39361ee4e Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Fri, 23 Aug 2024 12:25:29 +0100 Subject: [PATCH 25/53] precommit --- tests/brevitas_examples/test_llm.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/brevitas_examples/test_llm.py b/tests/brevitas_examples/test_llm.py index a646764f4..3a362b06e 100644 --- a/tests/brevitas_examples/test_llm.py +++ b/tests/brevitas_examples/test_llm.py @@ -15,7 +15,6 @@ from brevitas import config from brevitas_examples.llm.main import main from brevitas_examples.llm.main import parse_args - from tests.marker import jit_disabled_for_export @@ -85,7 +84,7 @@ class ModelAndPpl: # float_ppl=None, # supports_fx=True, #), - ]) + ]) def small_models_with_ppl(request): yield request.param From c659e64aec6d62e58933f7055cfbf4b95676121b Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Fri, 23 Aug 2024 12:27:13 +0100 Subject: [PATCH 26/53] test (gha/ex/llm): Added new workflow for testing the LLM example --- .github/workflows/examples_llm_pytest.yml | 65 +++++++++++++++++++ .github/workflows/gen_github_actions.py | 26 ++++++++ .../workflows/reduced_examples_llm_pytest.yml | 64 ++++++++++++++++++ 3 files changed, 155 insertions(+) create mode 100644 .github/workflows/examples_llm_pytest.yml create mode 100644 .github/workflows/reduced_examples_llm_pytest.yml diff --git a/.github/workflows/examples_llm_pytest.yml b/.github/workflows/examples_llm_pytest.yml new file mode 100644 index 000000000..bc7f82287 --- /dev/null +++ b/.github/workflows/examples_llm_pytest.yml @@ -0,0 +1,65 @@ +name: Examples LLM Pytest + +on: + push: + branches: [ master, dev ] + pull_request: + types: + - review_requested + +jobs: + build: + runs-on: ${{ matrix.platform }} + strategy: + fail-fast: false + + + matrix: + python_version: ['3.8', '3.9'] + pytorch_version: ['1.9.1', '1.10.1', '1.11.0', '1.12.1', '1.13.0', '2.0.1', '2.1.0'] + platform: ['windows-latest', 'ubuntu-latest', 'macos-latest'] + jit_status: ['jit_disabled', 'jit_enabled'] + + + exclude: + - pytorch_version: '1.9.1' + platform: 'macos-latest' + + - pytorch_version: '1.9.1' + jit_status: 'jit_enabled' + + + + if: ${{ !github.event.pull_request.draft }} + steps: + + - name: Checkout repo + uses: actions/checkout@v2 + + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python_version }} + + - name: Install Nox dependencies + shell: bash + run: pip install -r requirements/requirements-nox.txt + + - name: Install update + shell: bash + run: sudo apt-get update + if: startsWith(runner.os, 'Linux') == true + + - name: Install libsndfile and libgomp1 on Ubuntu + shell: bash + run: sudo apt-get install -y libsndfile-dev libgomp1 + if: startsWith(runner.os, 'Linux') == true + + - name: Install libomp on macOS + shell: bash + run: brew install libomp + if: startsWith(runner.os, 'macOS') == true + + - name: Run Nox session for brevitas_examples pytest + shell:
bash + run: nox -v -s tests_brevitas_examples_llm-${{ matrix.python_version }}\(${{ matrix.jit_status }}\,\ pytorch_${{ matrix.pytorch_version }}\) diff --git a/.github/workflows/gen_github_actions.py b/.github/workflows/gen_github_actions.py index 4cd6c6827..d491795ca 100644 --- a/.github/workflows/gen_github_actions.py +++ b/.github/workflows/gen_github_actions.py @@ -8,6 +8,7 @@ BASE_YML_REDUCED_TEMPLATE = 'base_reduced.yml.template' PYTEST_YML = 'pytest.yml' EXAMPLES_PYTEST_YML = 'examples_pytest.yml' +EXAMPLES_LLM_PYTEST_YML = 'examples_llm_pytest.yml' DEVELOP_INSTALL_YML = 'develop_install.yml' FINN_INTEGRATION_YML = 'finn_integration.yml' ORT_INTEGRATION_YML = 'ort_integration.yml' @@ -80,6 +81,13 @@ 'nox -v -s tests_brevitas_examples_cpu-${{ matrix.python_version }}\(${{ matrix.jit_status }}\,\ pytorch_${{ matrix.pytorch_version }}\)' )]),] +EXAMPLES_LLM_PYTEST_STEP_LIST = [ + od([('name', 'Run Nox session for brevitas_examples pytest'), ('shell', 'bash'), + ( + 'run', + 'nox -v -s tests_brevitas_examples_llm-${{ matrix.python_version }}\(${{ matrix.jit_status }}\,\ pytorch_${{ matrix.pytorch_version }}\)' + )]),] + FINN_INTEGRATION_STEP_LIST = [ od([('name', 'Install protobuf on Ubuntu'), ('shell', 'bash'), ('run', 'sudo apt-get install protobuf-compiler libprotoc-dev'), @@ -167,6 +175,23 @@ def gen_examples_pytest_yml(): pytest.gen_yaml(BASE_YML_REDUCED_TEMPLATE, 'reduced_' + EXAMPLES_PYTEST_YML) +def gen_examples_llm_pytest_yml(): + pytest = Action( + 'Examples LLM Pytest', + EXCLUDE_LIST + JIT_EXCLUDE_LIST, + combine_od_list([MATRIX, PYTEST_MATRIX_EXTRA]), + EXAMPLES_LLM_PYTEST_STEP_LIST, + STRATEGY) + pytest.gen_yaml(BASE_YML_TEMPLATE, EXAMPLES_LLM_PYTEST_YML) + pytest = Action( + 'Examples LLM Pytest', + EXCLUDE_LIST, + combine_od_list([MATRIX_REDUCED, PYTEST_MATRIX_EXTRA_REDUCED]), + EXAMPLES_LLM_PYTEST_STEP_LIST, + STRATEGY) + pytest.gen_yaml(BASE_YML_REDUCED_TEMPLATE, 'reduced_' + EXAMPLES_LLM_PYTEST_YML) + + def gen_test_develop_install_yml(): test_develop_install = Action( 'Test develop install', EXCLUDE_LIST, MATRIX, TEST_INSTALL_DEV_STEP_LIST, STRATEGY) @@ -243,6 +268,7 @@ def gen_test_brevitas_end_to_end(): if __name__ == '__main__': gen_pytest_yml() gen_examples_pytest_yml() + gen_examples_llm_pytest_yml() gen_test_develop_install_yml() gen_test_brevitas_finn_integration() gen_test_brevitas_ort_integration() diff --git a/.github/workflows/reduced_examples_llm_pytest.yml b/.github/workflows/reduced_examples_llm_pytest.yml new file mode 100644 index 000000000..37f8aea46 --- /dev/null +++ b/.github/workflows/reduced_examples_llm_pytest.yml @@ -0,0 +1,64 @@ +name: Examples LLM Pytest + +on: + pull_request: + types: + - opened + - reopened + - synchronize + - ready_for_review + + +jobs: + build: + runs-on: ${{ matrix.platform }} + strategy: + fail-fast: false + + + matrix: + python_version: ['3.8'] + pytorch_version: ['1.9.1', '1.13.0', '2.1.0'] + platform: ['ubuntu-latest'] + jit_status: ['jit_disabled'] + + + exclude: + - pytorch_version: '1.9.1' + platform: 'macos-latest' + + + + if: ${{ !github.event.pull_request.draft }} + steps: + + - name: Checkout repo + uses: actions/checkout@v2 + + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python_version }} + + - name: Install Nox dependencies + shell: bash + run: pip install -r requirements/requirements-nox.txt + + - name: Install update + shell: bash + run: sudo apt-get update + if: startsWith(runner.os, 'Linux') == true + + - name: Install libsndfile and libgomp1 on Ubuntu + 
shell: bash + run: sudo apt-get install -y libsndfile-dev libgomp1 + if: startsWith(runner.os, 'Linux') == true + + - name: Install libomp on macOS + shell: bash + run: brew install libomp + if: startsWith(runner.os, 'macOS') == true + + - name: Run Nox session for brevitas_examples pytest + shell: bash + run: nox -v -s tests_brevitas_examples_llm-${{ matrix.python_version }}\(${{ matrix.jit_status }}\,\ pytorch_${{ matrix.pytorch_version }}\) From 1deb288d6a5131cf1363dcf2cea910e03ee4e2fd Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Fri, 23 Aug 2024 12:32:31 +0100 Subject: [PATCH 27/53] test (ex/llm): Removed onnx dependency when collecting tests. --- tests/brevitas_examples/test_llm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/brevitas_examples/test_llm.py b/tests/brevitas_examples/test_llm.py index 3a362b06e..90279a8af 100644 --- a/tests/brevitas_examples/test_llm.py +++ b/tests/brevitas_examples/test_llm.py @@ -8,7 +8,6 @@ import shutil import numpy as np -import onnx import pytest import torch @@ -323,6 +322,7 @@ def onnx_export_args(default_run_args, request): @pytest.mark.llm @jit_disabled_for_export() def test_small_models_onnx_export(caplog, onnx_export_args): + import onnx caplog.set_level(logging.INFO) args = onnx_export_args float_ppl, quant_ppl, model = main(args) From 7b637233f608decd487bcf2f781d9aca66854b9c Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Fri, 23 Aug 2024 12:52:44 +0100 Subject: [PATCH 28/53] test (ex/llm): softened accuracy conditions --- tests/brevitas_examples/test_llm.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/brevitas_examples/test_llm.py b/tests/brevitas_examples/test_llm.py index 90279a8af..09f5db9fe 100644 --- a/tests/brevitas_examples/test_llm.py +++ b/tests/brevitas_examples/test_llm.py @@ -25,6 +25,10 @@ def allclose(x, y): return np.allclose(x, y, rtol=1e-04, atol=3e-00, equal_nan=False) +def allveryclose(x, y): + return np.allclose(x, y, rtol=1e-08, atol=1e-01, equal_nan=False) + + def allexact(x, y): return np.allclose(x, y, rtol=0.0, atol=0.0, equal_nan=False) @@ -181,8 +185,8 @@ def test_small_models_acc(caplog, acc_args_and_acc): assert allclose(exp_float_ppl, float_ppl), f"Expected float PPL {exp_float_ppl}, measured PPL {float_ppl}" assert allclose(exp_quant_ppl, quant_ppl), f"Expected quant PPL {exp_quant_ppl}, measured PPL {quant_ppl}" else: - assert allexact(exp_float_ppl, float_ppl), f"Expected float PPL {exp_float_ppl}, measured PPL {float_ppl}" - assert allexact(exp_quant_ppl, quant_ppl), f"Expected quant PPL {exp_quant_ppl}, measured PPL {quant_ppl}" + assert allveryclose(exp_float_ppl, float_ppl), f"Expected float PPL {exp_float_ppl}, measured PPL {float_ppl}" + assert allveryclose(exp_quant_ppl, quant_ppl), f"Expected quant PPL {exp_quant_ppl}, measured PPL {quant_ppl}" @pytest.fixture( From 0235b5c5da266622c2f99831cb4c57ec2b7d8f9f Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Fri, 23 Aug 2024 13:01:01 +0100 Subject: [PATCH 29/53] test (ex/llm): reorganise to prevent export issues --- tests/brevitas_examples/test_llm.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/brevitas_examples/test_llm.py b/tests/brevitas_examples/test_llm.py index 09f5db9fe..c03c71f76 100644 --- a/tests/brevitas_examples/test_llm.py +++ b/tests/brevitas_examples/test_llm.py @@ -12,8 +12,6 @@ import torch from brevitas import config -from brevitas_examples.llm.main import main -from brevitas_examples.llm.main import parse_args from tests.marker 
import jit_disabled_for_export @@ -94,6 +92,7 @@ def small_models_with_ppl(request): @pytest.fixture() def default_run_args(request): + from brevitas_examples.llm.main import parse_args args = UpdatableNamespace(**vars(parse_args([]))) args.nsamples = 2 args.seqlen = 2 @@ -132,6 +131,7 @@ def toggle_run_args(default_run_args, request): @pytest.mark.llm def test_small_models_toggle_run_args(caplog, toggle_run_args, small_models_with_ppl): + from brevitas_examples.llm.main import main caplog.set_level(logging.INFO) args = toggle_run_args args.model = small_models_with_ppl.name @@ -176,6 +176,7 @@ def acc_args_and_acc(default_run_args, request): @pytest.mark.llm def test_small_models_acc(caplog, acc_args_and_acc): + from brevitas_examples.llm.main import main caplog.set_level(logging.INFO) args, exp_float_ppl, exp_quant_ppl = acc_args_and_acc float_ppl, quant_ppl, model = main(args) @@ -298,6 +299,7 @@ def layer_args(default_run_args, request): @pytest.mark.llm def test_small_models_quant_layer(caplog, layer_args): + from brevitas_examples.llm.main import main caplog.set_level(logging.INFO) args, exp_layer_types = layer_args float_ppl, quant_ppl, model = main(args) @@ -326,6 +328,7 @@ def onnx_export_args(default_run_args, request): @pytest.mark.llm @jit_disabled_for_export() def test_small_models_onnx_export(caplog, onnx_export_args): + from brevitas_examples.llm.main import main import onnx caplog.set_level(logging.INFO) args = onnx_export_args @@ -358,6 +361,7 @@ def torch_export_args(default_run_args, request): @pytest.mark.llm @jit_disabled_for_export() def test_small_models_torch_export(caplog, torch_export_args): + from brevitas_examples.llm.main import main caplog.set_level(logging.INFO) args = torch_export_args float_ppl, quant_ppl, model = main(args) From bedf1b32271ee85666a2ebe238199cc17e575877 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Fri, 23 Aug 2024 13:04:12 +0100 Subject: [PATCH 30/53] precommit --- tests/brevitas_examples/test_llm.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/brevitas_examples/test_llm.py b/tests/brevitas_examples/test_llm.py index c03c71f76..67db55bc2 100644 --- a/tests/brevitas_examples/test_llm.py +++ b/tests/brevitas_examples/test_llm.py @@ -328,8 +328,9 @@ def onnx_export_args(default_run_args, request): @pytest.mark.llm @jit_disabled_for_export() def test_small_models_onnx_export(caplog, onnx_export_args): - from brevitas_examples.llm.main import main import onnx + + from brevitas_examples.llm.main import main caplog.set_level(logging.INFO) args = onnx_export_args float_ppl, quant_ppl, model = main(args) From 79c271a52fe774cbc168540898f5611851673c37 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Fri, 23 Aug 2024 13:08:35 +0100 Subject: [PATCH 31/53] test (ex/llm): remove upgrade flag to prevent new pytorch from being installed --- noxfile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/noxfile.py b/noxfile.py index 7af395105..43ada0632 100644 --- a/noxfile.py +++ b/noxfile.py @@ -114,7 +114,7 @@ def tests_brevitas_examples_cpu(session, pytorch, jit_status): def tests_brevitas_examples_llm(session, pytorch, jit_status): session.env['BREVITAS_JIT'] = '{}'.format(int(jit_status == 'jit_enabled')) install_pytorch(pytorch, session) - session.install('--upgrade', '-e', '.[test, llm, export]') + session.install('-e', '.[test, llm, export]') session.run('pytest', '-n', 'logical', '-k', 'llm', 'tests/brevitas_examples') From 44d2e56da96f86f9c97f7284d0e0a073b0ac4303 Mon Sep 17 00:00:00 2001 From: Nick 
Fraser Date: Fri, 23 Aug 2024 13:41:57 +0100 Subject: [PATCH 32/53] test (gha/ex/llm): Added custom pytorch versions for LLM tests. --- .github/workflows/examples_llm_pytest.yml | 2 +- .github/workflows/gen_github_actions.py | 12 ++++++++++-- .github/workflows/reduced_examples_llm_pytest.yml | 2 +- tests/brevitas_examples/test_llm.py | 5 +++++ 4 files changed, 17 insertions(+), 4 deletions(-) diff --git a/.github/workflows/examples_llm_pytest.yml b/.github/workflows/examples_llm_pytest.yml index bc7f82287..e939a93b2 100644 --- a/.github/workflows/examples_llm_pytest.yml +++ b/.github/workflows/examples_llm_pytest.yml @@ -16,7 +16,7 @@ jobs: matrix: python_version: ['3.8', '3.9'] - pytorch_version: ['1.9.1', '1.10.1', '1.11.0', '1.12.1', '1.13.0', '2.0.1', '2.1.0'] + pytorch_version: ['2.2.2', '2.3.1', '2.4.0'] platform: ['windows-latest', 'ubuntu-latest', 'macos-latest'] jit_status: ['jit_disabled', 'jit_enabled'] diff --git a/.github/workflows/gen_github_actions.py b/.github/workflows/gen_github_actions.py index d491795ca..f24ab793b 100644 --- a/.github/workflows/gen_github_actions.py +++ b/.github/workflows/gen_github_actions.py @@ -26,6 +26,10 @@ ('pytorch_version', list(PYTORCH_LIST_REDUCED)), ('platform', PLATFORM_LIST_REDUCED)]) +EXAMPLES_LLM_PYTEST_MATRIX_REDUCED = od([('python_version', list(PYTHON_VERSIONS_REDUCED)), + ('pytorch_version', list( + ('2.2.2',))), ('platform', PLATFORM_LIST_REDUCED)]) + FINN_MATRIX_REDUCED = od([('python_version', list(PYTHON_VERSIONS_REDUCED)), ('pytorch_version', list(PYTORCH_LIST_REDUCED)), ('platform', PLATFORM_LIST_REDUCED)]) @@ -62,6 +66,10 @@ MATRIX = od([('python_version', list(PYTHON_VERSIONS)), ('pytorch_version', list(PYTORCH_VERSIONS)), ('platform', PLATFORM_LIST)]) +EXAMPLES_LLM_PYTEST_MATRIX = od([('python_version', list(PYTHON_VERSIONS)), + ('pytorch_version', list( + ('2.2.2', '2.3.1', '2.4.0'))), ('platform', PLATFORM_LIST)]) + FINN_MATRIX = od([('python_version', list(PYTHON_VERSIONS)), ('pytorch_version', list(PYTORCH_VERSIONS)), ('platform', FINN_PLATFORM_LIST)]) @@ -179,14 +187,14 @@ def gen_examples_llm_pytest_yml(): pytest = Action( 'Examples LLM Pytest', EXCLUDE_LIST + JIT_EXCLUDE_LIST, - combine_od_list([MATRIX, PYTEST_MATRIX_EXTRA]), + combine_od_list([EXAMPLES_LLM_PYTEST_MATRIX, PYTEST_MATRIX_EXTRA]), EXAMPLES_LLM_PYTEST_STEP_LIST, STRATEGY) pytest.gen_yaml(BASE_YML_TEMPLATE, EXAMPLES_LLM_PYTEST_YML) pytest = Action( 'Examples LLM Pytest', EXCLUDE_LIST, - combine_od_list([MATRIX_REDUCED, PYTEST_MATRIX_EXTRA_REDUCED]), + combine_od_list([EXAMPLES_LLM_PYTEST_MATRIX_REDUCED, PYTEST_MATRIX_EXTRA_REDUCED]), EXAMPLES_LLM_PYTEST_STEP_LIST, STRATEGY) pytest.gen_yaml(BASE_YML_REDUCED_TEMPLATE, 'reduced_' + EXAMPLES_LLM_PYTEST_YML) diff --git a/.github/workflows/reduced_examples_llm_pytest.yml b/.github/workflows/reduced_examples_llm_pytest.yml index 37f8aea46..ca7fa1dc6 100644 --- a/.github/workflows/reduced_examples_llm_pytest.yml +++ b/.github/workflows/reduced_examples_llm_pytest.yml @@ -18,7 +18,7 @@ jobs: matrix: python_version: ['3.8'] - pytorch_version: ['1.9.1', '1.13.0', '2.1.0'] + pytorch_version: ['2.2.2'] platform: ['ubuntu-latest'] jit_status: ['jit_disabled'] diff --git a/tests/brevitas_examples/test_llm.py b/tests/brevitas_examples/test_llm.py index 67db55bc2..66c09d448 100644 --- a/tests/brevitas_examples/test_llm.py +++ b/tests/brevitas_examples/test_llm.py @@ -130,6 +130,7 @@ def toggle_run_args(default_run_args, request): @pytest.mark.llm +@requires_pt_ge('2.2') def test_small_models_toggle_run_args(caplog, 
toggle_run_args, small_models_with_ppl): from brevitas_examples.llm.main import main caplog.set_level(logging.INFO) @@ -175,6 +176,7 @@ def acc_args_and_acc(default_run_args, request): @pytest.mark.llm +@requires_pt_ge('2.2') def test_small_models_acc(caplog, acc_args_and_acc): from brevitas_examples.llm.main import main caplog.set_level(logging.INFO) @@ -298,6 +300,7 @@ def layer_args(default_run_args, request): @pytest.mark.llm +@requires_pt_ge('2.2') def test_small_models_quant_layer(caplog, layer_args): from brevitas_examples.llm.main import main caplog.set_level(logging.INFO) @@ -327,6 +330,7 @@ def onnx_export_args(default_run_args, request): @pytest.mark.llm @jit_disabled_for_export() +@requires_pt_ge('2.2') def test_small_models_onnx_export(caplog, onnx_export_args): import onnx @@ -361,6 +365,7 @@ def torch_export_args(default_run_args, request): @pytest.mark.llm @jit_disabled_for_export() +@requires_pt_ge('2.2') def test_small_models_torch_export(caplog, torch_export_args): from brevitas_examples.llm.main import main caplog.set_level(logging.INFO) From 32ea1ef3d61fa0b4ddcbbdcc1d91a604148cc496 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Fri, 23 Aug 2024 13:49:40 +0100 Subject: [PATCH 33/53] test (ex/llm/gha): Fixed custom pytorch versions for LLM test --- .github/workflows/gen_github_actions.py | 5 +++-- noxfile.py | 6 +++++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/.github/workflows/gen_github_actions.py b/.github/workflows/gen_github_actions.py index f24ab793b..316d21a4d 100644 --- a/.github/workflows/gen_github_actions.py +++ b/.github/workflows/gen_github_actions.py @@ -66,9 +66,10 @@ MATRIX = od([('python_version', list(PYTHON_VERSIONS)), ('pytorch_version', list(PYTORCH_VERSIONS)), ('platform', PLATFORM_LIST)]) +EXAMPLES_LLM_PYTEST_PYTORCH_VERSIONS = ('2.2.2', '2.3.1', '2.4.0') EXAMPLES_LLM_PYTEST_MATRIX = od([('python_version', list(PYTHON_VERSIONS)), - ('pytorch_version', list( - ('2.2.2', '2.3.1', '2.4.0'))), ('platform', PLATFORM_LIST)]) + ('pytorch_version', list(EXAMPLES_LLM_PYTEST_PYTORCH_VERSIONS)), + ('platform', PLATFORM_LIST)]) FINN_MATRIX = od([('python_version', list(PYTHON_VERSIONS)), ('pytorch_version', list(PYTORCH_VERSIONS)), ('platform', FINN_PLATFORM_LIST)]) diff --git a/noxfile.py b/noxfile.py index 43ada0632..93fc194e4 100644 --- a/noxfile.py +++ b/noxfile.py @@ -9,6 +9,7 @@ from packaging import version sys.path.append(os.path.join(os.path.dirname(__file__), os.path.join('.', '.github', 'workflows'))) +from gen_github_actions import EXAMPLES_LLM_PYTEST_PYTORCH_VERSIONS from gen_github_actions import JIT_STATUSES from gen_github_actions import PYTHON_VERSIONS from gen_github_actions import PYTORCH_VERSIONS @@ -16,6 +17,8 @@ IS_OSX = system() == 'Darwin' PYTORCH_STABLE_WHEEL_SRC = 'https://download.pytorch.org/whl/torch_stable.html' PYTORCH_IDS = tuple([f'pytorch_{i}' for i in PYTORCH_VERSIONS]) +EXAMPLES_LLM_PYTEST_PYTORCH_IDS = tuple([ + f'pytorch_{i}' for i in EXAMPLES_LLM_PYTEST_PYTORCH_VERSIONS]) JIT_IDS = tuple([f'{i}'.lower() for i in JIT_STATUSES]) LSTM_EXPORT_MIN_PYTORCH = '1.10.1' @@ -109,7 +112,8 @@ def tests_brevitas_examples_cpu(session, pytorch, jit_status): @nox.session(python=PYTHON_VERSIONS) -@nox.parametrize("pytorch", PYTORCH_VERSIONS, ids=PYTORCH_IDS) +@nox.parametrize( + "pytorch", EXAMPLES_LLM_PYTEST_PYTORCH_VERSIONS, ids=EXAMPLES_LLM_PYTEST_PYTORCH_IDS) @nox.parametrize("jit_status", JIT_STATUSES, ids=JIT_IDS) def tests_brevitas_examples_llm(session, pytorch, jit_status): session.env['BREVITAS_JIT'] = 
'{}'.format(int(jit_status == 'jit_enabled')) From 0a8d7ee51a232cc161cf74b39277088bad3b7719 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Fri, 23 Aug 2024 13:57:24 +0100 Subject: [PATCH 34/53] Bugfixes --- noxfile.py | 3 ++- tests/brevitas_examples/test_llm.py | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/noxfile.py b/noxfile.py index 93fc194e4..77b90a9b5 100644 --- a/noxfile.py +++ b/noxfile.py @@ -118,7 +118,8 @@ def tests_brevitas_examples_cpu(session, pytorch, jit_status): def tests_brevitas_examples_llm(session, pytorch, jit_status): session.env['BREVITAS_JIT'] = '{}'.format(int(jit_status == 'jit_enabled')) install_pytorch(pytorch, session) - session.install('-e', '.[test, llm, export]') + session.install( + '-e', '.[test, llm, export]', f'torch=={pytorch}' if IS_OSX else f'torch=={pytorch}+cpu') session.run('pytest', '-n', 'logical', '-k', 'llm', 'tests/brevitas_examples') diff --git a/tests/brevitas_examples/test_llm.py b/tests/brevitas_examples/test_llm.py index 66c09d448..9c39b878e 100644 --- a/tests/brevitas_examples/test_llm.py +++ b/tests/brevitas_examples/test_llm.py @@ -13,6 +13,7 @@ from brevitas import config from tests.marker import jit_disabled_for_export +from tests.marker import requires_pt_ge def ptid2pathname(string): From b3bcedd9b702a9beff2d571676bb2366d05d1838 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Fri, 23 Aug 2024 14:05:43 +0100 Subject: [PATCH 35/53] test (ex/llm/nox): Better exclusion for examples. --- noxfile.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/noxfile.py b/noxfile.py index 77b90a9b5..ff0ea3d29 100644 --- a/noxfile.py +++ b/noxfile.py @@ -108,7 +108,7 @@ def tests_brevitas_examples_cpu(session, pytorch, jit_status): install_pytorch(pytorch, session) install_torchvision(pytorch, session) # For CV eval scripts session.install('--upgrade', '.[test, tts, stt, vision]') - session.run('pytest', '-n', 'logical', '-k', 'not llm', 'tests/brevitas_examples') + session.run('pytest', '-n', 'logical', '--ignore-glob', 'tests/brevitas_examples/*llm*', 'tests/brevitas_examples') @nox.session(python=PYTHON_VERSIONS) @@ -120,7 +120,7 @@ def tests_brevitas_examples_llm(session, pytorch, jit_status): install_pytorch(pytorch, session) session.install( '-e', '.[test, llm, export]', f'torch=={pytorch}' if IS_OSX else f'torch=={pytorch}+cpu') - session.run('pytest', '-n', 'logical', '-k', 'llm', 'tests/brevitas_examples') + session.run('pytest', '-n', 'logical', '-k', 'llm', 'tests/brevitas_examples/test_llm.py') @nox.session(python=PYTHON_VERSIONS) From 47d7cbbc8cb37ac0026ee9fe116decfda00bd737 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Fri, 23 Aug 2024 14:06:36 +0100 Subject: [PATCH 36/53] Revert "test (ex/llm): reorganise to prevent export issues" This reverts commit a9d63613e7f67ee726c2884949d102ae00dc4a9c. 
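The function-level imports below were introduced so that collecting test_llm.py did not require the `llm` extras to be installed. Now that the LLM tests run in a dedicated nox session that installs `.[test, llm, export]`, the workaround appears unnecessary and the imports return to module level. The pattern being reverted looked like this:

    def test_small_models_toggle_run_args(caplog, toggle_run_args, small_models_with_ppl):
        from brevitas_examples.llm.main import main  # deferred to keep collection import-free
        caplog.set_level(logging.INFO)
        ...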
--- tests/brevitas_examples/test_llm.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tests/brevitas_examples/test_llm.py b/tests/brevitas_examples/test_llm.py index 9c39b878e..9e12fc3cf 100644 --- a/tests/brevitas_examples/test_llm.py +++ b/tests/brevitas_examples/test_llm.py @@ -12,6 +12,8 @@ import torch from brevitas import config +from brevitas_examples.llm.main import main +from brevitas_examples.llm.main import parse_args from tests.marker import jit_disabled_for_export from tests.marker import requires_pt_ge @@ -93,7 +95,6 @@ def small_models_with_ppl(request): @pytest.fixture() def default_run_args(request): - from brevitas_examples.llm.main import parse_args args = UpdatableNamespace(**vars(parse_args([]))) args.nsamples = 2 args.seqlen = 2 @@ -133,7 +134,6 @@ def toggle_run_args(default_run_args, request): @pytest.mark.llm @requires_pt_ge('2.2') def test_small_models_toggle_run_args(caplog, toggle_run_args, small_models_with_ppl): - from brevitas_examples.llm.main import main caplog.set_level(logging.INFO) args = toggle_run_args args.model = small_models_with_ppl.name @@ -179,7 +179,6 @@ def acc_args_and_acc(default_run_args, request): @pytest.mark.llm @requires_pt_ge('2.2') def test_small_models_acc(caplog, acc_args_and_acc): - from brevitas_examples.llm.main import main caplog.set_level(logging.INFO) args, exp_float_ppl, exp_quant_ppl = acc_args_and_acc float_ppl, quant_ppl, model = main(args) @@ -303,7 +302,6 @@ def layer_args(default_run_args, request): @pytest.mark.llm @requires_pt_ge('2.2') def test_small_models_quant_layer(caplog, layer_args): - from brevitas_examples.llm.main import main caplog.set_level(logging.INFO) args, exp_layer_types = layer_args float_ppl, quant_ppl, model = main(args) @@ -368,7 +366,6 @@ def torch_export_args(default_run_args, request): @jit_disabled_for_export() @requires_pt_ge('2.2') def test_small_models_torch_export(caplog, torch_export_args): - from brevitas_examples.llm.main import main caplog.set_level(logging.INFO) args = torch_export_args float_ppl, quant_ppl, model = main(args) From 9e3638b417ac1714b877be4ab732172cec1a4d38 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Fri, 23 Aug 2024 14:07:14 +0100 Subject: [PATCH 37/53] Revert "test (ex/llm): Removed onnx dependency when collecting tests." This reverts commit d4509c6e8cc56188b6b4f64af52c55fb4f323f79. 
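Same rationale as the previous revert: the dedicated nox session installs the `export` extras (and onnx with them), so a module-level `import onnx` no longer breaks test collection. The deferred variant being removed looked like this:

    def test_small_models_onnx_export(caplog, onnx_export_args):
        import onnx  # deferred so collection succeeded without the export extras
        ...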
--- tests/brevitas_examples/test_llm.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/brevitas_examples/test_llm.py b/tests/brevitas_examples/test_llm.py index 9e12fc3cf..b0eae9dcf 100644 --- a/tests/brevitas_examples/test_llm.py +++ b/tests/brevitas_examples/test_llm.py @@ -8,6 +8,7 @@ import shutil import numpy as np +import onnx import pytest import torch @@ -331,9 +332,6 @@ def onnx_export_args(default_run_args, request): @jit_disabled_for_export() @requires_pt_ge('2.2') def test_small_models_onnx_export(caplog, onnx_export_args): - import onnx - - from brevitas_examples.llm.main import main caplog.set_level(logging.INFO) args = onnx_export_args float_ppl, quant_ppl, model = main(args) From a60911e2ee5a67b02dad91267c1118b1b2310c95 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Fri, 23 Aug 2024 14:08:23 +0100 Subject: [PATCH 38/53] Precommit --- noxfile.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/noxfile.py b/noxfile.py index ff0ea3d29..2424e655a 100644 --- a/noxfile.py +++ b/noxfile.py @@ -108,7 +108,13 @@ def tests_brevitas_examples_cpu(session, pytorch, jit_status): install_pytorch(pytorch, session) install_torchvision(pytorch, session) # For CV eval scripts session.install('--upgrade', '.[test, tts, stt, vision]') - session.run('pytest', '-n', 'logical', '--ignore-glob', 'tests/brevitas_examples/*llm*', 'tests/brevitas_examples') + session.run( + 'pytest', + '-n', + 'logical', + '--ignore-glob', + 'tests/brevitas_examples/*llm*', + 'tests/brevitas_examples') @nox.session(python=PYTHON_VERSIONS) From a48c74e8f5f86c7aef01cdde6b51ce104ba7bb1e Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Fri, 23 Aug 2024 14:14:22 +0100 Subject: [PATCH 39/53] test (nox): Updated pytorch torchvision version list --- noxfile.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/noxfile.py b/noxfile.py index 2424e655a..27c469592 100644 --- a/noxfile.py +++ b/noxfile.py @@ -29,7 +29,10 @@ '1.12.1': '0.13.1', '1.13.0': '0.14.0', '2.0.1': '0.15.2', - '2.1.0': '0.16.0'} + '2.1.0': '0.16.0', + '2.2.2': '0.17.2', + '2.3.1': '0.18.1', + '2.4.0': '0.19.0'} PARSED_TORCHVISION_VERSION_DICT = {version.parse(k): v for k, v in TORCHVISION_VERSION_DICT.items()} @@ -124,6 +127,7 @@ def tests_brevitas_examples_cpu(session, pytorch, jit_status): def tests_brevitas_examples_llm(session, pytorch, jit_status): session.env['BREVITAS_JIT'] = '{}'.format(int(jit_status == 'jit_enabled')) install_pytorch(pytorch, session) + install_torchvision(pytorch, session) # Optimum seems to require torchvision session.install( '-e', '.[test, llm, export]', f'torch=={pytorch}' if IS_OSX else f'torch=={pytorch}+cpu') session.run('pytest', '-n', 'logical', '-k', 'llm', 'tests/brevitas_examples/test_llm.py') From 2c5e01adf1efe4d51b353cb4e47c3589c96e8550 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Fri, 23 Aug 2024 14:34:49 +0100 Subject: [PATCH 40/53] Updated install method for PT>=2.4 --- noxfile.py | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/noxfile.py b/noxfile.py index 27c469592..8ffdd0c53 100644 --- a/noxfile.py +++ b/noxfile.py @@ -7,6 +7,7 @@ import nox from packaging import version +from packaging.version import parse sys.path.append(os.path.join(os.path.dirname(__file__), os.path.join('.', '.github', 'workflows'))) from gen_github_actions import EXAMPLES_LLM_PYTEST_PYTORCH_VERSIONS @@ -15,7 +16,8 @@ from gen_github_actions import PYTORCH_VERSIONS IS_OSX = system() == 'Darwin' 
-PYTORCH_STABLE_WHEEL_SRC = 'https://download.pytorch.org/whl/torch_stable.html' +PYTORCH_STABLE_WHEEL_SRC = 'https://download.pytorch.org/whl/cpu' +PYTORCH_STABLE_WHEEL_SRC_LEGACY = 'https://download.pytorch.org/whl/torch_stable.html' PYTORCH_IDS = tuple([f'pytorch_{i}' for i in PYTORCH_VERSIONS]) EXAMPLES_LLM_PYTEST_PYTORCH_IDS = tuple([ f'pytorch_{i}' for i in EXAMPLES_LLM_PYTEST_PYTORCH_VERSIONS]) @@ -39,7 +41,11 @@ def install_pytorch(pytorch, session): if not IS_OSX: - cmd = [f'torch=={pytorch}+cpu', '-f', PYTORCH_STABLE_WHEEL_SRC] + if parse(pytorch) < parse('2.4.0'): + cmd = [f'torch=={pytorch}+cpu', '-f', PYTORCH_STABLE_WHEEL_SRC_LEGACY] + else: + cmd = [f'torch=={pytorch}', '--index-url', PYTORCH_STABLE_WHEEL_SRC] + else: cmd = [f'torch=={pytorch}'] session.install(*cmd) @@ -48,11 +54,18 @@ def install_pytorch(pytorch, session): def install_torchvision(pytorch, session): torchvision = PARSED_TORCHVISION_VERSION_DICT[version.parse(pytorch)] if not IS_OSX: - cmd = [ - f'torch=={pytorch}+cpu', # make sure correct pytorch version is kept - f'torchvision=={torchvision}+cpu', - '-f', - PYTORCH_STABLE_WHEEL_SRC] + if parse(pytorch) < parse('2.4.0'): + cmd = [ + f'torch=={pytorch}+cpu', # make sure correct pytorch version is kept + f'torchvision=={torchvision}+cpu', + '-f', + PYTORCH_STABLE_WHEEL_SRC_LEGACY] + else: + cmd = [ + f'torch=={pytorch}', + f'torchvision=={torchvision}', + '--index-url', + PYTORCH_STABLE_WHEEL_SRC] else: cmd = [f'torch=={pytorch}', f'torchvision=={torchvision}'] session.install(*cmd) From 36c60377b27ccbd6ef738d4a2dc4134141fd53a0 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Fri, 23 Aug 2024 14:41:14 +0100 Subject: [PATCH 41/53] test (llm/nox/gha): Updated env setup --- .github/workflows/gen_github_actions.py | 2 +- .github/workflows/reduced_examples_llm_pytest.yml | 2 +- noxfile.py | 3 +-- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/gen_github_actions.py b/.github/workflows/gen_github_actions.py index 316d21a4d..2c4908a6c 100644 --- a/.github/workflows/gen_github_actions.py +++ b/.github/workflows/gen_github_actions.py @@ -28,7 +28,7 @@ EXAMPLES_LLM_PYTEST_MATRIX_REDUCED = od([('python_version', list(PYTHON_VERSIONS_REDUCED)), ('pytorch_version', list( - ('2.2.2',))), ('platform', PLATFORM_LIST_REDUCED)]) + ('2.4.0',))), ('platform', PLATFORM_LIST_REDUCED)]) FINN_MATRIX_REDUCED = od([('python_version', list(PYTHON_VERSIONS_REDUCED)), ('pytorch_version', list(PYTORCH_LIST_REDUCED)), diff --git a/.github/workflows/reduced_examples_llm_pytest.yml b/.github/workflows/reduced_examples_llm_pytest.yml index ca7fa1dc6..b9c3deffe 100644 --- a/.github/workflows/reduced_examples_llm_pytest.yml +++ b/.github/workflows/reduced_examples_llm_pytest.yml @@ -18,7 +18,7 @@ jobs: matrix: python_version: ['3.8'] - pytorch_version: ['2.2.2'] + pytorch_version: ['2.4.0'] platform: ['ubuntu-latest'] jit_status: ['jit_disabled'] diff --git a/noxfile.py b/noxfile.py index 8ffdd0c53..17a38789d 100644 --- a/noxfile.py +++ b/noxfile.py @@ -141,8 +141,7 @@ def tests_brevitas_examples_llm(session, pytorch, jit_status): session.env['BREVITAS_JIT'] = '{}'.format(int(jit_status == 'jit_enabled')) install_pytorch(pytorch, session) install_torchvision(pytorch, session) # Optimum seems to require torchvision - session.install( - '-e', '.[test, llm, export]', f'torch=={pytorch}' if IS_OSX else f'torch=={pytorch}+cpu') + session.install('-e', '.[test, llm, export]') session.run('pytest', '-n', 'logical', '-k', 'llm', 'tests/brevitas_examples/test_llm.py') From 
6215aa9353171a4707692196c16d8d8e2a53c5cf Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Fri, 23 Aug 2024 14:49:13 +0100 Subject: [PATCH 42/53] test (ex/llm): Updated test settings across multiple versions --- tests/brevitas_examples/test_llm.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/brevitas_examples/test_llm.py b/tests/brevitas_examples/test_llm.py index b0eae9dcf..86d91ea57 100644 --- a/tests/brevitas_examples/test_llm.py +++ b/tests/brevitas_examples/test_llm.py @@ -24,11 +24,11 @@ def ptid2pathname(string): def allclose(x, y): - return np.allclose(x, y, rtol=1e-04, atol=3e-00, equal_nan=False) + return np.allclose(x, y, rtol=1e-04, atol=1e+01, equal_nan=False) def allveryclose(x, y): - return np.allclose(x, y, rtol=1e-08, atol=1e-01, equal_nan=False) + return np.allclose(x, y, rtol=1e-08, atol=1e+01, equal_nan=False) def allexact(x, y): @@ -133,7 +133,7 @@ def toggle_run_args(default_run_args, request): @pytest.mark.llm -@requires_pt_ge('2.2') +@requires_pt_ge('2.4') def test_small_models_toggle_run_args(caplog, toggle_run_args, small_models_with_ppl): caplog.set_level(logging.INFO) args = toggle_run_args @@ -178,7 +178,7 @@ def acc_args_and_acc(default_run_args, request): @pytest.mark.llm -@requires_pt_ge('2.2') +@requires_pt_ge('2.4') def test_small_models_acc(caplog, acc_args_and_acc): caplog.set_level(logging.INFO) args, exp_float_ppl, exp_quant_ppl = acc_args_and_acc @@ -301,7 +301,7 @@ def layer_args(default_run_args, request): @pytest.mark.llm -@requires_pt_ge('2.2') +@requires_pt_ge('2.4') def test_small_models_quant_layer(caplog, layer_args): caplog.set_level(logging.INFO) args, exp_layer_types = layer_args From 3cac63e96b06028a6f23a8baf894ee09c9f060bf Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Fri, 23 Aug 2024 17:13:02 +0100 Subject: [PATCH 43/53] test (ex/llm): partitioned tests between pytorch versions. 
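The OPT cases (everything going through `--replace-mha`) only run on PyTorch >= 2.4, while the remaining models work from the optimum-amd floor of 2.2, so the shared parametrizations are split into separate fixtures and `_pt_ge_2_4` test variants rather than gating entire tests at the highest version. For context, a sketch of what a version gate like `requires_pt_ge` could look like; this is an assumption for illustration, the real helper lives in tests/marker.py and may differ:

    import pytest
    import torch
    from packaging import version

    def requires_pt_ge(min_version):
        # skip (rather than fail) when the installed torch predates min_version
        return pytest.mark.skipif(
            version.parse(torch.__version__) < version.parse(min_version),
            reason=f"Requires PyTorch >= {min_version}")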
--- tests/brevitas_examples/test_llm.py | 112 ++++++++++++++++++++++------ 1 file changed, 91 insertions(+), 21 deletions(-) diff --git a/tests/brevitas_examples/test_llm.py b/tests/brevitas_examples/test_llm.py index 86d91ea57..2873debd1 100644 --- a/tests/brevitas_examples/test_llm.py +++ b/tests/brevitas_examples/test_llm.py @@ -24,11 +24,11 @@ def ptid2pathname(string): def allclose(x, y): - return np.allclose(x, y, rtol=1e-04, atol=1e+01, equal_nan=False) + return np.allclose(x, y, rtol=1e-03, atol=1e+01, equal_nan=False) def allveryclose(x, y): - return np.allclose(x, y, rtol=1e-08, atol=1e+01, equal_nan=False) + return np.allclose(x, y, rtol=1e-04, atol=2e+02, equal_nan=False) def allexact(x, y): @@ -74,11 +74,6 @@ class ModelAndPpl: float_ppl=None, supports_fx=True, ), - ModelAndPpl( - name="hf-internal-testing/tiny-random-OPTForCausalLM", - float_ppl=None, - supports_fx=True, - ), ModelAndPpl( name="hf-internal-testing/tiny-random-MistralForCausalLM", float_ppl=None, @@ -133,7 +128,7 @@ def toggle_run_args(default_run_args, request): @pytest.mark.llm -@requires_pt_ge('2.4') +@requires_pt_ge('2.2') def test_small_models_toggle_run_args(caplog, toggle_run_args, small_models_with_ppl): caplog.set_level(logging.INFO) args = toggle_run_args @@ -145,6 +140,32 @@ def test_small_models_toggle_run_args(caplog, toggle_run_args, small_models_with float_ppl, quant_ppl, model = main(args) +@pytest.fixture( + scope="session", + params=[ + ModelAndPpl( + name="hf-internal-testing/tiny-random-OPTForCausalLM", + float_ppl=None, + supports_fx=True, + ),]) +def small_models_with_ppl_pt_ge_2_4(request): + yield request.param + + +@pytest.mark.llm +@requires_pt_ge('2.4') +def test_small_models_toggle_run_args_pt_ge_2_4( + caplog, toggle_run_args, small_models_with_ppl_pt_ge_2_4): + caplog.set_level(logging.INFO) + args = toggle_run_args + args.model = small_models_with_ppl_pt_ge_2_4.name + exp_float_ppl = small_models_with_ppl_pt_ge_2_4.float_ppl + use_fx = requires_fx(args) + if use_fx and not small_models_with_ppl_pt_ge_2_4.supports_fx: + pytest.xfail(f"{small_models_with_ppl.name} does not support FX") + float_ppl, quant_ppl, model = main(args) + + @pytest.fixture( params=[ { @@ -158,7 +179,36 @@ def test_small_models_toggle_run_args(caplog, toggle_run_args, small_models_with "act_equalization": "fx", "bias_corr": True, "float_ppl": 33239.5, - "quant_ppl": 33283.75390625}, + "quant_ppl": 33283.75390625},]) +def acc_args_and_acc(default_run_args, request): + args = default_run_args + run_dict = request.param + float_ppl = run_dict["float_ppl"] + quant_ppl = run_dict["quant_ppl"] + del run_dict["float_ppl"] + del run_dict["quant_ppl"] + args.update(**run_dict) + yield args, float_ppl, quant_ppl + + +@pytest.mark.llm +@requires_pt_ge('2.2') +def test_small_models_acc(caplog, acc_args_and_acc): + caplog.set_level(logging.INFO) + args, exp_float_ppl, exp_quant_ppl = acc_args_and_acc + float_ppl, quant_ppl, model = main(args) + float_ppl = float_ppl.detach().cpu().numpy() + quant_ppl = quant_ppl.detach().cpu().numpy() + if config.JIT_ENABLED: + assert allclose(exp_float_ppl, float_ppl), f"Expected float PPL {exp_float_ppl}, measured PPL {float_ppl}" + assert allclose(exp_quant_ppl, quant_ppl), f"Expected quant PPL {exp_quant_ppl}, measured PPL {quant_ppl}" + else: + assert allveryclose(exp_float_ppl, float_ppl), f"Expected float PPL {exp_float_ppl}, measured PPL {float_ppl}" + assert allveryclose(exp_quant_ppl, quant_ppl), f"Expected quant PPL {exp_quant_ppl}, measured PPL {quant_ppl}" + + 
+@pytest.fixture( + params=[ { "model": "hf-internal-testing/tiny-random-OPTForCausalLM", "weight_equalization": True, @@ -166,7 +216,7 @@ def test_small_models_toggle_run_args(caplog, toggle_run_args, small_models_with "replace_mha": True, "float_ppl": 50016.0, "quant_ppl": 50016.0},]) -def acc_args_and_acc(default_run_args, request): +def acc_args_and_acc_pt_ge_2_4(default_run_args, request): args = default_run_args run_dict = request.param float_ppl = run_dict["float_ppl"] @@ -179,9 +229,9 @@ def acc_args_and_acc(default_run_args, request): @pytest.mark.llm @requires_pt_ge('2.4') -def test_small_models_acc(caplog, acc_args_and_acc): +def test_small_models_acc_pt_ge_2_4(caplog, acc_args_and_acc_pt_ge_2_4): caplog.set_level(logging.INFO) - args, exp_float_ppl, exp_quant_ppl = acc_args_and_acc + args, exp_float_ppl, exp_quant_ppl = acc_args_and_acc_pt_ge_2_4 float_ppl, quant_ppl, model = main(args) float_ppl = float_ppl.detach().cpu().numpy() quant_ppl = quant_ppl.detach().cpu().numpy() @@ -278,14 +328,6 @@ def test_small_models_acc(caplog, acc_args_and_acc): "", "model.layers.0.self_attn.q_proj.layer": "",}}, - { - "model": "hf-internal-testing/tiny-random-OPTForCausalLM", - "replace_mha": True, - "exp_layer_types": { - "model.decoder.layers.0.self_attn": - "", - "model.decoder.layers.0.self_attn.mha": - "",}}, { "model": "hf-internal-testing/tiny-random-MistralForCausalLM", "quantize_last_layer": True, @@ -301,7 +343,7 @@ def layer_args(default_run_args, request): @pytest.mark.llm -@requires_pt_ge('2.4') +@requires_pt_ge('2.2') def test_small_models_quant_layer(caplog, layer_args): caplog.set_level(logging.INFO) args, exp_layer_types = layer_args @@ -309,6 +351,34 @@ def test_small_models_quant_layer(caplog, layer_args): assert_layer_types(model, exp_layer_types) +@pytest.fixture( + params=[ + { + "model": "hf-internal-testing/tiny-random-OPTForCausalLM", + "replace_mha": True, + "exp_layer_types": { + "model.decoder.layers.0.self_attn": + "", + "model.decoder.layers.0.self_attn.mha": + "",}},]) +def layer_args_pt_ge_2_4(default_run_args, request): + args = default_run_args + layer_dict = request.param + exp_layer_types = layer_dict["exp_layer_types"] + del layer_dict["exp_layer_types"] + args.update(**layer_dict) + yield args, exp_layer_types + + +@pytest.mark.llm +@requires_pt_ge('2.4') +def test_small_models_quant_layer_pt_ge_2_4(caplog, layer_args_pt_ge_2_4): + caplog.set_level(logging.INFO) + args, exp_layer_types = layer_args_pt_ge_2_4 + float_ppl, quant_ppl, model = main(args) + assert_layer_types(model, exp_layer_types) + + @pytest.fixture( params=[ { From b0d6c63bb1c753828d7fc75985634362170d6458 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Mon, 26 Aug 2024 11:13:21 +0100 Subject: [PATCH 44/53] test (ex/llm): Check input_view_impl for MX types --- tests/brevitas_examples/test_llm.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/brevitas_examples/test_llm.py b/tests/brevitas_examples/test_llm.py index 2873debd1..9aae02614 100644 --- a/tests/brevitas_examples/test_llm.py +++ b/tests/brevitas_examples/test_llm.py @@ -314,11 +314,11 @@ def test_small_models_acc_pt_ge_2_4(caplog, acc_args_and_acc_pt_ge_2_4): "", "model.layers.0.self_attn.q_proj.input_quant.fused_activation_quant_proxy.tensor_quant": "", - "model.layers.0.self_attn.q_proj.input_quant.fused_activation_quant_proxy.tensor_quant.scaling_impl.stats_input_view_shape_impl": - "", + "model.layers.0.self_attn.q_proj.input_quant.fused_activation_quant_proxy.tensor_quant.input_view_impl": + "", 
"model.layers.0.self_attn.q_proj.weight_quant.tensor_quant": "", - "model.layers.0.self_attn.q_proj.weight_quant.tensor_quant.scaling_impl.parameter_list_stats.first_tracked_param.view_shape_impl": + "model.layers.0.self_attn.q_proj.weight_quant.tensor_quant.input_view_impl": "",}}, { "model": "hf-internal-testing/tiny-random-LlamaForCausalLM", From f5dbed207cee1335634ea169814a31f437dd24c9 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Mon, 2 Sep 2024 16:53:00 +0100 Subject: [PATCH 45/53] test (example/llm): Switched to PyTest cases. Added ids for more readable tests. --- tests/brevitas_examples/test_llm.py | 65 ++++++++++++++++++++++++----- 1 file changed, 55 insertions(+), 10 deletions(-) diff --git a/tests/brevitas_examples/test_llm.py b/tests/brevitas_examples/test_llm.py index 9aae02614..b2c89274f 100644 --- a/tests/brevitas_examples/test_llm.py +++ b/tests/brevitas_examples/test_llm.py @@ -10,6 +10,7 @@ import numpy as np import onnx import pytest +import pytest_cases import torch from brevitas import config @@ -66,8 +67,13 @@ class ModelAndPpl: supports_fx: bool -@pytest.fixture( +@pytest_cases.fixture( scope="session", + ids=[ + "llama", + "mistral", + #"mixtral", + ], params=[ ModelAndPpl( name="hf-internal-testing/tiny-random-LlamaForCausalLM", @@ -89,7 +95,7 @@ def small_models_with_ppl(request): yield request.param -@pytest.fixture() +@pytest_cases.fixture() def default_run_args(request): args = UpdatableNamespace(**vars(parse_args([]))) args.nsamples = 2 @@ -106,7 +112,16 @@ def default_run_args(request): return args -@pytest.fixture( +@pytest_cases.fixture( + ids=[ + "defaults", + "bias_corr=True", + "act_equalization=layerwise", + "act_equalization=fx", + "weight_equalization=True", + "gptq=True", + "ln_affine_merge=True", + ], params=[ {}, { @@ -140,8 +155,11 @@ def test_small_models_toggle_run_args(caplog, toggle_run_args, small_models_with float_ppl, quant_ppl, model = main(args) -@pytest.fixture( +@pytest_cases.fixture( scope="session", + ids=[ + "opt", + ], params=[ ModelAndPpl( name="hf-internal-testing/tiny-random-OPTForCausalLM", @@ -166,7 +184,11 @@ def test_small_models_toggle_run_args_pt_ge_2_4( float_ppl, quant_ppl, model = main(args) -@pytest.fixture( +@pytest_cases.fixture( + ids=[ + "llama", + "mistral", + ], params=[ { "model": "hf-internal-testing/tiny-random-MistralForCausalLM", @@ -207,7 +229,10 @@ def test_small_models_acc(caplog, acc_args_and_acc): assert allveryclose(exp_quant_ppl, quant_ppl), f"Expected quant PPL {exp_quant_ppl}, measured PPL {quant_ppl}" -@pytest.fixture( +@pytest_cases.fixture( + ids=[ + "opt-replace-mha", + ], params=[ { "model": "hf-internal-testing/tiny-random-OPTForCausalLM", @@ -243,7 +268,16 @@ def test_small_models_acc_pt_ge_2_4(caplog, acc_args_and_acc_pt_ge_2_4): assert allveryclose(exp_quant_ppl, quant_ppl), f"Expected quant PPL {exp_quant_ppl}, measured PPL {quant_ppl}" -@pytest.fixture( +@pytest_cases.fixture( + ids=[ + "mistral-int8", + "mistral-weight-only", + "mistral-fp8_ocp", + "mistral-fp8_fnuz", + "llama-mxfp8", + "llama-int8-act_equalization=layerwise", + "mistral-int8-quant-last-layer", + ], params=[ { "model": "hf-internal-testing/tiny-random-MistralForCausalLM", @@ -351,7 +385,10 @@ def test_small_models_quant_layer(caplog, layer_args): assert_layer_types(model, exp_layer_types) -@pytest.fixture( +@pytest_cases.fixture( + ids=[ + "opt-replace-mha", + ], params=[ { "model": "hf-internal-testing/tiny-random-OPTForCausalLM", @@ -379,7 +416,11 @@ def test_small_models_quant_layer_pt_ge_2_4(caplog, 
layer_args_pt_ge_2_4): assert_layer_types(model, exp_layer_types) -@pytest.fixture( +@pytest_cases.fixture( + ids=[ + "qcdq-asym", + "qcdq-sym", + ], params=[ { "model": "hf-internal-testing/tiny-random-LlamaForCausalLM", @@ -409,7 +450,11 @@ def test_small_models_onnx_export(caplog, onnx_export_args): shutil.rmtree(args.export_prefix) -@pytest.fixture( +@pytest_cases.fixture( + ids=[ + "qcdq-asym", + "qcdq-sym", + ], params=[ { "model": "hf-internal-testing/tiny-random-LlamaForCausalLM", From c544d5730b7f3daa5961bad9c2493b9c01d091b2 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Mon, 2 Sep 2024 16:57:45 +0100 Subject: [PATCH 46/53] test (example/llm): Added comment about Mixtral case --- tests/brevitas_examples/test_llm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/brevitas_examples/test_llm.py b/tests/brevitas_examples/test_llm.py index b2c89274f..26ca4a9a4 100644 --- a/tests/brevitas_examples/test_llm.py +++ b/tests/brevitas_examples/test_llm.py @@ -85,7 +85,7 @@ class ModelAndPpl: float_ppl=None, supports_fx=False, ), - #ModelAndPpl( + #ModelAndPpl( # Ready for MoE support # name="dacorvo/Mixtral-tiny", # float_ppl=None, # supports_fx=True, From 08b8b732bb2200d73c859069b4c14e9e92e480b1 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Mon, 2 Sep 2024 17:01:34 +0100 Subject: [PATCH 47/53] pre-commit --- tests/brevitas_examples/test_llm.py | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/tests/brevitas_examples/test_llm.py b/tests/brevitas_examples/test_llm.py index 26ca4a9a4..0585b7219 100644 --- a/tests/brevitas_examples/test_llm.py +++ b/tests/brevitas_examples/test_llm.py @@ -71,8 +71,7 @@ class ModelAndPpl: scope="session", ids=[ "llama", - "mistral", - #"mixtral", + "mistral", #"mixtral", ], params=[ ModelAndPpl( @@ -120,8 +119,7 @@ def default_run_args(request): "act_equalization=fx", "weight_equalization=True", "gptq=True", - "ln_affine_merge=True", - ], + "ln_affine_merge=True",], params=[ {}, { @@ -158,8 +156,7 @@ def test_small_models_toggle_run_args(caplog, toggle_run_args, small_models_with @pytest_cases.fixture( scope="session", ids=[ - "opt", - ], + "opt",], params=[ ModelAndPpl( name="hf-internal-testing/tiny-random-OPTForCausalLM", @@ -187,8 +184,7 @@ def test_small_models_toggle_run_args_pt_ge_2_4( @pytest_cases.fixture( ids=[ "llama", - "mistral", - ], + "mistral",], params=[ { "model": "hf-internal-testing/tiny-random-MistralForCausalLM", @@ -231,8 +227,7 @@ def test_small_models_acc(caplog, acc_args_and_acc): @pytest_cases.fixture( ids=[ - "opt-replace-mha", - ], + "opt-replace-mha",], params=[ { "model": "hf-internal-testing/tiny-random-OPTForCausalLM", @@ -276,8 +271,7 @@ def test_small_models_acc_pt_ge_2_4(caplog, acc_args_and_acc_pt_ge_2_4): "mistral-fp8_fnuz", "llama-mxfp8", "llama-int8-act_equalization=layerwise", - "mistral-int8-quant-last-layer", - ], + "mistral-int8-quant-last-layer",], params=[ { "model": "hf-internal-testing/tiny-random-MistralForCausalLM", @@ -387,8 +381,7 @@ def test_small_models_quant_layer(caplog, layer_args): @pytest_cases.fixture( ids=[ - "opt-replace-mha", - ], + "opt-replace-mha",], params=[ { "model": "hf-internal-testing/tiny-random-OPTForCausalLM", @@ -419,8 +412,7 @@ def test_small_models_quant_layer_pt_ge_2_4(caplog, layer_args_pt_ge_2_4): @pytest_cases.fixture( ids=[ "qcdq-asym", - "qcdq-sym", - ], + "qcdq-sym",], params=[ { "model": "hf-internal-testing/tiny-random-LlamaForCausalLM", @@ -453,8 +445,7 @@ def test_small_models_onnx_export(caplog, 
onnx_export_args): @pytest_cases.fixture( ids=[ "qcdq-asym", - "qcdq-sym", - ], + "qcdq-sym",], params=[ { "model": "hf-internal-testing/tiny-random-LlamaForCausalLM", From 6d339908d85bf5df66829fe55a1a4dff32eb7824 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Mon, 2 Sep 2024 17:02:06 +0100 Subject: [PATCH 48/53] test (example/llm): Removed JIT clause for accuracy tests. --- tests/brevitas_examples/test_llm.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/tests/brevitas_examples/test_llm.py b/tests/brevitas_examples/test_llm.py index 0585b7219..b1871045a 100644 --- a/tests/brevitas_examples/test_llm.py +++ b/tests/brevitas_examples/test_llm.py @@ -217,12 +217,8 @@ def test_small_models_acc(caplog, acc_args_and_acc): float_ppl, quant_ppl, model = main(args) float_ppl = float_ppl.detach().cpu().numpy() quant_ppl = quant_ppl.detach().cpu().numpy() - if config.JIT_ENABLED: - assert allclose(exp_float_ppl, float_ppl), f"Expected float PPL {exp_float_ppl}, measured PPL {float_ppl}" - assert allclose(exp_quant_ppl, quant_ppl), f"Expected quant PPL {exp_quant_ppl}, measured PPL {quant_ppl}" - else: - assert allveryclose(exp_float_ppl, float_ppl), f"Expected float PPL {exp_float_ppl}, measured PPL {float_ppl}" - assert allveryclose(exp_quant_ppl, quant_ppl), f"Expected quant PPL {exp_quant_ppl}, measured PPL {quant_ppl}" + assert allveryclose(exp_float_ppl, float_ppl), f"Expected float PPL {exp_float_ppl}, measured PPL {float_ppl}" + assert allveryclose(exp_quant_ppl, quant_ppl), f"Expected quant PPL {exp_quant_ppl}, measured PPL {quant_ppl}" @pytest_cases.fixture( From e59287b116e394d7cde73f877e9bad5d784aad35 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Mon, 2 Sep 2024 17:45:36 +0100 Subject: [PATCH 49/53] test (example/test): Removed extra JIT checks. --- tests/brevitas_examples/test_llm.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/tests/brevitas_examples/test_llm.py b/tests/brevitas_examples/test_llm.py index b1871045a..7e786f4a4 100644 --- a/tests/brevitas_examples/test_llm.py +++ b/tests/brevitas_examples/test_llm.py @@ -251,12 +251,8 @@ def test_small_models_acc_pt_ge_2_4(caplog, acc_args_and_acc_pt_ge_2_4): float_ppl, quant_ppl, model = main(args) float_ppl = float_ppl.detach().cpu().numpy() quant_ppl = quant_ppl.detach().cpu().numpy() - if config.JIT_ENABLED: - assert allclose(exp_float_ppl, float_ppl), f"Expected float PPL {exp_float_ppl}, measured PPL {float_ppl}" - assert allclose(exp_quant_ppl, quant_ppl), f"Expected quant PPL {exp_quant_ppl}, measured PPL {quant_ppl}" - else: - assert allveryclose(exp_float_ppl, float_ppl), f"Expected float PPL {exp_float_ppl}, measured PPL {float_ppl}" - assert allveryclose(exp_quant_ppl, quant_ppl), f"Expected quant PPL {exp_quant_ppl}, measured PPL {quant_ppl}" + assert allveryclose(exp_float_ppl, float_ppl), f"Expected float PPL {exp_float_ppl}, measured PPL {float_ppl}" + assert allveryclose(exp_quant_ppl, quant_ppl), f"Expected quant PPL {exp_quant_ppl}, measured PPL {quant_ppl}" @pytest_cases.fixture( From 1399d844aab6dc6629a339784cf8d6d235a35ef9 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Mon, 2 Sep 2024 17:48:17 +0100 Subject: [PATCH 50/53] test (example/llm): refactor run tests. 
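The two toggle-run tests carried identical bodies and differed only in which model fixture they consumed, so the shared logic (model selection, the FX xfail check, the call into main) moves into a single `run_test_models_run_args` helper. As a side effect this removes the stale `small_models_with_ppl` reference that PATCH 43 left in the xfail message of the PT>=2.4 variant.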
--- tests/brevitas_examples/test_llm.py | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/tests/brevitas_examples/test_llm.py b/tests/brevitas_examples/test_llm.py index 7e786f4a4..55974af86 100644 --- a/tests/brevitas_examples/test_llm.py +++ b/tests/brevitas_examples/test_llm.py @@ -111,6 +111,15 @@ def default_run_args(request): return args +def run_test_models_run_args(args, model_with_ppl): + args.model = model_with_ppl.name + exp_float_ppl = model_with_ppl.float_ppl + use_fx = requires_fx(args) + if use_fx and not model_with_ppl.supports_fx: + pytest.xfail(f"{model_with_ppl.name} does not support FX") + float_ppl, quant_ppl, model = main(args) + + @pytest_cases.fixture( ids=[ "defaults", @@ -144,13 +153,7 @@ def toggle_run_args(default_run_args, request): @requires_pt_ge('2.2') def test_small_models_toggle_run_args(caplog, toggle_run_args, small_models_with_ppl): caplog.set_level(logging.INFO) - args = toggle_run_args - args.model = small_models_with_ppl.name - exp_float_ppl = small_models_with_ppl.float_ppl - use_fx = requires_fx(args) - if use_fx and not small_models_with_ppl.supports_fx: - pytest.xfail(f"{small_models_with_ppl.name} does not support FX") - float_ppl, quant_ppl, model = main(args) + run_test_models_run_args(toggle_run_args, small_models_with_ppl) @pytest_cases.fixture( @@ -172,13 +175,7 @@ def small_models_with_ppl_pt_ge_2_4(request): def test_small_models_toggle_run_args_pt_ge_2_4( caplog, toggle_run_args, small_models_with_ppl_pt_ge_2_4): caplog.set_level(logging.INFO) - args = toggle_run_args - args.model = small_models_with_ppl_pt_ge_2_4.name - exp_float_ppl = small_models_with_ppl_pt_ge_2_4.float_ppl - use_fx = requires_fx(args) - if use_fx and not small_models_with_ppl_pt_ge_2_4.supports_fx: - pytest.xfail(f"{small_models_with_ppl.name} does not support FX") - float_ppl, quant_ppl, model = main(args) + run_test_models_run_args(toggle_run_args, small_models_with_ppl_pt_ge_2_4) @pytest_cases.fixture( From 0e762f77a60f8776a544d712edd3b4ed2251463c Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Mon, 2 Sep 2024 18:03:06 +0100 Subject: [PATCH 51/53] test (example/llm): Added comments about PT versions. 
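The comments record the effective support matrix: the LLM example as a whole needs PyTorch >= 2.2 via its optimum-amd dependency, while the OPT/`--replace-mha` path needs >= 2.4, matching the nox/CI matrix of 2.2.2/2.3.1/2.4.0 introduced in PATCH 32.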
--- tests/brevitas_examples/test_llm.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/brevitas_examples/test_llm.py b/tests/brevitas_examples/test_llm.py index 55974af86..e57bf46cd 100644 --- a/tests/brevitas_examples/test_llm.py +++ b/tests/brevitas_examples/test_llm.py @@ -14,6 +14,7 @@ import torch from brevitas import config +# LLM example depends on optimum-amd, which requires PyTorch>=2.2 from brevitas_examples.llm.main import main from brevitas_examples.llm.main import parse_args from tests.marker import jit_disabled_for_export @@ -162,7 +163,7 @@ def test_small_models_toggle_run_args(caplog, toggle_run_args, small_models_with "opt",], params=[ ModelAndPpl( - name="hf-internal-testing/tiny-random-OPTForCausalLM", + name="hf-internal-testing/tiny-random-OPTForCausalLM", # Requires PT>=2.4 to run float_ppl=None, supports_fx=True, ),]) @@ -223,7 +224,7 @@ def test_small_models_acc(caplog, acc_args_and_acc): "opt-replace-mha",], params=[ { - "model": "hf-internal-testing/tiny-random-OPTForCausalLM", + "model": "hf-internal-testing/tiny-random-OPTForCausalLM", # Requires PT>=2.4 to run "weight_equalization": True, "ln_affine_merge": True, "replace_mha": True, @@ -373,7 +374,7 @@ def test_small_models_quant_layer(caplog, layer_args): "opt-replace-mha",], params=[ { - "model": "hf-internal-testing/tiny-random-OPTForCausalLM", + "model": "hf-internal-testing/tiny-random-OPTForCausalLM", # Requires PT>=2.4 to run "replace_mha": True, "exp_layer_types": { "model.decoder.layers.0.self_attn": From 6e9283a87ebb8bc42d8aa281952de3c116cbadf0 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Tue, 10 Sep 2024 12:03:13 +0100 Subject: [PATCH 52/53] test (example/llm): Added tests to ensure all args to `main` are also in `parse_args` --- tests/brevitas_examples/test_llm.py | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/tests/brevitas_examples/test_llm.py b/tests/brevitas_examples/test_llm.py index e57bf46cd..8fa406c78 100644 --- a/tests/brevitas_examples/test_llm.py +++ b/tests/brevitas_examples/test_llm.py @@ -37,6 +37,20 @@ def allexact(x, y): return np.allclose(x, y, rtol=0.0, atol=0.0, equal_nan=False) +# Check that all args in args are used +def validate_args(args): + a = vars(args) + da = vars(parse_args([])) + for k in a.keys(): + assert k in da.keys(), f"Key {k} does not seem to be a valid argument for `main`" + + +def validate_args_and_run_main(args): + validate_args(args) + float_ppl, quant_ppl, model = main(args) + return float_ppl, quant_ppl, model + + def assert_layer_types(model, exp_layer_types): for key, string in exp_layer_types.items(): matched = False @@ -118,7 +132,7 @@ def run_test_models_run_args(args, model_with_ppl): use_fx = requires_fx(args) if use_fx and not model_with_ppl.supports_fx: pytest.xfail(f"{model_with_ppl.name} does not support FX") - float_ppl, quant_ppl, model = main(args) + float_ppl, quant_ppl, model = validate_args_and_run_main(args) @pytest_cases.fixture( @@ -212,7 +226,7 @@ def acc_args_and_acc(default_run_args, request): def test_small_models_acc(caplog, acc_args_and_acc): caplog.set_level(logging.INFO) args, exp_float_ppl, exp_quant_ppl = acc_args_and_acc - float_ppl, quant_ppl, model = main(args) + float_ppl, quant_ppl, model = validate_args_and_run_main(args) float_ppl = float_ppl.detach().cpu().numpy() quant_ppl = quant_ppl.detach().cpu().numpy() assert allveryclose(exp_float_ppl, float_ppl), f"Expected float PPL {exp_float_ppl}, measured PPL {float_ppl}" @@ -246,7 +260,7 @@ 
def acc_args_and_acc_pt_ge_2_4(default_run_args, request): def test_small_models_acc_pt_ge_2_4(caplog, acc_args_and_acc_pt_ge_2_4): caplog.set_level(logging.INFO) args, exp_float_ppl, exp_quant_ppl = acc_args_and_acc_pt_ge_2_4 - float_ppl, quant_ppl, model = main(args) + float_ppl, quant_ppl, model = validate_args_and_run_main(args) float_ppl = float_ppl.detach().cpu().numpy() quant_ppl = quant_ppl.detach().cpu().numpy() assert allveryclose(exp_float_ppl, float_ppl), f"Expected float PPL {exp_float_ppl}, measured PPL {float_ppl}" @@ -365,7 +379,7 @@ def layer_args(default_run_args, request): def test_small_models_quant_layer(caplog, layer_args): caplog.set_level(logging.INFO) args, exp_layer_types = layer_args - float_ppl, quant_ppl, model = main(args) + float_ppl, quant_ppl, model = validate_args_and_run_main(args) assert_layer_types(model, exp_layer_types) @@ -395,7 +409,7 @@ def layer_args_pt_ge_2_4(default_run_args, request): def test_small_models_quant_layer_pt_ge_2_4(caplog, layer_args_pt_ge_2_4): caplog.set_level(logging.INFO) args, exp_layer_types = layer_args_pt_ge_2_4 - float_ppl, quant_ppl, model = main(args) + float_ppl, quant_ppl, model = validate_args_and_run_main(args) assert_layer_types(model, exp_layer_types) @@ -427,7 +441,7 @@ def onnx_export_args(default_run_args, request): def test_small_models_onnx_export(caplog, onnx_export_args): caplog.set_level(logging.INFO) args = onnx_export_args - float_ppl, quant_ppl, model = main(args) + float_ppl, quant_ppl, model = validate_args_and_run_main(args) onnx_model = onnx.load(os.path.join(args.export_prefix, "model.onnx")) shutil.rmtree(args.export_prefix) @@ -462,7 +476,7 @@ def torch_export_args(default_run_args, request): def test_small_models_torch_export(caplog, torch_export_args): caplog.set_level(logging.INFO) args = torch_export_args - float_ppl, quant_ppl, model = main(args) + float_ppl, quant_ppl, model = validate_args_and_run_main(args) filepath = args.export_prefix + ".pt" torchscript_model = torch.jit.load(filepath) os.remove(filepath) From d07119913135d489302f411f329890966b16ce1b Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Wed, 11 Sep 2024 12:26:28 +0100 Subject: [PATCH 53/53] test (example/llm): fixed indentation for `toggle_run_args` --- tests/brevitas_examples/test_llm.py | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/tests/brevitas_examples/test_llm.py b/tests/brevitas_examples/test_llm.py index 8fa406c78..6a98911a9 100644 --- a/tests/brevitas_examples/test_llm.py +++ b/tests/brevitas_examples/test_llm.py @@ -135,6 +135,7 @@ def run_test_models_run_args(args, model_with_ppl): float_ppl, quant_ppl, model = validate_args_and_run_main(args) +# yapf: disable @pytest_cases.fixture( ids=[ "defaults", @@ -146,18 +147,13 @@ def run_test_models_run_args(args, model_with_ppl): "ln_affine_merge=True",], params=[ {}, - { - "bias_corr": True}, - { - "act_equalization": "layerwise"}, - { - "act_equalization": "fx"}, - { - "weight_equalization": True}, - { - "gptq": True}, - { - "ln_affine_merge": True},]) + {"bias_corr": True}, + {"act_equalization": "layerwise"}, + {"act_equalization": "fx"}, + {"weight_equalization": True}, + {"gptq": True}, + {"ln_affine_merge": True},]) +# yapf: enable def toggle_run_args(default_run_args, request): args = default_run_args args.update(**request.param)
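A closing note on the two patches above. The validate_args guard added in
PATCH 52 works because argparse produces a plain Namespace whose attributes
can be enumerated with vars(); comparing a hand-mutated namespace against
vars(parse_args([])) turns a misspelled or stale option into a hard failure
instead of a silently ignored attribute. A minimal self-contained
illustration of the idea, with hypothetical option names:

    import argparse

    def parse_args(argv):
        parser = argparse.ArgumentParser()
        parser.add_argument('--gptq', action='store_true')
        parser.add_argument('--bias-corr', action='store_true')
        return parser.parse_args(argv)

    # Every attribute on the namespace must match a declared argument.
    defaults = vars(parse_args([]))

    def validate(ns):
        for key in vars(ns):
            assert key in defaults, f"Key {key} does not seem to be a valid argument for `main`"

    good = parse_args([])
    good.gptq = True  # matches a declared argument
    validate(good)    # passes

    bad = parse_args([])
    bad.gtpq = True   # typo silently creates a stray attribute
    try:
        validate(bad)
    except AssertionError as exc:
        print(exc)    # Key gtpq does not seem to be a valid argument for `main`

The # yapf: disable / # yapf: enable pair in PATCH 53 uses standard yapf
pragmas: the formatter leaves the region between them untouched, so the
compact one-dict-per-line params list survives future formatting runs.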